diff --git a/.github/workflows/compiler-build.yml b/.github/workflows/compiler-build.yml
index c5c082e802..3a3e600c21 100644
--- a/.github/workflows/compiler-build.yml
+++ b/.github/workflows/compiler-build.yml
@@ -17,7 +17,7 @@ jobs:
strategy:
matrix:
config:
- - {name: x86_64-macos, os: macos-latest, cmakeArgs: -DENABLE_X86SIMD=OFF, buildType: Release}
+ - {name: aarch64-macos, os: macos-14, cmakeArgs: '', buildType: Release}
- {name: x86_64-linux, os: ubuntu-latest, cmakeArgs: '', buildType: Release}
- {name: x86_64-windows, os: windows-latest, arch: x64, cmakeArgs: -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl, buildType: Release}
@@ -25,22 +25,17 @@ jobs:
- uses: actions/checkout@v3
- uses: seanmiddleditch/gha-setup-ninja@master
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+
- name: Set up build environment (Windows, Visual Studio)
uses: ilammy/msvc-dev-cmd@v1
with:
arch: ${{matrix.config.arch}}
if: runner.os == 'Windows'
- - name: Set up build environment (Macos)
- run: |
- brew install sunnycase/core/libomp@11.1.0
- if: runner.os == 'Macos'
-
- - name: Setup Python
- uses: actions/setup-python@v4
- with:
- python-version: 3.7
-
- name: Install Conan
shell: bash
run: |
@@ -54,6 +49,13 @@ jobs:
echo "CXX=g++-10" >> $GITHUB_ENV
if: runner.os == 'Linux'
+ - name: Configure Conan (Macos)
+ run: |
+ conan config init
+ sed -i '' 's/xtensalx7]/xtensalx7, arm64]/g' ~/.conan/settings.yml
+ sed -i '' 's/"14.0"]/"14.0", "15"]/g' ~/.conan/settings.yml
+ if: runner.os == 'Macos'
+
- name: Configure CMake
shell: bash
run: |
@@ -79,12 +81,14 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- - {name: x86_64-macos, os: macos-latest, shell: bash, rid: osx-x64, buildType: Release}
+ - {name: aarch64-macos, os: macos-14, shell: bash, rid: osx-arm64, buildType: Release}
- {name: x86_64-linux, os: ubuntu-latest, shell: bash, rid: linux-x64, buildType: Release}
- - {name: x86_64-windows, os: windows-latest, shell: bash, rid: win-x64, buildType: Release}
+ - {name: x86_64-windows, os: windows-latest, arch: x64, shell: bash, rid: win-x64, buildType: Release}
steps:
- uses: actions/checkout@v2
+ - uses: seanmiddleditch/gha-setup-ninja@master
+
- name: Setup .NET
uses: actions/setup-dotnet@v1
with:
@@ -104,11 +108,6 @@ jobs:
name: nncase-native-${{matrix.config.name}}
path: ${{github.workspace}}/install
- - name: Set up build environment (Macos)
- run: |
- brew install sunnycase/core/libomp@11.1.0
- if: runner.os == 'Macos'
-
- name: Build
run: |
dotnet restore -r ${{matrix.config.rid}}
@@ -142,7 +141,7 @@ jobs:
working-directory: ${{github.workspace}}
run: |
dotnet tool install --global dotnet-coverage
- dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/unit.xml "dotnet test -c ${{matrix.config.buildType}} -s test.runsettings --no-build --verbosity normal"
+ dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/unit.xml "dotnet test -c ${{matrix.config.buildType}} -s test.runsettings --no-build --verbosity normal --blame"
dotnet-coverage merge -o coverage.unit.xml -f cobertura -r coverage/*.xml
- name: Upload Coverage
@@ -168,20 +167,29 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- - {name: x86_64-macos, os: macos-latest, shell: bash}
+ - {name: aarch64-macos, os: macos-14, shell: bash}
- {name: x86_64-linux, os: ubuntu-latest, shell: bash}
- - {name: x86_64-windows, os: windows-latest, shell: bash}
+ - {name: x86_64-windows, os: windows-latest, arch: x64, shell: bash}
env:
- VULKANSDK_VER: 1.3.268.0
+ VULKANSDK_VER: 1.3.280.0
steps:
- uses: actions/checkout@v3
+ - uses: seanmiddleditch/gha-setup-ninja@master
+
- name: Setup .NET
uses: actions/setup-dotnet@v1
with:
dotnet-version: ${{matrix.dotnet-version}}
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+ cache: 'pip'
+ cache-dependency-path: '**/requirements.test.txt'
+
- name: Install nncase native Artifact
uses: actions/download-artifact@v3
with:
@@ -196,16 +204,11 @@ jobs:
- name: Set up test environment (macOS)
run: |
- brew install sunnycase/core/libomp@11.1.0
- aria2c --parameterized-uri=true https://{sdk.lunarg.com/sdk/download/${VULKANSDK_VER}/mac,distfiles.macports.org/MoltenVK}/vulkansdk-macos-${VULKANSDK_VER}.dmg
+ aria2c --parameterized-uri=true https://sdk.lunarg.com/sdk/download/${VULKANSDK_VER}/mac/vulkansdk-macos-${VULKANSDK_VER}.dmg
hdiutil attach ./vulkansdk-macos-*.dmg
sudo /Volumes/vulkansdk-macos-*/InstallVulkan.app/Contents/MacOS/InstallVulkan --root $HOME/VulkanSDK --accept-licenses --default-answer --confirm-command install
hdiutil detach /Volumes/vulkansdk-macos-*
echo "VULKAN_SDK=$HOME/VulkanSDK/macOS" >> $GITHUB_ENV
- wget https://github.com/sunnycase/swiftshader/releases/download/v1.0/swiftshader-macos-10.15-x86_64.zip -O swiftshader.zip
- unzip swiftshader.zip
- sudo cmake -E make_directory /usr/local/share/vulkan/icd.d
- sudo cp lib/* /usr/local/share/vulkan/icd.d
cp install/lib/*.dylib install/
echo "PYTHONPATH=$GITHUB_WORKSPACE/install/lib:$GITHUB_WORKSPACE/install/python:$GITHUB_WORKSPACE/tests" >> $GITHUB_ENV
if: runner.os == 'macOS'
@@ -232,18 +235,12 @@ jobs:
Expand-Archive swiftshader.zip
Copy-Item swiftshader\lib\vk_swiftshader_icd.json swiftshader\bin\
Copy-Item install/bin/*.dll install/
+ Copy-Item install/bin/*.dll install/lib/
echo "VK_ICD_FILENAMES=${env:GITHUB_WORKSPACE}/swiftshader/bin/vk_swiftshader_icd.json" >> $env:GITHUB_ENV
echo "PYTHONPATH=${env:GITHUB_WORKSPACE}/install/lib;${env:GITHUB_WORKSPACE}/install/python;${env:GITHUB_WORKSPACE}/tests" >> $env:GITHUB_ENV
echo "PATH=${env:PATH};${env:GITHUB_WORKSPACE}/install/bin" >> $env:GITHUB_ENV
if: runner.os == 'Windows'
- - name: Setup Python
- uses: actions/setup-python@v4
- with:
- python-version: 3.7
- cache: 'pip'
- cache-dependency-path: '**/requirements.test.txt'
-
- name: Install Python Packages
run:
python -m pip install --upgrade pip
@@ -263,7 +260,7 @@ jobs:
dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/onnx_combine.xml pytest tests/importer/onnx_/combine/ --doctest-modules --junitxml=test_results/onnx_combine.xml
dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/tflite_basic.xml pytest tests/importer/tflite_/basic/ --doctest-modules --junitxml=test_results/tflite_basic.xml
dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/tflite_combine.xml pytest tests/importer/tflite_/combine/ --doctest-modules --junitxml=test_results/tflite_combine.xml
- dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/tflite_model.xml pytest tests/importer/tflite_/model/ --doctest-modules --junitxml=test_results/tflite_model.xml
+ #dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/tflite_model.xml pytest tests/importer/tflite_/model/ --doctest-modules --junitxml=test_results/tflite_model.xml
dotnet-coverage collect -s tools/dotnet_coverage.settings.xml -f cobertura -o coverage/ncnn_basic.xml pytest tests/importer/ncnn_/basic/ --doctest-modules --junitxml=test_results/ncnn_basic.xml
dotnet-coverage merge -o coverage.integration.xml -f cobertura -r coverage/*.xml
@@ -327,4 +324,4 @@ jobs:
with:
name: nncase-coverage-report
path: coveragereport
- if-no-files-found: error
\ No newline at end of file
+ if-no-files-found: error
diff --git a/.github/workflows/compiler-python-release.yml b/.github/workflows/compiler-python-release.yml
index 5e0db927a0..1bf17c17e0 100644
--- a/.github/workflows/compiler-python-release.yml
+++ b/.github/workflows/compiler-python-release.yml
@@ -14,7 +14,7 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- - {name: x86_64-macos, os: macos-latest, shell: bash, rid: osx-x64, buildType: Release}
+ # - {name: aarch64-macos, os: macos-14, shell: bash, rid: osx-arm64, buildType: Release}
- {name: x86_64-linux, os: ubuntu-latest, shell: bash, rid: linux-x64, buildType: Release}
- {name: x86_64-windows, os: windows-latest, shell: bash, rid: win-x64, buildType: Release}
@@ -53,7 +53,7 @@ jobs:
matrix:
dotnet-version: ['7.0']
config:
- - {name: x86_64-macos, os: macos-latest}
+ # - {name: aarch64-macos, os: macos-14}
- {name: x86_64-linux, os: ubuntu-latest}
- {name: x86_64-windows, os: windows-latest, arch: x64}
@@ -88,7 +88,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
- python-version: 3.7
+ python-version: '3.10'
- name: Install cibuildwheel
run: pip install cibuildwheel
diff --git a/.github/workflows/jupyter-test.yml b/.github/workflows/jupyter-test.yml
index 1d2ee23550..19a74c8086 100755
--- a/.github/workflows/jupyter-test.yml
+++ b/.github/workflows/jupyter-test.yml
@@ -10,7 +10,7 @@ jobs:
strategy:
matrix:
config:
- - {name: x86_64-macos, os: macos-latest}
+ - {name: aarch64-macos, os: macos-14}
- {name: x86_64-linux, os: ubuntu-latest}
- {name: x86_64-windows, os: windows-latest}
@@ -20,7 +20,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
- python-version: 3.7
+ python-version: '3.10'
- name: Install dependencies
run: pip install --upgrade pip && pip install jupyterlab pytest nbmake
diff --git a/.github/workflows/runtime-build.yml b/.github/workflows/runtime-build.yml
index c11d287f2d..228e74224c 100644
--- a/.github/workflows/runtime-build.yml
+++ b/.github/workflows/runtime-build.yml
@@ -13,7 +13,7 @@ jobs:
strategy:
matrix:
config:
- - { name: x86_64-macos, os: macos-latest, cmakeArgs: '', buildType: Release }
+ #- { name: aarch64-macos, os: macos-14, cmakeArgs: '', buildType: Release }
- { name: x86_64-linux, os: ubuntu-latest, cmakeArgs: '', buildType: Release }
- { name: x86_64-windows, os: windows-latest, arch: x64, cmakeArgs: -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl, buildType: Release }
@@ -27,15 +27,10 @@ jobs:
arch: ${{matrix.config.arch}}
if: runner.os == 'Windows'
- - name: Set up build environment (Macos)
- run: |
- brew install sunnycase/core/libomp@11.1.0
- if: runner.os == 'Macos'
-
- name: Setup Python
uses: actions/setup-python@v4
with:
- python-version: 3.7
+ python-version: '3.10'
- name: Install Conan
run: |
@@ -51,10 +46,17 @@ jobs:
echo "CXX=g++-10" >> $GITHUB_ENV
if: runner.os == 'Linux'
+ - name: Configure Conan (Macos)
+ run: |
+ conan config init
+ sed -i '' 's/xtensalx7]/xtensalx7, arm64]/g' ~/.conan/settings.yml
+ sed -i '' 's/"14.0"]/"14.0", "15"]/g' ~/.conan/settings.yml
+ if: runner.os == 'Macos'
+
- name: Configure CMake
shell: bash
run: |
- conan install . -if build --build=missing -s build_type=${{matrix.config.buildType}} --profile=default -o runtime=True -o python=False -o tests=True -s compiler.cppstd=17
+ conan install . -if build --build=missing -s build_type=${{matrix.config.buildType}} --profile=default -o runtime=True -o python=False -o tests=True -s compiler.cppstd=20
- name: Build & Install
run: |
@@ -101,7 +103,7 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v4
with:
- python-version: 3.7
+ python-version: '3.10'
- name: Install toolchain and QEMU
shell: bash
@@ -129,7 +131,7 @@ jobs:
- name: Configure CMake
run: |
- conan install . -if build --build=missing -s build_type=${{matrix.config.buildType}} --profile:host=toolchains/riscv64-unknown-linux.profile.jinja --profile:build=default -o runtime=True -o python=False -o tests=True -s compiler.cppstd=17
+ conan install . -if build --build=missing -s build_type=${{matrix.config.buildType}} --profile:host=toolchains/riscv64-unknown-linux.profile.jinja --profile:build=default -o runtime=True -o python=False -o tests=True -s compiler.cppstd=20
- name: Build & Install
run: |
diff --git a/.gitignore b/.gitignore
index 5b1e72c18f..eaffc2eb90 100644
--- a/.gitignore
+++ b/.gitignore
@@ -261,6 +261,7 @@ __pycache__/
# vscode
.vscode/
+.mono/
# clangd
.cache/
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7ac7539a47..c5c4cd42a2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,8 +39,6 @@ project(nncase
VERSION ${NNCASE_VERSION}
LANGUAGES C CXX ASM)
-option(ENABLE_OPENMP "OpenMP support" ON)
-option(ENABLE_HALIDE "halide kernels support" ON)
option(DOTNET_INIT_FOR_CONFIG "Initialize dotnet from runtimeconfig" OFF)
option(BUILD_PYTHON_BINDING "Build python binding" ON)
option(BUILD_CSHARP_BINDING "Build csharp binding" ON)
@@ -106,7 +104,7 @@ if (BUILDING_RUNTIME)
else()
add_compile_options(-Wall -Wextra -pedantic -Werror -Wno-multichar -Wno-missing-field-initializers -Wno-unused-function -Wno-type-limits)
if (APPLE)
- add_compile_options(-Wno-four-char-constants -Wno-sometimes-uninitialized)
+ add_compile_options(-Wno-four-char-constants -Wno-sometimes-uninitialized -Wno-deprecated-declarations)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
add_compile_options(-Wno-uninitialized -Wno-unused-private-field)
else()
@@ -124,6 +122,9 @@ if (BUILDING_RUNTIME)
# add_subdirectory(src/Native/src/kernels)
# add_subdirectory(src/Native/src/runtime)
add_subdirectory(src/Native/src)
+ if(BUILD_TESTING)
+ add_subdirectory(src/Native/test)
+ endif()
# add_subdirectory(src/Native/src/functional)
if(BUILD_BENCHMARK)
# add_subdirectory(benchmark)
@@ -214,7 +215,9 @@ else()
add_subdirectory(src/Native/include/nncase)
add_subdirectory(src/Native/src)
-
+if(BUILD_TESTING)
+ add_subdirectory(src/Native/test)
+endif()
# Python binding
if(BUILD_PYTHON_BINDING)
add_subdirectory(python/nncase/native)
diff --git a/Directory.Packages.props b/Directory.Packages.props
index 3bb76c949a..2aa955d89e 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -12,13 +12,12 @@
true
-
-
-
-
-
+
+
+
+
+
-
@@ -43,26 +42,29 @@
1.1.1
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
-
+
@@ -74,7 +76,6 @@
-
diff --git a/NuGet.Config b/NuGet.Config
index fd11e2a06a..5e7849eabb 100644
--- a/NuGet.Config
+++ b/NuGet.Config
@@ -2,11 +2,13 @@
+
+
diff --git a/benchmark/models/models.cpp b/benchmark/models/models.cpp
index b7b7239cfb..0b59b54b37 100644
--- a/benchmark/models/models.cpp
+++ b/benchmark/models/models.cpp
@@ -23,7 +23,7 @@ using namespace nncase;
namespace
{
-gsl::span get_model_impl(const std::string &name, size_t id)
+std::span get_model_impl(const std::string &name, size_t id)
{
auto hres = FindResourceW(NULL, MAKEINTRESOURCEW(id), L"Binary");
if (!hres)
@@ -33,7 +33,7 @@ gsl::span get_model_impl(const std::string &name, size_t id)
if (!hmem)
return {};
auto res_data = LockResource(hmem);
- return { reinterpret_cast(res_data), (size_t)size };
+ return { reinterpret_cast(res_data), (size_t)size };
}
}
@@ -41,7 +41,7 @@ gsl::span get_model_impl(const std::string &name, size_t id)
if (name == #model) \
return get_model_impl(name, IDR_cpu_##model)
-gsl::span nncase::get_model(const std::string &name)
+std::span nncase::get_model(const std::string &name)
{
GET_MODEL_IMPL(mnist);
GET_MODEL_IMPL(mobilenet_v2);
@@ -55,9 +55,9 @@ INCBIN(mobilenet_v2, "cpu/mobilenet_v2.kmodel");
#define GET_MODEL_IMPL(model) \
if (name == #model) \
- return { reinterpret_cast(g##model##_data), g##model##_size }
+ return { reinterpret_cast(g##model##_data), g##model##_size }
-gsl::span nncase::get_model(const std::string &name)
+std::span nncase::get_model(const std::string &name)
{
GET_MODEL_IMPL(mnist);
GET_MODEL_IMPL(mobilenet_v2);
diff --git a/benchmark/models/models.h b/benchmark/models/models.h
index 7ee9ce92c2..56096be505 100644
--- a/benchmark/models/models.h
+++ b/benchmark/models/models.h
@@ -17,5 +17,5 @@
namespace nncase
{
-gsl::span get_model(const std::string &name);
+std::span get_model(const std::string &name);
}
diff --git a/cmake/configure-conan.cmake b/cmake/configure-conan.cmake
index e5b75ca340..63662a9fd0 100644
--- a/cmake/configure-conan.cmake
+++ b/cmake/configure-conan.cmake
@@ -14,16 +14,10 @@ endfunction()
_SET_CONANOPT(CONAN_OPTS "runtime" BUILDING_RUNTIME)
_SET_CONANOPT(CONAN_OPTS "tests" BUILD_TESTING)
_SET_CONANOPT(CONAN_OPTS "python" BUILD_PYTHON_BINDING)
-_SET_CONANOPT(CONAN_OPTS "openmp" ENABLE_OPENMP)
_SET_CONANOPT(CONAN_OPTS "vulkan_runtime" ENABLE_VULKAN_RUNTIME)
-_SET_CONANOPT(CONAN_OPTS "halide" ENABLE_HALIDE)
if (NOT DEFINED CMAKE_CXX_STANDARD)
- if (BUILDING_RUNTIME)
- set (CMAKE_CXX_STANDARD 17)
- else ()
- set (CMAKE_CXX_STANDARD 20)
- endif ()
+ set (CMAKE_CXX_STANDARD 20)
endif ()
_SET_CONANSETTING(CONAN_SETTINGS "compiler.cppstd" ${CMAKE_CXX_STANDARD})
diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake
index 85e8e1213e..2827003ab2 100644
--- a/cmake/dependencies.cmake
+++ b/cmake/dependencies.cmake
@@ -1,25 +1,13 @@
-find_package(gsl-lite REQUIRED)
if (ENABLE_OPENMP)
find_package(OpenMP COMPONENTS CXX REQUIRED)
endif ()
-if ((NOT BUILDING_RUNTIME) OR ENABLE_VULKAN_RUNTIME)
- find_package(Vulkan REQUIRED)
-endif ()
-
if (NOT BUILDING_RUNTIME)
- find_package(absl REQUIRED)
find_package(nethost REQUIRED)
find_package(fmt REQUIRED)
- find_package(magic_enum REQUIRED)
- find_package(spdlog REQUIRED)
- find_package(inja REQUIRED)
+ find_package(nlohmann_json REQUIRED)
endif ()
if (BUILD_TESTING)
find_package(GTest REQUIRED)
endif ()
-
-if (ENABLE_HALIDE)
- find_package(hkg REQUIRED)
-endif ()
\ No newline at end of file
diff --git a/cmake/nncaseConfig.cmake.in b/cmake/nncaseConfig.cmake.in
index 7d1a54245e..bf853ae583 100644
--- a/cmake/nncaseConfig.cmake.in
+++ b/cmake/nncaseConfig.cmake.in
@@ -1,3 +1,2 @@
include(${CMAKE_CURRENT_LIST_DIR}/nncaseTargets.cmake)
-find_package(gsl-lite REQUIRED)
find_package(fmt REQUIRED)
diff --git a/cmake/nncaseruntimeConfig.cmake.in b/cmake/nncaseruntimeConfig.cmake.in
index cce5810298..b4500a2ae9 100644
--- a/cmake/nncaseruntimeConfig.cmake.in
+++ b/cmake/nncaseruntimeConfig.cmake.in
@@ -1,5 +1 @@
include(${CMAKE_CURRENT_LIST_DIR}/nncaseruntimeTargets.cmake)
-
-if(NOT TARGET gsl-lite)
- find_package(gsl-lite REQUIRED)
-endif()
\ No newline at end of file
diff --git a/conanfile.py b/conanfile.py
index 8a3a0c72b7..9a4200dadd 100644
--- a/conanfile.py
+++ b/conanfile.py
@@ -24,20 +24,16 @@ class nncaseConan(ConanFile):
"fPIC": [True, False],
"runtime": [True, False],
"tests": [True, False],
- "halide": [True, False],
"python": [True, False],
- "vulkan_runtime": [True, False],
- "openmp": [True, False]
+ "vulkan_runtime": [True, False]
}
default_options = {
"shared": False,
"fPIC": True,
"runtime": False,
"tests": False,
- "halide": True,
"python": True,
- "vulkan_runtime": False,
- "openmp": True
+ "vulkan_runtime": False
}
def imports(self):
@@ -46,67 +42,42 @@ def imports(self):
self.copy("ortki.dll", "bin", "bin")
def requirements(self):
- self.requires('gsl-lite/0.37.0')
- self.requires('hkg/0.0.1')
if self.options.tests:
self.requires('gtest/1.10.0')
self.requires('ortki/0.0.2')
self.requires('rapidjson/1.1.x')
if self.options.python:
- self.requires('pybind11/2.6.1')
+ self.requires('pybind11/2.11.1')
if not self.options.runtime:
- self.requires('abseil/20220623.1')
- self.requires('nethost/6.0.11')
+ self.requires('nethost/7.0.5')
self.requires('fmt/7.1.3')
- self.requires('magic_enum/0.7.0')
- self.requires('spdlog/1.8.2')
- self.requires('inja/3.2.0')
- if self.options.tests:
- self.requires('gtest/1.10.0')
-
- if (not self.options.runtime) or self.options.vulkan_runtime:
- self.requires('vulkan-headers/1.2.182')
- self.requires('vulkan-loader/1.2.182')
+ self.requires('nlohmann_json/3.9.1')
def build_requirements(self):
pass
def configure(self):
- min_cppstd = "17" if self.options.runtime else "20"
+ min_cppstd = "20"
tools.check_min_cppstd(self, min_cppstd)
if self.settings.os == 'Windows':
self.settings.compiler.toolset = 'ClangCL'
-
- if self.settings.arch not in ("x86_64",):
- self.options.halide = False
if not self.options.runtime:
if self.settings.os == 'Windows':
self.options["nethost"].shared = True
- if (not self.options.runtime) or self.options.vulkan_runtime:
- if self.settings.os == 'Linux':
- self.options["vulkan-loader"].with_wsi_xcb = False
- self.options["vulkan-loader"].with_wsi_xlib = False
- self.options["vulkan-loader"].with_wsi_wayland = False
- self.options["vulkan-loader"].with_wsi_directfb = False
-
if self.options.tests:
self.options["ortki"].shared = True
def cmake_configure(self):
cmake = CMake(self)
cmake.definitions['BUILDING_RUNTIME'] = self.options.runtime
- cmake.definitions['ENABLE_OPENMP'] = self.options.openmp
cmake.definitions['ENABLE_VULKAN_RUNTIME'] = self.options.vulkan_runtime
- cmake.definitions['ENABLE_HALIDE'] = self.options.halide
cmake.definitions['BUILD_PYTHON_BINDING'] = self.options.python
cmake.definitions['BUILD_TESTING'] = self.options.tests
- if self.options.runtime:
- cmake.definitions["CMAKE_CXX_STANDARD"] = 17
cmake.configure()
return cmake
diff --git a/csharp/RuntimeTensor.h b/csharp/RuntimeTensor.h
index d25c52a565..0b6b9fdf66 100644
--- a/csharp/RuntimeTensor.h
+++ b/csharp/RuntimeTensor.h
@@ -94,8 +94,8 @@ RuntimeTensor_from_buffer(const uint8_t *buffer_ptr, datatype_t datatype,
host_runtime_tensor::create(
(datatype_t)datatype, to_shape(shape_ptr, shape_size),
to_strides(stride_ptr, shape_size),
- gsl::make_span((gsl::byte *)(buffer_ptr), total_items * item_size),
- [=](gsl::byte *) {})
+ gsl::make_span((std::byte *)(buffer_ptr), total_items * item_size),
+ [=](std::byte *) {})
.unwrap_or_throw();
auto rt = new runtime_tensor(std::move(hostrt));
return rt;
diff --git a/csharp/interpreter.cpp b/csharp/interpreter.cpp
index ebda591c3f..29cd9ba136 100644
--- a/csharp/interpreter.cpp
+++ b/csharp/interpreter.cpp
@@ -37,7 +37,7 @@ interpreter_init() {
EXPORT_API(void)
interpreter_load_model(uint8_t *buffer_ptr, int size) {
auto buffer =
- gsl::span((const gsl::byte *)(buffer_ptr), size);
+ std::span((const std::byte *)(buffer_ptr), size);
_interp->load_model(buffer).unwrap_or_throw();
}
diff --git a/modules/Nncase.Modules.CPU/CPUApplicationPart.cs b/modules/Nncase.Modules.CPU/CPUApplicationPart.cs
new file mode 100644
index 0000000000..ecaeb388ad
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CPUApplicationPart.cs
@@ -0,0 +1,31 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Text;
+using System.Threading.Tasks;
+using DryIoc;
+using Nncase.Hosting;
+
+namespace Nncase;
+
+///
+/// CPU application part extensions.
+///
+public static class CPUApplicationPart
+{
+ ///
+ /// Add CPU assembly.
+ ///
+ /// Service registrator.
+ /// Configured service registrator.
+ public static IRegistrator AddCPU(this IRegistrator registrator)
+ {
+ return registrator.RegisterModule()
+ .RegisterModule()
+ .RegisterModule();
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CPUModule.cs b/modules/Nncase.Modules.CPU/CPUModule.cs
new file mode 100644
index 0000000000..5e91015cef
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CPUModule.cs
@@ -0,0 +1,19 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using DryIoc;
+using Nncase.Hosting;
+using Nncase.Targets;
+
+namespace Nncase;
+
+///
+/// CPU module.
+///
+internal class CPUModule : IApplicationPart
+{
+ public void ConfigureServices(IRegistrator registrator)
+ {
+ registrator.Register(reuse: Reuse.Singleton);
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceBuiltn.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceBuiltn.cs
new file mode 100644
index 0000000000..a9e75554b5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceBuiltn.cs
@@ -0,0 +1,59 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+using System.Runtime.CompilerServices;
+using DryIoc.ImTools;
+using NetFabric.Hyperlinq;
+using Razor.Templating.Core;
+
+namespace Nncase.CodeGen.CPU;
+
+public static class CSourceBuiltn
+{
+ public const string KernelHeader = @"#pragma once
+#include
+using namespace nncase::ntt;
+
+";
+
+ public static string CMakeDef(string name)
+ {
+ var cmakePath = CMakePath(Path.Combine(Path.GetDirectoryName(typeof(CSourceBuiltn).Assembly.Location)!, "Runtime", "src", "cpu_runtime.cmake"));
+ var content = RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/CMakeLists.txt.cshtml", new { CMakePath = cmakePath }).Result;
+ return content;
+ }
+
+ public static string MakeKernel(string ctype, string kernelImpl)
+ {
+ return KernelHeader + ctype + kernelImpl;
+ }
+
+ public static string MakeMain(TIR.PrimFunction primFunction, IEnumerable rdataBuffers)
+ {
+ string init_tensors = string.Join("\n", primFunction.Parameters.ToArray().Select((b, i) =>
+ {
+ var buffer = (TIR.Buffer)b;
+ var size = TensorUtilities.GetSize(b.CheckedShape.ToValueArray(), TensorUtilities.GetStrides(b.CheckedShape.ToValueArray()), 1);
+ return $@" std::span<{buffer.ElemType.ToC()}, {size}> p{buffer.Name}(({buffer.ElemType.ToC()} *)inputs[{i}], {size});
+ tensor_view<{buffer.ElemType.ToC()}, {KernelUtility.DimensionsToC(buffer.Dimensions)}, {KernelUtility.StridesToC(buffer.Strides)}> {buffer.Name}(p{buffer.Name});
+";
+ }).Concat(rdataBuffers.Select(b =>
+ {
+ var size = TensorUtilities.GetSize(b.CheckedShape.ToValueArray(), TensorUtilities.GetStrides(b.CheckedShape.ToValueArray()), 1);
+ return $@" std::span<{b.ElemType.ToC()}, {size}> p{b.Name}(({b.ElemType.ToC()}*)(rdata + {((IR.TensorConst)b.MemSpan.Start).Value.ToScalar()}), {size});
+ tensor_view<{b.ElemType.ToC()}, {KernelUtility.DimensionsToC(b.Dimensions)}, {KernelUtility.StridesToC(b.Strides)}> {b.Name}(p{b.Name});";
+ })));
+ return @$"#include
+#include ""../device.h""
+#include ""kernel.h""
+
+extern ""C"" void kernel_entry(nncase_runtime_cpu_mt_t *cpu_mt, uint8_t **inputs, uint8_t *rdata, uint8_t *l1_data) {{
+g_cpu_mt = cpu_mt;
+{init_tensors}
+
+ {primFunction.Name}({string.Join(", ", primFunction.Parameters.AsValueEnumerable().Select(b => ((TIR.Buffer)b).Name).ToArray().Concat(rdataBuffers.Select(b => b.Name)).ToArray())}, l1_data);
+}}";
+ }
+
+ private static string CMakePath(string path) =>
+ path.Replace("\\", "/", StringComparison.Ordinal);
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceCompiler.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceCompiler.cs
new file mode 100644
index 0000000000..929219da36
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceCompiler.cs
@@ -0,0 +1,202 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Text;
+using Nncase.IR;
+using Nncase.Schedule;
+using Nncase.TIR;
+
+namespace Nncase.CodeGen.CPU;
+
+///
+/// the csource code compiler.
+///
+public class CSourceCompiler
+{
+ ///
+ /// compiler exe name.
+ ///
+ private string _exe = string.Empty;
+
+ ///
+ /// target architecture name.
+ ///
+ private string _arch = string.Empty;
+
+ ///
+ /// shared library file extension.
+ ///
+ private string _ext = string.Empty;
+
+ public CSourceCompiler()
+ {
+ PlatformSpecific();
+ ArchSpecific();
+ }
+
+ protected string Exe
+ {
+ get => _exe;
+ }
+
+ protected string Arch
+ {
+ get => _arch;
+ }
+
+ protected string Ext
+ {
+ get => _ext;
+ }
+
+ ///
+ /// compile the source txt, write to the out_path.
+ ///
+ /// c source code.
+ /// out .so path.
+ /// outPath.
+ public string Compile(string sourcePath, string outPath)
+ {
+ var errMsg = new StringBuilder();
+ using (var errWriter = new StringWriter(errMsg))
+ {
+ using (var proc = new Process())
+ {
+ proc.StartInfo.FileName = Exe;
+ proc.StartInfo.Arguments = ArgumentsSpecific(sourcePath, outPath);
+ proc.StartInfo.WorkingDirectory = Directory.GetCurrentDirectory();
+ proc.StartInfo.RedirectStandardError = true;
+ proc.StartInfo.RedirectStandardOutput = true;
+ proc.OutputDataReceived += (sender, e) => errWriter.WriteLine(e.Data);
+ proc.ErrorDataReceived += (sender, e) => errWriter.WriteLine(e.Data);
+ proc.Start();
+ proc.BeginErrorReadLine();
+ proc.BeginOutputReadLine();
+ proc.WaitForExit();
+ if (proc.ExitCode != 0)
+ {
+ throw new InvalidOperationException(errMsg.ToString());
+ }
+ }
+ }
+
+ return outPath;
+ }
+
+ ///
+ /// create the temp dll file and compile source
+ /// .
+ ///
+ public string Compile(string sourcePath) => Compile(sourcePath, Path.Join(sourcePath, "build", Path.GetFileName(sourcePath)));
+
+ // Locates vcvarsall.bat via VSAPPIDDIR, else via vswhere; returns null if ProgramFiles(x86) is unset or vswhere prints nothing.
+ private static string? FindVCVarPath()
+ {
+     var vsDir = Environment.GetEnvironmentVariable("VSAPPIDDIR");
+     if (!string.IsNullOrEmpty(vsDir))
+     {
+         return Path.Combine(vsDir, "..\\..\\VC\\Auxiliary\\Build\\vcvarsall.bat");
+     }
+
+     var programFiles = Environment.GetEnvironmentVariable("ProgramFiles(x86)");
+     if (string.IsNullOrEmpty(programFiles))
+     {
+         return null;
+     }
+
+     using (var proc = new Process())
+     {
+         proc.StartInfo.FileName = Path.Combine(programFiles, "Microsoft Visual Studio\\Installer\\vswhere");
+         proc.StartInfo.Arguments = "-prerelease -latest -property installationPath";
+         proc.StartInfo.RedirectStandardOutput = true;
+         proc.Start();
+         proc.WaitForExit();
+         // ReadLine returns null when vswhere printed nothing; report that as "not found" instead of throwing.
+         vsDir = proc.StandardOutput.ReadLine();
+         return string.IsNullOrEmpty(vsDir) ? null : Path.Combine(vsDir, "VC\\Auxiliary\\Build\\vcvarsall.bat");
+     }
+ }
+
+ ///
+ /// select the current platform's exe and library extension.
+ ///
+ /// NotSupportedException.
+ private void PlatformSpecific()
+ {
+ if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
+ {
+ _exe = "/bin/bash";
+ _ext = "so";
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+ {
+ _exe = "/bin/bash";
+ _ext = "dylib";
+ }
+ else if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+ {
+ _exe = "cmd";
+ _ext = "dll";
+ }
+
+ if (System.Environment.GetEnvironmentVariable("NNCASE_CPU_COMPILER") is string exe)
+ {
+ _exe = exe;
+ }
+ }
+
+ private void ArchSpecific()
+ {
+ _arch = RuntimeInformation.OSArchitecture switch
+ {
+ Architecture.X64 => RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ? "x86-64" : "x86_64",
+ Architecture.Arm64 => "arm64",
+ _ => throw new NotSupportedException(RuntimeInformation.OSArchitecture.ToString()),
+ };
+ }
+
+ // Builds the shell arguments that configure and build sourcePath with CMake + Ninja; outPath is currently unused.
+ private string ArgumentsSpecific(string sourcePath, string outPath)
+ {
+     var archConfig = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "-DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl" : string.Empty;
+
+ #if DEBUG
+     var config = "Debug";
+ #else
+     var config = "Release";
+ #endif
+     // Flatten the script for both CRLF and LF source files so the shell always receives a single-line command.
+     var script = $"""
+         cd {sourcePath} &&
+         cmake -E remove_directory build &&
+         cmake -G Ninja -S . -B build -DCMAKE_BUILD_TYPE={config} {archConfig} &&
+         cmake --build build --config {config}
+         """.Replace("\r\n", " ", StringComparison.Ordinal).Replace("\n", " ", StringComparison.Ordinal);
+
+     if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux) || RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
+     {
+         return $"-c \"{script}\"";
+     }
+
+     if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
+     {
+         // Prefer running vcvarsall (hard-coded x64) before the build when it can be located.
+         var vcVarPath = FindVCVarPath();
+         if (!string.IsNullOrEmpty(vcVarPath))
+         {
+             return $"/C \"(\"{vcVarPath}\" x64) && {script}\"";
+         }
+
+         // Fall back to whatever toolchain is already on PATH.
+         return $"/C {script}";
+     }
+
+     throw new NotSupportedException("Only Support Linux/Osx/Windows");
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceExtensions.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceExtensions.cs
new file mode 100644
index 0000000000..f1f918613f
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceExtensions.cs
@@ -0,0 +1,137 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// Converts types and operators to their C names.
+/// </summary>
+internal static class CSourceExtensions
+{
+ // Maps each supported primitive type to the C type name emitted in generated source.
+ // Boolean shares "uint8_t" with UInt8.
+ private static readonly Dictionary<PrimType, string> _primTypeToC = new()
+ {
+     { DataTypes.Boolean, "uint8_t" },
+     { DataTypes.Int8, "int8_t" },
+     { DataTypes.Int16, "int16_t" },
+     { DataTypes.Int32, "int32_t" },
+     { DataTypes.Int64, "int64_t" },
+     { DataTypes.UInt8, "uint8_t" },
+     { DataTypes.UInt16, "uint16_t" },
+     { DataTypes.UInt32, "uint32_t" },
+     { DataTypes.UInt64, "uint64_t" },
+     { DataTypes.Float32, "float" },
+     { DataTypes.Float64, "double" },
+ };
+
+ /// <summary>
+ /// Gets the C type name of a primitive type.
+ /// </summary>
+ /// <param name="primType">Primitive type to look up.</param>
+ /// <returns>The C type name from the lookup table.</returns>
+ /// <exception cref="KeyNotFoundException">The type has no entry in the lookup table.</exception>
+ public static string ToC(this PrimType primType) =>
+     _primTypeToC[primType];
+
+ /// <summary>
+ /// Gets the C-side name of an arg-reduction operator.
+ /// </summary>
+ /// <exception cref="NotImplementedException">The operator is neither ArgMin nor ArgMax.</exception>
+ public static string ToC(this ReduceArgOp op)
+ {
+     switch (op)
+     {
+         case ReduceArgOp.ArgMin:
+             return "arg_min";
+         case ReduceArgOp.ArgMax:
+             return "arg_max";
+         default:
+             throw new NotImplementedException();
+     }
+ }
+
+ /// <summary>
+ /// Gets the C type spelling of a data type: primitives use the primitive overload, every
+ /// pointer becomes "uint8_t *", and vectors become "vector&lt;elem,lanes...&gt;".
+ /// </summary>
+ /// <exception cref="NotSupportedException">The data type is none of the handled kinds.</exception>
+ public static string ToC(this DataType dataType)
+ {
+     if (dataType is PrimType prim)
+     {
+         return prim.ToC();
+     }
+
+     if (dataType is PointerType)
+     {
+         return "uint8_t *";
+     }
+
+     if (dataType is VectorType vec)
+     {
+         var elemName = vec.ElemType.ToC();
+         var lanes = string.Join(",", vec.Lanes);
+         return $"vector<{elemName},{lanes}>";
+     }
+
+     throw new NotSupportedException(dataType.ToString());
+ }
+
+ /// <summary>
+ /// Gets the <c>loc_t</c> enumerator used in generated source for a memory location.
+ /// </summary>
+ /// <exception cref="NotSupportedException">The location has no mapping.</exception>
+ public static string ToC(this MemoryLocation location)
+ {
+     switch (location)
+     {
+         case MemoryLocation.Output:
+         case MemoryLocation.Input:
+         case MemoryLocation.Rdata:
+             return "loc_t::device";
+         case MemoryLocation.L2Data:
+             return "loc_t::shared";
+         case MemoryLocation.L1Data:
+             return "loc_t::local";
+         default:
+             throw new NotSupportedException(location.ToString());
+     }
+ }
+
+ /// <summary>
+ /// Gets the C-side name of an image resize mode.
+ /// </summary>
+ /// <exception cref="NotImplementedException">The mode has no mapping.</exception>
+ public static string ToC(this ImageResizeMode mode)
+ {
+     switch (mode)
+     {
+         case ImageResizeMode.Bilinear:
+             return "bilinear";
+         case ImageResizeMode.NearestNeighbor:
+             return "nearest_neighbor";
+         default:
+             throw new NotImplementedException();
+     }
+ }
+
+ /// <summary>
+ /// Gets the C-side name of an image resize coordinate transformation mode.
+ /// </summary>
+ /// <exception cref="NotImplementedException">The mode has no mapping.</exception>
+ public static string ToC(this ImageResizeTransformationMode mode)
+ {
+     switch (mode)
+     {
+         case ImageResizeTransformationMode.HalfPixel:
+             return "half_pixel";
+         case ImageResizeTransformationMode.PytorchHalfPixel:
+             return "pytorch_half_pixel";
+         case ImageResizeTransformationMode.AlignCorners:
+             return "align_corners";
+         case ImageResizeTransformationMode.Asymmetric:
+             return "asymmetric";
+         case ImageResizeTransformationMode.TFCropAndResize:
+             return "tfcrop_and_resize";
+         default:
+             throw new NotImplementedException();
+     }
+ }
+
+ /// <summary>
+ /// Gets the C-side name of a nearest-neighbor rounding mode.
+ /// </summary>
+ /// <exception cref="NotImplementedException">The mode has no mapping.</exception>
+ public static string ToC(this ImageResizeNearestMode mode)
+ {
+     switch (mode)
+     {
+         case ImageResizeNearestMode.RoundPreferFloor:
+             return "round_prefer_floor";
+         case ImageResizeNearestMode.RoundPreferCeil:
+             return "round_prefer_ceil";
+         case ImageResizeNearestMode.Floor:
+             return "floor";
+         case ImageResizeNearestMode.Ceil:
+             return "ceil";
+         default:
+             throw new NotImplementedException();
+     }
+ }
+
+ // Builds a ".view(make_ranked_shape(begins), fixed_shape<dims>{})" suffix string.
+ // For every axis that has at least one SBPSplit in ndsbp, an offset term built from the
+ // placement's "<name>id" identifiers and the axis extent is appended to begins[axis].
+ // NOTE: the begins array is modified in place.
+ public static string ToSlicing(this IEnumerable dims, string[] begins, IRArray ndsbp, Placement placement)
+ {
+ // NOTE(review): hstrides is computed but not read anywhere below.
+ var hstrides = TensorUtilities.GetStrides(placement.Hierarchy.ToArray());
+ // One list per axis, holding (index of the entry in ndsbp, split) for each split on that axis.
+ var splits = Enumerable.Range(0, begins.Length).Select(_ => new List<(int H, SBPSplit S)>()).ToArray();
+ foreach (var (sbp, i) in ndsbp.Select((s, i) => (s, i)))
+ {
+ if (sbp is SBPSplit { Axis: int axis } split)
+ {
+ splits[axis].Add((i, split));
+ }
+ }
+
+ // Order each axis' splits by descending H.
+ foreach (var splist in splits)
+ {
+ splist.Sort((a, b) => -a.H.CompareTo(b.H));
+ }
+
+ for (int i = 0; i < begins.Length; i++)
+ {
+ var sp = splits[i];
+ if (sp.Count > 0)
+ {
+ var dimi = dims.ElementAt(i);
+ // When the dim text contains both '?' and ':', keep only the trimmed text between them
+ // (presumably the first operand of a "cond ? a : b" expression; confirm against the callers).
+ if (dimi.IndexOf('?', System.StringComparison.CurrentCulture) is int s && dimi.IndexOf(':', System.StringComparison.CurrentCulture) is int e && s != -1 && e != -1)
+ {
+ dimi = dimi[(s + 1)..e].Trim();
+ }
+
+ // The offset starts from the first split's "<name>id"; each further split wraps the accumulator as
+ // "(acc + product(Hierarchy[(H + 1)..]) * <name>id)". The result is then multiplied by the extent text.
+ begins[i] += " + " + sp.Skip(1).Aggregate($"{placement.Name[sp[0].H]}id", (acc, p) => $"({acc} + {TensorUtilities.GetProduct(placement.Hierarchy[(p.H + 1)..])} * {placement.Name[p.H]}id)") + $" * {dimi}";
+ }
+ }
+
+ return $".view(make_ranked_shape({string.Join(',', begins)}), fixed_shape<{string.Join(",", dims.Select(d => d.ToString()))}>{{}})";
+ }
+
+ /// <summary>
+ /// Builds the slicing suffix with every axis starting from the literal "0".
+ /// </summary>
+ public static string ToSlicing(this IEnumerable dims, IRArray ndsbp, Placement placement)
+ {
+     var zeroBegins = Enumerable.Repeat("0", dims.Count()).ToArray();
+     return ToSlicing(dims, zeroBegins, ndsbp, placement);
+ }
+
+ /// <summary>
+ /// Gets the C infix operator for an arithmetic binary operator.
+ /// </summary>
+ /// <exception cref="NotSupportedException">The operator is not Add, Sub, Mul or Div.</exception>
+ public static string ToC(this BinaryOp binaryOp)
+ {
+     switch (binaryOp)
+     {
+         case BinaryOp.Add:
+             return "+";
+         case BinaryOp.Sub:
+             return "-";
+         case BinaryOp.Mul:
+             return "*";
+         case BinaryOp.Div:
+             return "/";
+         default:
+             throw new NotSupportedException(binaryOp.ToString());
+     }
+ }
+
+ /// <summary>
+ /// Gets the C comparison operator for a compare operator.
+ /// </summary>
+ /// <param name="op">Compare operator to convert.</param>
+ /// <returns>The C infix operator text.</returns>
+ /// <exception cref="NotSupportedException">The operator has no mapping.</exception>
+ public static string ToC(this CompareOp op) => op switch
+ {
+     CompareOp.Equal => "==",
+     CompareOp.NotEqual => "!=",
+     CompareOp.LowerThan => "<",
+     CompareOp.LowerOrEqual => "<=",
+
+     // Strict greater-than is ">" and the inclusive form is ">="; these two were swapped before.
+     CompareOp.GreaterThan => ">",
+     CompareOp.GreaterOrEqual => ">=",
+     _ => throw new NotSupportedException(op.ToString()),
+ };
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceUtilities.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceUtilities.cs
new file mode 100644
index 0000000000..a1359ebdaa
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/CSourceUtilities.cs
@@ -0,0 +1,78 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System.CommandLine;
+using System.Globalization;
+using DryIoc.ImTools;
+using Nncase.Diagnostics;
+using Nncase.IR.Math;
+
+namespace Nncase.CodeGen.CPU;
+
+internal static class CSourceUtilities
+{
+ /// <summary>
+ /// Renders a scalar binary operation as a C expression.
+ /// </summary>
+ /// <param name="binary">Binary operation to render.</param>
+ /// <param name="arguments">Already-converted operands, indexed by the Binary parameter indices.</param>
+ /// <returns>A parenthesised infix expression, or a <c>std::min</c> call for Min.</returns>
+ /// <exception cref="NotSupportedException">The operator is not Add, Sub, Mul, Div or Min.</exception>
+ public static string ContertBinary(Binary binary, CSymbol[] arguments)
+ {
+     var left = arguments[Binary.Lhs.Index].Name;
+     var right = arguments[Binary.Rhs.Index].Name;
+     return binary.BinaryOp switch
+     {
+         BinaryOp.Add or BinaryOp.Sub or BinaryOp.Mul or BinaryOp.Div => $"({left} {binary.BinaryOp.ToC()} {right})",
+         BinaryOp.Min => $"std::min({left}, {right})",
+         _ => throw new NotSupportedException(),
+     };
+ }
+
+ /// <summary>
+ /// Tries to read the two integer operands of a dimension written as "... ? div : rem" followed
+ /// by one trailing character (the last character of the text is always dropped).
+ /// </summary>
+ /// <param name="dim">Dimension text to inspect.</param>
+ /// <param name="div">The integer between '?' and ':' on success; otherwise 0.</param>
+ /// <param name="rem">The integer between ':' and the last character on success; otherwise 0.</param>
+ /// <returns>True when both operands were found and parsed; false otherwise. Malformed text never throws.</returns>
+ public static bool TryGetDivRem(string dim, out int div, out int rem)
+ {
+     div = 0;
+     rem = 0;
+
+     var s = dim.IndexOf('?', System.StringComparison.Ordinal);
+     var e = dim.IndexOf(':', System.StringComparison.Ordinal);
+
+     // Both markers must exist in "? ... :" order, and ':' must not be the last character,
+     // otherwise the range slices below would be invalid.
+     if (s == -1 || e <= s || e == dim.Length - 1)
+     {
+         return false;
+     }
+
+     // Parse with the invariant culture: the text is generated code, not user input.
+     if (int.TryParse(dim[(s + 1)..e].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedDiv)
+         && int.TryParse(dim[(e + 1)..^1].Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out var parsedRem))
+     {
+         div = parsedDiv;
+         rem = parsedRem;
+         return true;
+     }
+
+     return false;
+ }
+
+ /// <summary>
+ /// Renders a scalar unary operation as a reference to a <c>nncase_mt</c> member named
+ /// "&lt;type&gt;_unary_&lt;op&gt;".
+ /// </summary>
+ /// <param name="op">Unary operation to render.</param>
+ /// <param name="arguments">Already-converted operands; the first one supplies the type prefix.</param>
+ /// <returns>The generated C text.</returns>
+ internal static string ContertUnary(Unary op, CSymbol[] arguments)
+ {
+     var input = arguments[Unary.Input.Index].Name;
+
+     // Lowercase with the invariant culture: these are C identifiers, and culture-sensitive
+     // casing (e.g. the Turkish dotless i) would produce names that do not exist.
+     var kind = nameof(Unary).ToLowerInvariant();
+     var opName = op.UnaryOp.ToString().ToLowerInvariant();
+
+     // NOTE(review): the input name is appended directly after the member name, with no
+     // parentheses or separator. Confirm this is the intended call syntax.
+     return $"nncase_mt->{arguments[0].Type}_{kind}_{opName}{input}";
+ }
+
+ /// <summary>
+ /// Renders a scalar comparison as a parenthesised C infix expression.
+ /// </summary>
+ /// <param name="op">Compare operation to render.</param>
+ /// <param name="arguments">Already-converted operands, indexed by the Compare parameter indices.</param>
+ internal static string ContertCompare(Compare op, CSymbol[] arguments)
+ {
+     var left = arguments[Compare.Lhs.Index].Name;
+     var right = arguments[Compare.Rhs.Index].Name;
+     return $"({left} {op.CompareOp.ToC()} {right})";
+ }
+
+ /// <summary>
+ /// Renders a select as a parenthesised C conditional expression.
+ /// </summary>
+ /// <param name="s">Select operation; only its parameter indices are used.</param>
+ /// <param name="arguments">Already-converted operands, indexed by the Select parameter indices.</param>
+ internal static string ContertSelect(Select s, CSymbol[] arguments)
+ {
+     var predicate = arguments[Select.Predicate.Index].Name;
+     var whenTrue = arguments[Select.TrueValue.Index].Name;
+     var whenFalse = arguments[Select.FalseValue.Index].Name;
+     return $"({predicate} ? {whenTrue} : {whenFalse})";
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/DeviceCSourceConvertVisitor.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/DeviceCSourceConvertVisitor.cs
new file mode 100644
index 0000000000..bbf91a3810
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/DeviceCSourceConvertVisitor.cs
@@ -0,0 +1,390 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+#define MULTI_CORE_XPU
+
+// #define DEBUG_PRINT
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reactive;
+using System.Runtime.InteropServices;
+using System.Text;
+using DryIoc;
+using Google.OrTools.Sat;
+using NetFabric.Hyperlinq;
+using Nncase.IR;
+using Nncase.Runtime;
+using Nncase.TIR;
+using Nncase.Utilities;
+using Razor.Templating.Core;
+
+namespace Nncase.CodeGen.CPU;
+
+internal sealed class DeviceCSourceConvertVisitor : ExprFunctor
+{
+ private readonly Dictionary _exprMemo;
+ private readonly StringBuilder _deviceBuilder;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="DeviceCSourceConvertVisitor"/> class with an
+ /// empty output buffer and a reference-keyed expression memo.
+ /// </summary>
+ public DeviceCSourceConvertVisitor()
+ {
+     _deviceBuilder = new StringBuilder();
+     _exprMemo = new(ReferenceEqualityComparer.Instance);
+ }
+
+ /// <summary>
+ /// Gets the root expression of the visit, cast to a prim function.
+ /// </summary>
+ public PrimFunction VisitEntry
+ {
+     get { return (TIR.PrimFunction)VisitRoot!; }
+ }
+
+ /// <summary>
+ /// Gets the source text accumulated in the device builder so far.
+ /// </summary>
+ public string GetHeader() => _deviceBuilder.ToString();
+
+ /// <inheritdoc/>
+ protected override CSymbol VisitPrimFunction(PrimFunction expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     // Only functions whose checked type is a callable returning void are accepted.
+     if (!(expr.CheckedType is CallableType { ReturnType: TupleType ret } && ret == TupleType.Void))
+     {
+         throw new NotSupportedException("The PrimFunction must return void!");
+     }
+
+     // Each parameter gets its own template type T<i> and is taken as a forwarding reference.
+     var templateParams = string.Join(", ", Enumerable.Range(0, expr.Parameters.Length).Select(x => $"class T{x}"));
+     var funcParams = string.Join(", ", expr.Parameters.AsValueEnumerable().Select(Visit).Select((s, i) => $"T{i} &&{s.Name}").ToArray());
+     var ctype = $"template<{templateParams}>" + $"void {expr.Name}({funcParams})";
+
+     using (new IndentScope(_deviceBuilder))
+     {
+         // Signature and opening brace.
+         IndentScope.Writer.IndWrite($"{ctype} {{\n");
+
+         // Body, one indentation level deeper.
+         using (new IndentScope())
+         {
+             Visit(expr.Body);
+         }
+
+         // Closing brace.
+         IndentScope.Writer.IndWrite("}\n");
+     }
+
+     var result = new CSymbol(ctype, expr.Name);
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+
+ /// <summary>
+ /// Emits an if/else statement for the expression and memoizes an empty symbol for it.
+ /// </summary>
+ protected override CSymbol VisitIfThenElse(IfThenElse expr)
+ {
+     if (!_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         var condition = Visit(expr.Condition);
+
+         IndentScope.Writer.IndWrite($"if ({condition.Name}) {{\n");
+         using (new IndentScope())
+         {
+             Visit(expr.Then);
+         }
+
+         IndentScope.Writer.IndWrite("}\n");
+
+         // The else branch is always emitted.
+         IndentScope.Writer.IndWrite("else {\n");
+         using (new IndentScope())
+         {
+             Visit(expr.Else);
+         }
+
+         IndentScope.Writer.IndWrite("}\n");
+
+         symbol = new CSymbol(string.Empty, string.Empty);
+         _exprMemo.Add(expr, symbol);
+     }
+
+     return symbol;
+ }
+
+ /// <summary>
+ /// Emits a local variable declaration initialized with the bound value, then visits the body.
+ /// </summary>
+ protected override CSymbol VisitLet(Let expr)
+ {
+     if (!_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         var boundVar = Visit(expr.Var);
+         var boundValue = Visit(expr.Expression);
+
+#if DEBUG_PRINT
+         IndentScope.Writer.IndWrite($"runtime_util->printf(\"let {boundVar.Name}\\n\");\n");
+#endif
+
+         // The declared type comes from the value, not from the variable symbol.
+         IndentScope.Writer.IndWrite($"{boundValue.Type} {boundVar.Name} = {boundValue.Name};\n");
+         Visit(expr.Body);
+
+         symbol = new CSymbol(string.Empty, string.Empty);
+         _exprMemo.Add(expr, symbol);
+     }
+
+     return symbol;
+ }
+
+ /// <inheritdoc/>
+ protected override CSymbol VisitMemSpan(MemSpan expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     var start = Visit(expr.Start);
+     var size = Visit(expr.Size);
+
+     // Only L2Data, Input and Output spans are supported; all of them use the start symbol's name.
+     if (expr.Location is not (MemoryLocation.L2Data or MemoryLocation.Input or MemoryLocation.Output))
+     {
+         throw new NotSupportedException(expr.Location.ToString());
+     }
+
+     var name = start.Name;
+     var result = new CSymbol(start.Type, $"std::span({name}, {size.Name})");
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+
+ /// <summary>
+ /// Maps a buffer to a symbol whose type is a <c>tensor_view</c> over its element type,
+ /// dimensions and strides, and whose name is the buffer name.
+ /// </summary>
+ protected override CSymbol VisitBuffer(TIR.Buffer expr)
+ {
+     if (!_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         var elem = expr.ElemType.ToC();
+         var dims = KernelUtility.DimensionsToC(expr.Dimensions);
+         var strides = KernelUtility.StridesToC(expr.Strides);
+
+         // The trailing space after '>' is part of the emitted type text.
+         symbol = new CSymbol($"tensor_view<{elem}, {dims}, {strides}> ", expr.Name);
+         _exprMemo.Add(expr, symbol);
+     }
+
+     return symbol;
+ }
+
+ // Lowers a call expression to C source.
+ // Statement-like targets (device function calls, Store, Memcopy, Unary/Binary kernels) are written
+ // straight to IndentScope.Writer and leave the returned symbol's name empty; expression-like
+ // targets return their C expression text as the symbol's name.
+ // Throws NotSupportedException for an unhandled checked type or call target.
+ protected override CSymbol VisitCall(Call expr)
+ {
+ if (_exprMemo.TryGetValue(expr, out var symbol))
+ {
+ return symbol;
+ }
+
+ // C type of the call result: empty for void, the element type for scalars,
+ // and a tensor_view (fixed or ranked shape) for ranked tensors.
+ string type = expr.CheckedType switch
+ {
+ TupleType x when x == TupleType.Void => string.Empty,
+ TensorType { IsScalar: true } x => x.DType.ToC(),
+ TensorType { Shape: { IsRanked: true } } x => x.Shape.IsFixed switch
+ {
+ true => $"tensor_view<{x.DType.ToC()}, fixed_shape<{x.Shape.ToString()[1..^1]}>>",
+ false => $"tensor_view<{x.DType.ToC()}, ranked_shape<{x.Shape.Rank}>>",
+ },
+ _ => throw new NotSupportedException(),
+ };
+
+ string str = string.Empty;
+ // All arguments are visited up front, before dispatching on the target.
+ var arguments = expr.Arguments.AsValueEnumerable().Select(Visit).ToArray();
+ switch (expr.Target)
+ {
+ case PrimFunction deviceFunc:
+ IndentScope.Writer.IndWrite($"{deviceFunc.Name}({string.Join(",", arguments.Select(arg => arg.Name))});\n");
+ break;
+ case IR.Math.Binary op:
+ str = CSourceUtilities.ContertBinary(op, arguments);
+ break;
+ case IR.Math.Unary op:
+ str = CSourceUtilities.ContertUnary(op, arguments);
+ break;
+ case IR.Math.Compare op:
+ str = CSourceUtilities.ContertCompare(op, arguments);
+ break;
+ case IR.Math.Select op:
+ str = CSourceUtilities.ContertSelect(op, arguments);
+ break;
+ case TIR.CPU.SramPtr op:
+ str = $"g_cpu_mt->sram_address(bid, tid) + {arguments[0].Name}";
+ break;
+ case TIR.Load op:
+ str = $"{arguments[0].Name}[{arguments[1].Name}]";
+ break;
+ case TIR.Store op:
+#if DEBUG_PRINT
+ IndentScope.Writer.IndWrite($"runtime_util->printf(\"{arguments[0].Name}[%d]\\n\", {arguments[1].Name});\n");
+#endif
+ IndentScope.Writer.IndWrite($"{arguments[0].Name}[{arguments[1].Name}] = {arguments[2].Name};\n");
+ break;
+ case TIR.CPU.PtrOf op:
+ str = op.PtrName + ".data()";
+ break;
+ case IR.Buffers.Allocate op:
+ str = $"({type})runtime_util->malloc({arguments[0].Name})";
+ break;
+ case IR.Buffers.AllocateBufferView op:
+ {
+ // The buffer is read from the raw call argument, not from the converted symbols.
+ var buffer = (TIR.Buffer)expr.Arguments[0];
+ if (buffer.CheckedShape.IsFixed)
+ {
+ str = $"{{span_cast<{buffer.ElemType.ToC()}>({Visit(buffer.MemSpan).Name}), {KernelUtility.DimensionsToC(buffer.Dimensions)}{{}}, {KernelUtility.StridesToC(buffer.Strides)}{{}}}}";
+ }
+ else
+ {
+ str = $"{{span_cast<{buffer.ElemType.ToC()}>({Visit(buffer.MemSpan).Name}), make_ranked_shape({StringUtility.Join(", ", buffer.Dimensions.AsValueEnumerable().Select(x => Visit(x).Name))})}}";
+ }
+ }
+
+ break;
+ case IR.Tensors.Cast op:
+ str = $"(({op.NewType.ToC()}){arguments[0].Name})";
+ break;
+ case TIR.CPU.Memcopy op:
+ // Note the argument order: arguments[1] is emitted first, then arguments[0].
+ IndentScope.Writer.IndWrite($"tensor_copy({arguments[1].Name}, {arguments[0].Name});\n");
+ break;
+ case TIR.CPU.Unary op:
+ // .Result blocks until the template has been rendered.
+ IndentScope.Writer.IndWrite(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Unary.cshtml", new UnaryKernelTemplateModel
+ {
+ Arguments = arguments.Select(x => new KernelArgument { Symbol = x }).ToArray(),
+ UnaryOp = op.UnaryOp,
+ }).Result);
+ break;
+ case TIR.CPU.Binary op:
+ IndentScope.Writer.IndWrite(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Binary.cshtml", new BinaryKernelTemplateModel
+ {
+ Arguments = arguments.Select(x => new KernelArgument { Symbol = x }).ToArray(),
+ BinaryOp = op.BinaryOp,
+ }).Result);
+ break;
+ default:
+ throw new NotSupportedException();
+ }
+
+ symbol = new(type, str);
+ _exprMemo.Add(expr, symbol);
+ return symbol;
+ }
+
+ /// <inheritdoc/>
+ /// <remarks>
+ /// Only scalar tensor constants are supported: primitive scalars become a literal of the
+ /// primitive's C type, and scalar pointers to a primitive become their value with the pointer's C type.
+ /// </remarks>
+ /// <exception cref="NotSupportedException">The constant is neither of the two supported scalar forms.</exception>
+ protected override CSymbol VisitConst(Const expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         return symbol;
+     }
+
+     string type;
+     string str;
+     if (expr is TensorConst { Value: Tensor { ElementType: PrimType ptype, Shape: { IsScalar: true } } scalar })
+     {
+         // Format numbers with the invariant culture so the emitted literal does not depend on
+         // the machine's locale (a comma decimal separator would not be valid C).
+         var element = scalar[0];
+         var text = element is IFormattable formattable
+             ? formattable.ToString(null, System.Globalization.CultureInfo.InvariantCulture)
+             : element.ToString();
+
+         // Booleans are emitted as 1/0.
+         str = text switch
+         {
+             "True" => "1",
+             "False" => "0",
+             null => string.Empty,
+             var x => x,
+         };
+
+         type = ptype.ToC();
+     }
+     else if (expr is TensorConst { Value: Tensor { ElementType: PointerType { ElemType: PrimType }, Shape: { IsScalar: true } } pointer })
+     {
+         str = pointer.ToScalar().ToString();
+         type = pointer.ElementType.ToC();
+     }
+     else
+     {
+         throw new NotSupportedException();
+     }
+
+     symbol = new(type, str);
+     _exprMemo.Add(expr, symbol);
+     return symbol;
+ }
+
+ /// <inheritdoc/>
+ protected override CSymbol VisitSequential(Sequential expr)
+ {
+     if (!_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         // Visit the fields in order; each one emits its own source.
+         foreach (var statement in expr.Fields)
+         {
+             Visit(statement);
+         }
+
+         symbol = new CSymbol(string.Empty, string.Empty);
+         _exprMemo.Add(expr, symbol);
+     }
+
+     return symbol;
+ }
+
+ /// <summary>
+ /// Emits a C for-loop over the loop domain (start, stop, step) and visits the body inside it.
+ /// </summary>
+ protected override CSymbol VisitFor(For expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     // Loop header: "for (T i = start; i < stop; i += step) {".
+     var loopVar = Visit(expr.LoopVar);
+     var start = Visit(expr.Domain.Start).Name;
+     var stop = Visit(expr.Domain.Stop).Name;
+     var step = Visit(expr.Domain.Step).Name;
+     IndentScope.Writer.IndWrite($"for ({loopVar.Type} {loopVar.Name} = {start}; {loopVar.Name} < {stop}; {loopVar.Name} += {step}) {{\n");
+#if DEBUG_PRINT
+     IndentScope.Writer.IndWrite($"runtime_util->printf(\"{loopVar.Name} = %d\\n\", {loopVar.Name});\n");
+#endif
+
+     // Loop body, one indentation level deeper.
+     using (new IndentScope())
+     {
+         Visit(expr.Body);
+     }
+
+     // Closing brace.
+     IndentScope.Writer.IndWrite("}\n");
+
+     var result = new CSymbol(string.Empty, string.Empty);
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+
+ /// <summary>
+ /// Maps a variable to a symbol typed by its tensor element type and named by the variable
+ /// name followed by its global variable index.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">The variable's checked type is not a tensor type.</exception>
+ protected override CSymbol VisitVar(Var expr)
+ {
+     if (!_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         string ctype;
+         if (expr.CheckedType is TensorType tensorType)
+         {
+             ctype = tensorType.DType.ToC();
+         }
+         else
+         {
+             throw new ArgumentOutOfRangeException(nameof(expr));
+         }
+
+         symbol = new CSymbol(ctype, expr.Name + expr.GlobalVarIndex.ToString());
+         _exprMemo.Add(expr, symbol);
+     }
+
+     return symbol;
+ }
+
+ /// <summary>
+ /// Renders a buffer region as a <c>.view(...)</c> call on the buffer. Regions whose ranges are
+ /// all constant with step 1 use <c>fixed_shape</c>; every other region uses <c>make_ranked_shape</c>.
+ /// </summary>
+ protected override CSymbol VisitBufferRegion(BufferRegion expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     var buffer = Visit(expr.Buffer);
+     var isConstUnitStep = expr.Region.AsValueEnumerable().All(r => r is { Start: TensorConst, Stop: TensorConst, Step: TensorConst step } && step.Value.ToScalar() == 1);
+
+     // Begins are rendered the same way in both forms.
+     var begins = StringUtility.Join(", ", expr.Region.AsValueEnumerable().Select(x => Visit(x.Start).Name));
+
+     string view;
+     if (isConstUnitStep)
+     {
+         // NOTE(review): this form passes Stop as the extent, while the ranked form passes Stop - Start.
+         var extents = StringUtility.Join(", ", expr.Region.AsValueEnumerable().Select(x => Visit(x.Stop).Name));
+         view = $"{buffer.Name}.view(fixed_shape<{begins}>{{}}, fixed_shape<{extents}>{{}})";
+     }
+     else
+     {
+         var extents = StringUtility.Join(", ", expr.Region.AsValueEnumerable().Select(x => Visit(x.Stop - x.Start).Name));
+         view = $"{buffer.Name}.view(make_ranked_shape({begins}), make_ranked_shape({extents}))";
+     }
+
+     var result = new CSymbol(string.Empty, view);
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/FunctionBuilder.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/FunctionBuilder.cs
new file mode 100644
index 0000000000..f1625b40b4
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/FunctionBuilder.cs
@@ -0,0 +1,86 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Threading.Tasks;
+using NetFabric.Hyperlinq;
+using Nncase.CodeGen.CPU;
+using Nncase.IR;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// Function builder for the CPU code generator.
+/// </summary>
+internal class FunctionBuilder
+{
+ public const string KernelHeaderSectionName = ".desc";
+ private readonly uint _id;
+ private readonly SectionManager _sectionManager;
+ private readonly BinaryWriter _textWriter;
+ private readonly BinaryWriter _rdataWriter;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="FunctionBuilder"/> class.
+ /// </summary>
+ /// <param name="id">Function id passed on to the linkable functions this builder creates.</param>
+ /// <param name="rdataWriter">Writer that receives the function's rdata bytes.</param>
+ public FunctionBuilder(uint id, BinaryWriter rdataWriter)
+ {
+     _rdataWriter = rdataWriter;
+     _id = id;
+
+     // The text writer comes from this builder's own section manager.
+     _sectionManager = new SectionManager();
+     _textWriter = _sectionManager.GetWriter(WellknownSectionNames.Text);
+ }
+
+ /// <summary>
+ /// Converts a prim function to C source and wraps it in a linkable function. Functions whose
+ /// name ends with "kernel" also get a header section and have their rdata written; functions
+ /// whose name ends with "device" are converted to a device header.
+ /// </summary>
+ /// <param name="function">Prim function to build.</param>
+ /// <returns>A linkable kernel or device function.</returns>
+ /// <exception cref="InvalidDataException">An rdata constant's byte length differs from its scheduled range.</exception>
+ /// <exception cref="NotSupportedException">The function name ends with neither "kernel" nor "device".</exception>
+ public unsafe ILinkableFunction Build(TIR.PrimFunction function)
+ {
+     // The suffixes are fixed ASCII markers, so compare ordinally rather than by culture.
+     if (function.Name.EndsWith("kernel", StringComparison.Ordinal))
+     {
+         // 1. convert func to csource; the visitor is disposable, so release it when done.
+         using var visitor = new KernelCSourceConvertVisitor();
+         visitor.Visit(function);
+         var functionCSource = visitor.GetCSource();
+
+         // 2. write the kernel header
+         using (var writer = _sectionManager.GetWriter(KernelHeaderSectionName))
+         {
+             var header = default(DescHeader);
+             header.DataPoolSize = function.SchedResult.DataUsage;
+             header.DataAlign = function.SchedResult.DataAlign;
+             writer.Write(ref header);
+         }
+
+         // 3. write the rdata
+         foreach (var (@const, range) in function.SchedResult.Rdatas)
+         {
+             var bytes = ((TensorConst)@const).Value.BytesBuffer;
+             var size = range.Max - range.Min;
+             if ((uint)bytes.Length != size)
+             {
+                 throw new InvalidDataException("The Buffer Size Not Equal!");
+             }
+
+             _rdataWriter.Position(range.Min);
+             _rdataWriter.Write(bytes);
+         }
+
+         return new LinkableKernelFunction(_id, function, functionCSource, _sectionManager.GetContent(WellknownSectionNames.Text)!, new LinkedSection(_sectionManager.GetContent(KernelHeaderSectionName), KernelHeaderSectionName, 0, 8, (uint)sizeof(DescHeader)));
+     }
+     else if (function.Name.EndsWith("device", StringComparison.Ordinal))
+     {
+         var visitor = new DeviceCSourceConvertVisitor();
+         visitor.Visit(function);
+         var header = visitor.GetHeader();
+
+         return new LinkableDeviceFunction(_id, function, header, _sectionManager.GetContent(WellknownSectionNames.Text)!);
+     }
+
+     throw new NotSupportedException("the function name is invalid");
+ }
+
+ // Header record that Build writes into the kernel header section.
+ // Sequential layout: the field order below is the layout order, so do not reorder the fields.
+ [StructLayout(LayoutKind.Sequential)]
+ private unsafe struct DescHeader
+ {
+ // Filled from the function's SchedResult.DataUsage.
+ [MarshalAs(UnmanagedType.U8)]
+ public ulong DataPoolSize;
+
+ // Filled from the function's SchedResult.DataAlign.
+ [MarshalAs(UnmanagedType.U8)]
+ public ulong DataAlign;
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/FunctionCSource.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/FunctionCSource.cs
new file mode 100644
index 0000000000..396d1fb986
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/FunctionCSource.cs
@@ -0,0 +1,20 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+#define MULTI_CORE_XPU
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Text;
+using Nncase.IR;
+using Nncase.Schedule;
+using Nncase.TIR;
+
+namespace Nncase.CodeGen;
+
+/// <summary>
+/// Pair of generated C sources for one kernel function.
+/// </summary>
+/// <param name="Main">Source text of the main part.</param>
+/// <param name="Kernel">Source text of the kernel part.</param>
+internal sealed record KernelCSource(string Main, string Kernel);
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelCSourceConvertVisitor.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelCSourceConvertVisitor.cs
new file mode 100644
index 0000000000..ce8d1ad47d
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelCSourceConvertVisitor.cs
@@ -0,0 +1,584 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+#define MULTI_CORE_CPU
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Reactive;
+using System.Runtime.InteropServices;
+using System.Text;
+using DryIoc.ImTools;
+using NetFabric.Hyperlinq;
+using Nncase.CodeGen.CPU;
+using Nncase.IR;
+using Nncase.Runtime;
+using Nncase.TIR;
+using Razor.Templating.Core;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// Scope guard that installs an <see cref="IndentWriter"/> as the ambient writer and puts the
+/// previous one back when disposed.
+/// </summary>
+internal struct IndentScope : IDisposable
+{
+    // Ambient writer for the current async flow; null while no scope is open.
+    private static readonly AsyncLocal<IndentWriter?> _writer = new AsyncLocal<IndentWriter?>();
+
+    // False for default(IndentScope), whose Dispose must not touch the ambient writer.
+    private readonly bool _initialized;
+
+    private readonly IndentWriter? _originalWriter;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="IndentScope"/> struct that writes into
+    /// <paramref name="sb"/> starting at indent 0.
+    /// </summary>
+    /// <param name="sb">Builder that receives the output.</param>
+    public IndentScope(StringBuilder sb)
+    {
+        _initialized = true;
+        _originalWriter = _writer.Value;
+        _writer.Value = new IndentWriter(sb);
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="IndentScope"/> struct that writes into the
+    /// current writer's builder, indented two more columns. Does nothing when no writer is installed.
+    /// </summary>
+    public IndentScope()
+    {
+        _initialized = true;
+        _originalWriter = _writer.Value;
+        if (_originalWriter is not null)
+        {
+            _writer.Value = new(_originalWriter.GetStringBuilder(), _originalWriter.Indent + 2);
+        }
+    }
+
+    /// <summary>
+    /// Gets the ambient writer. Only valid while a scope is open.
+    /// </summary>
+    public static IndentWriter Writer => _writer.Value!;
+
+    /// <summary>
+    /// Restores the writer that was ambient when this scope was created.
+    /// </summary>
+    public void Dispose()
+    {
+        if (_initialized)
+        {
+            _writer.Value = _originalWriter;
+        }
+    }
+}
+
+/// <summary>
+/// A C symbol: a type spelling paired with a name or expression text.
+/// </summary>
+public sealed class CSymbol
+{
+    /// <summary>
+    /// Initializes a new instance of the <see cref="CSymbol"/> class.
+    /// </summary>
+    /// <param name="type">C type text.</param>
+    /// <param name="name">C name or expression text.</param>
+    public CSymbol(string type, string name)
+    {
+        Type = type;
+        Name = name;
+    }
+
+    /// <summary>
+    /// Gets the built-in symbols. A new list is created on every access.
+    /// </summary>
+    public static IReadOnlyList<CSymbol> Builtns => new CSymbol[] {
+        new CSymbol("nncase_mt_t*", "nncase_mt"),
+        new CSymbol("uint8_t*", "data"),
+        new CSymbol("const uint8_t*", "rdata"),
+    };
+
+    /// <summary>
+    /// Gets the C type text.
+    /// </summary>
+    public string Type { get; }
+
+    /// <summary>
+    /// Gets the C name or expression text.
+    /// </summary>
+    public string Name { get; }
+
+    /// <summary>
+    /// Returns the type and the name separated by one space.
+    /// </summary>
+    public override string ToString() => $"{Type} {Name}";
+}
+
+/// <summary>
+/// String writer that can prefix a write with a configurable number of spaces.
+/// </summary>
+internal sealed class IndentWriter : StringWriter
+{
+    /// <summary>
+    /// Initializes a new instance of the <see cref="IndentWriter"/> class.
+    /// </summary>
+    /// <param name="sb">Builder that receives the output.</param>
+    /// <param name="indent">Number of spaces written by <see cref="IndWrite"/> before the value.</param>
+    public IndentWriter(StringBuilder sb, int indent = 0)
+        : base(sb)
+    {
+        Indent = indent;
+    }
+
+    /// <summary>
+    /// Gets or sets the number of leading spaces.
+    /// </summary>
+    public int Indent { get; set; }
+
+    /// <summary>
+    /// Writes <see cref="Indent"/> spaces followed by <paramref name="value"/>.
+    /// </summary>
+    /// <param name="value">Text to write after the indentation.</param>
+    public void IndWrite(string? value)
+    {
+        // Count down so a zero or negative indent writes no padding at all.
+        for (var remaining = Indent; remaining > 0; remaining--)
+        {
+            Write(' ');
+        }
+
+        Write(value);
+    }
+}
+
+/// <summary>
+/// Converts a single prim function to C source.
+/// </summary>
+internal sealed class KernelCSourceConvertVisitor : ExprFunctor, IDisposable
+{
+ private readonly Dictionary _exprMemo;
+ private readonly StringBuilder _kernelBuilder;
+
+ private readonly StringBuilder _sharedBuilder;
+ private readonly HashSet _refFuncs;
+ private readonly StringWriter _sharedWriter;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="KernelCSourceConvertVisitor"/> class with empty
+ /// output buffers and reference-keyed memo collections.
+ /// </summary>
+ public KernelCSourceConvertVisitor()
+ {
+     _exprMemo = new(ReferenceEqualityComparer.Instance);
+     _refFuncs = new(ReferenceEqualityComparer.Instance);
+     _kernelBuilder = new StringBuilder();
+
+     // The shared writer writes into the shared builder.
+     _sharedBuilder = new StringBuilder();
+     _sharedWriter = new StringWriter(_sharedBuilder);
+ }
+
+ /// <summary>
+ /// Gets the root expression of the visit, cast to a prim function.
+ /// </summary>
+ public PrimFunction VisitEntry
+ {
+     get { return (TIR.PrimFunction)VisitRoot!; }
+ }
+
+ /// <summary>
+ /// Assembles the main and kernel sources for the visited entry function. The kernel signature
+ /// lists the entry parameters, then every memoized Rdata buffer, then a trailing
+ /// <c>uint8_t* l1_data</c> parameter.
+ /// </summary>
+ public KernelCSource GetCSource()
+ {
+     var paramDecls = VisitEntry.Parameters.AsValueEnumerable().Select(Visit).Select(s => $"{s.Type} {s.Name}").ToArray();
+
+     // Rdata buffer declarations carry a leading space, as in the emitted signature text.
+     var rdataDecls = _exprMemo.Keys.OfType().Where(b => b.MemSpan.Location == MemoryLocation.Rdata).Select(Visit).Select(s => $" {s.Type} {s.Name}").ToArray();
+     var ctype = $"void {VisitEntry.Name}({string.Join(", ", paramDecls.Concat(rdataDecls))}, uint8_t* l1_data)";
+
+     var main = CSourceBuiltn.MakeMain(VisitEntry, _exprMemo.Keys.OfType().Where(b => b.MemSpan.Location == MemoryLocation.Rdata));
+     var kernel = CSourceBuiltn.MakeKernel(ctype, _kernelBuilder.ToString());
+     return new KernelCSource(main, kernel);
+ }
+
+ /// <inheritdoc/>
+ public void Dispose() => _sharedWriter.Dispose();
+
+ /// <summary>
+ /// Maps a variable to a symbol with an empty type and the variable's name.
+ /// </summary>
+ protected override CSymbol VisitVar(Var expr)
+ {
+     if (!_exprMemo.TryGetValue(expr, out var symbol))
+     {
+         symbol = new CSymbol(string.Empty, expr.Name);
+         _exprMemo.Add(expr, symbol);
+     }
+
+     return symbol;
+ }
+
+ /// <inheritdoc/>
+ protected override CSymbol VisitPrimFunction(PrimFunction expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     // Only functions whose checked type is a callable returning void are accepted.
+     if (!(expr.CheckedType is CallableType { ReturnType: TupleType ret } && ret == TupleType.Void))
+     {
+         throw new NotSupportedException("The PrimFunction must return void!");
+     }
+
+     var paramDecls = expr.Parameters.AsValueEnumerable().Select(Visit).Select(s => $"{s.Type} {s.Name}").ToArray();
+     var ctype = $"void {expr.Name}({string.Join(", ", paramDecls)})";
+
+     using (new IndentScope(_kernelBuilder))
+     {
+         // Only the braces and the body go to the kernel builder; the signature is kept in the symbol type.
+         IndentScope.Writer.IndWrite("{\n");
+
+         // Body, one indentation level deeper.
+         using (new IndentScope())
+         {
+             Visit(expr.Body);
+         }
+
+         IndentScope.Writer.IndWrite("}\n");
+     }
+
+     var result = new CSymbol(ctype, expr.Name);
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+
+ /// <inheritdoc/>
+ protected override CSymbol VisitMemSpan(MemSpan expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     var start = Visit(expr.Start);
+     _ = Visit(expr.Size);
+
+     // Pick the base pointer name from the (location, hierarchy) pair.
+     string loc;
+     switch (expr.Location, expr.Hierarchy)
+     {
+         case (MemoryLocation.Rdata, 0):
+             loc = "rdata";
+             break;
+         case (MemoryLocation.Data, 0):
+             loc = "data";
+             break;
+         case (MemoryLocation.Data, 1):
+             loc = "l1_data";
+             break;
+         default:
+             throw new NotSupportedException();
+     }
+
+     // The span length is the constant size divided by the element size in bytes.
+     var ptype = (PointerType)expr.CheckedDataType;
+     var ptypeName = ptype.ElemType.ToC();
+     var spanSize = ((TensorConst)expr.Size).Value.ToScalar() / ptype.ElemType.SizeInBytes;
+     var spanText = $"std::span<{ptypeName}, {spanSize}> (reinterpret_cast<{ptypeName}*>({loc} + {start.Name}), {spanSize})";
+
+     var result = new CSymbol(start.Type, spanText);
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+
+ /// <summary>
+ /// Maps a buffer to a symbol named after the buffer. Entry parameters, Rdata buffers and buffers
+ /// whose span start is a constant are typed as <c>tensor_view</c>; every other buffer is typed
+ /// as <c>tensor</c>.
+ /// </summary>
+ protected override CSymbol VisitBuffer(TIR.Buffer expr)
+ {
+     if (_exprMemo.TryGetValue(expr, out var cached))
+     {
+         return cached;
+     }
+
+     var isView = VisitEntry.Parameters.AsValueEnumerable().Contains(expr)
+         || expr.MemSpan.Location == MemoryLocation.Rdata
+         || expr.MemSpan.Start is TensorConst;
+
+     // The trailing space after '>' is part of the emitted type text.
+     string type;
+     if (isView)
+     {
+         type = $"tensor_view<{expr.ElemType.ToC()}, {KernelUtility.DimensionsToC(expr.Dimensions)}, {KernelUtility.StridesToC(expr.Strides)}> ";
+     }
+     else
+     {
+         type = $"tensor<{expr.ElemType.ToC()}, {KernelUtility.DimensionsToC(expr.Dimensions)}> ";
+     }
+
+     var result = new CSymbol(type, expr.Name);
+     _exprMemo.Add(expr, result);
+     return result;
+ }
+
+ ///
+ protected override CSymbol VisitCall(Call expr)
+ {
+ if (_exprMemo.TryGetValue(expr, out var symbol))
+ {
+ return symbol;
+ }
+
+ string type = expr.CheckedType switch
+ {
+ TupleType x when x == TupleType.Void => string.Empty,
+ TensorType { IsScalar: true } x => x.DType.ToC(),
+ _ => throw new NotSupportedException(),
+ };
+
+ string str = string.Empty;
+ if (expr.Target is TIR.CPU.CPUKernelOp xpuOp)
+ {
+ foreach (var item in expr.Arguments.ToArray().OfType())
+ {
+ DeclBuffer(item);
+ }
+
+ var args = expr.Arguments.ToArray().OfType().ToArray();
+ switch (xpuOp)
+ {
+ case TIR.CPU.Unary unary:
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Unary.cshtml", new UnaryKernelTemplateModel
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ UnaryOp = unary.UnaryOp,
+ }).Result);
+ break;
+ case TIR.CPU.TensorLoad load:
+ if (args.Length == 1)
+ {
+ var fullShape = Enumerable.Repeat(1, args[0].Dimensions.Length).ToArray();
+ var splitAxisAndScale = load.NdSbp.Select((sbp, i) => sbp is SBPSplit s ? (s.Axis, load.Placement.Hierarchy[i]) : (0, 1)).ToArray();
+ foreach (var s in splitAxisAndScale)
+ {
+ fullShape[s.Item1] *= s.Item2;
+ }
+
+ foreach (var (dimS, axis) in args[0].Dimensions.ToArray().Select((e, axis) => (Visit(e).Name, axis)))
+ {
+ if (int.TryParse(dimS, out var div))
+ {
+ fullShape[axis] *= div;
+ }
+ else if (CSourceUtilities.TryGetDivRem(dimS, out div, out var rem))
+ {
+ fullShape[axis] = (fullShape[axis] - 1) * div;
+ fullShape[axis] += rem;
+ }
+ }
+
+ IndentScope.Writer.Write($"tensor_boxing_load({Visit(args[0]).Name}, {{{string.Join(',', fullShape)}}}, {args[0].Dimensions.ToArray().Select(e => Visit(e).Name).ToSlicing(load.NdSbp, load.Placement)[1..^1]}, ctx);\n");
+ }
+ else
+ {
+ IndentScope.Writer.Write($"tensor_copy({Visit(args[1]).Name}{args[0].Dimensions.ToArray().Select(e => Visit(e).Name).ToSlicing(load.NdSbp, load.Placement)}, {Visit(args[0]).Name});\n");
+ }
+
+ break;
+ case TIR.CPU.TensorStore store:
+ if (args.Length == 1)
+ {
+ var fullShape = Enumerable.Repeat(1, args[0].Dimensions.Length).ToArray();
+ var splitAxisAndScale = store.NdSbp.Select((sbp, i) => sbp is SBPSplit s ? (s.Axis, store.Placement.Hierarchy[i]) : (0, 1)).ToArray();
+ foreach (var s in splitAxisAndScale)
+ {
+ fullShape[s.Item1] *= s.Item2;
+ }
+
+ foreach (var (dimS, axis) in args[0].Dimensions.ToArray().Select((e, axis) => (Visit(e).Name, axis)))
+ {
+ if (int.TryParse(dimS, out var div))
+ {
+ fullShape[axis] *= div;
+ }
+ else if (CSourceUtilities.TryGetDivRem(dimS, out div, out var rem))
+ {
+ fullShape[axis] = (fullShape[axis] - 1) * div;
+ fullShape[axis] += rem;
+ }
+ }
+
+ IndentScope.Writer.Write($"tensor_boxing_store({Visit(args[0]).Name}, {{{string.Join(',', fullShape)}}}, {args[0].Dimensions.ToArray().Select(e => Visit(e).Name).ToSlicing(store.NdSbp, store.Placement)[1..^1]}, ctx);\n");
+ }
+ else
+ {
+ IndentScope.Writer.Write($"tensor_copy({Visit(args[0]).Name}, {Visit(args[1]).Name}{args[0].Dimensions.ToArray().Select(e => Visit(e).Name).ToSlicing(store.NdSbp, store.Placement)});\n");
+ }
+
+ break;
+ case TIR.CPU.Binary binary:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Binary.cshtml", new BinaryKernelTemplateModel
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ BinaryOp = binary.BinaryOp,
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.Pack pack:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Pack.cshtml", new TypedKernelTemplateModel(pack)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ }).Result);
+ }
+
+ break;
+
+ case TIR.CPU.Unpack unpack:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Unpack.cshtml", new TypedKernelTemplateModel(unpack)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.PackedLayerNorm packedLayerNorm:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/PackedLayerNorm.cshtml", new TypedKernelTemplateModel(packedLayerNorm)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ Args = args.ToArray(),
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.PackedSoftmax packedsoftmax:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/PackedSoftMax.cshtml", new TypedKernelTemplateModel(packedsoftmax)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ Args = args.ToArray(),
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.PackedBinary packedBinary:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Binary.cshtml", new BinaryKernelTemplateModel
+ {
+ BinaryOp = packedBinary.BinaryOp,
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.PackedMatMul packedMatmul:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/PackedMatmul.cshtml", new TypedKernelTemplateModel(packedMatmul)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.PackedTranspose transpose:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/PackedTranspose.cshtml", new TypedKernelTemplateModel(transpose)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ Args = args.ToArray(),
+ }).Result);
+ }
+
+ break;
+
+ case TIR.CPU.Memcopy copy:
+ IndentScope.Writer.Write($"tensor_copy({Visit(args[0]).Name}, {Visit(args[1]).Name});\n");
+ break;
+ case TIR.CPU.Gather gather:
+ IndentScope.Writer.Write($"gather<{gather.Axis}>({Visit(args[0]).Name}, {Visit(args[1]).Name}, {Visit(args[2]).Name});\n");
+ break;
+ case TIR.CPU.Reshape reshape:
+ {
+ IndentScope.Writer.Write(RazorTemplateEngine.RenderAsync("~/CodeGen/CPU/Templates/Kernels/Reshape.cshtml", new TypedKernelTemplateModel(reshape)
+ {
+ Arguments = args.Select(x => new KernelArgument { Symbol = Visit(x) }).ToArray(),
+ Args = args.ToArray(),
+ }).Result);
+ }
+
+ break;
+ case TIR.CPU.Matmul matmul:
+ IndentScope.Writer.Write($"matmul({Visit(args[0]).Name}, {Visit(args[1]).Name}, {Visit(args[2]).Name});\n");
+ break;
+ case TIR.CPU.Swish swish:
+ if (swish.Beta != 1.0f)
+ {
+ throw new NotSupportedException();
+ }
+
+ IndentScope.Writer.Write($"unary({Visit(args[0]).Name}, {Visit(args[1]).Name});\n");
+ break;
+ case TIR.CPU.Slice slice:
+ IndentScope.Writer.Write($"slice, fixed_shape<{string.Join(",", slice.Ends)}>, fixed_shape<{string.Join(",", slice.Axes)}>, fixed_shape<{string.Join(",", slice.Strides)}>>({Visit(args[0]).Name}, {Visit(args[1]).Name});\n");
+ break;
+ case TIR.CPU.Concat concat:
+ IndentScope.Writer.Write($"concat<{concat.Axis}>(std::make_tuple({string.Join(",", args.SkipLast(1).Select(Visit).Select(s => s.Name))}), {Visit(args[^1]).Name});\n");
+ break;
+ case TIR.CPU.Transpose transpose:
+ IndentScope.Writer.Write($"transpose>({Visit(args[0]).Name}, {Visit(args[1]).Name});\n");
+ break;
+ case TIR.CPU.Pad pad:
+ IndentScope.Writer.Write($"pad<{string.Join(",", pad.Paddings)}>({Visit(args[0]).Name}, {Visit(args[1]).Name}, {args[0].CheckedDataType.ToC()} {{ {pad.PadValue} }} );\n");
+ break;
+ default:
+ throw new NotSupportedException(xpuOp.ToString());
+ }
+ }
+ else if (expr.Target is PrimFunction deviceFunc)
+ {
+ foreach (var item in expr.Arguments.ToArray().OfType())
+ {
+ DeclBuffer(item);
+ }
+#if DEBUG_PRINT
+ IndentScope.Writer.IndWrite($"runtime_util->printf(\"call {deviceFunc.Name} bid %d tid %d\\n\", bid, tid);\n");
+#endif
+ var arguments = expr.Arguments.AsValueEnumerable().Select(Visit).ToArray();
+ _refFuncs.Add(deviceFunc);
+ IndentScope.Writer.IndWrite($"{deviceFunc.Name}({string.Join(",", arguments.Select(arg => arg.Name))});\n");
+ }
+ else
+ {
+ var arguments = expr.Arguments.AsValueEnumerable().Select(Visit).ToArray();
+ switch (expr.Target)
+ {
+ case IR.Math.Binary op:
+ str = CSourceUtilities.ContertBinary(op, arguments);
+ break;
+ case IR.Math.Unary op:
+ str = CSourceUtilities.ContertUnary(op, arguments);
+ break;
+ case IR.Math.Compare op:
+ str = CSourceUtilities.ContertCompare(op, arguments);
+ break;
+ case IR.Math.Select op:
+ str = CSourceUtilities.ContertSelect(op, arguments);
+ break;
+ case TIR.Load op:
+ str = $"{arguments[0].Name}[{arguments[1].Name}]";
+ break;
+ case TIR.Store op:
+ IndentScope.Writer.IndWrite($"{arguments[0].Name}[{arguments[1].Name}] = {arguments[1].Name};\n");
+ break;
+ case TIR.CPU.PtrOf op:
+ str = op.PtrName;
+ break;
+ default:
+ throw new NotSupportedException();
+ }
+ }
+
+ symbol = new(type, str);
+ _exprMemo.Add(expr, symbol);
+ return symbol;
+ }
+
+    /// <summary>
+    /// Lowers a scalar constant to a C symbol and stores it in the expression memo.
+    /// </summary>
+    /// <param name="expr">The constant to lower.</param>
+    /// <returns>The memoized symbol, or a new one holding the C type text and the literal text.</returns>
+    /// <exception cref="NotSupportedException">
+    /// <paramref name="expr"/> is neither a scalar tensor of a primitive type nor a scalar tensor of a pointer type.
+    /// </exception>
+    protected override CSymbol VisitConst(Const expr)
+    {
+        if (_exprMemo.TryGetValue(expr, out var memoized))
+        {
+            return memoized;
+        }
+
+        string literal;
+        string ctype;
+        switch (expr)
+        {
+            case TensorConst { Value: Tensor { ElementType: PrimType primType, Shape: { IsScalar: true } } scalarTensor }:
+                {
+                    // "True"/"False" are rewritten to 1/0; a null string becomes empty; anything else is used as printed.
+                    var text = scalarTensor[0].ToString();
+                    if (text == "True")
+                    {
+                        literal = "1";
+                    }
+                    else if (text == "False")
+                    {
+                        literal = "0";
+                    }
+                    else
+                    {
+                        literal = text ?? string.Empty;
+                    }
+
+                    ctype = primType.ToC();
+                    break;
+                }
+
+            case TensorConst { Value: Tensor { ElementType: PointerType { ElemType: DataType }, Shape: { IsScalar: true } } pointerTensor }:
+                // Pointer constants are always emitted with the byte-pointer C type.
+                literal = pointerTensor.ToScalar().ToString();
+                ctype = "uint8_t *";
+                break;
+            default:
+                throw new NotSupportedException();
+        }
+
+        memoized = new(ctype, literal);
+        _exprMemo.Add(expr, memoized);
+        return memoized;
+    }
+
+    /// <summary>
+    /// Visits every field of a sequential block in order; code emission happens as a side effect of the visits.
+    /// </summary>
+    /// <param name="expr">The sequential block to lower.</param>
+    /// <returns>A symbol with empty type and name, memoized for <paramref name="expr"/>.</returns>
+    protected override CSymbol VisitSequential(Sequential expr)
+    {
+        if (_exprMemo.TryGetValue(expr, out var memoized))
+        {
+            return memoized;
+        }
+
+        foreach (var item in expr.Fields)
+        {
+            if (item is not Call invocation)
+            {
+                // Anything that is not a call is only visited; its symbol is discarded.
+                Visit(item);
+                continue;
+            }
+
+            // For a call, the name of the resulting symbol is additionally passed to IndWrite.
+            IndentScope.Writer.IndWrite(Visit(invocation).Name);
+        }
+
+        memoized = new(string.Empty, string.Empty);
+        _exprMemo.Add(expr, memoized);
+        return memoized;
+    }
+
+    /// <summary>
+    /// Emits the declaration statement for <paramref name="buffer"/> the first time the buffer is encountered.
+    /// </summary>
+    /// <param name="buffer">Buffer to declare. Nothing is emitted when it is already in the expression memo
+    /// or when its memory span is located in <see cref="MemoryLocation.Rdata"/>.</param>
+    private void DeclBuffer(TIR.Buffer buffer)
+    {
+        // Already visited: nothing more to emit for this buffer.
+        if (_exprMemo.ContainsKey(buffer))
+        {
+            return;
+        }
+
+        // Visit unconditionally so the buffer ends up memoized even when no declaration is printed.
+        var symbol = Visit(buffer);
+
+        if (buffer.MemSpan.Location == MemoryLocation.Rdata)
+        {
+            return;
+        }
+
+        IndentScope.Writer.IndWrite($"{symbol.Type} {symbol.Name}");
+        if (buffer.MemSpan.Start is not None)
+        {
+            // This continues the line started above, so it goes through Write rather than IndWrite.
+            // NOTE(review): assumes IndWrite prepends the current indentation, which previously put
+            // stray whitespace between the variable name and its initializer - confirm.
+            IndentScope.Writer.Write($"({Visit(buffer.MemSpan).Name})");
+        }
+
+        IndentScope.Writer.Write(";\n");
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelTemplateModel.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelTemplateModel.cs
new file mode 100644
index 0000000000..cb84404374
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelTemplateModel.cs
@@ -0,0 +1,43 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// One argument handed to a kernel template; it wraps the C symbol whose name the template prints.
+/// </summary>
+public class KernelArgument
+{
+ /// <summary>
+ /// Gets or sets the symbol of the argument. The initializer is a suppressed null, so the creator must assign it.
+ /// </summary>
+ public CSymbol Symbol { get; set; } = null!;
+}
+
+/// <summary>
+/// Base model shared by the kernel templates: the list of arguments of the call being rendered.
+/// </summary>
+public class KernelTemplateModel
+{
+ /// <summary>
+ /// Gets or sets the call arguments, indexed positionally by the templates.
+ /// The initializer is a suppressed null, so the creator must assign it.
+ /// </summary>
+ public KernelArgument[] Arguments { get; set; } = null!;
+}
+
+/// <summary>
+/// Model of the unary kernel template: the call arguments plus the unary operation to render.
+/// </summary>
+public class UnaryKernelTemplateModel : KernelTemplateModel
+{
+ /// <summary>
+ /// Gets or sets the unary operation selected for the rendered call.
+ /// </summary>
+ public UnaryOp UnaryOp { get; set; }
+}
+
+/// <summary>
+/// Model of the binary kernel template: the call arguments plus the binary operation to render.
+/// </summary>
+public class BinaryKernelTemplateModel : KernelTemplateModel
+{
+ /// <summary>
+ /// Gets or sets the binary operation selected for the rendered call.
+ /// </summary>
+ public BinaryOp BinaryOp { get; set; }
+}
+
+/// <summary>
+/// Template model for kernels whose template reads attributes of the op itself in addition to the argument symbols.
+/// </summary>
+/// <typeparam name="T">Concrete op type exposed to the template through <see cref="Target"/>.</typeparam>
+public class TypedKernelTemplateModel<T> : KernelTemplateModel
+    where T : IR.Op
+{
+    /// <summary>
+    /// Initializes a new instance of the <see cref="TypedKernelTemplateModel{T}"/> class.
+    /// </summary>
+    /// <param name="target">The op instance being rendered.</param>
+    public TypedKernelTemplateModel(T target)
+    {
+        Target = target;
+    }
+
+    /// <summary>
+    /// Gets the op instance being rendered.
+    /// </summary>
+    public T Target { get; }
+
+    /// <summary>
+    /// Gets or sets the original argument expressions of the call; empty unless the creator assigns them.
+    /// </summary>
+    public IR.Expr[] Args { get; set; } = Array.Empty<IR.Expr>();
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelUtility.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelUtility.cs
new file mode 100644
index 0000000000..9289e43f1c
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/KernelUtility.cs
@@ -0,0 +1,66 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.CommandLine;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Nncase.IR;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// Helpers that turn constant buffer dimensions or strides into an element count or into C++ template text.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the type arguments of <c>Cast</c> and <c>ReadOnlySpan</c> look stripped in this copy of the file;
+/// restore them from the original source before building.
+/// </remarks>
+public static class KernelUtility
+{
+ /// <summary>
+ /// Multiplies all dimensions of <paramref name="buffer"/> together.
+ /// </summary>
+ /// <param name="buffer">Buffer whose dimensions are each cast to <see cref="TensorConst"/>.</param>
+ /// <returns>The product of the dimensions, or 1 when the buffer has no dimensions.</returns>
+ public static ulong GetLength(TIR.Buffer buffer)
+ {
+ // Scalar: no dimensions at all counts as a single element.
+ if (buffer.Dimensions.Length == 0)
+ {
+ return 1;
+ }
+
+ ulong length = 1;
+ foreach (var dim in buffer.Dimensions)
+ {
+ // The hard cast means a dimension that is not a TensorConst fails here with an invalid-cast exception.
+ length *= ((TensorConst)dim).Value.Cast()[0];
+ }
+
+ return length;
+ }
+
+ /// <summary>
+ /// Formats constant dimensions as the text "fixed_shape" followed by the values in angle brackets, separated by ", ".
+ /// </summary>
+ /// <param name="dimensions">Dimension expressions; each is cast to <see cref="TensorConst"/> and its first element is printed.</param>
+ /// <returns>The formatted text; an empty span yields an empty pair of angle brackets.</returns>
+ public static string DimensionsToC(ReadOnlySpan dimensions)
+ {
+ var sb = new StringBuilder("fixed_shape<");
+ for (int i = 0; i < dimensions.Length; i++)
+ {
+ var value = ((TensorConst)dimensions[i]).Value.Cast()[0];
+ sb.Append(value);
+
+ // Separator between values only, never after the last one.
+ if (i != dimensions.Length - 1)
+ {
+ sb.Append(", ");
+ }
+ }
+
+ sb.Append('>');
+ return sb.ToString();
+ }
+
+ /// <summary>
+ /// Formats constant strides as the text "fixed_strides" followed by the values in angle brackets, separated by ", ".
+ /// </summary>
+ /// <param name="dimensions">Stride expressions; each is cast to <see cref="TensorConst"/> and its first element is printed.</param>
+ /// <returns>The formatted text; an empty span yields an empty pair of angle brackets.</returns>
+ public static string StridesToC(ReadOnlySpan dimensions)
+ {
+ // Same loop as DimensionsToC; only the prefix differs.
+ var sb = new StringBuilder("fixed_strides<");
+ for (int i = 0; i < dimensions.Length; i++)
+ {
+ var value = ((TensorConst)dimensions[i]).Value.Cast()[0];
+ sb.Append(value);
+ if (i != dimensions.Length - 1)
+ {
+ sb.Append(", ");
+ }
+ }
+
+ sb.Append('>');
+ return sb.ToString();
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkableFunction.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkableFunction.cs
new file mode 100644
index 0000000000..c0102dbff9
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkableFunction.cs
@@ -0,0 +1,60 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System.Runtime.InteropServices;
+using Nncase.IR;
+
+namespace Nncase.CodeGen.CPU;
+/// <summary>
+/// Linkable function for a kernel: the prim function together with its generated C source, text stream and sections.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the type arguments of <c>IEnumerable</c>, <c>Enumerable.Empty</c> and <c>IReadOnlyList</c>
+/// look stripped in this copy of the file; restore them from the original source.
+/// </remarks>
+internal sealed class LinkableKernelFunction : ILinkableFunction
+{
+ /// <summary>
+ /// Initializes a new instance of the <see cref="LinkableKernelFunction"/> class.
+ /// </summary>
+ /// <param name="id">Function id.</param>
+ /// <param name="sourceFunction">Prim function; exposed both as <see cref="SourceFunction"/> and <see cref="PrimFunction"/>.</param>
+ /// <param name="funcCSource">Generated C source of the kernel.</param>
+ /// <param name="text">Text stream of the function.</param>
+ /// <param name="sections">Sections attached to the function.</param>
+ public LinkableKernelFunction(uint id, TIR.PrimFunction sourceFunction, KernelCSource funcCSource, Stream text, params ILinkedSection[] sections)
+ {
+ Id = id;
+ SourceFunction = sourceFunction;
+ PrimFunction = sourceFunction;
+ FunctionCSource = funcCSource;
+ Text = text;
+ Sections = sections;
+ }
+
+ /// <summary>Gets the function id.</summary>
+ public uint Id { get; }
+
+ /// <summary>Gets the source function (the same object as <see cref="PrimFunction"/>).</summary>
+ public BaseFunction SourceFunction { get; }
+
+ /// <summary>Gets the source function with its concrete prim-function type.</summary>
+ public TIR.PrimFunction PrimFunction { get; }
+
+ /// <summary>Gets the generated C source of the kernel.</summary>
+ public KernelCSource FunctionCSource { get; }
+
+ /// <summary>Gets the text stream of the function.</summary>
+ public Stream Text { get; }
+
+ /// <summary>Gets the function references; always empty for this type.</summary>
+ public IEnumerable FunctionRefs => Enumerable.Empty();
+
+ /// <summary>Gets the sections passed to the constructor.</summary>
+ public IReadOnlyList Sections { get; }
+}
+
+/// <summary>
+/// Linkable function for a device function: the prim function together with its header text and text stream.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the type arguments of <c>Array.Empty</c>, <c>IEnumerable</c>, <c>Enumerable.Empty</c> and
+/// <c>IReadOnlyList</c> look stripped in this copy of the file; restore them from the original source.
+/// </remarks>
+internal sealed class LinkableDeviceFunction : ILinkableFunction
+{
+ /// <summary>
+ /// Initializes a new instance of the <see cref="LinkableDeviceFunction"/> class.
+ /// </summary>
+ /// <param name="id">Function id.</param>
+ /// <param name="sourceFunction">Prim function; exposed both as <see cref="SourceFunction"/> and <see cref="PrimFunction"/>.</param>
+ /// <param name="header">Header text of the function.</param>
+ /// <param name="text">Text stream of the function.</param>
+ public LinkableDeviceFunction(uint id, TIR.PrimFunction sourceFunction, string header, Stream text)
+ {
+ Id = id;
+ SourceFunction = sourceFunction;
+ Header = header;
+ PrimFunction = sourceFunction;
+ Text = text;
+
+ // Device functions carry no sections of their own.
+ Sections = Array.Empty();
+ }
+
+ /// <summary>Gets the function id.</summary>
+ public uint Id { get; }
+
+ /// <summary>Gets the source function (the same object as <see cref="PrimFunction"/>).</summary>
+ public BaseFunction SourceFunction { get; }
+
+ /// <summary>Gets the header text of the function.</summary>
+ public string Header { get; }
+
+ /// <summary>Gets the source function with its concrete prim-function type.</summary>
+ public TIR.PrimFunction PrimFunction { get; }
+
+ /// <summary>Gets the text stream of the function.</summary>
+ public Stream Text { get; }
+
+ /// <summary>Gets the function references; always empty for this type.</summary>
+ public IEnumerable FunctionRefs => Enumerable.Empty();
+
+ /// <summary>Gets the sections; always empty for this type.</summary>
+ public IReadOnlyList Sections { get; }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkableModule.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkableModule.cs
new file mode 100644
index 0000000000..5d0567970b
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkableModule.cs
@@ -0,0 +1,110 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Threading.Tasks;
+using DryIoc.ImTools;
+using Nncase.CodeGen.CPU;
+using Nncase.Diagnostics;
+using Nncase.Runtime.StackVM;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// Linkable CPU module: dumps the generated sources into the dump directory, compiles them and
+/// packs the resulting binaries into the text section of a <see cref="LinkedModule"/>.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the type arguments of <c>IReadOnlyList</c>, <c>OfType</c> and <c>List</c> look stripped in this
+/// copy of the file. Judging by the members used, the first <c>OfType</c> selects the functions that expose
+/// <c>Header</c> and the other two select the functions that expose <c>FunctionCSource</c> - confirm against the
+/// original source.
+/// </remarks>
+internal sealed class LinkableModule : ILinkableModule
+{
+ // Read-only data stream handed through unchanged to the linked module.
+ private readonly Stream _rdata;
+
+ private readonly IReadOnlyList _functions;
+ private readonly CompileOptions _options;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="LinkableModule"/> class.
+ /// </summary>
+ /// <param name="rdata">Read-only data stream of the module.</param>
+ /// <param name="functions">Linkable functions of the module.</param>
+ /// <param name="options">Compile options; only the dump directory is read here.</param>
+ public LinkableModule(Stream rdata, IReadOnlyList functions, CompileOptions options)
+ {
+ _rdata = rdata;
+ _functions = functions;
+ _options = options;
+ }
+
+ /// <summary>
+ /// Writes the sources to disk, compiles every kernel and concatenates the binaries into the text section.
+ /// </summary>
+ /// <param name="linkContext">Link context; not referenced by this implementation.</param>
+ /// <returns>The linked module holding the functions, the text section content and the rdata stream.</returns>
+ public ILinkedModule Link(ILinkContext linkContext)
+ {
+ // Step 1: write the shared header "device.h" (the builtin kernel header followed by each function header).
+ {
+ if (!Directory.Exists(_options.DumpDir))
+ {
+ Directory.CreateDirectory(_options.DumpDir);
+ }
+
+ using (var writer = new StreamWriter(File.Open(Path.Join(_options.DumpDir, "device.h"), FileMode.Create)))
+ {
+ writer.Write(CSourceBuiltn.KernelHeader);
+
+ foreach (var func in _functions.OfType())
+ {
+ writer.Write(func.Header);
+ }
+ }
+ }
+
+ // Step 2: one sub-directory per function, named after it, holding main.cpp, kernel.h and CMakeLists.txt.
+ foreach (var func in _functions.OfType())
+ {
+ var dumpPath = Path.Join(_options.DumpDir, func.PrimFunction.Name);
+ if (!Directory.Exists(dumpPath))
+ {
+ Directory.CreateDirectory(dumpPath);
+ }
+
+ using (var fs = File.Open(Path.Join(dumpPath, "main.cpp"), FileMode.Create))
+ {
+ using (var writer = new StreamWriter(fs))
+ {
+ writer.Write(func.FunctionCSource.Main);
+ }
+ }
+
+ using (var fs = File.Open(Path.Join(dumpPath, "kernel.h"), FileMode.Create))
+ {
+ using (var writer = new StreamWriter(fs))
+ {
+ writer.Write(func.FunctionCSource.Kernel);
+ }
+ }
+
+ using (var fs = File.Open(Path.Join(dumpPath, "CMakeLists.txt"), FileMode.Create))
+ {
+ using (var writer = new StreamWriter(fs))
+ {
+ writer.Write(CSourceBuiltn.CMakeDef(func.PrimFunction.Name));
+ }
+ }
+ }
+
+ // Step 3: compile each directory and append the binary to the text section, recording where it landed.
+ var manager = new SectionManager();
+ var textWriter = manager.GetWriter(WellknownSectionNames.Text);
+ var linkedFunctions = new List();
+
+ // Running byte offset of the next function inside the text section.
+ int offset = 0;
+ foreach (var func in _functions.OfType())
+ {
+ var dumpPath = Path.Join(_options.DumpDir, func.PrimFunction.Name);
+ var elfPath = CompileCSource(dumpPath);
+
+ var func_text = File.ReadAllBytes(elfPath);
+ textWriter.Write(func_text);
+ linkedFunctions.Add(new LinkedFunction(func.Id, func.SourceFunction, (uint)offset, (uint)func_text.Length, func.Sections));
+ offset += func_text.Length;
+ }
+
+ return new LinkedModule(linkedFunctions, manager.GetContent(WellknownSectionNames.Text)!, _rdata);
+ }
+
+ /// <summary>
+ /// Compiles the sources found in <paramref name="sourcePath"/>.
+ /// </summary>
+ /// <param name="sourcePath">Directory containing the dumped sources of one function.</param>
+ /// <returns>Whatever path the compiler reports; the caller reads that file as the function binary.</returns>
+ private string CompileCSource(string sourcePath)
+ {
+ var compiler = new CSourceCompiler();
+
+ // Despite its name this is a file path: build/nncase_cpu_module, with an .exe suffix on Windows.
+ var binDir = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
+ ? Path.Join(sourcePath, "build", "nncase_cpu_module.exe")
+ : Path.Join(sourcePath, "build", "nncase_cpu_module");
+ return compiler.Compile(sourcePath, binDir);
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkedModule.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkedModule.cs
new file mode 100644
index 0000000000..a94e9a76f3
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/LinkedModule.cs
@@ -0,0 +1,32 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Nncase.Runtime.StackVM;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// Linked CPU module: the linked functions plus a text section and an rdata section.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the type arguments of <c>IReadOnlyList</c> look stripped in this copy of the file;
+/// restore them from the original source.
+/// </remarks>
+internal sealed class LinkedModule : ILinkedModule
+{
+ /// <summary>
+ /// Initializes a new instance of the <see cref="LinkedModule"/> class.
+ /// </summary>
+ /// <param name="functions">Linked functions of the module.</param>
+ /// <param name="text">Content of the text section.</param>
+ /// <param name="rdata">Content of the rdata section.</param>
+ public LinkedModule(IReadOnlyList functions, Stream text, Stream rdata)
+ {
+ Functions = functions;
+
+ // Both sections are created with the literal arguments 0 and 8 and sized by the stream length.
+ // NOTE(review): 0 and 8 are presumably flags and alignment - confirm against the LinkedSection constructor.
+ Sections = new[]
+ {
+ new LinkedSection(text, WellknownSectionNames.Text, 0, 8, (ulong)text.Length),
+ new LinkedSection(rdata, WellknownSectionNames.Rdata, 0, 8, (ulong)rdata.Length),
+ };
+ }
+
+ /// <summary>Gets the module kind, the constant "cpu".</summary>
+ public string ModuleKind => "cpu";
+
+ /// <summary>Gets the module version, the constant 0.</summary>
+ public uint Version => 0;
+
+ /// <summary>Gets the linked functions.</summary>
+ public IReadOnlyList Functions { get; }
+
+ /// <summary>Gets the text section followed by the rdata section.</summary>
+ public IReadOnlyList Sections { get; }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/ModuleBuilder.cs b/modules/Nncase.Modules.CPU/CodeGen/CPU/ModuleBuilder.cs
new file mode 100644
index 0000000000..ccbdb0d572
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/ModuleBuilder.cs
@@ -0,0 +1,38 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System.Text;
+using Nncase.Diagnostics;
+using Nncase.IR;
+
+namespace Nncase.CodeGen.CPU;
+
+/// <summary>
+/// CPU module builder: turns functions into linkable functions that share one rdata section.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the type arguments of <c>IReadOnlyList</c> and <c>OfType</c> look stripped in this copy of the
+/// file; restore them from the original source.
+/// </remarks>
+public sealed class CPUModuleBuilder : IModuleBuilder
+{
+ private readonly SectionManager _sectionManager;
+
+ // Writer over the rdata section; every FunctionBuilder created in Build receives this same writer.
+ private readonly BinaryWriter _rdataWriter;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CPUModuleBuilder"/> class.
+ /// </summary>
+ /// <param name="options">Compile options forwarded to the linkable module.</param>
+ public CPUModuleBuilder(CompileOptions options)
+ {
+ _sectionManager = new();
+ _rdataWriter = _sectionManager.GetWriter(WellknownSectionNames.Rdata);
+ CompileOptions = options;
+ }
+
+ /// <summary>Gets the compile options passed to the constructor.</summary>
+ public CompileOptions CompileOptions { get; }
+
+ /// <inheritdoc/>
+ public string ModuleKind => "cpu";
+
+ /// <inheritdoc/>
+ public ILinkableModule Build(IReadOnlyList functions)
+ {
+ // Ids are the positions within the filtered sequence, not within the incoming list.
+ var linkableFunctions = functions.OfType().Select((f, i) => new FunctionBuilder((uint)i, _rdataWriter).Build(f)).ToArray();
+
+ // Flush before the section content is handed to the linkable module.
+ _rdataWriter.Flush();
+
+ return new LinkableModule(_sectionManager.GetContent(WellknownSectionNames.Rdata)!, linkableFunctions, CompileOptions);
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/CMakeLists.txt.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/CMakeLists.txt.cshtml
new file mode 100644
index 0000000000..7b48b304d5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/CMakeLists.txt.cshtml
@@ -0,0 +1,28 @@
+# This file is generated by Nncase CPU module builder.
+
+cmake_minimum_required(VERSION 3.15)
+
+project(nncase_cpu_module)
+
+# Path of the CMake file to include, filled in by the template model.
+include(@Html.Raw(Model.CMakePath))
+
+add_executable(nncase_cpu_module main.cpp)
+target_compile_features(nncase_cpu_module PUBLIC cxx_std_20)
+target_link_libraries(nncase_cpu_module PRIVATE nncase_cpu_runtime)
+target_compile_definitions(nncase_cpu_module PUBLIC -DNNCASE_CPU_MODULE=1)
+
+# The module is entered through kernel_entry instead of the default entry point on every platform.
+if (MSVC)
+    set_target_properties(nncase_cpu_module PROPERTIES LINK_FLAGS /SUBSYSTEM:CONSOLE)
+    target_link_options(nncase_cpu_module PRIVATE /ENTRY:kernel_entry /NODEFAULTLIB)
+    target_link_libraries(nncase_cpu_module PRIVATE libvcruntime msvcrt)
+    # Static runtime; the generator expression appends "Debug" for Debug configurations only.
+    set_property(TARGET nncase_cpu_module PROPERTY
+        MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
+else()
+    target_link_options(nncase_cpu_module PRIVATE -static)
+    if (APPLE)
+        target_link_options(nncase_cpu_module PRIVATE -e _kernel_entry -bundle -ld_classic -lc)
+    else()
+        target_link_options(nncase_cpu_module PRIVATE -e kernel_entry -nostdlib)
+        target_link_libraries(nncase_cpu_module PRIVATE gcc)
+    endif()
+endif()
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Binary.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Binary.cshtml
new file mode 100644
index 0000000000..bbd4779985
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Binary.cshtml
@@ -0,0 +1,17 @@
+@model Nncase.CodeGen.CPU.BinaryKernelTemplateModel
+@{
+ // Maps a BinaryOp onto the name printed as the template argument of the binary call rendered below.
+ // An op without an entry raises NotSupportedException while the template renders.
+ string BinaryToCFunction(BinaryOp op) =>
+ op switch
+ {
+ BinaryOp.Add => "ops::add",
+ BinaryOp.Sub => "ops::sub",
+ BinaryOp.Mul => "ops::mul",
+ BinaryOp.Div => "ops::div",
+ BinaryOp.Mod => "ops::mod",
+ BinaryOp.Min => "ops::min",
+ BinaryOp.Max => "ops::max",
+ BinaryOp.Pow => "ops::pow",
+ _ => throw new NotSupportedException($"Unsupported binary: {op}."),
+ };
+
+ // The rendered line reads Arguments[0..2], so the model must carry at least three arguments.
+}
+binary<@BinaryToCFunction(Model.BinaryOp)>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name), @Html.Raw(Model.Arguments[2].Symbol.Name));
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Pack.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Pack.cshtml
new file mode 100644
index 0000000000..952534aaf2
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Pack.cshtml
@@ -0,0 +1,4 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a single pack call: the template arguments are the comma-joined Model.Target.Axes and the
+ // call arguments are the symbol names of Arguments[0] and Arguments[1].
+ // NOTE(review): the generic argument of the model type in the directive above looks stripped - confirm.
+}
+pack<@string.Join(",", Model.Target.Axes)>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name));
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedLayerNorm.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedLayerNorm.cshtml
new file mode 100644
index 0000000000..0ca328fb0b
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedLayerNorm.cshtml
@@ -0,0 +1,4 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a single packed_layer_norm call. Model.Target.Axis is the template argument; the call receives
+ // the symbol names of Arguments[0..3], the epsilon wrapped in the C type of Args[0], the lower-cased
+ // UseMean flag and two fixed_shape values built from PackedAxes and PadedNums.
+ // Args[0] is read as well, so the creator must fill Args in addition to Arguments.
+ // NOTE(review): the generic argument of the model type in the directive above looks stripped - confirm.
+}
+packed_layer_norm<@Model.Target.Axis>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name), @Html.Raw(Model.Arguments[2].Symbol.Name), @Html.Raw(Model.Arguments[3].Symbol.Name), @Html.Raw(Model.Args[0].CheckedTensorType.DType.ToC()) { @Model.Target.Epsilon }, @Model.Target.UseMean.ToString().ToLower(), fixed_shape<@string.Join(",", Model.Target.PackedAxes)>{}, fixed_shape<@string.Join(",", Model.Target.PadedNums)>{});
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedMatmul.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedMatmul.cshtml
new file mode 100644
index 0000000000..28f1af3bb9
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedMatmul.cshtml
@@ -0,0 +1,5 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a single packed_matmul call: the symbol names of Arguments[0..2] followed by four fixed_shape
+ // values built from LhsPackedAxes, LhsPadedNums, RhsPackedAxes and RhsPadedNums of the target op.
+ // NOTE(review): the generic argument of the model type in the directive above looks stripped - confirm.
+}
+packed_matmul(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name), @Html.Raw(Model.Arguments[2].Symbol.Name), fixed_shape<@string.Join(",", Model.Target.LhsPackedAxes)>{}, fixed_shape<@string.Join(",", Model.Target.LhsPadedNums)>{}, fixed_shape<@string.Join(",", Model.Target.RhsPackedAxes)>{}, fixed_shape<@string.Join(",", Model.Target.RhsPadedNums)>{});
+
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedSoftMax.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedSoftMax.cshtml
new file mode 100644
index 0000000000..015c5a10c5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedSoftMax.cshtml
@@ -0,0 +1,5 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a single packed_softmax call: Model.Target.Axis is the template argument; the call receives the
+ // symbol names of Arguments[0] and Arguments[1] plus a fixed_shape value built from PackedAxes.
+ // NOTE(review): the generic argument of the model type in the directive above looks stripped - confirm.
+}
+packed_softmax<@Model.Target.Axis>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name), fixed_shape<@string.Join(",", Model.Target.PackedAxes)>{});
+
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedTranspose.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedTranspose.cshtml
new file mode 100644
index 0000000000..213d6c78b4
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/PackedTranspose.cshtml
@@ -0,0 +1,4 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a single transpose call on the symbol names of Arguments[0] and Arguments[1].
+ // NOTE(review): the template argument list of the transpose call below is incomplete (only its closing
+ // bracket is left) and the generic argument of the model type looks stripped as well; the original
+ // presumably printed a shape built from an attribute of the target op - restore from the original source.
+}
+transpose>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name));
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Reshape.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Reshape.cshtml
new file mode 100644
index 0000000000..79d8d8a6bc
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Reshape.cshtml
@@ -0,0 +1,4 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a reshape as a tensor_copy: Arguments[0] is reshaped to a fixed_shape built from
+ // Model.Target.NewShape and passed first, Arguments[1] is passed second.
+ // NOTE(review): the generic argument of the model type in the directive above looks stripped - confirm.
+}
+tensor_copy(@(Html.Raw(Model.Arguments[0].Symbol.Name)).reshape(fixed_shape<@string.Join(",", Model.Target.NewShape)>{}), @Html.Raw(Model.Arguments[1].Symbol.Name));
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Unary.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Unary.cshtml
new file mode 100644
index 0000000000..29b5f56f79
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Unary.cshtml
@@ -0,0 +1,29 @@
+@model Nncase.CodeGen.CPU.UnaryKernelTemplateModel
+@{
+ // Maps a UnaryOp onto the name printed as the template argument of the unary call rendered below.
+ // An op without an entry raises NotSupportedException while the template renders.
+ string UnaryToCFunction(UnaryOp op) =>
+ op switch
+ {
+ UnaryOp.Abs => "ops::abs",
+ UnaryOp.Acos => "ops::acos",
+ UnaryOp.Acosh => "ops::acosh",
+ UnaryOp.Asin => "ops::asin",
+ UnaryOp.Asinh => "ops::asinh",
+ UnaryOp.Ceil => "ops::ceil",
+ UnaryOp.Cos => "ops::cos",
+ UnaryOp.Cosh => "ops::cosh",
+ UnaryOp.Exp => "ops::exp",
+ UnaryOp.Floor => "ops::floor",
+ UnaryOp.Log => "ops::log",
+ UnaryOp.Neg => "ops::neg",
+ UnaryOp.Round => "ops::round",
+ UnaryOp.Rsqrt => "ops::rsqrt",
+ UnaryOp.Sign => "ops::sign",
+ UnaryOp.Sin => "ops::sin",
+ UnaryOp.Sinh => "ops::sinh",
+ UnaryOp.Sqrt => "ops::sqrt",
+ UnaryOp.Square => "ops::square",
+ UnaryOp.Tanh => "ops::tanh",
+ _ => throw new NotSupportedException($"Unsupported unary: {op}."),
+ };
+
+ // The rendered line reads Arguments[0] and Arguments[1], so the model must carry at least two arguments.
+}
+unary<@UnaryToCFunction(Model.UnaryOp)>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name));
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Unpack.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Unpack.cshtml
new file mode 100644
index 0000000000..3154087509
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/Kernels/Unpack.cshtml
@@ -0,0 +1,4 @@
+@model Nncase.CodeGen.CPU.TypedKernelTemplateModel
+@{
+ // Renders a single unpack call: the template arguments are the comma-joined Model.Target.Axes and the
+ // call arguments are the symbol names of Arguments[0] and Arguments[1].
+ // NOTE(review): the generic argument of the model type in the directive above looks stripped - confirm.
+}
+unpack<@string.Join(",", Model.Target.Axes)>(@Html.Raw(Model.Arguments[0].Symbol.Name), @Html.Raw(Model.Arguments[1].Symbol.Name));
diff --git a/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/_ViewImports.cshtml b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/_ViewImports.cshtml
new file mode 100644
index 0000000000..ad79fd8715
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/CodeGen/CPU/Templates/_ViewImports.cshtml
@@ -0,0 +1,4 @@
+@using Nncase
+@using Nncase.CodeGen.CPU
+@using Nncase.TIR
+@*@addTagHelper *, Microsoft.AspNetCore.Mvc.TagHelpers*@
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/Boxing.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/Boxing.cs
new file mode 100644
index 0000000000..e88422dc16
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/Boxing.cs
@@ -0,0 +1,155 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+#pragma warning disable SA1010, SA1008
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.Utilities;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Type inference, cost estimation and evaluation for the <see cref="Boxing"/> op.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the generic arguments of the three implemented interfaces look stripped in this copy of the file;
+/// restore them from the original source.
+/// </remarks>
+public sealed class BoxingEvaluator : ITypeInferencer, ICostEvaluator, IEvaluator
+{
+ // Burst length handed to DistributedUtility.GetDividedTensorEfficiency whenever distributed traffic is scaled.
+ private const int _burstLength = 256;
+
+ /// <summary>
+ /// Infers the result type: the op's <c>NewType</c>, unless the input type is invalid, in which case
+ /// that invalid type is propagated.
+ /// </summary>
+ public IRType Visit(ITypeInferenceContext context, Boxing target)
+ {
+ return context.GetArgumentType(target, Boxing.Input) switch
+ {
+ InvalidType inv => inv,
+ _ => target.NewType,
+ };
+ }
+
+ /// <summary>
+ /// Estimates the memory cost of the boxing from the pair (input type, return type).
+ /// </summary>
+ /// <returns>The estimated cost.</returns>
+ /// <exception cref="InvalidOperationException">Input and output are equal distributed types.</exception>
+ /// <exception cref="NotSupportedException">The type pair, or an SBP transition inside it, is not handled.</exception>
+ public Cost Visit(ICostEvaluateContext context, Boxing target)
+ {
+ var inType = context.GetArgumentType(target, Boxing.Input);
+ var returnType = context.GetReturnType();
+
+ // Starts at zero; the resharding case below accumulates into it, the other cases replace it.
+ var cost = new Cost() { [CostFactorNames.MemoryLoad] = 0, [CostFactorNames.MemoryStore] = 0 };
+ switch (inType, returnType)
+ {
+ // Tensor -> distributed: load the whole tensor, store the distributed traffic scaled by 1/efficiency.
+ case (TensorType tensorType, DistributedType distTensorType):
+ cost = new Cost()
+ {
+ [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(tensorType),
+ [CostFactorNames.MemoryStore] = (UInt128)((float)CostUtility.GetMemoryAccess(distTensorType) / DistributedUtility.GetDividedTensorEfficiency(distTensorType, _burstLength)),
+ };
+ break;
+
+ // Distributed -> tensor: the mirror image of the case above.
+ case (DistributedType distTensorType, TensorType tensorType):
+ cost = new Cost()
+ {
+ [CostFactorNames.MemoryLoad] = (UInt128)((float)CostUtility.GetMemoryAccess(distTensorType) / DistributedUtility.GetDividedTensorEfficiency(distTensorType, _burstLength)),
+ [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(tensorType),
+ };
+ break;
+
+ // Same placement, different SBP: walk the placement axes and accumulate gather/scatter factors.
+ case (DistributedType a, DistributedType b) when a.Placement == b.Placement && a.NdSBP != b.NdSBP:
+ {
+ // Returned as-is (bypassing the accumulated cost) when an axis is split along a different tensor axis.
+ var fullLoadStore = new Cost()
+ {
+ [CostFactorNames.MemoryStore] = (UInt128)((float)CostUtility.GetMemoryAccess(a) / DistributedUtility.GetDividedTensorEfficiency(a, _burstLength)),
+ [CostFactorNames.MemoryLoad] = (UInt128)((float)CostUtility.GetMemoryAccess(b) / DistributedUtility.GetDividedTensorEfficiency(b, _burstLength)),
+ };
+
+ // Products of the hierarchy sizes of the axes that need scattering / gathering.
+ float scatterPart = 1;
+ float gatherPart = 1;
+ for (int i = 0; i < a.Placement.Rank; i++)
+ {
+ switch (a.NdSBP[i], b.NdSBP[i])
+ {
+ case (SBPSplit { Axis: int ax }, SBP sbpout):
+ switch (sbpout)
+ {
+ case SBPSplit { Axis: int bx }:
+ if (ax != bx)
+ {
+ // when split different axis, need global load store.
+ return fullLoadStore;
+ }
+
+ break;
+ case SBPBroadCast:
+ scatterPart *= a.Placement.Hierarchy[i];
+ gatherPart *= a.Placement.Hierarchy[i];
+ break;
+ default:
+ throw new NotSupportedException("split to partial");
+ }
+
+ break;
+ case (SBPBroadCast, SBPBroadCast or SBPSplit):
+ // NOTE(review): originally labelled "no cost", yet one CPU cycle is added per such axis - confirm the intent.
+ cost += new Cost()
+ {
+ [CostFactorNames.CPUCycles] = 1,
+ };
+ break;
+ case (SBPPartialSum, SBP sbpout):
+ // This inner switch has no default: any other target SBP leaves both factors untouched.
+ switch (sbpout)
+ {
+ case SBPPartialSum:
+ break;
+ case SBPBroadCast or SBPSplit:
+ gatherPart *= a.Placement.Hierarchy[i];
+
+ // Only the first placement axis contributes to the scatter factor here.
+ if (i == 0)
+ {
+ scatterPart *= a.Placement.Hierarchy[i];
+ }
+
+ break;
+ }
+
+ break;
+ default:
+ throw new NotSupportedException($"{a} to {b}");
+ }
+ }
+
+ // Store cost: the fraction (gatherPart - 1) / gatherPart of the memory access of a's divided tensor type.
+ if (gatherPart > 1f)
+ {
+ cost += new Cost()
+ {
+ [CostFactorNames.MemoryStore] = (UInt128)((gatherPart - 1) * (float)CostUtility.GetMemoryAccess(DistributedUtility.GetDividedTensorType(a)) / gatherPart),
+ };
+ }
+
+ // Load cost: the fraction (scatterPart - 1) / scatterPart of the memory access of b's divided tensor type.
+ if (scatterPart > 1f)
+ {
+ cost += new Cost()
+ {
+ [CostFactorNames.MemoryLoad] = (UInt128)((scatterPart - 1) * (float)CostUtility.GetMemoryAccess(DistributedUtility.GetDividedTensorType(b)) / scatterPart),
+ };
+ }
+ }
+
+ break;
+
+ // Same placement but different tensor types: charged as a store of a plus a load of b, both scaled by 1/efficiency.
+ case (DistributedType a, DistributedType b) when a.TensorType != b.TensorType && a.Placement == b.Placement:
+ cost = new Cost()
+ {
+ [CostFactorNames.MemoryStore] = (UInt128)((float)CostUtility.GetMemoryAccess(a) / DistributedUtility.GetDividedTensorEfficiency(a, _burstLength)),
+ [CostFactorNames.MemoryLoad] = (UInt128)((float)CostUtility.GetMemoryAccess(b) / DistributedUtility.GetDividedTensorEfficiency(b, _burstLength)),
+ };
+ break;
+ case (DistributedType a, DistributedType b) when a == b:
+ throw new InvalidOperationException($"the boxing inType == outType");
+ default:
+ throw new NotSupportedException($"{inType} {returnType}");
+ }
+
+ return cost;
+ }
+
+ /// <summary>
+ /// Evaluates the op by copying the input bytes into a new tensor that has the shape of the target type.
+ /// </summary>
+ public IValue Visit(IEvaluateContext context, Boxing target)
+ {
+ var input = context.GetArgumentValueAsTensor(target, Boxing.Input);
+
+ // The element type is kept; only the shape comes from the new type. Any other new type passes the input through.
+ return target.NewType switch
+ {
+ TensorType t => Value.FromTensor(Tensor.FromBytes(input.ElementType, input.BytesBuffer.ToArray(), t.Shape)),
+ DistributedType d => Value.FromTensor(Tensor.FromBytes(input.ElementType, input.BytesBuffer.ToArray(), d.TensorType.Shape)),
+ _ => Value.FromTensor(input),
+ };
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/CPUKernelOp.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/CPUKernelOp.cs
new file mode 100644
index 0000000000..39e1f58601
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/CPUKernelOp.cs
@@ -0,0 +1,34 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator for <see cref="CPUKernelOp"/>: every query is forwarded to the op wrapped in its <c>Target</c>.
+/// </summary>
+/// <remarks>
+/// NOTE(review): the generic arguments of the three implemented interfaces look stripped in this copy of the file;
+/// restore them from the original source.
+/// </remarks>
+public class CPUKernelOpEvaluator : IEvaluator, ITypeInferencer, ICostEvaluator
+{
+ /// <summary>
+ /// Evaluates the wrapped op in the given context.
+ /// </summary>
+ public IValue Visit(IEvaluateContext context, CPUKernelOp target)
+ {
+ return CompilerServices.EvaluateOp(target.Target, context);
+ }
+
+ /// <summary>
+ /// Infers the type of the wrapped op; a fresh default-constructed third argument is passed on each call.
+ /// </summary>
+ public IRType Visit(ITypeInferenceContext context, CPUKernelOp target)
+ {
+ return CompilerServices.InferenceOp(target.Target, context, new());
+ }
+
+ /// <summary>
+ /// Estimates the cost of the wrapped op in the given context.
+ /// </summary>
+ public Cost Visit(ICostEvaluateContext context, CPUKernelOp target)
+ {
+ return CompilerServices.EvaluateOpCost(target.Target, context);
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/CPUModule.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/CPUModule.cs
new file mode 100644
index 0000000000..70c0fc141c
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/CPUModule.cs
@@ -0,0 +1,28 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using DryIoc;
+using Nncase.Hosting;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// CPU module.
+/// </summary>
+internal class CPUModule : IApplicationPart
+{
+ // Registers this module's services on the given registrator; every registration uses singleton reuse.
+ public void ConfigureServices(IRegistrator registrator)
+ {
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ registrator.RegisterManyInterface(reuse: Reuse.Singleton);
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/Load.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/Load.cs
new file mode 100644
index 0000000000..cf0902ce46
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/Load.cs
@@ -0,0 +1,27 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Type inferencer and cost evaluator for <see cref="Load"/>.
+/// </summary>
+public sealed class LoadEvaluator : ITypeInferencer, ICostEvaluator
+{
+    /// <summary>
+    /// The inferred type is the type of the input argument.
+    /// </summary>
+    public IRType Visit(ITypeInferenceContext context, Load target) =>
+        context.GetArgumentType(target, Load.Input);
+
+    /// <summary>
+    /// Charges a memory load and a memory store, both sized by the input argument's type.
+    /// </summary>
+    public Cost Visit(ICostEvaluateContext context, Load target)
+    {
+        var cost = new Cost();
+        cost[CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(context.GetArgumentType(target, Load.Input));
+        cost[CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(context.GetArgumentType(target, Load.Input));
+        return cost;
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/Pack.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/Pack.cs
new file mode 100644
index 0000000000..710a29cbc5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/Pack.cs
@@ -0,0 +1,81 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+#pragma warning disable SA1010, SA1008
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.IR.Tensors;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer and cost evaluator for <see cref="Pack"/>.
+/// </summary>
+public sealed class PackEvaluator : ITypeInferencer, ICostEvaluator, IEvaluator
+{
+    /// <inheritdoc/>
+    public IValue Visit(IEvaluateContext context, Pack target)
+    {
+        var input = context.GetOrtArgumentValue(target, Pack.Input);
+
+        // Pack one axis at a time, pairing each lane count with its axis.
+        foreach (var (lanes, axis) in target.Lanes.Zip(target.Axes))
+        {
+            input = input.Pack(lanes, axis);
+        }
+
+        // The last target.Lanes.Count dims of the packed tensor are dropped from the result shape;
+        // the lanes are carried by the vector element type instead.
+        return Value.FromTensor(Tensor.FromBytes(new VectorType(input.DataType.ToDataType(), target.Lanes), input.BytesBuffer.ToArray(), input.Shape.ToArray().SkipLast(target.Lanes.Count).Select(i => (int)i).ToArray()));
+    }
+
+    /// <inheritdoc/>
+    public IRType Visit(ITypeInferenceContext context, Pack target)
+    {
+        var input = context.CheckArgumentType(target, Pack.Input);
+
+        return input switch
+        {
+            DistributedType d => Visit(context, target, d),
+            TensorType t => Visit(context, target, t),
+            AnyType => AnyType.Default,
+            _ => new InvalidType(input.GetType().ToString()),
+        };
+    }
+
+    /// <inheritdoc/>
+    public Cost Visit(ICostEvaluateContext context, Pack target)
+    {
+        var inputType = context.GetArgumentType(target, Pack.Input);
+        var outputType = context.GetReturnType();
+
+        // One read of the input and one write of the output.
+        return new()
+        {
+            [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(inputType),
+            [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(outputType),
+        };
+    }
+
+    /// <summary>
+    /// Reports off-chip memory traffic as twice the memory access of the return type.
+    /// </summary>
+    public Metric Visit(IMetricEvaluateContext context, Pack target)
+    {
+        var returnType = context.GetReturnType();
+        return new()
+        {
+            [MetricFactorNames.OffChipMemoryTraffic] = CostUtility.GetMemoryAccess(returnType) * 2,
+        };
+    }
+
+    /// <summary>
+    /// Infers the packed type of a plain tensor input.
+    /// </summary>
+    private IRType Visit(ITypeInferenceContext context, Pack target, TensorType input)
+    {
+        return TypeInference.PackType(input, target.Lanes, target.Axes);
+    }
+
+    /// <summary>
+    /// Infers the packed type of a distributed input; SBP and placement are kept from the input.
+    /// </summary>
+    private IRType Visit(ITypeInferenceContext context, Pack target, DistributedType input)
+    {
+        // When the tensor-level inference does not yield a tensor type (e.g. an invalid type),
+        // hand that result back to the caller so its reason is preserved, instead of throwing.
+        var inferred = Visit(context, target, input.TensorType);
+        return inferred is TensorType tensorType
+            ? new DistributedType(tensorType, input.NdSBP, input.Placement)
+            : inferred;
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedBinary.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedBinary.cs
new file mode 100644
index 0000000000..8a1d5fa5aa
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedBinary.cs
@@ -0,0 +1,230 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+#pragma warning disable SA1008 // Opening parenthesis should be spaced correctly
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer and cost evaluator for <see cref="PackedBinary"/>.
+/// </summary>
+public sealed class PackedBinaryEvaluator : IEvaluator, ITypeInferencer, ICostEvaluator
+{
+    /// <summary>
+    /// How an output dimension is produced from the two operands.
+    /// </summary>
+    internal enum DimKind : int
+    {
+        E, // elemwise
+        B, // broadcast
+    }
+
+    /// <summary>
+    /// Computes lhs (op) rhs for Add, Sub, Mul and Div.
+    /// </summary>
+    /// <exception cref="ArgumentOutOfRangeException">The binary op is not one of Add, Sub, Mul, Div.</exception>
+    public IValue Visit(IEvaluateContext context, PackedBinary target)
+    {
+        var a = context.GetOrtArgumentValue(target, PackedBinary.Lhs);
+        var b = context.GetOrtArgumentValue(target, PackedBinary.Rhs);
+
+        // When one side has two packed axes and the other only one, insert a unit dim at -2
+        // on the side with fewer packed axes.
+        switch (target.LhsPackedAxes.Count, target.RhsPackedAxes.Count)
+        {
+            case (2, 1):
+                b = OrtKI.Unsqueeze(b, new long[] { -2 });
+                break;
+            case (1, 2):
+                a = OrtKI.Unsqueeze(a, new long[] { -2 });
+                break;
+            default:
+                break;
+        }
+
+        var binary = target.BinaryOp switch
+        {
+            BinaryOp.Add => a + b,
+            BinaryOp.Sub => a - b,
+            BinaryOp.Mul => a * b,
+            BinaryOp.Div => a / b,
+            _ => throw new ArgumentOutOfRangeException(nameof(target), target.BinaryOp, "Unsupported binary op for PackedBinary."),
+        };
+
+        // Result bytes are re-viewed with the checked data type and shape of the current call.
+        return Value.FromTensor(Tensor.FromBytes(context.CurrentCall.CheckedDataType, binary.BytesBuffer.ToArray(), context.CurrentCall.CheckedShape));
+    }
+
+    /// <summary>
+    /// Infers the result type; only tensor/tensor and distributed/distributed operand pairs are supported.
+    /// </summary>
+    public IRType Visit(ITypeInferenceContext context, PackedBinary target)
+    {
+        var lhs = context.CheckArgumentType(target, PackedBinary.Lhs);
+        var rhs = context.CheckArgumentType(target, PackedBinary.Rhs);
+
+        return (lhs, rhs) switch
+        {
+            (DistributedType a, DistributedType b) => Visit(target, a, b),
+            (TensorType a, TensorType b) => Visit(target, a, b),
+            _ => new InvalidType("not support"),
+        };
+    }
+
+    /// <summary>
+    /// Cost: loads both operands, stores the output, and CPU cycles over the output.
+    /// </summary>
+    public Cost Visit(ICostEvaluateContext context, PackedBinary target)
+    {
+        var lhs = context.GetArgumentType(target, PackedBinary.Lhs);
+        var rhs = context.GetArgumentType(target, PackedBinary.Rhs);
+        var outputType = context.GetReturnType();
+
+        // NOTE(review): cycles are scaled by the last lhs dimension (of the divided type for a
+        // distributed lhs); confirm this factor is intended for an element-wise op.
+        uint macPerElement = 1;
+        if (lhs is TensorType { Shape: Shape lhsShape })
+        {
+            macPerElement = lhsShape[^1].IsFixed ? (uint)lhsShape[^1].FixedValue : 1U;
+        }
+        else if (lhs is DistributedType distributedType)
+        {
+            var lhsType = DistributedUtility.GetDividedTensorType(distributedType);
+            macPerElement = lhsType.Shape[^1].IsFixed ? (uint)lhsType.Shape[^1].FixedValue : 1U;
+        }
+
+        return new()
+        {
+            [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(lhs) + CostUtility.GetMemoryAccess(rhs),
+            [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(outputType),
+            [CostFactorNames.CPUCycles] = CostUtility.GetCPUCycles(outputType, macPerElement),
+        };
+    }
+
+    /// <summary>
+    /// Tensor-level inference: first derives the output shape and the kind of every dimension,
+    /// then validates the operand data types against the packed axes.
+    /// </summary>
+    private IRType Visit(PackedBinary target, TensorType a, TensorType b)
+    {
+        var rank = System.Math.Max(a.Shape.Rank, b.Shape.Rank);
+        var outShape = new int[rank];
+        var lhsOrginShape = a.Shape.ToValueArray();
+        var rhsOrginShape = b.Shape.ToValueArray();
+
+        // On every packed axis: extent * lanes - padded count.
+        for (int i = 0; i < target.LhsPackedAxes.Count; i++)
+        {
+            lhsOrginShape[target.LhsPackedAxes[i]] = (lhsOrginShape[target.LhsPackedAxes[i]] * ((VectorType)a.DType).Lanes[i]) - target.LhsPadedNums[i];
+        }
+
+        for (int i = 0; i < target.RhsPackedAxes.Count; i++)
+        {
+            rhsOrginShape[target.RhsPackedAxes[i]] = (rhsOrginShape[target.RhsPackedAxes[i]] * ((VectorType)b.DType).Lanes[i]) - target.RhsPadedNums[i];
+        }
+
+        var orginKinds = new DimKind[rank];
+
+        // Walk the dimensions from the last to the first, aligning both operands on the right.
+        for (int i = -1; i >= -rank; i--)
+        {
+            var aAxis = a.Shape.Rank + i;
+            var bAxis = b.Shape.Rank + i;
+            switch (aAxis, bAxis)
+            {
+                case ( < 0, _):
+                    // lhs has no such dimension: take it from rhs.
+                    outShape[rank + i] = b.Shape[bAxis].FixedValue;
+                    orginKinds[rank + i] = DimKind.B;
+                    break;
+                case (_, < 0):
+                    // rhs has no such dimension: take it from lhs.
+                    outShape[rank + i] = a.Shape[aAxis].FixedValue;
+                    orginKinds[rank + i] = DimKind.B;
+                    break;
+                case ( >= 0, >= 0):
+                    // Compare the recomputed extents, but emit the extent of the operand's own shape.
+                    switch (lhsOrginShape[aAxis], rhsOrginShape[bAxis])
+                    {
+                        case (int l, int r) when l == r:
+                            outShape[rank + i] = a.Shape[aAxis].FixedValue;
+                            orginKinds[rank + i] = DimKind.E;
+                            break;
+                        case (1, _):
+                            outShape[rank + i] = b.Shape[bAxis].FixedValue;
+                            orginKinds[rank + i] = DimKind.B;
+                            break;
+                        case (_, 1):
+                            outShape[rank + i] = a.Shape[aAxis].FixedValue;
+                            orginKinds[rank + i] = DimKind.B;
+                            break;
+                        default:
+                            return new InvalidType("packed binary not support dim");
+                    }
+
+                    break;
+                default:
+                    throw new NotSupportedException();
+            }
+        }
+
+        // second check the dtype.
+        DataType dataType;
+        switch (a.DType, b.DType)
+        {
+            case (VectorType va, VectorType vb):
+                {
+                    var lanes = System.Math.Max(va.Lanes.Count, vb.Lanes.Count);
+                    var valid = true;
+
+                    // Lanes are aligned on the right, like the dimensions above.
+                    for (int i = -1; i >= -lanes; --i)
+                    {
+                        var ai = va.Lanes.Count + i;
+                        var bi = vb.Lanes.Count + i;
+                        switch (ai, bi)
+                        {
+                            case ( < 0, _):
+                                // Only rhs is packed here: its axis must be a broadcast dim whose recomputed extent is not 1.
+                                valid &= orginKinds[target.RhsPackedAxes[bi] - b.Shape.Rank + rank] == DimKind.B && rhsOrginShape[target.RhsPackedAxes[bi]] != 1;
+                                break;
+                            case (_, < 0):
+                                // Only lhs is packed here: same requirement, mirrored.
+                                valid &= orginKinds[target.LhsPackedAxes[ai] - a.Shape.Rank + rank] == DimKind.B && lhsOrginShape[target.LhsPackedAxes[ai]] != 1;
+                                break;
+                            case ( >= 0, >= 0):
+                                // Both packed: same output axis, equal recomputed extents, and an element-wise dim.
+                                var laxis = target.LhsPackedAxes[ai] - a.Shape.Rank + rank;
+                                var raxis = target.RhsPackedAxes[bi] - b.Shape.Rank + rank;
+                                valid &= lhsOrginShape[target.LhsPackedAxes[ai]] == rhsOrginShape[target.RhsPackedAxes[bi]] && laxis == raxis && orginKinds[laxis] == orginKinds[raxis] && orginKinds[raxis] == DimKind.E;
+                                break;
+                        }
+                    }
+
+                    if (valid)
+                    {
+                        // The operand with more lanes decides the result element type.
+                        dataType = va.Lanes.Count >= vb.Lanes.Count ? va : vb;
+                    }
+                    else
+                    {
+                        return new InvalidType("can't pack on the broadcast axis!");
+                    }
+                }
+
+                break;
+            case (VectorType va, PrimType pb):
+                if (va.ElemType != pb)
+                {
+                    return new InvalidType("Shape Can't Broadcast");
+                }
+
+                dataType = va;
+                break;
+            case (PrimType pa, VectorType vb):
+                if (vb.ElemType != pa)
+                {
+                    return new InvalidType("Shape Can't Broadcast");
+                }
+
+                dataType = vb;
+                break;
+            default:
+                return new InvalidType("Shape Can't Broadcast");
+        }
+
+        return new TensorType(dataType, outShape);
+    }
+
+    /// <summary>
+    /// Distributed inference: requires equal placements, infers the tensor type, then checks SBP.
+    /// </summary>
+    private IRType Visit(PackedBinary target, DistributedType a, DistributedType b)
+    {
+        if (a.Placement != b.Placement)
+        {
+            return new InvalidType("lhs rhs have different placement");
+        }
+
+        // A non-tensor result (e.g. an invalid type) is returned to the caller unchanged.
+        var rType = Visit(target, a.TensorType, b.TensorType);
+        if (rType is not TensorType tensorType)
+        {
+            return rType;
+        }
+
+        return Math.BinaryEvaluator.CheckSBP(target.BinaryOp, tensorType, a, b);
+    }
+}
+#pragma warning restore SA1008 // Opening parenthesis should be spaced correctly
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedLayerNorm.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedLayerNorm.cs
new file mode 100644
index 0000000000..5d2397daee
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedLayerNorm.cs
@@ -0,0 +1,206 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer, cost, shape and metric evaluator for <see cref="PackedLayerNorm"/>.
+/// </summary>
+public sealed class PackedLayerNormEvaluator : IEvaluator, ITypeInferencer, ICostEvaluator,
+ IShapeEvaluator, IMetricEvaluator
+{
+ /// <inheritdoc/>
+ public IValue Visit(IEvaluateContext context, PackedLayerNorm target)
+ {
+ var input = context.GetOrtArgumentValue(target, PackedLayerNorm.Input);
+ var scale = context.GetOrtArgumentValue(target, PackedLayerNorm.Scale);
+ var bias = context.GetOrtArgumentValue(target, PackedLayerNorm.Bias);
+
+ // The lane sizes are the last PackedAxes.Count dims of the packed input.
+ var lanes = input.Shape.TakeLast(target.PackedAxes.Count).Select(i => (int)i).ToArray();
+ var unpackedInput = UnpackTensor(input, target.PackedAxes, target.PadedNums);
+
+ // Scale and bias only see the packed axes at or after target.Axis, re-based to that axis;
+ // the pad counts used for them are the last packAxes.Length entries of PadedNums.
+ var packAxes = target.PackedAxes.Where(axis => axis >= target.Axis).Select(axis => axis - target.Axis).ToArray();
+ var padedNums = target.PadedNums.Skip(target.PackedAxes.Count - packAxes.Length).ToArray();
+ var unpackedScale = UnpackTensor(scale, packAxes, padedNums);
+ var unpackedBias = UnpackTensor(bias, packAxes, padedNums);
+
+ var shape = unpackedInput.Shape.Select(i => (int)i).ToArray();
+ var inputBuffer = unpackedInput.BytesBuffer.ToArray();
+ var inputSpan = MemoryMarshal.Cast(inputBuffer);
+ var scaleBuffer = unpackedScale.BytesBuffer.ToArray();
+ var scaleSpan = MemoryMarshal.Cast(scaleBuffer);
+ var biasBuffer = unpackedBias.BytesBuffer.ToArray();
+ var biasSpan = MemoryMarshal.Cast(biasBuffer);
+
+ // Run the layer norm on the unpacked data, then pad and pack the result again.
+ var output = NN.LayerNormEvaluator.LayerNormImpl(shape, inputSpan, scaleSpan, biasSpan, target.Axis, target.Epsilon, target.UseMean);
+ var outputTensor = OrtKISharp.Tensor.MakeTensor(new Memory(output), OrtDataType.Float, unpackedInput.Shape);
+ outputTensor = RepackTensor(outputTensor, lanes, target.PackedAxes, target.PadedNums);
+
+ // The result is always built as a Float32 vector type with the lanes read from the input.
+ return Value.FromTensor(Tensor.FromBytes(new VectorType(DataTypes.Float32, lanes), outputTensor.BytesBuffer.ToArray(), outputTensor.Shape.SkipLast(target.PackedAxes.Count).Select(i => (int)i).ToArray()));
+ }
+
+ /// <inheritdoc/>
+ public IRType Visit(ITypeInferenceContext context, PackedLayerNorm target)
+ {
+ var input = context.CheckArgumentType(target, PackedLayerNorm.Input);
+ var scale = context.CheckArgumentType(target, PackedLayerNorm.Scale);
+ var bias = context.CheckArgumentType(target, PackedLayerNorm.Bias);
+
+ // Only all-distributed or all-tensor argument triples are supported.
+ return (input, scale, bias) switch
+ {
+ (DistributedType a, DistributedType b, DistributedType c) => Visit(a, b, c, target.Axis),
+ (TensorType a, TensorType, TensorType) => Visit(a),
+ _ => new InvalidType(input.GetType().ToString()),
+ };
+ }
+
+ /// <inheritdoc/>
+ public Cost Visit(ICostEvaluateContext context, PackedLayerNorm target)
+ {
+ var inputType = context.GetArgumentType(target, PackedLayerNorm.Input);
+ var returnType = context.GetReturnType();
+ switch (inputType, returnType)
+ {
+ case (TensorType, TensorType):
+ // Plain tensors: only memory load and store are charged.
+ return new()
+ {
+ [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(inputType),
+ [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(returnType),
+ };
+
+ case (DistributedType inputDistributedType, DistributedType):
+ var scaleType = context.GetArgumentType(target, PackedLayerNorm.Scale);
+ var biasType = context.GetArgumentType(target, PackedLayerNorm.Bias);
+
+ // Communication term for scale and bias over placement axes 0 and 1.
+ var ring = GetRingReduceCommunicate(scaleType, new[] { 0, 1 }) + GetRingReduceCommunicate(biasType, new[] { 0, 1 });
+
+ // Product of the hierarchy sizes of every placement axis on which the input is not split.
+ var reCompute = inputDistributedType.NdSBP.Select((sbp, i) => sbp is SBPSplit ? 1 : inputDistributedType.Placement.Hierarchy[i]).ToArray().Aggregate(1, (acc, rep) => acc * rep);
+ return new()
+ {
+ [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(inputType) + ring,
+ [CostFactorNames.CPUCycles] = CostUtility.GetCPUCycles(inputType, 1) * (UInt128)reCompute,
+ [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(returnType) + ring,
+ };
+ default:
+ throw new NotSupportedException();
+ }
+ }
+
+ /// <inheritdoc/>
+ public Metric Visit(IMetricEvaluateContext context, PackedLayerNorm target)
+ {
+ var inputType = context.GetArgumentType(target, PackedLayerNorm.Input);
+ var returnType = context.GetReturnType();
+
+ // NOTE(review): both divisions below assume non-zero divisors (r, then outter) - confirm
+ // that GetFLOPs cannot return 0 here and that i >= r.
+ var r = MetricUtility.GetFLOPs(returnType);
+ var i = MetricUtility.GetFLOPs(inputType);
+ var outter = i / r;
+ var inner = i / outter;
+
+ return new()
+ {
+ [MetricFactorNames.OffChipMemoryTraffic] = CostUtility.GetMemoryAccess(inputType) + CostUtility.GetMemoryAccess(returnType),
+ [MetricFactorNames.FLOPs] = outter * ((inner * 7) + MetricUtility.SqrtFLOPs),
+ [MetricFactorNames.Parallel] = 4,
+ };
+ }
+
+ // The output shape expression is the shape of the input argument.
+ public Expr Visit(IShapeEvaluateContext context, PackedLayerNorm target) => context.GetArgumentShape(target, PackedLayerNorm.Input);
+
+ // Unpacks the given axes (last one first) and, when any pad count is positive, slices the
+ // padded elements off the end of each packed axis.
+ private static OrtKISharp.Tensor UnpackTensor(OrtKISharp.Tensor input, IRArray packedAxes, IRArray padNums)
+ {
+ OrtKISharp.Tensor unpacked = input;
+ foreach (var axis in packedAxes.Reverse())
+ {
+ unpacked = unpacked.Unpack(axis);
+ }
+
+ var shape = unpacked.Shape.ToArray();
+
+ OrtKISharp.Tensor sliced = unpacked;
+ if (padNums.Any(i => i > 0))
+ {
+ // Slice arguments: starts are all 0, ends are extent - pad, axes are the packed axes, steps are all 1.
+ sliced = OrtKI.Slice(unpacked, Enumerable.Repeat(0L, padNums.Count).ToArray(), Enumerable.Range(0, padNums.Count).Select(i => shape[packedAxes[i]] - padNums[i]).ToArray(), packedAxes.Select(i => (long)i).ToArray(), Enumerable.Range(0, padNums.Count).Select(i => 1L).ToArray());
+ }
+
+ return sliced;
+ }
+
+ // Inverse of UnpackTensor: pads each packed axis with zeros when any pad count is positive,
+ // then packs each axis with its lane size.
+ private static OrtKISharp.Tensor RepackTensor(OrtKISharp.Tensor input, IRArray lanes, IRArray packedAxes, IRArray padNums)
+ {
+ OrtKISharp.Tensor paded = input;
+ var shape = input.Shape;
+
+ if (padNums.Any(i => i > 0))
+ {
+ var pads = Enumerable.Repeat(0L, shape.Length * 2).ToArray();
+ for (int i = 0; i < packedAxes.Count; i++)
+ {
+ // Only the second half of the pads array is written.
+ pads[shape.Length + packedAxes[i]] = padNums[i];
+ }
+
+ // bottom_0,bottom_1,..., top_0, top_1, ...
+ paded = OrtKI.Pad(paded, pads, 0f, "constant");
+ }
+
+ OrtKISharp.Tensor packed = paded;
+ foreach (var (lane, axis) in lanes.Zip(packedAxes))
+ {
+ packed = packed.Pack(lane, axis);
+ }
+
+ return packed;
+ }
+
+ // Tensor inputs: the result type is the input type.
+ private IRType Visit(TensorType input)
+ {
+ return input;
+ }
+
+ // Distributed inputs: all three placements must be equal, and on every placement axis the
+ // (input, scale, bias) SBP triple must match one of the accepted combinations below.
+ private IRType Visit(DistributedType input, DistributedType scale, DistributedType bias, int raxis)
+ {
+ var invalid = new InvalidType($"{input}, {scale}, {bias} not support");
+ if (input.Placement != scale.Placement || scale.Placement != bias.Placement)
+ {
+ return invalid;
+ }
+
+ var ndsbp = new SBP[input.Placement.Rank];
+
+ for (int i = 0; i < input.Placement.Rank; i++)
+ {
+ switch (input.NdSBP[i], scale.NdSBP[i], bias.NdSBP[i])
+ {
+ // Input split at or after raxis: scale and bias must be split on the same axis re-based to raxis.
+ case (SBPSplit { Axis: int ix }, SBPSplit { Axis: int sx }, SBPSplit { Axis: int bx }) when ix >= raxis && sx == (ix - raxis) && bx == sx:
+ ndsbp[i] = SBP.S(ix);
+ break;
+ // Input split before raxis: scale and bias must be broadcast.
+ case (SBPSplit { Axis: int ix }, SBPBroadCast, SBPBroadCast) when ix < raxis:
+ ndsbp[i] = SBP.S(ix);
+ break;
+ case (SBPBroadCast, SBPBroadCast, SBPBroadCast):
+ ndsbp[i] = SBP.B;
+ break;
+ default:
+ return invalid;
+ }
+ }
+
+ return new DistributedType(input.TensorType, ndsbp, input.Placement);
+ }
+
+ // Returns (p - 1) * (v / p), where p is the product of the hierarchy sizes of the given
+ // placement axes on which the type is split and v is the memory access of the full tensor type.
+ // Returns 0 when none of the given axes is split.
+ private UInt128 GetRingReduceCommunicate(DistributedType distributedType, int[] axes)
+ {
+ // NOTE(review): ttype is never read below; confirm whether this call is only kept for a side effect.
+ var ttype = Utilities.DistributedUtility.GetDividedTensorType(distributedType);
+ var splits = axes.Where(i => i < distributedType.Placement.Rank && distributedType.NdSBP[i] is SBPSplit);
+ if (!splits.Any())
+ {
+ return 0;
+ }
+
+ var p = (UInt128)splits.Select(i => distributedType.Placement.Hierarchy[i]).Aggregate(1, (acc, i) => acc * i);
+ var v = CostUtility.GetMemoryAccess(distributedType.TensorType);
+ return (p - 1) * (v / p);
+ }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedMatMul.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedMatMul.cs
new file mode 100644
index 0000000000..e327b2b4fd
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedMatMul.cs
@@ -0,0 +1,146 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer and cost evaluator for <see cref="PackedMatMul"/>.
+/// </summary>
+public sealed class PackedMatMulEvaluator : IEvaluator, ITypeInferencer, ICostEvaluator
+{
+    /// <summary>
+    /// Unpacks both operands, multiplies them, and packs the last two result dims again
+    /// when the lhs has two packed axes.
+    /// </summary>
+    public IValue Visit(IEvaluateContext context, PackedMatMul target)
+    {
+        var lhs = context.GetOrtArgumentValue(target, PackedMatMul.Lhs); // [x,m/32,k/32,m',k']
+        var rhs = context.GetOrtArgumentValue(target, PackedMatMul.Rhs); // [x,k/32,n/32,k',n']
+        var lhsPackedCount = target.LhsPackedAxes.Count;
+        var rhsPackedCount = target.RhsPackedAxes.Count;
+        var singlePackedAxis = lhsPackedCount == 1;
+
+        // With a single packed axis the output carries no lanes.
+        var outLanes = singlePackedAxis ? Array.Empty() : new[] { (int)lhs.Shape[^2], (int)rhs.Shape[^1] };
+        var matrixDims = singlePackedAxis ? new[] { (int)lhs.Shape[^3], (int)rhs.Shape[^2] } : new[] { (int)lhs.Shape[^4], (int)rhs.Shape[^3] };
+
+        // Leading dims: left-fill the shorter operand with 1s, then take the pairwise maximum.
+        var maxRank = System.Math.Max(lhs.Shape.Length, rhs.Shape.Length);
+        var lhsLeading = Enumerable.Repeat(1L, maxRank - lhs.Shape.Length).Concat(lhs.Shape.SkipLast(2 + lhsPackedCount));
+        var rhsLeading = Enumerable.Repeat(1L, maxRank - rhs.Shape.Length).Concat(rhs.Shape.SkipLast(2 + rhsPackedCount));
+        var outshape = lhsLeading
+            .Zip(rhsLeading)
+            .Select(p => (int)System.Math.Max(p.First, p.Second))
+            .Concat(matrixDims)
+            .ToArray();
+
+        // Unpack starting from the last packed axis.
+        foreach (var axis in target.LhsPackedAxes.Reverse())
+        {
+            lhs = lhs.Unpack(axis);
+        }
+
+        foreach (var axis in target.RhsPackedAxes.Reverse())
+        {
+            rhs = rhs.Unpack(axis);
+        }
+
+        var matmul = OrtKI.MatMul(lhs, rhs);
+        if (target.LhsPackedAxes.Count == 2)
+        {
+            // Pack the last two result dims with the lanes recorded above.
+            var resultRank = outshape.Length;
+            var packAxes = new[] { -2 + resultRank, -1 + resultRank };
+            foreach (var (lane, axis) in outLanes.Zip(packAxes))
+            {
+                matmul = matmul.Pack(lane, axis);
+            }
+        }
+
+        return Value.FromTensor(Tensor.FromBytes(outLanes.Length == 0 ? DataTypes.Float32 : new VectorType(DataTypes.Float32, outLanes), matmul.BytesBuffer.ToArray(), outshape));
+    }
+
+    /// <summary>
+    /// Validates the packed axes, then defers to the plain MatMul type inference.
+    /// </summary>
+    public IRType Visit(ITypeInferenceContext context, PackedMatMul target)
+    {
+        var lhs = context.CheckArgumentType(target, PackedMatMul.Lhs);
+        var rhs = context.CheckArgumentType(target, PackedMatMul.Rhs);
+
+        // (1, 1): lhs packed on its last axis, rhs on its second-to-last axis.
+        // (2, 2): both operands packed on their last two axes, in order.
+        bool CheckPackAxes(Shape lhsShape, Shape rhsShape) => (target.LhsPackedAxes.Count, target.RhsPackedAxes.Count) switch
+        {
+            (1, 1) => target.LhsPackedAxes[0] == lhsShape.Rank - 1
+                && target.RhsPackedAxes[0] == rhsShape.Rank - 2,
+            (2, 2) => target.LhsPackedAxes[0] == lhsShape.Rank - 2
+                && target.LhsPackedAxes[1] == lhsShape.Rank - 1
+                && target.RhsPackedAxes[0] == rhsShape.Rank - 2
+                && target.RhsPackedAxes[1] == rhsShape.Rank - 1,
+            _ => false,
+        };
+
+        switch (lhs, rhs)
+        {
+            case (DistributedType a, DistributedType b) when CheckPackAxes(a.TensorType.Shape, b.TensorType.Shape):
+                return Math.MatMulEvaluator.VisitDistributedType(a, b);
+            case (TensorType a, TensorType b) when CheckPackAxes(a.Shape, b.Shape):
+                return Math.MatMulEvaluator.VisitTensorType(a, b);
+            default:
+                // Mixed operand kinds or rejected packed axes.
+                return new InvalidType($"{lhs} {rhs} not support");
+        }
+    }
+
+    /// <summary>
+    /// Cost: loads both operands, stores the output, and CPU cycles over the output scaled by
+    /// the last lhs dimension when it is fixed.
+    /// </summary>
+    public Cost Visit(ICostEvaluateContext context, PackedMatMul target)
+    {
+        var lhs = context.GetArgumentType(target, PackedMatMul.Lhs);
+        var rhs = context.GetArgumentType(target, PackedMatMul.Rhs);
+        var outputType = context.GetReturnType();
+
+        uint macPerElement = lhs switch
+        {
+            TensorType { Shape: Shape lhsShape } => LastFixedDimOrOne(lhsShape),
+            DistributedType distributedType => LastFixedDimOrOne(DistributedUtility.GetDividedTensorType(distributedType).Shape),
+            _ => 1U,
+        };
+
+        var cost = new Cost();
+        cost[CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(lhs) + CostUtility.GetMemoryAccess(rhs);
+        cost[CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(outputType);
+        cost[CostFactorNames.CPUCycles] = CostUtility.GetCPUCycles(outputType, macPerElement);
+        return cost;
+    }
+
+    /// <summary>
+    /// Returns the last dimension of <paramref name="shape"/> when it is fixed, otherwise 1.
+    /// </summary>
+    private static uint LastFixedDimOrOne(Shape shape) =>
+        shape[^1].IsFixed ? (uint)shape[^1].FixedValue : 1U;
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedSoftMax.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedSoftMax.cs
new file mode 100644
index 0000000000..0171708cf1
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedSoftMax.cs
@@ -0,0 +1,85 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.IR.Tensors;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer and cost evaluator for <see cref="PackedSoftmax"/>.
+/// </summary>
+public sealed class PackedSoftMaxEvaluator : ITypeInferencer, ICostEvaluator, IEvaluator
+{
+    /// <inheritdoc/>
+    public IRType Visit(ITypeInferenceContext context, PackedSoftmax target)
+    {
+        var input = context.CheckArgumentType(target, PackedSoftmax.Input);
+
+        return input switch
+        {
+            DistributedType d => Visit(context, target, d),
+            TensorType t => Visit(context, target, t),
+            AnyType => AnyType.Default,
+            _ => new InvalidType(input.GetType().ToString()),
+        };
+    }
+
+    /// <inheritdoc/>
+    public Cost Visit(ICostEvaluateContext context, PackedSoftmax target)
+    {
+        // Load and store are both sized by the return type.
+        var returnType = context.GetReturnType();
+        return new()
+        {
+            [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(returnType),
+            [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(returnType),
+        };
+    }
+
+    /// <inheritdoc/>
+    public IValue Visit(IEvaluateContext context, PackedSoftmax target)
+    {
+        var input = context.GetOrtArgumentValue(target, PackedSoftmax.Input);
+        var shape = input.Shape.Select(i => (int)i).ToArray();
+        OrtKISharp.Tensor softmax;
+        if (!target.PackedAxes.Any(i => i == target.Axis))
+        {
+            // The softmax axis is not packed: the regular softmax applies directly.
+            softmax = OrtKI.Softmax(input, target.Axis);
+        }
+        else
+        {
+            // The softmax axis is packed: reduce over both the axis itself and its lane dim,
+            // which sits among the trailing PackedAxes.Count dims of the input.
+            var packedAxis = shape.Length - target.PackedAxes.Count + target.PackedAxes.IndexOf(target.Axis);
+            var max = OrtKI.ReduceMax(input, new long[] { target.Axis, packedAxis }, 1);
+            var exp = OrtKI.Exp(input - max);
+            var reduceSum = OrtKI.ReduceSum(exp, new long[] { target.Axis, packedAxis }, 1, 0);
+            softmax = OrtKI.Div(exp, reduceSum);
+        }
+
+        // The trailing PackedAxes.Count dims become the vector lanes; the remaining dims are the tensor shape.
+        return Value.FromTensor(Tensor.FromBytes(new TensorType(new VectorType(input.DataType.ToDataType(), shape.TakeLast(target.PackedAxes.Count).ToArray()), shape.SkipLast(target.PackedAxes.Count).ToArray()), softmax.BytesBuffer.ToArray()));
+    }
+
+    /// <summary>
+    /// Tensor inference: the input type is returned unchanged unless a packed axis is not
+    /// below the input rank.
+    /// </summary>
+    private IRType Visit(ITypeInferenceContext context, PackedSoftmax target, TensorType input)
+    {
+        foreach (var axis in target.PackedAxes)
+        {
+            if (axis >= input.Shape.Rank)
+            {
+                return new InvalidType("axis out of range");
+            }
+        }
+
+        return input;
+    }
+
+    /// <summary>
+    /// Distributed inference: SBP and placement are kept from the input.
+    /// </summary>
+    private IRType Visit(ITypeInferenceContext context, PackedSoftmax target, DistributedType input)
+    {
+        // The tensor-level inference can return an invalid type ("axis out of range");
+        // hand that back to the caller so the reason is preserved, instead of throwing.
+        var inferred = Visit(context, target, input.TensorType);
+        return inferred is TensorType tensorType
+            ? new DistributedType(tensorType, input.NdSBP, input.Placement)
+            : inferred;
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedTranspose.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedTranspose.cs
new file mode 100644
index 0000000000..e2b8d1ab9f
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/PackedTranspose.cs
@@ -0,0 +1,60 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer and cost evaluator for <see cref="PackedTranspose"/>.
+/// </summary>
+public sealed class PackedTransposeEvaluator : IEvaluator, ITypeInferencer, ICostEvaluator
+{
+    /// <summary>
+    /// Transposes the packed input with perm extended by extra trailing axes.
+    /// </summary>
+    public IValue Visit(IEvaluateContext context, PackedTranspose target)
+    {
+        var input = context.GetOrtArgumentValue(target, PackedTranspose.Input);
+        var perm = context.GetArgumentValueAsArray(target, PackedTranspose.Perm);
+
+        // Where each packed axis sits inside perm.
+        var packedPositions = target.PackedAxes.Select(axis => perm.IndexOf(axis)).ToArray();
+
+        // One extra axis index per packed axis, counted from perm.Length on, reordered by
+        // ascending packed position.
+        var extraAxes = LinqUtility.Range(perm.Length, packedPositions.Length).ToArray();
+        var orderedExtraAxes = packedPositions
+            .Zip(extraAxes)
+            .OrderBy(pair => pair.First)
+            .Select(pair => pair.Second)
+            .ToArray();
+
+        var fullPerm = perm.Concat(orderedExtraAxes).ToArray();
+        var transposed = OrtKI.Transpose(input, fullPerm);
+
+        // Result bytes are re-viewed with the checked data type and shape of the current call.
+        var call = context.CurrentCall;
+        return Value.FromTensor(Tensor.FromBytes(call.CheckedDataType, transposed.BytesBuffer.ToArray(), call.CheckedShape.ToValueArray()));
+    }
+
+    /// <summary>
+    /// Defers to the regular Transpose type inference for tensor and distributed inputs.
+    /// </summary>
+    public IRType Visit(ITypeInferenceContext context, PackedTranspose target)
+    {
+        var input = context.CheckArgumentType(target, PackedTranspose.Input);
+        var permExpr = context.GetArgument(target, PackedTranspose.Perm);
+
+        if (input is DistributedType distributedType)
+        {
+            return Tensors.TransposeEvaluator.Visit(distributedType, permExpr);
+        }
+
+        if (input is TensorType tensorType)
+        {
+            return Tensors.TransposeEvaluator.Visit(tensorType, permExpr);
+        }
+
+        if (input is AnyType)
+        {
+            return AnyType.Default;
+        }
+
+        return new InvalidType(input.GetType().ToString());
+    }
+
+    /// <inheritdoc/>
+    public Cost Visit(ICostEvaluateContext context, PackedTranspose target)
+    {
+        var sourceType = context.GetArgumentType(target, PackedTranspose.Input);
+        var resultType = context.GetReturnType();
+
+        // One read of the input and one write of the output.
+        var cost = new Cost();
+        cost[CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(sourceType);
+        cost[CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(resultType);
+        return cost;
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/Store.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/Store.cs
new file mode 100644
index 0000000000..a367696bba
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/Store.cs
@@ -0,0 +1,27 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Type inferencer and cost evaluator for <see cref="Store"/>.
+/// </summary>
+public sealed class StoreEvaluator : ITypeInferencer, ICostEvaluator
+{
+    /// <summary>
+    /// The inferred type is the type of the input argument.
+    /// </summary>
+    public IRType Visit(ITypeInferenceContext context, Store target) =>
+        context.GetArgumentType(target, Store.Input);
+
+    /// <summary>
+    /// Charges a memory load and a memory store, both sized by the input argument's type.
+    /// </summary>
+    public Cost Visit(ICostEvaluateContext context, Store target)
+    {
+        var cost = new Cost();
+        cost[CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(context.GetArgumentType(target, Store.Input));
+        cost[CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(context.GetArgumentType(target, Store.Input));
+        return cost;
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/CPU/Unpack.cs b/modules/Nncase.Modules.CPU/Evaluator/CPU/Unpack.cs
new file mode 100644
index 0000000000..0f861e7160
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/CPU/Unpack.cs
@@ -0,0 +1,82 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+#pragma warning disable SA1010, SA1008
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using DryIoc.ImTools;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.IR.CPU;
+using Nncase.IR.Tensors;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.IR.CPU;
+
+/// <summary>
+/// Evaluator, type inferencer and cost/metric model for Unpack.
+/// </summary>
+public sealed class UnpackEvaluator : ITypeInferencer, ICostEvaluator, IEvaluator
+{
+    /// <inheritdoc/>
+    public IValue Visit(IEvaluateContext context, Unpack target)
+    {
+        var tensor = context.GetOrtArgumentValue(target, Unpack.Input);
+
+        // Apply Unpack once per axis, walking target.Axes from last to first.
+        foreach (var axis in target.Axes.Reverse())
+        {
+            tensor = tensor.Unpack(axis);
+        }
+
+        return Value.FromTensor(tensor.ToTensor());
+    }
+
+    /// <inheritdoc/>
+    public IRType Visit(ITypeInferenceContext context, Unpack target) =>
+        context.CheckArgumentType(target, Unpack.Input) switch
+        {
+            DistributedType distributed => Visit(context, target, distributed),
+            TensorType tensor => Visit(context, target, tensor),
+            AnyType => AnyType.Default,
+            var other => new InvalidType(other.GetType().ToString()),
+        };
+
+    /// <inheritdoc/>
+    public Cost Visit(ICostEvaluateContext context, Unpack target)
+    {
+        // Loads are costed from the input argument type, stores from the return type.
+        var loadedType = context.GetArgumentType(target, Unpack.Input);
+        var storedType = context.GetReturnType();
+        return new Cost
+        {
+            [CostFactorNames.MemoryLoad] = CostUtility.GetMemoryAccess(loadedType),
+            [CostFactorNames.MemoryStore] = CostUtility.GetMemoryAccess(storedType),
+        };
+    }
+
+    /// <summary>Reports off-chip traffic as twice the memory access of the return type.</summary>
+    public Metric Visit(IMetricEvaluateContext context, Unpack target)
+    {
+        var resultType = context.GetReturnType();
+        return new Metric
+        {
+            [MetricFactorNames.OffChipMemoryTraffic] = CostUtility.GetMemoryAccess(resultType) * 2,
+        };
+    }
+
+    // Plain tensors: the result comes straight from TypeInference.UnpackType.
+    private IRType Visit(ITypeInferenceContext context, Unpack target, TensorType input) =>
+        TypeInference.UnpackType(input, target.Axes);
+
+    // Distributed tensors: infer the inner tensor type, then re-wrap it with the same NdSBP and placement.
+    private IRType Visit(ITypeInferenceContext context, Unpack target, DistributedType input)
+    {
+        // Anything other than a TensorType from the tensor overload is rejected with an exception.
+        return Visit(context, target, input.TensorType) is TensorType unpacked
+            ? new DistributedType(unpacked, input.NdSBP, input.Placement)
+            : throw new InvalidOperationException();
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Binary.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Binary.cs
new file mode 100644
index 0000000000..71f8cb4e8b
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Binary.cs
@@ -0,0 +1,15 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+/// <summary>Type inference for Binary: the inferred type is always TupleType.Void.</summary>
+public sealed class BinaryEvaluator : ITypeInferencer
+{
+    /// <inheritdoc/>
+    public IRType Visit(ITypeInferenceContext context, Binary target) =>
+        TupleType.Void;
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/CPUModule.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/CPUModule.cs
new file mode 100644
index 0000000000..2f81cdf0ba
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/CPUModule.cs
@@ -0,0 +1,42 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using DryIoc;
+using Nncase.Evaluator.Imaging;
+using Nncase.Evaluator.NN;
+using Nncase.Evaluator.Tensors;
+using Nncase.Hosting;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+/// <summary>CPU module: registers the evaluators of this namespace with the container.</summary>
+internal class CPUModule : IApplicationPart
+{
+    /// <summary>Registers each evaluator with singleton reuse.</summary>
+    public void ConfigureServices(IRegistrator registrator)
+    {
+        // NOTE(review): type arguments restored from the evaluator classes declared in this namespace; confirm the set.
+        registrator.RegisterManyInterface<BinaryEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<ConcatEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<GatherEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<MatmulEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<MemcopyEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PackEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PackedBinaryEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PackedLayerNormEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PackedMatMulEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PackedSoftMaxEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PackedTransposeEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PadEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<PtrOfEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<ReshapeEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<SliceEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<SramPtrEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<SwishEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<TensorLoadEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<TensorStoreEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<TransposeEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<UnaryEvaluator>(reuse: Reuse.Singleton);
+        registrator.RegisterManyInterface<UnpackEvaluator>(reuse: Reuse.Singleton);
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Concat.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Concat.cs
new file mode 100644
index 0000000000..bb173f3c71
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Concat.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class ConcatEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Concat target)
+    {
+        _ = context.CheckArgumentType(target, Concat.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Concat.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Gather.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Gather.cs
new file mode 100644
index 0000000000..0c2fbc4b0e
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Gather.cs
@@ -0,0 +1,15 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+/// <summary>Type inference for Gather: the inferred type is always TupleType.Void.</summary>
+public sealed class GatherEvaluator : ITypeInferencer
+{
+    /// <inheritdoc/>
+    public IRType Visit(ITypeInferenceContext context, Gather target) =>
+        TupleType.Void;
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Matmul.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Matmul.cs
new file mode 100644
index 0000000000..6ad2912cfb
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Matmul.cs
@@ -0,0 +1,12 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class MatmulEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, Matmul target) => TupleType.Void; // always TupleType.Void; context and target are not inspected
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Memcopy.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Memcopy.cs
new file mode 100644
index 0000000000..e88830a734
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Memcopy.cs
@@ -0,0 +1,17 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public class MemcopyEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Memcopy target)
+    {
+        context.CheckArgumentType(target, Memcopy.Dest); // destination first; returned type is not used
+        context.CheckArgumentType(target, Memcopy.Src); // then source; returned type is not used
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Pack.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Pack.cs
new file mode 100644
index 0000000000..b85558fae5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Pack.cs
@@ -0,0 +1,19 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PackEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, Pack target) => TupleType.Void; // always TupleType.Void; context and target are not inspected
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedBinary.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedBinary.cs
new file mode 100644
index 0000000000..88e65c8e30
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedBinary.cs
@@ -0,0 +1,20 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PackedBinaryEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, PackedBinary target) => TupleType.Void; // always TupleType.Void; context and target are not inspected
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedLayerNorm.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedLayerNorm.cs
new file mode 100644
index 0000000000..6d7bc11e13
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedLayerNorm.cs
@@ -0,0 +1,20 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PackedLayerNormEvaluator : ITypeInferencer
+{
+ /// <inheritdoc/>
+ public IRType Visit(ITypeInferenceContext context, PackedLayerNorm target) => TupleType.Void; // always TupleType.Void; context and target are not inspected
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedMatMul.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedMatMul.cs
new file mode 100644
index 0000000000..7410f8f21f
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedMatMul.cs
@@ -0,0 +1,19 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Linq;
+using System.Numerics;
+using System.Runtime.InteropServices;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PackedMatMulEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, PackedMatMul target) => TupleType.Void; // always TupleType.Void; context and target are not inspected
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedSoftMax.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedSoftMax.cs
new file mode 100644
index 0000000000..0035dea489
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedSoftMax.cs
@@ -0,0 +1,19 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PackedSoftMaxEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, PackedSoftmax target) => TupleType.Void; // always TupleType.Void; note the op is spelled PackedSoftmax while this class uses "SoftMax"
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedTranspose.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedTranspose.cs
new file mode 100644
index 0000000000..1ec6a81748
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PackedTranspose.cs
@@ -0,0 +1,19 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PackedTransposeEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, PackedTranspose target) => TupleType.Void; // always TupleType.Void; context and target are not inspected
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Pad.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Pad.cs
new file mode 100644
index 0000000000..9b811b7fa5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Pad.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PadEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Pad target)
+    {
+        _ = context.CheckArgumentType(target, Pad.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Pad.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PtrOf.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PtrOf.cs
new file mode 100644
index 0000000000..3508f6f931
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/PtrOf.cs
@@ -0,0 +1,22 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class PtrOfEvaluator : ITypeInferencer, IOpPrinter
+{
+    /// <summary>Infers a pointer type built from the op's DataType.</summary>
+    public IRType Visit(ITypeInferenceContext context, PtrOf target) =>
+        new PointerType(target.DataType);
+
+    /// <summary>Prints the op as PtrOf(name); IL mode is rejected with NotSupportedException.</summary>
+    public string Visit(IIRPrinterContext context, PtrOf target, bool iLmode)
+    {
+        return iLmode
+            ? throw new NotSupportedException()
+            : $"PtrOf({target.PtrName})";
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Reshape.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Reshape.cs
new file mode 100644
index 0000000000..b5e11095b9
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Reshape.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class ReshapeEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Reshape target)
+    {
+        _ = context.CheckArgumentType(target, Reshape.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Reshape.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Slice.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Slice.cs
new file mode 100644
index 0000000000..a26491b8eb
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Slice.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class SliceEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Slice target)
+    {
+        _ = context.CheckArgumentType(target, Slice.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Slice.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/SramPtr.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/SramPtr.cs
new file mode 100644
index 0000000000..c9d591d2ac
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/SramPtr.cs
@@ -0,0 +1,12 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class SramPtrEvaluator : ITypeInferencer
+{
+ public IRType Visit(ITypeInferenceContext context, SramPtr target) => new PointerType(target.DataType); // pointer type built from the op's DataType; context is not used
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Swish.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Swish.cs
new file mode 100644
index 0000000000..fb8209afc5
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Swish.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class SwishEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Swish target)
+    {
+        _ = context.CheckArgumentType(target, Swish.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Swish.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/TensorLoad.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/TensorLoad.cs
new file mode 100644
index 0000000000..c41eacf55f
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/TensorLoad.cs
@@ -0,0 +1,17 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public class TensorLoadEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, TensorLoad target)
+    {
+        context.CheckArgumentType(target, TensorLoad.Dest); // destination first; returned type is not used
+        context.CheckArgumentType(target, TensorLoad.Src); // then source; returned type is not used
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/TensorStore.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/TensorStore.cs
new file mode 100644
index 0000000000..742a8f1592
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/TensorStore.cs
@@ -0,0 +1,17 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class TensorStoreEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, TensorStore target)
+    {
+        context.CheckArgumentType(target, TensorStore.Src); // source first; returned type is not used
+        context.CheckArgumentType(target, TensorStore.Dest); // then destination; returned type is not used
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Transpose.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Transpose.cs
new file mode 100644
index 0000000000..c769ce19e6
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Transpose.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class TransposeEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Transpose target)
+    {
+        _ = context.CheckArgumentType(target, Transpose.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Transpose.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Unary.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Unary.cs
new file mode 100644
index 0000000000..5fd104b57f
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Unary.cs
@@ -0,0 +1,18 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using Nncase.Evaluator;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class UnaryEvaluator : ITypeInferencer
+{
+    public IRType Visit(ITypeInferenceContext context, Unary target)
+    {
+        _ = context.CheckArgumentType(target, Unary.Input); // returned type is intentionally discarded
+        _ = context.CheckArgumentType(target, Unary.Output); // returned type is intentionally discarded
+        return TupleType.Void; // the inferred type is always TupleType.Void
+    }
+}
diff --git a/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Unpack.cs b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Unpack.cs
new file mode 100644
index 0000000000..7e4d468377
--- /dev/null
+++ b/modules/Nncase.Modules.CPU/Evaluator/TIR/CPU/Unpack.cs
@@ -0,0 +1,21 @@
+// Copyright (c) Canaan Inc. All rights reserved.
+// Licensed under the Apache license. See LICENSE file in the project root for full license information.
+
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using DryIoc.ImTools;
+using Nncase.CostModel;
+using Nncase.IR;
+using Nncase.TIR.CPU;
+using Nncase.Utilities;
+using OrtKISharp;
+
+namespace Nncase.Evaluator.TIR.CPU;
+
+public sealed class UnpackEvaluator : ITypeInferencer
+{
+ ///