[BodhiApp] changes for bodhiapp. #18
name: llama-server standalone

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true
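
# Note: github.head_ref is only set on pull_request events, so PR runs share a
# per-branch group (and cancel each other), while push runs group by run_id
# and are never cancelled.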
# Fine-grained permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  contents: write # for creating release

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
jobs:
  macos-arm64:
    runs-on: macos-14
    strategy:
      matrix:
        variant: [metal, cpu]
        include:
          - variant: metal
            cmake_flags: >-
              -DGGML_METAL=ON
              -DGGML_METAL_EMBED_LIBRARY=ON
          - variant: cpu
            cmake_flags: >-
              -DGGML_METAL=OFF
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Dependencies
        continue-on-error: true
        run: |
          brew update
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=OFF \
            ${{ matrix.cmake_flags }}
          cmake --build build --target llama-server -j $(sysctl -n hw.logicalcpu)
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--aarch64-apple-darwin--${{ matrix.variant }}
          path: build/bin/llama-server
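
  # Artifact names follow llama-server--<target-triple>--<variant>
  # (e.g. llama-server--aarch64-apple-darwin--metal); the release job below
  # relies on this pattern when naming release assets.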

  ubuntu-cpu:
    runs-on: ubuntu-22.04
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=OFF
          cmake --build build --target llama-server -j $(nproc)
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-unknown-linux-gnu--cpu
          path: build/bin/llama-server

  ubuntu-cuda:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        include:
          - version: "12.6.2"
            os: "ubuntu24.04"
            arch: "89-real;90-real"
            name: "ada-lovelace-hopper"
    container: nvidia/cuda:${{ matrix.version }}-devel-${{ matrix.os }}
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Install dependencies
        env:
          DEBIAN_FRONTEND: noninteractive
        run: |
          apt update
          apt install -y cmake build-essential ninja-build libgomp1 git zip
      - name: Build
        run: |
          cmake -B build -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=ON \
            -DCMAKE_CUDA_ARCHITECTURES="${{ matrix.arch }}" \
            -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
            -DGGML_NATIVE=OFF \
            -DGGML_CUDA=ON \
            -DCMAKE_CUDA_RUNTIME_LIBRARY=Shared
          cmake --build build --target llama-server
      - name: Zip artifacts
        run: |
          cd build/bin
          zip -r ../llama-server--x86_64-unknown-linux-gnu--cuda-${{ matrix.version }}-${{ matrix.name }}.zip *
          cd ../..
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-unknown-linux-gnu--cuda-${{ matrix.version }}-${{ matrix.name }}
          path: build/llama-server--x86_64-unknown-linux-gnu--cuda-${{ matrix.version }}-${{ matrix.name }}.zip
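
  # This job builds with BUILD_SHARED_LIBS=ON, so build/bin holds shared
  # libraries next to the binary; the whole directory is zipped rather than
  # shipping llama-server alone.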

  ubuntu-vulkan:
    runs-on: ubuntu-22.04
    if: false
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Dependencies
        run: |
          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
          sudo apt-get update -y
          sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=OFF \
            -DGGML_VULKAN=ON
          cmake --build build --target llama-server -j $(nproc)
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-unknown-linux-gnu--vulkan
          path: build/bin/llama-server

  ubuntu-hip:
    runs-on: ubuntu-22.04
    container: rocm/dev-ubuntu-22.04:6.0.2
    if: false
    strategy:
      matrix:
        build: ['native', 'legacy']
        include:
          - build: 'native'
            cmake_flags: '-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang"'
          - build: 'legacy'
            cmake_flags: '-DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc'
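        # 'native' uses CMake's first-class HIP language support through
        # CMAKE_HIP_COMPILER; 'legacy' routes the whole build through hipcc.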
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Dependencies
        run: |
          apt-get update
          apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=OFF \
            -DGGML_HIP=ON \
            ${{ matrix.cmake_flags }}
          cmake --build build --target llama-server -j $(nproc)
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-unknown-linux-gnu--hip-${{ matrix.build }}
          path: build/bin/llama-server

  ubuntu-musa:
    runs-on: ubuntu-22.04
    container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
    if: false
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Dependencies
        run: |
          apt-get update
          apt-get install -y build-essential git cmake
      - name: Build
        run: |
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=OFF \
            -DGGML_MUSA=ON
          cmake --build build --target llama-server -j $(nproc)
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-unknown-linux-gnu--musa
          path: build/bin/llama-server

  ubuntu-sycl:
    runs-on: ubuntu-22.04
    continue-on-error: true
    if: false
    strategy:
      matrix:
        variant: [default, fp16]
        include:
          - variant: default
            cmake_flags: ""
          - variant: fp16
            cmake_flags: "-DGGML_SYCL_F16=ON"
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Add oneAPI to apt
        run: |
          cd /tmp
          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y intel-oneapi-compiler-dpcpp-cpp intel-oneapi-mkl-devel
      - name: Build
        run: |
          source /opt/intel/oneapi/setvars.sh
          cmake -B build \
            -DCMAKE_BUILD_TYPE=Release \
            -DLLAMA_FATAL_WARNINGS=OFF \
            -DLLAMA_CURL=OFF \
            -DBUILD_SHARED_LIBS=OFF \
            -DGGML_SYCL=ON \
            -DCMAKE_C_COMPILER=icx \
            -DCMAKE_CXX_COMPILER=icpx \
            ${{ matrix.cmake_flags }}
          cmake --build build --target llama-server -j $(nproc)
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-unknown-linux-gnu--sycl-${{ matrix.variant }}
          path: build/bin/llama-server

  windows-cuda:
    runs-on: windows-2019
    strategy:
      matrix:
        cuda:
          - "11.7"
          - "12.4"
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install Cuda Toolkit 11.7
        if: ${{ matrix.cuda == '11.7' }}
        run: |
          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
          choco install unzip -y
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
          echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
      - name: Install Cuda Toolkit 12.4
        if: ${{ matrix.cuda == '12.4' }}
        run: |
          mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
          choco install unzip -y
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
          curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
          unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
          echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
          echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
      - name: Install Ninja
        run: |
          choco install ninja
      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DCMAKE_BUILD_TYPE=Release ^
            -DLLAMA_FATAL_WARNINGS=OFF ^
            -DLLAMA_CURL=OFF ^
            -DBUILD_SHARED_LIBS=ON ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CUDA=ON ^
            -DCMAKE_CUDA_RUNTIME_LIBRARY=Shared
          cmake --build build --config Release --target llama-server
      - name: Install Zip
        run: choco install zip
      - name: Zip artifacts
        run: |
          cd build/bin/Release
          zip -r ../../llama-server--x86_64-pc-windows-msvc--cuda-${{ matrix.cuda }}.zip *
          cd ../../..
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-pc-windows-msvc--cuda-${{ matrix.cuda }}
          path: build/llama-server--x86_64-pc-windows-msvc--cuda-${{ matrix.cuda }}.zip

  windows-cpu:
    runs-on: windows-latest
    env:
      OPENBLAS_VERSION: 0.3.23
    strategy:
      matrix:
        include:
          - build: 'cpu'
            defines: '-DGGML_NATIVE=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
          # - build: 'noavx'
          #   defines: '-DGGML_NATIVE=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
          # - build: 'avx2'
          #   defines: '-DGGML_NATIVE=OFF'
          # - build: 'avx'
          #   defines: '-DGGML_NATIVE=OFF -DGGML_AVX2=OFF'
          # - build: 'avx512'
          #   defines: '-DGGML_NATIVE=OFF -DGGML_AVX512=ON'
          # - build: 'openblas'
          #   defines: '-DGGML_NATIVE=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Download OpenBLAS
        id: get_openblas
        if: ${{ matrix.build == 'openblas' }}
        run: |
          curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
          curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
          mkdir $env:RUNNER_TEMP/openblas
          tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
          $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
          & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
      - name: Install Ninja
        run: |
          choco install ninja
      - name: Build
        run: |
          cmake -B build `
            -DCMAKE_BUILD_TYPE=Release `
            -DLLAMA_FATAL_WARNINGS=OFF `
            -DLLAMA_CURL=OFF `
            -DBUILD_SHARED_LIBS=OFF `
            ${{ matrix.defines }}
          cmake --build build --config Release --target llama-server
      - name: Add libopenblas.dll
        if: ${{ matrix.build == 'openblas' }}
        run: |
          cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
          cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
      - name: Check AVX512F support
        if: ${{ matrix.build == 'avx512' }}
        continue-on-error: true
        run: |
          cd build
          $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
          $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
          $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
          echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
          & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
          .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
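      # The probe compiles a tiny C program that runs CPUID leaf 7 and tests
      # EBX bit 16 (65536 = 0x10000), the AVX512F feature flag, to report
      # whether the runner's own CPU supports AVX-512.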
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-pc-windows-msvc--${{ matrix.build }}
          path: build/bin/Release/llama-server.exe

  windows-vulkan:
    runs-on: windows-latest
    env:
      VULKAN_VERSION: 1.3.261.1
    if: false
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Install Vulkan SDK
        run: |
          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
      - name: Install Ninja
        run: |
          choco install ninja
      - name: Build
        run: |
          cmake -B build `
            -DCMAKE_BUILD_TYPE=Release `
            -DLLAMA_FATAL_WARNINGS=OFF `
            -DLLAMA_CURL=OFF `
            -DBUILD_SHARED_LIBS=OFF `
            -DGGML_NATIVE=OFF `
            -DGGML_VULKAN=ON
          cmake --build build --config Release --target llama-server
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-pc-windows-msvc--vulkan
          path: build/bin/Release/llama-server.exe

  windows-kompute:
    runs-on: windows-latest
    env:
      VULKAN_VERSION: 1.3.261.1
    if: false
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Clone Kompute submodule
        run: |
          git submodule update --init ggml/src/ggml-kompute/kompute
      - name: Install Vulkan SDK
        run: |
          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
      - name: Install Ninja
        run: |
          choco install ninja
      - name: Build
        run: |
          cmake -B build `
            -DCMAKE_BUILD_TYPE=Release `
            -DLLAMA_FATAL_WARNINGS=OFF `
            -DLLAMA_CURL=OFF `
            -DBUILD_SHARED_LIBS=OFF `
            -DGGML_NATIVE=OFF `
            -DGGML_KOMPUTE=ON `
            -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON
          cmake --build build --config Release --target llama-server
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-pc-windows-msvc--kompute
          path: build/bin/Release/llama-server.exe

  windows-sycl:
    runs-on: windows-latest
    if: false
    defaults:
      run:
        shell: bash
    env:
      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Install
        run: |
          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
      - name: Build
        run: |
          examples/sycl/win-build-sycl.bat
      - name: Package with Runtime DLLs
        run: |
          # Copy required runtime DLLs
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-pc-windows-msvc--sycl
          path: build/bin/*

  windows-hip:
    runs-on: windows-latest
    strategy:
      matrix:
        gpu_target: [gfx1100, gfx1101, gfx1030]
    if: false
    steps:
      - name: Clone
        uses: actions/checkout@v4
      - name: Install AMD HIP SDK
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
      - name: Verify ROCm
        run: |
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
      - name: Build
        run: |
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DCMAKE_BUILD_TYPE=Release `
            -DLLAMA_FATAL_WARNINGS=OFF `
            -DLLAMA_CURL=OFF `
            -DBUILD_SHARED_LIBS=OFF `
            -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
            -DGGML_HIP=ON
          cmake --build build --config Release --target llama-server
      - name: Copy HIP Runtime DLLs
        run: |
          md "build\bin\rocblas\library\"
          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: llama-server--x86_64-pc-windows-msvc--hip-${{ matrix.gpu_target }}
          path: build/bin/llama-server.exe
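
  # gfx1100/gfx1101 are RDNA3 targets (RX 7900 / 7800-class GPUs) and gfx1030
  # is RDNA2 (RX 6800/6900-class); each target ships as a separate artifact.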

  release:
    if: always() || github.event.inputs.create_release == 'true'
    runs-on: ubuntu-latest
    needs:
      - macos-arm64
      - ubuntu-cpu
      - ubuntu-cuda
      # - ubuntu-vulkan
      # - ubuntu-hip
      # - ubuntu-musa
      # - ubuntu-sycl
      - windows-cpu
      - windows-cuda
      # - windows-vulkan
      # - windows-kompute
      # - windows-sycl
      # - windows-hip
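    # always() lets the release run even when some builds fail; combined with
    # continue-on-error on the artifact download below, a partial set of
    # binaries can still be released.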
    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=server-b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=server-${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi
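      # Example (hypothetical values): build 1234 on master tags the release
      # as server-b1234; branch feat/x at short hash abc1234 tags it as
      # server-feat-x-b1234-abc1234.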
      - name: Download artifacts
        uses: actions/download-artifact@v4
        continue-on-error: true # don't fail if some artifacts are missing
        with:
          path: ./artifacts
      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
      - name: Upload release artifacts
        uses: actions/github-script@v3
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const path = require('path');
            const fs = require('fs');
            const release_id = '${{ steps.create_release.outputs.id }}';
            // Recursively find all artifacts
            function* walkSync(dir) {
              const files = fs.readdirSync(dir, { withFileTypes: true });
              for (const file of files) {
                const res = path.resolve(dir, file.name);
                if (file.isDirectory()) {
                  yield* walkSync(res);
                } else {
                  yield res;
                }
              }
            }
            // Upload each artifact
            for (const filePath of walkSync('./artifacts')) {
              const fileName = path.basename(filePath);
              const parentDir = path.basename(path.dirname(filePath));
              const fileExt = path.extname(fileName);
              // Filter by llama-server or llama-server.exe
              if (fileName === 'llama-server' || fileName === 'llama-server.exe') {
                const newFileName = `${parentDir}-${fileName}`; // combine parent folder name and file name
                console.log('Uploading:', newFileName);
                try {
                  await github.repos.uploadReleaseAsset({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    release_id: release_id,
                    name: newFileName,
                    data: fs.readFileSync(filePath)
                  });
                } catch (error) {
                  console.error(`Failed to upload ${newFileName}:`, error);
                  // Continue with other files even if one fails
                }
              } else if (fileExt === '.zip') {
                // Upload zip files as-is
                console.log('Uploading zip file:', fileName);
                try {
                  await github.repos.uploadReleaseAsset({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    release_id: release_id,
                    name: fileName,
                    data: fs.readFileSync(filePath)
                  });
                } catch (error) {
                  console.error(`Failed to upload ${fileName}:`, error);
                }
              }
            }
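
# Usage sketch (hypothetical tag and asset names): fetch a released binary and
# run it, e.g.
#   curl -LO https://github.com/<owner>/<repo>/releases/download/server-b1234/llama-server--aarch64-apple-darwin--metal-llama-server
#   chmod +x llama-server--aarch64-apple-darwin--metal-llama-server
#   ./llama-server--aarch64-apple-darwin--metal-llama-server --help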