Added the ability to use guide tokens for OuteTTS, greatly improving TTS recitation accuracy over long input sequences. #18421

Workflow file for this run

	# Display name of this workflow.
	name: CI

	# Triggers: manual dispatch (with a required create_release flag), pushes to
	# master, and pull requests. Push and pull-request runs are limited to
	# changes in this workflow file, CMake/Make build scripts, and
	# C/C++/CUDA/Swift/Objective-C/Metal sources anywhere in the tree.
	on:
	  workflow_dispatch: # allows manual triggering
	    inputs:
	      create_release:
	        description: 'Create new release'
	        required: true
	        type: boolean
	  push:
	    branches:
	      - master
	    # '**/' matches at any directory depth, including the repository root.
	    paths:
	      - '.github/workflows/build.yml'
	      - '**/CMakeLists.txt'
	      - '**/Makefile'
	      - '**/*.h'
	      - '**/*.hpp'
	      - '**/*.c'
	      - '**/*.cpp'
	      - '**/*.cu'
	      - '**/*.cuh'
	      - '**/*.swift'
	      - '**/*.m'
	      - '**/*.metal'
	  pull_request:
	    types: [opened, synchronize, reopened]
	    # Keep this list identical to the push filter above.
	    paths:
	      - '.github/workflows/build.yml'
	      - '**/CMakeLists.txt'
	      - '**/Makefile'
	      - '**/*.h'
	      - '**/*.hpp'
	      - '**/*.c'
	      - '**/*.cpp'
	      - '**/*.cu'
	      - '**/*.cuh'
	      - '**/*.swift'
	      - '**/*.m'
	      - '**/*.metal'

	# Runs that have a head_ref (pull requests) share a group keyed on the ref,
	# so a newer run cancels the one in progress; all other runs fall back to
	# their unique run_id and therefore never cancel each other.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
	  cancel-in-progress: true

	# Fine-grained permissions for the GITHUB_TOKEN
	# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
	permissions:
	  contents: write # for creating release

	# Environment shared by every job in this workflow.
	env:
	  # Source branch of a pull request when there is one, otherwise the name of
	  # the ref that triggered the run. Read by the "Determine tag name" steps.
	  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
	  GGML_NLOOP: 3
	  GGML_N_THREADS: 1
	  LLAMA_LOG_COLORS: 1
	  LLAMA_LOG_PREFIX: 1
	  LLAMA_LOG_TIMESTAMPS: 1

	jobs:
	# macOS (macos-14 runner) CMake build with Metal, curl and RPC enabled.
	# Runs the 'main' and 'curl' test labels and, for master pushes or a
	# requested release, uploads the packed binaries as an artifact.
	macOS-latest-cmake-arm64:
	  runs-on: macos-14

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with git rev-list.
	        fetch-depth: 0

	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        mkdir build
	        cd build
	        cmake .. \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DLLAMA_CURL=ON \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DGGML_RPC=ON
	        cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L 'main|curl' --verbose --timeout 900

	    # The branch name is read from the BRANCH_NAME environment variable
	    # (workflow-level env) instead of being spliced into the script text by
	    # expression interpolation, so a crafted branch name cannot inject shell
	    # syntax.
	    - name: Determine tag name
	      id: tag
	      shell: bash
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
	        fi

	    # Packaging only happens for pushes to master or when a release was
	    # requested through workflow_dispatch.
	    - name: Pack artifacts
	      id: pack_artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      run: |
	        cp LICENSE ./build/bin/
	        zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*

	    - name: Upload artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      uses: actions/upload-artifact@v4
	      with:
	        path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
	        name: llama-bin-macos-arm64.zip

	# macOS (macos-13 runner) CMake build with Metal disabled and curl/RPC
	# enabled. Runs the 'main' test label and, for master pushes or a requested
	# release, uploads the packed binaries as an artifact.
	macOS-latest-cmake-x64:
	  runs-on: macos-13

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with git rev-list.
	        fetch-depth: 0

	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        # Metal is disabled due to intermittent failures with Github runners not having a GPU:
	        # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
	        cmake -B build \
	          -DLLAMA_FATAL_WARNINGS=ON \
	          -DLLAMA_CURL=ON \
	          -DGGML_METAL=OFF \
	          -DGGML_RPC=ON
	        cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	    # The branch name is read from the BRANCH_NAME environment variable
	    # (workflow-level env) instead of being spliced into the script text by
	    # expression interpolation, so a crafted branch name cannot inject shell
	    # syntax.
	    - name: Determine tag name
	      id: tag
	      shell: bash
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
	        fi

	    # Packaging only happens for pushes to master or when a release was
	    # requested through workflow_dispatch.
	    - name: Pack artifacts
	      id: pack_artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      run: |
	        cp LICENSE ./build/bin/
	        zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*

	    - name: Upload artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      uses: actions/upload-artifact@v4
	      with:
	        path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
	        name: llama-bin-macos-x64.zip

	# Ubuntu CMake build with curl and RPC enabled. Runs the 'main' and 'curl'
	# test labels plus a llama2c conversion smoke test and, for master pushes or
	# a requested release, uploads the packed binaries as an artifact.
	ubuntu-latest-cmake:
	  runs-on: ubuntu-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with git rev-list.
	        fetch-depth: 0

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential libcurl4-openssl-dev

	    - name: Build
	      id: cmake_build
	      run: |
	        mkdir build
	        cd build
	        cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON
	        cmake --build . --config Release -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L 'main|curl' --verbose --timeout 900

	    # Downloads a tiny llama2c checkpoint and tokenizer, converts them to
	    # GGUF and runs a short generation with the result.
	    - name: Test llama2c conversion
	      id: llama2c_test
	      run: |
	        cd build
	        echo "Fetch tokenizer"
	        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
	        echo "Fetch llama2c model"
	        wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
	        ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
	        ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256

	    # The branch name is read from the BRANCH_NAME environment variable
	    # (workflow-level env) instead of being spliced into the script text by
	    # expression interpolation, so a crafted branch name cannot inject shell
	    # syntax.
	    - name: Determine tag name
	      id: tag
	      shell: bash
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
	        fi

	    # Packaging only happens for pushes to master or when a release was
	    # requested through workflow_dispatch.
	    - name: Pack artifacts
	      id: pack_artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      run: |
	        cp LICENSE ./build/bin/
	        zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*

	    - name: Upload artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      uses: actions/upload-artifact@v4
	      with:
	        path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
	        name: llama-bin-ubuntu-x64.zip

	# Sanitizer matrix: every combination of ADDRESS/THREAD/UNDEFINED with
	# Debug/Release is built and tested. A failing combination does not fail
	# the workflow (continue-on-error).
	ubuntu-latest-cmake-sanitizer:
	  runs-on: ubuntu-latest

	  continue-on-error: true

	  strategy:
	    matrix:
	      sanitizer: [ADDRESS, THREAD, UNDEFINED]
	      build_type: [Debug, Release]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential

	    # Exactly one of the two build steps runs per matrix entry: THREAD
	    # builds additionally turn OpenMP off.
	    - name: Build
	      id: cmake_build
	      if: ${{ matrix.sanitizer != 'THREAD' }}
	      run: |
	        mkdir build
	        cd build
	        cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
	        cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

	    - name: Build (no OpenMP)
	      id: cmake_build_no_openmp
	      if: ${{ matrix.sanitizer == 'THREAD' }}
	      run: |
	        mkdir build
	        cd build
	        cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
	        cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# Ubuntu CMake build with only GGML_RPC enabled, followed by the 'main'
	# test label. A failure here does not fail the workflow (continue-on-error).
	ubuntu-latest-cmake-rpc:
	  runs-on: ubuntu-latest

	  continue-on-error: true

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential

	    - name: Build
	      id: cmake_build
	      run: |
	        mkdir build
	        cd build
	        cmake -DGGML_RPC=ON ..
	        cmake --build . --config Release -j $(nproc)

	    # NOTE(review): unlike the other jobs this ctest call has no --timeout;
	    # confirm whether that is intentional.
	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose

	# Ubuntu 22.04 CMake build with the Vulkan backend, followed by the 'main'
	# test label.
	ubuntu-22-cmake-vulkan:
	  runs-on: ubuntu-22.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # Registers the LunarG apt repository (signing key + jammy source list)
	    # and installs the Vulkan SDK together with the Mesa Vulkan drivers.
	    - name: Dependencies
	      id: depends
	      run: |
	        wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
	        sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
	        sudo apt-get update -y
	        sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk

	    - name: Build
	      id: cmake_build
	      run: |
	        mkdir build
	        cd build
	        cmake -DGGML_VULKAN=ON ..
	        cmake --build . --config Release -j $(nproc)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# HIP build inside the ROCm 6.0.2 development container. The project is
	# configured and built twice, into separate build directories: once with
	# CMake's HIP language support and once with hipcc as the C/C++ compiler.
	# No tests are run.
	ubuntu-22-cmake-hip:
	  runs-on: ubuntu-22.04
	  container: rocm/dev-ubuntu-22.04:6.0.2

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev

	    - name: Build with native CMake HIP support
	      id: cmake_build
	      run: |
	        cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
	        cmake --build build --config Release -j $(nproc)

	    - name: Build with legacy HIP support
	      id: cmake_build_legacy_hip
	      run: |
	        cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
	        cmake --build build2 --config Release -j $(nproc)

	# MUSA build inside the mthreads/musa development container. Build only,
	# no tests are run.
	ubuntu-22-cmake-musa:
	  runs-on: ubuntu-22.04
	  container: mthreads/musa:rc3.1.0-devel-ubuntu22.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      run: |
	        apt-get update
	        apt-get install -y build-essential git cmake libcurl4-openssl-dev

	    - name: Build with native CMake MUSA support
	      id: cmake_build
	      run: |
	        cmake -B build -S . -DGGML_MUSA=ON
	        cmake --build build --config Release -j $(nproc)

	# SYCL build with the Intel oneAPI icx/icpx compilers. Build only, no tests
	# are run; a failure does not fail the workflow (continue-on-error).
	ubuntu-22-cmake-sycl:
	  runs-on: ubuntu-22.04

	  continue-on-error: true

	  steps:
	    # NOTE(review): the repository is checked out here and again in the
	    # "Clone" step below; one of the two looks redundant.
	    - uses: actions/checkout@v4

	    - name: add oneAPI to apt
	      shell: bash
	      run: |
	        cd /tmp
	        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

	    - name: install oneAPI dpcpp compiler
	      shell: bash
	      run: |
	        sudo apt update
	        sudo apt install -y intel-oneapi-compiler-dpcpp-cpp

	    - name: install oneAPI MKL library
	      shell: bash
	      run: |
	        sudo apt install -y intel-oneapi-mkl-devel

	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # setvars.sh must be sourced in the same script that invokes cmake so
	    # that icx/icpx are available to it.
	    - name: Build
	      id: cmake_build
	      run: |
	        source /opt/intel/oneapi/setvars.sh
	        mkdir build
	        cd build
	        cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
	        cmake --build . --config Release -j $(nproc)

	# Same as the plain SYCL job but configured with GGML_SYCL_F16=ON. Build
	# only, no tests are run; a failure does not fail the workflow
	# (continue-on-error).
	ubuntu-22-cmake-sycl-fp16:
	  runs-on: ubuntu-22.04

	  continue-on-error: true

	  steps:
	    # NOTE(review): the repository is checked out here and again in the
	    # "Clone" step below; one of the two looks redundant.
	    - uses: actions/checkout@v4

	    - name: add oneAPI to apt
	      shell: bash
	      run: |
	        cd /tmp
	        wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
	        sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"

	    - name: install oneAPI dpcpp compiler
	      shell: bash
	      run: |
	        sudo apt update
	        sudo apt install -y intel-oneapi-compiler-dpcpp-cpp

	    - name: install oneAPI MKL library
	      shell: bash
	      run: |
	        sudo apt install -y intel-oneapi-mkl-devel

	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    # setvars.sh must be sourced in the same script that invokes cmake so
	    # that icx/icpx are available to it.
	    - name: Build
	      id: cmake_build
	      run: |
	        source /opt/intel/oneapi/setvars.sh
	        mkdir build
	        cd build
	        cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
	        cmake --build . --config Release -j $(nproc)

	# TODO: this job builds with GGML_METAL=OFF because test-backend-ops fails on
	#       the "Apple Paravirtual device" and it is unclear how to debug that.
	#       ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
	#       It would be great to fix this and re-enable Metal here.
	macOS-latest-cmake:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        mkdir build
	        cd build
	        cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
	        cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

	    - name: Test
	      id: cmake_test
	      run: |
	        cd build
	        ctest -L main --verbose --timeout 900

	# Cross-build for iOS (deployment target 14.0) with the Xcode generator.
	# Examples, tests and the server are switched off; code signing is disabled
	# for the build. No tests are run.
	macOS-latest-cmake-ios:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        mkdir build
	        cd build
	        cmake -G Xcode .. \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=iOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

	# Cross-build for tvOS (deployment target 14.0) with the Xcode generator.
	# Examples, tests and the server are switched off; code signing is disabled
	# for the build. No tests are run.
	macOS-latest-cmake-tvos:
	  runs-on: macos-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build
	      id: cmake_build
	      run: |
	        sysctl -a
	        mkdir build
	        cd build
	        cmake -G Xcode .. \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=tvOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

	# Builds and installs a universal (arm64 + x86_64) llama.cpp with CMake,
	# then builds the Swift package with xcodebuild once per destination in the
	# matrix (macOS, iOS, tvOS).
	macOS-latest-swift:
	  runs-on: macos-latest

	  strategy:
	    matrix:
	      destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Dependencies
	      id: depends
	      continue-on-error: true
	      run: |
	        brew update

	    - name: Build llama.cpp with CMake
	      id: cmake_build
	      run: |
	        sysctl -a
	        mkdir build
	        cd build
	        cmake -G Xcode .. \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
	        cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
	        sudo cmake --install . --config Release

	    - name: xcodebuild for swift package
	      id: xcodebuild
	      run: |
	        xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"

	# MSYS2 builds on Windows for the UCRT64 and CLANG64 environments. Each
	# matrix entry builds twice: a default configuration, then (after wiping
	# the build directory) one with OpenBLAS. No tests are run.
	windows-msys2:
	  runs-on: windows-latest

	  strategy:
	    fail-fast: false
	    matrix:
	      include:
	        - { sys: UCRT64,  env: ucrt-x86_64,  build: Release }
	        - { sys: CLANG64, env: clang-x86_64, build: Release }

	  steps:
	    - name: Clone
	      uses: actions/checkout@v4

	    - name: Setup ${{ matrix.sys }}
	      uses: msys2/setup-msys2@v2
	      with:
	        update: true
	        msystem: ${{matrix.sys}}
	        # Folded scalar: the package names below become one space-separated line.
	        install: >-
	          base-devel
	          mingw-w64-${{matrix.env}}-toolchain
	          mingw-w64-${{matrix.env}}-cmake
	          mingw-w64-${{matrix.env}}-openblas

	    - name: Build using CMake
	      shell: msys2 {0}
	      run: |
	        cmake -B build
	        cmake --build build --config ${{ matrix.build }} -j $(nproc)

	    - name: Clean after building using CMake
	      shell: msys2 {0}
	      run: |
	        rm -rf build

	    - name: Build using CMake w/ OpenBLAS
	      shell: msys2 {0}
	      run: |
	        cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
	        cmake --build build --config ${{ matrix.build }} -j $(nproc)

	# Windows CMake build matrix. Each entry supplies the extra CMake arguments
	# in matrix.defines; steps that only apply to some entries are gated on
	# matrix.build. For master pushes or a requested release the binaries are
	# packed and uploaded as an artifact.
	windows-latest-cmake:
	  runs-on: windows-latest

	  env:
	    OPENBLAS_VERSION: 0.3.23
	    SDE_VERSION: 9.33.0-2024-01-07
	    VULKAN_VERSION: 1.3.261.1

	  strategy:
	    matrix:
	      include:
	        - build: 'noavx-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
	        - build: 'avx2-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
	        - build: 'avx-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
	        - build: 'avx512-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
	        - build: 'openblas-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
	        - build: 'kompute-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
	        - build: 'vulkan-x64'
	          defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
	        - build: 'llvm-arm64'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
	        - build: 'msvc-arm64'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
	        - build: 'llvm-arm64-opencl-adreno'
	          defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with git rev-list.
	        fetch-depth: 0

	    - name: Clone Kompute submodule
	      id: clone_kompute
	      if: ${{ matrix.build == 'kompute-x64' }}
	      run: |
	        git submodule update --init ggml/src/ggml-kompute/kompute

	    # Downloads the OpenBLAS release zip and license, unpacks it under
	    # RUNNER_TEMP, then uses MSVC's lib.exe to generate openblas.lib from
	    # the shipped libopenblas.def.
	    - name: Download OpenBLAS
	      id: get_openblas
	      if: ${{ matrix.build == 'openblas-x64' }}
	      run: |
	        curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
	        curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
	        mkdir $env:RUNNER_TEMP/openblas
	        tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
	        $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
	        $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
	        $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
	        & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll

	    # Installs the Vulkan SDK and exports VULKAN_SDK / PATH for later steps.
	    - name: Install Vulkan SDK
	      id: get_vulkan
	      if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
	      run: |
	        curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
	        & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
	        Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
	        Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

	    - name: Install Ninja
	      id: install_ninja
	      run: |
	        choco install ninja

	    # Builds and installs the Khronos OpenCL headers and ICD loader into
	    # RUNNER_TEMP/opencl-arm64-release, the prefix this matrix entry passes
	    # to CMake via CMAKE_PREFIX_PATH.
	    - name: Install OpenCL Headers and Libs
	      id: install_opencl
	      if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
	      run: |
	        git clone https://github.com/KhronosGroup/OpenCL-Headers
	        cd OpenCL-Headers
	        mkdir build && cd build
	        cmake .. `
	          -DBUILD_TESTING=OFF `
	          -DOPENCL_HEADERS_BUILD_TESTING=OFF `
	          -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
	          -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
	        cmake --build . --target install
	        git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
	        cd OpenCL-ICD-Loader
	        mkdir build-arm64-release && cd build-arm64-release
	        cmake .. `
	          -A arm64 `
	          -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
	          -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
	        cmake --build . --target install --config release

	    - name: Build
	      id: cmake_build
	      run: |
	        cmake -S . -B build ${{ matrix.defines }}
	        cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}

	    - name: Add libopenblas.dll
	      id: add_libopenblas_dll
	      if: ${{ matrix.build == 'openblas-x64' }}
	      run: |
	        cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
	        cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt

	    # Compiles and runs a tiny CPUID probe. On success HAS_AVX512F=1 is
	    # exported for later steps; on failure nothing is exported.
	    - name: Check AVX512F support
	      id: check_avx512f
	      if: ${{ matrix.build == 'avx512-x64' }}
	      continue-on-error: true
	      run: |
	        cd build
	        $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
	        $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
	        $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
	        echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
	        & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
	        .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"

	    - name: Test
	      id: cmake_test
	      # not all machines have native AVX-512
	      if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
	      run: |
	        cd build
	        ctest -L main -C Release --verbose --timeout 900

	    # Fallback for avx512 runners without native AVX-512: run the tests
	    # under Intel SDE. HAS_AVX512F is only ever set (to 1) by the check
	    # step, never to 0, so the condition tests for "not 1"; a comparison
	    # with '0' could never match and this step would never run.
	    - name: Test (Intel SDE)
	      id: cmake_test_sde
	      if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F != '1' }} # use Intel SDE for AVX-512 emulation
	      run: |
	        curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
	        # for some weird reason windows tar doesn't like sde tar.xz
	        7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
	        7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
	        $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
	        cd build
	        $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
	        & $sde -future -- ctest -L main -C Release --verbose --timeout 900

	    # The branch name is read from the BRANCH_NAME environment variable
	    # (workflow-level env) instead of being spliced into the script text by
	    # expression interpolation, so a crafted branch name cannot inject shell
	    # syntax.
	    - name: Determine tag name
	      id: tag
	      shell: bash
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
	        fi

	    # Packaging only happens for pushes to master or when a release was
	    # requested through workflow_dispatch.
	    - name: Pack artifacts
	      id: pack_artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      run: |
	        Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
	        7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*

	    - name: Upload artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      uses: actions/upload-artifact@v4
	      with:
	        path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
	        name: llama-bin-win-${{ matrix.build }}.zip

	# CUDA build inside the nvidia/cuda 12.6.2 development container, using
	# Ninja and targeting CUDA architecture 89. Build only, no tests are run.
	ubuntu-latest-cmake-cuda:
	  runs-on: ubuntu-latest
	  container: nvidia/cuda:12.6.2-devel-ubuntu24.04

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Install dependencies
	      env:
	        # Keeps apt from prompting during package configuration.
	        DEBIAN_FRONTEND: noninteractive
	      run: |
	        apt update
	        apt install -y cmake build-essential ninja-build libgomp1 git

	    - name: Build with CMake
	      run: |
	        cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
	        cmake --build build

	windows-2019-cmake-cuda:
	runs-on: windows-2019

	strategy:
	matrix:
	cuda: ['12.4', '11.7']
	build: ['cuda']

	steps:
	- name: Clone
	id: checkout
	uses: actions/checkout@v4
	with:
	fetch-depth: 0

	- name: Install Cuda Toolkit 11.7
	if: ${{ matrix.cuda == '11.7' }}
	run: \|
	mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
	choco install unzip -y
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
	unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
	echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" \| Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
	echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" \| Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
	echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" \| Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
	echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" \| Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

	- name: Install Cuda Toolkit 12.4
	if: ${{ matrix.cuda == '12.4' }}
	run: \|
	mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
	choco install unzip -y
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
	curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
	unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
	echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" \| Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
	echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" \| Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
	echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" \| Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
	echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" \| Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8

	- name: Install ccache
	uses: hendrikmuhs/ccache-action@v1.2
	with:
	key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}

	- name: Install Ninja
	id: install_ninja
	run: \|
	choco install ninja

	- name: Build
	id: cmake_build
	shell: cmd
	run: \|
	call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
	cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DGGML_RPC=ON
	set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
	cmake --build build --config Release -j %NINJA_JOBS% -t ggml
	cmake --build build --config Release

	- name: Determine tag name
	  id: tag
	  shell: bash
	  # Produces the step output `name`:
	  #   master        -> b<commit count>
	  #   other branch  -> <branch with '/' replaced by '-'>-b<commit count>-<short hash>
	  # BRANCH_NAME is defined in the workflow-level env and is read here as a
	  # shell variable rather than through expression expansion, so a crafted
	  # branch name cannot inject commands into the script.
	  run: |
	    BUILD_NUMBER="$(git rev-list --count HEAD)"
	    SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	    if [[ "${BRANCH_NAME}" == "master" ]]; then
	      echo "name=b${BUILD_NUMBER}" >> "$GITHUB_OUTPUT"
	    else
	      SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	      echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> "$GITHUB_OUTPUT"
	    fi

	- name: Pack artifacts
	  id: pack_artifacts
	  # Only runs for pushes to master or a manual run with create_release enabled.
	  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	  run: |
	    7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

	- name: Upload artifacts
	  # Same release-only condition as the packing step that creates the zip.
	  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	  uses: actions/upload-artifact@v4
	  with:
	    path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
	    name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip

	- name: Copy and pack Cuda runtime
	  # Must use the same condition as the "Upload Cuda runtime" step that follows:
	  # otherwise a manual run with create_release enabled would try to upload a
	  # zip that was never created.
	  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	  # Collects the cudart / cublas / cublasLt DLLs from the toolkit's bin and lib
	  # directories into build\bin\cudart and zips them. The version suffix of the
	  # DLL names varies with the toolkit, hence the wildcards.
	  run: |
	    echo "Cuda install location: ${{ env.CUDA_PATH }}"
	    $dst='.\build\bin\cudart\'
	    robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
	    robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
	    7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*

	- name: Upload Cuda runtime
	  # Publishes the CUDA runtime DLL archive for release runs only.
	  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	  uses: actions/upload-artifact@v4
	  with:
	    path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
	    name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip

	# Windows SYCL build using the Intel oneAPI base toolkit. On release runs the
	# oneAPI runtime DLLs are bundled next to the binaries and uploaded as a zip.
	windows-latest-cmake-sycl:
	  runs-on: windows-latest

	  defaults:
	    run:
	      shell: bash

	  env:
	    # Offline installer and the component list handed to scripts/install-oneapi.bat.
	    WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
	    WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
	    ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with `git rev-list --count`.
	        fetch-depth: 0

	    - name: Install
	      run: |
	        scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

	    - name: Build
	      id: cmake_build
	      run: examples/sycl/win-build-sycl.bat

	    - name: Determine tag name
	      id: tag
	      shell: bash
	      # BRANCH_NAME is read from the workflow-level env as a shell variable
	      # (not via expression expansion) so a crafted branch name cannot inject
	      # commands into the script.
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> "$GITHUB_OUTPUT"
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Build the release package
	      id: pack_artifacts
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      # Copies the oneAPI runtime DLLs (MKL, compiler/SYCL runtime, oneDNN, TBB)
	      # into ./build/bin and zips that directory.
	      run: |
	        echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"

	        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin

	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin

	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin

	        cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
	        cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin

	        echo "cp oneAPI running time dll files to ./build/bin done"
	        7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*

	    - name: Upload the release package
	      if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	      uses: actions/upload-artifact@v4
	      with:
	        path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
	        name: llama-bin-win-sycl-x64.zip

	# Windows HIP (ROCm) build check. Skipped on manual runs with create_release
	# enabled; no artifacts are packed or uploaded by this job.
	windows-latest-cmake-hip:
	  if: ${{ github.event.inputs.create_release != 'true' }}
	  runs-on: windows-latest

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4

	    - name: Install
	      id: depends
	      # Downloads the AMD HIP SDK installer into RUNNER_TEMP and runs it,
	      # waiting for completion. Any PowerShell error aborts the step.
	      run: |
	        $ErrorActionPreference = "Stop"
	        write-host "Downloading AMD HIP SDK Installer"
	        Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
	        write-host "Installing AMD HIP SDK"
	        Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
	        write-host "Completed AMD HIP SDK installation"

	    - name: Verify ROCm
	      id: verify
	      # Fails the job early if the installer did not produce a ROCm clang.
	      run: |
	        & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

	    - name: Install ccache
	      uses: hendrikmuhs/ccache-action@v1.2
	      with:
	        key: ${{ github.job }}

	    - name: Build
	      id: cmake_build
	      # HIP_PATH is the ROCm install root: the directory two levels above the
	      # resolved ...\bin\clang.exe.
	      run: |
	        $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
	        $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
	        cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
	        cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

	# Release-only Windows HIP build: one archive per GPU target in the matrix,
	# bundled with hipblas/rocblas DLLs and the rocblas library directory.
	windows-latest-cmake-hip-release:
	  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
	  runs-on: windows-latest

	  strategy:
	    matrix:
	      gpu_target: [gfx1100, gfx1101, gfx1030]

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with `git rev-list --count`.
	        fetch-depth: 0

	    - name: Install
	      id: depends
	      # Downloads the AMD HIP SDK installer into RUNNER_TEMP and runs it,
	      # waiting for completion. Any PowerShell error aborts the step.
	      run: |
	        $ErrorActionPreference = "Stop"
	        write-host "Downloading AMD HIP SDK Installer"
	        Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
	        write-host "Installing AMD HIP SDK"
	        Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
	        write-host "Completed AMD HIP SDK installation"

	    - name: Verify ROCm
	      id: verify
	      run: |
	        & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

	    - name: Build
	      id: cmake_build
	      # Builds for the single matrix GPU target, then copies the HIP BLAS
	      # runtime files next to the binaries so the archive is self-contained.
	      run: |
	        $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
	        $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
	        cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
	        cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
	        md "build\bin\rocblas\library\"
	        cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
	        cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
	        cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"

	    - name: Determine tag name
	      id: tag
	      shell: bash
	      # BRANCH_NAME is read from the workflow-level env as a shell variable
	      # (not via expression expansion) so a crafted branch name cannot inject
	      # commands into the script.
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> "$GITHUB_OUTPUT"
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Pack artifacts
	      id: pack_artifacts
	      run: |
	        7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*

	    - name: Upload artifacts
	      uses: actions/upload-artifact@v4
	      with:
	        path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
	        name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip

	# Builds and installs the library for iOS with the Xcode generator, then
	# builds the Swift package and the llama.swiftui example against it.
	ios-xcode-build:
	  runs-on: macos-latest

	  steps:
	    - name: Checkout code
	      uses: actions/checkout@v4

	    - name: Build
	      id: cmake_build
	      # Examples, tests and the server are disabled; code signing is turned
	      # off for the build, and the result is installed system-wide.
	      run: |
	        sysctl -a
	        mkdir build
	        cd build
	        cmake -G Xcode .. \
	          -DGGML_METAL_USE_BF16=ON \
	          -DGGML_METAL_EMBED_LIBRARY=ON \
	          -DLLAMA_BUILD_EXAMPLES=OFF \
	          -DLLAMA_BUILD_TESTS=OFF \
	          -DLLAMA_BUILD_SERVER=OFF \
	          -DCMAKE_SYSTEM_NAME=iOS \
	          -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
	          -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
	        cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
	        sudo cmake --install . --config Release

	    - name: xcodebuild for swift package
	      id: xcodebuild
	      run: |
	        xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'

	    - name: Build Xcode project
	      run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build

	# Builds the Android example project with Gradle on JDK 17 (Zulu).
	android-build:
	  runs-on: ubuntu-latest

	  steps:
	    - name: Clone
	      uses: actions/checkout@v4

	    - name: Set up JDK
	      uses: actions/setup-java@v3
	      with:
	        java-version: 17
	        distribution: zulu

	    - name: Setup Android SDK
	      uses: android-actions/setup-android@v3
	      with:
	        log-accepted-android-sdk-licenses: false

	    - name: Build
	      run: |
	        cd examples/llama.android

	        ./gradlew build --no-daemon

	# Collects the archives uploaded by the build jobs and publishes them as
	# assets of a new GitHub release tagged with the computed build name.
	release:
	  if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

	  runs-on: ubuntu-latest

	  # Every job that uploads a release archive has to be listed here; a job that
	  # is not awaited may still be running when artifacts are downloaded, and its
	  # archive would then be missing from the release.
	  needs:
	    - ubuntu-latest-cmake
	    - macOS-latest-cmake
	    - windows-latest-cmake
	    - windows-2019-cmake-cuda
	    - windows-latest-cmake-sycl
	    - windows-latest-cmake-hip-release
	    - macOS-latest-cmake-arm64
	    - macOS-latest-cmake-x64

	  steps:
	    - name: Clone
	      id: checkout
	      uses: actions/checkout@v4
	      with:
	        # Full history: the tag step counts commits with `git rev-list --count`.
	        fetch-depth: 0

	    - name: Determine tag name
	      id: tag
	      shell: bash
	      # BRANCH_NAME is read from the workflow-level env as a shell variable
	      # (not via expression expansion) so a crafted branch name cannot inject
	      # commands into the script.
	      run: |
	        BUILD_NUMBER="$(git rev-list --count HEAD)"
	        SHORT_HASH="$(git rev-parse --short=7 HEAD)"
	        if [[ "${BRANCH_NAME}" == "master" ]]; then
	          echo "name=b${BUILD_NUMBER}" >> "$GITHUB_OUTPUT"
	        else
	          SAFE_NAME=$(echo "${BRANCH_NAME}" | tr '/' '-')
	          echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Download artifacts
	      id: download-artifact
	      uses: actions/download-artifact@v4
	      with:
	        path: ./artifact

	    - name: Move artifacts
	      id: move_artifacts
	      # Without a `name` input each artifact is downloaded into its own
	      # sub-directory of ./artifact, so the zips sit one level down.
	      run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release

	    - name: Create release
	      id: create_release
	      uses: ggml-org/action-create-release@v1
	      env:
	        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	      with:
	        tag_name: ${{ steps.tag.outputs.name }}

	    - name: Upload release
	      id: upload_release
	      uses: actions/github-script@v3
	      with:
	        github-token: ${{secrets.GITHUB_TOKEN}}
	        # Uploads every .zip found in ./artifact/release as an asset of the
	        # release created by the previous step.
	        script: |
	          const path = require('path');
	          const fs = require('fs');
	          const release_id = '${{ steps.create_release.outputs.id }}';
	          for (let file of fs.readdirSync('./artifact/release')) {
	            if (path.extname(file) === '.zip') {
	              console.log('uploadReleaseAsset', file);
	              await github.repos.uploadReleaseAsset({
	                owner: context.repo.owner,
	                repo: context.repo.repo,
	                release_id: release_id,
	                name: file,
	                data: fs.readFileSync(`./artifact/release/${file}`)
	              });
	            }
	          }

	# ubuntu-latest-gcc:
	# runs-on: ubuntu-latest
	#
	# strategy:
	# matrix:
	# build: [Debug, Release]
	#
	# steps:
	# - name: Clone
	# uses: actions/checkout@v4
	#
	# - name: Dependencies
	# run: \|
	# sudo apt-get update
	# sudo apt-get install build-essential
	# sudo apt-get install cmake
	#
	# - name: Configure
	# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
	#
	# - name: Build
	# run: \|
	# make
	#
	# ubuntu-latest-clang:
	# runs-on: ubuntu-latest
	#
	# strategy:
	# matrix:
	# build: [Debug, Release]
	#
	# steps:
	# - name: Clone
	# uses: actions/checkout@v4
	#
	# - name: Dependencies
	# run: \|
	# sudo apt-get update
	# sudo apt-get install build-essential
	# sudo apt-get install cmake
	#
	# - name: Configure
	# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
	#
	# - name: Build
	# run: \|
	# make
	#
	# ubuntu-latest-gcc-sanitized:
	# runs-on: ubuntu-latest
	#
	# strategy:
	# matrix:
	# sanitizer: [ADDRESS, THREAD, UNDEFINED]
	#
	# steps:
	# - name: Clone
	# uses: actions/checkout@v4
	#
	# - name: Dependencies
	# run: \|
	# sudo apt-get update
	# sudo apt-get install build-essential
	# sudo apt-get install cmake
	#
	# - name: Configure
	# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
	#
	# - name: Build
	# run: \|
	# make
	#
	# windows:
	# runs-on: windows-latest
	#
	# strategy:
	# matrix:
	# build: [Release]
	# arch: [Win32, x64]
	# include:
	# - arch: Win32
	# s2arc: x86
	# - arch: x64
	# s2arc: x64
	#
	# steps:
	# - name: Clone
	# uses: actions/checkout@v4
	#
	# - name: Add msbuild to PATH
	# uses: microsoft/setup-msbuild@v1
	#
	# - name: Configure
	# run: >
	# cmake -S . -B ./build -A ${{ matrix.arch }}
	# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
	#
	# - name: Build
	# run: \|
	# cd ./build
	# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
	#
	# - name: Upload binaries
	# uses: actions/upload-artifact@v4
	# with:
	# name: llama-bin-${{ matrix.arch }}
	# path: build/bin/${{ matrix.build }}
	#
	# windows-blas:
	# runs-on: windows-latest
	#
	# strategy:
	# matrix:
	# build: [Release]
	# arch: [Win32, x64]
	# blas: [ON]
	# include:
	# - arch: Win32
	# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
	# s2arc: x86
	# - arch: x64
	# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
	# s2arc: x64
	#
	# steps:
	# - name: Clone
	# uses: actions/checkout@v4
	#
	# - name: Add msbuild to PATH
	# uses: microsoft/setup-msbuild@v1
	#
	# - name: Fetch OpenBLAS
	# if: matrix.blas == 'ON'
	# run: \|
	# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
	# 7z x blas.zip -oblas -y
	# copy blas/include/cblas.h .
	# copy blas/include/openblas_config.h .
	# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
	#
	# - name: Configure
	# run: >
	# cmake -S . -B ./build -A ${{ matrix.arch }}
	# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
	# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
	# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
	#
	# - name: Build
	# run: \|
	# cd ./build
	# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
	#
	# - name: Copy libopenblas.dll
	# if: matrix.blas == 'ON'
	# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
	#
	# - name: Upload binaries
	# if: matrix.blas == 'ON'
	# uses: actions/upload-artifact@v4
	# with:
	# name: llama-blas-bin-${{ matrix.arch }}
	# path: build/bin/${{ matrix.build }}
	#
	# emscripten:
	# runs-on: ubuntu-latest
	#
	# strategy:
	# matrix:
	# build: [Release]
	#
	# steps:
	# - name: Clone
	# uses: actions/checkout@v4
	#
	# - name: Dependencies
	# run: \|
	# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
	# tar -xvf master.tar.gz
	# emsdk-master/emsdk update
	# emsdk-master/emsdk install latest
	# emsdk-master/emsdk activate latest
	#
	# - name: Configure
	# run: echo "tmp"
	#
	# - name: Build
	# run: \|
	# pushd emsdk-master
	# source ./emsdk_env.sh
	# popd
	# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
	# make

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Added the ability to use guide tokens for OuteTTS, greatly improving TTS recitation accuracy over long input sequences. #18421

Workflow file

Added the ability to use guide tokens for OuteTTS, greatly improving TTS recitation accuracy over long input sequences. #18421

Jobs

Run details

Workflow file for this run