CUDA backend #2310

Merged: 52 commits from add-cuda-support into main, May 15, 2024
Commits
dc03f81
remove lingering references to SOM license
cebtenzzre Apr 29, 2024
c482bae
backend: bring some upstream changes into llama.cpp.cmake
cebtenzzre Apr 29, 2024
1684f7b
backend: update llama.cpp and list of supported models
cebtenzzre Apr 30, 2024
dc8cd8c
backend: bring more upstream changes into llama.cpp.cmake
cebtenzzre Apr 30, 2024
75d05c5
llamamodel: remove dependency on internal llama_token_to_piece
cebtenzzre Apr 30, 2024
1ebd3cf
backend: initial port to Occam's Vulkan backend
cebtenzzre Apr 30, 2024
9f92731
vulkan: improve the implementation
cebtenzzre May 2, 2024
ce2164e
backend: also build CUDA backend by default
cebtenzzre May 2, 2024
011935e
backend: make CUDA build useful
cebtenzzre May 2, 2024
9e457bf
backend: add option to build ROCm backend
cebtenzzre May 2, 2024
c47900e
cuda: implement device enumeration
cebtenzzre May 6, 2024
fef041c
cmake: don't build CPU variant on Linux/Windows
cebtenzzre May 6, 2024
bbe5cc0
llmodel: add a backend field to LLModel::GPUDevice
cebtenzzre May 6, 2024
dc334ae
llmodel: select a backend, not a build variant
cebtenzzre May 6, 2024
b54151f
llmodel: list GPU devices from all backends
cebtenzzre May 6, 2024
d4feaeb
python: implement selectable GPU backend
cebtenzzre May 6, 2024
1a10587
chat: implement basic UI backend selection
cebtenzzre May 6, 2024
a9ffe5a
cmake: set RUNPATH of llamamodel-mainline-cuda correctly
cebtenzzre May 7, 2024
d30d5b2
backend: fix kompute-avxonly build
cebtenzzre May 7, 2024
16170f4
cuda: fix dependency bundling on Windows
cebtenzzre May 7, 2024
2417105
ci: install CUDA toolkit
cebtenzzre May 7, 2024
9e8f7c3
cuda: ignore libcuda.so.* on Linux
cebtenzzre May 7, 2024
9e54277
ci: install additional deps to make linuxdeployqt happy
cebtenzzre May 7, 2024
335b129
cmake: do not ship static llama library
cebtenzzre May 8, 2024
b14992c
llama.cpp: do not install kompute or fmt
cebtenzzre May 8, 2024
2b5d6d3
cmake: let linuxdeployqt handle CUDA deps on Linux
cebtenzzre May 8, 2024
831a146
cmake: install llmodel to lib/ on Linux and bin/ on Windows
cebtenzzre May 8, 2024
eec9ec9
cmake: do not install import libraries on Windows
cebtenzzre May 8, 2024
9eaa326
kompute: use slightly newer Vulkan headers to avoid installation
cebtenzzre May 8, 2024
004a262
llama.cpp: rebase onto latest master
cebtenzzre May 8, 2024
f1558d7
chat: specify backend of reported device
cebtenzzre May 8, 2024
fe78377
Merge branch 'main' into add-cuda-support
cebtenzzre May 8, 2024
0e3d90a
cmake: fix upstream spelling error
cebtenzzre May 8, 2024
530b224
chat: give the "force metal" option a chance of working
cebtenzzre May 8, 2024
2e0e4fc
chat: make device selection meaningful on macOS
cebtenzzre May 8, 2024
bb0402b
cmake: remove an old reference to libbert-*.dylib
cebtenzzre May 8, 2024
0069196
llama.cpp: sync with upstream for CUDA graphs
cebtenzzre May 9, 2024
ad0c3ea
Merge branch 'main' into add-cuda-support
cebtenzzre May 9, 2024
6394494
backend: use "cpu" impl if "kompute" is not found
cebtenzzre May 9, 2024
553fc89
python: fix documentation for device parameter
cebtenzzre May 9, 2024
035ea59
cmake: fix chat install if built without Kompute or CUDA
cebtenzzre May 9, 2024
85b9e2f
cmake: clearly indicate how to build without CUDA/Kompute on failure
cebtenzzre May 9, 2024
c9b6732
llmodel: fix compile errors
cebtenzzre May 9, 2024
41ce930
metal: copy default.metallib, not ggml-metal.metal
cebtenzzre May 9, 2024
14588bc
llamamodel: update model whitelist
cebtenzzre May 10, 2024
21bc8c8
build_and_run: mention compiler and CUDA
cebtenzzre May 11, 2024
0cbca27
settings: prefix vk devices with "Vulkan: ", update old names
cebtenzzre May 13, 2024
8dbe93d
kompute: fix device name leaks
cebtenzzre May 13, 2024
dbf38b2
chat: bump version to 2.8.0
cebtenzzre May 15, 2024
1c09b6d
Merge branch 'main' into add-cuda-support
cebtenzzre May 15, 2024
c7f8c93
python: update README to reflect CUDA build dependency
cebtenzzre May 15, 2024
5875d83
python: bump version for CUDA support
cebtenzzre May 15, 2024
Files changed
45 changes: 41 additions & 4 deletions .circleci/continue_config.yml
@@ -97,7 +97,9 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk patchelf
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk patchelf cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Installing Qt
command: |
@@ -121,6 +123,7 @@ jobs:
set -eo pipefail
export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake
export PATH=$PATH:$HOME/Qt/Tools/QtInstallerFramework/4.7/bin
export PATH=$PATH:/usr/local/cuda/bin
mkdir build
cd build
mkdir upload
@@ -162,6 +165,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Build
command: |
@@ -218,7 +226,9 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update && sudo apt install -y libfontconfig1 libfreetype6 libx11-6 libx11-xcb1 libxext6 libxfixes3 libxi6 libxrender1 libxcb1 libxcb-cursor0 libxcb-glx0 libxcb-keysyms1 libxcb-image0 libxcb-shm0 libxcb-icccm4 libxcb-sync1 libxcb-xfixes0 libxcb-shape0 libxcb-randr0 libxcb-render-util0 libxcb-util1 libxcb-xinerama0 libxcb-xkb1 libxkbcommon0 libxkbcommon-x11-0 bison build-essential flex gperf python3 gcc g++ libgl1-mesa-dev libwayland-dev vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Installing Qt
command: |
@@ -235,6 +245,7 @@ jobs:
name: Build
command: |
export CMAKE_PREFIX_PATH=~/Qt/6.5.1/gcc_64/lib/cmake
export PATH=$PATH:/usr/local/cuda/bin
~/Qt/Tools/CMake/bin/cmake -DCMAKE_BUILD_TYPE=Release -S gpt4all-chat -B build
~/Qt/Tools/CMake/bin/cmake --build build --target all

@@ -269,6 +280,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Build
command: |
@@ -394,12 +410,15 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cmake build-essential vulkan-sdk
sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
pip install setuptools wheel cmake
- run:
name: Build C library
command: |
export PATH=$PATH:/usr/local/cuda/bin
git submodule update --init --recursive
cd gpt4all-backend
cmake -B build
@@ -459,6 +478,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Install dependencies
command:
@@ -530,11 +554,14 @@ jobs:
command: |
wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list http://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get install -y cmake build-essential vulkan-sdk
sudo apt-get install -y cmake build-essential vulkan-sdk cuda-compiler-12-4 libcublas-dev-12-4 libnvidia-compute-550-server libmysqlclient21 libodbc2 libpq5
- run:
name: Build Libraries
command: |
export PATH=$PATH:/usr/local/cuda/bin
cd gpt4all-backend
mkdir -p runtimes/build
cd runtimes/build
@@ -599,6 +626,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Install dependencies
command: |
@@ -642,6 +674,11 @@ jobs:
command: |
Invoke-WebRequest -Uri https://sdk.lunarg.com/sdk/download/1.3.261.1/windows/VulkanSDK-1.3.261.1-Installer.exe -OutFile VulkanSDK-1.3.261.1-Installer.exe
.\VulkanSDK-1.3.261.1-Installer.exe --accept-licenses --default-answer --confirm-command install
- run:
name: Install CUDA Toolkit
command: |
Invoke-WebRequest -Uri https://developer.download.nvidia.com/compute/cuda/12.4.1/network_installers/cuda_12.4.1_windows_network.exe -OutFile cuda_12.4.1_windows_network.exe
.\cuda_12.4.1_windows_network.exe -s cudart_12.4 nvcc_12.4 cublas_12.4 cublas_dev_12.4
- run:
name: Install dependencies
command: |
30 changes: 0 additions & 30 deletions LICENSE_SOM.txt

This file was deleted.

80 changes: 63 additions & 17 deletions gpt4all-backend/CMakeLists.txt
@@ -2,15 +2,23 @@ cmake_minimum_required(VERSION 3.16)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

if(APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
if(BUILD_UNIVERSAL)
if (APPLE)
option(BUILD_UNIVERSAL "Build a Universal binary on macOS" ON)
else()
option(LLMODEL_KOMPUTE "llmodel: use Kompute" ON)
option(LLMODEL_VULKAN "llmodel: use Vulkan" OFF)
option(LLMODEL_CUDA "llmodel: use CUDA" ON)
option(LLMODEL_ROCM "llmodel: use ROCm" OFF)
endif()

if (APPLE)
if (BUILD_UNIVERSAL)
# Build a Universal binary on macOS
# This requires that the found Qt library is compiled as Universal binaries.
set(CMAKE_OSX_ARCHITECTURES "arm64;x86_64" CACHE STRING "" FORCE)
else()
# Build for the host architecture on macOS
if(NOT CMAKE_OSX_ARCHITECTURES)
if (NOT CMAKE_OSX_ARCHITECTURES)
set(CMAKE_OSX_ARCHITECTURES "${CMAKE_HOST_SYSTEM_PROCESSOR}" CACHE STRING "" FORCE)
endif()
endif()
@@ -39,36 +47,70 @@ else()
message(STATUS "Interprocedural optimization support detected")
endif()

set(DIRECTORY llama.cpp-mainline)
include(llama.cpp.cmake)

set(BUILD_VARIANTS default avxonly)
if (${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
set(BUILD_VARIANTS ${BUILD_VARIANTS} metal)
set(BUILD_VARIANTS)
set(GPTJ_BUILD_VARIANT cpu)
if (APPLE)
list(APPEND BUILD_VARIANTS metal)
endif()
if (LLMODEL_KOMPUTE)
list(APPEND BUILD_VARIANTS kompute kompute-avxonly)
set(GPTJ_BUILD_VARIANT kompute)
else()
list(PREPEND BUILD_VARIANTS cpu cpu-avxonly)
endif()
if (LLMODEL_VULKAN)
list(APPEND BUILD_VARIANTS vulkan vulkan-avxonly)
endif()
if (LLMODEL_CUDA)
include(CheckLanguage)
check_language(CUDA)
if (NOT CMAKE_CUDA_COMPILER)
message(FATAL_ERROR "CUDA Toolkit not found. To build without CUDA, use -DLLMODEL_CUDA=OFF.")
endif()
enable_language(CUDA)
list(APPEND BUILD_VARIANTS cuda cuda-avxonly)
endif()
if (LLMODEL_ROCM)
enable_language(HIP)
list(APPEND BUILD_VARIANTS rocm rocm-avxonly)
endif()

set(CMAKE_VERBOSE_MAKEFILE ON)

# Go through each build variant
foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
# Determine flags
if (BUILD_VARIANT STREQUAL avxonly)
set(GPT4ALL_ALLOW_NON_AVX NO)
if (BUILD_VARIANT MATCHES avxonly)
set(GPT4ALL_ALLOW_NON_AVX OFF)
else()
set(GPT4ALL_ALLOW_NON_AVX YES)
set(GPT4ALL_ALLOW_NON_AVX ON)
endif()
set(LLAMA_AVX2 ${GPT4ALL_ALLOW_NON_AVX})
set(LLAMA_F16C ${GPT4ALL_ALLOW_NON_AVX})
set(LLAMA_FMA ${GPT4ALL_ALLOW_NON_AVX})

if (BUILD_VARIANT STREQUAL metal)
set(LLAMA_METAL YES)
else()
set(LLAMA_METAL NO)
set(LLAMA_METAL OFF)
set(LLAMA_KOMPUTE OFF)
set(LLAMA_VULKAN OFF)
set(LLAMA_CUDA OFF)
set(LLAMA_ROCM OFF)
if (BUILD_VARIANT MATCHES metal)
set(LLAMA_METAL ON)
elseif (BUILD_VARIANT MATCHES kompute)
set(LLAMA_KOMPUTE ON)
elseif (BUILD_VARIANT MATCHES vulkan)
set(LLAMA_VULKAN ON)
elseif (BUILD_VARIANT MATCHES cuda)
set(LLAMA_CUDA ON)
elseif (BUILD_VARIANT MATCHES rocm)
set(LLAMA_HIPBLAS ON)
endif()

# Include GGML
set(LLAMA_K_QUANTS YES)
include_ggml(llama.cpp-mainline -mainline-${BUILD_VARIANT} ON)
include_ggml(-mainline-${BUILD_VARIANT})

# Function for preparing individual implementations
function(prepare_target TARGET_NAME BASE_LIB)
@@ -93,11 +135,15 @@ foreach(BUILD_VARIANT IN LISTS BUILD_VARIANTS)
LLAMA_VERSIONS=>=3 LLAMA_DATE=999999)
prepare_target(llamamodel-mainline llama-mainline)

if (NOT LLAMA_METAL)
if (BUILD_VARIANT MATCHES ${GPTJ_BUILD_VARIANT})
add_library(gptj-${BUILD_VARIANT} SHARED
gptj.cpp utils.h utils.cpp llmodel_shared.cpp llmodel_shared.h)
prepare_target(gptj llama-mainline)
endif()

if (BUILD_VARIANT STREQUAL cuda)
set(CUDAToolkit_BIN_DIR ${CUDAToolkit_BIN_DIR} PARENT_SCOPE)
endif()
endforeach()

add_library(llmodel
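With the options above in place, the Kompute, Vulkan, CUDA, and ROCm variants can each be toggled at configure time. A minimal sketch of configuring the backend without CUDA (for example, when no CUDA Toolkit is installed), assuming a checkout with submodules initialized; the option name comes from the error message added in this diff, and the remaining commands mirror the CI "Build C library" step:

    # Build the backend libraries with the CUDA variants disabled.
    git submodule update --init --recursive
    cd gpt4all-backend
    cmake -B build -DCMAKE_BUILD_TYPE=Release -DLLMODEL_CUDA=OFF
    cmake --build build --parallel

Leaving LLMODEL_CUDA at its default of ON instead builds the cuda and cuda-avxonly variants, provided nvcc is reachable; the CI jobs above append /usr/local/cuda/bin to PATH for exactly this reason.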
2 changes: 1 addition & 1 deletion gpt4all-backend/llama.cpp-mainline
Subproject commit updated (submodule pointer bump; contents not rendered).