From 2e53df7b44371b91ed28bb90b1a78d9f854ba69c Mon Sep 17 00:00:00 2001 From: Philip Taron Date: Fri, 22 Dec 2023 12:33:09 -0800 Subject: [PATCH] flake.nix: rewrite 1. Split into separate files per output. 2. Added overlays, so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs). 3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;` so that there's dependency injection rather than dependency lookup. 4. Add a description and meta information for each package. The description includes a bit about what's trying to accelerate each one. --- apps.nix | 14 ++++ devshells.nix | 32 +++++++++ flake.lock | 40 +----------- flake.nix | 178 +++++++++++++------------------------------------- llama-cpp.nix | 118 +++++++++++++++++++++++++++++++++ overlays.nix | 20 ++++++ 6 files changed, 231 insertions(+), 171 deletions(-) create mode 100644 apps.nix create mode 100644 devshells.nix create mode 100644 llama-cpp.nix create mode 100644 overlays.nix diff --git a/apps.nix b/apps.nix new file mode 100644 index 00000000000000..29774220a6c783 --- /dev/null +++ b/apps.nix @@ -0,0 +1,14 @@ +names: pkgs: + +let + default = builtins.elemAt names 0; + mkApp = name: { + ${name} = { + type = "app"; + program = "${pkgs.llama-cpp}/bin/${name}"; + }; + }; + result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names; +in + +result // { default = result.${default}; } diff --git a/devshells.nix b/devshells.nix new file mode 100644 index 00000000000000..1a2bbe545e1bcb --- /dev/null +++ b/devshells.nix @@ -0,0 +1,32 @@ +pkgs: + +let + llama-python = pkgs.python3.withPackages (ps: [ + ps.numpy + ps.sentencepiece + ]); + + # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime + llama-python-extra = pkgs.python3.withPackages (ps: [ + ps.numpy + ps.sentencepiece + ps.torchWithoutCuda + ps.transformers + ]); +in + +{ + default = pkgs.mkShell { + name = "default"; + description = "contains numpy and sentencepiece"; + inputsFrom = [ pkgs.llama-cpp ]; + buildInputs = [ llama-python ]; + }; + + extra = pkgs.mkShell { + name = "extra"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + inputsFrom = [ pkgs.llama-cpp ]; + buildInputs = [ llama-python-extra ]; + }; +} diff --git a/flake.lock b/flake.lock index 0455f65617a2dc..fdcd6d411b3240 100644 --- a/flake.lock +++ b/flake.lock @@ -1,30 +1,12 @@ { "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1694529238, - "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "ff7b65b44d01cf9ba6a71320833626af21126384", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "nixpkgs": { "locked": { - "lastModified": 1698318101, - "narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=", + "lastModified": 1703013332, + "narHash": "sha256-+tFNwMvlXLbJZXiMHqYq77z/RfmpfpiI3yjL6o/Zo9M=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "63678e9f3d3afecfeafa0acead6239cdb447574c", + "rev": "54aac082a4d9bb5bbc5c4e899603abfb76a3f6d6", "type": "github" }, "original": { @@ -36,24 +18,8 @@ }, "root": { "inputs": { - "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" } - }, - "systems": { - "locked": { - 
"lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 4cf28d5c11c0fd..08d82dba03afd9 100644 --- a/flake.nix +++ b/flake.nix @@ -1,139 +1,49 @@ { inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; - flake-utils.url = "github:numtide/flake-utils"; }; - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - name = "llama.cpp"; - src = ./.; - meta.mainProgram = "llama"; - inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin; - buildInputs = with pkgs; [ openmpi ]; - osSpecific = with pkgs; buildInputs ++ ( - if isAarch64 && isDarwin then - with pkgs.darwin.apple_sdk_11_0.frameworks; [ - Accelerate - MetalKit - ] - else if isAarch32 && isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else if isDarwin then - with pkgs.darwin.apple_sdk.frameworks; [ - Accelerate - CoreGraphics - CoreVideo - ] - else - with pkgs; [ openblas ] - ); - pkgs = import nixpkgs { inherit system; }; - nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ]; - cudatoolkit_joined = with pkgs; symlinkJoin { - # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit - # see https://github.com/NixOS/nixpkgs/issues/224291 - # copied from jaxlib - name = "${cudaPackages.cudatoolkit.name}-merged"; - paths = [ - cudaPackages.cudatoolkit.lib - cudaPackages.cudatoolkit.out - ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [ - # for some reason some of the required libs are in the targets/x86_64-linux - # directory; not sure why but this works around it - "${cudaPackages.cudatoolkit}/targets/${system}" - ]; - }; - llama-python = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]); - # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime - llama-python-extra = - pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]); - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python' - ''; - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp ${src}/llama.h $out/include/ - ''; - cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ]; - in - { - packages.default = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = osSpecific; - cmakeFlags = cmakeFlags - ++ (if isAarch64 && isDarwin then [ - "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" - "-DLLAMA_METAL=ON" - ] else [ - "-DLLAMA_BLAS=ON" - "-DLLAMA_BLAS_VENDOR=OpenBLAS" - ]); - }; - packages.opencl = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ clblast ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CLBLAST=ON" - ]; - }; - packages.cuda = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ]; - 
cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_CUBLAS=ON" - ]; - }; - packages.rocm = pkgs.stdenv.mkDerivation { - inherit name src meta postPatch nativeBuildInputs postInstall; - buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ]; - cmakeFlags = cmakeFlags ++ [ - "-DLLAMA_HIPBLAS=1" - "-DCMAKE_C_COMPILER=hipcc" - "-DCMAKE_CXX_COMPILER=hipcc" - # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM - # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ]; - }; - apps.llama-server = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama-server"; - }; - apps.llama-embedding = { - type = "app"; - program = "${self.packages.${system}.default}/bin/embedding"; - }; - apps.llama = { - type = "app"; - program = "${self.packages.${system}.default}/bin/llama"; - }; - apps.quantize = { - type = "app"; - program = "${self.packages.${system}.default}/bin/quantize"; - }; - apps.train-text-from-scratch = { - type = "app"; - program = "${self.packages.${system}.default}/bin/train-text-from-scratch"; - }; - apps.default = self.apps.${system}.llama; - devShells.default = pkgs.mkShell { - buildInputs = [ llama-python ]; - packages = nativeBuildInputs ++ osSpecific; - }; - devShells.extra = pkgs.mkShell { - buildInputs = [ llama-python-extra ]; - packages = nativeBuildInputs ++ osSpecific; - }; - }); + + outputs = { self, nixpkgs }: + + let + inherit (nixpkgs.lib) genAttrs; + overlays = import ./overlays.nix; + importNixpkgs = system: import nixpkgs { + inherit system; + overlays = [ overlays ]; + }; + systems = [ "aarch64-darwin" "aarch64-linux" "x86_64-darwin" "x86_64-linux" ]; + withSystemPackages = f: genAttrs systems (system: f (importNixpkgs system)); + in + + { + # These define the various ways to build the llama.cpp project. + # Integrate them into your flake.nix configuration by adding this + # overlay to nixpkgs.overlays. + overlays = { + default = overlays; + }; + + # These use the definitions from ./overlays.nix and expose them as installables. + packages = withSystemPackages (pkgs: { + default = pkgs.llama-cpp; + opencl = pkgs.llama-cpp-opencl; + cuda = pkgs.llama-cpp-cuda; + rocm = pkgs.llama-cpp-rocm; + }); + + # These use the definition of llama-cpp from ./overlays.nix and expose various + # binaries as apps so that they're able to be run with `nix run`. + apps = withSystemPackages (import ./apps.nix [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" + ]); + + # These expose a build environment for either a "default" or an "extra" set of + # dependencies. + devShells = withSystemPackages (import ./devshells.nix); + }; } diff --git a/llama-cpp.nix b/llama-cpp.nix new file mode 100644 index 00000000000000..1191d33df122bf --- /dev/null +++ b/llama-cpp.nix @@ -0,0 +1,118 @@ +{ lib +, stdenv +, cmake +, ninja +, pkg-config +, symlinkJoin +, python3 +, openmpi +, openblas +, cudaPackages +, rocmPackages +, clblast +, Accelerate ? null +, MetalKit ? null +, CoreVideo ? null +, CoreGraphics ? null +, useOpenCL ? false +, useCuda ? false +, useRocm ? false +}: + +let + inherit (lib) optional optionals versionOlder; + isDefault = !useOpenCL && !useCuda && !useRocm; + + # Give a little description difference between the flavors. 
+  descriptionSuffix = if useOpenCL then
+    " (OpenCL accelerated)"
+  else if useCuda then
+    " (CUDA accelerated)"
+  else if useRocm then
+    " (ROCm accelerated)"
+  else if (MetalKit != null) then
+    " (MetalKit accelerated)"
+  else "";
+
+  llama-python = python3.withPackages (ps: [ ps.numpy ps.sentencepiece ]);
+
+  # See ./overlays.nix for where these dependencies are passed in.
+  defaultBuildInputs = builtins.filter (p: p != null) [
+    Accelerate
+    MetalKit
+    CoreVideo
+    CoreGraphics
+  ];
+
+  cudaBuildInput = symlinkJoin (with cudaPackages; {
+    # HACK(Green-Sky): Nix currently has issues with CMake findcudatoolkit
+    # see https://github.com/NixOS/nixpkgs/issues/224291
+    # copied from jaxlib
+    name = "${cudatoolkit.name}-merged";
+    paths = [
+      cudatoolkit.lib
+      cudatoolkit.out
+    ] ++ optionals (versionOlder cudatoolkit.version "11") [
+      # For some reason, some of the required libs are in the targets/x86_64-linux
+      # directory; not sure why, but this works around it.
+      "${cudatoolkit}/targets/${stdenv.system}"
+    ];
+  });
+
+  rocmBuildInputs = with rocmPackages; [ clr hipblas rocblas ];
+in
+
+stdenv.mkDerivation {
+  name = "llama.cpp";
+  src = ./.;
+  meta = {
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    mainProgram = "llama";
+  };
+
+  postPatch = ''
+    substituteInPlace ./ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
+  '';
+
+  nativeBuildInputs = [ cmake ninja pkg-config ];
+
+  buildInputs = [ openmpi ]
+    ++ optional useOpenCL clblast
+    ++ optional useCuda cudaBuildInput
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals isDefault defaultBuildInputs;
+
+  cmakeFlags = [
+    "-DLLAMA_NATIVE=OFF"
+    "-DLLAMA_BUILD_SERVER=ON"
+    "-DBUILD_SHARED_LIBS=ON"
+    "-DCMAKE_SKIP_BUILD_RPATH=ON"
+  ]
+  ++ optional useOpenCL "-DLLAMA_CLBLAST=ON"
+  ++ optional useCuda "-DLLAMA_CUBLAS=ON"
+  ++ optionals useRocm [
+    "-DLLAMA_HIPBLAS=1"
+    "-DCMAKE_C_COMPILER=hipcc"
+    "-DCMAKE_CXX_COMPILER=hipcc"
+    # Build all targets supported by rocBLAS. When updating, search for TARGET_LIST_ROCM
+    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
+    # and select the line that matches the current nixpkgs version of rocBLAS.
+ "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals isDefault (if (MetalKit != null) then [ + "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" + "-DLLAMA_METAL=ON" + ] else [ + "-DLLAMA_BLAS=ON" + "-DLLAMA_BLAS_VENDOR=OpenBLAS" + ]); + + postInstall = '' + mv $out/bin/main $out/bin/llama + mv $out/bin/server $out/bin/llama-server + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; +} diff --git a/overlays.nix b/overlays.nix new file mode 100644 index 00000000000000..0292576ab4db0e --- /dev/null +++ b/overlays.nix @@ -0,0 +1,20 @@ +final: prev: + +let + inherit (prev.stdenv) isAarch32 isAarch64 isDarwin; + + darwinSpecific = if isAarch64 then { + inherit (prev.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit; + } else { + inherit (prev.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; + }; + + osSpecific = if isDarwin then darwinSpecific else {}; +in + +{ + llama-cpp = prev.callPackage ./llama-cpp.nix osSpecific; + llama-cpp-opencl = prev.callPackage ./llama-cpp.nix (osSpecific // { useOpenCL = true; }); + llama-cpp-cuda = prev.callPackage ./llama-cpp.nix (osSpecific // { useCuda = true; }); + llama-cpp-rocm = prev.callPackage ./llama-cpp.nix (osSpecific // { useRocm = true; }); +}
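
As a usage sketch (not part of this patch): another flake can consume the overlay added in ./overlays.nix roughly as below. The input name `llama-cpp` and the URL `github:ggerganov/llama.cpp` are assumptions for illustration; point the input wherever this flake actually lives.

    {
      inputs = {
        nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
        # Assumed URL; adjust to wherever this llama.cpp flake is hosted.
        llama-cpp.url = "github:ggerganov/llama.cpp";
      };

      outputs = { self, nixpkgs, llama-cpp }:
        let
          system = "x86_64-linux";
          pkgs = import nixpkgs {
            inherit system;
            # Brings llama-cpp, llama-cpp-opencl, llama-cpp-cuda, and
            # llama-cpp-rocm into pkgs, as defined in overlays.nix.
            overlays = [ llama-cpp.overlays.default ];
          };
        in
        {
          # Pick whichever flavor matches the hardware.
          packages.${system}.default = pkgs.llama-cpp-cuda;
        };
    }

Because llama-cpp.nix is instantiated with `callPackage`, individual flags can also be overridden downstream, e.g. `pkgs.llama-cpp.override { useOpenCL = true; }`. Within this repository itself, the new outputs should be reachable directly, e.g. `nix run .#llama-server` or `nix develop .#extra`.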