-
Notifications
You must be signed in to change notification settings - Fork 10.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1. Split into separate files per output. 2. Added overlays, so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs). 3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;` so that there's dependency injection rather than dependency lookup. 4. Add a description and meta information for each package. The description includes a short note on what each variant uses for acceleration.
- Loading branch information
1 parent
7082d24
commit 2e53df7
Showing
6 changed files
with
231 additions
and
171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Builds the flake `apps` attribute set for one package set. | ||
# Takes `names` (a list of app names) and then `pkgs`, and returns one | ||
# `{ type = "app"; program = ...; }` entry per name plus a `default` entry. | ||
names: pkgs: | ||
|
||
let | ||
# The first element of `names` is used as the default app, so `names` must be | ||
# non-empty for the `default` attribute to evaluate. | ||
default = builtins.elemAt names 0; | ||
# Wraps a single name in a one-attribute set so the entries can be merged with `//`. | ||
# NOTE(review): the app name is used verbatim as the file name under the | ||
# llama-cpp package's bin directory; confirm the package installs a binary | ||
# for every name passed in (e.g. `llama-embedding`). | ||
mkApp = name: { | ||
${name} = { | ||
type = "app"; | ||
program = "${pkgs.llama-cpp}/bin/${name}"; | ||
}; | ||
}; | ||
# Folds every name into a single attribute set keyed by app name. | ||
result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names; | ||
in | ||
|
||
# `default` is an alias for the app built from the first name. | ||
result // { default = result.${default}; } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Builds the flake `devShells` attribute set for one package set. | ||
# Takes `pkgs` and returns two shells: `default` and `extra`. | ||
pkgs: | ||
|
||
let | ||
# Python interpreter bundled with numpy and sentencepiece. | ||
llama-python = pkgs.python3.withPackages (ps: [ | ||
ps.numpy | ||
ps.sentencepiece | ||
]); | ||
|
||
# Same as `llama-python`, plus torchWithoutCuda and transformers. | ||
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime | ||
llama-python-extra = pkgs.python3.withPackages (ps: [ | ||
ps.numpy | ||
ps.sentencepiece | ||
ps.torchWithoutCuda | ||
ps.transformers | ||
]); | ||
in | ||
|
||
{ | ||
# Shell based on the inputs of `pkgs.llama-cpp`, with the lightweight Python environment added. | ||
# NOTE(review): `description` is passed directly to `mkShell` rather than under `meta`; | ||
# confirm this is where it is meant to go. | ||
default = pkgs.mkShell { | ||
name = "default"; | ||
description = "contains numpy and sentencepiece"; | ||
inputsFrom = [ pkgs.llama-cpp ]; | ||
buildInputs = [ llama-python ]; | ||
}; | ||
|
||
# Shell based on the inputs of `pkgs.llama-cpp`, with the heavier Python environment added. | ||
extra = pkgs.mkShell { | ||
name = "extra"; | ||
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; | ||
inputsFrom = [ pkgs.llama-cpp ]; | ||
buildInputs = [ llama-python-extra ]; | ||
}; | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,139 +1,49 @@ | ||
{ | ||
inputs = { | ||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; | ||
flake-utils.url = "github:numtide/flake-utils"; | ||
}; | ||
# Per-system outputs generated through `flake-utils.lib.eachDefaultSystem`: | ||
# four package variants (default, opencl, cuda, rocm), a set of apps that | ||
# point at binaries of the default package, and two development shells. | ||
outputs = { self, nixpkgs, flake-utils }: | ||
flake-utils.lib.eachDefaultSystem (system: | ||
let | ||
name = "llama.cpp"; | ||
src = ./.; | ||
meta.mainProgram = "llama"; | ||
inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin; | ||
# Libraries shared by every package variant. | ||
buildInputs = with pkgs; [ openmpi ]; | ||
# `buildInputs` plus platform-dependent libraries: Apple frameworks on | ||
# Darwin, OpenBLAS everywhere else. The `isAarch32 && isDarwin` branch and | ||
# the generic `isDarwin` branch list the same frameworks. | ||
osSpecific = with pkgs; buildInputs ++ ( | ||
if isAarch64 && isDarwin then | ||
with pkgs.darwin.apple_sdk_11_0.frameworks; [ | ||
Accelerate | ||
MetalKit | ||
] | ||
else if isAarch32 && isDarwin then | ||
with pkgs.darwin.apple_sdk.frameworks; [ | ||
Accelerate | ||
CoreGraphics | ||
CoreVideo | ||
] | ||
else if isDarwin then | ||
with pkgs.darwin.apple_sdk.frameworks; [ | ||
Accelerate | ||
CoreGraphics | ||
CoreVideo | ||
] | ||
else | ||
with pkgs; [ openblas ] | ||
); | ||
pkgs = import nixpkgs { inherit system; }; | ||
nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ]; | ||
cudatoolkit_joined = with pkgs; symlinkJoin { | ||
# HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit | ||
# see https://github.com/NixOS/nixpkgs/issues/224291 | ||
# copied from jaxlib | ||
name = "${cudaPackages.cudatoolkit.name}-merged"; | ||
paths = [ | ||
cudaPackages.cudatoolkit.lib | ||
cudaPackages.cudatoolkit.out | ||
] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [ | ||
# for some reason some of the required libs are in the targets/x86_64-linux | ||
# directory; not sure why but this works around it | ||
"${cudaPackages.cudatoolkit}/targets/${system}" | ||
]; | ||
}; | ||
llama-python = | ||
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]); | ||
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime | ||
llama-python-extra = | ||
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]); | ||
# Rewrites the Metal resource lookup in ggml-metal.m to a path under $out/bin | ||
# and replaces the `/usr/bin/env python` text in the top-level *.py files | ||
# with the `llama-python` interpreter path. | ||
postPatch = '' | ||
substituteInPlace ./ggml-metal.m \ | ||
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" | ||
substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python' | ||
''; | ||
# Renames `main` to `llama` and `server` to `llama-server`, and installs | ||
# llama.h under $out/include. Other binaries keep their original names. | ||
postInstall = '' | ||
mv $out/bin/main $out/bin/llama | ||
mv $out/bin/server $out/bin/llama-server | ||
mkdir -p $out/include | ||
cp ${src}/llama.h $out/include/ | ||
''; | ||
# Flags common to every variant; each package appends its own below. | ||
cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ]; | ||
in | ||
{ | ||
# Default build: Metal on aarch64 Darwin, OpenBLAS otherwise. | ||
packages.default = pkgs.stdenv.mkDerivation { | ||
inherit name src meta postPatch nativeBuildInputs postInstall; | ||
buildInputs = osSpecific; | ||
cmakeFlags = cmakeFlags | ||
++ (if isAarch64 && isDarwin then [ | ||
"-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1" | ||
"-DLLAMA_METAL=ON" | ||
] else [ | ||
"-DLLAMA_BLAS=ON" | ||
"-DLLAMA_BLAS_VENDOR=OpenBLAS" | ||
]); | ||
}; | ||
# In the three variants below the attribute sets are not `rec`, so the | ||
# `buildInputs` and `cmakeFlags` on the right-hand side refer to the | ||
# let-bound values above (not to `osSpecific`). | ||
packages.opencl = pkgs.stdenv.mkDerivation { | ||
inherit name src meta postPatch nativeBuildInputs postInstall; | ||
buildInputs = with pkgs; buildInputs ++ [ clblast ]; | ||
cmakeFlags = cmakeFlags ++ [ | ||
"-DLLAMA_CLBLAST=ON" | ||
]; | ||
}; | ||
packages.cuda = pkgs.stdenv.mkDerivation { | ||
inherit name src meta postPatch nativeBuildInputs postInstall; | ||
buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ]; | ||
cmakeFlags = cmakeFlags ++ [ | ||
"-DLLAMA_CUBLAS=ON" | ||
]; | ||
}; | ||
packages.rocm = pkgs.stdenv.mkDerivation { | ||
inherit name src meta postPatch nativeBuildInputs postInstall; | ||
buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ]; | ||
cmakeFlags = cmakeFlags ++ [ | ||
"-DLLAMA_HIPBLAS=1" | ||
"-DCMAKE_C_COMPILER=hipcc" | ||
"-DCMAKE_CXX_COMPILER=hipcc" | ||
# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM | ||
# in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt | ||
# and select the line that matches the current nixpkgs version of rocBLAS. | ||
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" | ||
]; | ||
}; | ||
# Apps all point into the default package. Note that the app name and the | ||
# binary name differ for `llama-embedding` (binary is `embedding`). | ||
apps.llama-server = { | ||
type = "app"; | ||
program = "${self.packages.${system}.default}/bin/llama-server"; | ||
}; | ||
apps.llama-embedding = { | ||
type = "app"; | ||
program = "${self.packages.${system}.default}/bin/embedding"; | ||
}; | ||
apps.llama = { | ||
type = "app"; | ||
program = "${self.packages.${system}.default}/bin/llama"; | ||
}; | ||
apps.quantize = { | ||
type = "app"; | ||
program = "${self.packages.${system}.default}/bin/quantize"; | ||
}; | ||
apps.train-text-from-scratch = { | ||
type = "app"; | ||
program = "${self.packages.${system}.default}/bin/train-text-from-scratch"; | ||
}; | ||
apps.default = self.apps.${system}.llama; | ||
# Development shells: build tools and platform libraries plus a Python environment. | ||
devShells.default = pkgs.mkShell { | ||
buildInputs = [ llama-python ]; | ||
packages = nativeBuildInputs ++ osSpecific; | ||
}; | ||
devShells.extra = pkgs.mkShell { | ||
buildInputs = [ llama-python-extra ]; | ||
packages = nativeBuildInputs ++ osSpecific; | ||
}; | ||
}); | ||
|
||
# Flake outputs: an overlay, plus packages, apps, and devShells generated for | ||
# each entry of `systems` from a nixpkgs instance that has the overlay applied. | ||
outputs = { self, nixpkgs }: | ||
|
||
let | ||
inherit (nixpkgs.lib) genAttrs; | ||
# The overlay defined in ./overlays.nix. | ||
overlays = import ./overlays.nix; | ||
# Imports nixpkgs for the given system with the overlay applied. | ||
importNixpkgs = system: import nixpkgs { | ||
inherit system; | ||
overlays = [ overlays ]; | ||
}; | ||
# Systems for which per-system outputs are generated. | ||
systems = [ "aarch64-darwin" "aarch64-linux" "x86_64-darwin" "x86_64-linux" ]; | ||
# Builds an attribute set keyed by system; `f` receives that system's package set. | ||
withSystemPackages = f: genAttrs systems (system: f (importNixpkgs system)); | ||
in | ||
|
||
{ | ||
# These define the various ways to build the llama.cpp project. | ||
# Integrate them into your flake.nix configuration by adding this | ||
# overlay to nixpkgs.overlays. | ||
overlays = { | ||
default = overlays; | ||
}; | ||
|
||
# These use the definitions from ./overlays.nix and expose them as installables. | ||
packages = withSystemPackages (pkgs: { | ||
default = pkgs.llama-cpp; | ||
opencl = pkgs.llama-cpp-opencl; | ||
cuda = pkgs.llama-cpp-cuda; | ||
rocm = pkgs.llama-cpp-rocm; | ||
}); | ||
|
||
# These use the definition of llama-cpp from ./overlays.nix and expose various | ||
# binaries as apps so that they're able to be run with `nix run`. | ||
# The list is the first argument of ./apps.nix; `withSystemPackages` supplies the second. | ||
apps = withSystemPackages (import ./apps.nix [ | ||
"llama" | ||
"llama-embedding" | ||
"llama-server" | ||
"quantize" | ||
"train-text-from-scratch" | ||
]); | ||
|
||
# These expose a build environment for either a "default" or an "extra" set of | ||
# dependencies. | ||
devShells = withSystemPackages (import ./devshells.nix); | ||
}; | ||
} |
Oops, something went wrong.