1. Split the flake into separate files, one per output.
2. Added overlays so that this flake can be integrated into others (a short consumption sketch follows below). The overlay attribute names are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm`, so they fit into the broader set of Nix packages from [nixpkgs](https://github.com/NixOS/nixpkgs).
3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;`, so dependencies are injected explicitly rather than looked up from the ambient scope.
4. Added a description and meta information for each package; the description notes which backend accelerates each flavor.
5. Use specific CUDA packages instead of `cudatoolkit`, on the advice of @SomeoneSerge.
1 parent 708e179 · commit afb5d28 · Showing 6 changed files with 238 additions and 171 deletions.
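Before the file listings, here is a minimal sketch of how another flake might consume the overlay from point 2. The nixpkgs pin, the input URL, and the `overlays.default` attribute name are assumptions, not part of this commit; the `llama-cpp` package name (and its `-opencl`, `-cuda`, `-rocm` variants) comes from the overlay added below.

{
  inputs = {
    nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";   # assumed pin
    llama-cpp.url = "github:ggerganov/llama.cpp";           # assumed input URL
  };

  outputs = { self, nixpkgs, llama-cpp }: {
    # Applying the overlay makes pkgs.llama-cpp (and the -opencl/-cuda/-rocm
    # variants) available in the consuming flake's package set.
    packages.x86_64-linux.default =
      (import nixpkgs {
        system = "x86_64-linux";
        overlays = [ llama-cpp.overlays.default ];   # attribute name assumed
      }).llama-cpp;
  };
}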
@@ -0,0 +1,14 @@
names: pkgs:

let
  default = builtins.elemAt names 0;
  mkApp = name: {
    ${name} = {
      type = "app";
      program = "${pkgs.llama-cpp}/bin/${name}";
    };
  };
  result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names;
in

result // { default = result.${default}; }
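A hedged sketch of how this helper evaluates when called. The `./apps.nix` and `./overlay.nix` paths and the `<nixpkgs>` channel are assumptions; the binary names `llama` and `llama-server` are the ones installed by `postInstall` in the package definition further below.

let
  # The helper needs a package set that already contains llama-cpp,
  # so the overlay from this commit is applied first.
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
  apps = import ./apps.nix [ "llama" "llama-server" ] pkgs;
in
# apps.llama.program        -> "${pkgs.llama-cpp}/bin/llama"
# apps.llama-server.program -> "${pkgs.llama-cpp}/bin/llama-server"
# apps.default              -> same attrset as apps.llama (first name in the list)
apps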
@@ -0,0 +1,32 @@
pkgs:

let
  llama-python = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
  ]);

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
    ps.torchWithoutCuda
    ps.transformers
  ]);
in

{
  default = pkgs.mkShell {
    name = "default";
    description = "contains numpy and sentencepiece";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python ];
  };

  extra = pkgs.mkShell {
    name = "extra";
    description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python-extra ];
  };
}
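Assuming this file backs the flake's `devShells` output (the `./devshells.nix` path and the wiring are guesses, not part of this listing), the shells above would be exposed roughly like this; `nix develop` then enters `default` and `nix develop .#extra` enters the torch/transformers environment.

{
  outputs = { self, nixpkgs, ... }:
    let
      pkgs = import nixpkgs {
        system = "x86_64-linux";
        overlays = [ (import ./overlay.nix) ];   # supplies pkgs.llama-cpp for inputsFrom
      };
    in {
      devShells.x86_64-linux = import ./devshells.nix pkgs;
    };
}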
@@ -0,0 +1,20 @@
final: prev:

let
  inherit (prev.stdenv) isAarch32 isAarch64 isDarwin;

  darwinSpecific = if isAarch64 then {
    inherit (prev.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit;
  } else {
    inherit (prev.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo;
  };

  osSpecific = if isDarwin then darwinSpecific else {};
in

{
  llama-cpp = prev.callPackage ./package.nix osSpecific;
  llama-cpp-opencl = prev.callPackage ./package.nix (osSpecific // { useOpenCL = true; });
  llama-cpp-cuda = prev.callPackage ./package.nix (osSpecific // { useCuda = true; });
  llama-cpp-rocm = prev.callPackage ./package.nix (osSpecific // { useRocm = true; });
}
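Because every attribute above is produced by callPackage (point 3 of the commit message), its arguments stay overridable after the overlay is applied. An illustrative sketch only; the `<nixpkgs>` and `./overlay.nix` paths are assumptions:

let
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
in
# Swap a single argument of package.nix without editing the overlay itself.
pkgs.llama-cpp.override { useOpenCL = true; }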
@@ -0,0 +1,119 @@
{ lib
, config
, stdenv
, cmake
, ninja
, pkg-config
, git
, python3
, openmpi
, openblas
, cudaPackages
, rocmPackages
, clblast
, Accelerate ? null
, MetalKit ? null
, CoreVideo ? null
, CoreGraphics ? null
, useOpenCL ? false
, useCuda ? config.cudaSupport
, useRocm ? config.rocmSupport
}@inputs:

let
  inherit (lib) cmakeBool cmakeFeature optional optionals versionOlder;
  isDefault = !useOpenCL && !useCuda && !useRocm;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  # Give a little description difference between the flavors.
  descriptionSuffix = if useOpenCL then
    " (OpenCL accelerated)"
  else if useCuda then
    " (CUDA accelerated)"
  else if useRocm then
    " (ROCm accelerated)"
  else if (MetalKit != null) then
    " (MetalKit accelerated)"
  else "";

  # TODO: package the Python in this repository in a Nix-like way.
  llama-python = python3.withPackages (ps: [ ps.numpy ps.sentencepiece ]);

  # See ./overlay.nix for where these dependencies are passed in.
  defaultBuildInputs = builtins.filter (p: p != null) [
    Accelerate
    MetalKit
    CoreVideo
    CoreGraphics
  ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [ clr hipblas rocblas ];
in

effectiveStdenv.mkDerivation {
  name = "llama.cpp";
  src = ../.;
  meta = {
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    mainProgram = "llama";
  };

  postPatch = ''
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  '';

  nativeBuildInputs = [ cmake ninja pkg-config git ]
    ++ optionals useCuda [ cudaPackages.cuda_nvcc ];

  buildInputs = [ openmpi ]
    ++ optional useOpenCL clblast
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useRocm rocmBuildInputs
    ++ optionals isDefault defaultBuildInputs;

  cmakeFlags = [
    (cmakeBool "LLAMA_NATIVE" true)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ]
  ++ optional useOpenCL (cmakeBool "LLAMA_CLBLAST" true)
  ++ optional useCuda (cmakeBool "LLAMA_CUBLAS" true)
  ++ optionals useRocm [
    (cmakeBool "LLAMA_HIPBLAS" true)
    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

    # Build all targets supported by rocBLAS. When updating, search for TARGET_LIST_ROCM
    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
    # and select the line that matches the current nixpkgs version of rocBLAS.
    # Should likely use `rocmPackages.clr.gpuTargets`.
    "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
  ]
  ++ optionals isDefault (if (MetalKit != null) then [
    "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
    "-DLLAMA_METAL=ON"
  ] else [
    "-DLLAMA_BLAS=ON"
    "-DLLAMA_BLAS_VENDOR=OpenBLAS"
  ]);

  postInstall = ''
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
    mkdir -p $out/include
    cp $src/llama.h $out/include/
  '';
}
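A hedged sketch of instantiating this file directly, outside the overlay. callPackage fills `cmake`, `openmpi`, `cudaPackages`, and the rest by name from nixpkgs, while the Darwin frameworks default to null, which matches a Linux build. The `<nixpkgs>` channel path and the `allowUnfree` setting are assumptions (the CUDA packages are unfree), not part of this commit.

let
  pkgs = import <nixpkgs> { config.allowUnfree = true; };
in
# Builds the CUDA flavor: backendStdenv, cuda_cccl, cuda_cudart and libcublas
# are pulled from cudaPackages as declared above.
pkgs.callPackage ./package.nix { useCuda = true; }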
Some generated files are not rendered by default.