flake.nix: rewrite
1. Split the flake into separate files, one per output.

2. Add overlays so that this flake can be integrated into other flakes.
   The names in the overlay are `llama-cpp`, `llama-cpp-opencl`,
   `llama-cpp-cuda`, and `llama-cpp-rocm`, so they fit into the
   broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).
   A consumer sketch follows this list.

3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
   rather than `with pkgs;`, so that dependencies are injected explicitly
   rather than looked up implicitly (see the sketch after this list).

4. Add a description and meta information for each package.
   The description notes which backend accelerates each variant.

5. Use specific CUDA packages instead of cudatoolkit on the advice of @SomeoneSerge.
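
Below is a sketch of how a downstream flake might consume the new overlay (item 2). The input URL, the `overlays.default` attribute path, and the chosen system are assumptions, since the flake.nix changes themselves are not rendered in this diff.

```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    llama-cpp.url = "github:ggerganov/llama.cpp"; # assumed input URL
  };

  outputs = { self, nixpkgs, llama-cpp }:
    let
      system = "x86_64-linux";
      pkgs = import nixpkgs {
        inherit system;
        # Apply the overlay from .devops/overlay.nix, assuming the flake
        # exposes it as `overlays.default`.
        overlays = [ llama-cpp.overlays.default ];
      };
    in {
      # The overlay adds llama-cpp, llama-cpp-opencl, llama-cpp-cuda,
      # and llama-cpp-rocm to the package set.
      packages.${system}.default = pkgs.llama-cpp-cuda;
    };
}
```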
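And a minimal sketch of the lookup-versus-injection difference from item 3; the call site and flag are illustrative.

```nix
let
  pkgs = import <nixpkgs> { };
in
  # Dependency lookup: `with pkgs;` inside a file pulls names out of the
  # package set implicitly, so its inputs are invisible to the caller.
  #
  # Dependency injection: callPackage reads the argument list of
  # .devops/package.nix and passes the matching attributes from pkgs,
  # and any argument can be overridden at the call site:
  pkgs.callPackage ./.devops/package.nix { useCuda = true; }
```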
philiptaron committed Dec 23, 2023
1 parent 708e179 commit afb5d28
Showing 6 changed files with 238 additions and 171 deletions.
14 changes: 14 additions & 0 deletions .devops/apps.nix
@@ -0,0 +1,14 @@
names: pkgs:

let
  default = builtins.elemAt names 0;
  mkApp = name: {
    ${name} = {
      type = "app";
      program = "${pkgs.llama-cpp}/bin/${name}";
    };
  };
  result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names;
in

result // { default = result.${default}; }
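
A sketch of how flake.nix might invoke this file (paths relative to .devops/); the call site and the exact list of names are assumptions, though `llama` and `llama-server` do exist per the postInstall step in package.nix below.

```nix
let
  # Assumes a package set with the overlay from ./overlay.nix applied,
  # since mkApp points at pkgs.llama-cpp.
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
in
  # Yields { llama = { type = "app"; ... }; llama-server = ...; default = ...; },
  # suitable for a flake's `apps.${system}` output; `nix run .` would launch
  # the first name in the list.
  import ./apps.nix [ "llama" "llama-server" ] pkgs
```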
32 changes: 32 additions & 0 deletions .devops/devshells.nix
@@ -0,0 +1,32 @@
pkgs:

let
  llama-python = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
  ]);

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
    ps.torchWithoutCuda
    ps.transformers
  ]);
in

{
  default = pkgs.mkShell {
    name = "default";
    description = "contains numpy and sentencepiece";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python ];
  };

  extra = pkgs.mkShell {
    name = "extra";
    description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python-extra ];
  };
}
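
Similarly, a sketch of a likely call site for these shells (paths relative to .devops/); the exact wiring in flake.nix is an assumption.

```nix
let
  # pkgs.llama-cpp comes from ./overlay.nix, so the overlay must be applied.
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
in
  # Produces { default = <shell>; extra = <shell>; }, ready to expose as a
  # flake's `devShells.${system}` output; `nix develop .#extra` would enter
  # the shell with torchWithoutCuda and transformers.
  import ./devshells.nix pkgs
```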
20 changes: 20 additions & 0 deletions .devops/overlay.nix
@@ -0,0 +1,20 @@
final: prev:

let
  inherit (prev.stdenv) isAarch32 isAarch64 isDarwin;

  darwinSpecific = if isAarch64 then {
    inherit (prev.darwin.apple_sdk_11_0.frameworks) Accelerate MetalKit;
  } else {
    inherit (prev.darwin.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo;
  };

  osSpecific = if isDarwin then darwinSpecific else {};
in

{
  llama-cpp = prev.callPackage ./package.nix osSpecific;
  llama-cpp-opencl = prev.callPackage ./package.nix (osSpecific // { useOpenCL = true; });
  llama-cpp-cuda = prev.callPackage ./package.nix (osSpecific // { useCuda = true; });
  llama-cpp-rocm = prev.callPackage ./package.nix (osSpecific // { useRocm = true; });
}
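
How flake.nix exposes this file as a flake output is not part of this diff; a sketch, assuming the conventional `overlays.default` name:

```nix
# In flake.nix outputs (sketch; attribute names are assumptions):
{
  overlays.default = import ./.devops/overlay.nix;

  # Per-system packages could then be drawn from a nixpkgs instance with
  # the overlay applied, e.g.:
  #   pkgs = import nixpkgs { inherit system; overlays = [ self.overlays.default ]; };
  #   packages.${system} = { inherit (pkgs) llama-cpp llama-cpp-cuda llama-cpp-rocm; };
}
```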
119 changes: 119 additions & 0 deletions .devops/package.nix
@@ -0,0 +1,119 @@
{ lib
, config
, stdenv
, cmake
, ninja
, pkg-config
, git
, python3
, openmpi
, openblas
, cudaPackages
, rocmPackages
, clblast
, Accelerate ? null
, MetalKit ? null
, CoreVideo ? null
, CoreGraphics ? null
, useOpenCL ? false
, useCuda ? config.cudaSupport
, useRocm ? config.rocmSupport
}@inputs:

let
  inherit (lib) cmakeBool cmakeFeature optional optionals versionOlder;
  isDefault = !useOpenCL && !useCuda && !useRocm;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  # Give a little description difference between the flavors.
  descriptionSuffix = if useOpenCL then
    " (OpenCL accelerated)"
  else if useCuda then
    " (CUDA accelerated)"
  else if useRocm then
    " (ROCm accelerated)"
  else if (MetalKit != null) then
    " (MetalKit accelerated)"
  else "";

  # TODO: package the Python in this repository in a Nix-like way.
  llama-python = python3.withPackages (ps: [ ps.numpy ps.sentencepiece ]);

  # See ./overlay.nix for where these dependencies are passed in.
  defaultBuildInputs = builtins.filter (p: p != null) [
    Accelerate
    MetalKit
    CoreVideo
    CoreGraphics
  ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [ clr hipblas rocblas ];
in

effectiveStdenv.mkDerivation {
  name = "llama.cpp";
  src = ../.;
  meta = {
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    mainProgram = "llama";
  };

  postPatch = ''
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  '';

  nativeBuildInputs = [ cmake ninja pkg-config git ]
    ++ optionals useCuda [ cudaPackages.cuda_nvcc ];

  buildInputs = [ openmpi ]
    ++ optional useOpenCL clblast
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useRocm rocmBuildInputs
    ++ optionals isDefault defaultBuildInputs;

  cmakeFlags = [
    (cmakeBool "LLAMA_NATIVE" true)
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "BUILD_SHARED_LIBS" true)
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
  ]
  ++ optional useOpenCL (cmakeBool "LLAMA_CLBLAST" true)
  ++ optional useCuda (cmakeBool "LLAMA_CUBLAS" true)
  ++ optionals useRocm [
    (cmakeBool "LLAMA_HIPBLAS" true)
    (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
    (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

    # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
    # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
    # and select the line that matches the current nixpkgs version of rocBLAS.
    # Should likely use `rocmPackages.clr.gpuTargets`.
    "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
  ]
  ++ optionals isDefault (if (MetalKit != null) then [
    "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
    "-DLLAMA_METAL=ON"
  ] else [
    "-DLLAMA_BLAS=ON"
    "-DLLAMA_BLAS_VENDOR=OpenBLAS"
  ]);

  postInstall = ''
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
    mkdir -p $out/include
    cp $src/llama.h $out/include/
  '';
}
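
Because the overlay instantiates this file with callPackage, every argument above stays overridable downstream; a brief sketch (paths relative to .devops/, attribute name from overlay.nix):

```nix
let
  pkgs = import <nixpkgs> { overlays = [ (import ./overlay.nix) ]; };
in
  # Rebuild the default flavor with OpenCL/CLBlast enabled instead of
  # using the pre-wired llama-cpp-opencl attribute.
  pkgs.llama-cpp.override { useOpenCL = true; }
```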
40 changes: 3 additions & 37 deletions flake.lock
