Swiftui metal update #1

Closed · wants to merge 274 commits into from

274 commits
569550d
readme : add link to grammars app (#3388)
a10y Sep 29, 2023
0a4a4a0
readme : update hot topics + model links (#3399)
BarfingLemurs Sep 29, 2023
2777a84
llama : quantize up to 31% faster on Linux and Windows with mmap (#3206)
cebtenzzre Sep 29, 2023
bc34dd4
train : fix KQ_pos allocation (#3392)
ggerganov Sep 29, 2023
40e07a6
llama.cpp : add documentation about rope_freq_base and scale values (…
slaren Sep 29, 2023
f5ef5cf
ggml-cuda : perform cublas mat mul of quantized types as f16 (#3412)
slaren Sep 30, 2023
c97f01c
infill : add new example + extend server API (#3296)
vvhg1 Oct 2, 2023
ea55295
docker : ignore Git files (#3314)
kevinji Oct 2, 2023
095231d
cmake : fix transient definitions in find pkg (#3411)
bandoti Oct 2, 2023
a847676
metal : set log callback before initializing (#3427)
phronmophobic Oct 2, 2023
a03ce38
finetune : fix #3404 (#3437)
xaedes Oct 2, 2023
9476b01
cmake : make CUDA flags more similar to the Makefile (#3420)
cebtenzzre Oct 2, 2023
0fe3210
gguf : general usability improvements (#3409)
cebtenzzre Oct 2, 2023
29a404a
gguf : add BERT, MPT, and GPT-J arch info (#3408)
cebtenzzre Oct 2, 2023
665018c
CLBlast: Add broadcast support for matrix multiplication (#3402)
shibe2 Oct 2, 2023
e78f0b0
cmake : increase minimum version for add_link_options (#3444)
cebtenzzre Oct 2, 2023
1c84003
convert : fix vocab size when not defined in hparams (#3421)
cebtenzzre Oct 2, 2023
ff5a3f0
Work on the BPE tokenizer (#3252)
goerch Oct 3, 2023
017efe8
cmake : make LLAMA_NATIVE flag actually use the instructions supporte…
netrunnereve Oct 3, 2023
f56e1ba
metal : alibi for arbitrary number of heads (#3426)
li-plus Oct 3, 2023
48be797
llama : expose model's rope_freq_scale in the API (#3418)
grencez Oct 3, 2023
ac2219f
llama : fix session saving/loading (#3400)
ggerganov Oct 3, 2023
8186242
main : consistent prefix/suffix coloring (#3425)
h-h-h-h Oct 3, 2023
79f34ab
ggml : add RISC-V Vector Support for K-Quants and improved the existi…
Tameem-10xE Oct 3, 2023
f72f8f2
finetune : readme fix typo (#3465)
iammerrick Oct 4, 2023
f93af02
sync : ggml (conv 1d + 2d updates, UB fixes) (#3468)
ggerganov Oct 4, 2023
f8c90cd
llm : add Refact model (#3329)
ds5t5 Oct 4, 2023
0d152b3
ggml : fix build after #3329
ggerganov Oct 4, 2023
beabc8c
readme : add project status link
ggerganov Oct 4, 2023
019ba1d
convert : fix Baichuan2 models by using vocab size in config.json (#3…
KerfuffleV2 Oct 4, 2023
0745384
ci : add swift build via xcodebuild (#3482)
jhen0409 Oct 5, 2023
8f3a642
swift : disable ACCELERATE_NEW_LAPACK (#3481)
jhen0409 Oct 5, 2023
e8b8d32
server : fix incorrect num_tokens_predicted (#3480)
jhen0409 Oct 5, 2023
e2583cb
CLBlast: Fix handling of on-device tensor data
shibe2 Oct 5, 2023
acec9ea
common : process escape sequences in reverse prompts (#3461)
staviq Oct 5, 2023
45eba93
build : use std::make_tuple() for compatibility with older GCC versio…
kenvix Oct 5, 2023
48edda3
convert : update Falcon script for new HF config (#3448)
cebtenzzre Oct 5, 2023
5e97a60
Merge branch 'master' into swiftui_metal
bachittle Oct 5, 2023
ae6beb4
initial conversion to new format, utf8 errors?
bachittle Oct 6, 2023
090383b
bug fixes, but now has an invalid memory access :(
bachittle Oct 6, 2023
04b2f43
ci : fix xcodebuild destinations (#3491)
jhen0409 Oct 6, 2023
16820a5
llama : correct hparams comparison (#3446)
l3utterfly Oct 6, 2023
97af49f
server : reuse llama_sample_token common util (#3494)
jhen0409 Oct 6, 2023
a8777ad
parallel : add option to load external prompt file (#3416)
pudepiedj Oct 6, 2023
0c731ca
prompts : fix editorconfig checks after #3416
ggerganov Oct 6, 2023
9ca79d5
kv cache slot search improvements (#3493)
KerfuffleV2 Oct 6, 2023
cb13d73
server : docs fix default values and add n_probs (#3506)
Mihaiii Oct 6, 2023
1faaae8
readme : update models, cuda + ppl instructions (#3510)
BarfingLemurs Oct 6, 2023
3a716b4
Fix for #3454 (#3455)
goerch Oct 7, 2023
0e797c2
llm : support Adept Persimmon 8B (#3410)
phillip-kravtsov Oct 7, 2023
c26765a
metal : support default.metallib load & reuse code for swift package …
jhen0409 Oct 7, 2023
f1782c6
quantize : fail fast on write errors (#3521)
cebtenzzre Oct 7, 2023
c47066d
py : change version of numpy requirement to 1.24.4 (#3515)
lyjia Oct 7, 2023
4d03833
gguf.py : fix CI for publishing GGUF package (#3532)
monatis Oct 7, 2023
a16e89c
Fix trying to strip newline from empty prompt and cfg prompt file con…
KerfuffleV2 Oct 7, 2023
63d3b06
llama : fix missing break in Persimmon arch case statements (#3535)
KerfuffleV2 Oct 8, 2023
b0ec521
metal : support MTLGPUFamily < Apple7, formatting, style (#3524)
ggerganov Oct 8, 2023
7d8b249
zig : fix build by introducing train.cpp (#3539)
robertluo Oct 8, 2023
94e502d
ci : enable on obj-c changes + fix metal build (#3540)
ggerganov Oct 8, 2023
a1202a3
k-quants : fix comments about block sizing (#3499)
jrudolph Oct 8, 2023
9c38d18
api_like_OAI.py : simplify function (#2796)
arcrank Oct 8, 2023
8e6716a
api_like_OAI.py : compat with Microsoft Guidance (#2746)
ryderwishart Oct 8, 2023
eee42c6
ci : add Zig CI/CD and fix build (#2996)
kassane Oct 8, 2023
db3abcc
sync : ggml (ggml-backend) (#3548)
ggerganov Oct 8, 2023
dcc09d2
metal : do not use mul_mm kernels when ne00 < 64 (#3542)
ggerganov Oct 9, 2023
fcca0a7
refact : fix convert script + zero out KV cache to avoid nans (#3523)
ggerganov Oct 9, 2023
95bd60a
ggml-alloc : fix assert in debug builds (#3555)
slaren Oct 9, 2023
11ea5c7
infill. : fix tokenization (#3508)
vvhg1 Oct 10, 2023
f5f9121
llm : add MPT support (#3417)
jploski Oct 10, 2023
0aa6595
swift : improvements and fixes (#3564)
jhen0409 Oct 10, 2023
02d2875
llm : add bloom models (#3553)
xingchensong Oct 10, 2023
c5b4936
readme : add bloom (#3570)
xingchensong Oct 10, 2023
233fc1c
Minor improvements in GPT2 tokenizer (#3567)
goerch Oct 10, 2023
9f6ede1
Add MPT model to supported models in README.md (#3574)
Galunid Oct 10, 2023
24ba3d8
examples : add batched.swift + improve CI for swift (#3562)
zshannon Oct 11, 2023
8c70a5f
batched : add bench tool (#3545)
ggerganov Oct 11, 2023
70c29da
common : fix mirostat state when using multiple sequences (#3543)
KerfuffleV2 Oct 11, 2023
a8bdd65
server : add parameter -tb N, --threads-batch N (#3584)
m18coppola Oct 11, 2023
b8fe4b5
main : fix session loading bug (#3400)
ggerganov Oct 11, 2023
57dd55e
server : fix kv cache management (#3588)
ggerganov Oct 12, 2023
6b3ae4d
prompts : add mnemonics.txt
ggerganov Oct 12, 2023
b016596
server : add completion mode (no chat) (#3582)
akx Oct 12, 2023
1a8c879
ci : check if there is enough VRAM (#3596)
ggerganov Oct 12, 2023
f3040be
typo : it is `--n-gpu-layers` not `--gpu-layers` (#3592)
ianscrivener Oct 12, 2023
d28e572
cmake : fix add_compile_options on macOS
ggerganov Oct 12, 2023
9e24cc6
docs : fix typo GOMP_CPU_AFFINITY (#3597)
maekawatoshiki Oct 12, 2023
370359e
examples: support LLaVA v1.5 (multimodal model) (#3436)
monatis Oct 12, 2023
1e0e873
CLBlast: Fix matrix-vector multiplication (#3544)
shibe2 Oct 12, 2023
424b638
ggml : add context enumeration functions (#3605)
slaren Oct 13, 2023
2a4bcba
llama : remove n_threads from llama_decode_internal (#3614)
danbev Oct 13, 2023
11dc109
Honor -ngl option for Cuda offloading in llava (#3621)
monatis Oct 14, 2023
11bff29
MPT : support GQA for replit-code-v1.5 (#3627)
cebtenzzre Oct 15, 2023
940efa9
llava : fix tokenization to not add bos between image embeddings and …
ggerganov Oct 16, 2023
281ef73
k-quants : fix quantization ranges (#3646)
ggerganov Oct 17, 2023
1a15955
tokenizer : special token handling (#3538)
staviq Oct 17, 2023
5fe268a
readme : add Aquila2 links (#3610)
ftgreat Oct 17, 2023
1142013
save-load-state : fix example + add ci test (#3655)
ggerganov Oct 17, 2023
3ad1e3f
server : documentation of JSON return value of /completion endpoint (…
coezbek Oct 17, 2023
e74c705
editorconfig : remove trailing spaces
ggerganov Oct 17, 2023
a5e8c1d
train-text-from-scratch : fix assert failure in ggml-alloc (#3618)
slaren Oct 17, 2023
40e5ce0
CLBlast: Fix temporary buffer size for f16 conversion (wsize)
shibe2 Oct 11, 2023
8402566
readme : update hot-topics & models, detail windows release in usage …
BarfingLemurs Oct 17, 2023
e1675d1
llama : avoid fprintf in favor of LLAMA_LOG (#3538)
ggerganov Oct 17, 2023
cb33f43
fix embeddings when using CUDA (#3657)
slaren Oct 17, 2023
1117d06
opencl : fix element-wise multiplication (#3656)
shibe2 Oct 18, 2023
c67fe68
metal : implement q5_0 and q5_1 kernels (#3648)
jhen0409 Oct 18, 2023
0e89203
speculative : add tree-based sampling example (#3624)
ggerganov Oct 18, 2023
4e82b2e
speculative : bug fixes
ggerganov Oct 18, 2023
004797f
readme : update hot topics
ggerganov Oct 18, 2023
60abea9
llava : avoid segfault in case of non-existent mmproj file (#3674)
monatis Oct 19, 2023
f3b25e4
multimodal : add BakLLaVA conversion support (#3682)
monatis Oct 19, 2023
e78f3ef
convert : restore compat with old Falcon models (#3680)
cebtenzzre Oct 20, 2023
f439e50
ggml : fix rope + llama minor optimizations (#3560)
GermanAizek Oct 20, 2023
a0edf73
server : fix uninitialized sampling context (close #3685)
ggerganov Oct 20, 2023
8cf19d6
gguf : support big endian platform (#3552)
chenqiny Oct 20, 2023
d1031cf
sampling : refactor init to use llama_sampling_params (#3696)
ggerganov Oct 20, 2023
465219b
CLBlast: Add outer loops over src0 for broadcasting in mulmat
shibe2 Oct 12, 2023
22c69a2
batched : add len CLI argument
ggerganov Oct 22, 2023
d3956ae
main : escape prompt for cfg_negative_prompt and consecutive inputs i…
vvhg1 Oct 22, 2023
a5e7dbd
llama : validate special token ids are in range when loading GGUF mod…
KerfuffleV2 Oct 22, 2023
5a42a5f
readme : remove unsupported node.js library (#3703)
ianscrivener Oct 22, 2023
9e70cc0
Add test for MPT tokenization (#3728)
goerch Oct 22, 2023
438c2ca
server : parallel decoding and multimodal (#3677)
ggerganov Oct 22, 2023
96981f3
make : add optional CUDA_NATIVE_ARCH (#2482)
awhill19 Oct 22, 2023
6336701
Fix baichuan convert script not detecting model (#3739)
Galunid Oct 23, 2023
5be6c80
llama : remove token functions with `context` args in favor of `model…
MarcusDunn Oct 23, 2023
69a6735
Update special token handling in conversion scripts for gpt2 derived …
Galunid Oct 23, 2023
9d02956
issues : separate bug and enhancement template + no default title (#3…
monatis Oct 23, 2023
e393259
Revert "make : add optional CUDA_NATIVE_ARCH (#2482)"
ggerganov Oct 23, 2023
469c9ad
metal : handle ggml_scale for n%4 != 0 (close #3754)
ggerganov Oct 24, 2023
daab3d7
Add more tokenizer tests (#3742)
Galunid Oct 24, 2023
2b4ea35
cuda : add batched cuBLAS GEMM for faster attention (#3749)
ggerganov Oct 24, 2023
abd21fc
cmake : add missed dependencies (#3763)
kingsidelee Oct 24, 2023
b2f7e04
sync : ggml (conv ops + cuda MSVC fixes) (#3765)
ggerganov Oct 24, 2023
1717521
server : do not block system prompt update (#3767)
ggerganov Oct 24, 2023
ad93962
server : add parameter -tb N, --threads-batch N (#3584) (#3768)
cebtenzzre Oct 24, 2023
cc44877
log : disable pid in log filenames
ggerganov Oct 25, 2023
6961c4b
batched-bench : print params at start
ggerganov Oct 25, 2023
34b2a5e
server : do not release slot on image input (#3798)
ggerganov Oct 26, 2023
2f9ec7e
cuda : improve text-generation and batched decoding performance (#3776)
ggerganov Oct 27, 2023
c8d6a1f
simple : fix batch handling (#3803)
tterrasson Oct 27, 2023
6d459cb
llama : correctly report GGUFv3 format (#3818)
cebtenzzre Oct 27, 2023
41aee4d
speculative : ensure draft and target model vocab matches (#3812)
KerfuffleV2 Oct 27, 2023
fdee152
starcoder : add GPU offloading (#3827)
ggerganov Oct 28, 2023
1774611
common : print that one line of the syntax help *also* to standard ou…
HenkPoley Oct 28, 2023
ee1a0ec
llama : add option for greedy sampling with probs (#3813)
ggerganov Oct 28, 2023
bd6d9e2
llama : allow quantizing k-quants to fall back when tensor size incom…
KerfuffleV2 Oct 28, 2023
8a2f2fe
convert : ignore tokens if their IDs are within [0, vocab_size) (#3831)
ggerganov Oct 28, 2023
ba231e8
issues : change label from bug to bug-unconfirmed (#3748)
ggerganov Oct 28, 2023
82a6646
metal : try cwd for ggml-metal.metal if bundle lookup fails (#3793)
akx Oct 28, 2023
ff3bad8
flake : update flake.lock for newer transformers version + provide ex…
Green-Sky Oct 28, 2023
d69d777
ggml : quantization refactoring (#3833)
ggerganov Oct 29, 2023
71a09da
llama : fix kv shift bug (#3835)
ggerganov Oct 29, 2023
2046eb4
make : remove unnecessary dependency on build-info.h (#3842)
cebtenzzre Oct 29, 2023
6e08281
Extend llama_kv_cache_seq_rm to allow matching any sequence (#3843)
KerfuffleV2 Oct 29, 2023
207b519
ggml : move FP16 <-> FP32 code to ggml-impl.h (#3861)
ggerganov Oct 30, 2023
07178c9
flake.nix: fix for rocm 5.7 (#3853)
Tungsten842 Oct 31, 2023
238657d
samplers : Min-P sampler implementation [alternative to Top P/Top K] …
kalomaze Oct 31, 2023
71e3718
llama : refactor graph build code (#3837)
ggerganov Nov 1, 2023
ca190bc
server : re-enable completion and embedded at the same time (#3876)
a-h Nov 1, 2023
f0e2093
scripts : add server-llm.sh (#3868)
ggerganov Nov 1, 2023
73bdcb3
finetune : add -ngl parameter (#3762)
AndrewGodfrey Nov 1, 2023
9a3b4f6
ggml : fix UNUSED macro (#3762)
ggerganov Nov 1, 2023
e75dfdd
sampling : null grammar field after reset (#3885)
l3utterfly Nov 1, 2023
a2758d0
log : make generating separate log files optional (#3787)
staviq Nov 1, 2023
0e40806
common : allow caller to handle help/argument exceptions (#3715)
bandoti Nov 1, 2023
5033796
llm : add llm_build_context (#3881)
ggerganov Nov 1, 2023
ff8f9a8
common : minor (#3715)
ggerganov Nov 1, 2023
e16b9fa
metal : multi-simd softmax (#3710)
ggerganov Nov 1, 2023
523e49b
llm : fix falcon norm after refactoring (#3837)
ggerganov Nov 1, 2023
c43c2da
llm : fix llm_build_kqv taking unused tensor (benign, #3837)
ggerganov Nov 1, 2023
898aeca
llama : implement YaRN RoPE scaling (#2268)
cebtenzzre Nov 1, 2023
d02e98c
ggml-cuda : compute ptrs for cublasGemmBatchedEx in a kernel (#3891)
slaren Nov 1, 2023
0eb332a
llama : fix llama_context_default_params after #2268 (#3893)
cebtenzzre Nov 1, 2023
2fffa0d
cuda : fix RoPE after #2268 (#3897)
cebtenzzre Nov 2, 2023
183b3fa
metal : fix build errors and kernel sig after #2268 (#3898)
ggerganov Nov 2, 2023
4d719a6
cuda : check if this fixes Pascal card regression (#3882)
ggerganov Nov 2, 2023
b12fa0d
build : link against build info instead of compiling against it (#3879)
cebtenzzre Nov 2, 2023
1efae9b
llm : prevent from 1-D tensors being GPU split (#3697)
ggerganov Nov 2, 2023
2756c4f
gguf : remove special-case code for GGUFv1 (#3901)
ggerganov Nov 2, 2023
21958bb
cmake : disable LLAMA_NATIVE by default (#3906)
slaren Nov 2, 2023
4ff1046
gguf : print error for GGUFv1 files (#3908)
ggerganov Nov 2, 2023
d606905
cuda : use CUDA memory pool with async memory allocation/deallocation…
young-developer Nov 2, 2023
c7743fe
cuda : fix const ptrs warning causing ROCm build issues (#3913)
ggerganov Nov 2, 2023
224e7d5
readme : add notice about #3912
ggerganov Nov 2, 2023
51b2fc1
cmake : fix relative path to git submodule index (#3915)
abetlen Nov 2, 2023
629f917
cuda : add ROCM aliases for CUDA pool stuff (#3918)
KerfuffleV2 Nov 2, 2023
3fdbe6b
llama : change yarn_ext_factor placeholder to -1 (#3922)
cebtenzzre Nov 3, 2023
0581602
common : YAYF (yet another YARN fix) (#3925)
ggerganov Nov 3, 2023
8f961ab
speculative : change default p_accept to 0.5 + CLI args (#3919)
ggerganov Nov 3, 2023
abb77e7
ggml-cuda : move row numbers to x grid dim in mmv kernels (#3921)
slaren Nov 3, 2023
5ba3746
ggml-metal: fix yarn rope (#3937)
jxy Nov 3, 2023
d9b33fe
metal : round up to 16 to fix MTLDebugComputeCommandEncoder assertion…
psugihara Nov 3, 2023
f28af0d
gguf-py: Support 01.AI Yi models (#3943)
KerfuffleV2 Nov 4, 2023
48ade94
cuda : revert CUDA pool stuff (#3944)
slaren Nov 5, 2023
a7fac01
ci : use intel sde when ci cpu doesn't support avx512 (#3949)
netrunnereve Nov 5, 2023
c41ea36
cmake : MSVC instruction detection (fixed up #809) (#3923)
netrunnereve Nov 5, 2023
3d48f42
llama : mark LLM_ARCH_STARCODER as full offload supported (#3945)
wsxiaoys Nov 5, 2023
132d25b
cuda : fix disabling device with --tensor-split 1,0 (#3951)
cebtenzzre Nov 5, 2023
bb60fd0
server : fix typo for --alias shortcut from -m to -a (#3958)
RoyalHeart Nov 5, 2023
d9ccce2
Allow common process_escapes to handle \x sequences (#3928)
KerfuffleV2 Nov 5, 2023
2833a6f
ggml-cuda : fix f16 mul mat (#3961)
slaren Nov 5, 2023
381efbf
llava : expose as a shared library for downstream projects (#3613)
damian0815 Nov 6, 2023
46876d2
cuda : supports running on CPU for GGML_USE_CUBLAS=ON build (#3946)
wsxiaoys Nov 7, 2023
54b4df8
Use params when loading models in llava-cli (#3976)
tejom Nov 7, 2023
e9c1cec
ggml : fix backward rope after YaRN (#3974)
xaedes Nov 7, 2023
413503d
make : do not add linker flags when compiling static llava lib (#3977)
ggerganov Nov 7, 2023
0a7c980
gguf : track writer state, free unneeded tensors, cleanup (#3871)
cebtenzzre Nov 7, 2023
875fb42
ggml-alloc : fix backend assignments of views (#3982)
slaren Nov 8, 2023
57ad015
server : add min_p param (#3877)
Mihaiii Nov 9, 2023
a75fa57
scripts: Generalize convert scripts (#3838)
Galunid Nov 9, 2023
df9d129
Unbreak persimmon after #3837 (#4010)
Galunid Nov 10, 2023
4a4fd3e
server : allow continue edit on completion mode (#3950)
jhen0409 Nov 10, 2023
34b0a08
gguf-py: Refactor and allow reading/modifying existing GGUF files (#3…
KerfuffleV2 Nov 11, 2023
d96ca7d
server : fix crash when prompt exceeds context size (#3996)
z80maniac Nov 11, 2023
e86fc56
Fix gguf-convert-endian script (#4037)
monatis Nov 11, 2023
532dd74
Fix some documentation typos/grammar mistakes (#4032)
richardkiss Nov 12, 2023
21fd874
gguf-py: gguf_writer: Use bytearray to build metadata (#4051)
KerfuffleV2 Nov 12, 2023
bb50a79
Add ReLU and SQR CUDA ops to (partially) fix Persimmon offloading (#4…
KerfuffleV2 Nov 13, 2023
4760e7c
sync : ggml (backend v2) (#3912)
ggerganov Nov 13, 2023
c049b37
readme : update hot topics
ggerganov Nov 13, 2023
3d68f36
ggml : sync (im2col, GPU conv, 32-bit arm compat) (#4060)
ggerganov Nov 13, 2023
bd90eca
llava : fix regression for square images in #3613 (#4056)
monatis Nov 13, 2023
b46d12f
convert.py: also look for plain model.safetensors (#4043)
afrideva Nov 14, 2023
36eed0c
stablelm : StableLM support (#3586)
Galunid Nov 14, 2023
6bb4908
Fix MacOS Sonoma model quantization (#4052)
TortoiseHam Nov 14, 2023
1cf2850
ggml-cuda : increase max graph size (#4084)
slaren Nov 15, 2023
a6fc554
llama : restore prefix space in llama tokenizer (#4081)
cebtenzzre Nov 15, 2023
8da4627
gguf : fix potential infinite loops while parsing (#4100)
texmex76 Nov 16, 2023
91f6499
Respect tokenizer.ggml.add_bos_token value when tokenizing (#4040)
KerfuffleV2 Nov 17, 2023
4f447a4
llama : fix data units (#4101)
ggerganov Nov 17, 2023
b83e149
cuda : get_row_rounding F32 (#4095)
AndrewGodfrey Nov 17, 2023
947f64f
finetune : zero the loraB initial vectors (#4082)
AndrewGodfrey Nov 17, 2023
3e916a0
finetune : speed-up ggml_compute_forward_out_prod_f32 via BLAS (#4079)
gwjr Nov 17, 2023
e85bb1a
llama : add functions to get the model's metadata (#4013)
slaren Nov 17, 2023
ba4cf5c
train : move number of gpu layers argument parsing to common/train.cp…
jpodivin Nov 17, 2023
f7d5e97
py : remove superfluous import statements (#4076)
jpodivin Nov 17, 2023
c7cce12
llava : fix compilation warning that fread return value is not used (…
huawei-lin Nov 17, 2023
9e87ef6
common : improve yaml log escaping (#4080)
joennlae Nov 17, 2023
11173c9
py : Falcon HF compatibility (#4104)
cmp-nct Nov 17, 2023
2ab0707
convert : use 'model' value if it exists. This allows karpathy/tinyll…
dmahurin Nov 17, 2023
2fa02b4
examples : add tokenize (#4039)
zakkor Nov 17, 2023
5ad387e
tokenize : fix trailing whitespace
ggerganov Nov 17, 2023
8e93610
build : support ppc64le build for make and CMake (#3963)
bufferoverflow Nov 17, 2023
bbecf3f
llama : increase max nodes (#4115)
slaren Nov 17, 2023
cd61854
added O3, now has insufficient memory access
bachittle Nov 18, 2023
f510cc1
Merge branch 'master' into swiftui_metal_update
bachittle Nov 18, 2023
ce31d95
begin sync with master
bachittle Nov 18, 2023
a22264a
update to match latest code, new errors
bachittle Nov 22, 2023
f002a2e
fixed it!
bachittle Nov 22, 2023
3 changes: 3 additions & 0 deletions .dockerignore
@@ -1,6 +1,9 @@
*.o
*.a
.cache/
+.git/
+.github/
+.gitignore
.vs/
.vscode/
.DS_Store
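The three new .dockerignore entries keep Git metadata out of the Docker build context, so the context upload shrinks and changes to Git state no longer invalidate image build caches. A quick local sanity check (a sketch; the Dockerfile path is illustrative, not taken from this PR):

    # Rough check of what the new entries save; .devops/main.Dockerfile is an
    # assumed path for illustration.
    du -sh .git .github                        # weight that no longer ships in the context
    docker build -f .devops/main.Dockerfile .  # the reported build-context size should drop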
.github/ISSUE_TEMPLATE/bug.md
@@ -1,8 +1,7 @@
---
-name: Issue and enhancement template
-about: Used to report issues and request enhancements for llama.cpp
-title: "[User] Insert summary of your issue or enhancement.."
-labels: ''
+name: Bug template
+about: Used to report bugs in llama.cpp
+labels: ["bug-unconfirmed"]
assignees: ''

---
@@ -46,7 +45,7 @@ $ g++ --version

# Failure Information (for bugs)

-Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template.
+Please help provide information about the failure / bug.

# Steps to Reproduce

28 changes: 28 additions & 0 deletions .github/ISSUE_TEMPLATE/enhancement.md
@@ -0,0 +1,28 @@
---
name: Enhancement template
about: Used to request enhancements for llama.cpp
labels: ["enhancement"]
assignees: ''

---

# Prerequisites

Please answer the following questions for yourself before submitting an issue.

- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now.
- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed).
- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share.

# Feature Description

Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.

# Motivation

Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.

# Possible Implementation

If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
123 changes: 82 additions & 41 deletions .github/workflows/build.yml
@@ -10,10 +10,10 @@ on:
push:
  branches:
    - master
-  paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+  paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
pull_request:
  types: [opened, synchronize, reopened]
-  paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
+  paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']

env:
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
@@ -38,13 +38,13 @@ jobs:
- name: Build
  id: make_build
  run: |
-    CC=gcc-8 make
+    CC=gcc-8 make -j $(nproc)

- name: Test
  id: make_test
  run: |
-    CC=gcc-8 make tests
-    make test
+    CC=gcc-8 make tests -j $(nproc)
+    make test -j $(nproc)

ubuntu-latest-cmake:
runs-on: ubuntu-latest
@@ -66,7 +66,7 @@ jobs:
mkdir build
cd build
cmake ..
-cmake --build . --config Release
+cmake --build . --config Release -j $(nproc)

- name: Test
id: cmake_test
@@ -101,7 +101,7 @@ jobs:
mkdir build
cd build
cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
-cmake --build . --config ${{ matrix.build_type }}
+cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

- name: Test
id: cmake_test
@@ -135,7 +135,7 @@ jobs:
mkdir build
cd build
cmake -DLLAMA_MPI=ON ..
-cmake --build . --config Release
+cmake --build . --config Release -j $(nproc)

- name: Test
id: cmake_test
@@ -160,13 +160,13 @@
- name: Build
  id: make_build
  run: |
-    make
+    make -j $(sysctl -n hw.logicalcpu)

- name: Test
  id: make_test
  run: |
-    make tests
-    make test
+    make tests -j $(sysctl -n hw.logicalcpu)
+    make test -j $(sysctl -n hw.logicalcpu)

macOS-latest-cmake:
runs-on: macos-latest
@@ -188,8 +188,8 @@ jobs:
sysctl -a
mkdir build
cd build
-cmake -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF ..
-cmake --build . --config Release
+cmake ..
+cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

- name: Test
id: cmake_test
@@ -223,7 +223,7 @@ jobs:
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
-cmake --build . --config Release
+cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

macOS-latest-cmake-tvos:
runs-on: macos-latest
@@ -251,7 +251,35 @@
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=tvOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
-cmake --build . --config Release
+cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

+macOS-latest-swift:
+  runs-on: macos-latest
+
+  strategy:
+    matrix:
+      destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
+
+  steps:
+    - name: Clone
+      id: checkout
+      uses: actions/checkout@v1
+
+    - name: Dependencies
+      id: depends
+      continue-on-error: true
+      run: |
+        brew update
+
+    - name: xcodebuild for swift package
+      id: xcodebuild
+      run: |
+        xcodebuild -scheme llama -destination "${{ matrix.destination }}"
+
+    - name: Build Swift Example
+      id: make_build_swift_example
+      run: |
+        make swift

windows-latest-cmake:
runs-on: windows-latest
@@ -260,22 +288,23 @@
OPENBLAS_VERSION: 0.3.23
OPENCL_VERSION: 2023.04.17
CLBLAST_VERSION: 1.6.0
+SDE_VERSION: 9.21.1-2023-04-24

strategy:
  matrix:
    include:
      - build: 'noavx'
-       defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+       defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
      - build: 'avx2'
-       defines: '-DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+       defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
      - build: 'avx'
-       defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+       defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
      - build: 'avx512'
-       defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+       defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
      - build: 'clblast'
-       defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
+       defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
      - build: 'openblas'
-       defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+       defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'

steps:
- name: Clone
@@ -324,7 +353,7 @@ jobs:
mkdir build
cd build
cmake .. ${{ matrix.defines }}
-cmake --build . --config Release
+cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}

- name: Add clblast.dll
id: add_clblast_dll
Expand Down Expand Up @@ -355,11 +384,23 @@ jobs:

- name: Test
  id: cmake_test
-  if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # Test AVX-512 only when possible
+  if: ${{ matrix.build != 'clblast' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }} # not all machines have native AVX-512
  run: |
    cd build
    ctest -C Release --verbose --timeout 900

+- name: Test (Intel SDE)
+  id: cmake_test_sde
+  if: ${{ matrix.build == 'avx512' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
+  run: |
+    curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/777395/sde-external-${env:SDE_VERSION}-win.tar.xz"
+    # for some weird reason windows tar doesn't like sde tar.xz
+    7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
+    7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
+    $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
+    cd build
+    & $sde -future -- ctest -C Release --verbose --timeout 900

- name: Determine tag name
id: tag
shell: bash
@@ -414,8 +455,8 @@ jobs:
run: |
  mkdir build
  cd build
-  cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
-  cmake --build . --config Release
+  cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
+  cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}

- name: Determine tag name
id: tag
@@ -457,22 +498,22 @@ jobs:
path: |
  cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip

-freeBSD-latest:
-  runs-on: macos-12
-  steps:
-    - name: Clone
-      uses: actions/checkout@v3
-
-    - name: Build
-      uses: cross-platform-actions/action@v0.19.0
-      with:
-        operating_system: freebsd
-        version: '13.2'
-        hypervisor: 'qemu'
-        run: |
-          sudo pkg update
-          sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
-          gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15
+# freeBSD-latest:
+#   runs-on: macos-12
+#   steps:
+#     - name: Clone
+#       uses: actions/checkout@v3
+#
+#     - name: Build
+#       uses: cross-platform-actions/action@v0.19.0
+#       with:
+#         operating_system: freebsd
+#         version: '13.2'
+#         hypervisor: 'qemu'
+#         run: |
+#           sudo pkg update
+#           sudo pkg install -y gmake automake autoconf pkgconf llvm15 clinfo clover opencl clblast openblas
+#           gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`

release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
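Two of the additions above are straightforward to reproduce outside CI. A minimal sketch, assuming a macOS checkout for the Swift steps and an unpacked Intel SDE with the sde binary on PATH for the AVX-512 step (both are assumptions; the workflow itself downloads the pinned SDE_VERSION):

    # Swift package build, same scheme and destination values as the new macOS-latest-swift job
    xcodebuild -scheme llama -destination 'generic/platform=macOS'
    make swift    # builds the Swift example target added by this PR

    # Run the test suite on a machine without native AVX-512, emulated through Intel SDE
    cd build
    sde -future -- ctest -C Release --verbose --timeout 900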
3 changes: 2 additions & 1 deletion .github/workflows/gguf-publish.yml
@@ -36,8 +36,9 @@ jobs:
poetry install

- name: Build package
-  run: poetry build
+  run: cd gguf-py && poetry build
- name: Publish package
  uses: pypa/gh-action-pypi-publish@release/v1
  with:
    password: ${{ secrets.PYPI_API_TOKEN }}
+    packages-dir: gguf-py/dist
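The fix runs poetry from the gguf-py subdirectory, where the package's pyproject lives, and points the publish action at gguf-py/dist accordingly. A local dry run of the build half (a sketch; the upload itself stays in CI behind PYPI_API_TOKEN):

    cd gguf-py
    poetry install
    poetry build    # writes the sdist and wheel into gguf-py/dist
    ls dist/        # the artifacts the workflow hands to pypa/gh-action-pypi-publish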
25 changes: 25 additions & 0 deletions .github/workflows/zig-build.yml
@@ -0,0 +1,25 @@
name: Zig CI

on:
pull_request:
push:
branches:
- master

jobs:
build:
strategy:
fail-fast: false
matrix:
runs-on: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.runs-on }}
steps:
- uses: actions/checkout@v3
with:
submodules: recursive
fetch-depth: 0
- uses: goto-bus-stop/setup-zig@v2
with:
version: 0.11.0
- name: Build Summary
run: zig build --summary all -freference-trace
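The workflow pins Zig 0.11.0 and delegates everything to the build script, so the job can be reproduced locally with the same one-liner (assuming zig 0.11.0 on PATH):

    zig version                                # expect 0.11.0, matching CI
    zig build --summary all -freference-trace  # same flags as the Build Summary step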
16 changes: 14 additions & 2 deletions .gitignore
@@ -10,9 +10,12 @@
*.gcno
*.gcda
*.dot
*.bat
*.metallib
.DS_Store
.build/
.cache/
.ccls-cache/
.direnv/
.envrc
.swiftpm
@@ -40,21 +43,29 @@ models-mnt
/embedding
/gguf
/gguf-llama-simple
/infill
/libllama.so
/llama-bench
/llava-cli
/main
/metal
/perplexity
/q8dot
/quantize
/quantize-stats
/result
/save-load-state
/server
/simple
/batched
/batched-bench
/export-lora
/finetune
/speculative
/parallel
/train-text-from-scratch
/vdot
build-info.h
/common/build-info.cpp
arm_neon.h
compile_commands.json
CMakeSettings.json
@@ -85,4 +96,5 @@ tests/test-quantize-perf
tests/test-sampling
tests/test-tokenizer-0-llama
tests/test-tokenizer-0-falcon
-tests/test-tokenizer-1
+tests/test-tokenizer-1-llama
+tests/test-tokenizer-1-bpe