Merge pull request #16 from apicalshark/temp
Temp
apicalshark authored Nov 8, 2024
2 parents c9f3add + c0d480a commit 9e3c483
Showing 62 changed files with 30,973 additions and 2,573 deletions.
10 changes: 10 additions & 0 deletions .editorconfig
@@ -24,6 +24,16 @@ insert_final_newline = unset
[examples/server/public/*]
indent_size = 2

[examples/server/public/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

9 changes: 9 additions & 0 deletions .github/workflows/build.yml
@@ -63,6 +63,14 @@ env:

jobs:

# TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124

# TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
# how to debug it.
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
# would be great if we fix these

# CUDA Release

@@ -232,6 +240,7 @@ jobs:

release:
permissions: write-all

if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

runs-on: ubuntu-latest
17 changes: 4 additions & 13 deletions Makefile
@@ -1455,22 +1455,13 @@ llama-server: \
examples/server/server.cpp \
examples/server/utils.hpp \
examples/server/httplib.h \
examples/server/colorthemes.css.hpp \
examples/server/style.css.hpp \
examples/server/theme-beeninorder.css.hpp \
examples/server/theme-ketivah.css.hpp \
examples/server/theme-mangotango.css.hpp \
examples/server/theme-playground.css.hpp \
examples/server/theme-polarnight.css.hpp \
examples/server/theme-snowstorm.css.hpp \
examples/server/index.html.hpp \
examples/server/index-new.html.hpp \
examples/server/index.js.hpp \
examples/server/completion.js.hpp \
examples/server/system-prompts.js.hpp \
examples/server/prompt-formats.js.hpp \
examples/server/json-schema-to-grammar.mjs.hpp \
examples/server/loading.html.hpp \
examples/server/deps_daisyui.min.css.hpp \
examples/server/deps_markdown-it.js.hpp \
examples/server/deps_tailwindcss.js.hpp \
examples/server/deps_vue.esm-browser.js.hpp \
common/json.hpp \
common/stb_image.h \
$(OBJ_ALL)
6 changes: 0 additions & 6 deletions convert_hf_to_gguf.py
@@ -3748,10 +3748,7 @@ def __init__(self, *args, **kwargs):

# Embeddings scale
self.embeddings_scale = 1.0
# note: For some JAIS flavors, output is tied to (same as) wte in original model
self.output_is_wte = False
if 'mup_embeddings_scale' in self.hparams:
self.output_is_wte = True # Hack (?)
self.embeddings_scale = self.hparams['mup_embeddings_scale']
elif 'embeddings_scale' in self.hparams:
self.embeddings_scale = self.hparams['embeddings_scale']
@@ -3808,10 +3805,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
tensors.append((new_name, data_torch * self.embeddings_scale))
if self.output_is_wte:
tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
assert not self.output_is_wte
tensors.append((new_name, data_torch * self.width_scale))
else:
tensors.append((new_name, data_torch))
2 changes: 1 addition & 1 deletion docs/backend/SYCL.md
@@ -377,7 +377,7 @@ found 2 SYCL devices:

|Chosen Device ID|Setting|
|-|-|
|0|`export ONEAPI_DEVICE_SELECTOR="level_zero:1"` or no action|
|0|`export ONEAPI_DEVICE_SELECTOR="level_zero:0"` or no action|
|1|`export ONEAPI_DEVICE_SELECTOR="level_zero:1"`|
|0 & 1|`export ONEAPI_DEVICE_SELECTOR="level_zero:0;level_zero:1"`|

17 changes: 4 additions & 13 deletions examples/server/CMakeLists.txt
@@ -15,22 +15,13 @@ set(TARGET_SRCS
httplib.h
)
set(PUBLIC_ASSETS
colorthemes.css
style.css
theme-beeninorder.css
theme-ketivah.css
theme-mangotango.css
theme-playground.css
theme-polarnight.css
theme-snowstorm.css
index.html
index-new.html
index.js
completion.js
system-prompts.js
prompt-formats.js
json-schema-to-grammar.mjs
loading.html
deps_daisyui.min.css
deps_markdown-it.js
deps_tailwindcss.js
deps_vue.esm-browser.js
)

foreach(asset ${PUBLIC_ASSETS})
10 changes: 10 additions & 0 deletions examples/server/README.md
@@ -928,6 +928,16 @@ Apart from error types supported by OAI, we also have custom types that are spec
}
```

### Legacy completion web UI

A new chat-based UI has replaced the old completion-based one since [this PR](https://github.com/ggerganov/llama.cpp/pull/10175). If you want to use the old completion UI, start the server with `--path ./examples/server/public_legacy`.

For example:

```sh
./llama-server -m my_model.gguf -c 8192 --path ./examples/server/public_legacy
```

### Extending or building alternative Web Front End

You can extend the front end by running the server binary with `--path` set to `./your-directory` and importing `/completion.js` to get access to the llamaComplete() method.
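For example, here is a minimal sketch of such a front end, using the `llama` async generator exported by `examples/server/public/completion.js` (see its diff below). This is illustrative and not part of this commit: it assumes the page is served by `llama-server` itself so relative URLs reach the API, and the `output` element id is hypothetical.

```js
import { llama } from '/completion.js';

const out = document.getElementById('output'); // hypothetical target element

// llama() is an async generator; each chunk.data is one parsed SSE "data:" payload.
for await (const chunk of llama('Building a website can be done in 10 simple steps:', {
  n_predict: 256,   // cap on generated tokens
  temperature: 0.2,
})) {
  out.textContent += chunk.data.content;
}
```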
2 changes: 1 addition & 1 deletion examples/server/chat.mjs
@@ -1,7 +1,7 @@
import * as readline from 'node:readline'
import { stdin, stdout } from 'node:process'
import { readFileSync } from 'node:fs'
import { SchemaConverter } from './public/json-schema-to-grammar.mjs'
import { SchemaConverter } from './public_legacy/json-schema-to-grammar.mjs'

const args = process.argv.slice(2);
const grammarJsonSchemaFile = args.find(
19 changes: 17 additions & 2 deletions examples/server/deps.sh
@@ -6,5 +6,20 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
PUBLIC=$DIR/public

echo "download js bundle files"
curl https://npm.reversehttp.com/@preact/signals-core,@preact/signals,htm/preact,preact,preact/hooks > $PUBLIC/index.js
echo >> $PUBLIC/index.js # add newline

# Note for contributors: Always pin to a specific version "maj.min.patch" to avoid breaking the CI

curl -L https://cdn.tailwindcss.com/3.4.14 > $PUBLIC/deps_tailwindcss.js
echo >> $PUBLIC/deps_tailwindcss.js # add newline

curl -L https://cdnjs.cloudflare.com/ajax/libs/daisyui/4.12.14/styled.min.css > $PUBLIC/deps_daisyui.min.css
curl -L https://cdnjs.cloudflare.com/ajax/libs/daisyui/4.12.14/themes.min.css >> $PUBLIC/deps_daisyui.min.css
echo >> $PUBLIC/deps_daisyui.min.css # add newline

curl -L https://unpkg.com/vue@3.5.12/dist/vue.esm-browser.js > $PUBLIC/deps_vue.esm-browser.js
echo >> $PUBLIC/deps_vue.esm-browser.js # add newline

curl -L https://cdnjs.cloudflare.com/ajax/libs/markdown-it/13.0.2/markdown-it.js > $PUBLIC/deps_markdown-it.js
echo >> $PUBLIC/deps_markdown-it.js # add newline

ls -lah $PUBLIC
29 changes: 25 additions & 4 deletions examples/server/public/completion.js
@@ -1,12 +1,16 @@
const paramDefaults = {
stream: true,
n_predict: 500,
temperature: 0.2,
stop: ["</s>"]
};

let generation_settings = null;

export class CompletionError extends Error {
constructor(message, name, data) {
super(message);
this.name = name;
}
};

// Completes the prompt as a generator. Recommended for most use cases.
//
@@ -29,7 +33,7 @@ export async function* llama(prompt, params = {}, config = {}) {

const completionParams = { ...paramDefaults, ...params, prompt };

const response = await fetch(`${api_url}/completion`, {
const response = await fetch(`${api_url}${config.endpoint || '/completion'}`, {
method: 'POST',
body: JSON.stringify(completionParams),
headers: {
@@ -41,6 +45,18 @@ export async function* llama(prompt, params = {}, config = {}) {
signal: controller.signal,
});

const status = response.status;
if (status !== 200) {
try {
const body = await response.json();
if (body && body.error && body.error.message) {
throw new CompletionError(body.error.message, 'ServerError');
}
} catch (err) {
throw new CompletionError(err.message, 'ServerError');
}
}

const reader = response.body.getReader();
const decoder = new TextDecoder();

@@ -78,7 +94,12 @@ export async function* llama(prompt, params = {}, config = {}) {
for (const line of lines) {
const match = regex.exec(line);
if (match) {
result[match[1]] = match[2]
result[match[1]] = match[2];
if (result.data === '[DONE]') {
cont = false;
break;
}

// since we know this is llama.cpp, let's just decode the json in data
if (result.data) {
result.data = JSON.parse(result.data);
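With the new status check, non-200 responses surface to callers as a `CompletionError`. A usage sketch, not part of the commit: it assumes Node 18+ (global `fetch`) and infers the `api_url` config option from the `${api_url}` template in the fetch call above.

```js
import { llama, CompletionError } from './public/completion.js';

try {
  // Point the client at a locally running llama-server.
  for await (const chunk of llama('Hello', { n_predict: 16 }, { api_url: 'http://localhost:8080' })) {
    process.stdout.write(chunk.data.content);
  }
} catch (err) {
  if (err instanceof CompletionError) {
    // message is taken from the server's JSON error body when available
    console.error(`completion failed: ${err.message}`);
  } else {
    throw err;
  }
}
```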
13 changes: 13 additions & 0 deletions examples/server/public/deps_daisyui.min.css

Large diffs are not rendered by default.

(Diffs for the remaining changed files are not rendered here.)
