Skip to content

Commit

Permalink
server : add /apply-template endpoint for additional use cases of Min…
Browse files Browse the repository at this point in the history
…ja functionality (#11489)

* add /apply-template endpoint to server

* remove unnecessary line

* add /apply-template documentation

* return only "prompt" field in /apply-template

* use suggested idea instead of my overly verbose way
  • Loading branch information
pnb authored Jan 29, 2025
1 parent 66ee4f2 commit eb7cf15
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 0 deletions.
8 changes: 8 additions & 0 deletions examples/server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,14 @@ With input 'á' (utf8 hex: C3 A1) on tinyllama/stories260k

`tokens`: Set the tokens to detokenize.

### POST `/apply-template`: Apply chat template to a conversation

Uses the server's prompt template formatting functionality to convert chat messages to a single string expected by a chat model as input, but does not perform inference. Instead, the prompt string is returned in the `prompt` field of the JSON response. The prompt can then be modified as desired (for example, to insert "Sure!" at the beginning of the model's response) before sending to `/completion` to generate the chat response.

*Options:*

`messages`: (Required) Chat turns in the same format as `/v1/chat/completions`.

### POST `/embedding`: Generate embedding of a given text

> [!IMPORTANT]
Expand Down
9 changes: 9 additions & 0 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4124,6 +4124,14 @@ int main(int argc, char ** argv) {
res_ok(res, root);
};

const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
auto body = json::parse(req.body);
const auto & chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);

res_ok(res, {{ "prompt", data.at("prompt") }});
};

const auto handle_embeddings = [&handle_embeddings_impl](const httplib::Request & req, httplib::Response & res) {
handle_embeddings_impl(req, res, OAICOMPAT_TYPE_NONE);
};
Expand Down Expand Up @@ -4300,6 +4308,7 @@ int main(int argc, char ** argv) {
svr->Post("/v1/reranking", handle_rerank);
svr->Post("/tokenize", handle_tokenize);
svr->Post("/detokenize", handle_detokenize);
svr->Post("/apply-template", handle_apply_template);
// LoRA adapters hotswap
svr->Get ("/lora-adapters", handle_lora_adapters_list);
svr->Post("/lora-adapters", handle_lora_adapters_apply);
Expand Down
15 changes: 15 additions & 0 deletions examples/server/tests/unit/test_chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,21 @@ def test_chat_template():
assert res.body["__verbose"]["prompt"] == "<s> <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"


def test_apply_chat_template():
global server
server.chat_template = "command-r"
server.start()
res = server.make_request("POST", "/apply-template", data={
"messages": [
{"role": "system", "content": "You are a test."},
{"role": "user", "content":"Hi there"},
]
})
assert res.status_code == 200
assert "prompt" in res.body
assert res.body["prompt"] == "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>You are a test.<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hi there<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"


@pytest.mark.parametrize("response_format,n_predicted,re_content", [
({"type": "json_object", "schema": {"const": "42"}}, 6, "\"42\""),
({"type": "json_object", "schema": {"items": [{"type": "integer"}]}}, 10, "[ -3000 ]"),
Expand Down

0 comments on commit eb7cf15

Please sign in to comment.