Stop copying truss server code over for "custom server" (#1189)
* update

* update

* tag

* fix

* check

* check

* update

* update

* update

* check to test

* check to test

* update

* revert tag

* fix test

* lint

* lint

* update

* lint

* lint

* check

* lint

* update

* update

* update

* update

* update

* update

* update

* make liveness_endpoint and readiness_endpoint optional

* docker_server config assertion

* lint

* sleep 3

* sleep comments

* lint

---------

Co-authored-by: Tianshu Cheng <tianshuc@Tianshus-MacBook-Pro.local>
Co-authored-by: Tianshu Cheng <tianshuc@Tianshus-MBP.MG8702>
Co-authored-by: Tianshu Cheng <tianshuc@Tianshus-MBP.localdomain>
Co-authored-by: Tianshu Cheng <tianshuc@Mac.MG8702>
5 people authored Dec 3, 2024
1 parent 7472bb6 commit 6964ddd
Showing 17 changed files with 93 additions and 57 deletions.
17 changes: 7 additions & 10 deletions truss/base/truss_config.py
@@ -411,27 +411,24 @@ def to_dict(self):

 @dataclass
 class DockerServer:
-    setup_command: str
     start_command: str
     server_port: int
-    readiness_endpoint: str
-    liveness_endpoint: str
     predict_endpoint: str
+    readiness_endpoint: Optional[str] = None
+    liveness_endpoint: Optional[str] = None

     @staticmethod
     def from_dict(d) -> "DockerServer":
         return DockerServer(
-            setup_command=d.get("setup_command", ""),
-            start_command=d.get("start_command", ""),
-            server_port=d.get("server_port", 8000),
-            readiness_endpoint=d.get("readiness_endpoint", ""),
-            liveness_endpoint=d.get("liveness_endpoint", ""),
-            predict_endpoint=d.get("predict_endpoint", ""),
+            start_command=d.get("start_command"),
+            server_port=d.get("server_port"),
+            predict_endpoint=d.get("predict_endpoint"),
+            readiness_endpoint=d.get("readiness_endpoint", None),
+            liveness_endpoint=d.get("liveness_endpoint", None),
         )

     def to_dict(self):
         return {
-            "setup_command": self.setup_command,
             "start_command": self.start_command,
             "server_port": self.server_port,
             "readiness_endpoint": self.readiness_endpoint,
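With readiness_endpoint and liveness_endpoint now defaulting to None, a config dict may omit them entirely. A minimal sketch of the new behavior (assuming DockerServer is importable from truss.base.truss_config, as in the diff above):

from truss.base.truss_config import DockerServer

# Only the fields a custom server must supply; the health-check
# endpoints are omitted and fall back to None.
ds = DockerServer.from_dict(
    {
        "start_command": "python main.py",
        "server_port": 8000,
        "predict_endpoint": "/predict",
    }
)
assert ds.readiness_endpoint is None
assert ds.liveness_endpoint is None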
11 changes: 10 additions & 1 deletion truss/contexts/image_builder/serving_image_builder.py
@@ -306,6 +306,13 @@ def generate_docker_server_nginx_config(build_dir, config):
         DOCKER_SERVER_TEMPLATES_DIR, "proxy.conf.jinja"
     )

+    assert (
+        config.docker_server.predict_endpoint is not None
+    ), "docker_server.predict_endpoint is required to use custom server"
+    assert (
+        config.docker_server.server_port is not None
+    ), "docker_server.server_port is required to use custom server"
+
     nginx_content = nginx_template.render(
         server_endpoint=config.docker_server.predict_endpoint,
         readiness_endpoint=config.docker_server.readiness_endpoint,
@@ -321,9 +328,11 @@ def generate_docker_server_supervisord_config(build_dir, config):
     supervisord_template = read_template_from_fs(
         DOCKER_SERVER_TEMPLATES_DIR, "supervisord.conf.jinja"
     )
+    assert (
+        config.docker_server.start_command is not None
+    ), "docker_server.start_command is required to use custom server"
     supervisord_contents = supervisord_template.render(
         start_command=config.docker_server.start_command,
-        setup_command=config.docker_server.setup_command,
     )
     supervisord_filepath = build_dir / "supervisord.conf"
     supervisord_filepath.write_text(supervisord_contents)
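Because from_dict no longer fills in defaults like "" or 8000, a missing required field surfaces as None and trips these assertions at image-build time instead of silently producing a broken server. A sketch of the failure mode, using a deliberately incomplete (hypothetical) config dict:

from truss.base.truss_config import DockerServer

ds = DockerServer.from_dict({"server_port": 8000, "predict_endpoint": "/predict"})
# start_command is None here, so generate_docker_server_supervisord_config
# raises: AssertionError: docker_server.start_command is required to use custom server
assert ds.start_command is not None, (
    "docker_server.start_command is required to use custom server"
)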
4 changes: 2 additions & 2 deletions truss/templates/base.Dockerfile.jinja
@@ -51,10 +51,10 @@ RUN pip install -r {{config_requirements_filename}} --no-cache-dir && rm -rf /ro
-
-
+{%- if not config.docker_server %}
 ENV APP_HOME="/app"
 WORKDIR $APP_HOME

+{%- endif %}

 {% block app_copy %}
 {% endblock %}
11 changes: 7 additions & 4 deletions truss/templates/docker_server/proxy.conf.jinja
@@ -2,7 +2,9 @@ server {
     # We use the proxy_read_timeout directive here (instead of proxy_send_timeout) as it sets the timeout for reading a response from the proxied server vs. setting a timeout for sending a request to the proxied server.
     listen 8080;
     client_max_body_size {{client_max_body_size}};
-    # Liveness
+
+    {%- if liveness_endpoint %}
+    # Liveness endpoint override
     location = / {
         proxy_redirect off;
         proxy_read_timeout 300s;
@@ -11,8 +13,9 @@ server {

         proxy_pass http://127.0.0.1:{{server_port}};
     }
-
-    # Readiness
+    {%- endif %}
+    {%- if readiness_endpoint %}
+    # Readiness endpoint override
     location ~ ^/v1/models/model$ {
         proxy_redirect off;
         proxy_read_timeout 300s;
@@ -21,7 +24,7 @@ server {

         proxy_pass http://127.0.0.1:{{server_port}};
     }
-
+    {%- endif %}
    # Predict
    location ~ ^/v1/models/model:predict$ {
        proxy_redirect off;
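The {%- if %} guards mean the nginx location blocks for liveness and readiness are only emitted when the corresponding endpoint is configured. A trimmed-down stand-in (not the full proxy.conf.jinja) that shows the effect:

from jinja2 import Template

# Stand-in for the conditional structure in proxy.conf.jinja.
proxy_template = Template(
    "listen 8080;\n"
    "{%- if liveness_endpoint %}\n"
    "location = / { proxy_pass http://127.0.0.1:{{ server_port }}; }\n"
    "{%- endif %}\n"
    "location ~ ^/v1/models/model:predict$ "
    "{ proxy_pass http://127.0.0.1:{{ server_port }}; }"
)

# liveness_endpoint is None, so the liveness block is dropped and only
# the predict location remains in the rendered config.
print(proxy_template.render(liveness_endpoint=None, server_port=8000))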
4 changes: 3 additions & 1 deletion truss/templates/server.Dockerfile.jinja
@@ -76,7 +76,9 @@ RUN {% for secret,path in config.build.secret_to_path_mapping.items() %} --mount
 COPY ./{{config.data_dir}} /app/data
 {%- endif %}

+{%- if not config.docker_server %}
 COPY ./server /app
+{%- endif %}

 {%- if use_local_chains_src %}
 {# This path takes precedence over site-packages. #}
@@ -111,7 +113,7 @@ RUN mkdir -p {{ supervisor_log_dir }}
 COPY supervisord.conf {{ supervisor_config_path }}
 ENV SUPERVISOR_SERVER_URL="{{ supervisor_server_url }}"
 ENV SERVER_START_CMD="supervisord -c {{ supervisor_config_path }}"
-ENTRYPOINT ["/usr/local/bin/supervisord", "-c", "{{ supervisor_config_path }}"]
+ENTRYPOINT ["supervisord", "-c", "{{ supervisor_config_path }}"]
 {%- elif config.live_reload %}
 ENV HASH_TRUSS="{{truss_hash}}"
 ENV CONTROL_SERVER_PORT="8080"
@@ -1,27 +1,38 @@
 import pytest
 import requests
+from tenacity import stop_after_attempt

 from truss.local.local_config_handler import LocalConfigHandler
 from truss.tests.test_testing_utilities_for_other_tests import ensure_kill_all
 from truss.truss_handle.truss_handle import TrussHandle


 @pytest.mark.integration
-def test_docker_server_truss(test_data_path):
+def test_custom_server_truss(test_data_path):
     with ensure_kill_all():
-        truss_dir = test_data_path / "test_docker_server_truss"
-
+        print("Running test_custom_server_truss")
+        truss_dir = test_data_path / "test_custom_server_truss"
+        print(f"truss_dir: {truss_dir}")
         tr = TrussHandle(truss_dir)
+        print("Setting secret")
         LocalConfigHandler.set_secret("hf_access_token", "123")
-        _ = tr.docker_run(local_port=8090, detach=True, wait_for_server_ready=True)
+        try:
+            print("Starting container")
+            _ = tr.docker_run(
+                local_port=8090,
+                detach=True,
+                wait_for_server_ready=True,
+                model_server_stop_retry_override=stop_after_attempt(3),
+            )
+        except Exception as e:
+            raise Exception(f"Failed to start container: {e}")
         truss_server_addr = "http://localhost:8090"
         full_url = f"{truss_server_addr}/v1/models/model:predict"

         response = requests.post(full_url, json={})
         assert response.status_code == 200
         assert response.json() == {
             "message": "Hello World",
-            "is_torch_cuda_available": False,
             "is_env_var_passed": True,
             "is_secret_mounted": True,
         }
@@ -1,22 +1,18 @@
 base_image:
-  image: baseten/fastapi-test:0.1.1
+  image: baseten/fastapi-test:0.1.2
 docker_server:
-  start_command: fastapi dev /home/app.py
-  readiness_endpoint: /health
-  liveness_endpoint: /health
+  start_command: python main.py
   predict_endpoint: /predict
   server_port: 8000
 resources:
   accelerator: null
   cpu: '1'
   memory: 2Gi
   use_gpu: false
-requirements:
-- torch>=2.0.1
 model_name: Test Docker Server Truss
 secrets:
   hf_access_token: null
 environment_variables:
-  HF_TOKEN: 123
+  HF_TOKEN: 123456
 runtime:
   predict_concurrency: 1
@@ -3,11 +3,15 @@ FROM python:3.11-slim
 # Update package lists and install curl
 RUN apt-get update && apt-get install -y curl

-# Install FastAPI
+# Install pip dependencies
 RUN pip install fastapi[standard]

 # Copy the FastAPI application code
-COPY app.py /home/app.py
+COPY main.py /home/main.py
+
+# Set the working directory
+WORKDIR /home

 # Command to run FastAPI directly
-ENTRYPOINT ["fastapi", "dev", "/home/app.py"]
+ENTRYPOINT ["python", "main.py"]
@@ -0,0 +1,10 @@
+We built this minimal FastAPI docker image to be used in the integration test `test_custom_server_truss.py::test_custom_server_truss`.
+
+Steps to update the testing docker image:
+
+1. run `docker login`
+2. cd into this directory
+3. update the version number in the VERSION file
+   (before running the next step, make sure you meet the [prerequisites](https://docs.docker.com/build/building/multi-platform/#prerequisites))
+4. run `sh build_upload_new_image.sh`
+5. update the image tag to the latest version in config.yaml
@@ -1,6 +1,5 @@
 import os

-import torch
 from fastapi import FastAPI

 app = FastAPI()
@@ -15,7 +14,6 @@ async def health():
 async def root():
     return {
         "message": "Hello World",
-        "is_torch_cuda_available": torch.cuda.is_available(),
         "is_env_var_passed": os.environ.get("HF_TOKEN") is not None,
         "is_secret_mounted": os.path.exists("/secrets/hf_access_token"),
     }
This file was deleted.

1 change: 1 addition & 0 deletions truss/tests/test_model_inference.py
@@ -245,6 +245,7 @@ def test_requirements_file_truss(test_data_path):
     truss_dir = test_data_path / "test_requirements_file_truss"
     tr = TrussHandle(truss_dir)
     _ = tr.docker_run(local_port=8090, detach=True, wait_for_server_ready=True)
+    time.sleep(3)  # Sleeping to allow the load to finish

     # The prediction imports torch which is specified in a requirements.txt and returns if GPU is available.
     response = requests.post(PREDICT_URL, json={})
42 changes: 28 additions & 14 deletions truss/truss_handle/truss_handle.py
@@ -243,6 +243,7 @@ def docker_run(
         wait_for_server_ready: bool = True,
         network: Optional[str] = None,
         container_name_prefix: Optional[str] = None,
+        model_server_stop_retry_override=None,
     ):
         """
         Builds a docker image and runs it as a container. For control trusses,
@@ -336,7 +337,12 @@ def _run_docker(gpus: Optional[str] = None):
             )
             model_base_url = f"http://localhost:{local_port}/v1/models/model"
             try:
-                wait_for_truss(model_base_url, container, wait_for_server_ready)
+                wait_for_truss(
+                    model_base_url,
+                    container,
+                    wait_for_server_ready,
+                    model_server_stop_retry_override,
+                )
             except ContainerNotFoundError as err:
                 raise err
             except (ContainerIsDownError, HTTPError, ConnectionError) as err:
@@ -1083,31 +1089,39 @@ def _wait_for_docker_build(container) -> None:
         raise ContainerIsDownError(f"Container stuck in state: {state.value}.")


-@retry(
-    stop=stop_after_delay(120),
-    wait=wait_fixed(2),
-    retry=(
-        retry_if_result(lambda response: response.status_code in [502, 503])
-        | retry_if_exception_type(exceptions.ConnectionError)
-    ),
-)
-def _wait_for_model_server(url: str) -> Response:
-    return requests.get(url)
+def _wait_for_model_server(url: str, stop=stop_after_delay(120)) -> Response:  # type: ignore[return]
+    for attempt in Retrying(
+        stop=stop,
+        wait=wait_fixed(2),
+        retry=(
+            retry_if_result(lambda response: response.status_code in [502, 503])
+            | retry_if_exception_type(exceptions.ConnectionError)
+        ),
+    ):
+        with attempt:
+            response = requests.get(url)
+            return response


 def wait_for_truss(
-    url: str, container: str, wait_for_server_ready: bool = True
+    url: str,
+    container: str,
+    wait_for_server_ready: bool = True,
+    model_server_stop_retry_override=None,
 ) -> None:
     from python_on_whales.exceptions import NoSuchContainer

     try:
         _wait_for_docker_build(container)
+        if wait_for_server_ready:
+            if model_server_stop_retry_override is not None:
+                _wait_for_model_server(url, stop=model_server_stop_retry_override)
+            else:
+                _wait_for_model_server(url)
     except NoSuchContainer:
         raise ContainerNotFoundError(message=f"Container {container} was not found")
     except RetryError as retry_err:
         retry_err.reraise()
-    if wait_for_server_ready:
-        _wait_for_model_server(url)
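Replacing the @retry decorator with an explicit Retrying loop is what makes the stop condition injectable: the decorator baked stop_after_delay(120) in at import time, while the loop takes it as an argument. A standalone sketch of the pattern (wait_for_server is a hypothetical name, not the truss API):

import requests
from tenacity import (
    Retrying,
    retry_if_exception_type,
    stop_after_attempt,
    stop_after_delay,
    wait_fixed,
)


def wait_for_server(url: str, stop=stop_after_delay(120)) -> requests.Response:
    # The stop condition is a parameter, so an integration test can pass
    # stop_after_attempt(3) to fail fast instead of retrying for two minutes.
    for attempt in Retrying(
        stop=stop,
        wait=wait_fixed(2),
        retry=retry_if_exception_type(requests.exceptions.ConnectionError),
    ):
        with attempt:
            return requests.get(url)


# Production-style call: retry for up to 120 seconds.
#   wait_for_server("http://localhost:8090/v1/models/model")
# Test-style call: give up after three attempts.
#   wait_for_server("http://localhost:8090/v1/models/model", stop=stop_after_attempt(3))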