Stop copying truss server code over for "custom server" (#1189)
* update

* update

* tag

* fix

* check

* check

* update

* update

* update

* check to test

* check to test

* update

* revert tag

* fix test

* lint

* lint

* update

* lint

* lint

* check

* lint

* update

* update

* update

* update

* update

* update

* update

* make liveness_endpoint and readiness_endpoint optional

* docker_server config assertion

* lint

* sleep 3

* sleep comments

* lint

---------

Co-authored-by: Tianshu Cheng <tianshuc@Tianshus-MacBook-Pro.local>
Co-authored-by: Tianshu Cheng <tianshuc@Tianshus-MBP.MG8702>
Co-authored-by: Tianshu Cheng <tianshuc@Tianshus-MBP.localdomain>
Co-authored-by: Tianshu Cheng <tianshuc@Mac.MG8702>
5 people authored Dec 3, 2024
1 parent 7472bb6 commit 6964ddd
Showing 17 changed files with 93 additions and 57 deletions.
17 changes: 7 additions & 10 deletions truss/base/truss_config.py
@@ -411,27 +411,24 @@ def to_dict(self):

 @dataclass
 class DockerServer:
-    setup_command: str
     start_command: str
     server_port: int
-    readiness_endpoint: str
-    liveness_endpoint: str
     predict_endpoint: str
+    readiness_endpoint: Optional[str] = None
+    liveness_endpoint: Optional[str] = None

     @staticmethod
     def from_dict(d) -> "DockerServer":
         return DockerServer(
-            setup_command=d.get("setup_command", ""),
-            start_command=d.get("start_command", ""),
-            server_port=d.get("server_port", 8000),
-            readiness_endpoint=d.get("readiness_endpoint", ""),
-            liveness_endpoint=d.get("liveness_endpoint", ""),
-            predict_endpoint=d.get("predict_endpoint", ""),
+            start_command=d.get("start_command"),
+            server_port=d.get("server_port"),
+            predict_endpoint=d.get("predict_endpoint"),
+            readiness_endpoint=d.get("readiness_endpoint", None),
+            liveness_endpoint=d.get("liveness_endpoint", None),
         )

     def to_dict(self):
         return {
-            "setup_command": self.setup_command,
             "start_command": self.start_command,
             "server_port": self.server_port,
             "readiness_endpoint": self.readiness_endpoint,
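With readiness_endpoint and liveness_endpoint now defaulting to None, a config dict may omit them entirely. A minimal sketch of the new behavior (assuming DockerServer is importable from truss.base.truss_config, as in the diff above):

from truss.base.truss_config import DockerServer

# Only the fields a custom server must supply; the health-check
# endpoints are omitted and fall back to None.
ds = DockerServer.from_dict(
    {
        "start_command": "python main.py",
        "server_port": 8000,
        "predict_endpoint": "/predict",
    }
)
assert ds.readiness_endpoint is None
assert ds.liveness_endpoint is None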
11 changes: 10 additions & 1 deletion truss/contexts/image_builder/serving_image_builder.py
@@ -306,6 +306,13 @@ def generate_docker_server_nginx_config(build_dir, config):
         DOCKER_SERVER_TEMPLATES_DIR, "proxy.conf.jinja"
     )

+    assert (
+        config.docker_server.predict_endpoint is not None
+    ), "docker_server.predict_endpoint is required to use custom server"
+    assert (
+        config.docker_server.server_port is not None
+    ), "docker_server.server_port is required to use custom server"
+
     nginx_content = nginx_template.render(
         server_endpoint=config.docker_server.predict_endpoint,
         readiness_endpoint=config.docker_server.readiness_endpoint,
@@ -321,9 +328,11 @@ def generate_docker_server_supervisord_config(build_dir, config):
     supervisord_template = read_template_from_fs(
         DOCKER_SERVER_TEMPLATES_DIR, "supervisord.conf.jinja"
     )
+    assert (
+        config.docker_server.start_command is not None
+    ), "docker_server.start_command is required to use custom server"
     supervisord_contents = supervisord_template.render(
         start_command=config.docker_server.start_command,
-        setup_command=config.docker_server.setup_command,
     )
     supervisord_filepath = build_dir / "supervisord.conf"
     supervisord_filepath.write_text(supervisord_contents)
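Because from_dict no longer fills in defaults like "" or 8000, a missing required field surfaces as None and trips these assertions at image-build time instead of silently producing a broken server. A sketch of the failure mode, using a deliberately incomplete (hypothetical) config dict:

from truss.base.truss_config import DockerServer

ds = DockerServer.from_dict({"server_port": 8000, "predict_endpoint": "/predict"})
# start_command is None here, so generate_docker_server_supervisord_config
# raises: AssertionError: docker_server.start_command is required to use custom server
assert ds.start_command is not None, (
    "docker_server.start_command is required to use custom server"
)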
4 changes: 2 additions & 2 deletions truss/templates/base.Dockerfile.jinja
@@ -51,10 +51,10 @@ RUN pip install -r {{config_requirements_filename}} --no-cache-dir && rm -rf /ro
-
-
+{%- if not config.docker_server %}
 ENV APP_HOME="/app"
 WORKDIR $APP_HOME

+{%- endif %}

 {% block app_copy %}
 {% endblock %}
11 changes: 7 additions & 4 deletions truss/templates/docker_server/proxy.conf.jinja
@@ -2,7 +2,9 @@ server {
     # We use the proxy_read_timeout directive here (instead of proxy_send_timeout) as it sets the timeout for reading a response from the proxied server vs. setting a timeout for sending a request to the proxied server.
     listen 8080;
     client_max_body_size {{client_max_body_size}};
-    # Liveness
+
+    {%- if liveness_endpoint %}
+    # Liveness endpoint override
     location = / {
         proxy_redirect off;
         proxy_read_timeout 300s;
@@ -11,8 +13,9 @@ server {

         proxy_pass http://127.0.0.1:{{server_port}};
     }
-
-    # Readiness
+    {%- endif %}
+    {%- if readiness_endpoint %}
+    # Readiness endpoint override
     location ~ ^/v1/models/model$ {
         proxy_redirect off;
         proxy_read_timeout 300s;
@@ -21,7 +24,7 @@ server {

         proxy_pass http://127.0.0.1:{{server_port}};
     }
-
+    {%- endif %}
    # Predict
    location ~ ^/v1/models/model:predict$ {
        proxy_redirect off;
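The {%- if %} guards mean the nginx location blocks for liveness and readiness are only emitted when the corresponding endpoint is configured. A trimmed-down stand-in (not the full proxy.conf.jinja) that shows the effect:

from jinja2 import Template

# Stand-in for the conditional structure in proxy.conf.jinja.
proxy_template = Template(
    "listen 8080;\n"
    "{%- if liveness_endpoint %}\n"
    "location = / { proxy_pass http://127.0.0.1:{{ server_port }}; }\n"
    "{%- endif %}\n"
    "location ~ ^/v1/models/model:predict$ "
    "{ proxy_pass http://127.0.0.1:{{ server_port }}; }"
)

# liveness_endpoint is None, so the liveness block is dropped and only
# the predict location remains in the rendered config.
print(proxy_template.render(liveness_endpoint=None, server_port=8000))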
4 changes: 3 additions & 1 deletion truss/templates/server.Dockerfile.jinja
@@ -76,7 +76,9 @@ RUN {% for secret,path in config.build.secret_to_path_mapping.items() %} --mount
 COPY ./{{config.data_dir}} /app/data
 {%- endif %}

+{%- if not config.docker_server %}
 COPY ./server /app
+{%- endif %}

 {%- if use_local_chains_src %}
 {# This path takes precedence over site-packages. #}
@@ -111,7 +113,7 @@ RUN mkdir -p {{ supervisor_log_dir }}
 COPY supervisord.conf {{ supervisor_config_path }}
 ENV SUPERVISOR_SERVER_URL="{{ supervisor_server_url }}"
 ENV SERVER_START_CMD="supervisord -c {{ supervisor_config_path }}"
-ENTRYPOINT ["/usr/local/bin/supervisord", "-c", "{{ supervisor_config_path }}"]
+ENTRYPOINT ["supervisord", "-c", "{{ supervisor_config_path }}"]
 {%- elif config.live_reload %}
 ENV HASH_TRUSS="{{truss_hash}}"
 ENV CONTROL_SERVER_PORT="8080"
@@ -1,27 +1,38 @@
 import pytest
 import requests
+from tenacity import stop_after_attempt

 from truss.local.local_config_handler import LocalConfigHandler
 from truss.tests.test_testing_utilities_for_other_tests import ensure_kill_all
 from truss.truss_handle.truss_handle import TrussHandle


 @pytest.mark.integration
-def test_docker_server_truss(test_data_path):
+def test_custom_server_truss(test_data_path):
     with ensure_kill_all():
-        truss_dir = test_data_path / "test_docker_server_truss"
-
+        print("Running test_custom_server_truss")
+        truss_dir = test_data_path / "test_custom_server_truss"
+        print(f"truss_dir: {truss_dir}")
         tr = TrussHandle(truss_dir)
+        print("Setting secret")
         LocalConfigHandler.set_secret("hf_access_token", "123")
-        _ = tr.docker_run(local_port=8090, detach=True, wait_for_server_ready=True)
+        try:
+            print("Starting container")
+            _ = tr.docker_run(
+                local_port=8090,
+                detach=True,
+                wait_for_server_ready=True,
+                model_server_stop_retry_override=stop_after_attempt(3),
+            )
+        except Exception as e:
+            raise Exception(f"Failed to start container: {e}")
         truss_server_addr = "http://localhost:8090"
         full_url = f"{truss_server_addr}/v1/models/model:predict"

         response = requests.post(full_url, json={})
         assert response.status_code == 200
         assert response.json() == {
             "message": "Hello World",
-            "is_torch_cuda_available": False,
             "is_env_var_passed": True,
             "is_secret_mounted": True,
         }
@@ -1,22 +1,18 @@
 base_image:
-  image: baseten/fastapi-test:0.1.1
+  image: baseten/fastapi-test:0.1.2
 docker_server:
-  start_command: fastapi dev /home/app.py
-  readiness_endpoint: /health
-  liveness_endpoint: /health
+  start_command: python main.py
   predict_endpoint: /predict
   server_port: 8000
 resources:
   accelerator: null
   cpu: '1'
   memory: 2Gi
   use_gpu: false
-requirements:
-- torch>=2.0.1
 model_name: Test Docker Server Truss
 secrets:
   hf_access_token: null
 environment_variables:
-  HF_TOKEN: 123
+  HF_TOKEN: 123456
 runtime:
   predict_concurrency: 1
@@ -3,11 +3,15 @@ FROM python:3.11-slim
 # Update package lists and install curl
 RUN apt-get update && apt-get install -y curl

-# Install FastAPI
+# Install pip dependencies
 RUN pip install fastapi[standard]

 # Copy the FastAPI application code
-COPY app.py /home/app.py
+COPY main.py /home/main.py
+
+# Set the working directory
+WORKDIR /home

 # Command to run FastAPI directly
-ENTRYPOINT ["fastapi", "dev", "/home/app.py"]
+ENTRYPOINT ["python", "main.py"]
@@ -0,0 +1,10 @@
+We built this minimal FastAPI docker image to be used in the integration test `test_custom_server_truss.py::test_custom_server_truss`.
+
+Steps to update the testing docker image:
+
+1. run `docker login`
+2. cd into this directory
+3. update the version number in the VERSION file
+   (before running the next step, make sure you meet the [prerequisites](https://docs.docker.com/build/building/multi-platform/#prerequisites))
+4. run `sh build_upload_new_image.sh`
+5. update the image tag to the latest version in config.yaml
@@ -1,6 +1,5 @@
 import os

-import torch
 from fastapi import FastAPI

 app = FastAPI()
@@ -15,7 +14,6 @@ async def health():
 async def root():
     return {
         "message": "Hello World",
-        "is_torch_cuda_available": torch.cuda.is_available(),
         "is_env_var_passed": os.environ.get("HF_TOKEN") is not None,
         "is_secret_mounted": os.path.exists("/secrets/hf_access_token"),
     }
This file was deleted.

1 change: 1 addition & 0 deletions truss/tests/test_model_inference.py
@@ -245,6 +245,7 @@ def test_requirements_file_truss(test_data_path):
     truss_dir = test_data_path / "test_requirements_file_truss"
     tr = TrussHandle(truss_dir)
     _ = tr.docker_run(local_port=8090, detach=True, wait_for_server_ready=True)
+    time.sleep(3)  # Sleeping to allow the load to finish

     # The prediction imports torch which is specified in a requirements.txt and returns if GPU is available.
     response = requests.post(PREDICT_URL, json={})
42 changes: 28 additions & 14 deletions truss/truss_handle/truss_handle.py
@@ -243,6 +243,7 @@ def docker_run(
         wait_for_server_ready: bool = True,
         network: Optional[str] = None,
         container_name_prefix: Optional[str] = None,
+        model_server_stop_retry_override=None,
     ):
         """
         Builds a docker image and runs it as a container. For control trusses,
@@ -336,7 +337,12 @@ def _run_docker(gpus: Optional[str] = None):
             )
             model_base_url = f"http://localhost:{local_port}/v1/models/model"
             try:
-                wait_for_truss(model_base_url, container, wait_for_server_ready)
+                wait_for_truss(
+                    model_base_url,
+                    container,
+                    wait_for_server_ready,
+                    model_server_stop_retry_override,
+                )
             except ContainerNotFoundError as err:
                 raise err
             except (ContainerIsDownError, HTTPError, ConnectionError) as err:
@@ -1083,31 +1089,39 @@ def _wait_for_docker_build(container) -> None:
         raise ContainerIsDownError(f"Container stuck in state: {state.value}.")


-@retry(
-    stop=stop_after_delay(120),
-    wait=wait_fixed(2),
-    retry=(
-        retry_if_result(lambda response: response.status_code in [502, 503])
-        | retry_if_exception_type(exceptions.ConnectionError)
-    ),
-)
-def _wait_for_model_server(url: str) -> Response:
-    return requests.get(url)
+def _wait_for_model_server(url: str, stop=stop_after_delay(120)) -> Response:  # type: ignore[return]
+    for attempt in Retrying(
+        stop=stop,
+        wait=wait_fixed(2),
+        retry=(
+            retry_if_result(lambda response: response.status_code in [502, 503])
+            | retry_if_exception_type(exceptions.ConnectionError)
+        ),
+    ):
+        with attempt:
+            response = requests.get(url)
+            return response


 def wait_for_truss(
-    url: str, container: str, wait_for_server_ready: bool = True
+    url: str,
+    container: str,
+    wait_for_server_ready: bool = True,
+    model_server_stop_retry_override=None,
 ) -> None:
     from python_on_whales.exceptions import NoSuchContainer

     try:
         _wait_for_docker_build(container)
+        if wait_for_server_ready:
+            if model_server_stop_retry_override is not None:
+                _wait_for_model_server(url, stop=model_server_stop_retry_override)
+            else:
+                _wait_for_model_server(url)
     except NoSuchContainer:
         raise ContainerNotFoundError(message=f"Container {container} was not found")
     except RetryError as retry_err:
         retry_err.reraise()
-    if wait_for_server_ready:
-        _wait_for_model_server(url)
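Replacing the @retry decorator with an explicit Retrying loop is what makes the stop condition injectable: the decorator baked stop_after_delay(120) in at import time, while the loop takes it as an argument. A standalone sketch of the pattern (wait_for_server is a hypothetical name, not the truss API):

import requests
from tenacity import (
    Retrying,
    retry_if_exception_type,
    stop_after_attempt,
    stop_after_delay,
    wait_fixed,
)


def wait_for_server(url: str, stop=stop_after_delay(120)) -> requests.Response:
    # The stop condition is a parameter, so an integration test can pass
    # stop_after_attempt(3) to fail fast instead of retrying for two minutes.
    for attempt in Retrying(
        stop=stop,
        wait=wait_fixed(2),
        retry=retry_if_exception_type(requests.exceptions.ConnectionError),
    ):
        with attempt:
            return requests.get(url)


# Production-style call: retry for up to 120 seconds.
#   wait_for_server("http://localhost:8090/v1/models/model")
# Test-style call: give up after three attempts.
#   wait_for_server("http://localhost:8090/v1/models/model", stop=stop_after_attempt(3))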