diff --git a/serving/build.gradle b/serving/build.gradle index 3b9ff487c5..993c98aca4 100644 --- a/serving/build.gradle +++ b/serving/build.gradle @@ -22,6 +22,7 @@ dependencies { runtimeOnly "ai.djl:model-zoo" runtimeOnly "ai.djl.tensorflow:tensorflow-model-zoo" runtimeOnly "ai.djl.pytorch:pytorch-model-zoo" + runtimeOnly "ai.djl.onnxruntime:onnxruntime-engine" runtimeOnly "ai.djl.huggingface:tokenizers" runtimeOnly "ai.djl.tensorrt:tensorrt" if (JavaVersion.current() >= JavaVersion.VERSION_15) { diff --git a/serving/docker/pytorch-gpu.Dockerfile b/serving/docker/pytorch-gpu.Dockerfile index ca89339906..8653ae7abe 100644 --- a/serving/docker/pytorch-gpu.Dockerfile +++ b/serving/docker/pytorch-gpu.Dockerfile @@ -15,8 +15,9 @@ FROM nvidia/cuda:$version as base ARG djl_version=0.28.0~SNAPSHOT ARG cuda_version=cu121 -ARG torch_version=2.1.1 -ARG torch_vision_version=0.16.1 +ARG torch_version=2.1.2 +ARG torch_vision_version=0.16.2 +ARG onnx_version=1.17.1 ARG python_version=3.10 RUN mkdir -p /opt/djl/conf && \ @@ -51,6 +52,8 @@ RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh && \ mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ echo "${djl_version} pytorchgpu" > /opt/djl/bin/telemetry && \ scripts/install_djl_serving.sh $djl_version ${torch_version} && \ + rm -f /usr/local/djl-serving-*/lib/onnxruntime-$onnx_version.jar && \ + curl -f -o /usr/local/djl-serving-*/lib/onnxruntime_gpu-$onnx_version.jar https://publish.djl.ai/onnxruntime/$onnx_version/onnxruntime_gpu-$onnx_version.jar && \ scripts/install_python.sh ${python_version} && \ scripts/install_s5cmd.sh x64 && \ pip3 install numpy && pip3 install torch==${torch_version} torchvision==${torch_vision_version} --extra-index-url https://download.pytorch.org/whl/cu121 && \ diff --git a/serving/src/main/java/ai/djl/serving/plugins/DependencyManager.java b/serving/src/main/java/ai/djl/serving/plugins/DependencyManager.java index 8c5828fad1..7b6368dbdd 100644 --- 
a/serving/src/main/java/ai/djl/serving/plugins/DependencyManager.java +++ b/serving/src/main/java/ai/djl/serving/plugins/DependencyManager.java @@ -19,7 +19,6 @@ import ai.djl.serving.util.ConfigManager; import ai.djl.serving.util.MutableClassLoader; import ai.djl.util.Utils; -import ai.djl.util.cuda.CudaUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,15 +75,6 @@ public void installEngine(String engineName) throws IOException { installDependency("ai.djl.mxnet:mxnet-engine:" + djlVersion); installDependency("ai.djl.mxnet:mxnet-model-zoo:" + djlVersion); break; - case "OnnxRuntime": - installDependency("ai.djl.onnxruntime:onnxruntime-engine:" + djlVersion); - String ortVersion = getOrtVersion(djlVersion); - if (CudaUtils.hasCuda()) { - installDependency("com.microsoft.onnxruntime:onnxruntime_gpu:" + ortVersion); - } else { - installDependency("com.microsoft.onnxruntime:onnxruntime:" + ortVersion); - } - break; case "PaddlePaddle": installDependency("ai.djl.paddlepaddle:paddlepaddle-engine:" + djlVersion); break; diff --git a/serving/src/test/java/ai/djl/serving/plugins/DependencyManagerTest.java b/serving/src/test/java/ai/djl/serving/plugins/DependencyManagerTest.java index 53aa01ee32..d45722961a 100644 --- a/serving/src/test/java/ai/djl/serving/plugins/DependencyManagerTest.java +++ b/serving/src/test/java/ai/djl/serving/plugins/DependencyManagerTest.java @@ -29,7 +29,6 @@ public void testInstallDependency() throws IOException { System.setProperty("MODEL_SERVER_HOME", "build/home"); try { DependencyManager dm = DependencyManager.getInstance(); - dm.installEngine("OnnxRuntime"); dm.installEngine("XGBoost"); dm.installDependency("ai.djl.pytorch:pytorch-jni:2.1.1-0.27.0");