From ee540bedb0c4bc28abcf845d5eb60f4051aa17c4 Mon Sep 17 00:00:00 2001 From: Frank Liu Date: Mon, 3 Jun 2024 18:16:07 -0700 Subject: [PATCH] [docker] Fixes duplicated onnx jar file issue (#2017) --- serving/docker/lmi.Dockerfile | 2 +- wlm/src/main/java/ai/djl/serving/wlm/LmiUtils.java | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/serving/docker/lmi.Dockerfile b/serving/docker/lmi.Dockerfile index 6afa69953..648262af1 100644 --- a/serving/docker/lmi.Dockerfile +++ b/serving/docker/lmi.Dockerfile @@ -85,7 +85,7 @@ RUN mv *.deb djl-serving_all.deb || true RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \ scripts/install_djl_serving.sh $djl_version && \ - rm -f /usr/local/djl-serving-*/lib/onnxruntime-$onnx_version.jar && \ + rm -f /usr/local/djl-serving-*/lib/onnxruntime-1.*.jar && \ curl -o $(ls -d /usr/local/djl-serving-*/)lib/onnxruntime_gpu-$onnx_version.jar https://publish.djl.ai/onnxruntime/$onnx_version/onnxruntime_gpu-$onnx_version.jar && \ mkdir -p /opt/djl/bin && cp scripts/telemetry.sh /opt/djl/bin && \ echo "${djl_version} lmi" > /opt/djl/bin/telemetry && \ diff --git a/wlm/src/main/java/ai/djl/serving/wlm/LmiUtils.java b/wlm/src/main/java/ai/djl/serving/wlm/LmiUtils.java index 05ab789cc..972a61c30 100644 --- a/wlm/src/main/java/ai/djl/serving/wlm/LmiUtils.java +++ b/wlm/src/main/java/ai/djl/serving/wlm/LmiUtils.java @@ -172,6 +172,9 @@ private static Path convertOnnx(String modelId) throws IOException { return repoDir; } + Engine onnx = Engine.getEngine("OnnxRuntime"); + boolean hasCuda = onnx.getGpuCount() > 0; + String[] cmd = { "djl-convert", "--output-dir", @@ -181,9 +184,9 @@ private static Path convertOnnx(String modelId) throws IOException { "-m", modelId, "--optimize", - CudaUtils.hasCuda() ? "O4" : "O2", + hasCuda ? "O4" : "O2", "--device", - CudaUtils.hasCuda() ? "cuda" : "cpu" + hasCuda ? "cuda" : "cpu" }; boolean success = false; try {