From f8a26e5ac1184bf2f2504d993105aa8e2c5a7a2f Mon Sep 17 00:00:00 2001 From: jooho Date: Fri, 15 Sep 2023 18:22:12 -0400 Subject: [PATCH 1/4] update tag to latest for main branch config folder manifests Signed-off-by: jooho --- .github/workflows/fvt.yml | 7 -- config/default/config-defaults.yaml | 6 +- config/dependencies/quickstart.yaml | 2 +- config/manager/kustomization.yaml | 2 +- fvt/fvtclient.go | 2 +- .../dependencies/quickstart.yaml | 2 +- .../manager/kustomization.yaml | 2 +- .../dependencies/quickstart.yaml | 2 +- .../manager/kustomization.yaml | 2 +- .../scripts/manifests/fvt/kustomization.yaml | 4 +- .../scripts/manifests/fvt/quickstart.yaml | 2 +- scripts/download-images-on-nodes.sh | 78 +++++++++++++------ version | 2 +- 13 files changed, 67 insertions(+), 46 deletions(-) diff --git a/.github/workflows/fvt.yml b/.github/workflows/fvt.yml index 8668c13d..bd6cdd95 100644 --- a/.github/workflows/fvt.yml +++ b/.github/workflows/fvt.yml @@ -109,12 +109,6 @@ jobs: docker pull nvcr.io/nvidia/tritonserver:23.04-py3 docker pull seldonio/mlserver:1.3.2 docker pull openvino/model_server:2022.2 -<<<<<<< HEAD:.github/workflows/run-fvt.yml - # docker pull pytorch/torchserve:0.6.0-cpu - docker pull kserve/modelmesh:v0.11.0-alpha - docker pull kserve/modelmesh-runtime-adapter:v0.11.0-alpha - docker pull kserve/rest-proxy:v0.10.0 -======= # docker pull pytorch/torchserve:0.7.1-cpu docker pull kserve/modelmesh:latest docker pull kserve/modelmesh-minio-dev-examples:latest @@ -122,7 +116,6 @@ jobs: docker pull kserve/modelmesh-runtime-adapter:latest docker pull kserve/rest-proxy:latest ->>>>>>> v0.11.0-rc0:.github/workflows/fvt.yml - name: Check installation run: | eval $(minikube -p minikube docker-env) diff --git a/config/default/config-defaults.yaml b/config/default/config-defaults.yaml index 616812ef..cdfbaff5 100644 --- a/config/default/config-defaults.yaml +++ b/config/default/config-defaults.yaml @@ -16,7 +16,7 @@ podsPerRuntime: 2 headlessService: true modelMeshImage: name: kserve/modelmesh - tag: v0.11.0-rc0 + tag: latest modelMeshResources: requests: cpu: "300m" @@ -29,7 +29,7 @@ restProxy: port: 8008 image: name: kserve/rest-proxy - tag: v0.11.0-rc0 + tag: latest resources: requests: cpu: "50m" @@ -39,7 +39,7 @@ restProxy: memory: "512Mi" storageHelperImage: name: kserve/modelmesh-runtime-adapter - tag: v0.11.0-rc0 + tag: latest command: ["/opt/app/puller"] storageHelperResources: requests: diff --git a/config/dependencies/quickstart.yaml b/config/dependencies/quickstart.yaml index b976d1aa..e04bfeae 100644 --- a/config/dependencies/quickstart.yaml +++ b/config/dependencies/quickstart.yaml @@ -110,7 +110,7 @@ spec: - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY # image: quay.io/cloudservices/minio:latest - image: kserve/modelmesh-minio-examples:v0.11.0-rc0 + image: kserve/modelmesh-minio-examples:latest name: minio --- apiVersion: v1 diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index a3f93dff..55e768ba 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,4 +18,4 @@ images: - name: modelmesh-controller newName: kserve/modelmesh-controller ## NOTE THIS SHOULD BE REPLACED WITH LATEST CONTROLLER IMAGE TAG - newTag: v0.11.0-rc0 + newTag: latest diff --git a/fvt/fvtclient.go b/fvt/fvtclient.go index a722c840..002ded41 100644 --- a/fvt/fvtclient.go +++ b/fvt/fvtclient.go @@ -59,7 +59,7 @@ import ( torchserveapi "github.com/kserve/modelmesh-serving/fvt/generated/torchserve/apis" ) -const PredictorTimeout = time.Second * 600 // absolute time to wait for predictor to become ready +const PredictorTimeout = time.Second * 120 // absolute time to wait for predictor to become ready const TimeForStatusToStabilize = time.Second * 5 // time to wait between watcher events before assuming a stable state type ModelServingConnectionType int diff --git a/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/dependencies/quickstart.yaml b/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/dependencies/quickstart.yaml index b976d1aa..e04bfeae 100644 --- a/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/dependencies/quickstart.yaml +++ b/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/dependencies/quickstart.yaml @@ -110,7 +110,7 @@ spec: - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY # image: quay.io/cloudservices/minio:latest - image: kserve/modelmesh-minio-examples:v0.11.0-rc0 + image: kserve/modelmesh-minio-examples:latest name: minio --- apiVersion: v1 diff --git a/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/manager/kustomization.yaml b/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/manager/kustomization.yaml index a3f93dff..55e768ba 100644 --- a/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/manager/kustomization.yaml +++ b/opendatahub/odh-manifests/model-mesh/odh-modelmesh-controller/manager/kustomization.yaml @@ -18,4 +18,4 @@ images: - name: modelmesh-controller newName: kserve/modelmesh-controller ## NOTE THIS SHOULD BE REPLACED WITH LATEST CONTROLLER IMAGE TAG - newTag: v0.11.0-rc0 + newTag: latest diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/quickstart.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/quickstart.yaml index b976d1aa..c49f4856 100644 --- a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/quickstart.yaml +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/quickstart.yaml @@ -110,7 +110,7 @@ spec: - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY # image: quay.io/cloudservices/minio:latest - image: kserve/modelmesh-minio-examples:v0.11.0-rc0 + image: kserve/modelmesh-minio-examples:v0.11.0 name: minio --- apiVersion: v1 diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/manager/kustomization.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/manager/kustomization.yaml index a3f93dff..76a8aa2a 100644 --- a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/manager/kustomization.yaml +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/manager/kustomization.yaml @@ -18,4 +18,4 @@ images: - name: modelmesh-controller newName: kserve/modelmesh-controller ## NOTE THIS SHOULD BE REPLACED WITH LATEST CONTROLLER IMAGE TAG - newTag: v0.11.0-rc0 + newTag: v0.11.0 diff --git a/opendatahub/scripts/manifests/fvt/kustomization.yaml b/opendatahub/scripts/manifests/fvt/kustomization.yaml index c85709f8..9ab2d943 100644 --- a/opendatahub/scripts/manifests/fvt/kustomization.yaml +++ b/opendatahub/scripts/manifests/fvt/kustomization.yaml @@ -1,7 +1,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- fvt.yaml + - fvt.yaml patchesStrategicMerge: -- remove_etcd_patch.yaml + - remove_etcd_patch.yaml namespace: modelmesh-serving diff --git a/opendatahub/scripts/manifests/fvt/quickstart.yaml b/opendatahub/scripts/manifests/fvt/quickstart.yaml index b976d1aa..e04bfeae 100644 --- a/opendatahub/scripts/manifests/fvt/quickstart.yaml +++ b/opendatahub/scripts/manifests/fvt/quickstart.yaml @@ -110,7 +110,7 @@ spec: - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY # image: quay.io/cloudservices/minio:latest - image: kserve/modelmesh-minio-examples:v0.11.0-rc0 + image: kserve/modelmesh-minio-examples:latest name: minio --- apiVersion: v1 diff --git a/scripts/download-images-on-nodes.sh b/scripts/download-images-on-nodes.sh index 6e778761..eff9ddb8 100755 --- a/scripts/download-images-on-nodes.sh +++ b/scripts/download-images-on-nodes.sh @@ -1,27 +1,55 @@ -TRITON_SERVER=nvcr.io/nvidia/tritonserver:23.04-py3 -ML_SERVER=seldonio/mlserver:1.3.2 -OPENVINO=openvino/model_server:2022.3 -TORCHSERVE=pytorch/torchserve:0.7.1-cpu -MODELMESH=kserve/modelmesh:v0.11.0 -MODELMESH_RUNTIME=kserve/modelmesh-runtime-adapter:v0.11.0 -REST_PROXY=kserve/rest-proxy:v0.11.0 - -# TODO - automation -# TRITON_SERVER_IMG=nvcr.io/nvidia/tritonserver -# ML_SERVER_IMG=seldonio/mlserver -# TORCHSERVE_IMG=pytorch/torchserve -# OPENVINO_IMG=openvino/model_server - -# TRITON_SERVER_TAG=$(cat ../config/runtimes/kustomization.yaml |grep ${TRITON_SERVER_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d " ") -# ML_SERVER_TAG=$(cat ../config/runtimes/kustomization.yaml |grep ${ML_SERVER_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d " ") -# TORCHSERVE_TAG=$(cat ../config/runtimes/kustomization.yaml |grep ${TORCHSERVE_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d " ") -# OPENVINO_TAG=$(cat ../config/runtimes/kustomization.yaml |grep ${OPENVINO_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d " ") - -# export TRITON_SERVER=${TRITON_SERVER_IMG}:${TRITON_SERVER_TAG} -# export ML_SERVER=${ML_SERVER_IMG}:${ML_SERVER_TAG} -# export TORCHSERVE=${TORCHSERVE_IMG}:${TORCHSERVE_TAG} -# export OPENVINO=${OPENVINO_IMG}:${OPENVINO_TAG} +#TRITON_SERVER=nvcr.io/nvidia/tritonserver:23.04-py3 +#ML_SERVER=seldonio/mlserver:1.3.2 +#OPENVINO=openvino/model_server:2022.3 +#TORCHSERVE=pytorch/torchserve:0.7.1-cpu +export SCRIPT_DIR=$(dirname "$(realpath "$0")") +export RUNTIME_DIR=${SCRIPT_DIR}/../config/runtimes +export DEFAULT_DIR=${SCRIPT_DIR}/../config/default +# automation +MODELMESH_NAME=$(cat ${DEFAULT_DIR}/config-defaults.yaml |grep modelMeshImage -A1|grep "name"|cut -d: -f2|tr -d " ") +MODELMESH_RUNTIME_NAME=$(cat ${DEFAULT_DIR}/config-defaults.yaml |grep storageHelperImage -A1|grep "name"|cut -d: -f2|tr -d " ") +REST_PROXY_NAME=$(cat ${DEFAULT_DIR}/config-defaults.yaml |grep restProxy -A4|grep "name"|cut -d: -f2|tr -d " ") + +MODELMESH_TAG=$(cat ${DEFAULT_DIR}/config-defaults.yaml |grep modelMeshImage -A2|grep "tag"|cut -d: -f2|tr -d " ") +MODELMESH_RUNTIME_TAG=$(cat ${DEFAULT_DIR}/config-defaults.yaml |grep storageHelperImage -A2|grep "tag"|cut -d: -f2|tr -d " ") +REST_PROXY_TAG=$(cat ${DEFAULT_DIR}/config-defaults.yaml |grep restProxy -A5|grep "tag"|cut -d: -f2|tr -d " ") + +MODELMESH=${MODELMESH_NAME}:${MODELMESH_TAG} +MODELMESH_RUNTIME=${MODELMESH_RUNTIME_NAME}:${MODELMESH_RUNTIME_TAG} +REST_PROXY=${REST_PROXY_NAME}:${REST_PROXY_TAG} + +echo ${MODELMESH} +echo $MODELMESH_RUNTIME +echo $REST_PROXY + +echo $SCRIPT_DIR +echo $RUNTIME_DIR +TRITON_SERVER_IMG_NAME=tritonserver +ML_SERVER_IMG_NAME=mlserver +TORCHSERVE_IMG_NAME=torchserve +OPENVINO_IMG_NAME=model_server + +TRITON_SERVER_IMG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${TRITON_SERVER_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") +ML_SERVER_IMG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${ML_SERVER_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") +TORCHSERVE_IMG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${TORCHSERVE_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") +OPENVINO_IMG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${OPENVINO_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") + +TRITON_SERVER_TAG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${TRITON_SERVER_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d '"'|tr -d " ") +ML_SERVER_TAG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${ML_SERVER_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d '"'|tr -d " ") +TORCHSERVE_TAG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${TORCHSERVE_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d '"'|tr -d " ") +OPENVINO_TAG=$(cat ${RUNTIME_DIR}/kustomization.yaml |grep ${OPENVINO_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d '"'|tr -d " ") + +export TRITON_SERVER=${TRITON_SERVER_IMG}:${TRITON_SERVER_TAG} +export ML_SERVER=${ML_SERVER_IMG}:${ML_SERVER_TAG} +export TORCHSERVE=${TORCHSERVE_IMG}:${TORCHSERVE_TAG} +export OPENVINO=${OPENVINO_IMG}:${OPENVINO_TAG} + +# Debug purpose +# echo ${TRITON_SERVER} +# echo ${ML_SERVER} +# echo ${TORCHSERVE} +# echo ${OPENVINO} images=(${TRITON_SERVER} ${ML_SERVER} ${OPENVINO} ${TORCHSERVE} ${MODELMESH} ${MODELMESH_RUNTIME} ${REST_PROXY}) wait_downloading_images(){ @@ -47,7 +75,7 @@ wait_downloading_images(){ echo "triton-server-count count: ${triton_server_count}" fi ;; - *openvino*) + *model_server*) isDownloaded=$(oc describe pod -l app=image-downloader|grep "Successfully pulled image \"${OPENVINO}\""|wc -l) existImage=$(oc describe pod -l app=image-downloader|grep "Container image \"${OPENVINO}\" already present on machine"|wc -l) if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then @@ -107,7 +135,7 @@ wait_downloading_images(){ fi ;; *) - echo "Not expected images" + echo "Not expected images(${element})" exit 1 ;; esac diff --git a/version b/version index 460f1427..c0ed9c84 100644 --- a/version +++ b/version @@ -1,4 +1,4 @@ # Version information -upstream kserve/modelmesh-serving version: v0.11.0-rc0 +upstream kserve/modelmesh-serving version: v0.11.0 opendatahub version: latest opendatahub modelmesh-serving branch: main From bccabaf8006148f2a3a6e16c84711ff33a966d89 Mon Sep 17 00:00:00 2001 From: jooho Date: Fri, 15 Sep 2023 19:51:14 -0400 Subject: [PATCH 2/4] update deploy_fvt.sh to get minio tag dynamically Signed-off-by: jooho --- opendatahub/scripts/deploy_fvt.sh | 5 +-- opendatahub/scripts/manifests/fvt/fvt.yaml | 4 +-- .../fvt/fvt_templates/kustomization.yaml | 7 ---- .../fvt/fvt_templates/remove_etcd_patch.yaml | 17 --------- opendatahub/scripts/manifests/params.env | 10 +++--- .../manifests/runtimes/kustomization.yaml | 35 ++++++++++--------- 6 files changed, 29 insertions(+), 49 deletions(-) delete mode 100644 opendatahub/scripts/manifests/fvt/fvt_templates/kustomization.yaml delete mode 100644 opendatahub/scripts/manifests/fvt/fvt_templates/remove_etcd_patch.yaml diff --git a/opendatahub/scripts/deploy_fvt.sh b/opendatahub/scripts/deploy_fvt.sh index f4d500bc..04808a31 100755 --- a/opendatahub/scripts/deploy_fvt.sh +++ b/opendatahub/scripts/deploy_fvt.sh @@ -101,8 +101,9 @@ if [[ ! -d $MANIFESTS_DIR/fvt ]] || [[ ${force} == "true" ]];then cp -R $MANIFESTS_DIR/fvt_templates $MANIFESTS_DIR/fvt cp -R $ODH_MANIFESTS_DIR/${target_modelmesh_dir}/odh-modelmesh-controller/dependencies/* $MANIFESTS_DIR/fvt/. # Convert imaes to use quay.io image (avoid dockerhub pull limit) - sed 's+kserve/modelmesh-minio-dev-examples:latest+quay.io/jooholee/minio-examples:latest+g' -i opendatahub/scripts/manifests/fvt/fvt.yaml - sed 's+kserve/modelmesh-minio-examples:latest+quay.io/jooholee/minio-examples:latest+g' -i opendatahub/scripts/manifests/fvt/fvt.yaml + minio_tag=$(grep kserve/modelmesh-minio-dev-examples ./config/dependencies/fvt.yaml |cut -d: -f3) + sed "s+kserve/modelmesh-minio-dev-examples:${minio_tag}+quay.io/jooholee/modelmesh-minio-dev-examples:${minio_tag}+g" -i opendatahub/scripts/manifests/fvt/fvt.yaml + sed "s+kserve/modelmesh-minio-examples:${minio_tag}+quay.io/jooholee/modelmesh-minio-examples:${minio_tag}+g" -i opendatahub/scripts/manifests/fvt/fvt.yaml sed 's+ubuntu+quay.io/fedora/fedora:38+g' -i opendatahub/scripts/manifests/fvt/fvt.yaml fi diff --git a/opendatahub/scripts/manifests/fvt/fvt.yaml b/opendatahub/scripts/manifests/fvt/fvt.yaml index 4d27a1d4..04228bee 100644 --- a/opendatahub/scripts/manifests/fvt/fvt.yaml +++ b/opendatahub/scripts/manifests/fvt/fvt.yaml @@ -112,7 +112,7 @@ spec: value: AKIAIOSFODNN7EXAMPLE - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - image: quay.io/jooholee/minio-examples:latest + image: quay.io/jooholee/modelmesh-minio-dev-examples:latest name: minio --- apiVersion: v1 @@ -175,7 +175,7 @@ spec: restartPolicy: OnFailure containers: - name: "copy-pod" - image: quay.io/jooholee/minio-examples:latest + image: quay.io/jooholee/modelmesh-minio-examples:latest securityContext: allowPrivilegeEscalation: false command: ["/bin/sh", "-ex", "-c"] diff --git a/opendatahub/scripts/manifests/fvt/fvt_templates/kustomization.yaml b/opendatahub/scripts/manifests/fvt/fvt_templates/kustomization.yaml deleted file mode 100644 index 9ab2d943..00000000 --- a/opendatahub/scripts/manifests/fvt/fvt_templates/kustomization.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - fvt.yaml -patchesStrategicMerge: - - remove_etcd_patch.yaml -namespace: modelmesh-serving diff --git a/opendatahub/scripts/manifests/fvt/fvt_templates/remove_etcd_patch.yaml b/opendatahub/scripts/manifests/fvt/fvt_templates/remove_etcd_patch.yaml deleted file mode 100644 index 8a5bebf4..00000000 --- a/opendatahub/scripts/manifests/fvt/fvt_templates/remove_etcd_patch.yaml +++ /dev/null @@ -1,17 +0,0 @@ -$patch: delete -apiVersion: v1 -kind: Service -metadata: - name: etcd ---- -$patch: delete -apiVersion: v1 -kind: Secret -metadata: - name: model-serving-etcd ---- -$patch: delete -apiVersion: apps/v1 -kind: Deployment -metadata: - name: etcd diff --git a/opendatahub/scripts/manifests/params.env b/opendatahub/scripts/manifests/params.env index c90a63ed..c2f7c5f4 100644 --- a/opendatahub/scripts/manifests/params.env +++ b/opendatahub/scripts/manifests/params.env @@ -1,7 +1,7 @@ monitoring-namespace=opendatahub -odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0 -odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0 -odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0 +odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:fast +odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:fast +odh-modelmesh=quay.io/opendatahub/modelmesh:fast odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-release -odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0 -odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0 +odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:fast +odh-model-controller=quay.io/opendatahub/odh-model-controller:fast diff --git a/opendatahub/scripts/manifests/runtimes/kustomization.yaml b/opendatahub/scripts/manifests/runtimes/kustomization.yaml index e89f335d..2ea7ce81 100644 --- a/opendatahub/scripts/manifests/runtimes/kustomization.yaml +++ b/opendatahub/scripts/manifests/runtimes/kustomization.yaml @@ -12,24 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. resources: - - triton-2.x.yaml - - mlserver-1.x.yaml - - ovms-1.x.yaml - - torchserve-0.x.yaml +- triton-2.x.yaml +- mlserver-1.x.yaml +- ovms-1.x.yaml +- torchserve-0.x.yaml + + + images: - - name: tritonserver-2 - newName: nvcr.io/nvidia/tritonserver - newTag: 23.04-py3 - - name: mlserver-1 - newName: seldonio/mlserver - newTag: 1.3.2 - - name: ovms-1 - newName: openvino/model_server - newTag: "2022.3" - - name: torchserve-0 - newName: pytorch/torchserve - newTag: 0.7.1-cpu +- name: tritonserver-2 + newName: nvcr.io/nvidia/tritonserver + newTag: 23.04-py3 +- name: mlserver-1 + newName: seldonio/mlserver + newTag: 1.3.2 +- name: ovms-1 + newName: openvino/model_server + newTag: "2022.3" +- name: torchserve-0 + newName: pytorch/torchserve + newTag: 0.7.1-cpu apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization From 8d815be5ddaaed2d9264e43c46a05ed7e16d03a2 Mon Sep 17 00:00:00 2001 From: jooho Date: Fri, 15 Sep 2023 22:53:55 -0400 Subject: [PATCH 3/4] update manifests for 0.11.0 and scripts Signed-off-by: jooho --- .github/workflows/fvt.yml | 10 +-- config/default/config-defaults.yaml | 6 +- config/dependencies/fvt.yaml | 6 +- config/dependencies/quickstart.yaml | 4 +- config/manager/kustomization.yaml | 2 +- .../servingruntime_controller_upstream.golden | 80 +++++++++++-------- opendatahub/kfdef/kfdef-stable.yaml | 10 +-- .../dependencies/fvt.yaml | 4 +- opendatahub/scripts/deploy_fvt.sh | 6 +- .../scripts/download_images_on_nodes.sh | 13 --- opendatahub/scripts/manifests/fvt/fvt.yaml | 4 +- .../scripts/manifests/fvt/quickstart.yaml | 2 +- opendatahub/scripts/manifests/params.env | 10 +-- .../manifests/runtimes/kustomization.yaml | 35 ++++---- 14 files changed, 95 insertions(+), 97 deletions(-) diff --git a/.github/workflows/fvt.yml b/.github/workflows/fvt.yml index bd6cdd95..091d762b 100644 --- a/.github/workflows/fvt.yml +++ b/.github/workflows/fvt.yml @@ -110,11 +110,11 @@ jobs: docker pull seldonio/mlserver:1.3.2 docker pull openvino/model_server:2022.2 # docker pull pytorch/torchserve:0.7.1-cpu - docker pull kserve/modelmesh:latest - docker pull kserve/modelmesh-minio-dev-examples:latest - docker pull kserve/modelmesh-minio-examples:latest - docker pull kserve/modelmesh-runtime-adapter:latest - docker pull kserve/rest-proxy:latest + docker pull kserve/modelmesh:v0.11.0 + docker pull kserve/modelmesh-minio-dev-examples:v0.11.0 + docker pull kserve/modelmesh-minio-examples:v0.11.0 + docker pull kserve/modelmesh-runtime-adapter:v0.11.0 + docker pull kserve/rest-proxy:v0.11.0 - name: Check installation run: | diff --git a/config/default/config-defaults.yaml b/config/default/config-defaults.yaml index cdfbaff5..f9010f66 100644 --- a/config/default/config-defaults.yaml +++ b/config/default/config-defaults.yaml @@ -16,7 +16,7 @@ podsPerRuntime: 2 headlessService: true modelMeshImage: name: kserve/modelmesh - tag: latest + tag: v0.11.0 modelMeshResources: requests: cpu: "300m" @@ -29,7 +29,7 @@ restProxy: port: 8008 image: name: kserve/rest-proxy - tag: latest + tag: v0.11.0 resources: requests: cpu: "50m" @@ -39,7 +39,7 @@ restProxy: memory: "512Mi" storageHelperImage: name: kserve/modelmesh-runtime-adapter - tag: latest + tag: v0.11.0 command: ["/opt/app/puller"] storageHelperResources: requests: diff --git a/config/dependencies/fvt.yaml b/config/dependencies/fvt.yaml index 611ad309..0ec6012d 100644 --- a/config/dependencies/fvt.yaml +++ b/config/dependencies/fvt.yaml @@ -112,7 +112,7 @@ spec: value: AKIAIOSFODNN7EXAMPLE - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - image: kserve/modelmesh-minio-dev-examples:latest + image: kserve/modelmesh-minio-dev-examples:v0.11.0 name: minio --- apiVersion: v1 @@ -175,7 +175,7 @@ spec: restartPolicy: OnFailure containers: - name: "copy-pod" - image: kserve/modelmesh-minio-examples:latest + image: kserve/modelmesh-minio-dev-examples:v0.11.0 securityContext: allowPrivilegeEscalation: false command: ["/bin/sh", "-ex", "-c"] @@ -226,7 +226,7 @@ metadata: spec: containers: - name: main - image: ubuntu + image: quay.io/fedora/fedora:38 command: ["/bin/sh", "-ec", "sleep 10000"] volumeMounts: - name: "pvc1" diff --git a/config/dependencies/quickstart.yaml b/config/dependencies/quickstart.yaml index e04bfeae..aa2011cc 100644 --- a/config/dependencies/quickstart.yaml +++ b/config/dependencies/quickstart.yaml @@ -109,8 +109,8 @@ spec: value: AKIAIOSFODNN7EXAMPLE - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - # image: quay.io/cloudservices/minio:latest - image: kserve/modelmesh-minio-examples:latest + # image: quay.io/cloudservices/minio:v0.11.0 + image: kserve/modelmesh-minio-examples:v0.11.0 name: minio --- apiVersion: v1 diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 55e768ba..76a8aa2a 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -18,4 +18,4 @@ images: - name: modelmesh-controller newName: kserve/modelmesh-controller ## NOTE THIS SHOULD BE REPLACED WITH LATEST CONTROLLER IMAGE TAG - newTag: latest + newTag: v0.11.0 diff --git a/controllers/testdata/servingruntime_controller_upstream.golden b/controllers/testdata/servingruntime_controller_upstream.golden index ccf308c0..5794cc9c 100644 --- a/controllers/testdata/servingruntime_controller_upstream.golden +++ b/controllers/testdata/servingruntime_controller_upstream.golden @@ -12,7 +12,7 @@ spec: selector: matchLabels: modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x strategy: rollingUpdate: maxSurge: 75% @@ -26,7 +26,7 @@ spec: app.kubernetes.io/managed-by: modelmesh-controller app.kubernetes.io/name: modelmesh-controller modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x spec: affinity: nodeAffinity: @@ -37,6 +37,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - command: - /opt/app/mlserver-adapter @@ -92,7 +93,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account= + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -102,7 +103,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -156,7 +157,7 @@ spec: value: 127.0.0.1 - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH value: "-1" - image: mlserver-0:replace + image: mlserver-1:replace imagePullPolicy: IfNotPresent lifecycle: preStop: @@ -220,7 +221,7 @@ spec: - name: MM_PAYLOAD_PROCESSORS value: example:8080/consumer/kserve/v2 example2:8080/consumer/kserve/v2 - name: MM_LABELS - value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,rt:mlserver-0.x + value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,rt:mlserver-1.x - name: MM_TYPE_CONSTRAINTS_PATH value: /etc/watson/mmesh/config/type_constraints - name: MM_DATAPLANE_CONFIG_PATH @@ -281,6 +282,8 @@ spec: restartPolicy: Always schedulerName: default-scheduler securityContext: {} + serviceAccount: modelmesh-serving-sa + serviceAccountName: modelmesh-serving-sa terminationGracePeriodSeconds: 90 volumes: - name: proxy-tls @@ -316,7 +319,7 @@ spec: selector: matchLabels: modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x strategy: rollingUpdate: maxSurge: 75% @@ -335,7 +338,7 @@ spec: app.kubernetes.io/managed-by: modelmesh-controller app.kubernetes.io/name: modelmesh-controller modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x spec: affinity: nodeAffinity: @@ -346,6 +349,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - command: - /opt/app/mlserver-adapter @@ -401,7 +405,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -411,7 +415,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -465,7 +469,7 @@ spec: value: 127.0.0.1 - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH value: "-1" - image: mlserver-0:replace + image: mlserver-1:replace imagePullPolicy: IfNotPresent lifecycle: preStop: @@ -527,7 +531,7 @@ spec: - name: MM_DEFAULT_VMODEL_OWNER value: ksp - name: MM_LABELS - value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,rt:mlserver-0.x + value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,rt:mlserver-1.x - name: MM_TYPE_CONSTRAINTS_PATH value: /etc/watson/mmesh/config/type_constraints - name: MM_DATAPLANE_CONFIG_PATH @@ -628,7 +632,7 @@ spec: selector: matchLabels: modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x strategy: rollingUpdate: maxSurge: 75% @@ -642,7 +646,7 @@ spec: app.kubernetes.io/managed-by: modelmesh-controller app.kubernetes.io/name: modelmesh-controller modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x spec: affinity: nodeAffinity: @@ -653,6 +657,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - env: - name: REST_PROXY_LISTEN_PORT @@ -663,7 +668,7 @@ spec: value: "false" - name: REST_PROXY_GRPC_MAX_MSG_SIZE_BYTES value: "16777216" - image: kserve/rest-proxy:latest + image: kserve/rest-proxy:v0.11.0 imagePullPolicy: Always name: rest-proxy ports: @@ -682,7 +687,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -692,7 +697,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -746,7 +751,7 @@ spec: value: 127.0.0.1 - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH value: "-1" - image: mlserver-0:replace + image: mlserver-1:replace imagePullPolicy: IfNotPresent lifecycle: preStop: @@ -859,7 +864,7 @@ spec: - name: MM_DEFAULT_VMODEL_OWNER value: ksp - name: MM_LABELS - value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,pv:v2,rt:mlserver-0.x + value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,pv:v2,rt:mlserver-1.x - name: MM_TYPE_CONSTRAINTS_PATH value: /etc/watson/mmesh/config/type_constraints - name: MM_DATAPLANE_CONFIG_PATH @@ -945,7 +950,7 @@ spec: secretName: secret status: {} ''' -"Sample Runtime config/runtimes/mlserver-0.x.yaml should be a valid runtime specification" = ''' +"Sample Runtime config/runtimes/mlserver-1.x.yaml should be a valid runtime specification" = ''' apiVersion: apps/v1 kind: Deployment metadata: @@ -957,7 +962,7 @@ spec: selector: matchLabels: modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x strategy: rollingUpdate: maxSurge: 75% @@ -971,7 +976,7 @@ spec: app.kubernetes.io/managed-by: modelmesh-controller app.kubernetes.io/name: modelmesh-controller modelmesh-service: modelmesh-serving - name: modelmesh-serving-mlserver-0.x + name: modelmesh-serving-mlserver-1.x spec: affinity: nodeAffinity: @@ -982,6 +987,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - command: - /opt/app/mlserver-adapter @@ -1037,7 +1043,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -1047,7 +1053,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -1101,7 +1107,7 @@ spec: value: 127.0.0.1 - name: MLSERVER_GRPC_MAX_MESSAGE_LENGTH value: "-1" - image: mlserver-0:replace + image: mlserver-1:replace imagePullPolicy: IfNotPresent lifecycle: preStop: @@ -1163,7 +1169,7 @@ spec: - name: MM_DEFAULT_VMODEL_OWNER value: ksp - name: MM_LABELS - value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,rt:mlserver-0.x + value: mt:lightgbm,mt:lightgbm:3,mt:sklearn,mt:sklearn:0,mt:xgboost,mt:xgboost:1,pv:grpc-v2,rt:mlserver-1.x - name: MM_TYPE_CONSTRAINTS_PATH value: /etc/watson/mmesh/config/type_constraints - name: MM_DATAPLANE_CONFIG_PATH @@ -1286,6 +1292,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - command: - /opt/app/ovms-adapter @@ -1341,7 +1348,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -1351,7 +1358,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -1582,6 +1589,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - command: - /opt/app/torchserve-adapter @@ -1637,7 +1645,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -1647,7 +1655,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -1881,6 +1889,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - command: - /opt/app/triton-adapter @@ -1938,7 +1947,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -1948,7 +1957,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 @@ -2195,6 +2204,7 @@ spec: operator: In values: - amd64 + - arm64 containers: - env: - name: MODEL_DIRECTORY_PATH @@ -2205,7 +2215,7 @@ spec: containerName: modelserver divisor: "0" resource: requests.memory - image: seldonio/mlserver:0.3.2 + image: seldonio/mlserver:1.3.2 imagePullPolicy: IfNotPresent lifecycle: preStop: @@ -2233,7 +2243,7 @@ spec: - args: - --https-address=:8443 - --provider=openshift - - --openshift-service-account=modelmesh-serving-sa + - --openshift-service-account="modelmesh-serving-sa" - --upstream=http://localhost:8008 - --tls-cert=/etc/tls/private/tls.crt - --tls-key=/etc/tls/private/tls.key @@ -2243,7 +2253,7 @@ spec: - '--openshift-sar={"namespace": "default", "resource": "services", "verb": "get"}' - --skip-auth-regex='(^/metrics|^/apis/v1beta1/healthz)' - image: registry.redhat.io/openshift4/ose-oauth-proxy:v4.8 + image: registry.redhat.io/openshift4/ose-oauth-proxy@sha256:4bef31eb993feb6f1096b51b4876c65a6fb1f4401fee97fa4f4542b6b7c9bc46 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 diff --git a/opendatahub/kfdef/kfdef-stable.yaml b/opendatahub/kfdef/kfdef-stable.yaml index e20e456e..7e1e1026 100644 --- a/opendatahub/kfdef/kfdef-stable.yaml +++ b/opendatahub/kfdef/kfdef-stable.yaml @@ -13,15 +13,15 @@ spec: - kustomizeConfig: parameters: - name: odh-mm-rest-proxy - value: quay.io/opendatahub/rest-proxy:stable + value: quay.io/opendatahub/rest-proxy:v0.11.0 - name: odh-modelmesh-runtime-adapter - value: quay.io/opendatahub/modelmesh-runtime-adapter:stable + value: quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0 - name: odh-modelmesh - value: quay.io/opendatahub/modelmesh:stable + value: quay.io/opendatahub/modelmesh:v0.11.0 - name: odh-modelmesh-controller - value: quay.io/opendatahub/modelmesh-controller:stable + value: quay.io/opendatahub/modelmesh-controller:v0.11.0 - name: odh-model-controller - value: quay.io/opendatahub/odh-model-controller:stable + value: quay.io/opendatahub/odh-model-controller:v0.11.0 - name: monitoring-namespace value: %controller-namespace% repoRef: diff --git a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/fvt.yaml b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/fvt.yaml index 611ad309..54f8f440 100644 --- a/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/fvt.yaml +++ b/opendatahub/odh-manifests/model-mesh_stable/odh-modelmesh-controller/dependencies/fvt.yaml @@ -112,7 +112,7 @@ spec: value: AKIAIOSFODNN7EXAMPLE - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - image: kserve/modelmesh-minio-dev-examples:latest + image: kserve/modelmesh-minio-dev-examples:v0.11.0 name: minio --- apiVersion: v1 @@ -175,7 +175,7 @@ spec: restartPolicy: OnFailure containers: - name: "copy-pod" - image: kserve/modelmesh-minio-examples:latest + image: kserve/modelmesh-minio-examples:v0.11.0 securityContext: allowPrivilegeEscalation: false command: ["/bin/sh", "-ex", "-c"] diff --git a/opendatahub/scripts/deploy_fvt.sh b/opendatahub/scripts/deploy_fvt.sh index 04808a31..62c3fea1 100755 --- a/opendatahub/scripts/deploy_fvt.sh +++ b/opendatahub/scripts/deploy_fvt.sh @@ -67,6 +67,10 @@ while (($# > 0)); do shift done +if [[ ${tag} == "stable" ]];then + stable_manifests=true +fi + info ".. Downloading kustomize" if [[ ! -d ${ROOT_DIR}/bin ]]; then info ".. Creating a bin folder" @@ -101,7 +105,7 @@ if [[ ! -d $MANIFESTS_DIR/fvt ]] || [[ ${force} == "true" ]];then cp -R $MANIFESTS_DIR/fvt_templates $MANIFESTS_DIR/fvt cp -R $ODH_MANIFESTS_DIR/${target_modelmesh_dir}/odh-modelmesh-controller/dependencies/* $MANIFESTS_DIR/fvt/. # Convert imaes to use quay.io image (avoid dockerhub pull limit) - minio_tag=$(grep kserve/modelmesh-minio-dev-examples ./config/dependencies/fvt.yaml |cut -d: -f3) + minio_tag=$(grep kserve/modelmesh-minio-dev-examples opendatahub/scripts/manifests/fvt/fvt.yaml |cut -d: -f3) sed "s+kserve/modelmesh-minio-dev-examples:${minio_tag}+quay.io/jooholee/modelmesh-minio-dev-examples:${minio_tag}+g" -i opendatahub/scripts/manifests/fvt/fvt.yaml sed "s+kserve/modelmesh-minio-examples:${minio_tag}+quay.io/jooholee/modelmesh-minio-examples:${minio_tag}+g" -i opendatahub/scripts/manifests/fvt/fvt.yaml sed 's+ubuntu+quay.io/fedora/fedora:38+g' -i opendatahub/scripts/manifests/fvt/fvt.yaml diff --git a/opendatahub/scripts/download_images_on_nodes.sh b/opendatahub/scripts/download_images_on_nodes.sh index 342a0063..375a56f6 100755 --- a/opendatahub/scripts/download_images_on_nodes.sh +++ b/opendatahub/scripts/download_images_on_nodes.sh @@ -24,19 +24,6 @@ export MODELMESH=$(cat $MANIFESTS_DIR/params.env |grep odh-modelmesh=|cut -d= -f export MODELMESH_RUNTIME=$(cat $MANIFESTS_DIR/params.env |grep odh-modelmesh-runtime-adapter=|cut -d= -f2) export REST_PROXY=$(cat $MANIFESTS_DIR/params.env |grep odh-mm-rest-proxy=|cut -d= -f2) -# You can choose fast/stable for image tag to test easily -if [[ ${tag} == "fast" ]]; then - info ".. TAG=fast is set" - export MODELMESH=quay.io/opendatahub/modelmesh:fast - export MODELMESH_RUNTIME=quay.io/opendatahub/modelmesh-runtime-adapter:fast - export REST_PROXY=quay.io/opendatahub/rest-proxy:fast -elif [[ ${tag} == "stable" ]]; then - info ".. TAG=stable is set" - export MODELMESH=quay.io/opendatahub/modelmesh:stable - export MODELMESH_RUNTIME=quay.io/opendatahub/modelmesh-runtime-adapter:stable - export REST_PROXY=quay.io/opendatahub/rest-proxy:stable -fi - # You can set custom image for comoponents if [[ z${img_name} != z ]]; then case $img_name in diff --git a/opendatahub/scripts/manifests/fvt/fvt.yaml b/opendatahub/scripts/manifests/fvt/fvt.yaml index 04228bee..a206dce0 100644 --- a/opendatahub/scripts/manifests/fvt/fvt.yaml +++ b/opendatahub/scripts/manifests/fvt/fvt.yaml @@ -112,7 +112,7 @@ spec: value: AKIAIOSFODNN7EXAMPLE - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY - image: quay.io/jooholee/modelmesh-minio-dev-examples:latest + image: quay.io/jooholee/modelmesh-minio-dev-examples:v0.11.0 name: minio --- apiVersion: v1 @@ -175,7 +175,7 @@ spec: restartPolicy: OnFailure containers: - name: "copy-pod" - image: quay.io/jooholee/modelmesh-minio-examples:latest + image: quay.io/jooholee/modelmesh-minio-examples:v0.11.0 securityContext: allowPrivilegeEscalation: false command: ["/bin/sh", "-ex", "-c"] diff --git a/opendatahub/scripts/manifests/fvt/quickstart.yaml b/opendatahub/scripts/manifests/fvt/quickstart.yaml index e04bfeae..c49f4856 100644 --- a/opendatahub/scripts/manifests/fvt/quickstart.yaml +++ b/opendatahub/scripts/manifests/fvt/quickstart.yaml @@ -110,7 +110,7 @@ spec: - name: MINIO_SECRET_KEY value: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY # image: quay.io/cloudservices/minio:latest - image: kserve/modelmesh-minio-examples:latest + image: kserve/modelmesh-minio-examples:v0.11.0 name: minio --- apiVersion: v1 diff --git a/opendatahub/scripts/manifests/params.env b/opendatahub/scripts/manifests/params.env index c2f7c5f4..c90a63ed 100644 --- a/opendatahub/scripts/manifests/params.env +++ b/opendatahub/scripts/manifests/params.env @@ -1,7 +1,7 @@ monitoring-namespace=opendatahub -odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:fast -odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:fast -odh-modelmesh=quay.io/opendatahub/modelmesh:fast +odh-mm-rest-proxy=quay.io/opendatahub/rest-proxy:v0.11.0 +odh-modelmesh-runtime-adapter=quay.io/opendatahub/modelmesh-runtime-adapter:v0.11.0 +odh-modelmesh=quay.io/opendatahub/modelmesh:v0.11.0 odh-openvino=quay.io/opendatahub/openvino_model_server:2022.3-release -odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:fast -odh-model-controller=quay.io/opendatahub/odh-model-controller:fast +odh-modelmesh-controller=quay.io/opendatahub/modelmesh-controller:v0.11.0 +odh-model-controller=quay.io/opendatahub/odh-model-controller:v0.11.0 diff --git a/opendatahub/scripts/manifests/runtimes/kustomization.yaml b/opendatahub/scripts/manifests/runtimes/kustomization.yaml index 2ea7ce81..e89f335d 100644 --- a/opendatahub/scripts/manifests/runtimes/kustomization.yaml +++ b/opendatahub/scripts/manifests/runtimes/kustomization.yaml @@ -12,27 +12,24 @@ # See the License for the specific language governing permissions and # limitations under the License. resources: -- triton-2.x.yaml -- mlserver-1.x.yaml -- ovms-1.x.yaml -- torchserve-0.x.yaml - - - + - triton-2.x.yaml + - mlserver-1.x.yaml + - ovms-1.x.yaml + - torchserve-0.x.yaml images: -- name: tritonserver-2 - newName: nvcr.io/nvidia/tritonserver - newTag: 23.04-py3 -- name: mlserver-1 - newName: seldonio/mlserver - newTag: 1.3.2 -- name: ovms-1 - newName: openvino/model_server - newTag: "2022.3" -- name: torchserve-0 - newName: pytorch/torchserve - newTag: 0.7.1-cpu + - name: tritonserver-2 + newName: nvcr.io/nvidia/tritonserver + newTag: 23.04-py3 + - name: mlserver-1 + newName: seldonio/mlserver + newTag: 1.3.2 + - name: ovms-1 + newName: openvino/model_server + newTag: "2022.3" + - name: torchserve-0 + newName: pytorch/torchserve + newTag: 0.7.1-cpu apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization From ed7c8301e8fbbadb23534ef6204fb73a3ff1825f Mon Sep 17 00:00:00 2001 From: jooho Date: Sat, 16 Sep 2023 14:23:58 -0400 Subject: [PATCH 4/4] add max-retries for downloading-images --- .../scripts/download_images_on_nodes.sh | 19 +++++--- opendatahub/scripts/utils.sh | 35 +++++++++------ scripts/download-images-on-nodes.sh | 43 ++++++++++++------- 3 files changed, 62 insertions(+), 35 deletions(-) diff --git a/opendatahub/scripts/download_images_on_nodes.sh b/opendatahub/scripts/download_images_on_nodes.sh index 375a56f6..6e8ecef3 100755 --- a/opendatahub/scripts/download_images_on_nodes.sh +++ b/opendatahub/scripts/download_images_on_nodes.sh @@ -8,9 +8,13 @@ tag=$2 img_name=$3 img_url=$4 -TRITON_SERVER_IMG=nvcr.io/nvidia/tritonserver -ML_SERVER_IMG=seldonio/mlserver -TORCHSERVE_IMG=pytorch/torchserve +TRITON_SERVER_IMG_NAME=tritonserver +ML_SERVER_IMG_NAME=mlserver +TORCHSERVE_IMG_NAME=torchserve + +TRITON_SERVER_IMG=$(cat ${MANIFESTS_DIR}/runtimes/kustomization.yaml |grep ${TRITON_SERVER_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") +ML_SERVER_IMG=$(cat ${MANIFESTS_DIR}/runtimes/kustomization.yaml |grep ${ML_SERVER_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") +TORCHSERVE_IMG=$(cat ${MANIFESTS_DIR}/runtimes/kustomization.yaml |grep ${TORCHSERVE_IMG_NAME} -A1|grep "newName"|cut -d: -f2|tr -d " ") TRITON_SERVER_TAG=$(cat ${MANIFESTS_DIR}/runtimes/kustomization.yaml |grep ${TRITON_SERVER_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d " ") ML_SERVER_TAG=$(cat ${MANIFESTS_DIR}/runtimes/kustomization.yaml |grep ${ML_SERVER_IMG} -A1|grep "newTag"|cut -d: -f2|tr -d " ") @@ -104,9 +108,12 @@ spec: EOF wait_downloading_images $images $namespace - +export result=$(echo $?) echo info "Delete image downloading daemonset" oc delete daemonset image-downloader --force --grace-period=0 - -success "[SUCCESS] Downloaded necessary images on all nodes" +if [[ $result == 0 ]];then + success "[SUCCESS] Downloaded necessary images on all nodes" +else + info "[INFO] Exceed retries nubmer to downloaded so it will move on next step" +fi diff --git a/opendatahub/scripts/utils.sh b/opendatahub/scripts/utils.sh index a642edec..d7083ae2 100644 --- a/opendatahub/scripts/utils.sh +++ b/opendatahub/scripts/utils.sh @@ -118,7 +118,7 @@ wait_downloading_images(){ expectedTotalCount=$((${#images[@]}*${nodeCount})) totalCount=0 retries=0 - max_retries=20 + max_retries=10 echo "Node: ${nodeCount}, Required Images: ${#images[@]}, Expected Downloading Count: ${expectedTotalCount}" sleep 10s @@ -128,7 +128,6 @@ wait_downloading_images(){ echo "Downloading required images.. please wait!" for element in "${images[@]}" do - retries=$((retries + 1 )) case "$element" in *triton*) isDownloaded=$(oc describe pod -n $namespace -l app=image-downloader|grep "Successfully pulled image \"${TRITON_SERVER}\""|wc -l) @@ -139,7 +138,7 @@ wait_downloading_images(){ echo "triton-server-count count: ${triton_server_count} - ${element}" fi ;; - *openvino*) + *model_server*) isDownloaded=$(oc describe pod -n $namespace -l app=image-downloader|grep "Successfully pulled image \"${OPENVINO}\""|wc -l) existImage=$(oc describe pod -n $namespace -l app=image-downloader|grep "Container image \"${OPENVINO}\" already present on machine"|wc -l) if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then @@ -164,7 +163,7 @@ wait_downloading_images(){ existImage=$(oc describe pod -n $namespace -l app=image-downloader|grep "Container image \"${TORCHSERVE}\" already present on machine"|wc -l) if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then torchserve_count=$(( ${isDownloaded} + ${existImage} )) - totalCount=$((totalCount + ${ml_server_count} )) + totalCount=$((totalCount + ${torchserve_count} )) echo "torchserve downloaded: ${torchserve_count} - ${element}" fi ;; @@ -208,20 +207,28 @@ wait_downloading_images(){ fi ;; *) - echo "Not expected images" + echo "Not expected images(${element})" exit 1 ;; esac done - if [[ $totalCount -lt $expectedTotalCount ]] && [[ ${retries} -lt ${max_retries} ]]; then - echo - echo "Reset totalCount = 0 and checking it again after 60s" - sleep 60s + # echo "2- $totalCount" + # echo "3- $expectedTotalCount" + # echo "4- $retries" + # echo "5- $max_retries" + + if [[ $totalCount -lt $expectedTotalCount ]]; then + if [[ ${retries} -lt ${max_retries} ]]; then + echo + retries=$((retries + 1 )) + echo "Reset totalCount = 0 and checking it again after 60s" + sleep 60s + else + echo "Exceed max retries(${max_retries})" + return 1 + fi + else + echo "All images are downloaded" fi done - if [[ ${retries} -lt ${max_retries} ]]; then - echo "All images are downloaded" - else - echo "Exceed max retries(${max_retries})" - fi } diff --git a/scripts/download-images-on-nodes.sh b/scripts/download-images-on-nodes.sh index eff9ddb8..90e613fd 100755 --- a/scripts/download-images-on-nodes.sh +++ b/scripts/download-images-on-nodes.sh @@ -56,6 +56,8 @@ wait_downloading_images(){ nodeCount=$(oc get node|grep worker|grep -v infra|wc -l) expectedTotalCount=$((${#images[@]}*${nodeCount})) totalCount=0 + retries=0 + max_retries=10 echo "Node: ${nodeCount}, Required Images: ${#images[@]}, Expected Downloading Count: ${expectedTotalCount}" sleep 10s @@ -72,7 +74,7 @@ wait_downloading_images(){ if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then triton_server_count=$(( ${isDownloaded} + ${existImage} )) totalCount=$((totalCount + ${triton_server_count})) - echo "triton-server-count count: ${triton_server_count}" + echo "triton-server-count count: ${triton_server_count} - ${element}" fi ;; *model_server*) @@ -81,7 +83,7 @@ wait_downloading_images(){ if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then openvino_count=$(( ${isDownloaded} + ${existImage} )) totalCount=$((totalCount + ${openvino_count})) - echo "openvino downloaded: ${openvino_count}" + echo "openvino downloaded: ${openvino_count} - ${element}" fi ;; @@ -91,7 +93,7 @@ wait_downloading_images(){ if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then ml_server_count=$(( ${isDownloaded} + ${existImage} )) totalCount=$((totalCount + ${ml_server_count} )) - echo "ml-server downloaded: ${ml_server_count}" + echo "ml-server downloaded: ${ml_server_count} - ${element}" fi ;; @@ -100,8 +102,8 @@ wait_downloading_images(){ existImage=$(oc describe pod -l app=image-downloader|grep "Container image \"${TORCHSERVE}\" already present on machine"|wc -l) if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then torchserve_count=$(( ${isDownloaded} + ${existImage} )) - totalCount=$((totalCount + ${ml_server_count} )) - echo "torchserve downloaded: ${torchserve_count}" + totalCount=$((totalCount + ${torchserve_count} )) + echo "torchserve downloaded: ${torchserve_count} - ${element}" fi ;; @@ -111,7 +113,7 @@ wait_downloading_images(){ if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then modelmesh_count=$(( ${isDownloaded} + ${existImage} )) totalCount=$((totalCount + ${modelmesh_count})) - echo "modelmesh downloaded: ${modelmesh_count}" + echo "modelmesh downloaded: ${modelmesh_count} - ${element}" fi ;; @@ -121,7 +123,7 @@ wait_downloading_images(){ if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then modlemesh_runtime_count=$(( ${isDownloaded} + ${existImage} )) totalCount=$((totalCount + ${modlemesh_runtime_count} )) - echo "modelmesh-runtime downloaded: ${modlemesh_runtime_count}" + echo "modelmesh-runtime downloaded: ${modlemesh_runtime_count} - ${element}" fi ;; @@ -131,24 +133,35 @@ wait_downloading_images(){ if [[ ${isDownloaded} != 0 || ${existImage} != 0 ]]; then rest_proxy_count=$(( ${isDownloaded} + ${existImage} )) totalCount=$((totalCount + ${rest_proxy_count} )) - echo "rest-proxy downloaded: ${rest_proxy_count}" + echo "rest-proxy downloaded: ${rest_proxy_count} - ${element}" fi ;; *) - echo "Not expected images(${element})" + echo "Not expected images(${element})" exit 1 ;; esac done - if [[ $totalCount != $expectedTotalCount ]]; then - echo - echo "Reset totalCount = 0 and checking it again after 60s" - sleep 60s + + # echo "2- $totalCount" + # echo "3- $expectedTotalCount" + # echo "4- $retries" + # echo "5- $max_retries" + if [[ $totalCount -lt $expectedTotalCount ]]; then + if [[ ${retries} -lt ${max_retries} ]]; then + echo + retries=$((retries + 1 )) + echo "Reset totalCount = 0 and checking it again after 60s" + sleep 60s + else + echo "[INFO] Exceed max retries(${max_retries}) to downloaded so it will move on next step" + break + fi + else + echo "[SUCCESS] Downloaded necessary images on all nodes" fi done - echo "All images are downloaded" } - cat <