[RHOAIENG-5073] Add pod port forwarding for raw due to headless service (#1343)

Signed-off-by: Tarun Kumar <takumar@redhat.com>
tarukumar authored Apr 3, 2024
1 parent e10f8fb commit 01b755e
Showing 3 changed files with 50 additions and 28 deletions.
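In KServe RawDeployment mode the predictor Service is headless, which is why forwarding through the Service broke; the tests now resolve the predictor pod and forward to it directly. A minimal sketch of the before/after oc commands, with hypothetical namespace and resource names:

    # Old approach: forward through the predictor Service (unreliable when the Service is headless)
    oc -n my-namespace port-forward svc/my-model-predictor 8033:80
    # New approach: forward straight to the predictor pod's gRPC port
    oc -n my-namespace port-forward pod/my-model-predictor-abc123 8033:8033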
9 changes: 5 additions & 4 deletions ods_ci/tests/Resources/CLI/ModelServing/llm.resource
@@ -183,7 +183,7 @@ Compile Inference Service YAML
${mode}= Set Variable ${DSC_KSERVE_MODE}
ELSE
${mode}= Get KServe Default Deployment Mode From DSC
-END
+END
Log message=Using defaultDeploymentMode set in the DSC: ${mode}
END

@@ -388,7 +388,8 @@ Compile Deploy And Query LLM model
... namespace=${namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${isvc_name}
... namespace=${namespace}
-IF ${IS_KSERVE_RAW} Start Port-forwarding namespace=${namespace} model_name=${model_name}
+${pod_name}= Get Pod Name namespace=${namespace} label_selector=serving.kserve.io/inferenceservice=${isvc_name}
+IF ${IS_KSERVE_RAW} Start Port-forwarding namespace=${namespace} pod_name=${pod_name}
Query Model Multiple Times isvc_name=${isvc_name} model_name=${model_name}
... n_times=${n_queries} namespace=${namespace} query_idx=${query_idx}
... validate_response=${validate_response} protocol=${protocol}
@@ -755,8 +756,8 @@ Get KServe Default Deployment Mode From DSC
RETURN ${mode}

Start Port-forwarding
-[Arguments] ${namespace} ${model_name} ${process_alias}=llm-query-process
-${process}= Start Process oc -n ${namespace} port-forward svc/${model_name}-predictor 8033:80
+[Arguments] ${namespace} ${pod_name} ${process_alias}=llm-query-process
+${process}= Start Process oc -n ${namespace} port-forward pod/${pod_name} 8033:8033
... alias=${process_alias} stderr=STDOUT shell=yes
Process Should Be Running ${process}
sleep 7s
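The Start Port-forwarding keyword now expects a pod name, which the callers below obtain with the Get Pod Name keyword using the InferenceService label selector. A hand-run equivalent of that lookup, assuming hypothetical names, could be:

    oc get pod -n my-namespace \
        -l serving.kserve.io/inferenceservice=my-model \
        -o jsonpath='{.items[0].metadata.name}'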
@@ -38,8 +38,9 @@ Verify User Can Serve And Query A bigscience/mt0-xxl Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=2 validate_response=${TRUE} # temp
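With the forward in place, RawDeployment queries in these tests target localhost:8033 over gRPC. A manual spot check might look like the sketch below; the fmaas.GenerationService/Generate method name follows the TGIS generation API, and it, the prompt, and the reliance on server reflection are assumptions rather than anything this diff confirms:

    # Assumes grpcurl is installed and the TGIS endpoint exposes gRPC server reflection
    grpcurl -plaintext \
        -d '{"requests": [{"text": "What is the boiling point of nitrogen?"}]}' \
        localhost:8033 fmaas.GenerationService/Generate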
@@ -83,8 +84,9 @@ Verify User Can Serve And Query A google/flan-t5-xl Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=3 validate_response=${TRUE}
@@ -128,8 +130,9 @@ Verify User Can Serve And Query A google/flan-t5-xxl Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=3 validate_response=${TRUE}
@@ -172,8 +175,9 @@ Verify User Can Serve And Query A elyza/elyza-japanese-llama-2-7b-instruct Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=4 validate_response=${TRUE} # temp
@@ -217,8 +221,9 @@ Verify User Can Serve And Query A ibm/mpt-7b-instruct2 Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=0 validate_response=${TRUE}
@@ -262,8 +267,9 @@ Verify User Can Serve And Query A google/flan-ul-2 Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=3 validate_response=${TRUE}
@@ -307,8 +313,9 @@ Verify User Can Serve And Query A codellama/codellama-34b-instruct-hf Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=3000s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=5 validate_response=${TRUE}
@@ -343,8 +350,9 @@ Verify User Can Serve And Query A meta-llama/llama-2-13b-chat Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=900s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc
... namespace=${test_namespace} query_idx=0 validate_response=${TRUE} # temp
@@ -393,8 +401,9 @@ Verify User Can Serve And Query A google/flan-t5-xl Prompt Tuned Model
... namespace=${test_namespace}
Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=${model_name}
... namespace=${test_namespace} timeout=300s
+${pod_name}= Get Pod Name namespace=${test_namespace} label_selector=serving.kserve.io/inferenceservice=${model_name}
Run Keyword If "${KSERVE_MODE}"=="RawDeployment"
-... Start Port-forwarding namespace=${test_namespace} model_name=${model_name}
+... Start Port-forwarding namespace=${test_namespace} pod_name=${pod_name}
${prompt_tuned_params}= Create Dictionary prefix_id=flan-t5-xl-tuned
Query Model Multiple Times model_name=${model_name} runtime=${TGIS_RUNTIME_NAME}
... inference_type=all-tokens n_times=1 protocol=grpc