[AWS Sagemaker] aws-samples kmeans-hpo pipeline test (kubeflow#3905)

* aws-samples kmeans-hpo pipeline test
  - code clean up
  - removed unused args
* add test dependency
* Trigger Build
RedbackThomson · Jun 17, 2020 · aa89fa3
1 parent 9db2919
commit aa89fa3
Show file tree

Hide file tree

Showing 16 changed files with 67 additions and 59 deletions.
diff --git a/...nents/aws/sagemaker/tests/integration_tests/component_tests/test_groundtruth_component.py b/...nents/aws/sagemaker/tests/integration_tests/component_tests/test_groundtruth_component.py
@@ -1,11 +1,9 @@
 import pytest
 import os
-import json
 import utils
 from utils import kfp_client_utils
 from utils import sagemaker_utils
 from test_workteam_component import create_workteamjob
-import time
 
 
 @pytest.mark.parametrize(

diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_hpo_component.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_hpo_component.py
@@ -10,7 +10,12 @@
 
 @pytest.mark.parametrize(
     "test_file_dir",
-    [pytest.param("resources/config/kmeans-mnist-hpo", marks=pytest.mark.canary_test)],
+    [
+        pytest.param(
+            "resources/config/kmeans-mnist-hpo", marks=pytest.mark.canary_test
+        ),
+        "resources/config/aws-samples-hpo-spot-training",
+    ],
 )
 def test_hyperparameter_tuning(
     kfp_client, experiment_id, region, sagemaker_client, test_file_dir
@@ -23,19 +28,10 @@ def test_hyperparameter_tuning(
             os.path.join(download_dir, "config.yaml"),
         )
     )
-
-    test_params["Arguments"]["channels"] = json.dumps(
-        test_params["Arguments"]["channels"]
-    )
-    test_params["Arguments"]["static_parameters"] = json.dumps(
-        test_params["Arguments"]["static_parameters"]
-    )
-    test_params["Arguments"]["integer_parameters"] = json.dumps(
-        test_params["Arguments"]["integer_parameters"]
-    )
-    test_params["Arguments"]["categorical_parameters"] = json.dumps(
-        test_params["Arguments"]["categorical_parameters"]
-    )
+    if "job_name" in test_params["Arguments"]:
+        test_params["Arguments"]["job_name"] = (
+            utils.generate_random_string(5) + "-" + test_params["Arguments"]["job_name"]
+        )
 
     _, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
         kfp_client,
@@ -68,6 +64,8 @@ def test_hyperparameter_tuning(
     print(f"HPO job name: {hpo_job_name}")
     hpo_response = sagemaker_utils.describe_hpo_job(sagemaker_client, hpo_job_name)
     assert hpo_response["HyperParameterTuningJobStatus"] == "Completed"
+    if "job_name" in test_params["Arguments"]:
+        assert hpo_response["HyperParameterTuningJobName"] == hpo_job_name
 
     # Verify training image output is an ECR image
     training_image = utils.read_from_file_in_tar(

diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_train_component.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_train_component.py
@@ -1,6 +1,5 @@
 import pytest
 import os
-import json
 import utils
 from utils import kfp_client_utils
 from utils import minio_utils
@@ -33,12 +32,6 @@ def test_trainingjob(
         )
     )
 
-    test_params["Arguments"]["hyperparameters"] = json.dumps(
-        test_params["Arguments"]["hyperparameters"]
-    )
-    test_params["Arguments"]["channels"] = json.dumps(
-        test_params["Arguments"]["channels"]
-    )
     _, _, workflow_json = kfp_client_utils.compile_run_monitor_pipeline(
         kfp_client,
         experiment_id,

diff --git a/components/aws/sagemaker/tests/integration_tests/component_tests/test_workteam_component.py b/components/aws/sagemaker/tests/integration_tests/component_tests/test_workteam_component.py
@@ -1,6 +1,5 @@
 import pytest
 import os
-import json
 import utils
 from utils import kfp_client_utils
 from utils import sagemaker_utils
@@ -58,7 +57,7 @@ def test_workteamjob(
 
     outputs = {"sagemaker-private-workforce": ["workteam_arn"]}
 
-    try: 
+    try:
         output_files = minio_utils.artifact_download_iterator(
             workflow_json, outputs, download_dir
         )

diff --git a/components/aws/sagemaker/tests/integration_tests/environment.yml b/components/aws/sagemaker/tests/integration_tests/environment.yml
@@ -18,5 +18,6 @@ dependencies:
     - kfp==0.5.*
     - minio==5.0.10
     - sagemaker==1.56.*
+    - ruamel.yaml==0.16.*
 
 
diff --git a/...emaker/tests/integration_tests/resources/config/aws-samples-hpo-spot-training/config.yaml b/...emaker/tests/integration_tests/resources/config/aws-samples-hpo-spot-training/config.yaml
@@ -0,0 +1,31 @@
+PipelineDefinition: ../../../../../samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py
+TestName: aws-samples-hpo-spot-training
+Timeout: 7200
+ExpectedTrainingImage: ((KMEANS_REGISTRY)).dkr.ecr.((REGION)).amazonaws.com/kmeans:1
+Arguments:
+  job_name: HPO-kmeans-sample
+  region: ((REGION))
+  channels:
+    - ChannelName: train
+      DataSource:
+        S3DataSource:
+          S3Uri: s3://((DATA_BUCKET))/mnist_kmeans_example/train_data
+          S3DataType: S3Prefix
+          S3DataDistributionType: FullyReplicated
+      CompressionType: None
+      RecordWrapperType: None
+      InputMode: File
+    - ChannelName: test
+      DataSource:
+        S3DataSource:
+          S3Uri: s3://((DATA_BUCKET))/mnist_kmeans_example/test_data
+          S3DataType: S3Prefix
+          S3DataDistributionType: FullyReplicated
+      CompressionType: None
+      RecordWrapperType: None
+      InputMode: File
+  output_location: s3://((DATA_BUCKET))/mnist_kmeans_example/output
+  spot_instance: True
+  checkpoint_config: 
+    S3Uri: s3://((DATA_BUCKET))/mnist_kmeans_example/output/checkpoints/
+  role_arn: ((ROLE_ARN))
diff --git a/...ker/tests/integration_tests/resources/config/image-classification-groundtruth/config.yaml b/...ker/tests/integration_tests/resources/config/image-classification-groundtruth/config.yaml
@@ -1,6 +1,6 @@
 PipelineDefinition: resources/definition/groundtruth_pipeline.py
 TestName: image-classification-groundtruth
-Timeout: 1200
+Timeout: 300
 StatusToCheck: 'running'
 Arguments:
   region: ((REGION))

diff --git a/...ts/aws/sagemaker/tests/integration_tests/resources/definition/create_endpoint_pipeline.py b/...ts/aws/sagemaker/tests/integration_tests/resources/definition/create_endpoint_pipeline.py
@@ -1,7 +1,6 @@
 import kfp
 from kfp import components
 from kfp import dsl
-from kfp.aws import use_aws_secret
 
 sagemaker_model_op = components.load_component_from_file("../../model/component.yaml")
 sagemaker_deploy_op = components.load_component_from_file("../../deploy/component.yaml")

diff --git a/...nents/aws/sagemaker/tests/integration_tests/resources/definition/create_model_pipeline.py b/...nents/aws/sagemaker/tests/integration_tests/resources/definition/create_model_pipeline.py
@@ -1,7 +1,6 @@
 import kfp
 from kfp import components
 from kfp import dsl
-from kfp.aws import use_aws_secret
 
 sagemaker_model_op = components.load_component_from_file("../../model/component.yaml")
 

diff --git a/...onents/aws/sagemaker/tests/integration_tests/resources/definition/groundtruth_pipeline.py b/...onents/aws/sagemaker/tests/integration_tests/resources/definition/groundtruth_pipeline.py
@@ -1,9 +1,6 @@
 import kfp
-import json
-import copy
 from kfp import components
 from kfp import dsl
-from kfp.aws import use_aws_secret
 
 sagemaker_gt_op = components.load_component_from_file(
     "../../ground_truth/component.yaml"
@@ -34,7 +31,7 @@ def ground_truth_test(
     workteam_arn="",
 ):
 
-    ground_truth_train = sagemaker_gt_op(
+    sagemaker_gt_op(
         region=region,
         role=role,
         job_name=ground_truth_train_job_name,

diff --git a/components/aws/sagemaker/tests/integration_tests/resources/definition/hpo_pipeline.py b/components/aws/sagemaker/tests/integration_tests/resources/definition/hpo_pipeline.py
@@ -1,7 +1,7 @@
 import kfp
 from kfp import components
 from kfp import dsl
-from kfp.aws import use_aws_secret
+
 
 sagemaker_hpo_op = components.load_component_from_file(
     "../../hyperparameter_tuning/component.yaml"

diff --git a/...ents/aws/sagemaker/tests/integration_tests/resources/definition/transform_job_pipeline.py b/...ents/aws/sagemaker/tests/integration_tests/resources/definition/transform_job_pipeline.py
@@ -1,7 +1,6 @@
 import kfp
 from kfp import components
 from kfp import dsl
-from kfp.aws import use_aws_secret
 
 sagemaker_model_op = components.load_component_from_file("../../model/component.yaml")
 sagemaker_batch_transform_op = components.load_component_from_file(

diff --git a/components/aws/sagemaker/tests/integration_tests/resources/definition/workteam_pipeline.py b/components/aws/sagemaker/tests/integration_tests/resources/definition/workteam_pipeline.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
 
 import kfp
-import json
-import copy
 from kfp import components
 from kfp import dsl
-from kfp.aws import use_aws_secret
 
 sagemaker_workteam_op = components.load_component_from_file(
     "../../workteam/component.yaml"
@@ -20,7 +17,7 @@ def workteam_test(
     region="", team_name="", description="", user_pool="", user_groups="", client_id=""
 ):
 
-    workteam = sagemaker_workteam_op(
+    sagemaker_workteam_op(
         region=region,
         team_name=team_name,
         description=description,

diff --git a/components/aws/sagemaker/tests/integration_tests/utils/__init__.py b/components/aws/sagemaker/tests/integration_tests/utils/__init__.py
@@ -2,7 +2,7 @@
 import subprocess
 import pytest
 import tarfile
-import yaml
+from ruamel.yaml import YAML
 import random
 import string
 import shutil
@@ -93,7 +93,8 @@ def replace_placeholders(input_filename, output_filename):
 
 def load_params(file_name):
     with open(file_name, "r") as f:
-        return yaml.safe_load(f)
+        yaml = YAML(typ="safe")
+        return yaml.load(f)
 
 
 def generate_random_string(length):

diff --git a/components/aws/sagemaker/tests/integration_tests/utils/kfp_client_utils.py b/components/aws/sagemaker/tests/integration_tests/utils/kfp_client_utils.py
@@ -1,7 +1,6 @@
 import os
 import utils
 import pytest
-import time
 
 from utils import argo_utils
 
@@ -24,23 +23,17 @@ def compile_and_run_pipeline(
     return run.id
 
 
-def wait_for_job_completion(client, run_id, timeout, status_to_check):
-    response = client.wait_for_run_completion(run_id, timeout)
-    status = None
-    if response.run.status:
-       status = response.run.status.lower() == status_to_check
-    return status
-
-
 def wait_for_job_status(client, run_id, timeout, status_to_check="succeeded"):
-    if status_to_check == "succeeded":
-        status = wait_for_job_completion(client, run_id, timeout, status_to_check)
-    else:
-        time.sleep(timeout)
+    response = None
+    try:
+        response = client.wait_for_run_completion(run_id, timeout)
+    except TimeoutError:
+        print(f"run-id: {run_id} did not stop within specified timeout")
         response = client.get_run(run_id)
-        status = None
-        if response.run.status:
-           status = response.run.status.lower() == status_to_check
+
+    status = False
+    if response and response.run.status:
+        status = response.run.status.lower() == status_to_check
     return status
 
 

diff --git a/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py b/samples/contrib/aws-samples/mnist-kmeans-sagemaker/kmeans-hpo-pipeline.py
@@ -3,12 +3,15 @@
 
 import kfp
 import json
+import os
 import copy
 from kfp import components
 from kfp import dsl
 from kfp.aws import use_aws_secret
 
-sagemaker_hpo_op = components.load_component_from_file('../../../../components/aws/sagemaker/hyperparameter_tuning/component.yaml')
+
+components_dir = os.path.join(os.path.dirname(__file__), '../../../../components/aws/sagemaker/')
+sagemaker_hpo_op = components.load_component_from_file(os.path.join(components_dir, 'hyperparameter_tuning/component.yaml'))
 
 
 channelObjList = []
@@ -40,7 +43,7 @@
     description='SageMaker hyperparameter tuning job test'
 )
 def hpo_test(region='us-east-1',
-    hpo_job_name='HPO-kmeans-sample',
+    job_name='HPO-kmeans-sample',
     image='',
     algorithm_name='K-Means',
     training_input_mode='File',
@@ -81,7 +84,7 @@ def hpo_test(region='us-east-1',
     training = sagemaker_hpo_op(
         region=region,
         endpoint_url=endpoint_url,
-        job_name=hpo_job_name,
+        job_name=job_name,
         image=image,
         training_input_mode=training_input_mode,
         algorithm_name=algorithm_name,