Skip to content

Commit

Permalink
Update get credentials to work with new auto deploy naming schema
Browse files Browse the repository at this point in the history
* Auto deployed clusters are no longer recycling names; instead each
  auto deployed cluster will have a unique name

* Use regexes to identify the appropriate auto deployed cluster

* Only consider clusters with a minimum age; this is a hack to ensure
  clusters are properly setup.

* Related to: kubeflow#444
  • Loading branch information
Jeremy Lewi committed Nov 5, 2019
1 parent e05ee13 commit a5e1640
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 36 deletions.
81 changes: 56 additions & 25 deletions py/kubeflow/testing/get_kf_testing_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@
"""

import argparse
import datetime
from dateutil import parser as date_parser
import logging
import pprint
import re
import subprocess
import yaml

from googleapiclient import discovery
from kubeflow.testing import util
from oauth2client.client import GoogleCredentials

# Default pattern to match auto deployed clusters from master
DEFAULT_PATTERN = r"kf-vmaster-(?!n\d\d)"

def get_deployment_endpoint(project, deployment):
"""Format endpoint service name using default logic.
Expand All @@ -32,15 +38,19 @@ def get_deployment_endpoint(project, deployment):
project=project,
deployment=deployment)

def list_deployments(project, name_prefix, testing_label, http=None, desc_ordered=True):
def list_deployments(project, name_pattern, testing_label, http=None,
desc_ordered=True, min_age=datetime.timedelta(minutes=20)):
"""List all the deployments matching name prefix and having testing labels.
Args:
project: string, Name of the deployed project.
name_prefix: string, Base name of deployments.
name_pattern: string, Regex pattern to match eligible clusters.
testing_label: string, labels assigned to testing clusters used for identification.
http: httplib2.Http, An instance of httplib2.Http or something that acts
like it that HTTP requests will be made through. Should only be used in tests.
min_age: Minimum age for a deployment to be eligible for inclusion.
This is a bit of a hack to ensure a Kubeflow deployment is fully
deployed before we start running samples on it.
Returns:
deployments: list of dictionary in the format of {
Expand All @@ -60,9 +70,11 @@ def list_deployments(project, name_prefix, testing_label, http=None, desc_ordere
dm_client = dm.deployments()
resource_client = dm.resources()

list_filter = "labels.purpose eq " + testing_label
list_filter = ""
if testing_label:
list_filter = "labels.purpose eq " + testing_label
# pylint: disable=anomalous-backslash-in-string
name_re = re.compile("{0}\-n[0-9]+\Z".format(name_prefix))
name_re = re.compile(name_pattern)
# pylint: enable=anomalous-backslash-in-string
deployments = dm_client.list(project=project, filter=list_filter).execute()
next_page_token = None
Expand All @@ -73,7 +85,28 @@ def list_deployments(project, name_prefix, testing_label, http=None, desc_ordere
name = d.get("name", "")
if not name or name_re.match(name) is None:
continue
resource = resource_client.get(project=project, deployment=name, resource=name).execute()

if name.endswith("storage"):
continue
resource = resource_client.get(project=project, deployment=name,
resource=name).execute()

full_insert_time = d.get("insertTime")

if not full_insert_time:
logging.info("Skipping deployment %s; insertion time is unknown",
full_insert_time)
continue
create_time = date_parser.parse(full_insert_time)
now = datetime.datetime.now(create_time.tzinfo)

age = now - create_time

if age < min_age:
logging.info("Skipping deployment %s with age %s; it is too new",
name, age)
continue

# Skip the latest deployment if having any kind of errors.
if (resource.get("error", None) and resource.get("error", {}).get("errors", [])) or \
not resource.get("properties", ""):
Expand All @@ -85,7 +118,7 @@ def list_deployments(project, name_prefix, testing_label, http=None, desc_ordere
cls.append({
"name": name,
"endpoint": get_deployment_endpoint(project, name),
"insertTime": d.get("insertTime", "1969-12-31T23:59:59+00:00"),
"insertTime": full_insert_time,
"zone": info["zone"],
})

Expand Down Expand Up @@ -137,12 +170,11 @@ def get_deployment(project, name_prefix, testing_label, http=None, desc_ordered=

return deployments[0][field]

def get_latest(version, project="kubeflow-ci-deployment", testing_label="kf-test-cluster",
base_name="kf-v", http=None, field="endpoint"):
"""Convenient function to get the latest deployment's information using just version.
def get_latest(project="kubeflow-ci-deployment", testing_label=None,
base_name=DEFAULT_PATTERN, http=None, field="endpoint"):
"""Convenient function to get the latest deployment's information using regex.
Args:
version: string, version of deployed testing clusters to find.
project: string, Name of deployed GCP project. Optional.
testing_label: string, annotation used to identify testing clusters. Optional.
http: httplib2.Http, An instance of httplib2.Http or something that acts
Expand All @@ -158,42 +190,43 @@ def get_latest(version, project="kubeflow-ci-deployment", testing_label="kf-test
}
field == ("endpoint", "zone", "name"): string value of the field specified.
"""
name_prefix = base_name + version
return get_deployment(project, name_prefix, testing_label, http=http, field=field)
return get_deployment(project, base_name, testing_label, http=http, field=field)

def get_latest_credential(version, project="kubeflow-ci-deployment", base_name="kf-v",
testing_label="kf-test-cluster"):
def get_latest_credential(project="kubeflow-ci-deployment",
base_name=DEFAULT_PATTERN,
testing_label=None):
"""Convenient function to get the latest deployment information and use it to get
credentials from GCP.
Args:
version: string, version of deployed testing clusters to find.
project: string, Name of deployed GCP project. Optional.
testing_label: string, annotation used to identify testing clusters. Optional.
"""
dm = get_latest(version, project=project, testing_label=testing_label, base_name=base_name,
field="all")
util.maybe_activate_service_account()
dm = get_latest(project=project, testing_label=testing_label,
base_name=base_name, field="all")

subprocess.call(["gcloud", "container", "clusters", "get-credentials", dm["name"],
"--project="+project, "--zone="+dm["zone"]])

def list_dms(args):
logging.info("Calling list deployments.")
name_prefix = args.base_name + args.version
name_prefix = args.base_name
pp = pprint.PrettyPrinter(indent=1)
pp.pprint(list_deployments(args.project, name_prefix, args.testing_cluster_label,
desc_ordered=args.find_latest_deployed))

def get_dm(args):
logging.info("Calling get deployment.")
name_prefix = args.base_name + args.version
name_prefix = args.base_name
pp = pprint.PrettyPrinter(indent=1)
pp.pprint((get_deployment(args.project, name_prefix, args.testing_cluster_label,
field=args.field,
desc_ordered=args.find_latest_deployed)))

def get_credential(args):
logging.info("Calling get_credential - this call needs gcloud client CLI.")
name_prefix = args.base_name + args.version
name_prefix = args.base_name
dm = get_deployment(args.project, name_prefix, args.testing_cluster_label,
desc_ordered=args.find_latest_deployed,
field="all")
Expand All @@ -214,12 +247,10 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
"--project", default="kubeflow-ci-deployment", type=str,
help=("The project."))
parser.add_argument(
"--base_name", default="kf-v", type=str, help=("Deployment name prefix"))
parser.add_argument(
"--version", default="master", type=str, choices=["0-5", "master"],
help=("Kubeflow main version."))
"--base_name", default=DEFAULT_PATTERN, type=str,
help=("Regex to match clusters"))
parser.add_argument(
"--testing_cluster_label", default="kf-test-cluster", type=str,
"--testing_cluster_label", default="", type=str,
help=("Label used to identify the deployment is for testing."))
parser.add_argument(
"--field", default="endpoint", type=str,
Expand Down
27 changes: 16 additions & 11 deletions py/kubeflow/tests/get_kf_testing_cluster_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
TEST_PROJECT = "kubeflow-ci-foo"
TEST_LABEL = "kf-foo-label"

class Deployment(object):
class Deployment:
"""Simple data carrier for a deployment."""
def __init__(self, name, insert_time, zone="us-west1-b"):
self.name = name
Expand Down Expand Up @@ -109,10 +109,11 @@ def test_list_deployments_name_filter(self):
self.assertListEqual(actual, expected)

def test_list_deployments_default_insertime(self):
"""Verify behavior when one of the deployments is missing a timestamp."""
deployments = [
Deployment("kf-vfoo-n00", "2019-04-01T23:59:59+00:00"),
Deployment("kf-vfoo-n01", "2019-04-02T23:59:59+00:00"),
Deployment("kf-vfoo-n02", "2019-04-03T23:59:59+00:00"),
Deployment("kf-vfoo-00", "2019-04-01T23:59:59+00:00"),
Deployment("kf-vfoo-01", "2019-04-02T23:59:59+00:00"),
Deployment("kf-vfoo-02", "2019-04-03T23:59:59+00:00"),
]
list_resp = {
"deployments": create_mock_list_resp(deployments),
Expand All @@ -127,11 +128,14 @@ def test_list_deployments_default_insertime(self):
({"status": "200"}, json.dumps(create_mock_resource_resp(deployments[2]))),
])
actual = get_kf_testing_cluster.list_deployments(TEST_PROJECT,
"kf-vfoo",
"kf-vfoo-??",
TEST_LABEL,
http=http)
expected = create_expected_list_resp(deployments)
expected[-1]["insertTime"] = "1969-12-31T23:59:59+00:00"

# Since the last deployment doesn't have an insertTime it will be ignored
expected = expected[0:2]

expected.sort(key=lambda entry: entry["insertTime"],
reverse=True)
self.assertListEqual(actual, expected)
Expand Down Expand Up @@ -208,9 +212,9 @@ def test_get_deployment(self):

def test_get_latest(self):
deployments = [
Deployment("kf-vfoo-n00", "2019-04-01T23:59:59+00:00"),
Deployment("kf-vfoo-n01", "2019-04-02T23:59:59+00:00"),
Deployment("kf-vfoo-n02", "2019-04-03T23:59:59+00:00"),
Deployment("kf-vfoo-00", "2019-04-01T23:59:59+00:00"),
Deployment("kf-vfoo-01", "2019-04-02T23:59:59+00:00"),
Deployment("kf-vfoo-02", "2019-04-03T23:59:59+00:00"),
]
list_resp = {
"deployments": create_mock_list_resp(deployments),
Expand All @@ -222,9 +226,10 @@ def test_get_latest(self):
({"status": "200"}, json.dumps(create_mock_resource_resp(deployments[1]))),
({"status": "200"}, json.dumps(create_mock_resource_resp(deployments[2]))),
])
self.assertEqual(get_kf_testing_cluster.get_latest("foo", http=http, project=TEST_PROJECT),
self.assertEqual(get_kf_testing_cluster.get_latest(
project=TEST_PROJECT, base_name="kf-vfoo-??", http=http),
get_kf_testing_cluster.get_deployment_endpoint(TEST_PROJECT,
"kf-vfoo-n02"))
"kf-vfoo-02"))

if __name__ == '__main__':
unittest.main()

0 comments on commit a5e1640

Please sign in to comment.