From 5396b82e919759faaf3440bf27a1dd71d5cfdd5d Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Mon, 21 Jan 2019 01:22:10 -0800 Subject: [PATCH 1/2] SDK/Components - Moved naming-related functions to _naming.py --- sdk/python/kfp/components/_components.py | 39 +------------- sdk/python/kfp/components/_dsl_bridge.py | 2 +- sdk/python/kfp/components/_naming.py | 66 ++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 39 deletions(-) create mode 100644 sdk/python/kfp/components/_naming.py diff --git a/sdk/python/kfp/components/_components.py b/sdk/python/kfp/components/_components.py index e9c70511eb2..a8049416ca5 100644 --- a/sdk/python/kfp/components/_components.py +++ b/sdk/python/kfp/components/_components.py @@ -21,6 +21,7 @@ import sys from collections import OrderedDict +from ._naming import _sanitize_file_name, _sanitize_python_function_name, _make_name_unique_by_adding_index from ._yaml_utils import load_yaml from ._structures import ComponentSpec from ._structures import * @@ -122,35 +123,6 @@ def _create_task_factory_from_component_dict(component_dict, component_filename= return _create_task_factory_from_component_spec(component_spec, component_filename) -def _normalize_identifier_name(name): - import re - normalized_name = name.lower() - normalized_name = re.sub(r'[\W_]', ' ', normalized_name) #No non-word characters - normalized_name = re.sub(' +', ' ', normalized_name).strip() #No double spaces, leading or trailing spaces - if re.match(r'\d', normalized_name): - normalized_name = 'n' + normalized_name #No leading digits - return normalized_name - - -def _sanitize_kubernetes_resource_name(name): - return _normalize_identifier_name(name).replace(' ', '-') - - -def _sanitize_python_function_name(name): - return _normalize_identifier_name(name).replace(' ', '_') - - -def _sanitize_file_name(name): - import re - return re.sub('[^-_.0-9a-zA-Z]+', '_', name) - - -def _generate_unique_suffix(data): - import time - import hashlib - string_data = str( (data, time.time()) ) - return hashlib.sha256(string_data.encode()).hexdigest()[0:8] - _inputs_dir = '/inputs' _outputs_dir = '/outputs' _single_io_file_name = 'data' @@ -177,15 +149,6 @@ def _try_get_object_by_name(obj_name): return obj_name -def _make_name_unique_by_adding_index(name:str, collection, delimiter:str): - unique_name = name - if unique_name in collection: - for i in range(2, sys.maxsize**10): - unique_name = name + delimiter + str(i) - if unique_name not in collection: - break - return unique_name - #Holds the transformation functions that are called each time TaskSpec instance is created from a component. If there are multiple handlers, the last one is used. _created_task_transformation_handler = [] diff --git a/sdk/python/kfp/components/_dsl_bridge.py b/sdk/python/kfp/components/_dsl_bridge.py index 0f06ffc814f..356ac823903 100644 --- a/sdk/python/kfp/components/_dsl_bridge.py +++ b/sdk/python/kfp/components/_dsl_bridge.py @@ -134,7 +134,7 @@ def _create_container_op_from_resolved_task(name:str, container_image:str, comma _dummy_pipeline = dsl.Pipeline('dummy pipeline') _dummy_pipeline.__enter__() - from ._components import _sanitize_kubernetes_resource_name, _make_name_unique_by_adding_index + from ._naming import _sanitize_kubernetes_resource_name, _make_name_unique_by_adding_index output_name_to_kubernetes = {} kubernetes_name_to_output_name = {} for output_name in (output_paths or {}).keys(): diff --git a/sdk/python/kfp/components/_naming.py b/sdk/python/kfp/components/_naming.py new file mode 100644 index 00000000000..b56e589bbce --- /dev/null +++ b/sdk/python/kfp/components/_naming.py @@ -0,0 +1,66 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__all__ = [ + '_normalize_identifier_name', + '_sanitize_kubernetes_resource_name', + '_sanitize_python_function_name', + '_sanitize_file_name', + '_generate_unique_suffix', + '_make_name_unique_by_adding_index', +] + + +import re +import sys + + +def _normalize_identifier_name(name): + import re + normalized_name = name.lower() + normalized_name = re.sub(r'[\W_]', ' ', normalized_name) #No non-word characters + normalized_name = re.sub(' +', ' ', normalized_name).strip() #No double spaces, leading or trailing spaces + if re.match(r'\d', normalized_name): + normalized_name = 'n' + normalized_name #No leading digits + return normalized_name + + +def _sanitize_kubernetes_resource_name(name): + return _normalize_identifier_name(name).replace(' ', '-') + + +def _sanitize_python_function_name(name): + return _normalize_identifier_name(name).replace(' ', '_') + + +def _sanitize_file_name(name): + import re + return re.sub('[^-_.0-9a-zA-Z]+', '_', name) + + +def _generate_unique_suffix(data): + import time + import hashlib + string_data = str( (data, time.time()) ) + return hashlib.sha256(string_data.encode()).hexdigest()[0:8] + + +def _make_name_unique_by_adding_index(name:str, collection, delimiter:str): + unique_name = name + if unique_name in collection: + for i in range(2, sys.maxsize**10): + unique_name = name + delimiter + str(i) + if unique_name not in collection: + break + return unique_name From 98fe55553900fb9755756bb7ba97ef88365c58d6 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Mon, 21 Jan 2019 01:32:49 -0800 Subject: [PATCH 2/2] SDK/Components - Added _naming._convert_to_human_name function --- sdk/python/kfp/components/_naming.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sdk/python/kfp/components/_naming.py b/sdk/python/kfp/components/_naming.py index b56e589bbce..71bd54f68a1 100644 --- a/sdk/python/kfp/components/_naming.py +++ b/sdk/python/kfp/components/_naming.py @@ -17,6 +17,7 @@ '_sanitize_kubernetes_resource_name', '_sanitize_python_function_name', '_sanitize_file_name', + '_convert_to_human_name', '_generate_unique_suffix', '_make_name_unique_by_adding_index', ] @@ -49,6 +50,13 @@ def _sanitize_file_name(name): return re.sub('[^-_.0-9a-zA-Z]+', '_', name) +def _convert_to_human_name(name: str): + '''Converts underscore or dash delimited name to space-delimited name that starts with a capital letter. + Does not handle "camelCase" names. + ''' + return name.replace('_', ' ').replace('-', ' ').strip().capitalize() + + def _generate_unique_suffix(data): import time import hashlib