Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SDK/Components - Added _naming._convert_to_human_name function #715

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 1 addition & 38 deletions sdk/python/kfp/components/_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import sys
from collections import OrderedDict
from ._naming import _sanitize_file_name, _sanitize_python_function_name, _make_name_unique_by_adding_index
from ._yaml_utils import load_yaml
from ._structures import ComponentSpec
from ._structures import *
Expand Down Expand Up @@ -122,35 +123,6 @@ def _create_task_factory_from_component_dict(component_dict, component_filename=
return _create_task_factory_from_component_spec(component_spec, component_filename)


def _normalize_identifier_name(name):
import re
normalized_name = name.lower()
normalized_name = re.sub(r'[\W_]', ' ', normalized_name) #No non-word characters
normalized_name = re.sub(' +', ' ', normalized_name).strip() #No double spaces, leading or trailing spaces
if re.match(r'\d', normalized_name):
normalized_name = 'n' + normalized_name #No leading digits
return normalized_name


def _sanitize_kubernetes_resource_name(name):
    '''Returns the normalized form of the name with its words joined by dashes.'''
    words = _normalize_identifier_name(name).split(' ')
    return '-'.join(words)


def _sanitize_python_function_name(name):
    '''Returns the normalized form of the name with its words joined by underscores.'''
    words = _normalize_identifier_name(name).split(' ')
    return '_'.join(words)


def _sanitize_file_name(name):
import re
return re.sub('[^-_.0-9a-zA-Z]+', '_', name)


def _generate_unique_suffix(data):
import time
import hashlib
string_data = str( (data, time.time()) )
return hashlib.sha256(string_data.encode()).hexdigest()[0:8]

# String constants: two absolute directory paths and a file name.
# NOTE(review): presumably where component input/output data is placed - confirm against the code that uses them.
_inputs_dir = '/inputs'
_outputs_dir = '/outputs'
_single_io_file_name = 'data'
Expand All @@ -177,15 +149,6 @@ def _try_get_object_by_name(obj_name):
return obj_name


def _make_name_unique_by_adding_index(name:str, collection, delimiter:str):
unique_name = name
if unique_name in collection:
for i in range(2, sys.maxsize**10):
unique_name = name + delimiter + str(i)
if unique_name not in collection:
break
return unique_name


# Holds the transformation functions that are called each time a TaskSpec instance is created from a component. If there are multiple handlers, the last one is used.
_created_task_transformation_handler = []
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/kfp/components/_dsl_bridge.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def _create_container_op_from_resolved_task(name:str, container_image:str, comma
_dummy_pipeline = dsl.Pipeline('dummy pipeline')
_dummy_pipeline.__enter__()

from ._components import _sanitize_kubernetes_resource_name, _make_name_unique_by_adding_index
from ._naming import _sanitize_kubernetes_resource_name, _make_name_unique_by_adding_index
output_name_to_kubernetes = {}
kubernetes_name_to_output_name = {}
for output_name in (output_paths or {}).keys():
Expand Down
74 changes: 74 additions & 0 deletions sdk/python/kfp/components/_naming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Names exported by a star import of this module. They are listed explicitly
# because underscore-prefixed names are otherwise skipped by `import *`.
__all__ = [
'_normalize_identifier_name',
'_sanitize_kubernetes_resource_name',
'_sanitize_python_function_name',
'_sanitize_file_name',
'_convert_to_human_name',
'_generate_unique_suffix',
'_make_name_unique_by_adding_index',
]


import re
import sys


def _normalize_identifier_name(name):
import re
normalized_name = name.lower()
normalized_name = re.sub(r'[\W_]', ' ', normalized_name) #No non-word characters
normalized_name = re.sub(' +', ' ', normalized_name).strip() #No double spaces, leading or trailing spaces
if re.match(r'\d', normalized_name):
normalized_name = 'n' + normalized_name #No leading digits
return normalized_name


def _sanitize_kubernetes_resource_name(name):
    '''Returns the normalized form of the name with its words joined by dashes.'''
    words = _normalize_identifier_name(name).split(' ')
    return '-'.join(words)


def _sanitize_python_function_name(name):
    '''Returns the normalized form of the name with its words joined by underscores.'''
    words = _normalize_identifier_name(name).split(' ')
    return '_'.join(words)


def _sanitize_file_name(name):
import re
return re.sub('[^-_.0-9a-zA-Z]+', '_', name)


def _convert_to_human_name(name: str):
'''Converts underscore or dash delimited name to space-delimited name that starts with a capital letter.
Does not handle "camelCase" names.
'''
return name.replace('_', ' ').replace('-', ' ').strip().capitalize()


def _generate_unique_suffix(data):
import time
import hashlib
string_data = str( (data, time.time()) )
return hashlib.sha256(string_data.encode()).hexdigest()[0:8]


def _make_name_unique_by_adding_index(name:str, collection, delimiter:str):
unique_name = name
if unique_name in collection:
for i in range(2, sys.maxsize**10):
unique_name = name + delimiter + str(i)
if unique_name not in collection:
break
return unique_name