From 87437c315fd4658448eaee82ab3af88a43cf70f4 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 6 Nov 2018 00:10:29 -0800 Subject: [PATCH 1/4] SDK/Components/Python - Removed python_op in favor of python_component * Added test for python_component + func_to_container_op. * dsl.PythonComponent class is removed, because the metadata is now properly stored in the function object itself. * build_python_component now optionally accepts base_image and no longer requires the decorator (but can use decorator's metadata). * Made staging_gcs_path optional since it's only needed when build_image == True * Added more validation * Added description and parameter help to python_component --- sdk/python/kfp/compiler/_component_builder.py | 55 +++++++++---- sdk/python/kfp/components/_python_op.py | 49 ++++-------- sdk/python/kfp/dsl/__init__.py | 2 +- sdk/python/kfp/dsl/_component.py | 55 +++++-------- sdk/python/tests/components/test_python_op.py | 77 +++++++------------ 5 files changed, 101 insertions(+), 137 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 830563206d8..147aa555b44 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -293,16 +293,20 @@ def build_image_from_dockerfile(self, dockerfile_path, timeout, namespace): # Clean up GCSHelper.remove_gcs_blob(self._gcs_path) -def _generate_pythonop(component_func, target_image): +def _generate_pythonop(component_func, target_image, target_component_file=None): """ Generate operator for the pipeline authors - component_meta is a dict of name, description, base_image, target_image, input_list The returned value is in fact a function, which should generates a container_op instance. """ - component_meta = dsl.PythonComponent.get_python_component(component_func) + + from ..components._python_op import _python_function_name_to_component_name + + component_name = getattr(component_func, '_component_human_name', None) or _python_function_name_to_component_name(component_func.__name__) + component_description = getattr(component_func, '_component_description', None) or (component_func.__doc__.strip() if component_func.__doc__ else None) + input_names = inspect.getfullargspec(component_func)[0] component_artifact = {} - component_artifact['name'] = component_meta['name'] - component_artifact['description'] = component_meta['description'] + component_artifact['name'] = component_name + component_artifact['description'] = component_description component_artifact['outputs'] = [{'name': 'output'}] component_artifact['inputs'] = [] component_artifact['implementation'] = { @@ -320,14 +324,23 @@ def _generate_pythonop(component_func, target_image): 'type': 'str' }) component_artifact['implementation']['dockerContainer']['arguments'].append({'value': input}) + + target_component_file = target_component_file or getattr(component_func, '_component_target_component_file', None) + if target_component_file: + from ..components._yaml_utils import dump_yaml + component_text = dump_yaml(component_artifact) + Path(target_component_file).write_text(component_text) + return _create_task_factory_from_component_dict(component_artifact) -def build_python_component(component_func, staging_gcs_path, target_image, build_image=True, timeout=600, namespace='kubeflow'): +def build_python_component(component_func, target_image, base_image=None, staging_gcs_path=None, build_image=True, timeout=600, namespace='kubeflow', target_component_file=None): """ build_component automatically builds a container image for the component_func based on the base_image and pushes to the target_image. Args: component_func (python function): The python function to build components upon + base_image (str): Docker image to use as a base image + target_image (str): Full URI to push the target image staging_gcs_path (str): GCS blob that can store temporary build files timeout (int): the timeout for the image build(in secs), default is 600 seconds namespace (str): the namespace within which to run the kubernetes kaniko job, default is "kubeflow" @@ -338,22 +351,30 @@ def build_python_component(component_func, staging_gcs_path, target_image, build """ logging.basicConfig() logging.getLogger().setLevel('INFO') - component_meta = dsl.PythonComponent.get_python_component(component_func) - component_meta['inputs'] = inspect.getfullargspec(component_func)[0] - - if component_meta is None: - raise ValueError('The function "%s" does not exist. ' - 'Did you forget @dsl.python_component decoration?' % component_func) - logging.info('Build an image that is based on ' + - component_meta['base_image'] + + + if component_func is None: + raise ValueError('component_func must not be None') + if target_image is None: + raise ValueError('target_image must not be None') + + if build_image: + if staging_gcs_path is None: + raise ValueError('staging_gcs_path must not be None') + + if base_image is None: + base_image = getattr(component_func, '_component_base_image', None) + if base_image is None: + raise ValueError('base_image must not be None') + + logging.info('Build an image that is based on ' + + base_image + ' and push the image to ' + target_image) - if build_image: builder = ImageBuilder(gcs_base=staging_gcs_path, target_image=target_image) builder.build_image_from_func(component_func, namespace=namespace, - base_image=component_meta['base_image'], timeout=timeout) + base_image=base_image, timeout=timeout) logging.info('Build component complete.') - return _generate_pythonop(component_func, target_image) + return _generate_pythonop(component_func, target_image, target_component_file) def build_docker_image(staging_gcs_path, target_image, dockerfile_path, timeout=600, namespace='kubeflow'): """ build_docker_image automatically builds a container image based on the specification in the dockerfile and diff --git a/sdk/python/kfp/components/_python_op.py b/sdk/python/kfp/components/_python_op.py index c9984096ff1..285bdc6444f 100644 --- a/sdk/python/kfp/components/_python_op.py +++ b/sdk/python/kfp/components/_python_op.py @@ -13,7 +13,6 @@ # limitations under the License. __all__ = [ - 'python_op', 'func_to_container_op', 'func_to_component_text', ] @@ -47,6 +46,16 @@ def _python_function_name_to_component_name(name): def _func_to_component_spec(func, extra_code='', base_image=_default_base_image) -> ComponentSpec: + decorator_base_image = getattr(func, '_component_base_image', None) + if decorator_base_image is not None: + if base_image is not _default_base_image and decorator_base_image != base_image: + raise ValueError('base_image ({}) conflicts with the decorator-specified base image metadata ({})'.format(base_image, decorator_base_image)) + else: + base_image = decorator_base_image + else: + if base_image is None: + raise ValueError('base_image cannot be None') + import inspect import re from collections import OrderedDict @@ -199,8 +208,10 @@ def annotation_to_argument_kind_and_type_name(annotation): #Removing consecutive blank lines full_source = re.sub('\n\n\n+', '\n\n', full_source).strip('\n') + '\n' - component_name = _python_function_name_to_component_name(func_name) - description = func.__doc__.strip() + '\n' if func.__doc__ else None #Interesting: unlike ruamel.yaml, PyYaml cannot handle trailing spaces in the last line (' \n') and switches the style to double-quoted. + component_name = getattr(func, '_component_human_name', None) or _python_function_name_to_component_name(func.__name__) + description = getattr(func, '_component_description', None) or func.__doc__ + if description: + description = description.strip() + '\n' #Interesting: unlike ruamel.yaml, PyYaml cannot handle trailing spaces in the last line (' \n') and switches the style to double-quoted. component_spec = ComponentSpec( name=component_name, @@ -299,6 +310,7 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s component_spec = _func_to_component_spec(func, extra_code, base_image) + output_component_file = output_component_file or getattr(func, '_component_target_component_file', None) if output_component_file: component_dict = component_spec.to_struct() component_yaml = dump_yaml(component_dict) @@ -306,34 +318,3 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s #TODO: assert ComponentSpec.from_struct(load_yaml(output_component_file)) == component_spec return _create_task_factory_from_component_spec(component_spec) - - -def python_op(func=None, base_image=_default_base_image, output_component_file=None, extra_code=''): - ''' - Decorator that replaces a Python function with an equivalent task (ContainerOp) factory - - Function docstring is used as component description. - Argument and return annotations are used as component input/output types. - To declare a function with multiple return values, use the NamedTuple return annotation syntax: - - from typing import NamedTuple - @python_op(base_image='tensorflow/tensorflow:1.11.0-py3') - def add_multiply_two_numbers_op(a: float, b: float) -> NamedTuple('DummyName', [('sum', float), ('product', float)]): - """Returns sum and product of two arguments""" - return (a + b, a * b) - - Args: - func: The python function to convert - base_image: Optional. Specify a custom Docker containerimage to use in the component. For lightweight components, the image needs to have python and the fire package. - output_component_file: Optional. Write a component definition to a local file. Can be used for sharing. - extra_code: Optional. Extra code to add before the function code. May contain imports and other functions. - - Returns: - A factory function with a strongly-typed signature taken from the python function. - Once called with the required arguments, the factory constructs a pipeline task instance (ContainerOp) that can run the original function in a container. - ''' - - if func: - return func_to_container_op(func, output_component_file, base_image, extra_code) - else: - return lambda f: func_to_container_op(f, output_component_file, base_image, extra_code) diff --git a/sdk/python/kfp/dsl/__init__.py b/sdk/python/kfp/dsl/__init__.py index 3b662e2cfc0..ac44ae5e1c6 100644 --- a/sdk/python/kfp/dsl/__init__.py +++ b/sdk/python/kfp/dsl/__init__.py @@ -17,4 +17,4 @@ from ._pipeline import Pipeline, pipeline from ._container_op import ContainerOp from ._ops_group import OpsGroup, ExitHandler, Condition -from ._component import PythonComponent, python_component +from ._component import python_component diff --git a/sdk/python/kfp/dsl/_component.py b/sdk/python/kfp/dsl/_component.py index 6788b15b966..bb14f37feae 100644 --- a/sdk/python/kfp/dsl/_component.py +++ b/sdk/python/kfp/dsl/_component.py @@ -12,51 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -from kfp import dsl -def python_component(name, description, base_image): - """Decorator of component functions. +def python_component(name, description=None, base_image=None, target_component_file: str = None): + """Decorator for Python component functions. + This decorator adds the metadata to the function object itself. + + Args: + name: Human-readable name of the component + description: Optional. Description of the component + base_image: Optional. Docker container image to use as the base of the component. Needs to have Python 3.5+ installed. + target_component_file: Optional. Local file to store the component definition. The file can then be used for sharing. + + Returns: + The same function (with some metadata fields set). Usage: ```python @dsl.python_component( name='my awesome component', - description='Come, Let's play' - base_image='tensorflow/tensorflow' + description='Come, Let's play', + base_image='tensorflow/tensorflow:1.11.0-py3', ) def my_component(a: str, b: int) -> str: ... ``` """ def _python_component(func): - dsl.PythonComponent.add_python_component(name, description, base_image, func) + func._component_human_name = name + if description: + func._component_description = description + if base_image: + func._component_base_image = base_image + if target_component_file: + func._component_target_component_file = target_component_file return func return _python_component - -class PythonComponent(): - """A pipeline contains a list of operators. - - This class is not supposed to be used by component authors since component authors can use - component functions (decorated with @python_component) to reference their pipelines. This class - is useful for implementing a compiler. For example, the compiler can use the following - to get the PythonComponent object: - """ - - - # All pipeline functions with @pipeline decorator that are imported. - # Each key is a pipeline function. Each value is a dictionary of name, description, base_image. - _component_functions = {} - - @staticmethod - def add_python_component(name, description, base_image, func): - """ Add a python component """ - dsl.PythonComponent._component_functions[func] = { - 'name': name, - 'description': description, - 'base_image': base_image - } - - @staticmethod - def get_python_component(func): - """ Get a python component """ - return dsl.PythonComponent._component_functions.get(func, None) \ No newline at end of file diff --git a/sdk/python/tests/components/test_python_op.py b/sdk/python/tests/components/test_python_op.py index f75f540e725..ec525eda1e4 100644 --- a/sdk/python/tests/components/test_python_op.py +++ b/sdk/python/tests/components/test_python_op.py @@ -19,8 +19,6 @@ import unittest from pathlib import Path -import kfp.components as comp -from kfp.components._yaml_utils import load_yaml _this_file = Path(__file__).resolve() _this_dir = _this_file.parent @@ -30,17 +28,10 @@ sys.path.insert(0, _sdk_root_dir) -def add_two_numbers(a: float, b: float) -> float: - '''Returns sum of two arguments''' - return a + b - -@comp.python_op -def add_two_numbers_decorated(a: float, b: float) -> float: - '''Returns sum of two arguments''' - return a + b +import kfp.components as comp +from kfp.components._yaml_utils import load_yaml -@comp.python_op(base_image='ark7/python-fire') -def add_two_numbers_decorated_with_parameters(a: float, b: float) -> float: +def add_two_numbers(a: float, b: float) -> float: '''Returns sum of two arguments''' return a + b @@ -98,12 +89,6 @@ def test_func_to_container_op_local_call(self): self.helper_test_2_in_1_out_component_using_local_call(func, op) - def test_decorated_func_to_container_op_local_call(self): - self.helper_test_2_in_1_out_component_using_local_call(add_two_numbers, add_two_numbers_decorated) - - def test_decorated_with_parameters_func_to_container_op_local_call(self): - self.helper_test_2_in_1_out_component_using_local_call(add_two_numbers, add_two_numbers_decorated_with_parameters) - def test_indented_func_to_container_op_local_call(self): def add_two_numbers_indented(a: float, b: float) -> float: '''Returns sum of two arguments''' @@ -114,22 +99,6 @@ def add_two_numbers_indented(a: float, b: float) -> float: self.helper_test_2_in_1_out_component_using_local_call(func, op) - def test_indented_decorated_func_to_container_op_local_call(self): - @comp.python_op - def add_two_numbers_indented_decorated(a: float, b: float) -> float: - '''Returns sum of two arguments''' - return a + b - - self.helper_test_2_in_1_out_component_using_local_call(add_two_numbers, add_two_numbers_indented_decorated) - - def test_indented_decorated_with_parameters_func_to_container_op_local_call(self): - @comp.python_op(base_image='ark7/python-fire') - def add_two_numbers_indented_decorated_with_parameters(a: float, b: float) -> float: - '''Returns sum of two arguments''' - return a + b - - self.helper_test_2_in_1_out_component_using_local_call(add_two_numbers, add_two_numbers_indented_decorated_with_parameters) - def test_func_to_container_op_multiple_named_typed_outputs(self): from typing import NamedTuple def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('sum', float), ('product', float)]): @@ -196,10 +165,27 @@ def add_two_numbers_indented(a: float, Output: float) -> float: self.helper_test_2_in_1_out_component_using_local_call(func, op) + def test_python_component_decorator(self): + from kfp.dsl import python_component + import kfp.components._python_op as _python_op + + expected_name = 'Sum component name' + expected_description = 'Sum component description' + expected_image = 'org/image' + + @python_component(name=expected_name, description=expected_description, base_image=expected_image) + def add_two_numbers_decorated(a: float, b: float) -> float: + '''Returns sum of two arguments''' + return a + b + + component_spec = _python_op._func_to_component_spec(add_two_numbers_decorated) + + self.assertEqual(component_spec.name, expected_name) + self.assertEqual(component_spec.description.strip(), expected_description.strip()) + self.assertEqual(component_spec.implementation.docker_container.image, expected_image) def test_end_to_end_python_component_pipeline_compilation(self): import kfp.components as comp - from kfp.components import python_op #Defining the Python function def add(a: float, b: float) -> float: @@ -208,23 +194,13 @@ def add(a: float, b: float) -> float: with tempfile.TemporaryDirectory() as temp_dir_name: add_component_file = str(Path(temp_dir_name).joinpath('add.component.yaml')) - subtract_component_file = str(Path(temp_dir_name).joinpath('subtract.component.yaml')) #Converting the function to a component. Instantiate it to create a pipeline task (ContaineOp instance) - add_op = comp.func_to_container_op(add, base_image='ark7/python-fire', output_component_file=add_component_file) + add_op = comp.func_to_container_op(add, base_image='python:3.5', output_component_file=add_component_file) #Checking that the component artifact is usable: add_op2 = comp.load_component_from_file(add_component_file) - #Using decorator to perform the same thing (convert function to a component): - @python_op(base_image='ark7/python-fire', output_component_file=subtract_component_file) - def subtract_op(a: float, b: float) -> float: - '''Returns difference between two arguments''' - return a - b - - #Checking that the component artifact is usable: - subtract_op2 = comp.load_component_from_file(subtract_component_file) - #Building the pipeline import kfp.dsl as dsl @dsl.pipeline( @@ -236,11 +212,10 @@ def calc_pipeline( a2=dsl.PipelineParam('a2', value='7'), a3=dsl.PipelineParam('a3', value='17'), ): - task_11 = add_op(a1, a2) - task_12 = subtract_op(task_11.output, a3) - task_21 = add_op2(a1, a2) - task_22 = subtract_op2(task_21.output, a3) - task_3 = subtract_op(task_12.output, task_22.output) + task_1 = add_op(a1, a2) + task_2 = add_op2(a1, a2) + task_3 = add_op(task_1.output, task_2.output) + task_4 = add_op2(task_3.output, a3) #Compiling the pipleine: pipeline_filename = Path(temp_dir_name).joinpath(calc_pipeline.__name__ + '.pipeline.tar.gz') From daeb91bb87db8d37112be18edf91f610af3f90f6 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Tue, 6 Nov 2018 00:41:14 -0800 Subject: [PATCH 2/4] Fixed the pipeline path string after merge. --- sdk/python/tests/components/test_python_op.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/python/tests/components/test_python_op.py b/sdk/python/tests/components/test_python_op.py index ec525eda1e4..76ae33bbcf1 100644 --- a/sdk/python/tests/components/test_python_op.py +++ b/sdk/python/tests/components/test_python_op.py @@ -218,7 +218,7 @@ def calc_pipeline( task_4 = add_op2(task_3.output, a3) #Compiling the pipleine: - pipeline_filename = Path(temp_dir_name).joinpath(calc_pipeline.__name__ + '.pipeline.tar.gz') + pipeline_filename = str(Path(temp_dir_name).joinpath(calc_pipeline.__name__ + '.pipeline.tar.gz')) import kfp.compiler as compiler compiler.Compiler().compile(calc_pipeline, pipeline_filename) From 725be74cc7e986af774df3266d979309ac9efc7a Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 9 Nov 2018 18:51:44 -0800 Subject: [PATCH 3/4] Addressed the PR comments. Added more detailed explanation of base_image selection to the docstring. --- sdk/python/kfp/compiler/_component_builder.py | 2 ++ sdk/python/kfp/components/_python_op.py | 25 ++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/sdk/python/kfp/compiler/_component_builder.py b/sdk/python/kfp/compiler/_component_builder.py index 147aa555b44..1ef4f982671 100644 --- a/sdk/python/kfp/compiler/_component_builder.py +++ b/sdk/python/kfp/compiler/_component_builder.py @@ -299,6 +299,8 @@ def _generate_pythonop(component_func, target_image, target_component_file=None) from ..components._python_op import _python_function_name_to_component_name + #Component name and description are derived from the function's name and docstribng, but can be overridden by @python_component function decorator + #The decorator can set the _component_human_name and _component_description attributes. getattr is needed to prevent error when these attributes do not exist. component_name = getattr(component_func, '_component_human_name', None) or _python_function_name_to_component_name(component_func.__name__) component_description = getattr(component_func, '_component_description', None) or (component_func.__doc__.strip() if component_func.__doc__ else None) diff --git a/sdk/python/kfp/components/_python_op.py b/sdk/python/kfp/components/_python_op.py index 285bdc6444f..4c3e1fe99eb 100644 --- a/sdk/python/kfp/components/_python_op.py +++ b/sdk/python/kfp/components/_python_op.py @@ -46,6 +46,14 @@ def _python_function_name_to_component_name(name): def _func_to_component_spec(func, extra_code='', base_image=_default_base_image) -> ComponentSpec: + '''Takes a self-contained python function and converts it to component + + Args: + func: Required. The function to be converted + base_image: Optional. Docker image to be used as a base image for the python component. Must have python 3.5+ installed. Default is tensorflow/tensorflow:1.11.0-py3 + Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. + extra_code: Optional. Python source code that gets placed before the function code. Can be used as workaround to define types used in function signature. + ''' decorator_base_image = getattr(func, '_component_base_image', None) if decorator_base_image is not None: if base_image is not _default_base_image and decorator_base_image != base_image: @@ -208,6 +216,8 @@ def annotation_to_argument_kind_and_type_name(annotation): #Removing consecutive blank lines full_source = re.sub('\n\n\n+', '\n\n', full_source).strip('\n') + '\n' + #Component name and description are derived from the function's name and docstribng, but can be overridden by @python_component function decorator + #The decorator can set the _component_human_name and _component_description attributes. getattr is needed to prevent error when these attributes do not exist. component_name = getattr(func, '_component_human_name', None) or _python_function_name_to_component_name(func.__name__) description = getattr(func, '_component_description', None) or func.__doc__ if description: @@ -249,8 +259,9 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s Args: func: The python function to convert - base_image: Optional. Specify a custom Docker containerimage to use in the component. For lightweight components, the image needs to have python and the fire package. - extra_code: Optional. Extra code to add before the function code. May contain imports and other functions. + base_image: Optional. Specify a custom Docker container image to use in the component. For lightweight components, the image needs to have python 3.5+. Default is tensorflow/tensorflow:1.11.0-py3 + Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. + extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature. Returns: Textual representation of a component definition @@ -275,8 +286,9 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s Args: func: The python function to convert output_component_file: Write a component definition to a local file. Can be used for sharing. - base_image: Optional. Specify a custom Docker containerimage to use in the component. For lightweight components, the image needs to have python and the fire package. - extra_code: Optional. Extra code to add before the function code. May contain imports and other functions. + base_image: Optional. Specify a custom Docker container image to use in the component. For lightweight components, the image needs to have python 3.5+. Default is tensorflow/tensorflow:1.11.0-py3 + Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. + extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature. ''' component_yaml = func_to_component_text(func, extra_code, base_image) @@ -299,9 +311,10 @@ def add_multiply_two_numbers(a: float, b: float) -> NamedTuple('DummyName', [('s Args: func: The python function to convert - base_image: Optional. Specify a custom Docker containerimage to use in the component. For lightweight components, the image needs to have python and the fire package. + base_image: Optional. Specify a custom Docker container image to use in the component. For lightweight components, the image needs to have python 3.5+. Default is tensorflow/tensorflow:1.11.0-py3 + Note: The image can also be specified by decorating the function with the @python_component decorator. If different base images are explicitly specified in both places, an error is raised. output_component_file: Optional. Write a component definition to a local file. Can be used for sharing. - extra_code: Optional. Extra code to add before the function code. May contain imports and other functions. + extra_code: Optional. Extra code to add before the function code. Can be used as workaround to define types used in function signature. Returns: A factory function with a strongly-typed signature taken from the python function. From 89aeb3c55b73672f3ed1e9889e1e65485116c869 Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Fri, 16 Nov 2018 18:19:21 -0800 Subject: [PATCH 4/4] Removed extra empty lines --- sdk/python/tests/components/test_python_op.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdk/python/tests/components/test_python_op.py b/sdk/python/tests/components/test_python_op.py index 916f1270523..726f422319d 100644 --- a/sdk/python/tests/components/test_python_op.py +++ b/sdk/python/tests/components/test_python_op.py @@ -17,8 +17,6 @@ import unittest from pathlib import Path - - import kfp.components as comp def add_two_numbers(a: float, b: float) -> float: