Add component metadata #891

Merged: 21 commits, Mar 6, 2019
Changes from 7 commits
2 changes: 1 addition & 1 deletion sdk/python/kfp/dsl/__init__.py
@@ -15,6 +15,6 @@

from ._pipeline_param import PipelineParam
from ._pipeline import Pipeline, pipeline, get_pipeline_conf
from ._container_op import ContainerOp
from ._container_op import ContainerOp, ComponentMeta, ParameterMeta, TypeMeta
from ._ops_group import OpsGroup, ExitHandler, Condition
from ._python_component import python_component
73 changes: 71 additions & 2 deletions sdk/python/kfp/dsl/_container_op.py
@@ -17,8 +17,77 @@
from . import _pipeline_param
from ._pipeline_param import _extract_pipelineparams
import re
from typing import Dict

from typing import Dict, List
from abc import ABCMeta, abstractmethod
from ._types import _check_valid_dict

class BaseMeta(metaclass=ABCMeta):
def __init__(self):
pass

@abstractmethod
def to_dict(self):
pass

def serialize(self):
import yaml
return yaml.dump(self.to_dict())

class TypeMeta(BaseMeta):
def __init__(self,
name: str = '',
properties: Dict = {}):
self.name = name
self.properties = properties

def to_dict(self):
return {self.name: self.properties}

@staticmethod
def from_dict(json_dict):
if not _check_valid_dict(json_dict):
raise ValueError(str(json_dict) + ' is not a valid serialized type dictionary')
type_meta = TypeMeta()
type_meta.name, type_meta.properties = list(json_dict.items())[0]
return type_meta

def __eq__(self, other):
return self.__dict__ == other.__dict__

class ParameterMeta(BaseMeta):
def __init__(self,
name: str = '',
description: str = '',
param_type: TypeMeta = TypeMeta()):
self.name = name
self.description = description
self.param_type = param_type

def to_dict(self):
return {'name': self.name,
'description': self.description,
'type': self.param_type.to_dict()}

class ComponentMeta(BaseMeta):
def __init__(
self,
name: str = '',
description: str = '',
inputs: List[ParameterMeta] = [],
outputs: List[ParameterMeta] = []
):
self.name = name
self.description = description
self.inputs = inputs
self.outputs = outputs

def to_dict(self):
return {'name': self.name,
'description': self.description,
'inputs': [ input.to_dict() for input in self.inputs ],
'outputs': [ output.to_dict() for output in self.outputs ]
}

class ContainerOp(object):
"""Represents an op implemented by a docker container image."""
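Taken together, the new metadata classes form a small tree: ComponentMeta.to_dict() embeds each ParameterMeta dict, which in turn embeds its TypeMeta dict, and serialize() dumps that tree as YAML. A minimal usage sketch follows; the component and parameter names are illustrative only and not part of this PR.

from kfp.dsl import ComponentMeta, ParameterMeta, TypeMeta

# Hypothetical component, used only to show the shape of the nested to_dict() output.
meta = ComponentMeta(
    name='train',
    description='trains a model',
    inputs=[ParameterMeta(name='data_path',
                          description='GCS path to the training data',
                          param_type=TypeMeta(name='GCSPath',
                                              properties={'file_type': 'csv'}))],
    outputs=[ParameterMeta(name='model_path',
                           description='GCS path to the trained model',
                           param_type=TypeMeta(name='GCSPath',
                                               properties={'path_type': 'directory'}))])

print(meta.to_dict())    # nested dict, mirroring the golden dict in container_op_tests.py
print(meta.serialize())  # the same structure dumped as YAML via yaml.dump (requires PyYAML)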
173 changes: 173 additions & 0 deletions sdk/python/kfp/dsl/_types.py
@@ -0,0 +1,173 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

class BaseType:
'''BaseType is the base type for all scalar and artifact types.
'''
pass

# Primitive Types
class Integer(BaseType):
openapi_schema_validator = '''{
"type": "integer"
}'''

class String(BaseType):
openapi_schema_validator = '''{
"type": "string"
}'''

class Float(BaseType):
openapi_schema_validator = '''{
"type": "number"
}'''

class Bool(BaseType):
openapi_schema_validator = '''{
"type": "boolean"
}'''

class List(BaseType):
openapi_schema_validator = '''{
"type": "array"
}'''

class Dict(BaseType):
openapi_schema_validator = '''{
"type": "object",
}'''

# GCP Types
class GCSPath(BaseType):
openapi_schema_validator = '''{
"type": "string",
"pattern": "^gs://$"
}'''

def __init__(self, path_type='', file_type=''):
'''
Args:
  path_type: describes the path, for example, bucket, directory, file, etc.
  file_type: describes the file, for example, JSON, CSV, etc.
'''
self.path_type = path_type
self.file_type = file_type

class GCRPath(BaseType):
openapi_schema_validator = '''{
"type": "string",
"pattern": "^(us.|eu.|asia.)?gcr\\.io/.*$"
}'''

class GCPRegion(BaseType):
openapi_schema_validator = '''{
"type": "string",
"enum": ["asia-east1","asia-east2","asia-northeast1",
"asia-south1","asia-southeast1","australia-southeast1",
"europe-north1","europe-west1","europe-west2",
"europe-west3","europe-west4","northamerica-northeast1",
"southamerica-east1","us-central1","us-east1",
"us-east4","us-west1", "us-west4" ]
}'''

class GCPProjectID(BaseType):
'''GCPProjectID: GCP project id.'''
openapi_schema_validator = '''{
"type": "string"
}'''

# General Types
class LocalPath(BaseType):
#TODO: add restriction to path
openapi_schema_validator = '''{
"type": "string"
}'''

class InconsistentTypeException(Exception):
'''InconsistentTypeException is raised when two types are not consistent.'''
pass

def _check_valid_dict(payload):
'''_check_valid_dict checks whether a dict is a valid serialization of a type.
Args:
payload(dict)
'''
if not isinstance(payload, dict) or len(payload) != 1:
return False
for type_name in payload:
if not isinstance(payload[type_name], dict):
return False
property_types = (int, str, float, bool)
for property_name in payload[type_name]:
if not isinstance(property_name, property_types) or not isinstance(payload[type_name][property_name], property_types):
return False
return True

def _instance_to_dict(instance):
'''_instance_to_dict serializes a type instance into a dict.
Args:
instance(BaseType): An instance that describes a type

Return:
dict
'''
return {type(instance).__name__: instance.__dict__}

def _str_to_dict(payload):
  '''_str_to_dict deserializes a JSON type string into a type dict.'''
  import json
json_dict = json.loads(payload)
if not _check_valid_dict(json_dict):
raise ValueError(payload + ' is not a valid type string')
return json_dict

def _check_dict_types(typeA, typeB):
'''_check_dict_types checks the type consistency between two type dicts.
Args:
typeA (dict): A dict that describes a type from the upstream component output
typeB (dict): A dict that describes a type from the downstream component input
'''
typeA_name,_ = list(typeA.items())[0]
typeB_name,_ = list(typeB.items())[0]
if typeA_name != typeB_name:
return False
type_name = typeA_name
for type_property in typeA[type_name]:
if type_property not in typeB[type_name]:
print(type_name + ' has a property ' + str(type_property) + ' that the downstream type does not.')
return False
if typeA[type_name][type_property] != typeB[type_name][type_property]:
print(type_name + ' property ' + str(type_property) + ' has conflicting values: ' +
  str(typeA[type_name][type_property]) + ' (upstream) and ' +
  str(typeB[type_name][type_property]) + ' (downstream).')
return False
return True

def check_types(typeA, typeB):
'''check_types checks the type consistency.
For each attribute in typeA, typeB must contain the same attribute with the same value;
typeB may also contain attributes that typeA does not.
Args:
typeA (BaseType/str/dict): it describes a type from the upstream component output
typeB (BaseType/str/dict): it describes a type from the downstream component input
'''
if isinstance(typeA, BaseType):
typeA = _instance_to_dict(typeA)
elif isinstance(typeA, str):
typeA = _str_to_dict(typeA)
if isinstance(typeB, BaseType):
typeB = _instance_to_dict(typeB)
elif isinstance(typeB, str):
typeB = _str_to_dict(typeB)
return _check_dict_types(typeA, typeB)
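check_types accepts any mix of BaseType instances, already-serialized dicts, and JSON strings; each side is normalized to a dict before _check_dict_types performs the one-way comparison. A brief sketch of the intended behavior, importing from the private kfp.dsl._types module added in this diff and using illustrative values:

from kfp.dsl._types import GCSPath, check_types

upstream = GCSPath(path_type='file', file_type='csv')

# The downstream type declares the same name and properties, so the check passes.
check_types(upstream, {'GCSPath': {'path_type': 'file', 'file_type': 'csv'}})  # True

# The upstream type has a property (file_type) that the downstream type lacks,
# so the check fails and the missing property is printed.
check_types(upstream, '{"GCSPath": {"path_type": "file"}}')  # False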
81 changes: 81 additions & 0 deletions sdk/python/tests/dsl/container_op_tests.py
@@ -14,8 +14,89 @@


from kfp.dsl import Pipeline, PipelineParam, ContainerOp
from kfp.dsl import ComponentMeta, ParameterMeta, TypeMeta
import unittest

class TestComponentMeta(unittest.TestCase):

def test_to_dict(self):
component_meta = ComponentMeta(name='foobar',
description='foobar example',
inputs=[ParameterMeta(name='input1',
description='input1 desc',
param_type=TypeMeta(name='GCSPath',
properties={'bucket_type': 'directory',
'file_type': 'csv'
}
)
),
ParameterMeta(name='input2',
description='input2 desc',
param_type=TypeMeta(name='TFModel',
properties={'input_data': 'tensor',
'version': '1.8.0'
}
)
),
],
outputs=[ParameterMeta(name='output1',
description='output1 desc',
param_type=TypeMeta(name='Schema',
properties={'file_type': 'tsv'
}
)
)
]
)
golden_meta = {
'name': 'foobar',
'description': 'foobar example',
'inputs': [
{
'name': 'input1',
'description': 'input1 desc',
'type': {
'GCSPath': {
'bucket_type': 'directory',
'file_type': 'csv'
}
}
},
{
'name': 'input2',
'description': 'input2 desc',
'type': {
'TFModel': {
'input_data': 'tensor',
'version': '1.8.0'
}
}
}
],
'outputs': [
{
'name': 'output1',
'description': 'output1 desc',
'type': {
'Schema': {
'file_type': 'tsv'
}
}
}
]
}
self.assertEqual(component_meta.to_dict(), golden_meta)

def test_type_meta_from_dict(self):
component_dict = {
'GCSPath': {
'bucket_type': 'directory',
'file_type': 'csv'
}
}
golden_type_meta = TypeMeta(name='GCSPath', properties={'bucket_type': 'directory',
'file_type': 'csv'})
self.assertEqual(TypeMeta.from_dict(component_dict), golden_type_meta)

class TestContainerOp(unittest.TestCase):

3 changes: 2 additions & 1 deletion sdk/python/tests/dsl/main.py
@@ -20,14 +20,15 @@
import pipeline_param_tests
import container_op_tests
import ops_group_tests

import type_tests

if __name__ == '__main__':
suite = unittest.TestSuite()
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(pipeline_param_tests))
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(pipeline_tests))
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(container_op_tests))
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(ops_group_tests))
suite.addTests(unittest.defaultTestLoader.loadTestsFromModule(type_tests))
runner = unittest.TextTestRunner()
if not runner.run(suite).wasSuccessful():
sys.exit(1)
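The type_tests module wired into the suite above is not included in this excerpt. Purely as an assumption about its shape, a minimal module that exercises check_types and _check_valid_dict from kfp.dsl._types might look like the following sketch; the real module in the PR may differ.

import unittest

from kfp.dsl._types import GCSPath, check_types, _check_valid_dict


class TestTypes(unittest.TestCase):
  # Hypothetical tests sketching how the type helpers could be exercised;
  # not the PR's actual type_tests.py.

  def test_check_valid_dict(self):
    self.assertTrue(_check_valid_dict({'GCSPath': {'file_type': 'csv'}}))
    self.assertFalse(_check_valid_dict({'GCSPath': 'csv'}))

  def test_check_types(self):
    upstream = GCSPath(path_type='file', file_type='csv')
    self.assertTrue(check_types(upstream, {'GCSPath': {'path_type': 'file', 'file_type': 'csv'}}))
    self.assertFalse(check_types(upstream, {'GCSPath': {'path_type': 'file'}}))


if __name__ == '__main__':
  unittest.main()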