Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] JSON IDL V2 #2741

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.dev
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ COPY . /flytekit
RUN SETUPTOOLS_SCM_PRETEND_VERSION_FOR_FLYTEKIT=$PSEUDO_VERSION \
SETUPTOOLS_SCM_PRETEND_VERSION_FOR_FLYTEIDL=3.0.0dev0 \
uv pip install --system --no-cache-dir -U \
"git+https://github.com/flyteorg/flyte.git@master#subdirectory=flyteidl" \
"git+https://github.com/flyteorg/flyte.git@4d65a313a1ee7e7b2684bc8220c611fd60ebf472#subdirectory=flyteidl" \
-e /flytekit \
-e /flytekit/plugins/flytekit-deck-standard \
-e /flytekit/plugins/flytekit-flyteinteractive \
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-e file:.
flyteidl @ git+https://github.com/flyteorg/flyte.git@master#subdirectory=flyteidl
flyteidl @ git+https://github.com/flyteorg/flyte.git@4d65a313a1ee7e7b2684bc8220c611fd60ebf472#subdirectory=flyteidl

coverage[toml]
hypothesis
Expand Down
111 changes: 105 additions & 6 deletions flytekit/core/type_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
Scalar,
Union,
Void,
Json
)
from flytekit.models.types import LiteralType, SimpleType, TypeStructure, UnionType

Expand Down Expand Up @@ -196,6 +197,26 @@ def to_html(self, ctx: FlyteContext, python_val: T, expected_python_type: Type[T
"""
return str(python_val)

def to_json(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
json_str = json.dumps(python_val)
serialization_format = expected.simple.JSON
json_bytes = None
if serialization_format == "UTF-8":
json_bytes = json_str.encode("UTF-8")
if json_bytes is None:
json_bytes = json_str.encode("UTF-8") # default to UTF-8
return Literal(scalar=Scalar(json=Json(value=json_bytes, serialization_format="UTF-8")))

def from_json(self, ctx: FlyteContext, json_idl_object: Json, expected_python_type: Type[T]) -> T:
value = json_idl_object.value
serialization_format = json_idl_object.serialization_format
json_str = None
if serialization_format == "UTF-8":
json_str = value.decode("UTF-8")
if json_str is None:
json_str = value.decode("UTF-8") # default to UTF-8
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
serialization_format = json_idl_object.serialization_format
json_str = None
if serialization_format == "UTF-8":
json_str = value.decode("UTF-8")
if json_str is None:
json_str = value.decode("UTF-8") # default to UTF-8
serialization_format = json_idl_object.serialization_format or "UTF-8"
json_str = None
if serialization_format == "UTF-8":
json_str = value.decode("UTF-8")
else:
raise ValueError(...)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will do this later, thank you.

return json.loads(json_str)

def __repr__(self):
return f"{self._name} Transforms ({self._t}) to Flyte native"

Expand Down Expand Up @@ -488,9 +509,83 @@ def get_literal_type(self, t: Type[T]) -> LiteralType:

ts = TypeStructure(tag="", dataclass_type=literal_type)

return _type_models.LiteralType(simple=_type_models.SimpleType.STRUCT, metadata=schema, structure=ts)
return _type_models.LiteralType(simple=_type_models.SimpleType.JSON, metadata=schema, structure=ts)

# We use UTF-8 as the default serialization format for JSON
def to_json(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
if isinstance(python_val, dict):
json_str = json.dumps(python_val)
json_bytes = json_str.encode("UTF-8")
return Literal(scalar=Scalar(json=Json(value=json_bytes, serialization_format="UTF-8")))

if not dataclasses.is_dataclass(python_val):
Comment on lines +521 to +527
Copy link
Member Author

@Future-Outlier Future-Outlier Sep 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want to reuse the code in to_literal and to_python_val because I think this will be far more readable and more easier to customize behavior in the future when we want to have more flexible change to the JSON IDL object.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't come up with the scenario, but this is just my instinct.

raise TypeTransformerFailedError(
f"{type(python_val)} is not of type @dataclass, only Dataclasses are supported for "
f"user defined datatypes in Flytekit"
)

self._make_dataclass_json_serializable(python_val, python_type)

# The `to_json` integrated through mashumaro's `DataClassJSONMixin` allows for more
# functionality than JSONEncoder
# We can't use hasattr(python_val, "to_json") here because we rely on mashumaro's API to customize the serialization behavior for Flyte types.
if isinstance(python_val, DataClassJSONMixin):
json_str = python_val.to_json()
else:
# The function looks up or creates a JSONEncoder specifically designed for the object's type.
# This encoder is then used to convert a data class into a JSON string.
try:
encoder = self._encoder[python_type]
except KeyError:
encoder = JSONEncoder(python_type)
self._encoder[python_type] = encoder

try:
json_str = encoder.encode(python_val)
except NotImplementedError:
# you can refer FlyteFile, FlyteDirectory and StructuredDataset to see how flyte types can be implemented.
raise NotImplementedError(
f"{python_type} should inherit from mashumaro.types.SerializableType"
f" and implement _serialize and _deserialize methods."
)

json_bytes = json_str.encode("UTF-8")
return Literal(scalar=Scalar(json=Json(value=json_bytes, serialization_format="UTF-8")))

# We use UTF-8 as the default serialization format for JSON
def from_json(self, ctx: FlyteContext, json_idl_object: Json, expected_python_type: Type[T]) -> T:
value = json_idl_object.value
serialization_format = json_idl_object.serialization_format
json_str = None

if serialization_format == "UTF-8":
json_str = value.decode("UTF-8")
if json_str is None:
json_str = value.decode("UTF-8") # default to UTF-8

# The `from_json` function is provided from mashumaro's `DataClassJSONMixin`.
# It deserializes a JSON string into a data class, and supports additional functionality over JSONDecoder
# We can't use hasattr(expected_python_type, "from_json") here because we rely on mashumaro's API to customize the deserialization behavior for Flyte types.
if issubclass(expected_python_type, DataClassJSONMixin):
dc = expected_python_type.from_json(json_str) # type: ignore
else:
# The function looks up or creates a JSONDecoder specifically designed for the object's type.
# This decoder is then used to convert a JSON string into a data class.
try:
decoder = self._decoder[expected_python_type]
except KeyError:
decoder = JSONDecoder(expected_python_type)
self._decoder[expected_python_type] = decoder

dc = decoder.decode(json_str)

dc = self._fix_structured_dataset_type(expected_python_type, dc)
return self._fix_dataclass_int(expected_python_type, dc)

def to_literal(self, ctx: FlyteContext, python_val: T, python_type: Type[T], expected: LiteralType) -> Literal:
if expected.simple == SimpleType.JSON:
return self.to_json(ctx, python_val, python_type, expected)

if isinstance(python_val, dict):
json_str = json.dumps(python_val)
return Literal(scalar=Scalar(generic=_json_format.Parse(json_str, _struct.Struct())))
Expand Down Expand Up @@ -570,7 +665,7 @@ def _fix_structured_dataset_type(self, python_type: Type[T], python_val: typing.
python_val.__setattr__(field.name, self._fix_structured_dataset_type(field.type, val))
return python_val

def _make_dataclass_serializable(self, python_val: T, python_type: Type[T]) -> typing.Any:
def _make_dataclass_json_serializable(self, python_val: T, python_type: Type[T]) -> typing.Any:
"""
If any field inside the dataclass is flyte type, we should use flyte type transformer for that field.
"""
Expand All @@ -581,18 +676,18 @@ def _make_dataclass_serializable(self, python_val: T, python_type: Type[T]) -> t
if UnionTransformer.is_optional_type(python_type):
if python_val is None:
return None
return self._make_dataclass_serializable(python_val, get_args(python_type)[0])
return self._make_dataclass_json_serializable(python_val, get_args(python_type)[0])

if hasattr(python_type, "__origin__") and get_origin(python_type) is list:
if python_val is None:
return None
return [self._make_dataclass_serializable(v, get_args(python_type)[0]) for v in cast(list, python_val)]
return [self._make_dataclass_json_serializable(v, get_args(python_type)[0]) for v in cast(list, python_val)]

if hasattr(python_type, "__origin__") and get_origin(python_type) is dict:
if python_val is None:
return None
return {
k: self._make_dataclass_serializable(v, get_args(python_type)[1])
k: self._make_dataclass_json_serializable(v, get_args(python_type)[1])
for k, v in cast(dict, python_val).items()
}

Expand All @@ -618,7 +713,7 @@ def _make_dataclass_serializable(self, python_val: T, python_type: Type[T]) -> t
dataclass_attributes = typing.get_type_hints(python_type)
for n, t in dataclass_attributes.items():
val = python_val.__getattribute__(n)
python_val.__setattr__(n, self._make_dataclass_serializable(val, t))
python_val.__setattr__(n, self._make_dataclass_json_serializable(val, t))
return python_val

def _fix_val_int(self, t: typing.Type, val: typing.Any) -> typing.Any:
Expand Down Expand Up @@ -672,6 +767,10 @@ def to_python_value(self, ctx: FlyteContext, lv: Literal, expected_python_type:
"user defined datatypes in Flytekit"
)

json_idl_object = lv.scalar.json
if json_idl_object:
return self.from_json(ctx, json_idl_object, expected_python_type)

json_str = _json_format.MessageToJson(lv.scalar.generic)

# The `from_json` function is provided from mashumaro's `DataClassJSONMixin`.
Expand Down
2 changes: 1 addition & 1 deletion flytekit/interaction/click_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def literal_type_to_click_type(lt: LiteralType, python_type: typing.Type) -> cli
Converts a Flyte LiteralType given a python_type to a click.ParamType
"""
if lt.simple:
if lt.simple == SimpleType.STRUCT:
if lt.simple == SimpleType.STRUCT or lt.simple == SimpleType.JSON:
ct = JsonParamType(python_type)
ct.name = f"JSON object {python_type.__name__}"
return ct
Expand Down
2 changes: 2 additions & 0 deletions flytekit/interaction/string_literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def scalar_to_string(scalar: Scalar) -> typing.Any:
return MessageToDict(scalar.generic)
if scalar.union:
return literal_string_repr(scalar.union.value)
if scalar.json:
return scalar.json
raise ValueError(f"Unknown scalar type {scalar}")


Expand Down
44 changes: 44 additions & 0 deletions flytekit/models/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,40 @@ def from_flyte_idl(cls, proto):
return cls()


class Json(_common.FlyteIdlEntity):
def __init__(self, value: bytes, serialization_format: str = "utf-8"):
self._value = value
self._serialization_format = serialization_format

@property
def value(self):
"""
:rtype: bytes
"""
return self._value

@property
def serialization_format(self):
"""
:rtype: str
"""
return self._serialization_format

def to_flyte_idl(self):
"""
:rtype: flyteidl.core.literals_pb2.Json
"""
return _literals_pb2.Json(value=self.value, serialization_format=self.serialization_format)

@classmethod
def from_flyte_idl(cls, proto):
"""
:param flyteidl.core.literals_pb2.Json proto:
:rtype: Json
"""
return cls(value=proto.value, serialization_format=proto.serialization_format)


class BindingDataMap(_common.FlyteIdlEntity):
def __init__(self, bindings):
"""
Expand Down Expand Up @@ -712,6 +746,7 @@ def __init__(
error: Error = None,
generic: Struct = None,
structured_dataset: StructuredDataset = None,
json: Json = None,
):
"""
Scalar wrapper around Flyte types. Only one can be specified.
Expand All @@ -724,6 +759,7 @@ def __init__(
:param Error error:
:param google.protobuf.struct_pb2.Struct generic:
:param StructuredDataset structured_dataset:
:param Json json:
"""

self._primitive = primitive
Expand All @@ -735,6 +771,7 @@ def __init__(
self._error = error
self._generic = generic
self._structured_dataset = structured_dataset
self._json = json

@property
def primitive(self):
Expand Down Expand Up @@ -796,6 +833,10 @@ def generic(self):
def structured_dataset(self) -> StructuredDataset:
return self._structured_dataset

@property
def json(self) -> Json:
return self._json

@property
def value(self):
"""
Expand All @@ -812,6 +853,7 @@ def value(self):
or self.error
or self.generic
or self.structured_dataset
or self.json
)

def to_flyte_idl(self):
Expand All @@ -828,6 +870,7 @@ def to_flyte_idl(self):
error=self.error.to_flyte_idl() if self.error is not None else None,
generic=self.generic,
structured_dataset=self.structured_dataset.to_flyte_idl() if self.structured_dataset is not None else None,
json=self.json.to_flyte_idl() if self.json is not None else None,
)

@classmethod
Expand All @@ -849,6 +892,7 @@ def from_flyte_idl(cls, pb2_object):
structured_dataset=StructuredDataset.from_flyte_idl(pb2_object.structured_dataset)
if pb2_object.HasField("structured_dataset")
else None,
json=Json.from_flyte_idl(pb2_object.json) if pb2_object.HasField("json") else None,
)


Expand Down
1 change: 1 addition & 0 deletions flytekit/models/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class SimpleType(object):
BINARY = _types_pb2.BINARY
ERROR = _types_pb2.ERROR
STRUCT = _types_pb2.STRUCT
JSON = _types_pb2.JSON


class SchemaType(_common.FlyteIdlEntity):
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies = [
"diskcache>=5.2.1",
"docker>=4.0.0",
"docstring-parser>=0.9.0",
"flyteidl>=1.13.1",
"flyteidl",
"fsspec>=2023.3.0",
"gcsfs>=2023.3.0",
"googleapis-common-protos>=1.57",
Expand Down
Loading