Skip to content

Commit

Permalink
[SPARK-48414][PYTHON] Fix breaking change in python's fromJson
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Fix breaking change in `fromJson` method by having default param values.

### Why are the changes needed?

In order not to break existing clients that do not use collations.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing UTs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#46737 from stefankandic/fromJsonBreakingChange.

Authored-by: Stefan Kandic <stefan.kandic@databricks.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
  • Loading branch information
stefankandic authored and HyukjinKwon committed Jul 24, 2024
1 parent 118167f commit fdcf975
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 7 deletions.
32 changes: 32 additions & 0 deletions python/pyspark/sql/tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,38 @@ def test_schema_with_collations_on_non_string_types(self):
PySparkTypeError, lambda: _parse_datatype_json_string(collations_in_nested_map_json)
)

def test_array_type_from_json(self):
    """ArrayType.fromJson must work with and without the optional collation args."""
    plain_json = {"type": "array", "elementType": "string", "containsNull": True}
    collations = {"element": "UNICODE"}
    expected_plain = ArrayType(StringType(), True)
    expected_collated = ArrayType(StringType("UNICODE"), True)

    # No collation information: legacy call shape still works.
    self.assertEqual(expected_plain, ArrayType.fromJson(plain_json))
    # Collations passed positionally alongside an explicit field path.
    self.assertEqual(
        expected_collated,
        ArrayType.fromJson(plain_json, fieldPath="", collationsMap=collations),
    )
    # Collations passed by keyword only, relying on the fieldPath default.
    self.assertEqual(
        expected_collated, ArrayType.fromJson(plain_json, collationsMap=collations)
    )

def test_map_type_from_json(self):
    """MapType.fromJson must work with and without the optional collation args."""
    map_json = {
        "type": "map",
        "keyType": "string",
        "valueType": "string",
        "valueContainsNull": True,
    }
    collations = {"key": "UNICODE", "value": "UNICODE"}
    expected_plain = MapType(StringType(), StringType(), True)
    expected_collated = MapType(StringType("UNICODE"), StringType("UNICODE"), True)

    # No collation information: legacy call shape still works.
    self.assertEqual(expected_plain, MapType.fromJson(map_json))
    # Collations with an explicit (empty) field path.
    self.assertEqual(
        expected_collated,
        MapType.fromJson(map_json, fieldPath="", collationsMap=collations),
    )
    # Collations by keyword only, relying on the fieldPath default.
    self.assertEqual(
        expected_collated, MapType.fromJson(map_json, collationsMap=collations)
    )

def test_schema_with_bad_collations_provider(self):
from pyspark.sql.types import _parse_datatype_json_string, _COLLATIONS_METADATA_KEY

Expand Down
18 changes: 11 additions & 7 deletions python/pyspark/sql/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -762,11 +762,13 @@ def jsonValue(self) -> Dict[str, Any]:
def fromJson(
    cls,
    json: Dict[str, Any],
    fieldPath: str = "",
    collationsMap: Optional[Dict[str, str]] = None,
) -> "ArrayType":
    """Construct an :class:`ArrayType` from its JSON representation.

    Parameters
    ----------
    json : dict
        Parsed JSON object with ``elementType`` and ``containsNull`` keys.
    fieldPath : str, optional
        Dotted path of the field being parsed, used to look up collations.
        Defaults to ``""`` so pre-collation callers keep working.
    collationsMap : dict, optional
        Mapping from field paths to collation names; ``None`` (the default)
        means no collation metadata is applied.
    """
    # At the top level the path is just "element"; nested fields get
    # "<parent>.element" so collationsMap lookups stay unambiguous.
    elementType = _parse_datatype_json_value(
        json["elementType"],
        "element" if fieldPath == "" else fieldPath + ".element",
        collationsMap,
    )
    return ArrayType(elementType, json["containsNull"])

Expand Down Expand Up @@ -902,12 +904,14 @@ def jsonValue(self) -> Dict[str, Any]:
def fromJson(
cls,
json: Dict[str, Any],
fieldPath: str,
collationsMap: Optional[Dict[str, str]],
fieldPath: str = "",
collationsMap: Optional[Dict[str, str]] = None,
) -> "MapType":
keyType = _parse_datatype_json_value(json["keyType"], fieldPath + ".key", collationsMap)
keyType = _parse_datatype_json_value(
json["keyType"], "key" if fieldPath == "" else fieldPath + ".key", collationsMap
)
valueType = _parse_datatype_json_value(
json["valueType"], fieldPath + ".value", collationsMap
json["valueType"], "value" if fieldPath == "" else fieldPath + ".value", collationsMap
)
return MapType(
keyType,
Expand Down

0 comments on commit fdcf975

Please sign in to comment.