diff --git a/python/pyspark/sql/connect/client.py b/python/pyspark/sql/connect/client.py
index 070c4ab19d3d8..4fdab52928b9b 100644
--- a/python/pyspark/sql/connect/client.py
+++ b/python/pyspark/sql/connect/client.py
@@ -173,7 +173,8 @@ def __init__(self, url: str, channelOptions: Optional[List[Tuple[str, Any]]] = N
             raise PySparkValueError(
                 error_class="INVALID_CONNECT_URL",
                 message_parameters={
-                    "detail": "URL scheme must be set to `sc`.",
+                    "detail": "The URL must start with 'sc://'. Please update the URL to "
+                    "follow the correct format, e.g., 'sc://hostname:port'.",
                 },
             )
         # Rewrite the URL to use http as the scheme so that we can leverage
@@ -185,8 +186,8 @@ def __init__(self, url: str, channelOptions: Optional[List[Tuple[str, Any]]] = N
             raise PySparkValueError(
                 error_class="INVALID_CONNECT_URL",
                 message_parameters={
-                    "detail": f"Path component for connection URI `{self.url.path}` "
-                    f"must be empty.",
+                    "detail": f"The path component '{self.url.path}' must be empty. Please update "
+                    f"the URL to follow the correct format, e.g., 'sc://hostname:port'.",
                 },
             )
         self._extract_attributes()
@@ -210,7 +211,9 @@ def _extract_attributes(self) -> None:
                     raise PySparkValueError(
                         error_class="INVALID_CONNECT_URL",
                         message_parameters={
-                            "detail": f"Parameter '{p}' is not a valid parameter key-value pair.",
+                            "detail": f"Parameter '{p}' should be provided as a "
+                            f"key-value pair separated by an equal sign (=). Please update "
+                            f"the parameter to follow the correct format, e.g., 'key=value'.",
                         },
                     )
                 self.params[kv[0]] = urllib.parse.unquote(kv[1])
@@ -226,8 +229,9 @@ def _extract_attributes(self) -> None:
             raise PySparkValueError(
                 error_class="INVALID_CONNECT_URL",
                 message_parameters={
-                    "detail": f"Target destination {self.url.netloc} does not match "
-                    f"'<host>:<port>' pattern.",
+                    "detail": f"Target destination '{self.url.netloc}' should match the "
+                    f"'<host>:<port>' pattern. Please update the destination to follow "
+                    f"the correct format, e.g., 'hostname:port'.",
                 },
             )
 