diff --git a/csvy/readers.py b/csvy/readers.py index 668eda0..9edc1fd 100644 --- a/csvy/readers.py +++ b/csvy/readers.py @@ -34,7 +34,7 @@ "Polars is not installed. Reading into a pl.DataFrame will not work." ) -from .validators import validate_read +from .validators import validate_header def get_comment(line: str, marker: str = "---") -> str: @@ -95,7 +95,7 @@ def read_header( line = line.lstrip(comment) header.append(line) - return validate_read(yaml.safe_load("".join(header), **kwargs)), nlines, comment + return validate_header(yaml.safe_load("".join(header), **kwargs)), nlines, comment def read_metadata( diff --git a/csvy/validators.py b/csvy/validators.py index 31ccbcd..4900c5e 100644 --- a/csvy/validators.py +++ b/csvy/validators.py @@ -1,6 +1,7 @@ """Module that contains validators for the CSVY file format.""" import csv +from collections.abc import Mapping from typing import Any, Callable, Optional, TypeVar from pydantic import BaseModel, Field @@ -39,11 +40,17 @@ def decorator(cls: type[BaseModel]) -> type[BaseModel]: return decorator -def validate_read(header: dict[str, Any]) -> dict[str, Any]: - """Run the validators on the header in a read operation. +def validate_header(header: dict[str, Any]) -> dict[str, Any]: + """Run the validators on the header. This function runs the validators on the header. It uses the keys of the header to - find the validators in the registry and runs them on the corresponding values. + find the validators in the registry and runs them on the corresponding values. As + a result, some values in the header may be replaced by the validated values in the + form of Pydantic models. + + If the header is an already validated header, the Pydantic models within, if any, + are dumped to dictionaries and re-validated, again. This accounts for the case where + attributes of the Pydantic models are changed to invalid values. Args: header: The header of the CSVY file. @@ -52,28 +59,32 @@ def validate_read(header: dict[str, Any]) -> dict[str, Any]: The validated header. """ - validated_header = {} + validated_header: dict[str, Any] = {} for key, value in header.items(): + value_ = value.model_dump() if isinstance(value, BaseModel) else value if key in VALIDATORS_REGISTRY: + if not isinstance(value_, Mapping): + raise TypeError( + f"Value for '{key}' must be a mapping, not a '{type(value_)}'." + ) validator = VALIDATORS_REGISTRY[key] - validated_header[key] = validator(**value) + validated_header[key] = validator(**value_) else: - validated_header[key] = value + validated_header[key] = value_ return validated_header -def validate_write(header: dict[str, Any]) -> dict[str, Any]: - """Use the validators to create the header in a write operation. +def header_to_dict(header: dict[str, Any]) -> dict[str, Any]: + """Transform the header into a serializable dictionary. Transforms the header with validators to a header with dictionaries that can be - saved as yaml. It is the reversed operation of validate_read, so calling - validate_write(validate_read(header)) should return the original header. + saved as yaml. Args: header: Dictionary to be saved as the header of the CSVY file. Returns: - The validated header. + The validated header, as a serializable dictionary. """ validated_header = {} diff --git a/csvy/writers.py b/csvy/writers.py index f2f9e70..9e9f12f 100644 --- a/csvy/writers.py +++ b/csvy/writers.py @@ -11,7 +11,7 @@ import yaml -from .validators import validate_write +from .validators import header_to_dict, validate_header KNOWN_WRITERS: list[Callable[[Path | str, Any, str], bool]] = [] @@ -147,7 +147,7 @@ def write_header( arguments, it will be set to sort_keys=False. """ - header_ = validate_write(header) + header_ = header_to_dict(validate_header(header)) if not isinstance(file, TextIOBase): with Path(file).open("w", encoding=encoding) as f: write_header(f, header_, comment, **kwargs) diff --git a/tests/test_validators.py b/tests/test_validators.py index 073eeee..fcade69 100644 --- a/tests/test_validators.py +++ b/tests/test_validators.py @@ -75,29 +75,37 @@ class _: pass -def test_validate_read(validators_registry): +def test_validate_header(validators_registry): """Test that we can run validators on the header.""" from pydantic import BaseModel, PositiveInt - from csvy.validators import register_validator, validate_read + from csvy.validators import register_validator, validate_header @register_validator("my_validator") class MyValidator(BaseModel): value: PositiveInt header = {"author": "Gandalf", "my_validator": {"value": 42}} - validated_header = validate_read(header) + validated_header = validate_header(header) assert isinstance(validated_header["my_validator"], MyValidator) assert validated_header["my_validator"].value == 42 assert validated_header["author"] == header["author"] + # If the header is already validated, it should pass + assert validate_header(validated_header) == validated_header -def test_validate_read_missing(validators_registry): + # But if the validated header is changed to an invalid value, it should fail + validated_header["my_validator"].value = -1 + with pytest.raises(ValueError): + validate_header(validated_header) + + +def test_validate_header_missing(validators_registry): """Test that we can run validators on the header.""" from pydantic import BaseModel, PositiveInt, ValidationError - from csvy.validators import register_validator, validate_read + from csvy.validators import register_validator, validate_header @register_validator("my_validator") class _(BaseModel): @@ -106,21 +114,37 @@ class _(BaseModel): header = {"author": "Gandalf", "my_validator": {}} with pytest.raises(ValidationError): - validate_read(header) + validate_header(header) + + +def test_validate_header_wrong_type(validators_registry): + """Test that we can run validators on the header.""" + from pydantic import BaseModel, PositiveInt + + from csvy.validators import register_validator, validate_header + + @register_validator("my_validator") + class _(BaseModel): + value: PositiveInt + + header = {"author": "Gandalf", "my_validator": 42} + + with pytest.raises(TypeError): + validate_header(header) def test_validate_write(validators_registry): """Test that we can create the header using the validators.""" from pydantic import BaseModel, PositiveInt - from csvy.validators import register_validator, validate_read, validate_write + from csvy.validators import header_to_dict, register_validator, validate_header @register_validator("my_validator") class _(BaseModel): value: PositiveInt header = {"author": "Gandalf", "my_validator": {"value": 42}} - validated_header = validate_read(header) - new_header = validate_write(validated_header) + validated_header = validate_header(header) + new_header = header_to_dict(validated_header) assert new_header == header