Skip to content

Commit

Permalink
Merge pull request #167 from ImperialCollegeLondon/validate_user_header
Browse files Browse the repository at this point in the history
Validate user header
  • Loading branch information
dalonsoa authored Dec 18, 2024
2 parents 2de2871 + 08e187c commit 3881fc3
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 24 deletions.
4 changes: 2 additions & 2 deletions csvy/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"Polars is not installed. Reading into a pl.DataFrame will not work."
)

from .validators import validate_read
from .validators import validate_header


def get_comment(line: str, marker: str = "---") -> str:
Expand Down Expand Up @@ -95,7 +95,7 @@ def read_header(
line = line.lstrip(comment)
header.append(line)

return validate_read(yaml.safe_load("".join(header), **kwargs)), nlines, comment
return validate_header(yaml.safe_load("".join(header), **kwargs)), nlines, comment


def read_metadata(
Expand Down
33 changes: 22 additions & 11 deletions csvy/validators.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Module that contains validators for the CSVY file format."""

import csv
from collections.abc import Mapping
from typing import Any, Callable, Optional, TypeVar

from pydantic import BaseModel, Field
Expand Down Expand Up @@ -39,11 +40,17 @@ def decorator(cls: type[BaseModel]) -> type[BaseModel]:
return decorator


def validate_read(header: dict[str, Any]) -> dict[str, Any]:
"""Run the validators on the header in a read operation.
def validate_header(header: dict[str, Any]) -> dict[str, Any]:
"""Run the validators on the header.
This function runs the validators on the header. It uses the keys of the header to
find the validators in the registry and runs them on the corresponding values.
find the validators in the registry and runs them on the corresponding values. As
a result, some values in the header may be replaced by the validated values in the
form of Pydantic models.
If the header has already been validated, any Pydantic models within it are
dumped to dictionaries and validated again. This accounts for the case where
attributes of the Pydantic models have been changed to invalid values.
Args:
header: The header of the CSVY file.
Expand All @@ -52,28 +59,32 @@ def validate_read(header: dict[str, Any]) -> dict[str, Any]:
The validated header.
"""
validated_header = {}
validated_header: dict[str, Any] = {}
for key, value in header.items():
value_ = value.model_dump() if isinstance(value, BaseModel) else value
if key in VALIDATORS_REGISTRY:
if not isinstance(value_, Mapping):
raise TypeError(
f"Value for '{key}' must be a mapping, not a '{type(value_)}'."
)
validator = VALIDATORS_REGISTRY[key]
validated_header[key] = validator(**value)
validated_header[key] = validator(**value_)
else:
validated_header[key] = value
validated_header[key] = value_
return validated_header


def validate_write(header: dict[str, Any]) -> dict[str, Any]:
"""Use the validators to create the header in a write operation.
def header_to_dict(header: dict[str, Any]) -> dict[str, Any]:
"""Transform the header into a serializable dictionary.
Transforms the header with validators to a header with dictionaries that can be
saved as yaml. It is the reversed operation of validate_read, so calling
validate_write(validate_read(header)) should return the original header.
saved as yaml.
Args:
header: Dictionary to be saved as the header of the CSVY file.
Returns:
The validated header.
The validated header, as a serializable dictionary.
"""
validated_header = {}
Expand Down
4 changes: 2 additions & 2 deletions csvy/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import yaml

from .validators import validate_write
from .validators import header_to_dict, validate_header

KNOWN_WRITERS: list[Callable[[Path | str, Any, str], bool]] = []

Expand Down Expand Up @@ -147,7 +147,7 @@ def write_header(
arguments, it will be set to sort_keys=False.
"""
header_ = validate_write(header)
header_ = header_to_dict(validate_header(header))
if not isinstance(file, TextIOBase):
with Path(file).open("w", encoding=encoding) as f:
write_header(f, header_, comment, **kwargs)
Expand Down
42 changes: 33 additions & 9 deletions tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,29 +75,37 @@ class _:
pass


def test_validate_read(validators_registry):
def test_validate_header(validators_registry):
"""Test that we can run validators on the header."""
from pydantic import BaseModel, PositiveInt

from csvy.validators import register_validator, validate_read
from csvy.validators import register_validator, validate_header

@register_validator("my_validator")
class MyValidator(BaseModel):
value: PositiveInt

header = {"author": "Gandalf", "my_validator": {"value": 42}}
validated_header = validate_read(header)
validated_header = validate_header(header)

assert isinstance(validated_header["my_validator"], MyValidator)
assert validated_header["my_validator"].value == 42
assert validated_header["author"] == header["author"]

# If the header is already validated, it should pass
assert validate_header(validated_header) == validated_header

def test_validate_read_missing(validators_registry):
# But if the validated header is changed to an invalid value, it should fail
validated_header["my_validator"].value = -1
with pytest.raises(ValueError):
validate_header(validated_header)


def test_validate_header_missing(validators_registry):
"""Test that validation fails when a registered key's value is an empty mapping."""
from pydantic import BaseModel, PositiveInt, ValidationError

from csvy.validators import register_validator, validate_read
from csvy.validators import register_validator, validate_header

@register_validator("my_validator")
class _(BaseModel):
Expand All @@ -106,21 +114,37 @@ class _(BaseModel):
header = {"author": "Gandalf", "my_validator": {}}

with pytest.raises(ValidationError):
validate_read(header)
validate_header(header)


def test_validate_header_wrong_type(validators_registry):
    """Check that a non-mapping value under a registered key is rejected.

    A validator is registered for ``my_validator``, but the header supplies a
    bare integer for that key instead of a mapping, so ``validate_header`` is
    expected to raise a ``TypeError``.
    """
    from pydantic import BaseModel, PositiveInt

    from csvy.validators import register_validator, validate_header

    @register_validator("my_validator")
    class PositiveValueModel(BaseModel):
        value: PositiveInt

    # The integer 42 is not a mapping, so it cannot be unpacked into the model.
    with pytest.raises(TypeError):
        validate_header({"author": "Gandalf", "my_validator": 42})


def test_validate_write(validators_registry):
"""Test that we can create the header using the validators."""
from pydantic import BaseModel, PositiveInt

from csvy.validators import register_validator, validate_read, validate_write
from csvy.validators import header_to_dict, register_validator, validate_header

@register_validator("my_validator")
class _(BaseModel):
value: PositiveInt

header = {"author": "Gandalf", "my_validator": {"value": 42}}
validated_header = validate_read(header)
new_header = validate_write(validated_header)
validated_header = validate_header(header)
new_header = header_to_dict(validated_header)

assert new_header == header

0 comments on commit 3881fc3

Please sign in to comment.