Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate user header #167

Merged
merged 5 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions csvy/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"Polars is not installed. Reading into a pl.DataFrame will not work."
)

from .validators import validate_read
from .validators import validate_header


def get_comment(line: str, marker: str = "---") -> str:
Expand Down Expand Up @@ -95,7 +95,7 @@ def read_header(
line = line.lstrip(comment)
header.append(line)

return validate_read(yaml.safe_load("".join(header), **kwargs)), nlines, comment
return validate_header(yaml.safe_load("".join(header), **kwargs)), nlines, comment


def read_metadata(
Expand Down
32 changes: 21 additions & 11 deletions csvy/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,17 @@ def decorator(cls: type[BaseModel]) -> type[BaseModel]:
return decorator


def validate_read(header: dict[str, Any]) -> dict[str, Any]:
"""Run the validators on the header in a read operation.
def validate_header(header: dict[str, Any]) -> dict[str, Any]:
"""Run the validators on the header.

This function runs the validators on the header. It uses the keys of the header to
find the validators in the registry and runs them on the corresponding values.
find the validators in the registry and runs them on the corresponding values. As
a result, some values in the header may be replaced by the validated values in the
form of Pydantic models.

If the header is an already validated header, the Pydantic models within, if any,
are dumped to dictionaries and re-validated, again. This accounts for the case where
attributes of the Pydantic models are changed to invalid values.

Args:
header: The header of the CSVY file.
Expand All @@ -52,28 +58,32 @@ def validate_read(header: dict[str, Any]) -> dict[str, Any]:
The validated header.

"""
validated_header = {}
validated_header: dict[str, Any] = {}
for key, value in header.items():
value_ = value.model_dump() if isinstance(value, BaseModel) else value
if key in VALIDATORS_REGISTRY:
if not isinstance(value_, dict):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thinking about this, I suppose any Mapping type would work, not just dicts. The same is true for the input argument.

That said, users probably aren't going to be using other types of Mapping here all that often.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And would TypeError maybe be more appropriate than ValueError?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes to the second - I'll update. About the first, anything that can be unpacked with ** would work. I was not sure if Mappings in general allow this, so I sticked to dict that I know will work and, as you say, it is what most people will use.

Copy link
Collaborator Author

@dalonsoa dalonsoa Dec 18, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed in 08e187c

raise ValueError(
f"Value for '{key}' must be a dictionary, not a '{type(value_)}'."
)
validator = VALIDATORS_REGISTRY[key]
validated_header[key] = validator(**value)
validated_header[key] = validator(**value_)
else:
validated_header[key] = value
validated_header[key] = value_
return validated_header


def validate_write(header: dict[str, Any]) -> dict[str, Any]:
"""Use the validators to create the header in a write operation.
def header_to_dict(header: dict[str, Any]) -> dict[str, Any]:
"""Transform the header into a serializable dictionary.

Transforms the header with validators to a header with dictionaries that can be
saved as yaml. It is the reversed operation of validate_read, so calling
validate_write(validate_read(header)) should return the original header.
saved as yaml.

Args:
header: Dictionary to be saved as the header of the CSVY file.

Returns:
The validated header.
The validated header, as a serializable dictionary.

"""
validated_header = {}
Expand Down
4 changes: 2 additions & 2 deletions csvy/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import yaml

from .validators import validate_write
from .validators import header_to_dict, validate_header

KNOWN_WRITERS: list[Callable[[Path | str, Any, str], bool]] = []

Expand Down Expand Up @@ -147,7 +147,7 @@ def write_header(
arguments, it will be set to sort_keys=False.

"""
header_ = validate_write(header)
header_ = header_to_dict(validate_header(header))
if not isinstance(file, TextIOBase):
with Path(file).open("w", encoding=encoding) as f:
write_header(f, header_, comment, **kwargs)
Expand Down
24 changes: 16 additions & 8 deletions tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,29 +75,37 @@ class _:
pass


def test_validate_read(validators_registry):
def test_validate_header(validators_registry):
"""Test that we can run validators on the header."""
from pydantic import BaseModel, PositiveInt

from csvy.validators import register_validator, validate_read
from csvy.validators import register_validator, validate_header

@register_validator("my_validator")
class MyValidator(BaseModel):
value: PositiveInt

header = {"author": "Gandalf", "my_validator": {"value": 42}}
validated_header = validate_read(header)
validated_header = validate_header(header)

assert isinstance(validated_header["my_validator"], MyValidator)
assert validated_header["my_validator"].value == 42
assert validated_header["author"] == header["author"]

# If the header is already validated, it should pass
assert validate_header(validated_header) == validated_header

# But if the validated header is changed to an invalid value, it should fail
validated_header["my_validator"].value = -1
with pytest.raises(ValueError):
validate_header(validated_header)


def test_validate_read_missing(validators_registry):
"""Test that we can run validators on the header."""
from pydantic import BaseModel, PositiveInt, ValidationError

from csvy.validators import register_validator, validate_read
from csvy.validators import register_validator, validate_header

@register_validator("my_validator")
class _(BaseModel):
Expand All @@ -106,21 +114,21 @@ class _(BaseModel):
header = {"author": "Gandalf", "my_validator": {}}

with pytest.raises(ValidationError):
validate_read(header)
validate_header(header)


def test_validate_write(validators_registry):
"""Test that we can create the header using the validators."""
from pydantic import BaseModel, PositiveInt

from csvy.validators import register_validator, validate_read, validate_write
from csvy.validators import header_to_dict, register_validator, validate_header

@register_validator("my_validator")
class _(BaseModel):
value: PositiveInt

header = {"author": "Gandalf", "my_validator": {"value": 42}}
validated_header = validate_read(header)
new_header = validate_write(validated_header)
validated_header = validate_header(header)
new_header = header_to_dict(validated_header)

assert new_header == header
Loading