
Commit

Merge branch 'main' into table_validator
dalonsoa authored Dec 19, 2024
2 parents 3a69368 + 91ea71b commit 2f845c8
Showing 4 changed files with 151 additions and 1 deletion.
49 changes: 49 additions & 0 deletions csvy/readers.py
@@ -3,6 +3,7 @@
from __future__ import annotations

import logging
from itertools import zip_longest
from pathlib import Path
from typing import Any, Literal

@@ -299,3 +300,51 @@ def read_to_list(
        data.append(row)

    return data, header


def read_to_dict(
    filename: Path | str,
    marker: str = "---",
    encoding: str = "utf-8",
    csv_options: dict[str, Any] | None = None,
    yaml_options: dict[str, Any] | None = None,
    *,
    column_names: list[Any] | int | None = None,
    fillvalue: Any = None,
) -> tuple[dict[str, list[Any]], dict[str, Any]]:
    """Read a CSVY file into a dictionary with the header and the data as dictionaries.

    Internally, it calls `read_to_list` and then transforms the data into a dictionary.

    Args:
        filename: Name of the file to read.
        marker: The marker characters that indicate the yaml header.
        encoding: The character encoding in the file to read.
        csv_options: Options to pass to csv.reader.
        yaml_options: Options to pass to yaml.safe_load.
        column_names: Either a list with the column names, the row number containing
            the column names, or None. If None (the default), automatic column names
            ('col_0', 'col_1', ...) will be used.
        fillvalue: Value to use for missing data in the columns.

    Returns:
        Tuple containing the data and the header, both as dictionaries.
    """
    data, header = read_to_list(filename, marker, encoding, csv_options, yaml_options)

    longest_row = len(max(data, key=len))
    if column_names is None:
        column_names = [f"col_{i}" for i in range(longest_row)]
    else:
        if isinstance(column_names, int):
            column_names = data.pop(column_names)

        if len(column_names) != longest_row:
            raise ValueError(
                "The number of column names must be exactly the length of the "
                f"longest row ({len(column_names)} != {longest_row})."
            )

    columns = list(map(list, zip_longest(*data, fillvalue=fillvalue)))
    return dict(zip(column_names, columns)), header
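For context, a minimal usage sketch of the new read_to_dict helper; the file name ("measurements.csvy") and its column layout are assumed purely for illustration:

from csvy.readers import read_to_dict

# With column_names=None (the default), columns are named "col_0", "col_1", ...
data, header = read_to_dict("measurements.csvy", csv_options={"delimiter": ","})

# Take the column names from the first data row and pad short columns with None.
named, header = read_to_dict("measurements.csvy", column_names=0, fillvalue=None)
print(list(named.keys()))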
40 changes: 39 additions & 1 deletion csvy/writers.py
@@ -4,8 +4,9 @@

import csv
import logging
from collections.abc import Callable, Iterable
from collections.abc import Callable, Iterable, Mapping
from io import TextIOBase
from itertools import zip_longest
from pathlib import Path
from typing import Any

@@ -310,6 +311,43 @@ def write_polars(
    return False


@register_writer
def write_dict(
    filename: Path | str,
    data: Any,
    comment: str = "",
    encoding: str = "utf-8",
    *,
    fillvalue: Any = None,
    **kwargs: Any,
) -> bool:
    """Write the dictionary to the chosen file, adding it after the header.

    It transforms the dictionary into a tabular format before saving it using the
    generic `write_csv` function.

    Args:
        filename: Name of the file to save the data into. The data will be added to
            the end of the file.
        data: The data as a dictionary.
        comment: String to use to mark the header lines as comments.
        encoding: The character encoding to use in the file to write.
        fillvalue: Value to use to fill the missing values in the dictionary.
        **kwargs: Arguments to be passed to the underlying saving method.

    Returns:
        True if the writer worked, False otherwise.
    """
    if not isinstance(data, Mapping):
        return False

    data_ = [list(data.keys())]
    data_.extend(list(map(list, zip_longest(*data.values(), fillvalue=fillvalue))))

    return write_csv(filename, data_, comment, encoding, **kwargs)
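For context, a minimal usage sketch of write_dict; the file name and the dictionary contents are assumed for illustration. Registered writers such as this one are normally dispatched to by the library's generic write machinery, so the direct call below is only meant to show the dictionary-to-rows transformation:

from csvy.writers import write_dict

data = {"a": [1, 2, 3], "b": [4, 5]}  # ragged columns are allowed
# Writes a row of column names ("a", "b") followed by the padded rows
# (1, 4), (2, 5) and (3, "").
ok = write_dict("output.csvy", data, fillvalue="")
assert ok  # write_dict returns False when the data is not a Mapping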


def write_csv(
    filename: Path | str,
    data: Any,
44 changes: 44 additions & 0 deletions tests/test_read.py
@@ -132,3 +132,47 @@ def test_read_to_list(array_data_path):
    assert len(data[0]) == 4
    assert isinstance(header, dict)
    assert len(header) > 0


def test_read_to_dict_with_default_column_names(array_data_path):
    """Test the read_to_dict function with the default (automatic) column names."""
    from csvy.readers import read_to_dict

    data, header = read_to_dict(array_data_path, csv_options={"delimiter": ","})

    assert isinstance(data, dict)
    assert len(data) == 4
    assert list(data.keys()) == ["col_0", "col_1", "col_2", "col_3"]
    assert len(data["col_0"]) == 15
    assert len(header) > 0


def test_read_to_dict_with_custom_column_names(array_data_path):
    """Test the read_to_dict function with custom column names."""
    from csvy.readers import read_to_dict

    column_names = ["A", "B", "C", "D"]
    data, header = read_to_dict(
        array_data_path, column_names=column_names, csv_options={"delimiter": ","}
    )

    assert isinstance(data, dict)
    assert len(data) == 4
    assert list(data.keys()) == column_names
    assert len(data["A"]) == 15
    assert len(header) > 0


def test_read_to_dict_with_row_based_column_names(data_path):
    """Test the read_to_dict function with column names taken from a data row."""
    from csvy.readers import read_to_dict

    data, header = read_to_dict(
        data_path, column_names=0, csv_options={"delimiter": ","}
    )

    assert isinstance(data, dict)
    assert len(data) == 2
    assert list(data.keys()) == ["Date", "WTI"]
    assert len(data["Date"]) == 15
    assert len(header) > 0
19 changes: 19 additions & 0 deletions tests/test_write.py
@@ -126,6 +126,25 @@ class Writer:
    assert Writer.writerow.call_count == len(data)


@patch("csv.writer")
def test_write_dict(mock_save, tmpdir):
    """Test the write_dict function."""
    from csvy.writers import write_dict

    class Writer:
        writerow = MagicMock()

    mock_save.return_value = Writer
    filename = tmpdir / "some_file.csv"

    data = {"a": [1, 2, 3, 4], "b": [1, 2, 3], "c": [1, 2, 3, 4, 5]}
    expected_rows = max(map(len, data.values())) + 1  # +1 for the column names
    assert write_dict(filename, data)

    mock_save.assert_called_once()
    assert Writer.writerow.call_count == expected_rows


@patch("csv.writer")
@patch("csvy.writers.write_header")
@pytest.mark.parametrize(