From 9be592857135de2ed3eb607b4084405a7e383e29 Mon Sep 17 00:00:00 2001 From: Martin van der Schelling <61459087+mpvanderschelling@users.noreply.github.com> Date: Tue, 25 Jun 2024 10:26:44 +0200 Subject: [PATCH] Added docstings and tests for _newdata2 object --- .../experimentdata/_experimental/_newdata2.py | 389 +++++++++++++++-- .../_experimental/_newexperimentdata2.py | 5 - tests/newdata/conftest.py | 33 +- tests/newdata/test_data.py | 391 +++++++++--------- 4 files changed, 559 insertions(+), 259 deletions(-) diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py index 759473e6..4bff29cd 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newdata2.py @@ -24,25 +24,76 @@ MISSING_VALUE = np.nan +# ============================================================================= + class _Data: def __init__(self, data: Dict[int, Dict[str, Any]] = None): + """ + Initialize the _Data object. + + Parameters + ---------- + data : Dict[int, Dict[str, Any]], optional + The data dictionary with integer keys and dictionaries as values. + """ self.data = data if data is not None else {} def __len__(self) -> int: + """ + Get the number of items in the data. + + Returns + ------- + int + Number of items in the data. + """ return len(self.data) def __iter__(self): + """ + Get an iterator over the data values. + + Returns + ------- + iterator + Iterator over the data values. + """ return iter(self.data.values()) def __getitem__(self, rows: int | slice | Iterable[int]) -> _Data: - + """ + Get a subset of the data. + + Parameters + ---------- + rows : int or slice or Iterable[int] + The rows to retrieve. + + Returns + ------- + _Data + The subset of the data. + """ if isinstance(rows, int): rows = [rows] return _Data({row: self.data.get(row, {}) for row in rows}) def __add__(self, __o: _Data) -> _Data: + """ + Add another _Data object to this one. + + Parameters + ---------- + __o : _Data + The other _Data object. + + Returns + ------- + _Data + The combined _Data object. + """ if self.is_empty(): return __o @@ -56,78 +107,254 @@ def __add__(self, __o: _Data) -> _Data: return _data_copy def __eq__(self, __o: _Data) -> bool: + """ + Check if another _Data object is equal to this one. + + Parameters + ---------- + __o : _Data + The other _Data object. + + Returns + ------- + bool + True if the objects are equal, False otherwise. + """ return self.data == __o.data def _repr_html_(self) -> str: + """ + Get the HTML representation of the data. + + Returns + ------- + str + The HTML representation of the data. + """ return self.to_dataframe()._repr_html_() def __repr__(self) -> str: + """ + Get the string representation of the data. + + Returns + ------- + str + The string representation of the data. + """ return self.to_dataframe().__repr__() + +# Properties +# ============================================================================= + @property def indices(self) -> List[int]: + """ + Get the indices of the data. + + Returns + ------- + List[int] + The list of indices. + """ return list(self.data.keys()) @property def names(self) -> List[str]: + """ + Get the column names of the data. + + Returns + ------- + List[str] + The list of column names. + """ return self.to_dataframe().columns.tolist() + def is_empty(self) -> bool: + """ + Check if the data is empty. + + Returns + ------- + bool + True if the data is empty, False otherwise. + """ + return not bool(self.data) + + +# Initialization +# ============================================================================= + @classmethod - def from_indices(cls, rows: Iterable[int]): + def from_indices(cls, rows: Iterable[int]) -> _Data: + """ + Create a _Data object from a list of indices. + + Parameters + ---------- + rows : Iterable[int] + The indices to create the _Data object from. + + Returns + ------- + _Data + The created _Data object. + """ return cls({row: {} for row in rows}) - # @classmethod - # def from_domain(cls, space: Iterable[str]): - # return cls(None) - @classmethod def from_file(cls, filename: Path) -> _Data: + """ + Create a _Data object from a file. + + Parameters + ---------- + filename : Path + The file to read the data from. + + Returns + ------- + _Data + The created _Data object. + """ ... @classmethod def from_numpy(cls: Type[_Data], array: np.ndarray, keys: Optional[Iterable[str]] = None) -> _Data: + """ + Create a _Data object from a numpy array. + + Parameters + ---------- + array : np.ndarray + The numpy array to create the _Data object from. + keys : Optional[Iterable[str]], optional + The keys for the columns of the data. + + Returns + ------- + _Data + The created _Data object. + """ if keys is not None: return _Data( {index: {key: col for key, col in zip(keys, row) } for index, row in enumerate(array)}) else: - # Look out! i is now an integer key! return _Data( {index: {i: col for i, col in enumerate(row) } for index, row in enumerate(array)}) @classmethod def from_dataframe(cls, df: pd.DataFrame) -> _Data: + """ + Create a _Data object from a pandas DataFrame. + + Parameters + ---------- + df : pd.DataFrame + The DataFrame to create the _Data object from. + + Returns + ------- + _Data + The created _Data object. + """ return _Data( {index: row.to_dict() for index, (_, row) in enumerate(df.iterrows())}) +# Exporting +# ============================================================================= + def to_numpy(self) -> np.ndarray: + """ + Convert the data to a numpy array. + + Returns + ------- + np.ndarray + The numpy array representation of the data. + """ return self.to_dataframe().to_numpy() def to_xarray(self, label: str): + """ + Convert the data to an xarray DataArray. + + Parameters + ---------- + label : str + The label for the xarray DataArray. + + Returns + ------- + xr.DataArray + The xarray DataArray representation of the data. + """ df = self.to_dataframe() - # Can create the xarray with the information from the domain! return xr.DataArray( self.to_dataframe(), dims=['iterations', label], coords={ 'iterations': df.index, label: df.columns}) def to_dataframe(self) -> pd.DataFrame: - # Can create the dataframe from the numpy array + column names!! + """ + Convert the data to a pandas DataFrame. + + Returns + ------- + pd.DataFrame + The DataFrame representation of the data. + """ return pd.DataFrame(self.data).T def store(self, filename: Path): + """ + Store the data to a file. + + Parameters + ---------- + filename : Path + The file to store the data in. + """ ... - def n_best_samples(self, nosamples: int, key: str) -> _Data: - df = self.to_dataframe() - return df.nsmallest( - n=nosamples, columns=key) + def get_data_dict(self, row: int) -> Dict[str, Any]: + """ + Get the data dictionary for a specific row. + + Parameters + ---------- + row : int + The row to retrieve the data from. + + Returns + ------- + Dict[str, Any] + The data dictionary for the specified row. + """ + return self.data[row] - def select_columns(self, keys: Iterable[str] | str) -> _Data: - # This only works for single ints or slices!! +# Selecting and combining +# ============================================================================= + def select_columns(self, keys: Iterable[str] | str) -> _Data: + """ + Select specific columns from the data. + + Parameters + ---------- + keys : Iterable[str] or str + The keys of the columns to select. + + Returns + ------- + _Data + The _Data object with only the selected columns. + """ if isinstance(keys, str): keys = [keys] @@ -136,47 +363,135 @@ def select_columns(self, keys: Iterable[str] | str) -> _Data: for index, row in self.data.items()}) def drop(self, keys: Iterable[str] | str) -> _Data: - # Might be depreciated? - + """ + Drop specific columns from the data. + + Parameters + ---------- + keys : Iterable[str] or str + The keys of the columns to drop. + + Returns + ------- + _Data + The _Data object with the specified columns removed. + """ if isinstance(keys, str): keys = [keys] - for row in self.data: + for row in self: for key in keys: if key in row: - del self.data[row][key] + del row[key] + + def join(self, __o: _Data) -> _Data: + """ + Join another _Data object with this one. + + Parameters + ---------- + __o : _Data + The other _Data object to join with this one. + + Returns + ------- + _Data + The combined _Data object. + """ + _data = deepcopy(self) + for row, other_row in zip(_data, __o): + row.update(other_row) + + return _data + +# Modifying +# ============================================================================= + + def n_best_samples(self, nosamples: int, key: str) -> pd.DataFrame: + """ + Get the top N samples based on a specific key. + + Parameters + ---------- + nosamples : int + The number of samples to retrieve. + key : str + The key to sort the samples by. + + Returns + ------- + pd.DataFrame + The DataFrame with the top N samples. + """ + df = self.to_dataframe() + return df.nsmallest(n=nosamples, columns=key) def add_column(self, key: str): + """ + Add a new column to the data with missing values. + + Parameters + ---------- + key : str + The key for the new column. + """ for row in self.data: self.data[row][key] = MISSING_VALUE def remove(self, rows: Iterable[int]): + """ + Remove specific rows from the data. + + Parameters + ---------- + rows : Iterable[int] + The rows to remove. + """ for row in rows: - del self.data[row] # = deleting the row + del self.data[row] def overwrite(self, rows: Iterable[int], __o: _Data): + """ + Overwrite specific rows with data from another _Data object. + + Parameters + ---------- + rows : Iterable[int] + The rows to overwrite. + __o : _Data + The _Data object to overwrite the rows with. + """ for index, other_row in zip(rows, __o): self.data[index] = other_row - def join(self, __o: _Data) -> _Data: - _data = deepcopy(self) - for row, other_row in zip(_data, __o): - row.update(other_row) - - return _Data(_data) - - def get_data_dict(self, row: int) -> Dict[str, Any]: - return self.data[row] - def set_data(self, row: int, value: Any, key: str): + """ + Set a specific value in the data. + + Parameters + ---------- + row : int + The row to set the value in. + value : Any + The value to set. + key : str + The key for the value. + """ self.data[row][key] = value def reset_index(self, rows: Iterable[int] = None): - self.data = {index: values for index, values in enumerate(self.data) - } + """ + Reset the index of the data. - def is_empty(self) -> bool: - return not bool(self.data) + Parameters + ---------- + rows : Iterable[int], optional + The rows to reset the index for. + + """ + self.data = {index: values for index, values in enumerate(self)} + +# ============================================================================= def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: @@ -193,7 +508,9 @@ def _convert_dict_to_data(dictionary: Dict[str, Any]) -> _Data: _Data The data object. """ - return _Data({0: {dictionary}}) + return _Data({0: dictionary}) + +# ============================================================================= def _data_factory(data: DataTypes) -> _Data: @@ -217,5 +534,7 @@ def _data_factory(data: DataTypes) -> _Data: f"Data must be of type _Data, pd.DataFrame, np.ndarray, " f"Path or str, not {type(data)}") +# ============================================================================= + DataTypes = Union[pd.DataFrame, np.ndarray, Path, str, _Data] diff --git a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py index 9e762296..0a2cc770 100644 --- a/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py +++ b/src/f3dasm/_src/experimentdata/_experimental/_newexperimentdata2.py @@ -142,11 +142,6 @@ def __len__(self): """The len() method returns the number of datapoints""" return len(self._jobs) - # if self._input_data.is_empty(): - # return len(self._output_data) - - # return len(self._input_data) - def __iter__(self) -> Iterator[Tuple[Dict[str, Any]]]: self.current_index = 0 return self diff --git a/tests/newdata/conftest.py b/tests/newdata/conftest.py index be072701..acde62e9 100644 --- a/tests/newdata/conftest.py +++ b/tests/newdata/conftest.py @@ -1,40 +1,31 @@ import numpy as np import pytest -from f3dasm._src.experimentdata._columns import _Columns -from f3dasm._src.experimentdata._newdata import _Index +from f3dasm._src.experimentdata._experimental._newdata2 import _Data from f3dasm.design import Domain @pytest.fixture(scope="package") def list_1(): - return [[np.array([0.3, 5.0, 0.34]), 'd', 3], [np.array( - [0.23, 5.0, 0.0]), 'f', 4], [np.array([0.3, 5.0, 0.2]), 'c', 0]] - - -@pytest.fixture(scope="package") -def columns_1(): - return _Columns({'a': None, 'b': None, 'c': None}) - - -@pytest.fixture(scope="package") -def indices_1(): - return _Index([3, 5, 6]) + return {0: {'a': np.array([0.3, 5.0, 0.34]), 'b': 'd', 'c': 3}, + 1: {'a': np.array([0.23, 5.0, 0.0]), 'b': 'f', 'c': 4}, + 2: {'a': np.array([0.3, 5.0, 0.2]), 'b': 'c', 'c': 0} + } @pytest.fixture(scope="package") def list_2(): - return [[np.array([0.3, 0.2])], [np.array([0.4, 0.3])], [np.array([0.0, 1.0])]] - - -@pytest.fixture(scope="package") -def columns_2(): - return _Columns({'a': None}) + return {0: {'a': np.array([0.3, 0.2])}, + 1: {'a': np.array([0.4, 0.3]), 'b': np.array([0.0, 1.0])} + } @pytest.fixture(scope="package") def list_3(): - return [[np.array([1.1, 0.2])], [np.array([8.9, 0.3])], [np.array([0.0, 0.87])]] + return {0: {'a': np.array([1.1, 0.2])}, + 1: {'a': np.array([8.9, 0.3])}, + 2: {'a': np.array([0.0, 0.87])} + } @pytest.fixture(scope="package") diff --git a/tests/newdata/test_data.py b/tests/newdata/test_data.py index 38b1b0ce..fb5f0cba 100644 --- a/tests/newdata/test_data.py +++ b/tests/newdata/test_data.py @@ -1,292 +1,287 @@ from copy import deepcopy -from typing import Any, List +from typing import Any, Dict, List import numpy as np import pandas as pd import pytest +import xarray as xr -from f3dasm._src.experimentdata._columns import _Columns -from f3dasm._src.experimentdata._newdata import _Data, _Index -from f3dasm.design import Domain +from f3dasm._src.experimentdata._experimental._newdata2 import ( + _convert_dict_to_data, _Data, _data_factory) pytestmark = pytest.mark.smoke -DataType = List[List[Any]] +DataType = Dict[int, Dict[str, Any]] +# Initialization +# ============================================================================= -def test_init(list_1: DataType): - data = _Data(list_1) - assert data.data == list_1 - assert data.columns.names == [0, 1, 2] - assert data.indices.equals(pd.Index([0, 1, 2])) +def test_init(): + data = _Data({0: {"a": 1, "b": 2}}) + assert len(data) == 1 + assert not data.is_empty() + assert data.data == {0: {"a": 1, "b": 2}} -def test_init_with_columns(list_1: DataType, columns_1: _Columns): - data = _Data(list_1, columns_1) - assert data.data == list_1 - assert data.names == ['a', 'b', 'c'] +def test_init_empty(): + data = _Data() + assert len(data) == 0 + assert data.is_empty() -def test_init_with_columns_and_indices( - list_1: DataType, columns_1: _Columns, indices_1: _Index): - data = _Data(list_1, columns_1, indices_1) - assert data.data == list_1 - assert data.names == ['a', 'b', 'c'] - assert data.indices.equals(pd.Index([3, 5, 6])) +def test_init_with_data(): + input_data = {0: {"a": 1, "b": 2}} + data = _Data(input_data) + assert len(data) == 1 + assert not data.is_empty() + assert data.data == input_data -def test__len__(list_1: DataType): - data = _Data(list_1) - assert len(data) == 3 +def test_from_numpy(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = _Data.from_numpy(array) + expected_data = {0: {0: 1, 1: 2, 2: 3}, 1: {0: 4, 1: 5, 2: 6}} + assert data.data == expected_data -def test__iter__(list_1: DataType): - data = _Data(list_1) - for i, row in enumerate(data): - assert row == list_1[i] +def test_from_numpy_with_keys(): + array = np.array([[1, 2, 3], [4, 5, 6]]) + data = _Data.from_numpy(array, keys=["a", "b", "c"]) + expected_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + assert data.data == expected_data -def test__getitem__(list_1: DataType): - data = _Data(list_1) - assert data[0].data[0] == list_1[0] - assert data[1].data[0] == list_1[1] - assert data[2].data[0] == list_1[2] +def test_from_dataframe(): + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + data = _Data.from_dataframe(df) + expected_data = {0: {"a": 1, "b": 3}, 1: {"a": 2, "b": 4}} + assert data.data == expected_data -def test__getitem__list(list_1: DataType): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - assert data[[3, 45]].data == data.data +def test_from_indices(): + data = _Data.from_indices([0, 1]) + assert data.data == {0: {}, 1: {}} -def test__add__(list_1: DataType, list_3: DataType): - data_1 = _Data(list_1) - data_2 = _Data(list_3) - data_3 = data_1 + data_2 - assert data_3.data == list_1 + list_3 - assert data_3.columns.names == [0, 1, 2] +# Exporting +# ============================================================================= -def test__add__empty(list_3: DataType): - data_1 = _Data(columns=_Columns({0: None, 1: None, 2: None})) - data_2 = _Data(list_3) - data_3 = data_1 + data_2 - assert data_3.data == list_3 - assert data_3.columns.names == [0, 1, 2] +def test_to_numpy(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + np_array = data.to_numpy() + expected_array = np.array([[1, 2], [3, 4]]) + np.testing.assert_array_equal(np_array, expected_array) -def test__eq__(list_1: DataType): - data_1 = _Data(list_1) - data_2 = _Data(list_1) - assert data_1 == data_2 +def test_to_dataframe(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + df = data.to_dataframe() + expected_df = pd.DataFrame({"a": [1, 3], "b": [2, 4]}) + pd.testing.assert_frame_equal(df, expected_df) -def test_repr_html(list_1: DataType): - data = _Data(list_1) - assert data._repr_html_() == data.to_dataframe()._repr_html_() +def test_to_xarray(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + xarray = data.to_xarray('test') + expected_xarray = xr.DataArray( + [[1, 2], [3, 4]], dims=["iterations", "test"], + coords={"iterations": [0, 1], "test": ["a", "b"]}) + xr.testing.assert_equal(xarray, expected_xarray) -# Properties -# ============================================================================= +def test_get_data_dict(): + input_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + data = _Data(input_data) + assert data.get_data_dict(0) == {"a": 1, "b": 2} -def test_names(list_1: DataType, columns_1: _Columns): - data = _Data(list_1, columns=columns_1) - assert data.names == ['a', 'b', 'c'] +def test_convert_dict_to_data(): + dictionary = {"a": 1, "b": 2} + data = _convert_dict_to_data(dictionary) + expected_data = _Data({0: {"a": 1, "b": 2}}) + assert data == expected_data -def test_names_default(list_1: DataType): - data = _Data(list_1) - assert data.names == [0, 1, 2] +# Properties +# ============================================================================= -def test_indices(list_1: DataType, indices_1: _Index): - data = _Data(list_1, index=indices_1) - assert data.indices.equals(pd.Index([3, 5, 6])) +def test_len(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert len(data) == 2 -def test_indices_default(list_1: DataType): - data = _Data(list_1) - assert data.indices.equals(pd.Index([0, 1, 2])) +def test_indices(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert data.indices == [0, 1] -# Alternative constructors -# ============================================================================= - -def test_from_indices(): - data = _Data.from_indices(pd.Index([0, 1])) - assert data.indices.equals(pd.Index(([0, 1]))) - assert not data.names - assert data.is_empty() +def test_names(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert data.names == ["a"] -def test_from_domain(domain: Domain): - data = _Data.from_domain(domain) - assert data.indices.equals(pd.Index([])) - assert data.names == ['a', 'b', 'c', 'd', 'e'] +def test_is_empty(): + data = _Data() assert data.is_empty() + data = _Data({0: {"a": 1}}) + assert not data.is_empty() -def test_from_numpy(): - data = _Data.from_numpy(np.array([[1, 2, 3], [4, 5, 6]])) - assert data.data == [[1, 2, 3], [4, 5, 6]] - assert data.names == [0, 1, 2] - assert data.indices.equals(pd.Index([0, 1])) +def test_getitem(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert data[0] == _Data({0: {"a": 1}}) + assert data[1] == _Data({1: {"a": 2}}) + assert data[[0, 1]] == data -def test_from_dataframe(): - data = _Data.from_dataframe(pd.DataFrame([[1, 2, 3], [4, 5, 6]])) - assert data.data == [[1, 2, 3], [4, 5, 6]] - assert data.names == [0, 1, 2] - assert data.indices.equals(pd.Index([0, 1])) +def test_repr(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert isinstance(data.__repr__(), str) -def test_reset(): - data = _Data.from_numpy(np.array([[1, 2, 3], [4, 5, 6]])) - data.reset() - assert data.data == [] - assert not data.names - assert data.indices.equals(pd.Index([])) +def test_repr_html(): + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + assert isinstance(data._repr_html_(), str) +# Selecting and combining +# ============================================================================= -def test_reset_with_domain(domain: Domain): - data = _Data.from_numpy(np.array([[1, 2, 3], [4, 5, 6]])) - data.reset(domain) - assert data.data == [] - assert data.names == domain.names - assert data.indices.equals(pd.Index([])) +def test_join(): + data1 = _Data({0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}}) + data2 = _Data({0: {"c": 5, "d": 6}, 1: {"c": 7, "d": 8}}) + data3 = data1.join(data2) + expected_data = {0: {"a": 1, "b": 2, "c": 5, "d": 6}, + 1: {"a": 3, "b": 4, "c": 7, "d": 8}} + assert data3 == _Data(expected_data) -# Export -# ============================================================================= +def test_select_columns(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + selected_data = data.select_columns(["a", "c"]) + expected_data = {0: {"a": 1, "c": 3}, 1: {"a": 4, "c": 6}} + assert selected_data.data == expected_data -def test_to_numpy(list_1: DataType): - data = _Data(list_1) - data.to_numpy() +def test_select_columns_single(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + selected_data = data.select_columns("a") + expected_data = {0: {"a": 1}, 1: {"a": 4}} + assert selected_data.data == expected_data -def to_dataframe(list_1: DataType): - data = _Data(list_1) - data.to_dataframe() - assert data.to_dataframe().equals(pd.DataFrame(list_1)) +def test_drop(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + data.drop(["b"]) + expected_data = {0: {"a": 1, "c": 3}, 1: {"a": 4, "c": 6}} + assert data.data == expected_data -def test_select_columns(list_1: DataType, columns_1: _Columns): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=columns_1) - new_data = data.select_columns(['a', 'c']) - assert new_data.names == ['a', 'c'] - assert new_data.data == [[1, 3], [4, 6]] +def test_drop_single_key(): + input_data = {0: {"a": 1, "b": 2, "c": 3}, 1: {"a": 4, "b": 5, "c": 6}} + data = _Data(input_data) + data.drop("b") + expected_data = {0: {"a": 1, "c": 3}, 1: {"a": 4, "c": 6}} + assert data.data == expected_data -def test_select_column(list_1: DataType, columns_1: _Columns): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=columns_1) - new_data = data.select_columns('a') - assert new_data.names == ['a'] - assert new_data.data == [[1], [4]] +# Modifying +# ============================================================================= -def test_add(list_2: DataType, list_3: DataType): - data_0 = _Data(deepcopy(list_2)) - data_1 = _Data(deepcopy(list_2)) - data_2 = _Data(list_3) - data_1.add(data_2.to_dataframe()) - assert data_1 == (data_0 + data_2) +def test_add(): + data1 = _Data({0: {"a": 1, "b": 2}}) + data2 = _Data({0: {"a": 3, "b": 4}}) + data3 = data1 + data2 + expected_data = {0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}} + assert data3.data == expected_data -def test_add_empty_rows(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.add_empty_rows(2) - assert data.data == [[1, 2, 3], [4, 5, 6], [ - np.nan, np.nan, np.nan], [np.nan, np.nan, np.nan]] +def test_add_empty(): + data1 = _Data() + data2 = _Data({0: {"a": 3, "b": 4}}) + data3 = data1 + data2 + assert data3.data == {0: {"a": 3, "b": 4}} def test_add_column(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.add_column('a') - assert data.data == [[1, 2, 3, np.nan], [4, 5, 6, np.nan]] - assert data.names == [0, 1, 2, 'a'] + missing_value = np.nan + data = _Data({0: {"a": 1}, 1: {"a": 2}}) + data.add_column("b") + expected_data = {0: {"a": 1, "b": missing_value}, + 1: {"a": 2, "b": missing_value}} + assert data.data == expected_data -def test_remove(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.remove(0) - assert data.data == [[4, 5, 6]] - assert data.names == [0, 1, 2] - - -def test_remove_list(): - data = _Data(data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - data.remove([0, 2]) - assert data.data == [[4, 5, 6]] - assert data.names == [0, 1, 2] +def test_overwrite(): + data = _Data({0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}}) + data2 = _Data({0: {"a": 5, "b": 6}, 1: {"a": 7, "b": 8}}) + data.overwrite([0], data2) + assert data.data == {0: {"a": 5, "b": 6}, 1: {"a": 3, "b": 4}} -def test_get_data_dict(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - assert data.get_data_dict(0) == {0: 1, 1: 2, 2: 3} +def test_remove(): + data = _Data({0: {"a": 1, "b": 2}, 1: {"a": 3, "b": 4}}) + data.remove([1]) + assert data.data == {0: {"a": 1, "b": 2}} -def test_set_data_all_columns(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]]) - data.set_data(index=0, value=[4, 5, 6]) - assert data.data == [[4, 5, 6], [4, 5, 6]] +def test_n_best_samples(): + df = pd.DataFrame({"a": [3, 1, 2], "b": [6, 4, 5]}) + data = _Data.from_dataframe(df) + best_samples = data.n_best_samples(2, "a") + expected_df = pd.DataFrame({"a": [1, 2], "b": [4, 5]}, index=[1, 2]) + pd.testing.assert_frame_equal(best_samples, expected_df) def test_set_data(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None})) - data.set_data(index=0, value=99, column='b') - assert data.data == [[1, 99, 3], [4, 5, 6]] + data = _Data({0: {"a": 1}}) + data.set_data(0, 2, "a") + assert data.data[0]["a"] == 2 -def test_set_data_no_valid_index(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None})) - with pytest.raises(IndexError): - data.set_data(index=2, value=99, column='b') +def test_reset_index(): + data = _Data({1: {"a": 1}, 3: {"a": 2}}) + data.reset_index() + expected_data = {0: {"a": 1}, 1: {"a": 2}} + assert data.data == expected_data -def test_set_data_unknown_column(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None})) +def test_data_factory_pandas(): + df = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + data = _data_factory(df) + expected_data = _Data.from_dataframe(df) + assert data == expected_data - data.set_data(index=0, value=99, column='d') - assert data.names == ['a', 'b', 'c', 'd'] - assert data.data == [[1, 2, 3, 99], [4, 5, 6, np.nan]] +def test_data_factory_numpy(): + np_array = np.array([[1, 2], [3, 4]]) + data = _data_factory(np_array) + expected_data = _Data.from_numpy(np_array) + assert data == expected_data -def test_reset_index(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - data.reset_index() - assert data.indices.equals(pd.Index([0, 1])) +def test_data_factory_none(): + data = _data_factory(None) + expected_data = _Data() + assert data == expected_data -def test_is_empty(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - assert not data.is_empty() - data.reset() - assert data.is_empty() +def test_data_factory_unrecognized_datatype(): + with pytest.raises(TypeError): + _ = _data_factory(0) -def test_has_columnnames(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - assert not data.has_columnnames('d') - assert data.has_columnnames('c') - data.add_column('d') - assert data.has_columnnames('d') - -def test_set_columnnames(): - data = _Data(data=[[1, 2, 3], [4, 5, 6]], columns=_Columns( - {'a': None, 'b': None, 'c': None}), index=_Index([3, 45])) - data.set_columnnames(['d', 'f', 'g']) - assert data.names == ['d', 'f', 'g'] +def test_data_factory_data_object(): + data = _data_factory(_Data({0: {"a": 1}})) + expected_data = _Data({0: {"a": 1}}) + assert data == expected_data if __name__ == "__main__": # pragma: no cover pytest.main() - - # return [[np.array([0.3, 5.0, 0.34]), 'd', 3], [np.array( - # [0.23, 5.0, 0.0]), 'f', 4], [np.array([0.3, 5.0, 0.2]), 'c', 0]]