Skip to content

Commit

Permalink
Add load_from_json and save_to_json to SingleTableMetadata (#908)
Browse files Browse the repository at this point in the history
  • Loading branch information
pvk-developer authored and amontanez24 committed Feb 17, 2023
1 parent 2942c2d commit cdd1848
Show file tree
Hide file tree
Showing 2 changed files with 264 additions and 2 deletions.
72 changes: 71 additions & 1 deletion sdv/metadata/single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import pandas as pd

from sdv.constraints import Constraint


class SingleTableMetadata:
"""Single Table Metadata class."""
Expand Down Expand Up @@ -84,7 +86,17 @@ def to_dict(self):
"""Return a python ``dict`` representation of the ``SingleTableMetadata``."""
metadata = {}
for key, value in self._metadata.items():
if value:
if key == 'constraints' and value:
constraints = []
for constraint in value:
if not isinstance(constraint, dict):
constraints.append(constraint.to_dict())
else:
constraints.append(constraint)

metadata[key] = constraints

elif value:
metadata[key] = value

return copy.deepcopy(metadata)
Expand All @@ -99,6 +111,9 @@ def _set_metadata_dict(self, metadata):
self._metadata = {}
for key in self.KEYS:
value = copy.deepcopy(metadata.get(key))
if key == 'constraints' and value:
value = [Constraint.from_dict(constraint_dict) for constraint_dict in value]

if value:
self._metadata[key] = value
setattr(self, f'_{key}', value)
Expand All @@ -120,6 +135,61 @@ def _load_from_dict(cls, metadata):
instance._set_metadata_dict(metadata)
return instance

@classmethod
def load_from_json(cls, filepath):
    """Create an instance from a ``json`` file.

    Args:
        filepath (str):
            String that represents the ``path`` to the ``json`` file.

    Raises:
        ValueError:
            If the path does not exist.
        ValueError:
            If the top-level ``json`` value is not an object (mapping) or it does not
            contain the ``SCHEMA_VERSION`` key.

    Returns:
        The instance built by ``cls._load_from_dict`` from the contents of the file.
    """
    filepath = Path(filepath)
    if not filepath.exists():
        raise ValueError(
            f"A file named '{filepath.name}' does not exist. "
            'Please specify a different filename.'
        )

    with open(filepath, 'r', encoding='utf-8') as metadata_file:
        metadata = json.load(metadata_file)

    # A bare ``in`` test is not enough here: on a JSON string it is a substring search
    # and on a JSON number it raises ``TypeError``. Only a mapping can hold the key.
    if not isinstance(metadata, dict) or 'SCHEMA_VERSION' not in metadata:
        raise ValueError(
            'This metadata file is incompatible with the ``SingleTableMetadata`` '
            'class and version.'
        )

    return cls._load_from_dict(metadata)

def save_to_json(self, filepath):
    """Save the current ``SingleTableMetadata`` in to a ``json`` file.

    The output of ``to_dict`` is written together with the ``SCHEMA_VERSION`` key.

    Args:
        filepath (str):
            String that represents the ``path`` to the ``json`` file to be written.

    Raises:
        ValueError:
            If the path already exists.
    """
    filepath = Path(filepath)
    error_message = (
        f"A file named '{filepath.name}' already exists in this folder. Please specify "
        'a different filename.'
    )
    if filepath.exists():
        raise ValueError(error_message)

    metadata = self.to_dict()
    metadata['SCHEMA_VERSION'] = self.SCHEMA_VERSION
    try:
        # Mode ``'x'`` creates the file exclusively, so a file that shows up after the
        # check above is reported instead of being silently overwritten.
        with open(filepath, 'x', encoding='utf-8') as metadata_file:
            json.dump(metadata, metadata_file, indent=4)
    except FileExistsError as error:
        raise ValueError(error_message) from error

def __repr__(self):
"""Pretty print the ``SingleTableMetadata```SingleTableMetadata``."""
printed = json.dumps(self.to_dict(), indent=4)
Expand Down
194 changes: 193 additions & 1 deletion tests/unit/metadata/test_single_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import call, patch
from unittest.mock import Mock, call, patch

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -237,23 +237,35 @@ def test_to_dict(self):
Setup:
- Instance of ``SingleTableMetadata`` and modify the ``instance._columns`` to ensure
that ``to_dict`` works properly.
- Add constraint Mock and ensure that `to_dict` of the object is being called.
Output:
- A dictionary representation of the ``instance`` that does not modify the
internal dictionaries.
"""
# Setup
instance = SingleTableMetadata()
instance._columns['my_column'] = 'value'
constraint = Mock()
constraint.to_dict.return_value = {'column': 'value', 'scalar': 1}
dict_constraint = {'column': 'value', 'increment_value': 20}
instance._constraints.extend([constraint, dict_constraint])

# Run
result = instance.to_dict()

# Assert
assert result == {
'columns': {'my_column': 'value'},
'constraints': [
{'column': 'value', 'scalar': 1},
{'column': 'value', 'increment_value': 20}
],
'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
}

constraint.to_dict.assert_called_once()

# Ensure that the output object does not alter the inside object
result['columns']['my_column'] = 1
assert instance._columns['my_column'] == 'value'

Expand Down Expand Up @@ -309,6 +321,186 @@ def test__load_from_dict(self):
assert instance._alternate_keys == []
assert instance._constraints == []

@patch('sdv.metadata.single_table.Path')
def test_load_from_json_path_does_not_exist(self, mock_path):
    """Test that ``load_from_json`` rejects a path that does not exist.

    Mock:
        - ``Path`` is patched so that ``exists`` returns ``False`` and ``name`` is
          ``'filepath.json'``.

    Input:
        - String representing a filepath.

    Side Effects:
        - A ``ValueError`` is raised stating that the file does not exist.
    """
    # Setup
    missing_file = mock_path.return_value
    missing_file.exists.return_value = False
    missing_file.name = 'filepath.json'
    expected_message = (
        "A file named 'filepath.json' does not exist. Please specify a different filename."
    )

    # Run / Assert
    with pytest.raises(ValueError, match=expected_message):
        SingleTableMetadata.load_from_json('filepath.json')

@patch('sdv.metadata.single_table.open')
@patch('sdv.metadata.single_table.Path')
@patch('sdv.metadata.single_table.json')
def test_load_from_json_schema_not_present(self, mock_json, mock_path, mock_open):
    """Test that ``load_from_json`` rejects content without a ``SCHEMA_VERSION``.

    Mock:
        - ``Path`` is patched so that the file is reported as existing.
        - ``json`` is patched so that ``json.load`` returns a custom dictionary that has
          no ``SCHEMA_VERSION`` key.
        - ``open`` is patched so that no real file is read.

    Input:
        - String representing a filepath.

    Side Effects:
        - A ``ValueError`` is raised stating that the metadata file is incompatible with
          the class and version.
    """
    # Setup
    existing_file = mock_path.return_value
    existing_file.exists.return_value = True
    existing_file.name = 'filepath.json'
    metadata_without_version = {
        'columns': {
            'animals': {
                'type': 'categorical'
            }
        },
        'primary_key': 'animals',
    }
    mock_json.load.return_value = metadata_without_version
    expected_message = (
        'This metadata file is incompatible with the ``SingleTableMetadata`` '
        'class and version.'
    )

    # Run / Assert
    with pytest.raises(ValueError, match=expected_message):
        SingleTableMetadata.load_from_json('filepath.json')

@patch('sdv.metadata.single_table.Constraint')
@patch('sdv.metadata.single_table.open')
@patch('sdv.metadata.single_table.Path')
@patch('sdv.metadata.single_table.json')
def test_load_from_json(self, mock_json, mock_path, mock_open, mock_constraint):
    """Test the ``load_from_json`` method.

    Test that ``load_from_json`` creates an instance with the contents returned by the
    ``json`` load function.

    Mock:
        - Mock the ``Path`` library in order to return ``True``.
        - Mock the ``json`` library in order to use a custom return.
        - Mock the ``open`` in order to avoid reading a real file.
        - Mock the ``Constraint`` to ensure that ``from_dict`` is used to load each
          constraint.

    Input:
        - String representing a filepath.

    Output:
        - ``SingleTableMetadata`` instance with the custom configuration from the ``json``
          file (``json.load`` return value).
    """
    # Setup
    mock_path.return_value.exists.return_value = True
    mock_path.return_value.name = 'filepath.json'
    mock_constraint.from_dict.return_value = {'my_constraint': 'my_params'}
    mock_json.load.return_value = {
        'columns': {
            'animals': {
                'type': 'categorical'
            }
        },
        'primary_key': 'animals',
        'constraints': [{
            'my_constraint': 'my_params'
        }],
        'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
    }

    # Run
    instance = SingleTableMetadata.load_from_json('filepath.json')

    # Assert
    expected_metadata = {
        'columns': {'animals': {'type': 'categorical'}},
        'primary_key': 'animals',
        'alternate_keys': [],
        'constraints': [{'my_constraint': 'my_params'}],
        'SCHEMA_VERSION': 'SINGLE_TABLE_V1'
    }
    assert instance._columns == {'animals': {'type': 'categorical'}}
    assert instance._primary_key == 'animals'
    assert instance._constraints == [{'my_constraint': 'my_params'}]
    assert instance._alternate_keys == []
    assert instance._metadata == expected_metadata
    # Pin the argument as well as the call count, so a wrong dict being handed to
    # ``Constraint.from_dict`` is caught.
    mock_constraint.from_dict.assert_called_once_with({'my_constraint': 'my_params'})

@patch('sdv.metadata.single_table.Path')
def test_save_to_json_file_exists(self, mock_path):
    """Test that ``save_to_json`` refuses to write over an existing file.

    Setup:
        - Instance of ``SingleTableMetadata``.

    Mock:
        - ``Path`` is patched so that ``exists`` returns ``True`` and ``name`` is
          ``'filepath.json'``.

    Side Effects:
        - A ``ValueError`` is raised stating that the file already exists.
    """
    # Setup
    existing_file = mock_path.return_value
    existing_file.exists.return_value = True
    existing_file.name = 'filepath.json'
    metadata = SingleTableMetadata()
    expected_message = (
        "A file named 'filepath.json' already exists in this folder. Please specify "
        'a different filename.'
    )

    # Run / Assert
    with pytest.raises(ValueError, match=expected_message):
        metadata.save_to_json('filepath.json')

def test_save_to_json(self):
    """Test that ``save_to_json`` writes the instance dictionary to a ``json`` file.

    Setup:
        - Instance of ``SingleTableMetadata``.
        - A ``TemporaryDirectory`` holds the file so that it can be read back and its
          contents compared.

    Side Effects:
        - Creates a ``json`` representation of the instance.
    """
    # Setup
    metadata = SingleTableMetadata()

    # Run
    with TemporaryDirectory() as temp_dir:
        target = Path(temp_dir, 'singletable.json')
        metadata.save_to_json(target)
        with target.open('rb') as stored_file:
            stored = json.load(stored_file)

    # Assert
    assert stored == metadata.to_dict()

@patch('sdv.metadata.single_table.json')
def test___repr__(self, mock_json):
"""Test that the ``__repr__`` method.
Expand Down

0 comments on commit cdd1848

Please sign in to comment.