Skip to content

Commit

Permalink
update typing, add docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
mialy-defelice committed Apr 1, 2024
1 parent 81b1efe commit d1deb0e
Showing 1 changed file with 24 additions and 19 deletions.
43 changes: 24 additions & 19 deletions schematic/models/validate_attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def generate_cross_warning(
manifest_id: Optional[list[str]] = None,
invalid_entry: Optional[list[str]] = None,
row_num: Optional[list[str]] = None,
) -> list[str]:
) -> tuple[[list[str], list[str]]]:
"""
Purpose:
Generate a logging error as well as a stored error message, when
Expand Down Expand Up @@ -291,11 +291,7 @@ def generate_cross_warning(
elif "matchExactly" in val_rule:
if matching_manifests and matching_manifests != []:
error_message = f"All values from attribute {attribute_name} in the source manifest are present in {len(matching_manifests)} manifests instead of only 1."
error_message += (
f" Manifests {matching_manifests} match the values in the source attribute."
if matching_manifests
else ""
)
error_message += f" Manifests {matching_manifests} match the values in the source attribute."

elif "set" in val_rule:
error_message = f"No matches for the values from attribute {attribute_name} in the source manifest are present in any other manifests instead of being present in exactly 1. "
Expand Down Expand Up @@ -525,11 +521,11 @@ class ValidateAttribute(object):
- Add string length validator
"""

def __init__(self, dmge: DataModelGraphExplorer):
def __init__(self, dmge: DataModelGraphExplorer) -> None:
self.dmge = dmge

def get_target_manifests(
self, target_component, project_scope: list, access_token: str = None
self, target_component: str, project_scope: list[str], access_token: str = None
):
t_manifest_search = perf_counter()
target_manifest_ids = []
Expand Down Expand Up @@ -974,8 +970,9 @@ def _gather_set_warnings_errors(
Args:
val_rule, str: Validation Rule
source_attribute, str: Source manifest column name
set_validation_store, tuple[dict[str, pd.core.series.Series], list[str], dict[str, pd.core.series.Series]]: contains the missing_manifest_log, present_manifest_log,
and repeat_manifest_log
set_validation_store, tuple[dict[str, pd.core.series.Series], list[str],
dict[str, pd.core.series.Series]]:
contains the missing_manifest_log, present_manifest_log, and repeat_manifest_log
dmge: DataModelGraphExplorer Object.
Returns:
Expand Down Expand Up @@ -1084,7 +1081,9 @@ def _gather_value_warnings_errors(
Args:
val_rule, str: Validation rule
source_attribute, str: source manifest column name
value_validation_store, tuple(dict[str, pd.core.series.Series], dict[str, pd.core.series.Series], dict[str, pd.core.series.Series]): contains missing_values, duplicated_values, and repeat values
value_validation_store, tuple(dict[str, pd.core.series.Series], dict[str, pd.core.series.Series],
dict[str, pd.core.series.Series]):
contains missing_values, duplicated_values, and repeat values
Returns:
errors, list[str]: list of errors to raise, as appropriate, if values in current manifest do
not pass relevant cross manifest validation across the target manifest(s)
Expand Down Expand Up @@ -1133,7 +1132,6 @@ def _run_validation_across_targets_set(
column_names: dict[str, str],
manifest_col: pd.core.series.Series,
target_attribute: str,
target_column: pd.core.series.Series,
target_manifest: pd.core.series.Series,
target_manifest_id: str,
missing_manifest_log: dict[str, pd.core.series.Series],
Expand Down Expand Up @@ -1203,7 +1201,9 @@ def _gather_target_columns_value(
concatenated_target_column: pd.core.series.Series,
target_manifest: pd.core.series.Series,
) -> pd.core.series.Series:
"""
"""A helper function for concatenating all target attribute columns across all target manifests. This function checks if the
target attribute is in the current target manifest. If it is, and it is the first manifest with this column, start recording it; if it has
already been recorded from another manifest, concatenate the new column to the concatenated_target_column series.
Args:
column_names, dict: {stripped_col_name:original_column_name}
target_attribute, str: current target attribute
Expand All @@ -1213,14 +1213,15 @@ def _gather_target_columns_value(
Returns:
concatenated_target_column, pd.core.series.Series: All target columns concatenated into a single column
"""
# Gather information as needed to perform set/value cross manifest validation

# Check if the target_attribute is in the current target manifest.
if target_attribute in column_names:
# If it is, make sure the column names match the original column names
target_manifest.rename(
columns={column_names[target_attribute]: target_attribute},
inplace=True,
)

# If matches with other columns have already been found, concatenate current target attribute column to
# the series
if concatenated_target_column.any():
concatenated_target_column = pd.concat(
objs=[
Expand All @@ -1231,7 +1232,9 @@ def _gather_target_columns_value(
ignore_index=True,
)
else:
# Otherwise, start recording the target_attribute column
concatenated_target_column = target_manifest[target_attribute]

concatenated_target_column = concatenated_target_column.astype("object")

return concatenated_target_column
Expand Down Expand Up @@ -1314,8 +1317,11 @@ def _run_validation_across_target_manifests(
target_column, pd.core.series.Series: Empty target_column to fill out in this function
Returns:
start_time, float: start time in fractional seconds
validation_store, Union[tuple[dict[str, pd.core.series.Series], list[str], dict[str, pd.core.series.Series]],
tuple[dict[str, pd.core.series.Series], dict[str, pd.core.series.Series], dict[str, pd.core.series.Series]]: validation outputs, exact types depend on scope,
validation_store, Union[
tuple[dict[str, pd.core.series.Series], list[str], dict[str, pd.core.series.Series]],
tuple[dict[str, pd.core.series.Series], dict[str, pd.core.series.Series],
dict[str, pd.core.series.Series]]]:
validation outputs; exact types depend on scope.
"""
# Initialize variables
present_manifest_log = []
Expand Down Expand Up @@ -1366,7 +1372,6 @@ def _run_validation_across_target_manifests(
column_names=column_names,
manifest_col=manifest_col,
target_attribute=target_attribute,
target_column=target_column,
target_manifest=target_manifest,
target_manifest_id=target_manifest_id,
missing_manifest_log=missing_manifest_log,
Expand Down

0 comments on commit d1deb0e

Please sign in to comment.