Skip to content

Commit

Permalink
Merge pull request #1356 from Sage-Bionetworks/develop-data-model-lab…
Browse files Browse the repository at this point in the history
…els-FDS-1426

Data Model Labels, display_label or class_label options
  • Loading branch information
mialy-defelice authored Feb 8, 2024
2 parents eb32285 + 3563cef commit 3d75f1b
Show file tree
Hide file tree
Showing 31 changed files with 2,827 additions and 3,427 deletions.
30 changes: 30 additions & 0 deletions schematic/help.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@
"Specify to alphabetize valid attribute values either ascending (a) or descending (d)."
"Optional"
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
},
"migrate": {
"short_help": (
Expand Down Expand Up @@ -133,6 +139,12 @@
"class_label, display_label, display_name, default, class_label. When true annotations and table columns will be uploaded with the display name formatting with blacklisted characters removed. "
"To use for tables, use in conjunction with the use_schema_label flag."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
},
"validate": {
"short_help": ("Validation of manifest files."),
Expand All @@ -158,6 +170,12 @@
"project_scope": (
"Specify a comma-separated list of projects to search through for cross manifest validation."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
},
}
}
Expand All @@ -173,6 +191,12 @@
"output_jsonld": (
"Path to where the generated JSON-LD file needs to be outputted."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
}
}
}
Expand Down Expand Up @@ -200,6 +224,12 @@
"text_format": (
"Specify the type of text to gather for tangled tree visualization, either 'plain' or 'highlighted'."
),
"data_model_labels": (
"Choose how to set the label in the data model. "
"display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label. "
"class_label, default, use standard class or property label. "
"Do not change from default unless there is a real need, using 'display_label' can have consequences if not used properly."
),
},
}
}
10 changes: 9 additions & 1 deletion schematic/manifest/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ def manifest(ctx, config): # use as `schematic manifest ...`
default="ascending",
help=query_dict(manifest_commands, ("manifest", "get", "alphabetize_valid_values")),
)
@click.option(
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
help=query_dict(manifest_commands, ("manifest", "get", "data_model_labels")),
)
@click.pass_obj
def get_manifest(
ctx,
Expand All @@ -118,6 +125,7 @@ def get_manifest(
json_schema,
output_xlsx,
alphabetize_valid_values,
data_model_labels,
):
"""
Running CLI with manifest generation options.
Expand All @@ -141,7 +149,7 @@ def get_manifest(
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model)
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
logger.info("Generating data model graph.")
Expand Down
4 changes: 3 additions & 1 deletion schematic/manifest/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
)
from schematic.utils.df_utils import update_df, load_df
from schematic.utils.validate_utils import rule_in_rule_list
from schematic.utils.schema_utils import DisplayLabelType

# TODO: This module should only be aware of the store interface
# we shouldn't need to expose Synapse functionality explicitly
Expand Down Expand Up @@ -1620,6 +1621,7 @@ def create_single_manifest(
def create_manifests(
path_to_data_model: str,
data_types: list,
data_model_labels: DisplayLabelType = "class_label",
access_token: Optional[str] = None,
dataset_ids: Optional[list] = None,
output_format: Literal["google_sheet", "excel", "dataframe"] = "google_sheet",
Expand Down Expand Up @@ -1667,7 +1669,7 @@ def create_manifests(
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model)
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
graph_data_model = data_model_grapher.generate_data_model_graph()
Expand Down
30 changes: 27 additions & 3 deletions schematic/models/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ def model(ctx, config): # use as `schematic model ...`
type=click.Choice(["replace", "upsert"], case_sensitive=True),
help=query_dict(model_commands, ("model", "submit", "table_manipulation")),
)
@click.option(
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
help=query_dict(model_commands, ("model", "submit", "data_model_labels")),
)
@click.option(
"--table_column_names",
"-tcn",
Expand Down Expand Up @@ -135,6 +142,7 @@ def submit_manifest(
restrict_rules,
project_scope,
table_manipulation,
data_model_labels,
table_column_names,
annotation_keys,
):
Expand All @@ -146,7 +154,9 @@ def submit_manifest(
log_value_from_config("jsonld", jsonld)

metadata_model = MetadataModel(
inputMModelLocation=jsonld, inputMModelLocationType="local"
inputMModelLocation=jsonld,
inputMModelLocationType="local",
data_model_labels=data_model_labels,
)

manifest_id = metadata_model.submit_metadata_manifest(
Expand Down Expand Up @@ -207,9 +217,21 @@ def submit_manifest(
callback=parse_synIDs,
help=query_dict(model_commands, ("model", "validate", "project_scope")),
)
@click.option(
"--data_model_labels",
"-dml",
is_flag=True,
help=query_dict(model_commands, ("model", "validate", "data_model_labels")),
)
@click.pass_obj
def validate_manifest(
ctx, manifest_path, data_type, json_schema, restrict_rules, project_scope
ctx,
manifest_path,
data_type,
json_schema,
restrict_rules,
project_scope,
data_model_labels,
):
"""
Running CLI for manifest validation.
Expand All @@ -233,7 +255,9 @@ def validate_manifest(
log_value_from_config("jsonld", jsonld)

metadata_model = MetadataModel(
inputMModelLocation=jsonld, inputMModelLocationType="local"
inputMModelLocation=jsonld,
inputMModelLocationType="local",
data_model_labels=data_model_labels,
)

errors, warnings = metadata_model.validateModelManifest(
Expand Down
3 changes: 2 additions & 1 deletion schematic/models/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def __init__(
self,
inputMModelLocation: str,
inputMModelLocationType: str,
data_model_labels: str,
) -> None:
"""Instantiates a MetadataModel object.
Expand All @@ -59,7 +60,7 @@ def __init__(
parsed_data_model = data_model_parser.parse_model()

# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model)
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
self.graph_data_model = data_model_grapher.generate_data_model_graph()
Expand Down
1 change: 1 addition & 0 deletions schematic/models/validate_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from urllib import error

from schematic.models.validate_attribute import ValidateAttribute, GenerateError

from schematic.schemas.data_model_graph import DataModelGraphExplorer
from schematic.store.synapse import SynapseStorage
from schematic.models.GE_Helpers import GreatExpectationsHelpers
Expand Down
11 changes: 9 additions & 2 deletions schematic/schemas/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,20 @@ def schema(): # use as `schematic schema ...`
@click.argument(
"schema", type=click.Path(exists=True), metavar="<DATA_MODEL_CSV>", nargs=1
)
@click.option(
"--data_model_labels",
"-dml",
default="class_label",
type=click.Choice(["display_label", "class_label"], case_sensitive=True),
help=query_dict(schema_commands, ("schema", "convert", "data_model_labels")),
)
@click.option(
"--output_jsonld",
"-o",
metavar="<OUTPUT_PATH>",
help=query_dict(schema_commands, ("schema", "convert", "output_jsonld")),
)
def convert(schema, output_jsonld):
def convert(schema, data_model_labels, output_jsonld):
"""
Running CLI to convert data model specification in CSV format to
data model in JSON-LD format.
Expand All @@ -67,7 +74,7 @@ def convert(schema, output_jsonld):

# Convert parsed model to graph
# Instantiate DataModelGraph
data_model_grapher = DataModelGraph(parsed_data_model)
data_model_grapher = DataModelGraph(parsed_data_model, data_model_labels)

# Generate graph
logger.info("Generating data model graph.")
Expand Down
16 changes: 13 additions & 3 deletions schematic/schemas/data_model_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from schematic.utils.schema_utils import (
get_property_label_from_display_name,
get_class_label_from_display_name,
DisplayLabelType,
)
from schematic.utils.general import unlist
from schematic.utils.viz_utils import visualize
Expand Down Expand Up @@ -43,20 +44,27 @@ class DataModelGraph:

__metaclass__ = DataModelGraphMeta

def __init__(self, attribute_relationships_dict: dict) -> None:
def __init__(
self,
attribute_relationships_dict: dict,
data_model_labels: DisplayLabelType = "class_label",
) -> None:
"""Load parsed data model.
Args:
attributes_relationship_dict, dict: generated in data_model_parser
{Attribute Display Name: {
Relationships: {
CSV Header: Value}}}
data_model_labels: str, display_label or class_label.
display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label.
class_label, default, use standard class or property label.
Raises:
ValueError, attribute_relationship_dict not loaded.
"""
self.attribute_relationships_dict = attribute_relationships_dict
self.dmn = DataModelNodes(self.attribute_relationships_dict)
self.dme = DataModelEdges()
self.dmr = DataModelRelationships()
self.data_model_labels = data_model_labels

if not self.attribute_relationships_dict:
raise ValueError(
Expand Down Expand Up @@ -86,7 +94,9 @@ def generate_data_model_graph(self) -> nx.MultiDiGraph:
for node in all_nodes:
# Gather information for each node
node_dict = self.dmn.generate_node_dict(
node, self.attribute_relationships_dict
node_display_name=node,
attr_rel_dict=self.attribute_relationships_dict,
data_model_labels=self.data_model_labels,
)

# Add each node to the all_node_dict to be used for generating edges
Expand Down
1 change: 1 addition & 0 deletions schematic/schemas/data_model_jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,7 @@ def fill_entry_template(self, template: dict, node: str) -> dict:
template = self.reorder_template_entries(
template=template,
)

# Add contexts to certain values
template = self.add_contexts_to_entries(
template=template,
Expand Down
21 changes: 17 additions & 4 deletions schematic/schemas/data_model_nodes.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from inspect import isfunction
import networkx as nx
from rdflib import Namespace
from typing import Any, Dict, Optional, Text, List, Callable
from typing import Any, Dict, Optional, Text, List, Literal, Callable

from schematic.schemas.data_model_parser import DataModelJSONLDParser
from schematic.schemas.data_model_relationships import DataModelRelationships
Expand All @@ -11,6 +11,7 @@
get_attribute_display_name_from_label,
convert_bool_to_str,
parse_validation_rules,
DisplayLabelType,
)
from schematic.utils.validate_rules_utils import validate_schema_rules
from schematic.schemas.curie import uri2curie, curie2uri
Expand Down Expand Up @@ -130,6 +131,7 @@ def run_rel_functions(
attr_relationships={},
csv_header="",
entry_type="",
data_model_labels: DisplayLabelType = "class_label",
):
"""This function exists to centralize handling of the functions used for filling out node information, and makes sure all the proper parameters are passed to each function.
Args:
Expand Down Expand Up @@ -157,7 +159,9 @@ def run_rel_functions(

elif rel_func == get_label_from_display_name:
return get_label_from_display_name(
display_name=node_display_name, entry_type=entry_type
display_name=node_display_name,
entry_type=entry_type,
data_model_labels=data_model_labels,
)

elif rel_func == convert_bool_to_str:
Expand All @@ -176,15 +180,22 @@ def run_rel_functions(
f"The function provided ({rel_func}) to define the relationship {key} is not captured in the function run_rel_functions, please update."
)

def generate_node_dict(self, node_display_name: str, attr_rel_dict: dict) -> dict:
def generate_node_dict(
self,
node_display_name: str,
attr_rel_dict: dict,
data_model_labels: DisplayLabelType = "class_label",
) -> dict:
"""Gather information to be attached to each node.
Args:
node_display_name, str: display name for current node
attr_rel_dict, dict: generated in data_model_parser
{Attribute Display Name: {
Relationships: {
CSV Header: Value}}}
data_model_labels: str, display_label or class_label.
display_label, use the display name as a label, if it is valid (contains no blacklisted characters) otherwise will default to class_label.
class_label, default, use standard class or property label.
Returns:
node_dict, dict: dictionary of relationship information about the current node
{'displayName': '', 'label': '', 'comment': 'TBD', 'required': None, 'validationRules': [], 'isPartOf': '', 'uri': ''}
Expand Down Expand Up @@ -228,6 +239,7 @@ def generate_node_dict(self, node_display_name: str, attr_rel_dict: dict) -> dic
attr_relationships=attr_relationships,
csv_header=csv_header,
entry_type=entry_type,
data_model_labels=data_model_labels,
)
}
)
Expand All @@ -249,6 +261,7 @@ def generate_node_dict(self, node_display_name: str, attr_rel_dict: dict) -> dic
attr_relationships=attr_relationships,
csv_header=csv_header,
entry_type=entry_type,
data_model_labels=data_model_labels,
)
}
)
Expand Down
5 changes: 2 additions & 3 deletions schematic/schemas/data_model_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import pandas as pd
import pathlib

from typing import Any, Dict, Optional, Text, List, Union

from schematic.utils.df_utils import load_df
Expand Down Expand Up @@ -418,9 +419,7 @@ def gather_jsonld_attributes_relationships(self, model_jsonld: List[dict]) -> Di
else:
attr_rel_dictionary[p_attr_key][
"Relationships"
].update(
{rel_csv_header: [entry[dn_jsonld_key]]}
)
][rel_csv_header].extend([entry[dn_jsonld_key]])
# If the parsed_val is not already recorded in the dictionary, add it
elif attr_in_dict == False:
# Get the display name for the parsed value
Expand Down
Loading

0 comments on commit 3d75f1b

Please sign in to comment.