Sage-Bionetworks · mialy-defelice · Oct 25, 2023 · Oct 24, 2023 · Oct 24, 2023 · Oct 24, 2023
diff --git a/schematic/schemas/data_model_edges.py b/schematic/schemas/data_model_edges.py
@@ -10,12 +10,12 @@ def __init__(self):
 
     def generate_edge(
         self,
-        G: nx.MultiDiGraph,
         node: str,
         all_node_dict: dict,
         attr_rel_dict: dict,
         edge_relationships: dict,
-    ) -> nx.MultiDiGraph:
+        edge_list: list,
+    ) -> "list[tuple[str, str, dict[str, str | int]]]":
         """Generate an edge between a target node and relevant other nodes the data model. In short, does this current node belong to a recorded relationship in the attribute, relationshps dictionary. Go through each attribute and relationship to find where the node may be.
         Args:
             G, nx.MultiDiGraph: networkx graph representation of the data model, that is in the process of being fully built. At this point, all the nodes would have been added, and edges are being added per target node.
@@ -28,9 +28,11 @@ def generate_edge(
                         Relationships: {
                                     CSV Header: Value}}}
             edge_relationships: dict, rel_key: csv_header if the key represents a value relationship.
-
+            edge_list: list(tuple), list of tuples describing the edges and the edge attributes, organized as (node_1, node_2, {key:edge_relationship_key, weight:int})
+                At this point, the edge list is still being built. Edges are collected in this list and added to the graph afterwards, so they are not overwritten inside the loop and the graph does not need to be passed around.
         Returns:
-            G, nx.MultiDiGraph: networkx graph representation of the data model, that has had new edges attached.
+            edge_list: list(tuple), list of tuples describing the edges and the edge attributes, organized as (node_1, node_2, {key:edge_relationship_key, weight:int})
+                The same list that was passed in, with the edges related to the current node appended.
         """
         # For each attribute in the model.
         for attribute_display_name, relationship in attr_rel_dict.items():
@@ -65,26 +67,22 @@ def generate_edge(
                         # Add edges, in a manner that preserves directionality
                         # TODO: rewrite to use edge_dir
                         if rel_key in ["subClassOf", "domainIncludes"]:
-                            G.add_edge(
+                            edge_list.append((
                                 all_node_dict[node]["label"],
                                 all_node_dict[attribute_display_name]["label"],
-                                key=edge_key,
-                                weight=weight,
-                            )
+                                {"key": edge_key, "weight": weight},
+                            ))
                         else:
-                            G.add_edge(
+                            edge_list.append((
                                 all_node_dict[attribute_display_name]["label"],
                                 all_node_dict[node]["label"],
-                                key=edge_key,
-                                weight=weight,
-                            )
+                                {"key": edge_key, "weight": weight},
+                            ))
                         # Add add rangeIncludes/valid value relationships in reverse as well, making the attribute the parent of the valid value.
                         if rel_key == "rangeIncludes":
-                            G.add_edge(
+                            edge_list.append((
                                 all_node_dict[attribute_display_name]["label"],
                                 all_node_dict[node]["label"],
-                                key="parentOf",
-                                weight=weight,
-                            )
-
-        return G
+                                {"key": "parentOf", "weight": weight},
+                            ))
+        return edge_list
diff --git a/schematic/schemas/data_model_graph.py b/schematic/schemas/data_model_graph.py
@@ -1,3 +1,4 @@
+from copy import deepcopy
 import graphviz
 import logging
 from typing import Any, Dict, Optional, Text
@@ -94,16 +95,22 @@ def generate_data_model_graph(self) -> nx.MultiDiGraph:
             # Generate node and attach information (attributes) to each node
             G = self.dmn.generate_node(G, node_dict)
 
+        edge_list = []
         ## Connect nodes via edges
         for node in all_nodes:
             # Generate edges
-            G = self.dme.generate_edge(
-                G,
+            edge_list = self.dme.generate_edge(
                 node,
                 all_node_dict,
                 self.attribute_relationships_dict,
                 edge_relationships,
+                edge_list,
             )
+            # generate_edge appends to edge_list and returns that same list, so no per-node copy is needed.
+
+        # Add edges to the Graph
+        for node_1, node_2, edge_dict in edge_list:
+            G.add_edge(node_1, node_2, key=edge_dict['key'], weight=edge_dict['weight'])
         return G
 
 
@@ -357,6 +364,8 @@ def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
             )
 
         edge_key = self.rel_dict[key]["edge_key"]
+
+        # Handle out edges
         if self.rel_dict[key]["jsonld_direction"] == "out":
             # use outedges
 
@@ -369,6 +378,7 @@ def get_ordered_entry(self, key: str, source_node_label: str) -> list[str]:
                 )
                 if edge_key in self.graph[source_node][attached_node]
             }
+        # Handle in edges
         else:
             # use inedges
             original_edge_weights_dict = {

diff --git a/schematic/schemas/data_model_jsonld.py b/schematic/schemas/data_model_jsonld.py
@@ -134,6 +134,28 @@ def get_edges_associated_with_node(
         node_edges.extend(list(self.graph.out_edges(node, data=True)))
         return node_edges
 
+    def get_edges_associated_with_property_nodes(
+        self, node:str
+    ) -> List[tuple[str, str, dict[str, int]]]:
+        """Get the domainIncludes edges recorded under a property node in self.graph, to make sure we add that relationship.
+        Args:
+            node, str: Label of the property node in the graph to look up associated edges for
+        Returns:
+            node_edges, list: List of tuples of edges associated with the given node; each tuple contains the two node labels, plus the dict stored for that edge (presumably the weight dict - confirm against how the graph is built).
+        """
+        # Get the edge key used for domainIncludes relationships
+        domainIncludes_edge_key = self.rel_dict['domainIncludes']['edge_key']
+        node_edges = []
+        # Get dict of edges for the current property node, keyed by the connected node
+        node_edges_dict = self.graph[node]
+        for node_2, edge_dict in node_edges_dict.items():
+            # Look through relationships in the edge dictionary
+            for edge_key in edge_dict:
+                # Only edges stored under the domainIncludes edge key are collected
+                if edge_key in [domainIncludes_edge_key]:
+                    node_edges.append((node, node_2, edge_dict[edge_key]))
+        return node_edges
+
     def add_edge_rels_to_template(self, template: dict, rel_vals: dict, node: str):
         """
         Args:
@@ -145,6 +167,12 @@ def add_edge_rels_to_template(self, template: dict, rel_vals: dict, node: str):
         # Get all edges associated with the current node
         node_edges = self.get_edges_associated_with_node(node=node)
 
+
+        # For properties look for reverse relationships too
+        if node in self.dmge.find_properties():
+            property_node_edges = self.get_edges_associated_with_property_nodes(node=node)
+            node_edges.extend(property_node_edges)
+
         # Get node pairs and weights for each edge
         for node_1, node_2, weight in node_edges:
             # Retrieve the relationship(s) and related info between the two nodes
@@ -160,7 +188,9 @@ def add_edge_rels_to_template(self, template: dict, rel_vals: dict, node: str):
                     # If the relationship defined and edge_key
                     if relationship == edge_key:
                         # TODO: rewrite to use edge_dir
-                        if edge_key in ["domainIncludes", "parentOf"]:
+                        domainIncludes_edge_key = self.rel_dict['domainIncludes']['edge_key']
+                        subclassOf_edge_key = self.rel_dict['subClassOf']['edge_key']
+                        if edge_key in [subclassOf_edge_key]:
                             if node_2 == node:
                                 # Make sure the key is in the template (differs between properties and classes)
                                 if rel_vals["jsonld_key"] in template.keys():
@@ -178,6 +208,24 @@ def add_edge_rels_to_template(self, template: dict, rel_vals: dict, node: str):
                                         )
                                     else:
                                         template[rel_vals["jsonld_key"]] == node_1
+                        elif edge_key in [domainIncludes_edge_key]:  # domainIncludes edges are read from the current node (node_1) to node_2
+                            if node_1 == node:
+                                # Make sure the key is in the template (differs between properties and classes)
+                                if rel_vals["jsonld_key"] in template.keys():
+                                    node_2_id = {"@id": "bts:" + node_2}
+                                    # TODO: Move this append-if-missing logic to a helper function to clean up.
+                                    if (
+                                        isinstance(
+                                            template[rel_vals["jsonld_key"]], list
+                                        )
+                                        and node_2_id
+                                        not in template[rel_vals["jsonld_key"]]
+                                    ):
+                                        template[rel_vals["jsonld_key"]].append(
+                                            node_2_id
+                                        )
+                                    else:
+                                        template[rel_vals["jsonld_key"]] == node_2  # NOTE(review): '==' is a comparison, so this line has no effect. This branch also runs when node_2_id is already in the list, so do not simply change it to '=' - confirm the intent.
                         else:
                             if node_1 == node:
                                 # Make sure the key is in the template (differs between properties and classes)
@@ -238,6 +286,7 @@ def fill_entry_template(self, template: dict, node: str) -> dict:
                 template = self.add_edge_rels_to_template(
                     template=template, rel_vals=rel_vals, node=node
                 )
+
             # Fill in node value information
             else:
                 template = self.add_node_info_to_template(
@@ -249,11 +298,11 @@ def fill_entry_template(self, template: dict, node: str) -> dict:
             template=template,
             data_model_relationships=data_model_relationships,
         )
+
         # Reorder lists based on weights:
         template = self.reorder_template_entries(
             template=template,
         )
-
         # Add contexts to certain values
         template = self.add_contexts_to_entries(
             template=template,
@@ -364,6 +413,10 @@ def reorder_template_entries(self, template: dict) -> dict:
                 sorted_edges = self.dmge.get_ordered_entry(
                     key=key, source_node_label=template_label
                 )
+                # A length mismatch between the entry and its sorted edges is an error; raise instead of dropping into the debugger.
+                if not len(entry) == len(sorted_edges):
+                    raise ValueError("There is an error with sorting values in the JSONLD, please issue a bug report.")
+
                 edge_weights_dict = {edge: i for i, edge in enumerate(sorted_edges)}
                 ordered_edges = [0] * len(edge_weights_dict.keys())
                 for edge, normalized_weight in edge_weights_dict.items():

diff --git a/schematic/schemas/data_model_parser.py b/schematic/schemas/data_model_parser.py
@@ -274,6 +274,21 @@ def parse_entry(self, rel_entry: any, id_jsonld_key: str) -> Any:
             parsed_rel_entry = rel_entry
         return parsed_rel_entry
 
+    def get_display_name_from_label(self, label: str, model_jsonld: List[dict]) -> str:
+        """Return the display name recorded in model_jsonld for the entry whose label equals `label`; fall back to `label` itself when no such entry or display name exists."""
+        jsonld_keys_to_extract = ["label", "displayName"]
+        label_jsonld_key, dn_jsonld_key = [
+            self.rel_dict[key]["jsonld_key"] for key in jsonld_keys_to_extract
+        ]
+        for entry in model_jsonld:
+            # Only the entry describing the requested label is relevant; skip every other entry.
+            if entry.get(label_jsonld_key) != label:
+                continue
+            # Use the display name if one was recorded for this entry.
+            if dn_jsonld_key in entry:
+                return entry[dn_jsonld_key]
+        return label
+
     def gather_jsonld_attributes_relationships(self, model_jsonld: List[dict]) -> Dict:
         """
         Args:
@@ -327,15 +342,42 @@ def gather_jsonld_attributes_relationships(self, model_jsonld: List[dict]) -> Di
                 ):
                     # Retrieve entry value associated with the given relationship
                     rel_entry = entry[rel_vals["jsonld_key"]]
-                    # If there is an entry parset it by type and add to the attr:relationships dictionary.
+                    # If there is an entry parse it by type and add to the attr:relationships dictionary.
                     if rel_entry:
                         parsed_rel_entry = self.parse_entry(
                             rel_entry=rel_entry, id_jsonld_key=id_jsonld_key
                         )
-                        # Add relationships for each attribute and relationship to the dictionary
-                        attr_rel_dictionary[attr_key]["Relationships"].update(
-                            {self.rel_dict[rel_key]["csv_header"]: parsed_rel_entry}
-                        )
+                        rel_csv_header = self.rel_dict[rel_key]["csv_header"]
+                        if rel_key in ("domainIncludes", "parentOf"):
+                            # In the JSONLD the domain includes field contains the ids of attributes that the current attribute is the property/parent of.
+                            # Because of this we need to handle these values differently.
+                            # We get the values in the field (parsed_val), then add the current attribute's label under the relationship's CSV header in attr_rel_dictionary[p_attr_key].
+                            for parsed_val in parsed_rel_entry:
+                                attr_in_dict = False
+                                # Key (display name) in attr_rel_dictionary of the attribute that parsed_val refers to
+                                p_attr_key = ""
+                                # Check if the parsed value is already a part of the attr_rel_dictionary
+                                for attr_dn, rels in attr_rel_dictionary.items():
+                                    if parsed_val == rels["Relationships"].get("label"):
+                                        p_attr_key = attr_dn
+                                        attr_in_dict = True
+                                # If it is already part of the dictionary, add the current attribute under the parsed value's entry
+                                if attr_in_dict:
+                                    if rel_csv_header not in attr_rel_dictionary[p_attr_key]["Relationships"]:
+                                        attr_rel_dictionary[p_attr_key]["Relationships"].update({rel_csv_header: [entry[label_jsonld_key]]})
+                                    else:
+                                        attr_rel_dictionary[p_attr_key]["Relationships"][rel_csv_header].append(entry[label_jsonld_key])
+                                # If the parsed_val is not already recorded in the dictionary, add it
+                                else:
+                                    # Get the display name for the parsed value
+                                    p_attr_key = self.get_display_name_from_label(parsed_val, model_jsonld)
+
+                                    attr_rel_dictionary.update(attr_dict_template(p_attr_key))
+                                    attr_rel_dictionary[p_attr_key]["Relationships"].update({rel_csv_header: [entry[label_jsonld_key]]})
+                        else:
+                            attr_rel_dictionary[attr_key]["Relationships"].update(
+                                {rel_csv_header: parsed_rel_entry}
+                            )
                 elif (
                     rel_vals["jsonld_key"] in entry.keys()
                     and not rel_vals["csv_header"]

diff --git a/schematic/schemas/data_model_relationships.py b/schematic/schemas/data_model_relationships.py
@@ -153,7 +153,7 @@ def define_data_model_relationships(self) -> Dict:
                 "jsonld_key": "schema:domainIncludes",
                 "csv_header": "Properties",
                 "edge_key": "domainValue",
-                "jsonld_direction": "in",
+                "jsonld_direction": "out",
                 "edge_dir": "in",
                 "type": list,
                 "edge_rel": True,

diff --git a/schematic/schemas/data_model_validator.py b/schematic/schemas/data_model_validator.py
@@ -1,8 +1,11 @@
+import logging
+import multiprocessing
 import networkx as nx
+import time
 from typing import Any, Dict, Optional, Text, List, Tuple
 
 from schematic.schemas.data_model_relationships import DataModelRelationships
-
+logger = logging.getLogger(__name__)
 
 class DataModelValidator:
     """
@@ -69,24 +72,38 @@ def check_graph_has_required_node_fields(self) -> List[str]:
                 )
         return error
 
+    def run_cycles(self, graph):
+        """Log a warning for every simple cycle that networkx finds in `graph` (the graph to search; check_is_dag passes self.graph). Returns nothing."""
+        for cycle in nx.simple_cycles(graph):
+            # A self-loop is reported as a one-node cycle, so use the last node (index -1, not 1) to avoid an IndexError.
+            logger.warning(
+                f"Schematic requires models be a directed acyclic graph (DAG). Your graph is not a DAG, we found a loop between: {cycle[0]} and {cycle[-1]}, please remove this loop from your model and submit again."
+            )
+
     def check_is_dag(self) -> List[str]:
         """Check that generated graph is a directed acyclic graph
         Returns:
                 error, list: List of error messages if graph is not a DAG. List will include a message for each cycle found, if not there is a more generic message for the graph as a whole.
         """
         error = []
         if not nx.is_directed_acyclic_graph(self.graph):
-            # Attempt to find any cycles:
-            cycles = nx.simple_cycles(self.graph)
-            if cycles:
-                for cycle in cycles:
-                    error.append(
-                        f"Schematic requires models be a directed acyclic graph (DAG). Your graph is not a DAG, we found a loop between: {cycle[0]} and {cycle[1]}, please remove this loop from your model and submit again."
-                    )
-            else:
-                error.append(
-                    f"Schematic requires models be a directed acyclic graph (DAG). Your graph is not a DAG, we could not locate the sorce of the error, please inspect your model."
+            cycles = multiprocessing.Process(target=self.run_cycles, name="Get Cycles", args=(self.graph,))
+            cycles.start()
+
+            # Wait up to 5 seconds for the cycle search; join returns as soon as the process finishes, so fast searches do not pay the full delay.
+            cycles.join(timeout=5)
+
+            # If the process is still running after the timeout
+            if cycles.is_alive():
+                # Stop the cycle search
+                cycles.terminate()
+                # Cleanup
+                cycles.join()
+
+            error.append(
+                f"Schematic requires models be a directed acyclic graph (DAG). Please inspect your model."
                 )
+
         return error
 
     def check_blacklisted_characters(self) -> List[str]: