From d4c40e311bac1e0cfbd040a7470ced1fe5b1361b Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Fri, 2 Feb 2024 14:25:43 -0800 Subject: [PATCH 1/7] linted visualization module --- .github/workflows/test.yml | 2 +- schematic/visualization/__init__.py | 1 + .../visualization/attributes_explorer.py | 117 ++++--- schematic/visualization/commands.py | 28 +- schematic/visualization/tangled_tree.py | 311 +++++++++++------- 5 files changed, 268 insertions(+), 191 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b2adf95f8..23b81b1d5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -116,7 +116,7 @@ jobs: run: | # ran only on certain files for now # add here when checked - poetry run pylint schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py + poetry run pylint schematic/visualization/* schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py #---------------------------------------------- # run test suite diff --git a/schematic/visualization/__init__.py b/schematic/visualization/__init__.py index a96118fc3..d5526b1ad 100644 --- a/schematic/visualization/__init__.py +++ b/schematic/visualization/__init__.py @@ -1,2 +1,3 @@ +"""visualization imports""" from schematic.visualization.attributes_explorer import AttributesExplorer from schematic.visualization.tangled_tree import TangledTree diff --git a/schematic/visualization/attributes_explorer.py b/schematic/visualization/attributes_explorer.py index 90877e7e8..71747f999 100644 --- a/schematic/visualization/attributes_explorer.py +++ b/schematic/visualization/attributes_explorer.py @@ -1,21 +1,22 @@ -import gc +"""Attributes Explorer Class""" import json import logging -import numpy as np import os + +import numpy as np import pandas as pd -from typing import Any, Dict, Optional, Text, List from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer from schematic.schemas.data_model_json_schema import DataModelJSONSchema - from schematic.utils.io_utils import load_json logger = logging.getLogger(__name__) class AttributesExplorer: + """AttributesExplorer class""" + def __init__( self, path_to_jsonld: str, @@ -46,7 +47,7 @@ def __init__( self.output_path = self.create_output_path("merged_csv") - def create_output_path(self, terminal_folder): + def create_output_path(self, terminal_folder: str) -> str: """Create output path to store Observable visualization data if it does not already exist. Args: self.path_to_jsonld @@ -62,20 +63,22 @@ def create_output_path(self, terminal_folder): os.makedirs(output_path) return output_path - def convert_string_cols_to_json(self, df: pd.DataFrame, cols_to_modify: list): + def convert_string_cols_to_json( + self, dataframe: pd.DataFrame, cols_to_modify: list[str] + ) -> pd.DataFrame: """Converts values in a column from strings to JSON list for upload to Synapse. """ - for col in df.columns: + for col in dataframe.columns: if col in cols_to_modify: - df[col] = df[col].apply( + dataframe[col] = dataframe[col].apply( lambda x: json.dumps([y.strip() for y in x]) if x != "NaN" and x and x == np.nan else x ) - return df + return dataframe - def parse_attributes(self, save_file=True): + def parse_attributes(self, save_file: bool = True) -> pd.DataFrame: """ Args: save_file (bool): True: merged_df is saved locally to output_path. @@ -92,13 +95,13 @@ def parse_attributes(self, save_file=True): component_dg = self.dmge.get_digraph_by_edge_type("requiresComponent") components = component_dg.nodes() - # For each data type to be loaded gather all attribtes the user would + # For each data type to be loaded gather all attributes the user would # have to provide. return self._parse_attributes(components, save_file) def parse_component_attributes( - self, component=None, save_file=True, include_index=True - ): + self, component=None, save_file: bool = True, include_index: bool = True + ) -> pd.DataFrame: """ Args: save_file (bool): True: merged_df is saved locally to output_path. @@ -115,10 +118,11 @@ def parse_component_attributes( if not component: raise ValueError("You must provide a component to visualize.") - else: - return self._parse_attributes([component], save_file, include_index) + return self._parse_attributes([component], save_file, include_index) - def _parse_attributes(self, components, save_file=True, include_index=True): + def _parse_attributes( + self, components: list, save_file=True, include_index=True + ) -> pd.DataFrame: """ Args: save_file (bool): True: merged_df is saved locally to output_path. @@ -138,22 +142,27 @@ def _parse_attributes(self, components, save_file=True, include_index=True): If unable hits an error while attempting to get conditional requirements. This error is likely to be found if there is a mismatch in naming. """ + # This function needs to be refactored, temporarily disabling some pylint errors + # pylint: disable=too-many-locals + # pylint: disable=too-many-nested-blocks + # pylint: disable=too-many-branches + # pylint: disable=too-many-statements - # For each data type to be loaded gather all attribtes the user would + # For each data type to be loaded gather all attributes the user would # have to provide. df_store = [] for component in components: - data_dict = {} + data_dict: dict = {} # get the json schema json_schema = self.data_model_js.get_json_validation_schema( source_node=component, schema_name=self.path_to_jsonld ) - # Gather all attribues, their valid values and requirements + # Gather all attributes, their valid values and requirements for key, value in json_schema["properties"].items(): data_dict[key] = {} - for k, v in value.items(): + for k, _ in value.items(): if k == "enum": data_dict[key]["Valid Values"] = value["enum"] if key in json_schema["required"]: @@ -163,20 +172,20 @@ def _parse_attributes(self, components, save_file=True, include_index=True): data_dict[key]["Component"] = component # Add additional details per key (from the JSON-ld) for dic in self.jsonld["@graph"]: - if "sms:displayName" in dic.keys(): + if "sms:displayName" in dic: key = dic["sms:displayName"] - if key in data_dict.keys(): + if key in data_dict: data_dict[key]["Attribute"] = dic["sms:displayName"] data_dict[key]["Label"] = dic["rdfs:label"] data_dict[key]["Description"] = dic["rdfs:comment"] if "validationRules" in dic.keys(): data_dict[key]["Validation Rules"] = dic["validationRules"] # Find conditional dependencies - if "allOf" in json_schema.keys(): + if "allOf" in json_schema: for conditional_dependencies in json_schema["allOf"]: key = list(conditional_dependencies["then"]["properties"])[0] try: - if key in data_dict.keys(): + if key in data_dict: if "Cond_Req" not in data_dict[key].keys(): data_dict[key]["Cond_Req"] = [] data_dict[key]["Conditional Requirements"] = [] @@ -186,11 +195,12 @@ def _parse_attributes(self, components, save_file=True, include_index=True): value = conditional_dependencies["if"]["properties"][ attribute ]["enum"] - # Capitalize attribute if it begins with a lowercase letter, for aesthetics. + # Capitalize attribute if it begins with a lowercase + # letter, for aesthetics. if attribute[0].islower(): attribute = attribute.capitalize() - # Remove "Type" (i.e. turn "Biospecimen Type" to "Biospcimen") + # Remove "Type" (i.e. turn "Biospecimen Type" to "Biospecimen") if "Type" in attribute: attribute = attribute.split(" ")[0] @@ -207,38 +217,37 @@ def _parse_attributes(self, components, save_file=True, include_index=True): data_dict[key]["Conditional Requirements"].extend( [conditional_statement] ) - except: + except Exception as exc: raise ValueError( - f"There is an error getting conditional requirements related " - "to the attribute: {key}. The error is likely caused by naming inconsistencies (e.g. uppercase, camelcase, ...)" - ) - - for key, value in data_dict.items(): - if "Conditional Requirements" in value.keys(): + ( + "There is an error getting conditional requirements related " + f"to the attribute: {key}. The error is likely caused by naming " + "inconsistencies (e.g. uppercase, camelcase, ...)" + ) + ) from exc + + for outer_dict_key, inner_dict in data_dict.items(): + if "Conditional Requirements" in inner_dict.keys(): ## reformat conditional requirement + conditional_requirements = inner_dict["Conditional Requirements"] # get all attributes - attr_lst = [ - i.split(" is ")[-1] - for i in data_dict[key]["Conditional Requirements"] - ] + attr_lst = [i.split(" is ")[-1] for i in conditional_requirements] # join a list of attributes by using OR attr_str = " OR ".join(attr_lst) # reformat the conditional requirement - component_name = data_dict[key]["Conditional Requirements"][ - 0 - ].split(" is ")[0] + component_name = conditional_requirements[0].split(" is ")[0] + conditional_statement_str = ( - f' If {component_name} is {attr_str} then "{key}" is required' + f" If {component_name} is {attr_str} then " + f'"{outer_dict_key}" is required' ) + conditional_requirements = conditional_statement_str - data_dict[key][ - "Conditional Requirements" - ] = conditional_statement_str - df = pd.DataFrame(data_dict) - df = df.T + data_dict_df = pd.DataFrame(data_dict) + data_dict_df = data_dict_df.T cols = [ "Attribute", "Label", @@ -250,11 +259,12 @@ def _parse_attributes(self, components, save_file=True, include_index=True): "Validation Rules", "Component", ] - cols = [col for col in cols if col in df.columns] - df = df[cols] - df = self.convert_string_cols_to_json(df, ["Valid Values"]) - # df.to_csv(os.path.join(csv_output_path, data_type + '.vis_data.csv')) - df_store.append(df) + cols = [col for col in cols if col in data_dict_df.columns] + data_dict_df = data_dict_df[cols] + data_dict_df = self.convert_string_cols_to_json( + data_dict_df, ["Valid Values"] + ) + df_store.append(data_dict_df) merged_attributes_df = pd.concat(df_store, join="outer") cols = [ @@ -271,12 +281,11 @@ def _parse_attributes(self, components, save_file=True, include_index=True): cols = [col for col in cols if col in merged_attributes_df.columns] merged_attributes_df = merged_attributes_df[cols] - if save_file == True: + if save_file: return merged_attributes_df.to_csv( os.path.join( self.output_path, self.schema_name + "attributes_data.vis_data.csv" ), index=include_index, ) - elif save_file == False: - return merged_attributes_df.to_csv(index=include_index) + return merged_attributes_df.to_csv(index=include_index) diff --git a/schematic/visualization/commands.py b/schematic/visualization/commands.py index 5ecc4f8f7..d69354a7b 100644 --- a/schematic/visualization/commands.py +++ b/schematic/visualization/commands.py @@ -1,10 +1,14 @@ -#!/usr/bin/env python3 +"""visualization commands""" +# pylint: disable=unused-argument +# pylint: disable=useless-return +# pylint: disable=unused-variable import logging import sys +from typing import Any import click -import click_log +import click_log # type: ignore from schematic.visualization.attributes_explorer import AttributesExplorer from schematic.visualization.tangled_tree import TangledTree @@ -16,10 +20,11 @@ logger = logging.getLogger(__name__) click_log.basic_config(logger) -CONTEXT_SETTINGS = dict(help_option_names=["--help", "-h"]) # help options +CONTEXT_SETTINGS = {"help_option_names": ["--help", "-h"]} # help options -# invoke_without_command=True -> forces the application not to show aids before losing them with a --h +# invoke_without_command=True -> forces the application not to show aids before +# losing them with a --h @click.group(context_settings=CONTEXT_SETTINGS, invoke_without_command=True) @click_log.simple_verbosity_option(logger) @click.option( @@ -30,17 +35,18 @@ help=query_dict(model_commands, ("model", "config")), ) @click.pass_context -def viz(ctx, config): # use as `schematic model ...` +def viz(ctx: Any, config: str) -> None: # use as `schematic model ...` """ Sub-commands for Visualization methods. """ try: + # pylint: disable=logging-fstring-interpolation logger.debug(f"Loading config file contents in '{config}'") CONFIG.load_config(config) ctx.obj = CONFIG - except ValueError as e: + except ValueError as exc: logger.error("'--config' not provided or environment variable not set.") - logger.exception(e) + logger.exception(exc) sys.exit(1) @@ -49,8 +55,8 @@ def viz(ctx, config): # use as `schematic model ...` ) @click_log.simple_verbosity_option(logger) @click.pass_obj -def get_attributes(ctx): - """ """ +def get_attributes(ctx: Any) -> None: + """Gets attributes""" # Get JSONLD file path path_to_jsonld = CONFIG.model_location log_value_from_config("jsonld", path_to_jsonld) @@ -74,7 +80,7 @@ def get_attributes(ctx): help=query_dict(viz_commands, ("visualization", "tangled_tree", "text_format")), ) @click.pass_obj -def get_tangled_tree_text(ctx, figure_type, text_format): +def get_tangled_tree_text(ctx: Any, figure_type: str, text_format: str) -> None: """Get text to be placed on the tangled tree visualization.""" # Get JSONLD file path path_to_jsonld = CONFIG.model_location @@ -97,7 +103,7 @@ def get_tangled_tree_text(ctx, figure_type, text_format): help=query_dict(viz_commands, ("visualization", "tangled_tree", "figure_type")), ) @click.pass_obj -def get_tangled_tree_component_layers(ctx, figure_type): +def get_tangled_tree_component_layers(ctx: Any, figure_type: str) -> None: """Get the components that belong in each layer of the tangled tree visualization.""" # Get JSONLD file path path_to_jsonld = CONFIG.model_location diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py index d0de01bf7..eaba44fb1 100644 --- a/schematic/visualization/tangled_tree.py +++ b/schematic/visualization/tangled_tree.py @@ -1,36 +1,31 @@ +"""Tangled tree class""" + + from io import StringIO import json import logging -import networkx as nx -import numpy as np import os from os import path -import pandas as pd +from typing import Optional, Any -# allows specifying explicit variable types -from typing import Any, Dict, Optional, Text, List +import networkx as nx # type: ignore +import numpy as np +import pandas as pd -from schematic.utils.viz_utils import visualize from schematic.visualization.attributes_explorer import AttributesExplorer - from schematic.schemas.data_model_parser import DataModelParser from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer -from schematic.schemas.data_model_relationships import DataModelRelationships - -from schematic import LOADER from schematic.utils.io_utils import load_json -from copy import deepcopy - -# Make sure to have newest version of decorator logger = logging.getLogger(__name__) -# OUTPUT_DATA_DIR = str(Path('tests/data/visualization/AMPAD').resolve()) -# DATA_DIR = str(Path('tests/data').resolve()) -class TangledTree(object): - """ """ +class TangledTree: + """Tangled tree class""" + + # pylint: disable=too-many-instance-attributes + # pylint: disable=invalid-name def __init__( self, @@ -74,7 +69,15 @@ def __init__( self.text_csv_output_path = self.ae.create_output_path("text_csv") self.json_output_path = self.ae.create_output_path("tangled_tree_json") - def strip_double_quotes(self, string): + def strip_double_quotes(self, string: str) -> str: + """Removes double quotes from string + + Args: + string (str): The string to remove quotes from + + Returns: + str: The processed string + """ # Remove double quotes from beginning and end of string. if string.startswith('"') and string.endswith('"'): string = string[1:-1] @@ -83,7 +86,9 @@ def strip_double_quotes(self, string): return string def get_text_for_tangled_tree(self, text_type, save_file=False): - """Gather the text that needs to be either higlighted or plain for the tangled tree visualization. + """ + Gather the text that needs to be either highlighted or plain for the + tangled tree visualization. Args: text_type (str): Choices = ['highlighted', 'plain'], determines the type of text rendering to return. @@ -92,6 +97,7 @@ def get_text_for_tangled_tree(self, text_type, save_file=False): If save_file==True: Saves plain or highlighted text as a CSV (to disk). save_file==False: Returns plain or highlighted text as a csv string. """ + # pylint: disable=too-many-locals # Get nodes in the digraph, many more nodes returned if figure type is dependency cdg = self.dmge.get_digraph_by_edge_type(self.dependency_type) nodes = cdg.nodes() @@ -107,7 +113,7 @@ def get_text_for_tangled_tree(self, text_type, save_file=False): highlighted = [] plain = [] - # For each component node in the tangled tree gather the plain and higlighted text. + # For each component node in the tangled tree gather the plain and highlighted text. for node in component_nodes: # Get the highlighted components based on figure_type if self.figure_type == "component": @@ -117,17 +123,18 @@ def get_text_for_tangled_tree(self, text_type, save_file=False): elif self.figure_type == "dependency": highlight_descendants = [node] - # Format text to be higlighted and gather text to be formated plain. + # Format text to be highlighted and gather text to be formatted plain. if not highlight_descendants: - # If there are no highlighted descendants just highlight the selected node (format for observable.) + # If there are no highlighted descendants just highlight the selected + # node (format for observable.) highlighted.append([node, "id", node]) # Gather all the text as plain text. plain_descendants = [n for n in nodes if n != node] else: - # Format higlighted text for Observable. + # Format highlighted text for Observable. for hd in highlight_descendants: highlighted.append([node, "id", hd]) - # Gather the non-higlighted text as plain text descendants. + # Gather the non-highlighted text as plain text descendants. plain_descendants = [ node for node in nodes if node not in highlight_descendants ] @@ -142,46 +149,47 @@ def get_text_for_tangled_tree(self, text_type, save_file=False): ) # Depending on input either export csv locally to disk or as a string. - if save_file == True: + if save_file: file_name = f"{self.schema_abbr}_{self.figure_type}_{text_type}.csv" df.to_csv(os.path.join(self.text_csv_output_path, file_name)) - return - elif save_file == False: - return df.to_csv() + return None + + return df.to_csv() def get_topological_generations(self): """Gather topological_gen, nodes and edges based on figure type. Outputs: topological_gen (List(list)):list of lists. Indicates layers of nodes. - nodes: (Networkx NodeView) Nodes of the component or dependency graph. When iterated over it functions like a list. - edges: (Networkx EdgeDataView) Edges of component or dependency graph. When iterated over it works like a list of tuples. + nodes: (Networkx NodeView) Nodes of the component or dependency graph. + When iterated over it functions like a list. + edges: (Networkx EdgeDataView) Edges of component or dependency graph. + When iterated over it works like a list of tuples. """ # Get nodes in the digraph digraph = self.dmge.get_digraph_by_edge_type(self.dependency_type) nodes = digraph.nodes() # Get subgraph - # mm_graph = self.sg.se.get_nx_schema() - # subg = self.sg.get_subgraph_by_edge_type(mm_graph, self.dependency_type) - subg = self.dmge.get_subgraph_by_edge_type(self.dependency_type) + subgraph = self.dmge.get_subgraph_by_edge_type(self.dependency_type) # Get edges and topological_gen based on figure type. if self.figure_type == "component": edges = digraph.edges() - topological_gen = list(reversed(list(nx.topological_generations(subg)))) + topological_gen = list(reversed(list(nx.topological_generations(subgraph)))) elif self.figure_type == "dependency": rev_digraph = nx.DiGraph.reverse(digraph) edges = rev_digraph.edges() - topological_gen = list(nx.topological_generations(subg)) + topological_gen = list(nx.topological_generations(subgraph)) - return topological_gen, nodes, edges, subg + return topological_gen, nodes, edges, subgraph def remove_unwanted_characters_from_conditional_statement( self, cond_req: str ) -> str: """Remove unwanted characters from conditional statement - Example of conditional requirement: If File Format IS "BAM" OR "CRAM" OR "CSV/TSV" then Genome Build is required + Example of conditional requirement: If File Format IS "BAM" OR "CRAM" OR + "CSV/TSV" then Genome Build is required Example output: File Format IS "BAM" OR "CRAM" OR "CSV/TSV" """ if "then" in cond_req: @@ -195,11 +203,12 @@ def remove_unwanted_characters_from_conditional_statement( def get_ca_alias(self, conditional_requirements: list) -> dict: """Get the alias for each conditional attribute. - NOTE: Obtaining attributes(attr) and aliases(ali) in this function is specific to how formatting - is set in AttributesExplorer. If that formatting changes, this section - will likely break or in the worst case have a silent error. + NOTE: Obtaining attributes(attr) and aliases(ali) in this function is specific + to how formatting is set in AttributesExplorer. If that formatting changes, + this section will likely break or in the worst case have a silent error. Input: - conditional_requirements_list (list): list of strings of conditional requirements from outputs of AttributesExplorer. + conditional_requirements_list (list): list of strings of conditional + requirements from outputs of AttributesExplorer. Output: ca_alias (dict): key: alias (attribute response) @@ -213,7 +222,7 @@ def get_ca_alias(self, conditional_requirements: list) -> dict: for req in conditional_requirements ] - for i, req in enumerate(conditional_requirements): + for _, req in enumerate(conditional_requirements): if "OR" not in req: attr, ali = req.split(" is ") attr = "".join(attr.split()) @@ -231,7 +240,8 @@ def gather_component_dependency_info(self, cn, attributes_df): """Gather all component dependency information. Inputs: cn: (str) component name - attributes_df: (Pandas DataFrame) Details for all attributes across all components. From AttributesExplorer. + attributes_df: (Pandas DataFrame) Details for all attributes across all components. + From AttributesExplorer. Outputs: conditional_attributes (list): List of conditional attributes for a particular component ca_alias (dict): @@ -253,17 +263,12 @@ def gather_component_dependency_info(self, cn, attributes_df): if "Cond_Req" in attributes_df.columns: conditional_attributes = list( attributes_df[ - (attributes_df["Cond_Req"] == True) - & (attributes_df["Component"] == cn) + (attributes_df["Cond_Req"]) & (attributes_df["Component"] == cn) ]["Label"] ) - ca_df = attributes_df[ - (attributes_df["Cond_Req"] == True) & (attributes_df["Component"] == cn) - ] conditional_requirements = list( attributes_df[ - (attributes_df["Cond_Req"] == True) - & (attributes_df["Component"] == cn) + (attributes_df["Cond_Req"]) & (attributes_df["Component"] == cn) ]["Conditional Requirements"] ) ca_alias = self.get_ca_alias(conditional_requirements) @@ -277,16 +282,21 @@ def gather_component_dependency_info(self, cn, attributes_df): return conditional_attributes, ca_alias, all_attributes - def find_source_nodes(self, nodes, edges, all_attributes=[]): + def find_source_nodes(self, nodes, edges, all_attributes=None): """Find all nodes in the graph that do not have a parent node. Inputs: - nodes: (Networkx NodeView) Nodes of the component or dependency graph. When iterated over it functions like a list. - edges: (Networkx EdgeDataView) Edges of component or dependency graph. When iterated over it works like a list of tuples. - attributes_df: (Pandas DataFrame) Details for all attributes across all components. From AttributesExplorer. + nodes: (Networkx NodeView) Nodes of the component or dependency graph. + When iterated over it functions like a list. + edges: (Networkx EdgeDataView) Edges of component or dependency graph. + When iterated over it works like a list of tuples. + attributes_df: (Pandas DataFrame) Details for all attributes across all + components. From AttributesExplorer. Outputs: source_nodes (list(str)): List of parentless nodes in """ + if all_attributes is None: + all_attributes = [] # Find edges that are not source nodes. not_source = [] for node in nodes: @@ -305,21 +315,25 @@ def find_source_nodes(self, nodes, edges, all_attributes=[]): source_nodes.append(node) return source_nodes - def get_parent_child_dictionary(self, nodes, edges, all_attributes=[]): - """Based on the dependency type, create dictionaries between parent and child and child and parent attributes. + def get_parent_child_dictionary(self, edges, all_attributes=None): + """ + Based on the dependency type, create dictionaries between parent and + child and child and parent attributes. Input: - nodes: (Networkx NodeView) Nodes of the component or dependency graph. edges: (Networkx EdgeDataView (component figure) or List(list) (dependency figure)) Edges of component or dependency graph. all_attributes: Output: child_parents (dict): key: child - value: list of the childs parents + value: list of the child's parents parent_children (dict): key: parent value: list of the parents children """ + # pylint: disable=too-many-branches + if all_attributes is None: + all_attributes = [] child_parents = {} parent_children = {} @@ -327,7 +341,7 @@ def get_parent_child_dictionary(self, nodes, edges, all_attributes=[]): # Construct child_parents dictionary for edge in edges: # Add child as a key - if edge[0] not in child_parents.keys(): + if edge[0] not in child_parents: child_parents[edge[0]] = [] # Add parents to list @@ -336,7 +350,7 @@ def get_parent_child_dictionary(self, nodes, edges, all_attributes=[]): # Construct parent_children dictionary for edge in edges: # Add parent as a key - if edge[1] not in parent_children.keys(): + if edge[1] not in parent_children: parent_children[edge[1]] = [] # Add children to list @@ -348,10 +362,10 @@ def get_parent_child_dictionary(self, nodes, edges, all_attributes=[]): # Check if child is an attribute for the current component if edge[0] in all_attributes: # Add child as a key - if edge[0] not in child_parents.keys(): + if edge[0] not in child_parents: child_parents[edge[0]] = [] - # Add parent to list if it is an attriute for the current component + # Add parent to list if it is an attribute for the current component if edge[1] in all_attributes: child_parents[edge[0]].append(edge[1]) @@ -360,16 +374,16 @@ def get_parent_child_dictionary(self, nodes, edges, all_attributes=[]): # Check if parent is an attribute for the current component if edge[1] in all_attributes: # Add parent as a key - if edge[1] not in parent_children.keys(): + if edge[1] not in parent_children: parent_children[edge[1]] = [] - # Add child to list if it is an attriute for the current component + # Add child to list if it is an attribute for the current component if edge[0] in all_attributes: parent_children[edge[1]].append(edge[0]) return child_parents, parent_children - def alias_edges(self, ca_alias: dict, edges) -> List[list]: + def alias_edges(self, ca_alias: dict, edges) -> list[list]: """Create new edges based on aliasing between an attribute and its response. Purpose: Create aliased edges. @@ -387,16 +401,18 @@ def alias_edges(self, ca_alias: dict, edges) -> List[list]: ca_alias (dict): key: alias (attribute response) value: attribute - edges (Networkx EdgeDataView): Edges of component or dependency graph. When iterated over it works like a list of tuples. + edges (Networkx EdgeDataView): Edges of component or dependency graph. + When iterated over it works like a list of tuples. Output: - aliased_edges (List[lists]) of aliased edges. + aliased_edges (list[list]) of aliased edges. """ aliased_edges = [] - for i, edge in enumerate(edges): + for _, edge in enumerate(edges): # construct one set of edges at a time edge_set = [] - # If the first edge has an alias add alias to the first position in the current edge set + # If the first edge has an alias add alias to the first + # position in the current edge set if edge[0] in ca_alias.keys(): edge_set.append(ca_alias[edge[0]]) @@ -404,7 +420,8 @@ def alias_edges(self, ca_alias: dict, edges) -> List[list]: else: edge_set.append(edge[0]) - # If the secod edge has an alias add alias to the first position in the current edge set + # If the second edge has an alias add alias to the first + # position in the current edge set if edge[1] in ca_alias.keys(): edge_set.append(ca_alias[edge[1]]) @@ -440,13 +457,14 @@ def prune_expand_topological_gen( pruned_topological_gen = [] # For each layer(gen) in the topological generation list - for i, layer in enumerate(topological_gen): + for _, layer in enumerate(topological_gen): current_layer = [] next_layer = [] # For each node in the layer for node in layer: - # If the node is relevant to this component and is not a conditional attribute add it to the current layer. + # If the node is relevant to this component and is not a conditional + # attribute add it to the current layer. if node in all_attributes and node not in conditional_attributes: current_layer.append(node) @@ -462,13 +480,19 @@ def prune_expand_topological_gen( return pruned_topological_gen - def get_base_layers(self, topological_gen, child_parents, source_nodes, cn): + def get_base_layers( + self, + topological_gen: list[list], + child_parents: dict, + source_nodes: list, + cn: str, + ) -> tuple[dict[str, Any], dict[str, Any]]: """ Purpose: Reconfigure topological gen to move things back appropriate layers if they would have a back reference. - The Tangle Tree figure requrires an acyclic directed graph that has additional + The Tangle Tree figure requires an acyclic directed graph that has additional layering rules between connected nodes. - If there is a backward connection then the line connecting them will break (this would suggest a cyclic connection.) @@ -485,12 +509,12 @@ def get_base_layers(self, topological_gen, child_parents, source_nodes, cn): topological_gen: list of lists. Indicates layers of nodes. child_parents (dict): key: child - value: list of the childs parents + value: list of the child's parents source_nodes: list, list of nodes that do not have a parent. cn: str, component name, default='' Output: base_layers: dict, key: component name, value: layer - represents initial layering of toplogical_gen + represents initial layering of topological_gen base_layers_copy_copy: dict, key: component name, value: layer represents the final layering after moving the components/attributes to their desired layer.c @@ -518,7 +542,8 @@ def get_base_layers(self, topological_gen, child_parents, source_nodes, cn): # Get the max layer a parent of the node can be found. max_parent_level = max(parent_levels) - # Move the node one layer beyond the max parent node position, so it will be downstream of its parents. + # Move the node one layer beyond the max parent node position, + # so it will be downstream of its parents. base_layers_copy[node] = max_parent_level + 1 # Make another version of updated positions iterate on further. @@ -541,7 +566,8 @@ def get_base_layers(self, topological_gen, child_parents, source_nodes, cn): # that the connections will not be backwards (and result in a broken line) for par in child_parents[node]: # For a given parent determine if its a source node and that the parents - # are not already at level 0, and the parent is not the current component node. + # are not already at level 0, and the parent is not the current component + # node. if ( par in source_nodes and ( @@ -562,25 +588,30 @@ def get_base_layers(self, topological_gen, child_parents, source_nodes, cn): # Move the node one position downstream of its max parent level. base_layers_copy_copy[node] = max_parent_level + 1 - # For each parental position to modify, move the parents level up to the max_parent_level. + # For each parental position to modify, move the parents level up to + # the max_parent_level. for par in modify_par: base_layers_copy_copy[par] = max_parent_level return base_layers, base_layers_copy_copy def adjust_node_placement( - self, base_layers_copy_copy, base_layers, topological_gen - ): - """Reorder nodes within topological_generations to match how they were ordered in base_layers_copy_copy + self, + base_layers_copy_copy: dict[str, Any], + base_layers: dict[str, Any], + topological_gen: list[list], + ) -> list[list]: + """Reorder nodes within topological_generations to match how they were ordered in + base_layers_copy_copy Input: topological_gen: list of lists. Indicates layers of nodes. base_layers: dict, key: component name, value: layer - represents initial layering of toplogical_gen + represents initial layering of topological_gen base_layers_copy_copy: dict, key: component name, value: layer represents the final layering after moving the components/attributes to their desired layer. Output: - topological_gen: same format but as the incoming topologial_gen but + topological_gen: same format but as the incoming topological_gen but ordered to match base_layers_copy_copy. """ if self.figure_type == "component": @@ -614,38 +645,48 @@ def adjust_node_placement( topological_gen[base_layers[node]].remove(node) return topological_gen - def move_source_nodes_to_bottom_of_layer(self, node_layers, source_nodes): + def move_source_nodes_to_bottom_of_layer( + self, node_layers: list[list], source_nodes: list + ) -> list[list]: """For aesthetic purposes move source nodes to the bottom of their respective layers. Input: - node_layers (List(list)): Lists of lists of each layer and the nodes contained in that layer as strings. + node_layers (List(list)): Lists of lists of each layer and the nodes contained + in that layer as strings. source_nodes (list): list of nodes that do not have a parent. Output: node_layers (List(list)): modified to move source nodes to the bottom of each layer. """ - for i, layer in enumerate(node_layers): + for _, layer in enumerate(node_layers): nodes_to_move = [] for node in layer: if node in source_nodes: nodes_to_move.append(node) for node in nodes_to_move: - node_layers[i].remove(node) - node_layers[i].append(node) + layer.remove(node) + layer.append(node) return node_layers def get_layers_dict_list( - self, node_layers, child_parents, parent_children, all_parent_children + self, + node_layers: list[list], + child_parents: dict, + parent_children: dict, + all_parent_children: dict, ): - """Convert node_layers to a list of lists of dictionaries that specifies each node and its parents (if applicable). + """Convert node_layers to a list of lists of dictionaries that specifies each node and + its parents (if applicable). Inputs: - node_layers: list of lists of each layer and the nodes contained in that layer as strings. + node_layers: list of lists of each layer and the nodes contained in that layer + as strings. child_parents (dict): key: child - value: list of the childs parents + value: list of the child's parents parent_children (dict): key: parent value: list of the parents children Outputs: - layers_list (List(list): list of lists of dictionaries that specifies each node and its parents (if applicable) + layers_list (List(list): list of lists of dictionaries that specifies each node and its + parents (if applicable) """ num_layers = len(node_layers) layers_list = [[] for i in range(0, num_layers)] @@ -676,28 +717,29 @@ def get_layers_dict_list( return layers_list - def get_node_layers_json( + def get_node_layers_json( # pylint: disable=too-many-arguments self, - topological_gen, - source_nodes, - child_parents, - parent_children, - cn="", - all_parent_children=None, - ): + topological_gen: list[list], + source_nodes: list[str], + child_parents: dict, + parent_children: dict, + cn: str = "", + all_parent_children: Optional[dict] = None, + ) -> str: """Return all the layers of a single tangled tree as a JSON String. Inputs: topological_gen:list of lists. Indicates layers of nodes. source_nodes: list of nodes that do not have a parent. child_parents (dict): key: child - value: list of the childs parents + value: list of the child's parents parent_children (dict): key: parent value: list of the parents children all_parent_children (dict): key: parent - value: list of the parents children (including all downstream nodes). Default to an empty dictionary + value: list of the parents children (including all downstream nodes). + Default to an empty dictionary Outputs: layers_json (JSON String): Layers of nodes in the tangled tree as a json string. """ @@ -719,7 +761,7 @@ def get_node_layers_json( # Convert layers to a list of dictionaries if not all_parent_children: # default to an empty dictionary - all_parent_children = dict() + all_parent_children = {} layers_dicts = self.get_layers_dict_list( node_layers, child_parents, parent_children, all_parent_children @@ -730,23 +772,30 @@ def get_node_layers_json( return layers_json - def save_outputs(self, save_file, layers_json, cn="", all_layers=None): + def save_outputs( + self, + save_file: bool, + layers_json, + cn: str = "", + all_layers: Optional[list[str]] = None, + ): """ Inputs: save_file (bool): Indicates whether to save a file locally or not.: layers_json (JSON String): Layers of nodes in the tangled tree as a json string. cn (str): component name, default='' - all_layers (list of json strings): Each string represents contains the layers for a single tangled tree. - If a dependency figure the list is added to each time this function is called, so starts incomplete. - default=[]. + all_layers (list of json strings): Each string represents contains the layers for + a single tangled tree. If a dependency figure the list is added to each time + this function is called, so starts incomplete. default=[]. Outputs: all_layers (list of json strings): - If save_file == False: Each string represents contains the layers for a single tangled tree. + If save_file == False: Each string represents contains the layers for a single + tangled tree. If save_file ==True: is an empty list. """ if all_layers is None: all_layers = [] - if save_file == True: + if save_file: if cn: output_file_name = ( f"{self.schema_abbr}_{self.figure_type}_{cn}_tangled_tree.json" @@ -756,24 +805,34 @@ def save_outputs(self, save_file, layers_json, cn="", all_layers=None): f"{self.schema_abbr}_{self.figure_type}_tangled_tree.json" ) with open( - os.path.join(self.json_output_path, output_file_name), "w" + os.path.join(self.json_output_path, output_file_name), + mode="w", + encoding="utf-8", ) as outfile: outfile.write(layers_json) + + # pylint: disable=logging-fstring-interpolation logger.info( - f"Tangled Tree JSON String saved to {os.path.join(self.json_output_path, output_file_name)}." + ( + "Tangled Tree JSON String saved to " + f"{os.path.join(self.json_output_path, output_file_name)}" + ) ) all_layers = layers_json - elif save_file == False: + else: all_layers.append(layers_json) return all_layers - def get_ancestors_nodes(self, subgraph, components): + def get_ancestors_nodes( + self, subgraph: nx.DiGraph, components: list[str] + ) -> dict[str, list[str]]: """ Inputs: subgraph: networkX graph object components: a list of nodes outputs: - all_parent_children: a dictionary that indicates a list of children (including all the intermediate children) of a given node + all_parent_children: a dictionary that indicates a list of children + (including all the intermediate children) of a given node """ all_parent_children = {} for component in components: @@ -784,35 +843,37 @@ def get_ancestors_nodes(self, subgraph, components): return all_parent_children - def get_tangled_tree_layers(self, save_file=True): + def get_tangled_tree_layers(self, save_file: bool = True): """Based on user indicated figure type, construct the layers of nodes of a tangled tree. Inputs: save_file (bool): Indicates whether to save a file locally or not. Outputs: all_layers (list of json strings): - If save_file == False: Each string represents contains the layers for a single tangled tree. + If save_file == False: Each string represents contains the layers + for a single tangled tree. If save_file ==True: is an empty list. Note on Dependency Tangled Tree: - If there are many conditional requirements associated with a depependency, and those + If there are many conditional requirements associated with a dependency, and those conditional requirements have overlapping attributes associated with them the tangled tree will only report one """ + # pylint: disable=too-many-locals # Gather the data model's, topological generations, nodes and edges - topological_gen, nodes, edges, subg = self.get_topological_generations() + topological_gen, nodes, edges, subgraph = self.get_topological_generations() if self.figure_type == "component": # Gather all source nodes source_nodes = self.find_source_nodes(nodes, edges) # Map all children to their parents and vice versa - child_parents, parent_children = self.get_parent_child_dictionary( - nodes, edges - ) + child_parents, parent_children = self.get_parent_child_dictionary(edges) # find all the downstream nodes - all_parent_children = self.get_ancestors_nodes(subg, parent_children.keys()) + all_parent_children = self.get_ancestors_nodes( + subgraph, parent_children.keys() + ) # Get the layers that each node belongs to. layers_json = self.get_node_layers_json( @@ -855,7 +916,7 @@ def get_tangled_tree_layers(self, save_file=True): # Gather relationships between children and their parents. child_parents, parent_children = self.get_parent_child_dictionary( - nodes, aliased_edges, all_attributes + aliased_edges, all_attributes ) # Remake topological_gen so it has only relevant nodes. From 7bf62b3fa4aea0943f26fcede5ea0ae0d2498568 Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Mon, 5 Feb 2024 07:35:55 -0800 Subject: [PATCH 2/7] added more typing --- schematic/visualization/tangled_tree.py | 67 ++++++++++++++++--------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py index eaba44fb1..cb0649a52 100644 --- a/schematic/visualization/tangled_tree.py +++ b/schematic/visualization/tangled_tree.py @@ -6,9 +6,10 @@ import logging import os from os import path -from typing import Optional, Any +from typing import Optional, Any, Literal import networkx as nx # type: ignore +from networkx.classes.reportviews import NodeView, EdgeDataView # type: ignore import numpy as np import pandas as pd @@ -85,7 +86,9 @@ def strip_double_quotes(self, string: str) -> str: string = "".join(string.split()) return string - def get_text_for_tangled_tree(self, text_type, save_file=False): + def get_text_for_tangled_tree( + self, text_type: Literal["highlighted", "plain"], save_file: bool = False + ) -> Optional[str]: """ Gather the text that needs to be either highlighted or plain for the tangled tree visualization. @@ -156,7 +159,9 @@ def get_text_for_tangled_tree(self, text_type, save_file=False): return df.to_csv() - def get_topological_generations(self): + def get_topological_generations( + self, + ) -> tuple[list[list], NodeView, EdgeDataView, nx.DiGraph]: """Gather topological_gen, nodes and edges based on figure type. Outputs: topological_gen (List(list)):list of lists. Indicates layers of nodes. @@ -200,7 +205,7 @@ def remove_unwanted_characters_from_conditional_statement( cond_req = cond_req_new.replace("If", "").lstrip().rstrip() return cond_req - def get_ca_alias(self, conditional_requirements: list) -> dict: + def get_ca_alias(self, conditional_requirements: list) -> dict[str, str]: """Get the alias for each conditional attribute. NOTE: Obtaining attributes(attr) and aliases(ali) in this function is specific @@ -236,7 +241,9 @@ def get_ca_alias(self, conditional_requirements: list) -> dict: ca_alias[elem] = attr return ca_alias - def gather_component_dependency_info(self, cn, attributes_df): + def gather_component_dependency_info( + self, cn: str, attributes_df: pd.DataFrame + ) -> tuple[list[str], dict[str, str], list[str]]: """Gather all component dependency information. Inputs: cn: (str) component name @@ -282,7 +289,12 @@ def gather_component_dependency_info(self, cn, attributes_df): return conditional_attributes, ca_alias, all_attributes - def find_source_nodes(self, nodes, edges, all_attributes=None): + def find_source_nodes( + self, + nodes: NodeView, + edges: EdgeDataView, + all_attributes: Optional[list[str]] = None, + ) -> list[str]: """Find all nodes in the graph that do not have a parent node. Inputs: nodes: (Networkx NodeView) Nodes of the component or dependency graph. @@ -315,7 +327,9 @@ def find_source_nodes(self, nodes, edges, all_attributes=None): source_nodes.append(node) return source_nodes - def get_parent_child_dictionary(self, edges, all_attributes=None): + def get_parent_child_dictionary( + self, edges: EdgeDataView, all_attributes: Optional[list[str]] = None + ) -> tuple[dict[str, list[str]], dict[str, list[str]]]: """ Based on the dependency type, create dictionaries between parent and child and child and parent attributes. @@ -332,10 +346,9 @@ def get_parent_child_dictionary(self, edges, all_attributes=None): value: list of the parents children """ # pylint: disable=too-many-branches - if all_attributes is None: - all_attributes = [] - child_parents = {} - parent_children = {} + all_attributes_list = [] if all_attributes is None else all_attributes + child_parents: dict[str, list[str]] = {} + parent_children: dict[str, list[str]] = {} if self.dependency_type == "requiresComponent": # Construct child_parents dictionary @@ -360,30 +373,30 @@ def get_parent_child_dictionary(self, edges, all_attributes=None): # Construct child_parents dictionary for edge in edges: # Check if child is an attribute for the current component - if edge[0] in all_attributes: + if edge[0] in all_attributes_list: # Add child as a key if edge[0] not in child_parents: child_parents[edge[0]] = [] # Add parent to list if it is an attribute for the current component - if edge[1] in all_attributes: + if edge[1] in all_attributes_list: child_parents[edge[0]].append(edge[1]) # Construct parent_children dictionary for edge in edges: # Check if parent is an attribute for the current component - if edge[1] in all_attributes: + if edge[1] in all_attributes_list: # Add parent as a key if edge[1] not in parent_children: parent_children[edge[1]] = [] # Add child to list if it is an attribute for the current component - if edge[0] in all_attributes: + if edge[0] in all_attributes_list: parent_children[edge[1]].append(edge[0]) return child_parents, parent_children - def alias_edges(self, ca_alias: dict, edges) -> list[list]: + def alias_edges(self, ca_alias: dict[str, str], edges: EdgeDataView) -> list[list]: """Create new edges based on aliasing between an attribute and its response. Purpose: Create aliased edges. @@ -435,8 +448,11 @@ def alias_edges(self, ca_alias: dict, edges) -> list[list]: return aliased_edges def prune_expand_topological_gen( - self, topological_gen, all_attributes, conditional_attributes - ): + self, + topological_gen: list[list[str]], + all_attributes: list[str], + conditional_attributes: list[str], + ) -> list[list[str]]: """ Purpose: Remake topological_gen with only relevant nodes. @@ -672,7 +688,7 @@ def get_layers_dict_list( child_parents: dict, parent_children: dict, all_parent_children: dict, - ): + ) -> list[list[dict[str, list[str]]]]: """Convert node_layers to a list of lists of dictionaries that specifies each node and its parents (if applicable). Inputs: @@ -689,7 +705,9 @@ def get_layers_dict_list( parents (if applicable) """ num_layers = len(node_layers) - layers_list = [[] for i in range(0, num_layers)] + layers_list: list[list[dict[str, list[str]]]] = [ + [] for i in range(0, num_layers) + ] for i, layer in enumerate(node_layers): for node in layer: if node in child_parents.keys(): @@ -793,8 +811,7 @@ def save_outputs( tangled tree. If save_file ==True: is an empty list. """ - if all_layers is None: - all_layers = [] + all_layers_list = [] if all_layers is None else all_layers if save_file: if cn: output_file_name = ( @@ -818,10 +835,10 @@ def save_outputs( f"{os.path.join(self.json_output_path, output_file_name)}" ) ) - all_layers = layers_json + all_layers_list = layers_json else: - all_layers.append(layers_json) - return all_layers + all_layers_list.append(layers_json) + return all_layers_list def get_ancestors_nodes( self, subgraph: nx.DiGraph, components: list[str] From 11e547f1dc4f0bde20166cd29610a3624ff8ab94 Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Mon, 5 Feb 2024 07:36:37 -0800 Subject: [PATCH 3/7] added more typing --- schematic/visualization/tangled_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py index cb0649a52..ccda4dedc 100644 --- a/schematic/visualization/tangled_tree.py +++ b/schematic/visualization/tangled_tree.py @@ -796,7 +796,7 @@ def save_outputs( layers_json, cn: str = "", all_layers: Optional[list[str]] = None, - ): + ) -> list[str]: """ Inputs: save_file (bool): Indicates whether to save a file locally or not.: From 8b2b33815144e61d681d5326ee130326d3bb0693 Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Mon, 5 Feb 2024 08:30:17 -0800 Subject: [PATCH 4/7] moved some pylint disbale lines around --- schematic/visualization/commands.py | 2 +- schematic/visualization/tangled_tree.py | 74 ++++++++++++++----------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/schematic/visualization/commands.py b/schematic/visualization/commands.py index d69354a7b..4ed31595f 100644 --- a/schematic/visualization/commands.py +++ b/schematic/visualization/commands.py @@ -2,6 +2,7 @@ # pylint: disable=unused-argument # pylint: disable=useless-return # pylint: disable=unused-variable +# pylint: disable=logging-fstring-interpolation import logging import sys @@ -40,7 +41,6 @@ def viz(ctx: Any, config: str) -> None: # use as `schematic model ...` Sub-commands for Visualization methods. """ try: - # pylint: disable=logging-fstring-interpolation logger.debug(f"Loading config file contents in '{config}'") CONFIG.load_config(config) ctx.obj = CONFIG diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py index ccda4dedc..9c14cb3bb 100644 --- a/schematic/visualization/tangled_tree.py +++ b/schematic/visualization/tangled_tree.py @@ -1,5 +1,7 @@ """Tangled tree class""" +# pylint: disable=logging-fstring-interpolation +# pylint: disable=too-many-instance-attributes from io import StringIO import json @@ -25,9 +27,6 @@ class TangledTree: """Tangled tree class""" - # pylint: disable=too-many-instance-attributes - # pylint: disable=invalid-name - def __init__( self, path_to_json_ld: str, @@ -64,11 +63,15 @@ def __init__( self.schema_abbr = self.schema_name.split("_")[0] # Initialize AttributesExplorer - self.ae = AttributesExplorer(self.path_to_json_ld) + self.attributes_explorer = AttributesExplorer(self.path_to_json_ld) # Create output paths. - self.text_csv_output_path = self.ae.create_output_path("text_csv") - self.json_output_path = self.ae.create_output_path("tangled_tree_json") + self.text_csv_output_path = self.attributes_explorer.create_output_path( + "text_csv" + ) + self.json_output_path = self.attributes_explorer.create_output_path( + "tangled_tree_json" + ) def strip_double_quotes(self, string: str) -> str: """Removes double quotes from string @@ -135,29 +138,29 @@ def get_text_for_tangled_tree( plain_descendants = [n for n in nodes if n != node] else: # Format highlighted text for Observable. - for hd in highlight_descendants: - highlighted.append([node, "id", hd]) + for descendant in highlight_descendants: + highlighted.append([node, "id", descendant]) # Gather the non-highlighted text as plain text descendants. plain_descendants = [ node for node in nodes if node not in highlight_descendants ] # Format all the plain text for observable. - for nd in plain_descendants: - plain.append([node, "id", nd]) + for descendant in plain_descendants: + plain.append([node, "id", descendant]) # Prepare df depending on what type of text we need. - df = pd.DataFrame( + dataframe = pd.DataFrame( locals()[text_type.lower()], columns=["Component", "type", "name"] ) # Depending on input either export csv locally to disk or as a string. if save_file: file_name = f"{self.schema_abbr}_{self.figure_type}_{text_type}.csv" - df.to_csv(os.path.join(self.text_csv_output_path, file_name)) + dataframe.to_csv(os.path.join(self.text_csv_output_path, file_name)) return None - return df.to_csv() + return dataframe.to_csv() def get_topological_generations( self, @@ -242,11 +245,11 @@ def get_ca_alias(self, conditional_requirements: list) -> dict[str, str]: return ca_alias def gather_component_dependency_info( - self, cn: str, attributes_df: pd.DataFrame + self, component_name: str, attributes_df: pd.DataFrame ) -> tuple[list[str], dict[str, str], list[str]]: """Gather all component dependency information. Inputs: - cn: (str) component name + component name: (str) component name attributes_df: (Pandas DataFrame) Details for all attributes across all components. From AttributesExplorer. Outputs: @@ -259,7 +262,7 @@ def gather_component_dependency_info( # Gather all component dependency information component_attributes = self.dmge.get_descendants_by_edge_type( - cn, self.dependency_type, connected=True + component_name, self.dependency_type, connected=True ) # Dont want to display `Component` in the figure so remove @@ -270,12 +273,14 @@ def gather_component_dependency_info( if "Cond_Req" in attributes_df.columns: conditional_attributes = list( attributes_df[ - (attributes_df["Cond_Req"]) & (attributes_df["Component"] == cn) + (attributes_df["Cond_Req"]) + & (attributes_df["Component"] == component_name) ]["Label"] ) conditional_requirements = list( attributes_df[ - (attributes_df["Cond_Req"]) & (attributes_df["Component"] == cn) + (attributes_df["Cond_Req"]) + & (attributes_df["Component"] == component_name) ]["Conditional Requirements"] ) ca_alias = self.get_ca_alias(conditional_requirements) @@ -501,7 +506,7 @@ def get_base_layers( topological_gen: list[list], child_parents: dict, source_nodes: list, - cn: str, + component_name: str, ) -> tuple[dict[str, Any], dict[str, Any]]: """ Purpose: @@ -527,7 +532,7 @@ def get_base_layers( key: child value: list of the child's parents source_nodes: list, list of nodes that do not have a parent. - cn: str, component name, default='' + component_name: str, component name, default='' Output: base_layers: dict, key: component name, value: layer represents initial layering of topological_gen @@ -590,7 +595,7 @@ def get_base_layers( parent_levels.count(parent_levels[0]) != len(parent_levels) ) - and par != cn + and par != component_name ): # If so, remove its position from parent_levels parent_levels.remove(base_layers_copy[par]) @@ -741,7 +746,7 @@ def get_node_layers_json( # pylint: disable=too-many-arguments source_nodes: list[str], child_parents: dict, parent_children: dict, - cn: str = "", + component_name: str = "", all_parent_children: Optional[dict] = None, ) -> str: """Return all the layers of a single tangled tree as a JSON String. @@ -763,7 +768,7 @@ def get_node_layers_json( # pylint: disable=too-many-arguments """ base_layers, base_layers_copy_copy = self.get_base_layers( - topological_gen, child_parents, source_nodes, cn + topological_gen, child_parents, source_nodes, component_name ) # Rearrange node_layers to follow the pattern laid out in component layers. @@ -794,14 +799,14 @@ def save_outputs( self, save_file: bool, layers_json, - cn: str = "", + component_name: str = "", all_layers: Optional[list[str]] = None, ) -> list[str]: """ Inputs: save_file (bool): Indicates whether to save a file locally or not.: layers_json (JSON String): Layers of nodes in the tangled tree as a json string. - cn (str): component name, default='' + component_name (str): component name, default='' all_layers (list of json strings): Each string represents contains the layers for a single tangled tree. If a dependency figure the list is added to each time this function is called, so starts incomplete. default=[]. @@ -813,9 +818,9 @@ def save_outputs( """ all_layers_list = [] if all_layers is None else all_layers if save_file: - if cn: + if component_name: output_file_name = ( - f"{self.schema_abbr}_{self.figure_type}_{cn}_tangled_tree.json" + f"{self.schema_abbr}_{self.figure_type}_{component_name}_tangled_tree.json" ) else: output_file_name = ( @@ -828,7 +833,6 @@ def save_outputs( ) as outfile: outfile.write(layers_json) - # pylint: disable=logging-fstring-interpolation logger.info( ( "Tangled Tree JSON String saved to " @@ -910,17 +914,19 @@ def get_tangled_tree_layers(self, save_file: bool = True): component_nodes = component_dg.nodes() # Get table of attributes. - attributes_csv_str = self.ae.parse_attributes(save_file=False) + attributes_csv_str = self.attributes_explorer.parse_attributes( + save_file=False + ) attributes_df = pd.read_table(StringIO(attributes_csv_str), sep=",") all_layers = [] - for cn in component_nodes: + for component_name in component_nodes: # Gather attribute and dependency information per node ( conditional_attributes, ca_alias, all_attributes, - ) = self.gather_component_dependency_info(cn, attributes_df) + ) = self.gather_component_dependency_info(component_name, attributes_df) # Gather all source nodes source_nodes = self.find_source_nodes( @@ -947,9 +953,11 @@ def get_tangled_tree_layers(self, save_file: bool = True): source_nodes, child_parents, parent_children, - cn, + component_name, ) # If indicated save outputs locally else, gather all layers. - all_layers = self.save_outputs(save_file, layers_json, cn, all_layers) + all_layers = self.save_outputs( + save_file, layers_json, component_name, all_layers + ) return all_layers From ca57ec6c720563527d980c35eaa5ce2ff4f99d55 Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Mon, 5 Feb 2024 08:34:38 -0800 Subject: [PATCH 5/7] fix some linting --- schematic/visualization/tangled_tree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py index 9c14cb3bb..ed4db2b8c 100644 --- a/schematic/visualization/tangled_tree.py +++ b/schematic/visualization/tangled_tree.py @@ -1,7 +1,6 @@ """Tangled tree class""" # pylint: disable=logging-fstring-interpolation -# pylint: disable=too-many-instance-attributes from io import StringIO import json @@ -24,7 +23,7 @@ logger = logging.getLogger(__name__) -class TangledTree: +class TangledTree: # pylint: disable=too-many-instance-attributes """Tangled tree class""" def __init__( @@ -820,7 +819,8 @@ def save_outputs( if save_file: if component_name: output_file_name = ( - f"{self.schema_abbr}_{self.figure_type}_{component_name}_tangled_tree.json" + f"{self.schema_abbr}_{self.figure_type}_" + f"{component_name}_tangled_tree.json" ) else: output_file_name = ( From d736f067d13fb0248f9a395ce3764efb624228b5 Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Wed, 7 Feb 2024 16:29:11 -0800 Subject: [PATCH 6/7] mialys suggestions and fixes --- schematic/visualization/attributes_explorer.py | 4 ++-- schematic/visualization/tangled_tree.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/schematic/visualization/attributes_explorer.py b/schematic/visualization/attributes_explorer.py index 71747f999..ff710952e 100644 --- a/schematic/visualization/attributes_explorer.py +++ b/schematic/visualization/attributes_explorer.py @@ -162,8 +162,8 @@ def _parse_attributes( # Gather all attributes, their valid values and requirements for key, value in json_schema["properties"].items(): data_dict[key] = {} - for k, _ in value.items(): - if k == "enum": + for key in value.keys(): + if key == "enum": data_dict[key]["Valid Values"] = value["enum"] if key in json_schema["required"]: data_dict[key]["Required"] = True diff --git a/schematic/visualization/tangled_tree.py b/schematic/visualization/tangled_tree.py index ed4db2b8c..a297aa400 100644 --- a/schematic/visualization/tangled_tree.py +++ b/schematic/visualization/tangled_tree.py @@ -229,7 +229,7 @@ def get_ca_alias(self, conditional_requirements: list) -> dict[str, str]: for req in conditional_requirements ] - for _, req in enumerate(conditional_requirements): + for req in conditional_requirements: if "OR" not in req: attr, ali = req.split(" is ") attr = "".join(attr.split()) @@ -424,7 +424,7 @@ def alias_edges(self, ca_alias: dict[str, str], edges: EdgeDataView) -> list[lis aliased_edges (list[list]) of aliased edges. """ aliased_edges = [] - for _, edge in enumerate(edges): + for edge in edges: # construct one set of edges at a time edge_set = [] @@ -477,7 +477,7 @@ def prune_expand_topological_gen( pruned_topological_gen = [] # For each layer(gen) in the topological generation list - for _, layer in enumerate(topological_gen): + for layer in topological_gen: current_layer = [] next_layer = [] @@ -676,7 +676,7 @@ def move_source_nodes_to_bottom_of_layer( Output: node_layers (List(list)): modified to move source nodes to the bottom of each layer. """ - for _, layer in enumerate(node_layers): + for layer in node_layers: nodes_to_move = [] for node in layer: if node in source_nodes: @@ -889,7 +889,9 @@ def get_tangled_tree_layers(self, save_file: bool = True): source_nodes = self.find_source_nodes(nodes, edges) # Map all children to their parents and vice versa - child_parents, parent_children = self.get_parent_child_dictionary(edges) + child_parents, parent_children = self.get_parent_child_dictionary( + edges=edges + ) # find all the downstream nodes all_parent_children = self.get_ancestors_nodes( From 60ac14d91ffbff9818bcda1068553657554f5899 Mon Sep 17 00:00:00 2001 From: andrewelamb Date: Wed, 7 Feb 2024 17:38:13 -0800 Subject: [PATCH 7/7] fix accidental name collision of dict keys --- schematic/visualization/attributes_explorer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schematic/visualization/attributes_explorer.py b/schematic/visualization/attributes_explorer.py index ff710952e..f2dac9736 100644 --- a/schematic/visualization/attributes_explorer.py +++ b/schematic/visualization/attributes_explorer.py @@ -162,8 +162,8 @@ def _parse_attributes( # Gather all attributes, their valid values and requirements for key, value in json_schema["properties"].items(): data_dict[key] = {} - for key in value.keys(): - if key == "enum": + for inner_key in value.keys(): + if inner_key == "enum": data_dict[key]["Valid Values"] = value["enum"] if key in json_schema["required"]: data_dict[key]["Required"] = True