Merge pull request #1359 from Sage-Bionetworks/feature-fds-367-pytlint-visualization-module

linted visualization module
andrewelamb authored Feb 8, 2024
2 parents fd14fc6 + 60ac14d commit eb32285
Showing 5 changed files with 335 additions and 231 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -116,7 +116,7 @@ jobs:
run: |
# ran only on certain files for now
# add here when checked
poetry run pylint schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py
poetry run pylint schematic/visualization/* schematic/configuration/*.py schematic/exceptions.py schematic/help.py schematic/loader.py schematic/version.py
#----------------------------------------------
# run test suite
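For reference, a minimal sketch of reproducing the expanded lint scope locally from the repository root (assumes poetry and the dev dependencies are installed; glob expansion stands in for the shell's):

import glob
import subprocess

# Mirror the lint targets from the updated workflow step.
targets = (
    glob.glob("schematic/visualization/*")
    + glob.glob("schematic/configuration/*.py")
    + [
        "schematic/exceptions.py",
        "schematic/help.py",
        "schematic/loader.py",
        "schematic/version.py",
    ]
)
subprocess.run(["poetry", "run", "pylint", *targets], check=True)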
1 change: 1 addition & 0 deletions schematic/visualization/__init__.py
@@ -1,2 +1,3 @@
"""visualization imports"""
from schematic.visualization.attributes_explorer import AttributesExplorer
from schematic.visualization.tangled_tree import TangledTree
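With these re-exports in place, both classes resolve from the package root; a one-line sketch (import path taken from the diff, constructor arguments omitted since they are not part of this change):

from schematic.visualization import AttributesExplorer, TangledTree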
119 changes: 64 additions & 55 deletions schematic/visualization/attributes_explorer.py
@@ -1,21 +1,22 @@
import gc
"""Attributes Explorer Class"""
import json
import logging
import numpy as np
import os

import numpy as np
import pandas as pd
from typing import Any, Dict, Optional, Text, List

from schematic.schemas.data_model_parser import DataModelParser
from schematic.schemas.data_model_graph import DataModelGraph, DataModelGraphExplorer
from schematic.schemas.data_model_json_schema import DataModelJSONSchema

from schematic.utils.io_utils import load_json

logger = logging.getLogger(__name__)


class AttributesExplorer:
"""AttributesExplorer class"""

def __init__(
self,
path_to_jsonld: str,
@@ -46,7 +47,7 @@ def __init__(

self.output_path = self.create_output_path("merged_csv")

def create_output_path(self, terminal_folder):
def create_output_path(self, terminal_folder: str) -> str:
"""Create output path to store Observable visualization data if it does not already exist.
Args: self.path_to_jsonld
@@ -62,20 +63,22 @@ def create_output_path(self, terminal_folder):
os.makedirs(output_path)
return output_path

def convert_string_cols_to_json(self, df: pd.DataFrame, cols_to_modify: list):
def convert_string_cols_to_json(
self, dataframe: pd.DataFrame, cols_to_modify: list[str]
) -> pd.DataFrame:
"""Converts values in a column from strings to JSON list
for upload to Synapse.
"""
for col in df.columns:
for col in dataframe.columns:
if col in cols_to_modify:
df[col] = df[col].apply(
dataframe[col] = dataframe[col].apply(
lambda x: json.dumps([y.strip() for y in x])
if x != "NaN" and x and x == np.nan
else x
)
return df
return dataframe

def parse_attributes(self, save_file=True):
def parse_attributes(self, save_file: bool = True) -> pd.DataFrame:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
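A side note on the lambda kept in the hunk above: NaN never compares equal to anything, itself included, so `x == np.nan` is always False and the json.dumps branch is unreachable as written. A minimal sketch of one possible intent, using a type guard instead (column values here are invented):

import json

import numpy as np
import pandas as pd

frame = pd.DataFrame({"Valid Values": [["a ", " b"], np.nan]})
# Dump real lists to JSON; pass missing values through untouched.
frame["Valid Values"] = frame["Valid Values"].apply(
    lambda x: json.dumps([y.strip() for y in x]) if isinstance(x, list) else x
)
print(frame["Valid Values"].tolist())  # ['["a", "b"]', nan]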
@@ -92,13 +95,13 @@ def __init__(
component_dg = self.dmge.get_digraph_by_edge_type("requiresComponent")
components = component_dg.nodes()

# For each data type to be loaded gather all attribtes the user would
# For each data type to be loaded gather all attributes the user would
# have to provide.
return self._parse_attributes(components, save_file)

def parse_component_attributes(
self, component=None, save_file=True, include_index=True
):
self, component=None, save_file: bool = True, include_index: bool = True
) -> pd.DataFrame:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
@@ -115,10 +118,11 @@ def parse_component_attributes(

if not component:
raise ValueError("You must provide a component to visualize.")
else:
return self._parse_attributes([component], save_file, include_index)
return self._parse_attributes([component], save_file, include_index)

def _parse_attributes(self, components, save_file=True, include_index=True):
def _parse_attributes(
self, components: list, save_file=True, include_index=True
) -> pd.DataFrame:
"""
Args: save_file (bool):
True: merged_df is saved locally to output_path.
@@ -138,23 +142,28 @@ def _parse_attributes(self, components, save_file=True, include_index=True):
If unable hits an error while attempting to get conditional requirements.
This error is likely to be found if there is a mismatch in naming.
"""
# This function needs to be refactored, temporarily disabling some pylint errors
# pylint: disable=too-many-locals
# pylint: disable=too-many-nested-blocks
# pylint: disable=too-many-branches
# pylint: disable=too-many-statements

# For each data type to be loaded gather all attribtes the user would
# For each data type to be loaded gather all attributes the user would
# have to provide.
df_store = []
for component in components:
data_dict = {}
data_dict: dict = {}

# get the json schema
json_schema = self.data_model_js.get_json_validation_schema(
source_node=component, schema_name=self.path_to_jsonld
)

# Gather all attribues, their valid values and requirements
# Gather all attributes, their valid values and requirements
for key, value in json_schema["properties"].items():
data_dict[key] = {}
for k, v in value.items():
if k == "enum":
for inner_key in value.keys():
if inner_key == "enum":
data_dict[key]["Valid Values"] = value["enum"]
if key in json_schema["required"]:
data_dict[key]["Required"] = True
@@ -163,20 +172,20 @@ def _parse_attributes(self, components, save_file=True, include_index=True):
data_dict[key]["Component"] = component
# Add additional details per key (from the JSON-ld)
for dic in self.jsonld["@graph"]:
if "sms:displayName" in dic.keys():
if "sms:displayName" in dic:
key = dic["sms:displayName"]
if key in data_dict.keys():
if key in data_dict:
data_dict[key]["Attribute"] = dic["sms:displayName"]
data_dict[key]["Label"] = dic["rdfs:label"]
data_dict[key]["Description"] = dic["rdfs:comment"]
if "validationRules" in dic.keys():
data_dict[key]["Validation Rules"] = dic["validationRules"]
# Find conditional dependencies
if "allOf" in json_schema.keys():
if "allOf" in json_schema:
for conditional_dependencies in json_schema["allOf"]:
key = list(conditional_dependencies["then"]["properties"])[0]
try:
if key in data_dict.keys():
if key in data_dict:
if "Cond_Req" not in data_dict[key].keys():
data_dict[key]["Cond_Req"] = []
data_dict[key]["Conditional Requirements"] = []
@@ -186,11 +195,12 @@ def _parse_attributes(self, components, save_file=True, include_index=True):
value = conditional_dependencies["if"]["properties"][
attribute
]["enum"]
# Capitalize attribute if it begins with a lowercase letter, for aesthetics.
# Capitalize attribute if it begins with a lowercase
# letter, for aesthetics.
if attribute[0].islower():
attribute = attribute.capitalize()

# Remove "Type" (i.e. turn "Biospecimen Type" to "Biospcimen")
# Remove "Type" (i.e. turn "Biospecimen Type" to "Biospecimen")
if "Type" in attribute:
attribute = attribute.split(" ")[0]

@@ -207,38 +217,37 @@ def _parse_attributes(self, components, save_file=True, include_index=True):
data_dict[key]["Conditional Requirements"].extend(
[conditional_statement]
)
except:
except Exception as exc:
raise ValueError(
f"There is an error getting conditional requirements related "
"to the attribute: {key}. The error is likely caused by naming inconsistencies (e.g. uppercase, camelcase, ...)"
)
(
"There is an error getting conditional requirements related "
f"to the attribute: {key}. The error is likely caused by naming "
"inconsistencies (e.g. uppercase, camelcase, ...)"
)
) from exc

for key, value in data_dict.items():
for outer_dict_key, inner_dict in data_dict.items():
if "Conditional Requirements" in value.keys():
if "Conditional Requirements" in inner_dict.keys():
## reformat conditional requirement
conditional_requirements = inner_dict["Conditional Requirements"]

# get all attributes
attr_lst = [
i.split(" is ")[-1]
for i in data_dict[key]["Conditional Requirements"]
]
attr_lst = [i.split(" is ")[-1] for i in conditional_requirements]

# join a list of attributes by using OR
attr_str = " OR ".join(attr_lst)

# reformat the conditional requirement
component_name = data_dict[key]["Conditional Requirements"][
0
].split(" is ")[0]
component_name = conditional_requirements[0].split(" is ")[0]

conditional_statement_str = (
f' If {component_name} is {attr_str} then "{key}" is required'
f" If {component_name} is {attr_str} then "
f'"{outer_dict_key}" is required'
)
conditional_requirements = conditional_statement_str

data_dict[key][
"Conditional Requirements"
] = conditional_statement_str
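To make the string assembly above concrete, a small walk-through with invented schema values (the real ones come from the JSON schema's allOf clauses):

conditional_requirements = ["File Format is BAM", "File Format is CRAM"]
outer_dict_key = "Genome Build"  # hypothetical attribute name

# Strip each statement down to its value, then join the values with OR.
attr_lst = [i.split(" is ")[-1] for i in conditional_requirements]
attr_str = " OR ".join(attr_lst)
component_name = conditional_requirements[0].split(" is ")[0]

print(f' If {component_name} is {attr_str} then "{outer_dict_key}" is required')
# -> ' If File Format is BAM OR CRAM then "Genome Build" is required'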
df = pd.DataFrame(data_dict)
df = df.T
data_dict_df = pd.DataFrame(data_dict)
data_dict_df = data_dict_df.T
cols = [
"Attribute",
"Label",
@@ -250,11 +259,12 @@ def _parse_attributes(self, components, save_file=True, include_index=True):
"Validation Rules",
"Component",
]
cols = [col for col in cols if col in df.columns]
df = df[cols]
df = self.convert_string_cols_to_json(df, ["Valid Values"])
# df.to_csv(os.path.join(csv_output_path, data_type + '.vis_data.csv'))
df_store.append(df)
cols = [col for col in cols if col in data_dict_df.columns]
data_dict_df = data_dict_df[cols]
data_dict_df = self.convert_string_cols_to_json(
data_dict_df, ["Valid Values"]
)
df_store.append(data_dict_df)

merged_attributes_df = pd.concat(df_store, join="outer")
cols = [
@@ -271,12 +281,11 @@ def _parse_attributes(self, components, save_file=True, include_index=True):
cols = [col for col in cols if col in merged_attributes_df.columns]

merged_attributes_df = merged_attributes_df[cols]
if save_file == True:
if save_file:
return merged_attributes_df.to_csv(
os.path.join(
self.output_path, self.schema_name + "attributes_data.vis_data.csv"
),
index=include_index,
)
elif save_file == False:
return merged_attributes_df.to_csv(index=include_index)
return merged_attributes_df.to_csv(index=include_index)
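One behavioral note on the branches above: pandas' DataFrame.to_csv returns None when a path is supplied and the CSV text when it is not, so the save_file=True branch returns None while the other returns a string. A quick illustration:

import pandas as pd

frame = pd.DataFrame({"a": [1]})
assert frame.to_csv("out.csv") is None  # writes the file, returns None
assert isinstance(frame.to_csv(), str)  # no path: returns the CSV text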
28 changes: 17 additions & 11 deletions schematic/visualization/commands.py
@@ -1,10 +1,15 @@
#!/usr/bin/env python3
"""visualization commands"""
# pylint: disable=unused-argument
# pylint: disable=useless-return
# pylint: disable=unused-variable
# pylint: disable=logging-fstring-interpolation

import logging
import sys
from typing import Any

import click
import click_log
import click_log # type: ignore

from schematic.visualization.attributes_explorer import AttributesExplorer
from schematic.visualization.tangled_tree import TangledTree
@@ -16,10 +21,11 @@
logger = logging.getLogger(__name__)
click_log.basic_config(logger)

CONTEXT_SETTINGS = dict(help_option_names=["--help", "-h"]) # help options
CONTEXT_SETTINGS = {"help_option_names": ["--help", "-h"]} # help options


# invoke_without_command=True -> forces the application not to show aids before losing them with a --h
# invoke_without_command=True -> forces the application not to show aids before
# losing them with a --h
@click.group(context_settings=CONTEXT_SETTINGS, invoke_without_command=True)
@click_log.simple_verbosity_option(logger)
@click.option(
@@ -30,17 +36,17 @@
help=query_dict(model_commands, ("model", "config")),
)
@click.pass_context
def viz(ctx, config): # use as `schematic model ...`
def viz(ctx: Any, config: str) -> None: # use as `schematic model ...`
"""
Sub-commands for Visualization methods.
"""
try:
logger.debug(f"Loading config file contents in '{config}'")
CONFIG.load_config(config)
ctx.obj = CONFIG
except ValueError as e:
except ValueError as exc:
logger.error("'--config' not provided or environment variable not set.")
logger.exception(e)
logger.exception(exc)
sys.exit(1)


@@ -49,8 +55,8 @@ def viz(ctx, config): # use as `schematic model ...`
)
@click_log.simple_verbosity_option(logger)
@click.pass_obj
def get_attributes(ctx):
""" """
def get_attributes(ctx: Any) -> None:
"""Gets attributes"""
# Get JSONLD file path
path_to_jsonld = CONFIG.model_location
log_value_from_config("jsonld", path_to_jsonld)
@@ -74,7 +80,7 @@ def get_attributes(ctx):
help=query_dict(viz_commands, ("visualization", "tangled_tree", "text_format")),
)
@click.pass_obj
def get_tangled_tree_text(ctx, figure_type, text_format):
def get_tangled_tree_text(ctx: Any, figure_type: str, text_format: str) -> None:
"""Get text to be placed on the tangled tree visualization."""
# Get JSONLD file path
path_to_jsonld = CONFIG.model_location
@@ -97,7 +103,7 @@ def get_tangled_tree_text(ctx, figure_type, text_format):
help=query_dict(viz_commands, ("visualization", "tangled_tree", "figure_type")),
)
@click.pass_obj
def get_tangled_tree_component_layers(ctx, figure_type):
def get_tangled_tree_component_layers(ctx: Any, figure_type: str) -> None:
"""Get the components that belong in each layer of the tangled tree visualization."""
# Get JSONLD file path
path_to_jsonld = CONFIG.model_location
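A minimal sketch of exercising a click group shaped like the one above in-process with click's test runner (the real command additionally wires in CONFIG and the subcommands):

import click
from click.testing import CliRunner

CONTEXT_SETTINGS = {"help_option_names": ["--help", "-h"]}  # help options

@click.group(context_settings=CONTEXT_SETTINGS, invoke_without_command=True)
def viz() -> None:
    """Sub-commands for Visualization methods."""

result = CliRunner().invoke(viz, ["-h"])
print(result.output)  # prints the generated --help text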
