From 7a2baddff4017695a4e55785df94123b4e02c036 Mon Sep 17 00:00:00 2001 From: Mialy DeFelice Date: Tue, 19 Sep 2023 15:32:59 -0700 Subject: [PATCH 1/2] update updateDB to take sg and se --- schematic/store/synapse.py | 68 +++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 40915824b..46b5f7a73 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -800,7 +800,7 @@ def getProjectManifests(self, projectId: str) -> List[str]: return manifests - def upload_project_manifests_to_synapse(self, projectId: str) -> List[str]: + def upload_project_manifests_to_synapse(self, sg: SchemaGenerator, se: SchemaExplorer, projectId: str) -> List[str]: """Upload all metadata manifest files across all datasets in a specified project as tables in Synapse. Returns: String of all the manifest_table_ids of all the manifests that have been loaded. @@ -822,7 +822,7 @@ def upload_project_manifests_to_synapse(self, projectId: str) -> List[str]: manifest_name = manifest_info["properties"]["name"] manifest_path = manifest_info["path"] manifest_df = load_df(manifest_path) - manifest_table_id = uploadDB(manifest, datasetId, datasetName) + manifest_table_id = uploadDB(sg=sg, se=se, manifest=manifest, datasetId=datasetId, table_name=datasetName) manifest_loaded.append(datasetName) return manifest_loaded @@ -952,7 +952,8 @@ def get_table_info(self, datasetId: str = None, projectId: str = None) -> List[s @missing_entity_handler def uploadDB(self, - sg: SchemaGenerator, + sg: SchemaGenerator, + se: SchemaExplorer, manifest: pd.DataFrame, datasetId: str, table_name: str, @@ -964,6 +965,7 @@ def uploadDB(self, Method to upload a database to an asset store. In synapse, this will upload a metadata table Args: + sg: schemaGenerator object se: schemaExplorer object manifest: pd.Df manifest to upload datasetId: synID of the dataset for the manifest @@ -981,13 +983,13 @@ def uploadDB(self, """ - col_schema, table_manifest = self.formatDB(sg, manifest, useSchemaLabel) + col_schema, table_manifest = self.formatDB(se=se, manifest=manifest, useSchemaLabel=useSchemaLabel) manifest_table_id = self.buildDB(datasetId, table_name, col_schema, table_manifest, table_manipulation, sg, restrict,) return manifest_table_id, manifest, table_manifest - def formatDB(self, sg, manifest, useSchemaLabel): + def formatDB(self, se, manifest, useSchemaLabel): """ Method to format a manifest appropriatly for upload as table @@ -1010,7 +1012,7 @@ def formatDB(self, sg, manifest, useSchemaLabel): if useSchemaLabel: cols = [ - sg.se.get_class_label_from_display_name( + se.get_class_label_from_display_name( str(col) ).translate({ord(x): '' for x in blacklist_chars}) for col in manifest_columns @@ -1499,13 +1501,14 @@ def upload_manifest_as_table( """ # Upload manifest as a table, get the ID and updated manifest. manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( - schemaGenerator, - manifest, - datasetId, - table_name, - restrict, - useSchemaLabel, - table_manipulation) + sg=schemaGenerator, + se=se, + manifest=manifest, + datasetId=datasetId, + table_name=table_name, + restrict=restrict, + useSchemaLabel=useSchemaLabel, + table_manipulation=table_manipulation) manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id) # Load manifest to synapse as a CSV File @@ -1518,13 +1521,14 @@ def upload_manifest_as_table( # Update manifest Synapse table with new entity id column. manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( - schemaGenerator, - manifest, - datasetId, - table_name, - restrict, + sg=schemaGenerator, + se=se, + manifest=manifest, + datasetId=datasetId, + table_name=table_name, + restrict=restrict, useSchemaLabel=useSchemaLabel, - table_manipulation='update',) + table_manipulation='update') # Set annotations for the table manifest manifest_annotations = self.format_manifest_annotations(manifest, manifest_synapse_table_id) @@ -1605,13 +1609,14 @@ def upload_manifest_combo( manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( - se, - manifest, - datasetId, - table_name, - restrict, + sg=schemaGenerator, + se=se, + manifest=manifest, + datasetId=datasetId, + table_name=table_name, + restrict=restrict, useSchemaLabel=useSchemaLabel, - table_manipulation=table_manipulation,) + table_manipulation=table_manipulation) manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id) @@ -1625,13 +1630,14 @@ def upload_manifest_combo( # Update manifest Synapse table with new entity id column. manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( - se, - manifest, - datasetId, - table_name, - restrict, + sg=schemaGenerator, + se=se, + manifest=manifest, + datasetId=datasetId, + table_name=table_name, + restrict=restrict, useSchemaLabel=useSchemaLabel, - table_manipulation='update',) + table_manipulation='update') # Set annotations for the table manifest manifest_annotations = self.format_manifest_annotations(manifest, manifest_synapse_table_id) From 463cb547fd8cb6385a2369d07654899836144cb5 Mon Sep 17 00:00:00 2001 From: Mialy DeFelice Date: Mon, 2 Oct 2023 10:49:16 -0700 Subject: [PATCH 2/2] simplify sg ref --- schematic/store/synapse.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 46b5f7a73..f6e7e7096 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -800,7 +800,7 @@ def getProjectManifests(self, projectId: str) -> List[str]: return manifests - def upload_project_manifests_to_synapse(self, sg: SchemaGenerator, se: SchemaExplorer, projectId: str) -> List[str]: + def upload_project_manifests_to_synapse(self, sg: SchemaGenerator, projectId: str) -> List[str]: """Upload all metadata manifest files across all datasets in a specified project as tables in Synapse. Returns: String of all the manifest_table_ids of all the manifests that have been loaded. @@ -822,7 +822,7 @@ def upload_project_manifests_to_synapse(self, sg: SchemaGenerator, se: SchemaExp manifest_name = manifest_info["properties"]["name"] manifest_path = manifest_info["path"] manifest_df = load_df(manifest_path) - manifest_table_id = uploadDB(sg=sg, se=se, manifest=manifest, datasetId=datasetId, table_name=datasetName) + manifest_table_id = uploadDB(sg=sg, manifest=manifest, datasetId=datasetId, table_name=datasetName) manifest_loaded.append(datasetName) return manifest_loaded @@ -953,7 +953,6 @@ def get_table_info(self, datasetId: str = None, projectId: str = None) -> List[s @missing_entity_handler def uploadDB(self, sg: SchemaGenerator, - se: SchemaExplorer, manifest: pd.DataFrame, datasetId: str, table_name: str, @@ -966,7 +965,6 @@ def uploadDB(self, Args: sg: schemaGenerator object - se: schemaExplorer object manifest: pd.Df manifest to upload datasetId: synID of the dataset for the manifest table_name: name of the table to be uploaded @@ -983,18 +981,18 @@ def uploadDB(self, """ - col_schema, table_manifest = self.formatDB(se=se, manifest=manifest, useSchemaLabel=useSchemaLabel) + col_schema, table_manifest = self.formatDB(sg=sg, manifest=manifest, useSchemaLabel=useSchemaLabel) manifest_table_id = self.buildDB(datasetId, table_name, col_schema, table_manifest, table_manipulation, sg, restrict,) return manifest_table_id, manifest, table_manifest - def formatDB(self, se, manifest, useSchemaLabel): + def formatDB(self, sg, manifest, useSchemaLabel): """ Method to format a manifest appropriatly for upload as table Args: - se: schemaExplorer object + sg: schemaGenerator object manifest: pd.Df manifest to upload useSchemaLabel: bool whether to use schemaLabel (True) or display label (False) @@ -1012,7 +1010,7 @@ def formatDB(self, se, manifest, useSchemaLabel): if useSchemaLabel: cols = [ - se.get_class_label_from_display_name( + sg.se.get_class_label_from_display_name( str(col) ).translate({ord(x): '' for x in blacklist_chars}) for col in manifest_columns @@ -1502,7 +1500,6 @@ def upload_manifest_as_table( # Upload manifest as a table, get the ID and updated manifest. manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( sg=schemaGenerator, - se=se, manifest=manifest, datasetId=datasetId, table_name=table_name, @@ -1522,7 +1519,6 @@ def upload_manifest_as_table( # Update manifest Synapse table with new entity id column. manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( sg=schemaGenerator, - se=se, manifest=manifest, datasetId=datasetId, table_name=table_name, @@ -1610,7 +1606,6 @@ def upload_manifest_combo( """ manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( sg=schemaGenerator, - se=se, manifest=manifest, datasetId=datasetId, table_name=table_name, @@ -1631,7 +1626,6 @@ def upload_manifest_combo( # Update manifest Synapse table with new entity id column. manifest_synapse_table_id, manifest, table_manifest = self.uploadDB( sg=schemaGenerator, - se=se, manifest=manifest, datasetId=datasetId, table_name=table_name,