Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update store/synapse.updateDB to take sg and se #1291

Merged
merged 2 commits into from
Oct 10, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 37 additions & 31 deletions schematic/store/synapse.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,7 +800,7 @@ def getProjectManifests(self, projectId: str) -> List[str]:

return manifests

def upload_project_manifests_to_synapse(self, projectId: str) -> List[str]:
def upload_project_manifests_to_synapse(self, sg: SchemaGenerator, se: SchemaExplorer, projectId: str) -> List[str]:
"""Upload all metadata manifest files across all datasets in a specified project as tables in Synapse.

Returns: String of all the manifest_table_ids of all the manifests that have been loaded.
Expand All @@ -822,7 +822,7 @@ def upload_project_manifests_to_synapse(self, projectId: str) -> List[str]:
manifest_name = manifest_info["properties"]["name"]
manifest_path = manifest_info["path"]
manifest_df = load_df(manifest_path)
manifest_table_id = uploadDB(manifest, datasetId, datasetName)
manifest_table_id = uploadDB(sg=sg, se=se, manifest=manifest, datasetId=datasetId, table_name=datasetName)
manifest_loaded.append(datasetName)
return manifest_loaded

Expand Down Expand Up @@ -952,7 +952,8 @@ def get_table_info(self, datasetId: str = None, projectId: str = None) -> List[s

@missing_entity_handler
def uploadDB(self,
sg: SchemaGenerator,
sg: SchemaGenerator,
se: SchemaExplorer,
manifest: pd.DataFrame,
datasetId: str,
table_name: str,
Expand All @@ -964,6 +965,7 @@ def uploadDB(self,
Method to upload a database to an asset store. In synapse, this will upload a metadata table

Args:
sg: schemaGenerator object
se: schemaExplorer object
manifest: pd.Df manifest to upload
datasetId: synID of the dataset for the manifest
Expand All @@ -981,13 +983,13 @@ def uploadDB(self,
"""


col_schema, table_manifest = self.formatDB(sg, manifest, useSchemaLabel)
col_schema, table_manifest = self.formatDB(se=se, manifest=manifest, useSchemaLabel=useSchemaLabel)

manifest_table_id = self.buildDB(datasetId, table_name, col_schema, table_manifest, table_manipulation, sg, restrict,)

return manifest_table_id, manifest, table_manifest

def formatDB(self, sg, manifest, useSchemaLabel):
def formatDB(self, se, manifest, useSchemaLabel):
"""
Method to format a manifest appropriately for upload as a table

Expand All @@ -1010,7 +1012,7 @@ def formatDB(self, sg, manifest, useSchemaLabel):

if useSchemaLabel:
cols = [
sg.se.get_class_label_from_display_name(
se.get_class_label_from_display_name(
str(col)
).translate({ord(x): '' for x in blacklist_chars})
for col in manifest_columns
Expand Down Expand Up @@ -1499,13 +1501,14 @@ def upload_manifest_as_table(
"""
# Upload manifest as a table, get the ID and updated manifest.
manifest_synapse_table_id, manifest, table_manifest = self.uploadDB(
schemaGenerator,
manifest,
datasetId,
table_name,
restrict,
useSchemaLabel,
table_manipulation)
sg=schemaGenerator,
se=se,
manifest=manifest,
datasetId=datasetId,
table_name=table_name,
restrict=restrict,
useSchemaLabel=useSchemaLabel,
table_manipulation=table_manipulation)

manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)
# Load manifest to synapse as a CSV File
Expand All @@ -1518,13 +1521,14 @@ def upload_manifest_as_table(

# Update manifest Synapse table with new entity id column.
manifest_synapse_table_id, manifest, table_manifest = self.uploadDB(
schemaGenerator,
manifest,
datasetId,
table_name,
restrict,
sg=schemaGenerator,
se=se,
manifest=manifest,
datasetId=datasetId,
table_name=table_name,
restrict=restrict,
useSchemaLabel=useSchemaLabel,
table_manipulation='update',)
table_manipulation='update')

# Set annotations for the table manifest
manifest_annotations = self.format_manifest_annotations(manifest, manifest_synapse_table_id)
Expand Down Expand Up @@ -1605,13 +1609,14 @@ def upload_manifest_combo(
manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse.
"""
manifest_synapse_table_id, manifest, table_manifest = self.uploadDB(
se,
manifest,
datasetId,
table_name,
restrict,
sg=schemaGenerator,
se=se,
manifest=manifest,
datasetId=datasetId,
table_name=table_name,
restrict=restrict,
useSchemaLabel=useSchemaLabel,
table_manipulation=table_manipulation,)
table_manipulation=table_manipulation)

manifest = self.add_annotations_to_entities_files(se, schemaGenerator, manifest, manifest_record_type, datasetId, hideBlanks, manifest_synapse_table_id)

Expand All @@ -1625,13 +1630,14 @@ def upload_manifest_combo(

# Update manifest Synapse table with new entity id column.
manifest_synapse_table_id, manifest, table_manifest = self.uploadDB(
se,
manifest,
datasetId,
table_name,
restrict,
sg=schemaGenerator,
se=se,
manifest=manifest,
datasetId=datasetId,
table_name=table_name,
restrict=restrict,
useSchemaLabel=useSchemaLabel,
table_manipulation='update',)
table_manipulation='update')

# Set annotations for the table manifest
manifest_annotations = self.format_manifest_annotations(manifest, manifest_synapse_table_id)
Expand Down