diff --git a/poetry.lock b/poetry.lock index efab907c7..203239b7a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -190,19 +190,6 @@ files = [ [package.dependencies] typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} -[[package]] -name = "asyncio" -version = "3.4.3" -description = "reference implementation of PEP 3156" -optional = false -python-versions = "*" -files = [ - {file = "asyncio-3.4.3-cp33-none-win32.whl", hash = "sha256:b62c9157d36187eca799c378e572c969f0da87cd5fc42ca372d92cdb06e7e1de"}, - {file = "asyncio-3.4.3-cp33-none-win_amd64.whl", hash = "sha256:c46a87b48213d7464f22d9a497b9eef8c1928b68320a2fa94240f969f6fec08c"}, - {file = "asyncio-3.4.3-py3-none-any.whl", hash = "sha256:c4d18b22701821de07bd6aea8b53d21449ec0ec5680645e5317062ea21817d2d"}, - {file = "asyncio-3.4.3.tar.gz", hash = "sha256:83360ff8bc97980e4ff25c964c7bd3923d333d177aa4f7fb736b019f26c7cb41"}, -] - [[package]] name = "asyncio-atexit" version = "1.0.1" @@ -3243,24 +3230,6 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] -[[package]] -name = "pytest-asyncio" -version = "0.23.7" -description = "Pytest support for asyncio" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"}, - {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"}, -] - -[package.dependencies] -pytest = ">=7.0.0,<9" - -[package.extras] -docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] -testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] - [[package]] name = "pytest-cov" version = "4.1.0" @@ -3431,6 +3400,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -3438,8 +3408,16 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -3456,6 +3434,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -3463,6 +3442,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -4399,13 +4379,13 @@ Jinja2 = ">=2.0" [[package]] name = "synapseclient" -version = "4.3.0" +version = "4.2.0" description = "A client for Synapse, a collaborative, open-source research platform that allows teams to share data, track analyses, and collaborate." optional = false python-versions = ">=3.8" files = [ - {file = "synapseclient-4.3.0-py3-none-any.whl", hash = "sha256:5d8107cfff4031a0a46d60a3c9a8120300190fa27df4983d883dc951d8bd885f"}, - {file = "synapseclient-4.3.0.tar.gz", hash = "sha256:a1149a64b3281669d42c69e210677a902478b8f6b302966d518473c7384f6387"}, + {file = "synapseclient-4.2.0-py3-none-any.whl", hash = "sha256:ab5bc9c2bf5b90f271f1a9478eff7e9fca3e573578401ac706383ddb984d7a13"}, + {file = "synapseclient-4.2.0.tar.gz", hash = "sha256:89222661125de1795b1a096cf8c58b8115c19d6b0fa5846ed2a41cdb394ef773"}, ] [package.dependencies] @@ -4425,11 +4405,11 @@ urllib3 = ">=1.26.18,<2" [package.extras] boto3 = ["boto3 (>=1.7.0,<2.0)"] -dev = ["black", "flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pandas (>=1.5,<3.0)", "pre-commit", "pytest (>=7.0.0,<8.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] +dev = ["black", "flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pre-commit", "pytest (>=6.0.0,<7.0)", "pytest-asyncio (>=0.19,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] docs = ["markdown-include (>=0.8.1,<0.9.0)", "mkdocs (>=1.5.3)", "mkdocs-material (>=9.4.14)", "mkdocs-open-in-new-tab (>=1.0.3,<1.1.0)", "mkdocstrings (>=0.24.0)", "mkdocstrings-python (>=1.7.5)", "termynal (>=0.11.1)"] pandas = ["pandas (>=1.5,<3.0)"] pysftp = ["pysftp (>=0.2.8,<0.3)"] -tests = ["flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pandas (>=1.5,<3.0)", "pytest (>=7.0.0,<8.0)", "pytest-asyncio (>=0.23.6,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] +tests = ["flake8 (>=3.7.0,<4.0)", "func-timeout (>=4.3,<5.0)", "pytest (>=6.0.0,<7.0)", "pytest-asyncio (>=0.19,<1.0)", "pytest-cov (>=4.1.0,<4.2.0)", "pytest-mock (>=3.0,<4.0)", "pytest-rerunfailures (>=12.0,<13.0)", "pytest-socket (>=0.6.0,<0.7.0)", "pytest-xdist[psutil] (>=2.2,<3.0.0)"] [[package]] name = "tabulate" @@ -4964,4 +4944,4 @@ aws = ["uWSGI"] [metadata] lock-version = "2.0" python-versions = ">=3.9.0,<3.11" -content-hash = "a3048c0808e73fd19f5175897e9dda47a2a593422dd4744886615ac453a42139" +content-hash = "5bf0c831977694ea541db24481181ec1980ec9589a2adbd9f30ed0fe7f2b2742" diff --git a/pyproject.toml b/pyproject.toml index 8d941b8ae..3c2795140 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ pygsheets = "^2.0.4" PyYAML = "^6.0.0" rdflib = "^6.0.0" setuptools = "^66.0.0" -synapseclient = "^4.3.0" +synapseclient = "^4.1.0" tenacity = "^8.0.1" toml = "^0.10.2" great-expectations = "^0.15.0" @@ -74,8 +74,6 @@ Flask = {version = "2.1.3", optional = true} Flask-Cors = {version = "^3.0.10", optional = true} uWSGI = {version = "^2.0.21", optional = true} Jinja2 = {version = ">2.11.3", optional = true} -asyncio = "^3.4.3" -pytest-asyncio = "^0.23.7" jaeger-client = {version = "^4.8.0", optional = true} flask-opentracing = {version="^2.0.0", optional = true} diff --git a/schematic/store/synapse.py b/schematic/store/synapse.py index 71711bbae..688bbce4f 100644 --- a/schematic/store/synapse.py +++ b/schematic/store/synapse.py @@ -11,7 +11,6 @@ import secrets import shutil import synapseclient -from synapseclient.api import get_entity_id_bundle2 import uuid # used to generate unique names for entities from tenacity import ( @@ -24,7 +23,7 @@ from time import sleep # allows specifying explicit variable types -from typing import Dict, List, Tuple, Sequence, Union, Optional, Any, Set +from typing import Dict, List, Tuple, Sequence, Union, Optional from synapseclient import ( Synapse, @@ -69,9 +68,6 @@ from schematic.store.base import BaseStorage from schematic.exceptions import AccessCredentialsError from schematic.configuration.configuration import CONFIG -from synapseclient.models.annotations import Annotations -import asyncio -from dataclasses import asdict from opentelemetry import trace logger = logging.getLogger("Synapse storage") @@ -711,6 +707,7 @@ def fill_in_entity_id_filename( new_files = self._get_file_entityIds( dataset_files=dataset_files, only_new_files=True, manifest=manifest ) + # update manifest so that it contains new dataset files new_files = pd.DataFrame(new_files) manifest = ( @@ -1358,43 +1355,8 @@ def upload_manifest_file( return manifest_synapse_file_id - async def get_async_annotation(self, synapse_id: str) -> Dict[str, Any]: - """get annotations asynchronously - - Args: - synapse_id (str): synapse id of the entity that the annotation belongs - - Returns: - Dict[str, Any]: The requested entity bundle matching - - """ - return await get_entity_id_bundle2( - entity_id=synapse_id, - request={"includeAnnotations": True}, - synapse_client=self.syn, - ) - - async def store_async_annotation(self, annotation_dict: dict) -> Annotations: - """store annotation in an async way - - Args: - annotation_dict (dict): annotation in a dictionary format - - Returns: - Annotations: The stored annotations. - """ - annotation_data = Annotations.from_dict( - synapse_annotations=annotation_dict["annotations"]["annotations"] - ) - annotation_class = Annotations( - annotations=annotation_data, - etag=annotation_dict["annotations"]["etag"], - id=annotation_dict["annotations"]["id"], - ) - return await annotation_class.store_async(self.syn) - @missing_entity_handler - async def format_row_annotations( + def format_row_annotations( self, dmge, row, entityId: str, hideBlanks: bool, annotation_keys: str ): # prepare metadata for Synapse storage (resolve display name into a name that Synapse annotations support (e.g no spaces, parenthesis) @@ -1426,8 +1388,7 @@ async def format_row_annotations( metadataSyn[keySyn] = v # set annotation(s) for the various objects/items in a dataset on Synapse - annos = await self.get_async_annotation(entityId) - + annos = self.syn.get_annotations(entityId) csv_list_regex = comma_separated_list_regex() for anno_k, anno_v in metadataSyn.items(): # Remove keys with nan or empty string values from dict of annotations to be uploaded @@ -1664,6 +1625,37 @@ def _generate_table_name(self, manifest): table_name = "synapse_storage_manifest_table" return table_name, component_name + @tracer.start_as_current_span("SynapseStorage::_add_annotations") + def _add_annotations( + self, + dmge, + row, + entityId: str, + hideBlanks: bool, + annotation_keys: str, + ): + """Helper function to format and add annotations to entities in Synapse. + Args: + dmge: DataModelGraphExplorer object, + row: current row of manifest being processed + entityId (str): synapseId of entity to add annotations to + hideBlanks: Boolean flag that does not upload annotation keys with blank values when true. Uploads Annotation keys with empty string values when false. + annotation_keys: (str) display_label/class_label(default), Determines labeling syle for annotation keys. class_label will format the display + name as upper camelcase, and strip blacklisted characters, display_label will strip blacklisted characters including spaces, to retain + display label formatting while ensuring the label is formatted properly for Synapse annotations. + Returns: + Annotations are added to entities in Synapse, no return. + """ + # Format annotations for Synapse + annos = self.format_row_annotations( + dmge, row, entityId, hideBlanks, annotation_keys + ) + + if annos: + # Store annotations for an entity folder + self.syn.set_annotations(annos) + return + def _create_entity_id(self, idx, row, manifest, datasetId): """Helper function to generate an entityId and add it to the appropriate row in the manifest. Args: @@ -1683,45 +1675,8 @@ def _create_entity_id(self, idx, row, manifest, datasetId): manifest.loc[idx, "entityId"] = entityId return manifest, entityId - async def _process_store_annos(self, requests: Set[asyncio.Task]) -> None: - """Process annotations and store them on synapse asynchronously - - Args: - requests (Set[asyncio.Task]): a set of tasks of formatting annotations created by format_row_annotations function in previous step - - Raises: - RuntimeError: raise a run time error if a task failed to complete - """ - while requests: - done_tasks, pending_tasks = await asyncio.wait( - requests, return_when=asyncio.FIRST_COMPLETED - ) - requests = pending_tasks - - for completed_task in done_tasks: - try: - annos = completed_task.result() - - if isinstance(annos, Annotations): - annos_dict = asdict(annos) - entity_id = annos_dict["id"] - logger.info(f"Successfully stored annotations for {entity_id}") - else: - entity_id = annos["EntityId"] - logger.info( - f"Obtained and processed annotations for {entity_id} entity" - ) - if annos: - requests.add( - asyncio.create_task( - self.store_async_annotation(annotation_dict=annos) - ) - ) - except Exception as e: - raise RuntimeError(f"failed with { repr(e) }.") from e - @tracer.start_as_current_span("SynapseStorage::add_annotations_to_entities_files") - async def add_annotations_to_entities_files( + def add_annotations_to_entities_files( self, dmge, manifest, @@ -1762,7 +1717,6 @@ async def add_annotations_to_entities_files( ).drop("entityId_x", axis=1) # Fill `entityId` for each row if missing and annotate entity as appropriate - requests = set() for idx, row in manifest.iterrows(): if not row["entityId"] and ( manifest_record_type == "file_and_entities" @@ -1782,14 +1736,8 @@ async def add_annotations_to_entities_files( # Adding annotations to connected files. if entityId: - # Format annotations for Synapse - annos_task = asyncio.create_task( - self.format_row_annotations( - dmge, row, entityId, hideBlanks, annotation_keys - ) - ) - requests.add(annos_task) - await self._process_store_annos(requests) + self._add_annotations(dmge, row, entityId, hideBlanks, annotation_keys) + logger.info(f"Added annotations to entity: {entityId}") return manifest @tracer.start_as_current_span("SynapseStorage::upload_manifest_as_table") @@ -1843,16 +1791,14 @@ def upload_manifest_as_table( ) if file_annotations_upload: - manifest = asyncio.run( - self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys, - ) + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys, ) # Load manifest to synapse as a CSV File manifest_synapse_file_id = self.upload_manifest_file( @@ -1919,15 +1865,13 @@ def upload_manifest_as_csv( manifest_synapse_file_id (str): SynID of manifest csv uploaded to synapse. """ if file_annotations_upload: - manifest = asyncio.run( - self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - annotation_keys=annotation_keys, - ) + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + annotation_keys=annotation_keys, ) # Load manifest to synapse as a CSV File @@ -1999,16 +1943,14 @@ def upload_manifest_combo( ) if file_annotations_upload: - manifest = asyncio.run( - self.add_annotations_to_entities_files( - dmge, - manifest, - manifest_record_type, - datasetId, - hideBlanks, - manifest_synapse_table_id, - annotation_keys=annotation_keys, - ) + manifest = self.add_annotations_to_entities_files( + dmge, + manifest, + manifest_record_type, + datasetId, + hideBlanks, + manifest_synapse_table_id, + annotation_keys=annotation_keys, ) # Load manifest to synapse as a CSV File diff --git a/tests/test_store.py b/tests/test_store.py index cd5f4385b..5ac61f3d0 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -9,7 +9,6 @@ from typing import Generator, Any from unittest.mock import patch import shutil -import asyncio import pandas as pd import pytest @@ -25,8 +24,6 @@ from schematic.store.base import BaseStorage from schematic.store.synapse import DatasetFileView, ManifestDownload, SynapseStorage from schematic.utils.general import check_synapse_cache_size -from unittest.mock import AsyncMock -from synapseclient.models import Annotations logging.basicConfig(level=logging.DEBUG) logger = logging.getLogger(__name__) @@ -481,158 +478,6 @@ def test_get_files_metadata_from_dataset(self, synapse_store): "entityId": ["syn123", "syn456"], } - async def test_get_async_annotation(self, synapse_store: SynapseStorage) -> None: - """test get annotation async function""" - mock_syn_id = "syn1234" - - with patch( - "schematic.store.synapse.get_entity_id_bundle2", - new_callable=AsyncMock, - return_value="mock", - ) as mock_get_entity_id_bundle2: - mock_get_entity_id_bundle2.return_value = "mock" - result = await synapse_store.get_async_annotation(synapse_id=mock_syn_id) - - mock_get_entity_id_bundle2.assert_called_once_with( - entity_id=mock_syn_id, - request={"includeAnnotations": True}, - synapse_client=synapse_store.syn, - ) - assert result == "mock" - - async def test_store_async_annotation(self, synapse_store: SynapseStorage) -> None: - """test store annotations async function""" - annos_dict = { - "annotations": { - "id": "mock_syn_id", - "etag": "mock etag", - "annotations": { - "Id": {"type": "STRING", "value": ["mock value"]}, - "EntityId": {"type": "STRING", "value": ["mock_syn_id"]}, - "SampleID": {"type": "STRING", "value": [""]}, - "Component": {"type": "STRING", "value": ["mock value"]}, - }, - }, - "FileFormat": "mock format", - "Component": "mock component", - "Id": "mock_string", - "EntityId": "mock_id", - } - expected_dict = Annotations( - annotations={ - "Id": ["mock_string"], - "EntityId": ["mock_syn_id"], - "SampleID": [""], - "Component": ["mock value"], - "FileFormat": ["mock_format"], - }, - etag="mock etag", - id="mock syn_id", - ) - - with patch( - "schematic.store.synapse.Annotations.store_async", - new_callable=AsyncMock, - return_value=expected_dict, - ) as mock_store_async: - result = await synapse_store.store_async_annotation(annos_dict) - - mock_store_async.assert_called_once_with(synapse_store.syn) - assert result == expected_dict - assert isinstance(result, Annotations) - - async def test_process_store_annos_failure( - self, synapse_store: SynapseStorage - ) -> None: - """test _process_store_annos function when there's an error either getting or storing annotations""" - - async def mock_failure_coro(): - raise ValueError("sample error") - - # create tasks that will fail - tasks = set() - tasks.add(asyncio.create_task(mock_failure_coro())) - - synapse_store._process_store_annos - # make sure error message can be raised - with pytest.raises(RuntimeError, match="failed with"): - await synapse_store._process_store_annos(tasks) - - async def test_process_store_annos_success_store( - self, synapse_store: SynapseStorage - ) -> None: - """test _process_store_annos function and make sure that annotations can be stored after successfully getting annotations.""" - # mock annotation obtained after async_store - stored_annos = Annotations( - annotations={ - "Id": ["mock_string"], - "EntityId": ["mock_syn_id"], - "SampleID": [""], - "Component": ["mock value"], - "FileFormat": ["mock_format"], - }, - etag="mock etag", - id="mock_syn_id", - ) - - async def mock_success_coro(): - return stored_annos - - with patch( - "schematic.store.synapse.SynapseStorage.store_async_annotation", - new_callable=AsyncMock, - ) as mock_store_async1: - tasks = set() - tasks.add(asyncio.create_task(mock_success_coro())) - await synapse_store._process_store_annos(tasks) - # make sure that the if statement is working - mock_store_async1.assert_not_called() - - async def test_process_store_annos_success_get( - self, synapse_store: SynapseStorage - ) -> None: - """test _process_store_annos function and make sure that task of storing annotations can be triggered""" - # mock annotation obtained after get_async - mock_annos_dict = { - "annotations": { - "id": "mock_syn_id", - "etag": "mock etag", - "annotations": { - "Id": {"type": "STRING", "value": ["mock value"]}, - "EntityId": {"type": "STRING", "value": ["mock_syn_id"]}, - "SampleID": {"type": "STRING", "value": [""]}, - "Component": {"type": "STRING", "value": ["mock value"]}, - }, - }, - "FileFormat": "mock format", - "Component": "mock component", - "Id": "mock_string", - "EntityId": "mock_id", - } - - mock_stored_annos = Annotations( - annotations={ - "Id": ["mock_string"], - "EntityId": ["mock_syn_id"], - }, - etag="mock etag", - id="mock_syn_id", - ) - - async def mock_success_coro(): - return mock_annos_dict - - # make sure that the else statement is working - new_tasks = set() - with patch( - "schematic.store.synapse.SynapseStorage.store_async_annotation", - new_callable=AsyncMock, - return_value=mock_stored_annos, - ) as mock_store_async2: - new_tasks.add(asyncio.create_task(mock_success_coro())) - await synapse_store._process_store_annos(new_tasks) - mock_store_async2.assert_called_once() - class TestDatasetFileView: def test_init(self, dataset_id, dataset_fileview, synapse_store): @@ -1086,7 +931,7 @@ class TestManifestUpload: ), ], ) - async def test_add_annotations_to_entities_files( + def test_add_annotations_to_entities_files( self, synapse_store: SynapseStorage, dmge: DataModelGraphExplorer, @@ -1106,49 +951,27 @@ async def test_add_annotations_to_entities_files( expected_filenames (list(str)): expected list of file names expected_entity_ids (list(str)): expected list of entity ids """ - - async def mock_format_row_annos(): - return - - async def mock_process_store_annos(requests): - return - with patch( "schematic.store.synapse.SynapseStorage.getFilesInStorageDataset", return_value=files_in_dataset, ): - with patch( - "schematic.store.synapse.SynapseStorage.format_row_annotations", - return_value=mock_format_row_annos, - new_callable=AsyncMock, - ) as mock_format_row: - with patch( - "schematic.store.synapse.SynapseStorage._process_store_annos", - return_value=mock_process_store_annos, - new_callable=AsyncMock, - ) as mock_process_store: - manifest_df = pd.DataFrame(original_manifest) - - new_df = await synapse_store.add_annotations_to_entities_files( - dmge, - manifest_df, - manifest_record_type="entity", - datasetId="mock id", - hideBlanks=True, - ) + manifest_df = pd.DataFrame(original_manifest) - file_names_lst = new_df["Filename"].tolist() - entity_ids_lst = new_df["entityId"].tolist() - - # test entityId and Id columns get added - assert "entityId" in new_df.columns - assert "Id" in new_df.columns - assert file_names_lst == expected_filenames - assert entity_ids_lst == expected_entity_ids + new_df = synapse_store.add_annotations_to_entities_files( + dmge, + manifest_df, + manifest_record_type="entity", + datasetId="mock id", + hideBlanks=True, + ) + file_names_lst = new_df["Filename"].tolist() + entity_ids_lst = new_df["entityId"].tolist() - # make sure async function gets called as expected - assert mock_format_row.call_count == len(expected_entity_ids) - assert mock_process_store.call_count == 1 + # test entityId and Id columns get added + assert "entityId" in new_df.columns + assert "Id" in new_df.columns + assert file_names_lst == expected_filenames + assert entity_ids_lst == expected_entity_ids @pytest.mark.parametrize( "mock_manifest_file_path", @@ -1232,14 +1055,9 @@ def test_upload_manifest_as_csv( hide_blanks: bool, restrict: bool, ) -> None: - async def mock_add_annotations_to_entities_files(): - return - with ( patch( - "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files", - return_value=mock_add_annotations_to_entities_files, - new_callable=AsyncMock, + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" ) as add_anno_mock, patch( "schematic.store.synapse.SynapseStorage.upload_manifest_file", @@ -1287,19 +1105,13 @@ def test_upload_manifest_as_table( manifest_record_type: str, ) -> None: mock_df = pd.DataFrame() - - async def mock_add_annotations_to_entities_files(): - return - with ( patch( "schematic.store.synapse.SynapseStorage.uploadDB", return_value=["mock_table_id", mock_df, "mock_table_manifest"], ) as update_db_mock, patch( - "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files", - return_value=mock_add_annotations_to_entities_files, - new_callable=AsyncMock, + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" ) as add_anno_mock, patch( "schematic.store.synapse.SynapseStorage.upload_manifest_file", @@ -1353,19 +1165,13 @@ def test_upload_manifest_combo( mock_df = pd.DataFrame() manifest_path = helpers.get_data_path("mock_manifests/test_BulkRNAseq.csv") manifest_df = helpers.get_data_frame(manifest_path) - - async def mock_add_annotations_to_entities_files(): - return - with ( patch( "schematic.store.synapse.SynapseStorage.uploadDB", return_value=["mock_table_id", mock_df, "mock_table_manifest"], ) as update_db_mock, patch( - "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files", - return_value=mock_add_annotations_to_entities_files, - new_callable=AsyncMock, + "schematic.store.synapse.SynapseStorage.add_annotations_to_entities_files" ) as add_anno_mock, patch( "schematic.store.synapse.SynapseStorage.upload_manifest_file",