From 1e16652dfe69a225a859356bda1da5f1bb3cdf65 Mon Sep 17 00:00:00 2001 From: Anja Adamov <57316423+adamovanja@users.noreply.github.com> Date: Tue, 5 Dec 2023 10:00:50 +0100 Subject: [PATCH] FIX: Pandas deprecations (#161) * FIX: pd concat and update readme * FIX: pd squeeze * revert format changes and fix len * fix lint * readme clear mandatory instructions * remove unused transformer * update installation instructions * update Dockerimage to new version * add Docker image link Co-authored-by: Lina Kim --------- Co-authored-by: Lina Kim --- Dockerfile | 2 +- README.md | 26 ++++++++++--------- q2_fondue/get_all.py | 4 +-- q2_fondue/tests/test_query.py | 3 ++- q2_fondue/types/_transformer.py | 6 ----- .../tests/test_types_formats_transformers.py | 5 ++-- 6 files changed, 21 insertions(+), 25 deletions(-) diff --git a/Dockerfile b/Dockerfile index cc49c7fc..f6946e80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM mambaorg/micromamba ARG MAMBA_DOCKERFILE_ACTIVATE=1 -RUN micromamba install -y -c https://packages.qiime2.org/qiime2/2023.2/tested/ \ +RUN micromamba install -y -c https://packages.qiime2.org/qiime2/2023.7/tested/ \ -c conda-forge -c bioconda -c defaults \ q2cli q2-fondue ENV PATH /opt/conda/bin:$PATH diff --git a/README.md b/README.md index e6d727f5..b80a4a7a 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,13 @@ conda install mamba -n base -c conda-forge * Create and activate a conda environment with the required dependencies: ```shell mamba create -y -n fondue \ - -c https://packages.qiime2.org/qiime2/2023.2/tested/ \ + -c https://packages.qiime2.org/qiime2/2023.7/tested/ \ -c conda-forge -c bioconda -c defaults \ q2cli q2-fondue conda activate fondue ``` +Now, don't forget to run [the mandatory configuration step](#mandatory-configuration-for-both-options-1-and-2)! ### Option 2: Install fondue within existing QIIME 2 environment * Install QIIME 2 within a conda environment as described in [the official user documentation](https://docs.qiime2.org/). @@ -40,6 +41,7 @@ mamba install -y \ -c conda-forge -c bioconda -c defaults \ q2-fondue ``` +Now, don't forget to run [the mandatory configuration step](#mandatory-configuration-for-both-options-1-and-2)! ### Mandatory configuration for both options 1 and 2 * Refresh the QIIME 2 CLI cache and see that everything worked: @@ -65,9 +67,9 @@ vdb-config --proxy --proxy-disable no Use containerization to integrate q2-fondue into your pipelines, or simply run reproducibly without the need for heavyweight package managers. [Read more about Docker here.](https://www.docker.com/get-started/) * Install [Docker](https://docs.docker.com/engine/install/) with the linked instructions -* Pull the [q2-fondue Docker image](https://hub.docker.com/layers/linathekim/q2-fondue/2023.2/images/sha256-214d0575eb4eaf435c5c4a7d29edf0fc082e47999b884b52a173f2ec469975f2?context=repo): +* Pull the [q2-fondue Docker image](https://hub.docker.com/layers/linathekim/q2-fondue/2023.7/images/sha256-f5d26959ac035811a8f34e2a46f6cc381f9a4ce21b3604a196c1ee176ba708e7?context=repo): ```shell -docker pull linathekim/q2-fondue:2023.2 +docker pull linathekim/q2-fondue:2023.7 ``` * Within the container, refresh the QIIME 2 CLI cache to see that everything worked: ```shell @@ -89,15 +91,15 @@ To find out which temporary directory is used by Qiime 2, you can run `echo $TMP ### Available actions q2-fondue provides a couple of actions to fetch and manipulate nucleotide sequencing data and related metadata from SRA as well as an action to scrape run, study, BioProject, experiment and sample IDs from a Zotero web library. Below you will find a list of available actions and their short descriptions. -| Action | Description | -|----------------------|--------------------------------------------------------------------------| -| `get-sequences` | Fetch sequences by IDs[*] from the SRA repository. | -| `get-metadata` | Fetch metadata by IDs[*] from the SRA repository. | -| `get-all` | Fetch sequences and metadata by IDs[*] from the SRA repo. | -| `get-ids-from-query` | Find SRA run accession IDs based on a search query. | -| `merge-metadata` | Merge several metadata files into a single metadata object. | -| `combine-seqs` | Combine sequences from multiple artifacts into a single artifact. | -| `scrape-collection` | Scrape Zotero collection for IDs[*] and associated DOI names.| +| Action | Description | +|----------------------|-------------------------------------------------------------------| +| `get-sequences` | Fetch sequences by IDs[*] from the SRA repository. | +| `get-metadata` | Fetch metadata by IDs[*] from the SRA repository. | +| `get-all` | Fetch sequences and metadata by IDs[*] from the SRA repo. | +| `get-ids-from-query` | Find SRA run accession IDs based on a search query. | +| `merge-metadata` | Merge several metadata files into a single metadata object. | +| `combine-seqs` | Combine sequences from multiple artifacts into a single artifact. | +| `scrape-collection` | Scrape Zotero collection for IDs[*] and associated DOI names. | [*]: Supported IDs include run, study, BioProject, experiment and study IDs. diff --git a/q2_fondue/get_all.py b/q2_fondue/get_all.py index 73b47355..7fc72187 100644 --- a/q2_fondue/get_all.py +++ b/q2_fondue/get_all.py @@ -40,8 +40,8 @@ def get_all( seq_single, seq_paired, failed_ids, = get_sequences( run_ids, email, retries, n_jobs, log_level ) - - failed_ids_df = failed_ids_df.append(failed_ids.view(pd.DataFrame)) + failed_ids_df = pd.concat( + [failed_ids_df, failed_ids.view(pd.DataFrame)]) if failed_ids_df.shape[0] > 0: failed_ids = Artifact.import_data('SRAFailedIDs', failed_ids_df) diff --git a/q2_fondue/tests/test_query.py b/q2_fondue/tests/test_query.py index 836dca8e..d7607df7 100644 --- a/q2_fondue/tests/test_query.py +++ b/q2_fondue/tests/test_query.py @@ -28,7 +28,8 @@ def test_query(self, mock_ids): obs_ids, = fondue.actions.get_ids_from_query( query, 'fake@email.com', 1, 'DEBUG' ) - exp_ids = pd.DataFrame(index=pd.Index(['SRR123', 'SRR234'], name='ID')) + exp_ids = pd.DataFrame( + index=pd.Index(['SRR123', 'SRR234'], name='ID'), columns=[], ) mock_ids.assert_called_once_with( 'fake@email.com', 1, None, query, 'biosample', 'DEBUG' diff --git a/q2_fondue/types/_transformer.py b/q2_fondue/types/_transformer.py index 1c12ba8c..147970be 100644 --- a/q2_fondue/types/_transformer.py +++ b/q2_fondue/types/_transformer.py @@ -21,12 +21,6 @@ def _meta_fmt_to_metadata(ff): return qiime2.Metadata(df) -def _meta_fmt_to_series(ff): - with ff.open() as fh: - s = pd.read_csv(fh, header=0, dtype='str', squeeze=True) - return s - - def _series_to_meta_fmt(data: pd.Series, meta_fmt): with meta_fmt.open() as fh: data.to_csv(fh, sep='\t', header=True, index=False) diff --git a/q2_fondue/types/tests/test_types_formats_transformers.py b/q2_fondue/types/tests/test_types_formats_transformers.py index b39c2f0a..3b789e78 100644 --- a/q2_fondue/types/tests/test_types_formats_transformers.py +++ b/q2_fondue/types/tests/test_types_formats_transformers.py @@ -178,7 +178,7 @@ def setUp(self): ncbi_ids_path = self.get_data_path('ncbi-ids-runs.tsv') self.ncbi_ids = NCBIAccessionIDsFormat(ncbi_ids_path, mode='r') self.ncbi_ids_ser = pd.read_csv( - ncbi_ids_path, header=0, dtype='str', squeeze=True) + ncbi_ids_path, header=0, dtype='str').squeeze() self.ncbi_ids_df = pd.read_csv( ncbi_ids_path, sep='\t', header=0, index_col=0, dtype='str') @@ -234,8 +234,7 @@ def test_series_to_ncbi_accession_ids(self): self.assertIsInstance(obs, NCBIAccessionIDsFormat) obs = pd.read_csv( - str(obs), header=0, dtype='str', squeeze=True - ) + str(obs), header=0, dtype='str').squeeze() pd.testing.assert_series_equal(obs, self.ncbi_ids_ser) def test_dataframe_to_ncbi_accession_ids(self):