From 1e16652dfe69a225a859356bda1da5f1bb3cdf65 Mon Sep 17 00:00:00 2001
From: Anja Adamov <57316423+adamovanja@users.noreply.github.com>
Date: Tue, 5 Dec 2023 10:00:50 +0100
Subject: [PATCH] FIX: Pandas deprecations (#161)

* FIX: pd concat and update readme

* FIX: pd squeeze

* revert format changes and fix len

* fix lint

* README: clarify the mandatory configuration instructions

* remove unused transformer

* update installation instructions

* update Docker image to new version

* add Docker image link

Co-authored-by: Lina Kim <lina-kim@users.noreply.github.com>

---------

Co-authored-by: Lina Kim <lina-kim@users.noreply.github.com>
---
 Dockerfile                                    |  2 +-
 README.md                                     | 26 ++++++++++---------
 q2_fondue/get_all.py                          |  4 +--
 q2_fondue/tests/test_query.py                 |  3 ++-
 q2_fondue/types/_transformer.py               |  6 -----
 .../tests/test_types_formats_transformers.py  |  5 ++--
 6 files changed, 21 insertions(+), 25 deletions(-)
diff --git a/Dockerfile b/Dockerfile
index cc49c7fc..f6946e80 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 FROM mambaorg/micromamba
 ARG MAMBA_DOCKERFILE_ACTIVATE=1
-RUN micromamba install -y -c https://packages.qiime2.org/qiime2/2023.2/tested/ \
+RUN micromamba install -y -c https://packages.qiime2.org/qiime2/2023.7/tested/ \
 	-c conda-forge -c bioconda -c defaults \
 	q2cli q2-fondue
 ENV PATH /opt/conda/bin:$PATH
diff --git a/README.md b/README.md
index e6d727f5..b80a4a7a 100644
--- a/README.md
+++ b/README.md
@@ -23,12 +23,13 @@ conda install mamba -n base -c conda-forge
 * Create and activate a conda environment with the required dependencies:
 ```shell
 mamba create -y -n fondue \
-   -c https://packages.qiime2.org/qiime2/2023.2/tested/ \
+   -c https://packages.qiime2.org/qiime2/2023.7/tested/ \
    -c conda-forge -c bioconda -c defaults \
    q2cli q2-fondue
 
 conda activate fondue
 ```
+Now, don't forget to run [the mandatory configuration step](#mandatory-configuration-for-both-options-1-and-2)!
 
 ### Option 2: Install fondue within existing QIIME 2 environment
 * Install QIIME 2 within a conda environment as described in [the official user documentation](https://docs.qiime2.org/). 
@@ -40,6 +41,7 @@ mamba install -y \
    -c conda-forge -c bioconda -c defaults \
    q2-fondue
 ```
+Now, don't forget to run [the mandatory configuration step](#mandatory-configuration-for-both-options-1-and-2)!
 
 ### Mandatory configuration for both options 1 and 2
 * Refresh the QIIME 2 CLI cache and see that everything worked:
@@ -65,9 +67,9 @@ vdb-config --proxy <your proxy URL> --proxy-disable no
 Use containerization to integrate q2-fondue into your pipelines, or simply run reproducibly without the need for heavyweight package managers. [Read more about Docker here.](https://www.docker.com/get-started/)
  
 * Install [Docker](https://docs.docker.com/engine/install/) with the linked instructions
-* Pull the [q2-fondue Docker image](https://hub.docker.com/layers/linathekim/q2-fondue/2023.2/images/sha256-214d0575eb4eaf435c5c4a7d29edf0fc082e47999b884b52a173f2ec469975f2?context=repo):
+* Pull the [q2-fondue Docker image](https://hub.docker.com/layers/linathekim/q2-fondue/2023.7/images/sha256-f5d26959ac035811a8f34e2a46f6cc381f9a4ce21b3604a196c1ee176ba708e7?context=repo):
 ```shell
-docker pull linathekim/q2-fondue:2023.2
+docker pull linathekim/q2-fondue:2023.7
 ```
 * Within the container, refresh the QIIME 2 CLI cache to see that everything worked:
 ```shell
@@ -89,15 +91,15 @@ To find out which temporary directory is used by Qiime 2, you can run `echo $TMP
 ### Available actions
 q2-fondue provides a couple of actions to fetch and manipulate nucleotide sequencing data and related metadata from SRA as well as an action to scrape run, study, BioProject, experiment and sample IDs from a Zotero web library. Below you will find a list of available actions and their short descriptions.
 
-| Action               | Description                                                              |
-|----------------------|--------------------------------------------------------------------------|
-| `get-sequences`      | Fetch sequences by IDs[*] from the SRA repository.        |
-| `get-metadata`       | Fetch metadata by IDs[*] from the SRA repository.         |
-| `get-all`            | Fetch sequences and metadata by IDs[*] from the SRA repo. |
-| `get-ids-from-query` | Find SRA run accession IDs based on a search query. |
-| `merge-metadata`     | Merge several metadata files into a single metadata object.              |
-| `combine-seqs`       | Combine sequences from multiple artifacts into a single artifact.        |
-| `scrape-collection`  | Scrape Zotero collection for IDs[*] and associated DOI names.|
+| Action               | Description                                                       |
+|----------------------|-------------------------------------------------------------------|
+| `get-sequences`      | Fetch sequences by IDs[*] from the SRA repository.                |
+| `get-metadata`       | Fetch metadata by IDs[*] from the SRA repository.                 |
+| `get-all`            | Fetch sequences and metadata by IDs[*] from the SRA repo.         |
+| `get-ids-from-query` | Find SRA run accession IDs based on a search query.               |
+| `merge-metadata`     | Merge several metadata files into a single metadata object.       |
+| `combine-seqs`       | Combine sequences from multiple artifacts into a single artifact. |
+| `scrape-collection`  | Scrape Zotero collection for IDs[*] and associated DOI names.     |
 
 [*]: Supported IDs include run, study, BioProject, experiment and study IDs.
 
diff --git a/q2_fondue/get_all.py b/q2_fondue/get_all.py
index 73b47355..7fc72187 100644
--- a/q2_fondue/get_all.py
+++ b/q2_fondue/get_all.py
@@ -40,8 +40,8 @@ def get_all(
     seq_single, seq_paired, failed_ids, = get_sequences(
         run_ids, email, retries, n_jobs, log_level
     )
-
-    failed_ids_df = failed_ids_df.append(failed_ids.view(pd.DataFrame))
+    failed_ids_df = pd.concat(
+        [failed_ids_df, failed_ids.view(pd.DataFrame)])
     if failed_ids_df.shape[0] > 0:
         failed_ids = Artifact.import_data('SRAFailedIDs', failed_ids_df)
 
diff --git a/q2_fondue/tests/test_query.py b/q2_fondue/tests/test_query.py
index 836dca8e..d7607df7 100644
--- a/q2_fondue/tests/test_query.py
+++ b/q2_fondue/tests/test_query.py
@@ -28,7 +28,8 @@ def test_query(self, mock_ids):
         obs_ids, = fondue.actions.get_ids_from_query(
             query, 'fake@email.com', 1, 'DEBUG'
         )
-        exp_ids = pd.DataFrame(index=pd.Index(['SRR123', 'SRR234'], name='ID'))
+        exp_ids = pd.DataFrame(
+            index=pd.Index(['SRR123', 'SRR234'], name='ID'), columns=[], )
 
         mock_ids.assert_called_once_with(
             'fake@email.com', 1, None, query, 'biosample', 'DEBUG'
diff --git a/q2_fondue/types/_transformer.py b/q2_fondue/types/_transformer.py
index 1c12ba8c..147970be 100644
--- a/q2_fondue/types/_transformer.py
+++ b/q2_fondue/types/_transformer.py
@@ -21,12 +21,6 @@ def _meta_fmt_to_metadata(ff):
         return qiime2.Metadata(df)
 
 
-def _meta_fmt_to_series(ff):
-    with ff.open() as fh:
-        s = pd.read_csv(fh, header=0, dtype='str', squeeze=True)
-        return s
-
-
 def _series_to_meta_fmt(data: pd.Series, meta_fmt):
     with meta_fmt.open() as fh:
         data.to_csv(fh, sep='\t', header=True, index=False)
diff --git a/q2_fondue/types/tests/test_types_formats_transformers.py b/q2_fondue/types/tests/test_types_formats_transformers.py
index b39c2f0a..3b789e78 100644
--- a/q2_fondue/types/tests/test_types_formats_transformers.py
+++ b/q2_fondue/types/tests/test_types_formats_transformers.py
@@ -178,7 +178,7 @@ def setUp(self):
         ncbi_ids_path = self.get_data_path('ncbi-ids-runs.tsv')
         self.ncbi_ids = NCBIAccessionIDsFormat(ncbi_ids_path, mode='r')
         self.ncbi_ids_ser = pd.read_csv(
-            ncbi_ids_path, header=0, dtype='str', squeeze=True)
+            ncbi_ids_path, header=0, dtype='str').squeeze()
         self.ncbi_ids_df = pd.read_csv(
             ncbi_ids_path, sep='\t', header=0, index_col=0, dtype='str')
 
@@ -234,8 +234,7 @@ def test_series_to_ncbi_accession_ids(self):
         self.assertIsInstance(obs, NCBIAccessionIDsFormat)
 
         obs = pd.read_csv(
-            str(obs), header=0, dtype='str', squeeze=True
-        )
+            str(obs), header=0, dtype='str').squeeze()
         pd.testing.assert_series_equal(obs, self.ncbi_ids_ser)
 
     def test_dataframe_to_ncbi_accession_ids(self):