finished refactoring

RECETOX · Jan 31, 2025 · b3b9e04 · b3b9e04
1 parent 4cfdafd
commit b3b9e04
Show file tree

Hide file tree

Showing 14 changed files with 356 additions and 202 deletions.
diff --git a/tools/ipapy2/ipapy2_MS1_annotation.xml b/tools/ipapy2/ipapy2_MS1_annotation.xml
@@ -5,6 +5,7 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+        <expand macro="extra_requirements"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[

diff --git a/tools/ipapy2/ipapy2_MS2_annotation.xml b/tools/ipapy2/ipapy2_MS2_annotation.xml
@@ -5,6 +5,7 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+        <expand macro="extra_requirements"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[
@@ -80,7 +81,12 @@
             <param name="all_adducts" value="all_adducts.csv"/>
             <param name="MS2_DB" value="MS2_DB.csv"/>
             <param name="ppm" value="3"/>
-            <output name="MS2_annotations" file="MS2_annotations.csv"/>
+            <output name="MS2_annotations">
+                <assert_contents>
+                    <has_n_columns n="13"  sep=","/>
+                    <has_n_lines n="158" delta="5" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
 

diff --git a/tools/ipapy2/ipapy2_clustering.xml b/tools/ipapy2/ipapy2_clustering.xml
@@ -4,6 +4,7 @@
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+        <expand macro="extra_requirements"/>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
         python3 '${__tool_directory__}/ipapy2_clustering.py'

diff --git a/tools/ipapy2/ipapy2_compute_all_adducts.xml b/tools/ipapy2/ipapy2_compute_all_adducts.xml
@@ -5,6 +5,7 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+        <expand macro="extra_requirements"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[

diff --git a/tools/ipapy2/ipapy2_compute_bio.py b/tools/ipapy2/ipapy2_compute_bio.py
@@ -1,27 +1,29 @@
 import argparse
-import os
-import pandas as pd
 from ipaPy2 import ipa
+from utils import LoadDataAction, StoreOutputAction, group_by_peak_id
 
 
-def main(args):
-    MS1_DB = pd.read_csv(args.MS1_DB)
-    MS1_DB = MS1_DB.replace("", None)
+def main(
+    input_dataset_database,
+    input_dataset_annotations,
+    biochemical_mode,
+    connection_list,
+    output_dataset,
+    ncores,
+):
+    """
+    Compute matrix of biochemical connections. Either based on a list of
+    possible connections in the form of a list of formulas or based on the
+    reactions present in the database.
+    """
 
-    if args.annotations:
-        annotations_df = pd.read_csv(args.annotations, keep_default_na=False)
-        annotations_df = annotations_df.replace("", None)
-        annotations = {}
-        keys = set(annotations_df["peak_id"])
-        for i in keys:
-            annotations[i] = annotations_df[annotations_df["peak_id"] == i].drop(
-                "peak_id", axis=1
-            )
+    if input_dataset_annotations is not None:
+        annotations = group_by_peak_id(input_dataset_annotations)
     else:
         annotations = None
 
-    if args.biochemical_mode == "connections" and args.connection_list:
-        connections = args.connection_list
+    if biochemical_mode == "connections" and connection_list:
+        connections = connection_list
     else:
         connections = [
             "C3H5NO",
@@ -110,29 +112,34 @@ def main(args):
         ]
 
     Bio = ipa.Compute_Bio(
-        MS1_DB,
+        input_dataset_database,
         annotations=annotations,
-        mode=args.biochemical_mode,
+        mode=biochemical_mode,
         connections=connections,
-        ncores=int(os.environ.get("GALAXY_SLOTS")),
+        ncores=ncores,
     )
-    Bio.to_csv(args.compute_bio_output, index=False)
+    write_func, file_path = output_dataset
+    write_func(Bio, file_path)
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(
-        description="cluster features before IPA pipeline."
+        description=""" Compute matrix of biochemical connections. Either based on a list of
+    possible connections in the form of a list of formulas or based on the
+    reactions present in the database."""
     )
     parser.add_argument(
-        "--MS1_DB",
-        type=str,
+        "--input_dataset_database",
+        nargs=2,
+        action=LoadDataAction,
         required=True,
-        help="a dataframe containing the measured intensities across several samples.",
+        help="a dataset containing the database against which the annotation is performed.",
     )
     parser.add_argument(
-        "--annotations",
-        type=str,
-        help="a dataframe containing the annotations of the features.",
+        "--input_dataset_annotations",
+        nargs=2,
+        action=LoadDataAction,
+        help="a dataset containing the annotations of the features.",
     )
     parser.add_argument(
         "--biochemical_mode",
@@ -144,11 +151,25 @@ def main(args):
         "--connection_list", type=str, help="intensity mode. Default 'max' or 'ave'."
     )
     parser.add_argument(
-        "--compute_bio_output",
-        type=str,
+        "--output_dataset",
+        nargs=2,
+        action=StoreOutputAction,
         required=True,
         help="Output file path for the dataframe.",
     )
+    parser.add_argument(
+        "--ncores",
+        type=int,
+        default=None,
+        help="number of cores to use for the computation.",
+    )
     args = parser.parse_args()
 
-    main(args)
+    main(
+        args.input_dataset_database,
+        args.input_dataset_annotations,
+        args.biochemical_mode,
+        args.connection_list,
+        args.output_dataset,
+        args.ncores,
+    )
diff --git a/tools/ipapy2/ipapy2_compute_bio.xml b/tools/ipapy2/ipapy2_compute_bio.xml
@@ -5,17 +5,19 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
+        <expand macro="extra_requirements"/>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[
         python3 '${__tool_directory__}/ipapy2_compute_bio.py'
-        --MS1_DB '${MS1_DB}'
-        --annotations '${annotations}'
+        --input_dataset_database '${MS1_DB}' '${MS1_DB.ext}'
+        --input_dataset_annotations '${annotations}' '${annotations.ext}'
         --biochemical_mode '${biochemical_mode.biochemical_mode}'
         #if $biochemical_mode.biochemical_mode == "connections"
             --connection_list '${biochemical_mode.connection_list}'
         #end if
-        --compute_bio_output "${compute_bio_output}"
+        --output_dataset "${compute_bio_output}" "${compute_bio_output.ext}"
+        --ncores \${GALAXY_SLOTS:-1}
     ]]></command>
 
     <inputs>
@@ -37,7 +39,7 @@
     </inputs>
 
     <outputs>
-        <data label="${tool.name} on ${on_string}" name="compute_bio_output" format="csv,tsv,tabular,parquet"/>
+        <data label="${tool.name} on ${on_string}" name="compute_bio_output" format_source="MS1_DB"/>
     </outputs>
 
     <tests>