Skip to content

Commit

Permalink
finished refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
acquayefrank committed Jan 31, 2025
1 parent 4cfdafd commit b3b9e04
Show file tree
Hide file tree
Showing 14 changed files with 356 additions and 202 deletions.
1 change: 1 addition & 0 deletions tools/ipapy2/ipapy2_MS1_annotation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

<requirements>
<requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
<expand macro="extra_requirements"/>
</requirements>

<command detect_errors="exit_code"><![CDATA[
Expand Down
8 changes: 7 additions & 1 deletion tools/ipapy2/ipapy2_MS2_annotation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

<requirements>
<requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
<expand macro="extra_requirements"/>
</requirements>

<command detect_errors="exit_code"><![CDATA[
Expand Down Expand Up @@ -80,7 +81,12 @@
<param name="all_adducts" value="all_adducts.csv"/>
<param name="MS2_DB" value="MS2_DB.csv"/>
<param name="ppm" value="3"/>
<output name="MS2_annotations" file="MS2_annotations.csv"/>
<output name="MS2_annotations">
<assert_contents>
<has_n_columns n="13" sep=","/>
<has_n_lines n="158" delta="5" />
</assert_contents>
</output>
</test>
</tests>

Expand Down
1 change: 1 addition & 0 deletions tools/ipapy2/ipapy2_clustering.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
</macros>
<requirements>
<requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
<expand macro="extra_requirements"/>
</requirements>
<command detect_errors="exit_code"><![CDATA[
python3 '${__tool_directory__}/ipapy2_clustering.py'
Expand Down
1 change: 1 addition & 0 deletions tools/ipapy2/ipapy2_compute_all_adducts.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

<requirements>
<requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
<expand macro="extra_requirements"/>
</requirements>

<command detect_errors="exit_code"><![CDATA[
Expand Down
81 changes: 51 additions & 30 deletions tools/ipapy2/ipapy2_compute_bio.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
import argparse
import os
import pandas as pd
from ipaPy2 import ipa
from utils import LoadDataAction, StoreOutputAction, group_by_peak_id


def main(args):
MS1_DB = pd.read_csv(args.MS1_DB)
MS1_DB = MS1_DB.replace("", None)
def main(
input_dataset_database,
input_dataset_annotations,
biochemical_mode,
connection_list,
output_dataset,
ncores,
):
"""
Compute matrix of biochemical connections. Either based on a list of
possible connections in the form of a list of formulas or based on the
reactions present in the database.
"""

if args.annotations:
annotations_df = pd.read_csv(args.annotations, keep_default_na=False)
annotations_df = annotations_df.replace("", None)
annotations = {}
keys = set(annotations_df["peak_id"])
for i in keys:
annotations[i] = annotations_df[annotations_df["peak_id"] == i].drop(
"peak_id", axis=1
)
if input_dataset_annotations is not None:
annotations = group_by_peak_id(input_dataset_annotations)
else:
annotations = None

if args.biochemical_mode == "connections" and args.connection_list:
connections = args.connection_list
if biochemical_mode == "connections" and connection_list:
connections = connection_list
else:
connections = [
"C3H5NO",
Expand Down Expand Up @@ -110,29 +112,34 @@ def main(args):
]

Bio = ipa.Compute_Bio(
MS1_DB,
input_dataset_database,
annotations=annotations,
mode=args.biochemical_mode,
mode=biochemical_mode,
connections=connections,
ncores=int(os.environ.get("GALAXY_SLOTS")),
ncores=ncores,
)
Bio.to_csv(args.compute_bio_output, index=False)
write_func, file_path = output_dataset
write_func(Bio, file_path)


if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="cluster features before IPA pipeline."
description=""" Compute matrix of biochemical connections. Either based on a list of
possible connections in the form of a list of formulas or based on the
reactions present in the database."""
)
parser.add_argument(
"--MS1_DB",
type=str,
"--input_dataset_database",
nargs=2,
action=LoadDataAction,
required=True,
help="a dataframe containing the measured intensities across several samples.",
help="a dataset containing the database against which the annotation is performed.",
)
parser.add_argument(
"--annotations",
type=str,
help="a dataframe containing the annotations of the features.",
"--input_dataset_annotations",
nargs=2,
action=LoadDataAction,
help="a dataset containing the annotations of the features.",
)
parser.add_argument(
"--biochemical_mode",
Expand All @@ -144,11 +151,25 @@ def main(args):
"--connection_list", type=str, help="intensity mode. Default 'max' or 'ave'."
)
parser.add_argument(
"--compute_bio_output",
type=str,
"--output_dataset",
nargs=2,
action=StoreOutputAction,
required=True,
help="Output file path for the dataframe.",
)
parser.add_argument(
"--ncores",
type=int,
default=None,
help="number of cores to use for the computation.",
)
args = parser.parse_args()

main(args)
main(
args.input_dataset_database,
args.input_dataset_annotations,
args.biochemical_mode,
args.connection_list,
args.output_dataset,
args.ncores,
)
10 changes: 6 additions & 4 deletions tools/ipapy2/ipapy2_compute_bio.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@

<requirements>
<requirement type="package" version="@TOOL_VERSION@">ipapy2</requirement>
<expand macro="extra_requirements"/>
</requirements>

<command detect_errors="exit_code"><![CDATA[
python3 '${__tool_directory__}/ipapy2_compute_bio.py'
--MS1_DB '${MS1_DB}'
--annotations '${annotations}'
--input_dataset_database '${MS1_DB}' '${MS1_DB.ext}'
--input_dataset_annotations '${annotations}' '${annotations.ext}'
--biochemical_mode '${biochemical_mode.biochemical_mode}'
#if $biochemical_mode.biochemical_mode == "connections"
--connection_list '${biochemical_mode.connection_list}'
#end if
--compute_bio_output "${compute_bio_output}"
--output_dataset "${compute_bio_output}" "${compute_bio_output.ext}"
--ncores \${GALAXY_SLOTS:-1}
]]></command>

<inputs>
Expand All @@ -37,7 +39,7 @@
</inputs>

<outputs>
<data label="${tool.name} on ${on_string}" name="compute_bio_output" format="csv,tsv,tabular,parquet"/>
<data label="${tool.name} on ${on_string}" name="compute_bio_output" format_source="MS1_DB"/>
</outputs>

<tests>
Expand Down
Loading

0 comments on commit b3b9e04

Please sign in to comment.