-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #682 from kbase/dev_add_eggnog_container
add eggnog
- Loading branch information
Showing
7 changed files
with
142 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Builds the eggNOG tool image and pushes it to GHCR by delegating to the
# shared reusable workflow (build-push-tool-images.yml).
name: Build & Push eggNOG Image to GHCR

on:
  # Pull requests: run only when the eggNOG version file or one of the two
  # workflow definitions involved in the build is touched.
  pull_request:
    types: [opened, reopened, synchronize, ready_for_review]
    paths:
      - "src/loaders/compute_tools/eggnog/versions.yaml"
      - ".github/workflows/build-push-eggnog-image.yml"
      - ".github/workflows/build-push-tool-images.yml"

  # Pushes to the long-lived branches, filtered by the same set of paths.
  push:
    branches: [main, master, develop]
    paths:
      - "src/loaders/compute_tools/eggnog/versions.yaml"
      - ".github/workflows/build-push-eggnog-image.yml"
      - ".github/workflows/build-push-tool-images.yml"

jobs:
  trigger-build-push:
    # Reusable workflow; it receives the tool name and the path of the
    # version file as inputs, and inherits the caller's secrets.
    uses: ./.github/workflows/build-push-tool-images.yml
    with:
      tool_name: eggnog
      version_file: "src/loaders/compute_tools/eggnog/versions.yaml"
    secrets: inherit
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Image that runs eggNOG-mapper through the collections compute-tools entrypoint.
FROM continuumio/miniconda3:24.1.2-0

# key=value form: the legacy space-separated `ENV key value` syntax is deprecated.
# CONDA_ENV is set in its own instruction because it references EGGNOG_VER, and
# a variable assigned in an ENV instruction is only usable by later instructions.
ENV EGGNOG_VER=2.1.12 \
    PYTHON_VER=3.11
ENV CONDA_ENV=eggnog-$EGGNOG_VER

# `conda config --add` prepends to the channel list, so conda-forge (added last)
# ends up ahead of bioconda.
RUN conda config --add channels bioconda \
    && conda config --add channels conda-forge

# Create and populate the environment in a single layer and clean conda's caches
# in that same layer; a cleanup in a later layer would not shrink the image.
# The three conda steps are kept separate (and in the original order) so the
# dependency resolution sequence is unchanged. `-y` makes the non-interactive
# confirmation explicit.
RUN conda create -y -n "$CONDA_ENV" python="$PYTHON_VER" \
    && conda install -y -n "$CONDA_ENV" -c conda-forge -c bioconda eggnog-mapper="$EGGNOG_VER" \
    && conda install -y -n "$CONDA_ENV" pandas=2.2.1 jsonlines=2.0.0 \
    && conda clean --all --yes

# Activate the tool environment for shells that read ~/.bashrc.
RUN echo "source activate $CONDA_ENV" >> ~/.bashrc

# eggNOG annotation DB is pre-downloaded at /global/cfs/cdirs/kbase/collections/libraries/eggnog/5.0.2
# following instructions at https://github.com/eggnogdb/eggnog-mapper/wiki/eggNOG-mapper-v2.1.5-to-v2.1.12#setup
# Mount the annotation DB directory to /reference_data when running the container
ENV EGGNOG_DATA_DIR=/reference_data

# COPY creates /app/collections (and /app) itself, so no separate mkdir is needed.
COPY ./ /app/collections

# Drop .git before the recursive chmod: it slows that step down if left in place.
# `-f` keeps the build working when .git is not part of the build context
# (for example when it is excluded via .dockerignore).
# NOTE(review): 777 is kept from the original, presumably so the container can be
# run under an arbitrary UID - confirm, and tighten the mode if that is not needed.
RUN rm -rf /app/collections/.git \
    && chmod -R 777 /app/collections

ENV PYTHONPATH=/app/collections

WORKDIR /app

# Script path exported for the entrypoint (presumably the script it launches - confirm in entrypoint.sh).
ENV PY_SCRIPT=/app/collections/src/loaders/compute_tools/eggnog/eggnog.py

ENTRYPOINT ["/app/collections/src/loaders/compute_tools/entrypoint.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
|
||
# eggNOG tool | ||
|
||
## Overview | ||
The eggNOG tool is designed to utilize the collections infrastructure for execution and storage of result data. | ||
|
||
This tool is exclusively intended for use with the CDM project. | ||
|
||
The Collections parser program ([parse_tool_results.py](../../genome_collection/parse_tool_results.py)) skips the result files generated by this tool, because the result data is tailored specifically for the CDM project.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
""" | ||
Run eggNOG tool on a set of faa files. | ||
This tool is not one of the regular collection tools; it exists to support CDM work.
Therefore, the Collections parser program is not compatible with the data generated by this tool.
""" | ||
import json | ||
from pathlib import Path | ||
|
||
from src.loaders.common.loader_common_names import EGGNOG_METADATA | ||
from src.loaders.compute_tools.tool_common import ToolRunner, run_command | ||
|
||
# Value handed to emapper.py's --itype option and recorded in the run metadata.
INPUT_TYPE = "proteins"
# Value handed to emapper.py's --cpu option.
THREADS = 8
|
||
|
||
def _run_eggnog_single(
        tool_safe_data_id: str,
        data_id: str,
        source_file: Path,
        output_dir: Path,
        debug: bool) -> None:
    """
    Run eggNOG-mapper (emapper.py) on one input file and record a metadata file.

    The run is skipped when the metadata file already exists in output_dir,
    since that file is only written after emapper.py has been invoked.

    :param tool_safe_data_id: not used by this function
    :param data_id: not used by this function
    :param source_file: input file handed to emapper.py via -i
    :param output_dir: directory that receives the result files and the metadata file
    :param debug: when true, output_dir is also passed as the second argument of run_command
    """
    run_marker = output_dir / EGGNOG_METADATA
    if run_marker.exists():
        print(f"Skipping {source_file} as it has already been processed.")
        return

    # Output prefix: results are saved to the collectiondata directory.
    # Expecting 'emapper.annotations', 'emapper.hits' and 'emapper.seed_orthologs' files.
    result_prefix = output_dir / source_file.name

    # Assemble the eggNOG command line for a single genome.
    command = ['emapper.py']
    command += ['-i', source_file]          # Input file.
    command += ['-o', result_prefix]        # Output prefix.
    command += ['--itype', str(INPUT_TYPE)]
    command += ['--cpu', str(THREADS)]
    command.append('--excel')
    command += ['--sensmode', 'fast']
    command += ['--dmnd_iterate', 'no']
    command.append('--override')            # Overwrites output files if they exist from previous runs.

    debug_dir = output_dir if debug else None
    run_command(command, debug_dir)

    # Persist the run info so it can be parsed later; the presence of this file
    # is also what marks the input as already processed.
    run_info = {
        'source_file': str(source_file),
        'input_type': INPUT_TYPE,
    }
    run_marker.write_text(json.dumps(run_info, indent=4))
|
||
|
||
def main():
    """Create the "eggnog" ToolRunner and hand it the per-file eggNOG function (unzip=True)."""
    ToolRunner("eggnog").parallel_single_execution(_run_eggnog_single, unzip=True)


# Script entry point.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Version history of the eggNOG tool (presumably one entry per released image - confirm).
versions:
  - version: 0.1.0
    date: 2024-03-13
    reference_db_version: 5.0.2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters