Skip to content

Commit

Permalink
Merge pull request #748 from kbase/dev_add_bbmap_tool
Browse files Browse the repository at this point in the history
add bbmap tool
  • Loading branch information
Tianhao-Gu authored Aug 21, 2024
2 parents 4c4a7f9 + 8e91382 commit 758e036
Show file tree
Hide file tree
Showing 10 changed files with 339 additions and 158 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/build-push-bbmap-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Builds and pushes the BBMap tool image by delegating to the shared
# build-push-tool-images.yml reusable workflow (see the job at the bottom).
name: Build & Push BBMap Image to GHCR

# Triggers: pull request activity and pushes to main/master/develop. Both are
# path-filtered, so the workflow only runs when the BBMap versions file or one
# of the two workflow files involved has changed.
on:
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
paths:
- 'src/loaders/compute_tools/bbmap/versions.yaml'
- '.github/workflows/build-push-bbmap-image.yml'
- '.github/workflows/build-push-tool-images.yml'

push:
branches:
- main
- master
- develop
paths:
- 'src/loaders/compute_tools/bbmap/versions.yaml'
- '.github/workflows/build-push-bbmap-image.yml'
- '.github/workflows/build-push-tool-images.yml'

jobs:
trigger-build-push:
# Reusable workflow that lives in this repository; tool_name and version_file
# are the inputs passed to it for this tool.
uses: ./.github/workflows/build-push-tool-images.yml
with:
tool_name: bbmap
version_file: 'src/loaders/compute_tools/bbmap/versions.yaml'
# Make this workflow's secrets available to the called workflow.
secrets: inherit
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ fastapi = "==0.112.1"
uvicorn = {version = "==0.30.6", extras = ["standard"]}
jsonlines = "==4.0.0"
cacheout = "==0.16.0"
aiohttp = "==3.10.3"
aiohttp = "==3.10.5"
requests = "==2.32.3"
jinja-cli = "==1.2.2"
pandas = "==2.2.2"
Expand Down
339 changes: 184 additions & 155 deletions Pipfile.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
# KBase Collections Release Notes

## 0.1.3

* Added BBMap tool to the CDM pipeline.
* Included metadata file generation after each tool's execution.
* Updated Python library dependencies to the latest versions.
* Standardized thread management logic across all tools.
* Pass `job_id` to the tool container and remove `node_id`.
* Converted Data IDs to string format to ensure proper comparison with associated folder names.
* Fixed the `kbase_id` format for the biolog parser script.
* Added a guide for the process from KBase Genome to Collections.
* The task generator now asks for confirmation before submitting a job.

## 0.1.2

* Fixed a bug that caused requests with filters to fail for filter keys containing colons.
Expand Down
2 changes: 1 addition & 1 deletion src/common/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
The version of the KBase collections software.
'''

VERSION = "0.1.2"
VERSION = "0.1.3"
32 changes: 32 additions & 0 deletions src/loaders/compute_tools/bbmap/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Image for running the BBMap compute tool (bbmap.py) through the shared
# compute_tools entrypoint script.
FROM continuumio/miniconda3:24.5.0-0

# NOTE: If the tool version changes, ensure the metadata information saved after running the tool in the _run_bbmap_single method is updated
ARG BBMAP_VER=39.06
# Name of the conda environment that holds BBMap. The key=value form is used
# for every ENV in this file; the space-separated form is legacy syntax.
ENV CONDA_ENV=bbmap-$BBMAP_VER

# Add Bioconda and Conda-Forge channels
RUN conda config --add channels bioconda
RUN conda config --add channels conda-forge

# Install BBMap
ARG PYTHON_VER=3.11
RUN conda create -n $CONDA_ENV python=$PYTHON_VER bbmap=$BBMAP_VER
# Python libraries installed into the same environment alongside BBMap
RUN conda install -n $CONDA_ENV pandas=2.2.2 jsonlines=4.0.0

# Activate the environment
RUN echo "source activate $CONDA_ENV" >> ~/.bashrc

# Set up directories
RUN mkdir -p /app
COPY ./ /app/collections
# -f keeps the build from failing when the build context contains no .git directory
RUN rm -rf /app/collections/.git


# Lets `src.*` imports resolve from the copied repository root
ENV PYTHONPATH=/app/collections
WORKDIR /app

# Path of the tool script; presumably read by entrypoint.sh - confirm against that script
ENV PY_SCRIPT=/app/collections/src/loaders/compute_tools/bbmap/bbmap.py

RUN chmod -R 777 /app/collections

ENTRYPOINT ["/app/collections/src/loaders/compute_tools/entrypoint.sh"]
66 changes: 66 additions & 0 deletions src/loaders/compute_tools/bbmap/bbmap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
Run BBMap tool on a set of fna files.
This tool serves a distinct purpose separate from collection tools; instead, it is suited for CDM work.
Therefore, the parser program is not compatible with data generated by this tool.
"""
import time
from pathlib import Path

from src.loaders.common.loader_common_names import TOOL_METADATA
from src.loaders.compute_tools.tool_common import ToolRunner, run_command, create_tool_metadata


def _run_bbmap_single(
        tool_safe_data_id: str,
        data_id: str,
        source_file: Path,
        output_dir: Path,
        threads_per_tool_run: int,
        debug: bool) -> None:
    """
    Run BBMap's stats.sh on one source file and record the run in a metadata file.

    If the tool metadata file already exists in output_dir, the source file is
    treated as processed and nothing is run. Otherwise stats.sh writes its JSON
    report to output_dir / 'result.json', and the metadata file is created afterwards.

    tool_safe_data_id - not used by this function.
    data_id - the ID of the data being processed; used in log messages and stored in the metadata.
    source_file - the input file handed to stats.sh.
    output_dir - the directory receiving result.json and the metadata file.
    threads_per_tool_run - not used by this function.
    debug - when true, output_dir is also passed to run_command as its second argument.
    """
    began_at = time.time()
    print(f'Start executing BBMap for {data_id}')

    # An existing metadata file marks a previously completed run for this output directory.
    if (output_dir / TOOL_METADATA).exists():
        print(f"Skipping {source_file} as it has already been processed.")
        return

    result_path = output_dir / 'result.json'
    command = [
        'stats.sh',
        f'in={source_file}',
        f'out={result_path}',
        'format=8',  # output in JSON format
        'overwrite=true',
    ]

    # output_dir is only handed to run_command in debug mode
    # (presumably where it writes its logs - confirm in tool_common).
    debug_dir = output_dir if debug else None
    run_command(command, debug_dir)

    elapsed_seconds = time.time() - began_at
    print(
        f'Used {round(elapsed_seconds / 60, 2)} minutes to execute BBMap for {data_id}')

    # Save run info to a metadata file in the output directory for parsing later
    create_tool_metadata(
        output_dir,
        tool_name="bbmap",
        version="39.06",
        command=command,
        run_time=round(elapsed_seconds, 2),
        batch_size=1,
        additional_metadata={
            'source_file': str(source_file),
            'data_id': data_id,
        })


def main():
    """
    Script entry point: create a ToolRunner for the "bbmap" tool and pass it
    _run_bbmap_single through parallel_single_execution.
    """
    ToolRunner("bbmap").parallel_single_execution(_run_bbmap_single)


if __name__ == "__main__":
main()
8 changes: 8 additions & 0 deletions src/loaders/compute_tools/bbmap/versions.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# This tool serves a distinct purpose separate from collection tools; instead, it is suited for CDM work.
# Therefore, the parser program is not compatible with data generated by this tool.

versions:
- version: 0.1.0
date: 2024-08-16
notes: |
- initial BBMap implementation
3 changes: 3 additions & 0 deletions src/loaders/compute_tools/eggnog/versions.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# This tool serves a distinct purpose separate from collection tools; instead, it is suited for CDM work.
# Therefore, the parser program is not compatible with data generated by this tool.

versions:
- version: 0.1.0
date: 2024-03-13
Expand Down
2 changes: 1 addition & 1 deletion src/loaders/jobs/taskfarmer/task_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
'''

TOOLS_AVAILABLE = ['gtdb_tk', 'checkm2', 'microtrait', 'mash', 'eggnog']
TOOLS_AVAILABLE = ['gtdb_tk', 'checkm2', 'microtrait', 'mash', 'eggnog', 'bbmap']

NODE_TIME_LIMIT_DEFAULT = 5 # hours
# Used as THREADS variable in the batch script which controls the number of parallel tasks per node
Expand Down

0 comments on commit 758e036

Please sign in to comment.