-
Notifications
You must be signed in to change notification settings - Fork 101
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
RNA pipeline with adapter clipping (#662)
* enable adapter clipping * Bring fastp tool into warp managed docker images * add fastQC as output to pipeline * Update to public version of Illumina_adapters * add new outputs to TDR, update docker tag for ingest script * sort unmapped just in case and fastp disable length filtering * round fastqc_percent_reads_with_adapter to 5 digits
- Loading branch information
1 parent
956a82d
commit 000b278
Showing
22 changed files
with
644 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Lightweight Debian image that provides the fastp binary, with tini as init.
FROM debian:bullseye-slim

# fastp release to download; override with --build-arg FASTP_VERSION=<version>
ARG FASTP_VERSION=0.20.1

# NOTE(review): FASTP_URL is plain HTTP and the download is not checksum
# verified -- consider pinning a SHA-256 for the fastp binary.
ENV TERM=xterm-256color \
    FASTP_URL=http://opengene.org/fastp/fastp.${FASTP_VERSION} \
    TINI_VERSION=v0.19.0

LABEL MAINTAINER="Broad Institute DSDE <dsde-engineering@broadinstitute.org>" \
      FASTP_VERSION=${FASTP_VERSION}

WORKDIR /usr/gitc

# Install dependencies
# --no-install-recommends keeps the image small; ca-certificates is listed
# explicitly because wget needs it for the HTTPS tini download and it would
# otherwise only be pulled in as a "recommended" package.
RUN set -eux; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    ; \
    # Install fastp (downloaded straight to its final location)
    wget "${FASTP_URL}" -O /usr/local/bin/fastp; \
    chmod a+x /usr/local/bin/fastp; \
    # Install tini
    wget "https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini" -O /sbin/tini; \
    chmod +x /sbin/tini; \
    # Clean up cached files
    apt-get clean; \
    rm -rf /var/lib/apt/lists/*

# Set tini as default entrypoint
ENTRYPOINT [ "/sbin/tini", "--" ]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# RNA Seq fastp | ||
|
||
## Quick reference | ||
|
||
Copy and paste to pull this image | ||
|
||
#### `us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500` | ||
|
||
- __What is this image:__ This image is a lightweight Debian-based image for running the fastp tool within our RNA sequencing pipeline. | ||
- __What is fastp:__ fastp from OpenGene is a tool designed to provide fast all-in-one preprocessing for FastQ files. See [here](https://github.com/OpenGene/fastp) for more information. | ||
- __How to see tool version used in image:__ Please see below. | ||
|
||
## Versioning | ||
|
||
The fastp image uses the following convention for versioning: | ||
|
||
#### `us.gcr.io/broad-gotc-prod/fastp:<image-version>-<fastp-version>-<unix-timestamp>` | ||
|
||
We keep track of all past versions in [docker_versions](docker_versions.tsv) with the last image listed being the currently used version in WARP. | ||
|
||
You can see more information about the image, including the tool versions, by running the following command: | ||
|
||
```bash | ||
$ docker pull us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500 | ||
$ docker inspect us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500 | ||
``` | ||
|
||
## Usage | ||
|
||
```bash | ||
$ docker run --rm -it \ | ||
us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500 fastp | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Configuration for the fastp image build script (requires bash: uses arrays).
# Abort on the first failing command so a failed build is never pushed/recorded.
set -e

# Update version when changes to Dockerfile are made
DOCKER_IMAGE_VERSION=1.0.0
# Unix timestamp, used as the last component of the image tag
TIMESTAMP=$(date +"%s")
# Absolute directory of this script; quoted so paths with spaces work
DIR=$(cd "$(dirname "$0")" && pwd)

# Registries and tags
GCR_URL="us.gcr.io/broad-gotc-prod/fastp"
QUAY_URL="quay.io/broadinstitute/gotc-prod-fastp"

# fastp version (default; can be overridden with -v|--version)
FASTP_VERSION="0.20.1"

# Necessary tools and help text
TOOLS=(docker gcloud)
HELP="$(basename "$0") [-h|--help] [-v|--version <version>] [-t|--tools] -- script to build the fastp image and push to GCR & Quay
where:
    -h|--help Show help text
    -v|--version Version of fastp to use (default: $FASTP_VERSION)
    -t|--tools Show tools needed to run script
    "
|
||
#######################################
# Build the fastp image, push it to GCR and record the pushed tag.
# Globals:
#   TOOLS, HELP, DOCKER_IMAGE_VERSION, TIMESTAMP, GCR_URL, DIR (read)
#   FASTP_VERSION (read; overwritten by -v|--version)
#   IMAGE_TAG (written)
# Arguments:
#   [-v|--version <fastp version>] [-h|--help] [-t|--tools]
# Outputs:
#   Progress on stdout, diagnostics on stderr; appends the pushed image
#   reference to "$DIR/docker_versions.tsv".
# Returns:
#   Exits 0 after --help/--tools; exits 1 on a missing tool or a
#   -v|--version flag without a value.
#######################################
main() {
    local tool
    local missing=()

    # Parse options first so that --help and --tools work even on a machine
    # where the required tools are not installed.
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -v|--version)
                # Fail with a message instead of letting a bare `shift`
                # abort the script silently under `set -e`.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    echo "Error: $1 requires a fastp version argument" >&2
                    echo "$HELP" >&2
                    exit 1
                fi
                FASTP_VERSION="$2"
                shift 2
                ;;
            -h|--help)
                echo "$HELP"
                exit 0
                ;;
            -t|--tools)
                for tool in "${TOOLS[@]}"; do echo "$tool"; done
                exit 0
                ;;
            *)
                # Unrecognized arguments are still ignored, but say so.
                echo "Warning: ignoring unrecognized argument '$1'" >&2
                shift
                ;;
        esac
    done

    # Verify every required tool is on PATH before doing any work.
    for tool in "${TOOLS[@]}"; do
        command -v "$tool" >/dev/null 2>&1 || missing+=("$tool")
    done
    if [[ ${#missing[@]} -gt 0 ]]; then
        echo "Missing one of the following tools: " >&2
        for tool in "${TOOLS[@]}"; do echo "$tool" >&2; done
        exit 1
    fi

    IMAGE_TAG="$DOCKER_IMAGE_VERSION-$FASTP_VERSION-$TIMESTAMP"

    echo "building and pushing GCR Image - $GCR_URL:$IMAGE_TAG"
    docker build --no-cache -t "$GCR_URL:$IMAGE_TAG" \
        --build-arg FASTP_VERSION="$FASTP_VERSION" "$DIR"
    docker push "$GCR_URL:$IMAGE_TAG"

    #echo "tagging and pushing Quay Image"
    #docker tag "$GCR_URL:$IMAGE_TAG" "$QUAY_URL:$IMAGE_TAG"
    #docker push "$QUAY_URL:$IMAGE_TAG"

    # Record the pushed image; the last line is the currently used version.
    echo "$GCR_URL:$IMAGE_TAG" >> "$DIR/docker_versions.tsv"
    echo "done"
}
|
||
main "$@" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
us.gcr.io/broad-gotc-prod/fastp:1.0.0-0.20.1-1649253500 |
5 changes: 5 additions & 0 deletions
5
pipelines/broad/internal/rna_seq/BroadInternalRNAWithUMIs.changelog.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
...ad/internal/rna_seq/test_inputs/Plumbing/SM-K4Y2X_350ng_.65X_D3.hg19.fastqs.plumbing.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"BroadInternalRNAWithUMIs.sample_lsid": "broadinstitute.org:bsp.prod.sample:K4Y2X", | ||
"BroadInternalRNAWithUMIs.output_basename": "SM-K4Y2X_350ng_.65X_D3", | ||
|
||
"BroadInternalRNAWithUMIs.reference_build": "hg19", | ||
|
||
"BroadInternalRNAWithUMIs.r1_fastq": "gs://broad-gotc-test-storage/rna_seq/rna_with_umis/plumbing/inputs/SM-K4Y2X_350ng_.65X_D3_read1_plumbing.fastq", | ||
"BroadInternalRNAWithUMIs.r2_fastq": "gs://broad-gotc-test-storage/rna_seq/rna_with_umis/plumbing/inputs/SM-K4Y2X_350ng_.65X_D3_read2_plumbing.fastq", | ||
"BroadInternalRNAWithUMIs.read1Structure": "3M2S146T", | ||
"BroadInternalRNAWithUMIs.read2Structure": "3M2S146T", | ||
"BroadInternalRNAWithUMIs.library_name": "SM-K4Y2X_350ng_.65X_D3", | ||
"BroadInternalRNAWithUMIs.platform": "ILLUMINA", | ||
"BroadInternalRNAWithUMIs.platform_unit": "barcode1", | ||
"BroadInternalRNAWithUMIs.read_group_name": "RG1", | ||
|
||
"BroadInternalRNAWithUMIs.vault_token_path": "{VAULT_TOKEN_PATH}", | ||
"BroadInternalRNAWithUMIs.environment": "{ENV}" | ||
} |
18 changes: 18 additions & 0 deletions
18
...ad/internal/rna_seq/test_inputs/Plumbing/SM-K4Y2X_350ng_.65X_D3.hg38.fastqs.plumbing.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"BroadInternalRNAWithUMIs.sample_lsid": "broadinstitute.org:bsp.prod.sample:K4Y2X", | ||
"BroadInternalRNAWithUMIs.output_basename": "SM-K4Y2X_350ng_.65X_D3", | ||
|
||
"BroadInternalRNAWithUMIs.reference_build": "hg38", | ||
|
||
"BroadInternalRNAWithUMIs.r1_fastq": "gs://broad-gotc-test-storage/rna_seq/rna_with_umis/plumbing/inputs/SM-K4Y2X_350ng_.65X_D3_read1_plumbing.fastq", | ||
"BroadInternalRNAWithUMIs.r2_fastq": "gs://broad-gotc-test-storage/rna_seq/rna_with_umis/plumbing/inputs/SM-K4Y2X_350ng_.65X_D3_read2_plumbing.fastq", | ||
"BroadInternalRNAWithUMIs.read1Structure": "3M2S146T", | ||
"BroadInternalRNAWithUMIs.read2Structure": "3M2S146T", | ||
"BroadInternalRNAWithUMIs.library_name": "SM-K4Y2X_350ng_.65X_D3", | ||
"BroadInternalRNAWithUMIs.platform": "ILLUMINA", | ||
"BroadInternalRNAWithUMIs.platform_unit": "barcode1", | ||
"BroadInternalRNAWithUMIs.read_group_name": "RG1", | ||
|
||
"BroadInternalRNAWithUMIs.vault_token_path": "{VAULT_TOKEN_PATH}", | ||
"BroadInternalRNAWithUMIs.environment": "{ENV}" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.