From 0b5209ddd283219c6d2e096c6bdeba0a44ae1631 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 16 Feb 2024 16:35:22 -0800 Subject: [PATCH 1/4] removed wdl-common --- workflows/wdl-common/LICENSE | 34 --- workflows/wdl-common/README.md | 5 - workflows/wdl-common/wdl/structs.wdl | 22 -- .../wdl-common/wdl/tasks/bcftools_stats.wdl | 52 ---- workflows/wdl-common/wdl/tasks/glnexus.wdl | 70 ----- workflows/wdl-common/wdl/tasks/mosdepth.wdl | 50 ---- workflows/wdl-common/wdl/tasks/pbsv_call.wdl | 56 ---- .../wdl-common/wdl/tasks/pbsv_discover.wdl | 49 ---- workflows/wdl-common/wdl/tasks/pharmcat.wdl | 272 ------------------ .../wdl-common/wdl/tasks/samtools_fasta.wdl | 45 --- .../wdl/tasks/whatshap_haplotag.wdl | 66 ----- .../wdl-common/wdl/tasks/whatshap_phase.wdl | 58 ---- .../wdl-common/wdl/tasks/whatshap_stats.wdl | 51 ---- .../wdl-common/wdl/tasks/zip_index_vcf.wdl | 53 ---- .../backend_configuration.wdl | 119 -------- .../wdl/workflows/deepvariant/deepvariant.wdl | 269 ----------------- .../wdl/workflows/deepvariant/inputs.json | 23 -- .../wdl/workflows/phase_vcf/inputs.json | 40 --- .../wdl/workflows/phase_vcf/phase_vcf.wdl | 182 ------------ 19 files changed, 1516 deletions(-) delete mode 100644 workflows/wdl-common/LICENSE delete mode 100644 workflows/wdl-common/README.md delete mode 100644 workflows/wdl-common/wdl/structs.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/bcftools_stats.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/glnexus.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/mosdepth.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/pbsv_call.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/pbsv_discover.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/pharmcat.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/samtools_fasta.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/whatshap_haplotag.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/whatshap_phase.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/whatshap_stats.wdl delete mode 100644 workflows/wdl-common/wdl/tasks/zip_index_vcf.wdl delete mode 100644 workflows/wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl delete mode 100644 workflows/wdl-common/wdl/workflows/deepvariant/deepvariant.wdl delete mode 100644 workflows/wdl-common/wdl/workflows/deepvariant/inputs.json delete mode 100644 workflows/wdl-common/wdl/workflows/phase_vcf/inputs.json delete mode 100644 workflows/wdl-common/wdl/workflows/phase_vcf/phase_vcf.wdl diff --git a/workflows/wdl-common/LICENSE b/workflows/wdl-common/LICENSE deleted file mode 100644 index aaea0c1..0000000 --- a/workflows/wdl-common/LICENSE +++ /dev/null @@ -1,34 +0,0 @@ -Copyright (c) 2023, Pacific Biosciences of California, Inc. - -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted (subject to the limitations in the -disclaimer below) provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. - - * Neither the name of Pacific Biosciences nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. 
- -NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE -GRANTED BY THIS LICENSE. THIS SOFTWARE IS PROVIDED BY PACIFIC -BIOSCIENCES AND ITS CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL PACIFIC BIOSCIENCES OR ITS -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT -OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -SUCH DAMAGE. diff --git a/workflows/wdl-common/README.md b/workflows/wdl-common/README.md deleted file mode 100644 index 07dcb73..0000000 --- a/workflows/wdl-common/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# wdl-common - -Workflows and tasks reused across PacBio workflows. - -**The WDL files here are under active development and are currently provided in an unsupported format.** diff --git a/workflows/wdl-common/wdl/structs.wdl b/workflows/wdl-common/wdl/structs.wdl deleted file mode 100644 index 3a5284e..0000000 --- a/workflows/wdl-common/wdl/structs.wdl +++ /dev/null @@ -1,22 +0,0 @@ -version 1.0 - -struct IndexData { - File data - File data_index -} - -struct DeepVariantModel { - IndexData model - File metadata -} - -struct RuntimeAttributes { - # The number of times to retry a task that fails due to preemption - Int preemptible_tries - # The number of times to retry a task that fails due a to nonzero return code - Int max_retries - - String zones - String queue_arn - String container_registry -} \ No newline at end of file diff --git a/workflows/wdl-common/wdl/tasks/bcftools_stats.wdl b/workflows/wdl-common/wdl/tasks/bcftools_stats.wdl deleted file mode 100644 index 93db9af..0000000 --- a/workflows/wdl-common/wdl/tasks/bcftools_stats.wdl +++ /dev/null @@ -1,52 +0,0 @@ -version 1.0 - -# Calculate VCF stats - -import "../structs.wdl" - -task bcftools_stats { - input { - File vcf - String? params - - File? 
reference - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(vcf, ".gz") - - Int threads = 2 - Int reference_size = if (defined(reference)) then ceil(size(reference, "GB")) else 0 - Int disk_size = ceil((size(vcf, "GB") + reference_size) * 2 + 20) - - command <<< - set -euo pipefail - - bcftools --help - - bcftools stats \ - --threads ~{threads - 1} \ - ~{params} \ - ~{"--fasta-ref " + reference} \ - ~{vcf} \ - > ~{vcf_basename}.stats.txt - >>> - - output { - File stats = "~{vcf_basename}.stats.txt" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/bcftools@sha256:36d91d5710397b6d836ff87dd2a924cd02fdf2ea73607f303a8544fbac2e691f" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/glnexus.wdl b/workflows/wdl-common/wdl/tasks/glnexus.wdl deleted file mode 100644 index 49b49fc..0000000 --- a/workflows/wdl-common/wdl/tasks/glnexus.wdl +++ /dev/null @@ -1,70 +0,0 @@ -version 1.0 - -# Run joint calling using GLnexus - -import "../structs.wdl" - -task glnexus { - input { - String cohort_id - Array[File] gvcfs - Array[File] gvcf_indices - - String reference_name - - File? regions_bed - - Int mem_gb = 30 - - RuntimeAttributes runtime_attributes - } - - Int threads = 24 - Int disk_size = ceil((size(gvcfs[0], "GB") * length(gvcfs)) * 2 + 100) - - command <<< - set -euo pipefail - - # glneux_cli has no version option - glnexus_cli --help 2>&1 | grep -Eo 'glnexus_cli release v[0-9a-f.-]+' - - glnexus_cli \ - --threads ~{threads} \ - --mem-gbytes ~{mem_gb} \ - --dir ~{cohort_id}.~{reference_name}.GLnexus.DB \ - --config DeepVariant_unfiltered \ - ~{"--bed " + regions_bed} \ - ~{sep=' ' gvcfs} \ - > ~{cohort_id}.~{reference_name}.deepvariant.glnexus.bcf - - bcftools --version - - bcftools view \ - --threads ~{threads} \ - --output-type z \ - --output-file ~{cohort_id}.~{reference_name}.deepvariant.glnexus.vcf.gz \ - ~{cohort_id}.~{reference_name}.deepvariant.glnexus.bcf - - tabix --version - - tabix ~{cohort_id}.~{reference_name}.deepvariant.glnexus.vcf.gz - >>> - - output { - File vcf = "~{cohort_id}.~{reference_name}.deepvariant.glnexus.vcf.gz" - File vcf_index = "~{cohort_id}.~{reference_name}.deepvariant.glnexus.vcf.gz.tbi" - } - - runtime { - docker: "ghcr.io/dnanexus-rnd/glnexus:v1.4.1" - cpu: threads - memory: mem_gb + " GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/mosdepth.wdl b/workflows/wdl-common/wdl/tasks/mosdepth.wdl deleted file mode 100644 index 6d79ffe..0000000 --- a/workflows/wdl-common/wdl/tasks/mosdepth.wdl +++ /dev/null @@ -1,50 +0,0 @@ -version 1.0 - -# Calculate summary stats using mosdepth - -import "../structs.wdl" - -task mosdepth { - input { - File aligned_bam - File aligned_bam_index - - RuntimeAttributes runtime_attributes - } - - String prefix = basename(aligned_bam, ".bam") - Int threads = 4 - Int disk_size = ceil(size(aligned_bam, "GB") * 2 + 20) - - command <<< - set -euo pipefail - - mosdepth 
--version - - mosdepth \ - --threads ~{threads - 1} \ - --by 500 \ - --no-per-base \ - --use-median \ - ~{prefix} \ - ~{aligned_bam} - >>> - - output { - File summary = "~{prefix}.mosdepth.summary.txt" - File region_bed = "~{prefix}.regions.bed.gz" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/mosdepth@sha256:3ebb896013e205db072c55a9fdcd322773f4a4bcdc7bedecc80b220b88e9b750" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " LOCAL" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/pbsv_call.wdl b/workflows/wdl-common/wdl/tasks/pbsv_call.wdl deleted file mode 100644 index 9f1f230..0000000 --- a/workflows/wdl-common/wdl/tasks/pbsv_call.wdl +++ /dev/null @@ -1,56 +0,0 @@ -version 1.0 - -# Call SVs using pbsv - -import "../structs.wdl" - -task pbsv_call { - input { - String sample_id - Array[File] svsigs - Int? sample_count - - File reference - File reference_index - String reference_name - - Int mem_gb = if select_first([sample_count, 1]) > 3 then 96 else 64 - - RuntimeAttributes runtime_attributes - } - - Int threads = 8 - Int disk_size = ceil((size(svsigs[0], "GB") * length(svsigs) + size(reference, "GB")) * 2 + 20) - - command <<< - set -euo pipefail - - pbsv --version - - pbsv call \ - --hifi \ - --min-sv-length 20 \ - --log-level INFO \ - --num-threads ~{threads} \ - ~{reference} \ - ~{sep=' ' svsigs} \ - ~{sample_id}.~{reference_name}.pbsv.vcf - >>> - - output { - File pbsv_vcf = "~{sample_id}.~{reference_name}.pbsv.vcf" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/pbsv@sha256:798ca327f653c4e666b9f7c6a09260a762eea4e7e6864f490a87ed4106a53b98" - cpu: threads - memory: "~{mem_gb} GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/pbsv_discover.wdl b/workflows/wdl-common/wdl/tasks/pbsv_discover.wdl deleted file mode 100644 index 912ccfb..0000000 --- a/workflows/wdl-common/wdl/tasks/pbsv_discover.wdl +++ /dev/null @@ -1,49 +0,0 @@ -version 1.0 - -# Generate svsigs to be used in SV calling using pbsv - -import "../structs.wdl" - -task pbsv_discover { - input { - File aligned_bam - File aligned_bam_index - - File reference_tandem_repeat_bed - - RuntimeAttributes runtime_attributes - } - - String prefix = basename(aligned_bam, ".bam") - Int disk_size = ceil((size(aligned_bam, "GB") + size(reference_tandem_repeat_bed, "GB")) * 2 + 20) - - command <<< - set -euo pipefail - - pbsv --version - - pbsv discover \ - --log-level INFO \ - --hifi \ - --tandem-repeats ~{reference_tandem_repeat_bed} \ - ~{aligned_bam} \ - ~{prefix}.svsig.gz - >>> - - output { - File svsig = "~{prefix}.svsig.gz" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/pbsv@sha256:798ca327f653c4e666b9f7c6a09260a762eea4e7e6864f490a87ed4106a53b98" - cpu: 2 - memory: "8 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - 
queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/pharmcat.wdl b/workflows/wdl-common/wdl/tasks/pharmcat.wdl deleted file mode 100644 index 52abf6c..0000000 --- a/workflows/wdl-common/wdl/tasks/pharmcat.wdl +++ /dev/null @@ -1,272 +0,0 @@ -version 1.0 - -import "../structs.wdl" - -workflow pharmcat { - input { - Array[Pair[String,Map[String,IndexData]]] sample_data - - IndexData reference - File reference_chromosome_lengths - - IndexData pharmcat_positions - Int pharmcat_min_coverage - - RuntimeAttributes default_runtime_attributes - } - - scatter (sample in sample_data) { - call pangu_cyp2d6 { - input: - haplotagged_bam = sample.right["haplotagged_bam"].data, - haplotagged_bam_index = sample.right["haplotagged_bam"].data_index, - runtime_attributes = default_runtime_attributes - } - - call pharmcat_preprocess { - input: - vcf = sample.right["phased_vcf"].data, - vcf_index = sample.right["phased_vcf"].data_index, - reference = reference.data, - reference_index = reference.data_index, - pharmcat_positions = pharmcat_positions.data, - pharmcat_positions_index = pharmcat_positions.data_index, - runtime_attributes = default_runtime_attributes - } - - call filter_preprocessed_vcf { - input: - preprocessed_vcf = pharmcat_preprocess.preprocessed_vcf, - aligned_bam = sample.right["aligned_bam"].data, - aligned_bam_index = sample.right["aligned_bam"].data_index, - reference_chromosome_lengths = reference_chromosome_lengths, - min_coverage = pharmcat_min_coverage, - runtime_attributes = default_runtime_attributes - } - - call run_pharmcat { - input: - preprocessed_filtered_vcf = filter_preprocessed_vcf.filtered_vcf, - pangu_tsv = pangu_cyp2d6.fixed_pangu_tsv, - reference = reference.data, - reference_index = reference.data_index, - runtime_attributes = default_runtime_attributes - } - } - - output { - Array[File] pangu_jsons = pangu_cyp2d6.pangu_json - Array[File] pangu_tsvs = pangu_cyp2d6.pangu_tsv - Array[File] fixed_pangu_tsvs = pangu_cyp2d6.fixed_pangu_tsv - - Array[File?] 
pharmcat_missing_pgx_vcfs = pharmcat_preprocess.missing_pgx_vcf - Array[File] pharmcat_preprocessed_filtered_vcfs = filter_preprocessed_vcf.filtered_vcf - - Array[File] pharmcat_match_jsons = run_pharmcat.pharmcat_match_json - Array[File] pharmcat_phenotype_jsons = run_pharmcat.pharmcat_phenotype_json - Array[File] pharmcat_report_htmls = run_pharmcat.pharmcat_report_html - Array[File] pharmcat_report_jsons = run_pharmcat.pharmcat_report_json - } - - parameter_meta { - sample_data: {help: "Array of pairs mapping sample ID to aligned bam, haplotagged bam, gvcf, and phased VCF files for the sample"} - reference: {help: "Reference genome data"} - pharmcat_positions: {help: "VCF file and index specifying pharmact positions"} - pharmcat_min_coverage: {help: "Minimum coverage cutoff used to filter the preprocessed VCF passed to pharmcat"} - default_runtime_attributes: {help: "Default RuntimeAttributes; spot if preemptible was set to true, otherwise on_demand"} - } -} - -# Call CYP2D6 for sample -task pangu_cyp2d6 { - input { - File haplotagged_bam - File haplotagged_bam_index - - RuntimeAttributes runtime_attributes - } - - String haplotagged_bam_basename = basename(haplotagged_bam, ".bam") - Int disk_size = ceil(size(haplotagged_bam, "GB") * 2 + 20) - - command <<< - set -euo pipefail - - pangu \ - -m capture \ - -p ~{haplotagged_bam_basename}.pangu \ - ~{haplotagged_bam} - - # Fix the pangu output with missing calls for the sample - awk \ - 'BEGIN {{OFS="\t"}} !($2 ~ /\//) {{$2=$2"/[]"}} 1' \ - ~{haplotagged_bam_basename}.pangu_pharmcat.tsv \ - > ~{haplotagged_bam_basename}.pangu_pharmcat_fix.tsv - >>> - - output { - File pangu_json = "~{haplotagged_bam_basename}.pangu_report.json" - File pangu_tsv = "~{haplotagged_bam_basename}.pangu_pharmcat.tsv" - File fixed_pangu_tsv = "~{haplotagged_bam_basename}.pangu_pharmcat_fix.tsv" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/pangu@sha256:477dfa87eb98f54708dad3b20cab24ea1a171886b0b2b9d436b3ffc4e899b908" - cpu: 2 - memory: "12 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} - -# Preprocess phased VCF for sample -task pharmcat_preprocess { - input { - File vcf - File vcf_index - - File reference - File reference_index - - File pharmcat_positions - File pharmcat_positions_index - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(vcf, ".vcf.gz") - Int disk_size = ceil((size(vcf, "GB") + size(reference, "GB") + size(pharmcat_positions, "GB")) * 2 + 20) - - command <<< - set -euo pipefail - - /pharmcat/pharmcat_vcf_preprocessor.py \ - --missing-to-ref \ - -vcf ~{vcf} \ - -refFna ~{reference} \ - -refVcf ~{pharmcat_positions} \ - -o . - >>> - - output { - File preprocessed_vcf = "~{vcf_basename}.preprocessed.vcf.bgz" - File? 
missing_pgx_vcf = "~{vcf_basename}.missing_pgx_var.vcf" - } - - runtime { - docker: "pgkb/pharmcat:2.3.0" - cpu: 2 - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} - -# Remove ref calls with low mean coverage for sample -task filter_preprocessed_vcf { - input { - File preprocessed_vcf - - File aligned_bam - File aligned_bam_index - - File reference_chromosome_lengths - - Int min_coverage - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(preprocessed_vcf, ".vcf.bgz") - Int disk_size = ceil((size(preprocessed_vcf, "GB") + size(aligned_bam, "GB")) * 2 + 20) - - command <<< - set -euo pipefail - - bedtools coverage \ - -sorted \ - -g ~{reference_chromosome_lengths} \ - -f 1 \ - -header \ - -mean \ - -a ~{preprocessed_vcf} \ - -b ~{aligned_bam} \ - | ( sed -u '/^#CHROM/q' ; awk '$11 >= ~{min_coverage}' | cut -f1-10 ) \ - > ~{vcf_basename}.filtered.vcf - >>> - - output { - File filtered_vcf = "~{vcf_basename}.filtered.vcf" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/samtools@sha256:a843074b9be9505e6e6e93385975f761617fcce4c486fcebf97ab65075ed6bd4" - cpu: 2 - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} - -# Run pharmcat for sample -task run_pharmcat { - input { - File preprocessed_filtered_vcf - - File pangu_tsv - - File reference - File reference_index - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(preprocessed_filtered_vcf, ".vcf") - Int disk_size = ceil((size(preprocessed_filtered_vcf, "GB") + size(reference, "GB")) * 2 + 20) - - command <<< - set -euo pipefail - - # Run pharmcat - /pharmcat/pharmcat \ - -vcf ~{preprocessed_filtered_vcf} \ - -reporterJson \ - -po ~{pangu_tsv} \ - -o . 
- >>> - - output { - File pharmcat_match_json = "~{vcf_basename}.match.json" - File pharmcat_phenotype_json = "~{vcf_basename}.phenotype.json" - File pharmcat_report_html = "~{vcf_basename}.report.html" - File pharmcat_report_json = "~{vcf_basename}.report.json" - } - - runtime { - docker: "pgkb/pharmcat:2.3.0" - cpu: 2 - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/samtools_fasta.wdl b/workflows/wdl-common/wdl/tasks/samtools_fasta.wdl deleted file mode 100644 index 0aacfdb..0000000 --- a/workflows/wdl-common/wdl/tasks/samtools_fasta.wdl +++ /dev/null @@ -1,45 +0,0 @@ -version 1.0 - -# Convert a BAM to a FASTA file using samtools - -import "../structs.wdl" - -task samtools_fasta { - input { - File bam - - RuntimeAttributes runtime_attributes - } - - String bam_basename = basename(bam, ".bam") - Int threads = 4 - Int disk_size = ceil(size(bam, "GB") * 3.5 + 20) - - command <<< - set -euo pipefail - - samtools --version - - samtools fasta \ - -@ ~{threads - 1} \ - ~{bam} \ - > ~{bam_basename}.fasta - >>> - - output { - File reads_fasta = "~{bam_basename}.fasta" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/samtools@sha256:83ca955c4a83f72f2cc229f41450eea00e73333686f3ed76f9f4984a985c85bb" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/whatshap_haplotag.wdl b/workflows/wdl-common/wdl/tasks/whatshap_haplotag.wdl deleted file mode 100644 index 6478429..0000000 --- a/workflows/wdl-common/wdl/tasks/whatshap_haplotag.wdl +++ /dev/null @@ -1,66 +0,0 @@ -version 1.0 - -# Haplotag an aligned BAM file using a phased VCF with WhatsHap - -import "../structs.wdl" - -task whatshap_haplotag { - input { - File phased_vcf - File phased_vcf_index - - File aligned_bam - File aligned_bam_index - - File reference - File reference_index - - String? params - String? 
output_bam_name - - RuntimeAttributes runtime_attributes - } - - String output_bam = select_first([output_bam_name, "~{basename(aligned_bam, '.bam')}.haplotagged.bam"]) - Int threads = 4 - Int disk_size = ceil((size(phased_vcf, "GB") + size(aligned_bam, "GB") + size(reference, "GB")) * 2 + 20) - - command <<< - set -euo pipefail - - whatshap --version - - whatshap haplotag \ - ~{params} \ - --tag-supplementary \ - --output-threads ~{threads} \ - --reference ~{reference} \ - --output ~{output_bam} \ - ~{phased_vcf} \ - ~{aligned_bam} - - samtools --version - - samtools index \ - -@ ~{threads - 1} \ - ~{output_bam} - >>> - - output { - File haplotagged_bam = "~{output_bam}" - File haplotagged_bam_index = "~{output_bam}.bai" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/whatshap@sha256:34957019d127e9c9c888a38061b28af8c1a42ec9e131bf1b806f70c6e96a1fca" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/whatshap_phase.wdl b/workflows/wdl-common/wdl/tasks/whatshap_phase.wdl deleted file mode 100644 index 7ad3154..0000000 --- a/workflows/wdl-common/wdl/tasks/whatshap_phase.wdl +++ /dev/null @@ -1,58 +0,0 @@ -version 1.0 - -# Phase a VCF using WhatsHap - -import "../structs.wdl" - -task whatshap_phase { - input { - File vcf - File vcf_index - String? chromosome - - Array[File] aligned_bams - Array[File] aligned_bam_indices - - File reference - File reference_index - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(vcf, ".vcf.gz") - Int disk_size = ceil((size(vcf, "GB") + size(reference, "GB") + size(aligned_bams[0], "GB") * length(aligned_bams)) * 2 + 20) - - command <<< - set -euo pipefail - - whatshap --version - - whatshap phase \ - --indels \ - --reference ~{reference} \ - ~{"--chromosome " + chromosome} \ - --output ~{vcf_basename}.phased.vcf.gz \ - ~{vcf} \ - ~{sep=' ' aligned_bams} - - tabix ~{vcf_basename}.phased.vcf.gz - >>> - - output { - File phased_vcf = "~{vcf_basename}.phased.vcf.gz" - File phased_vcf_index = "~{vcf_basename}.phased.vcf.gz.tbi" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/whatshap@sha256:34957019d127e9c9c888a38061b28af8c1a42ec9e131bf1b806f70c6e96a1fca" - cpu: 2 - memory: "8 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/whatshap_stats.wdl b/workflows/wdl-common/wdl/tasks/whatshap_stats.wdl deleted file mode 100644 index 6ecd685..0000000 --- a/workflows/wdl-common/wdl/tasks/whatshap_stats.wdl +++ /dev/null @@ -1,51 +0,0 @@ -version 1.0 - -# Calculate stats from a phased VCF using WhatsHap - -import "../structs.wdl" - -task whatshap_stats { - input { - File phased_vcf - File phased_vcf_index - - File reference_chromosome_lengths - - RuntimeAttributes runtime_attributes - } - - String output_basename = basename(phased_vcf, ".vcf.gz") - Int disk_size = ceil(size(phased_vcf, "GB") * 2 + 20) - - command <<< - set -euo pipefail - - whatshap --version - - whatshap stats \ - 
--gtf ~{output_basename}.gtf \ - --tsv ~{output_basename}.tsv \ - --block-list ~{output_basename}.blocklist \ - --chr-lengths ~{reference_chromosome_lengths} \ - ~{phased_vcf} - >>> - - output { - File gtf = "~{output_basename}.gtf" - File tsv = "~{output_basename}.tsv" - File blocklist = "~{output_basename}.blocklist" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/whatshap@sha256:34957019d127e9c9c888a38061b28af8c1a42ec9e131bf1b806f70c6e96a1fca" - cpu: 2 - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/tasks/zip_index_vcf.wdl b/workflows/wdl-common/wdl/tasks/zip_index_vcf.wdl deleted file mode 100644 index 6f60b03..0000000 --- a/workflows/wdl-common/wdl/tasks/zip_index_vcf.wdl +++ /dev/null @@ -1,53 +0,0 @@ -version 1.0 - -# Zip and index a VCF file - -import "../structs.wdl" - -task zip_index_vcf { - input { - File vcf - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(vcf) - Int threads = 4 - Int disk_size = ceil(size(vcf, "GB") * 2 + 20) - - command <<< - set -euo pipefail - - bgzip --version - - bgzip \ - --threads ~{threads} \ - --stdout \ - ~{vcf} \ - > ~{vcf_basename}.gz - - tabix --version - - tabix \ - --preset vcf \ - ~{vcf_basename}.gz - >>> - - output { - File zipped_vcf = "~{vcf_basename}.gz" - File zipped_vcf_index = "~{vcf_basename}.gz.tbi" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/htslib@sha256:24ae834b9d4ba3ea3c23d77b2ce49b3a56a6e32d1367470e8e1160eb645019a9" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " LOCAL" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl b/workflows/wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl deleted file mode 100644 index f2a5de9..0000000 --- a/workflows/wdl-common/wdl/workflows/backend_configuration/backend_configuration.wdl +++ /dev/null @@ -1,119 +0,0 @@ -version 1.0 - -# Set runtime attributes across environments depending on the backend in use - -import "../../structs.wdl" - -workflow backend_configuration { - input { - String backend - String? zones - String? aws_spot_queue_arn - String? 
aws_on_demand_queue_arn - String container_registry = "quay.io/pacbio" - } - - if (backend == "GCP") { - # zones must be defined - - # preemptible_tries applies to failures due to preemption only - # max_retries applies to failures due to a nonzero rc - # queue_arn is not used in GCP - RuntimeAttributes gcp_spot_runtime_attributes = { - "preemptible_tries": 3, - "max_retries": 0, - "zones": select_first([zones]), - "queue_arn": "", - "container_registry": container_registry - } - - RuntimeAttributes gcp_on_demand_runtime_attributes = { - "preemptible_tries": 0, - "max_retries": 0, - "zones": select_first([zones]), - "queue_arn": "", - "container_registry": container_registry - } - } - - if (backend == "Azure") { - # Requires Cromwell on Azure v3.2+ - # preemptible_tries >= 1 will be converted to `true`; 0 will be converted to `false` - # max_retries applies to failures due to preemption or to a nonzero rc - # zones, queue_arn not used in Azure - RuntimeAttributes azure_spot_runtime_attributes = { - "preemptible_tries": 3, - "max_retries": 3, - "zones": "", - "queue_arn": "", - "container_registry": container_registry - } - - RuntimeAttributes azure_on_demand_runtime_attributes = { - "preemptible_tries": 0, - "max_retries": 0, - "zones": "", - "queue_arn": "", - "container_registry": container_registry - } - } - - if (backend == "AWS") { - # zones must be defined - # aws_spot_queue_arn must be defined if preemptible is set to true and engine is not miniwdl - # aws_on_demand_queue_arn must be defined if preemptible is set to false and engine is not miniwdl - # Using miniwdl engine, the queue ARN of the context the workflow has been submitted to will be used; - # the queue_arn runtime attribute will be ignored - - # max_retries applies to failures due to preemption or to a nonzero rc - # preemptible is not used in AWS - RuntimeAttributes aws_spot_runtime_attributes = { - "preemptible_tries": 3, - "max_retries": 3, - "zones": select_first([zones]), - "queue_arn": select_first([aws_spot_queue_arn, ""]), - "container_registry": container_registry - } - - RuntimeAttributes aws_on_demand_runtime_attributes = { - "preemptible_tries": 0, - "max_retries": 0, - "zones": select_first([zones]), - "queue_arn": select_first([aws_on_demand_queue_arn, ""]), - "container_registry": container_registry - } - } - - if (backend == "HPC") { - # No distinction between preemptible and on-demand in HPC configuration - RuntimeAttributes hpc_runtime_attributes = { - "preemptible_tries": 0, - "max_retries": 3, - "zones": "", - "queue_arn": "", - "container_registry": container_registry - } - } - - output { - RuntimeAttributes spot_runtime_attributes = select_first([ - gcp_spot_runtime_attributes, - azure_spot_runtime_attributes, - aws_spot_runtime_attributes, - hpc_runtime_attributes - ]) - RuntimeAttributes on_demand_runtime_attributes = select_first([ - gcp_on_demand_runtime_attributes, - azure_on_demand_runtime_attributes, - aws_on_demand_runtime_attributes, - hpc_runtime_attributes - ]) - } - - parameter_meta { - backend: {help: "Backend where the workflow will be executed ['GCP', 'Azure', 'AWS', 'HPC']"} - zones: {help: "Zones where compute will take place; required if backend is set to 'AWS' or 'GCP'"} - aws_spot_queue_arn: {help: "Queue ARN for the spot batch queue; required if backend is set to 'AWS'"} - aws_on_demand_queue_arn: {help: "Queue ARN for the on demand batch queue; required if backend is set to 'AWS'"} - } -} diff --git a/workflows/wdl-common/wdl/workflows/deepvariant/deepvariant.wdl 
b/workflows/wdl-common/wdl/workflows/deepvariant/deepvariant.wdl deleted file mode 100644 index e4cd055..0000000 --- a/workflows/wdl-common/wdl/workflows/deepvariant/deepvariant.wdl +++ /dev/null @@ -1,269 +0,0 @@ -version 1.0 - -# Call variants using DeepVariant - -import "../../structs.wdl" - -workflow deepvariant { - input { - String sample_id - Array[IndexData] aligned_bams - - IndexData reference_fasta - String reference_name - - String deepvariant_version - DeepVariantModel? deepvariant_model - - RuntimeAttributes default_runtime_attributes - } - - scatter (bam_object in aligned_bams) { - File aligned_bam = bam_object.data - File aligned_bam_index = bam_object.data_index - } - - Int total_deepvariant_tasks = 64 - Int num_shards = 8 - Int tasks_per_shard = total_deepvariant_tasks / num_shards - - scatter (shard_index in range(num_shards)) { - Int task_start_index = shard_index * tasks_per_shard - - call deepvariant_make_examples { - input: - sample_id = sample_id, - aligned_bams = aligned_bam, - aligned_bam_indices = aligned_bam_index, - reference = reference_fasta.data, - reference_index = reference_fasta.data_index, - task_start_index = task_start_index, - tasks_per_shard = tasks_per_shard, - total_deepvariant_tasks = total_deepvariant_tasks, - deepvariant_version = deepvariant_version, - runtime_attributes = default_runtime_attributes - } - } - - call deepvariant_call_variants { - input: - sample_id = sample_id, - reference_name = reference_name, - example_tfrecord_tars = deepvariant_make_examples.example_tfrecord_tar, - deepvariant_model = deepvariant_model, - total_deepvariant_tasks = total_deepvariant_tasks, - deepvariant_version = deepvariant_version, - runtime_attributes = default_runtime_attributes - } - - call deepvariant_postprocess_variants { - input: - sample_id = sample_id, - tfrecord = deepvariant_call_variants.tfrecord, - nonvariant_site_tfrecord_tars = deepvariant_make_examples.nonvariant_site_tfrecord_tar, - reference = reference_fasta.data, - reference_index = reference_fasta.data_index, - reference_name = reference_name, - total_deepvariant_tasks = total_deepvariant_tasks, - deepvariant_version = deepvariant_version, - runtime_attributes = default_runtime_attributes - } - - output { - IndexData vcf = {"data": deepvariant_postprocess_variants.vcf, "data_index": deepvariant_postprocess_variants.vcf_index} - IndexData gvcf = {"data": deepvariant_postprocess_variants.gvcf, "data_index": deepvariant_postprocess_variants.gvcf_index} - } - - parameter_meta { - sample_id: {help: "Sample ID; used for naming files"} - aligned_bams: {help: "Bam and index aligned to the reference genome for each movie associated with all samples in the cohort"} - reference: {help: "Reference genome data"} - deepvariant_version: {help: "Version of deepvariant to use"} - deepvariant_model: {help: "Optional deepvariant model file to use"} - default_runtime_attributes: {help: "Default RuntimeAttributes; spot if preemptible was set to true, otherwise on_demand"} - } -} - -task deepvariant_make_examples { - input { - String sample_id - Array[File] aligned_bams - Array[File] aligned_bam_indices - - File reference - File reference_index - - Int task_start_index - Int tasks_per_shard - - Int total_deepvariant_tasks - String deepvariant_version - - RuntimeAttributes runtime_attributes - } - - Int task_end_index = task_start_index + tasks_per_shard - 1 - Int disk_size = ceil(size(aligned_bams[0], "GB") * length(aligned_bams) * 2 + 50) - Int mem_gb = tasks_per_shard * 4 - - command <<< - set -euo pipefail 
- - mkdir example_tfrecords nonvariant_site_tfrecords - - echo "DeepVariant version: $VERSION" - - seq ~{task_start_index} ~{task_end_index} \ - | parallel \ - --jobs ~{tasks_per_shard} \ - --halt 2 \ - /opt/deepvariant/bin/make_examples \ - --norealign_reads \ - --vsc_min_fraction_indels 0.12 \ - --pileup_image_width 199 \ - --track_ref_reads \ - --phase_reads \ - --partition_size=25000 \ - --max_reads_per_partition=600 \ - --alt_aligned_pileup=diff_channels \ - --add_hp_channel \ - --sort_by_haplotypes \ - --parse_sam_aux_fields \ - --min_mapping_quality=1 \ - --mode calling \ - --ref ~{reference} \ - --reads ~{sep="," aligned_bams} \ - --examples example_tfrecords/~{sample_id}.examples.tfrecord@~{total_deepvariant_tasks}.gz \ - --gvcf nonvariant_site_tfrecords/~{sample_id}.gvcf.tfrecord@~{total_deepvariant_tasks}.gz \ - --task {} - - tar -zcvf ~{sample_id}.~{task_start_index}.example_tfrecords.tar.gz example_tfrecords - tar -zcvf ~{sample_id}.~{task_start_index}.nonvariant_site_tfrecords.tar.gz nonvariant_site_tfrecords - >>> - - output { - File example_tfrecord_tar = "~{sample_id}.~{task_start_index}.example_tfrecords.tar.gz" - File nonvariant_site_tfrecord_tar = "~{sample_id}.~{task_start_index}.nonvariant_site_tfrecords.tar.gz" - } - - runtime { - docker: "gcr.io/deepvariant-docker/deepvariant:~{deepvariant_version}" - cpu: tasks_per_shard - memory: mem_gb + " GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} - -task deepvariant_call_variants { - input { - String sample_id - String reference_name - Array[File] example_tfrecord_tars - - DeepVariantModel? 
deepvariant_model - Int total_deepvariant_tasks - String deepvariant_version - - RuntimeAttributes runtime_attributes - } - - Int mem_gb = total_deepvariant_tasks * 4 - Int disk_size = ceil(size(example_tfrecord_tars[0], "GB") * length(example_tfrecord_tars) * 2 + 100) - - command <<< - set -euo pipefail - - while read -r tfrecord_tar || [[ -n "${tfrecord_tar}" ]]; do - tar -zxvf "${tfrecord_tar}" - done < ~{write_lines(example_tfrecord_tars)} - - deepvariant_model_path=~{if (defined(deepvariant_model)) then sub(select_first([deepvariant_model]).model.data, "\\.data.*", "") else "/opt/models/pacbio/model.ckpt"} - - echo "DeepVariant version: $VERSION" - - /opt/deepvariant/bin/call_variants \ - --outfile ~{sample_id}.~{reference_name}.call_variants_output.tfrecord.gz \ - --examples "example_tfrecords/~{sample_id}.examples.tfrecord@~{total_deepvariant_tasks}.gz" \ - --checkpoint "${deepvariant_model_path}" - >>> - - output { - File tfrecord = "~{sample_id}.~{reference_name}.call_variants_output.tfrecord.gz" - } - - runtime { - docker: "gcr.io/deepvariant-docker/deepvariant:~{deepvariant_version}" - cpu: total_deepvariant_tasks - memory: mem_gb + " GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} - -task deepvariant_postprocess_variants { - input { - String sample_id - File tfrecord - Array[File] nonvariant_site_tfrecord_tars - - File reference - File reference_index - String reference_name - - Int total_deepvariant_tasks - String deepvariant_version - - RuntimeAttributes runtime_attributes - } - - Int disk_size = ceil((size(tfrecord, "GB") + size(reference, "GB") + size(nonvariant_site_tfrecord_tars[0], "GB") * length(nonvariant_site_tfrecord_tars)) * 2 + 20) - - command <<< - set -euo pipefail - - while read -r nonvariant_site_tfrecord_tar || [[ -n "${nonvariant_site_tfrecord_tar}" ]]; do - tar -zxvf "${nonvariant_site_tfrecord_tar}" - done < ~{write_lines(nonvariant_site_tfrecord_tars)} - - echo "DeepVariant version: $VERSION" - - /opt/deepvariant/bin/postprocess_variants \ - --vcf_stats_report=false \ - --ref ~{reference} \ - --infile ~{tfrecord} \ - --outfile ~{sample_id}.~{reference_name}.deepvariant.vcf.gz \ - --nonvariant_site_tfrecord_path "nonvariant_site_tfrecords/~{sample_id}.gvcf.tfrecord@~{total_deepvariant_tasks}.gz" \ - --gvcf_outfile ~{sample_id}.~{reference_name}.deepvariant.g.vcf.gz - >>> - - output { - File vcf = "~{sample_id}.~{reference_name}.deepvariant.vcf.gz" - File vcf_index = "~{sample_id}.~{reference_name}.deepvariant.vcf.gz.tbi" - File gvcf = "~{sample_id}.~{reference_name}.deepvariant.g.vcf.gz" - File gvcf_index = "~{sample_id}.~{reference_name}.deepvariant.g.vcf.gz.tbi" - } - - runtime { - docker: "gcr.io/deepvariant-docker/deepvariant:~{deepvariant_version}" - cpu: 2 - memory: "32 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} diff --git a/workflows/wdl-common/wdl/workflows/deepvariant/inputs.json b/workflows/wdl-common/wdl/workflows/deepvariant/inputs.json deleted file mode 100644 index ed3a700..0000000 --- 
a/workflows/wdl-common/wdl/workflows/deepvariant/inputs.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "deepvariant.sample_id": "String", - "deepvariant.aligned_bams": [ - { - "data": "File", - "data_index": "File" - } - ], - "deepvariant.reference_fasta": { - "data": "File", - "data_index": "File" - }, - "deepvariant.reference_name": "String", - "deepvariant.deepvariant_version": "String", - "deepvariant.deepvariant_model": "WomCompositeType {\n model -> WomCompositeType {\n data -> File\ndata_index -> File \n}\nmetadata -> File \n}? (optional)", - "deepvariant.default_runtime_attributes": { - "preemptible_tries": "Int", - "max_retries": "Int", - "zones": "String", - "queue_arn": "String", - "container_registry": "String" - } -} diff --git a/workflows/wdl-common/wdl/workflows/phase_vcf/inputs.json b/workflows/wdl-common/wdl/workflows/phase_vcf/inputs.json deleted file mode 100644 index cd7ca4f..0000000 --- a/workflows/wdl-common/wdl/workflows/phase_vcf/inputs.json +++ /dev/null @@ -1,40 +0,0 @@ -{ - "phase_vcf.vcf": { - "data": "File", - "data_index": "File" - }, - "phase_vcf.aligned_bams": [ - { - "data": "File", - "data_index": "File" - } - ], - "phase_vcf.reference": { - "name": "String", - "fasta": { - "data": "File", - "data_index": "File" - }, - "chromosomes": "Array[String]", - "chromosome_lengths": "File", - "tandem_repeat_bed": "File", - "trgt_tandem_repeat_bed": "File", - "gnomad_af": "File", - "hprc_af": "File", - "gff": "File", - "population_vcfs": [ - { - "data": "File", - "data_index": "File" - } - ] - }, - "phase_vcf.default_runtime_attributes": { - "preemptible_tries": "Int", - "max_retries": "Int", - "zones": "String", - "queue_arn": "String", - "container_registry": "String" - } -} - diff --git a/workflows/wdl-common/wdl/workflows/phase_vcf/phase_vcf.wdl b/workflows/wdl-common/wdl/workflows/phase_vcf/phase_vcf.wdl deleted file mode 100644 index 7de5b04..0000000 --- a/workflows/wdl-common/wdl/workflows/phase_vcf/phase_vcf.wdl +++ /dev/null @@ -1,182 +0,0 @@ -version 1.0 - -# Phase and calculate stats for a VCF using WhatsHap. 
- -import "../../structs.wdl" -import "../../tasks/whatshap_phase.wdl" as WhatshapPhase -import "../../tasks/whatshap_stats.wdl" as WhatshapStats - -workflow phase_vcf { - input { - IndexData vcf - Array[IndexData] aligned_bams - - IndexData reference_fasta - File reference_chromosome_lengths - Array[String] regions - - RuntimeAttributes default_runtime_attributes - } - - String vcf_basename = basename(vcf.data, ".vcf.gz") - - scatter (bam_object in aligned_bams) { - File aligned_bam = bam_object.data - File aligned_bam_index = bam_object.data_index - } - - scatter (region in regions) { - call split_vcf { - input: - vcf = vcf.data, - vcf_index = vcf.data_index, - region = region, - runtime_attributes = default_runtime_attributes - } - - String chromosome = sub(region, ":.*", "") - - call WhatshapPhase.whatshap_phase { - input: - vcf = split_vcf.region_vcf, - vcf_index = split_vcf.region_vcf_index, - chromosome = chromosome, - aligned_bams = aligned_bam, - aligned_bam_indices = aligned_bam_index, - reference = reference_fasta.data, - reference_index = reference_fasta.data_index, - runtime_attributes = default_runtime_attributes - } - } - - call bcftools_concat { - input: - vcfs = whatshap_phase.phased_vcf, - vcf_indices = whatshap_phase.phased_vcf_index, - output_vcf_name = "~{vcf_basename}.phased.vcf.gz", - runtime_attributes = default_runtime_attributes - } - - call WhatshapStats.whatshap_stats { - input: - phased_vcf = bcftools_concat.concatenated_vcf, - phased_vcf_index = bcftools_concat.concatenated_vcf_index, - reference_chromosome_lengths = reference_chromosome_lengths, - runtime_attributes = default_runtime_attributes - } - - output { - IndexData phased_vcf = {"data": bcftools_concat.concatenated_vcf, "data_index": bcftools_concat.concatenated_vcf_index} - File whatshap_stats_gtf = whatshap_stats.gtf - File whatshap_stats_tsv = whatshap_stats.tsv - File whatshap_stats_blocklist = whatshap_stats.blocklist - } - - parameter_meta { - vcf: {help: "VCF to phase"} - aligned_bams: {help: "Bam and index aligned to the reference genome for each movie associated with the sample"} - reference: {help: "Reference genome fasta and index"} - reference_chromosome_lengths: {help: "File specifying the lengths of each of the reference chromosomes"} - regions: {help: "Array of regions to run phasing on; can be in the format chr or chr:start-stop"} - default_runtime_attributes: {help: "Default RuntimeAttributes; spot if preemptible was set to true, otherwise on_demand"} - } -} - -task split_vcf { - input { - File vcf - File vcf_index - String region - - RuntimeAttributes runtime_attributes - } - - String vcf_basename = basename(vcf, ".vcf.gz") - String region_substituted = sub(region, ":", "_") - Int threads = 2 - Int disk_size = ceil(size(vcf, "GB") * 2 + 20) - - command <<< - set -euo pipefail - - tabix --version - - tabix \ - -h \ - ~{vcf} \ - ~{region} \ - > ~{vcf_basename}.~{region_substituted}.vcf - - bgzip --version - - bgzip -@{threads} ~{vcf_basename}.~{region_substituted}.vcf - tabix ~{vcf_basename}.~{region_substituted}.vcf.gz - >>> - - output { - File region_vcf = "~{vcf_basename}.~{region_substituted}.vcf.gz" - File region_vcf_index = "~{vcf_basename}.~{region_substituted}.vcf.gz.tbi" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/htslib@sha256:24ae834b9d4ba3ea3c23d77b2ce49b3a56a6e32d1367470e8e1160eb645019a9" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - 
maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} - -task bcftools_concat { - input { - Array[File] vcfs - Array[File] vcf_indices - String output_vcf_name - - RuntimeAttributes runtime_attributes - } - - Int threads = 2 - Int disk_size = ceil(size(vcfs[0], "GB") * length(vcfs) * 2 + 20) - - command <<< - set -euo pipefail - - bcftools --version - - tabix --version - - bcftools concat \ - --threads ~{threads - 1} \ - --no-version \ - --file-list ~{write_lines(vcfs)} \ - --allow-overlaps \ - --output ~{output_vcf_name} \ - --output-type z - - tabix "~{output_vcf_name}" - >>> - - output { - File concatenated_vcf = "~{output_vcf_name}" - File concatenated_vcf_index = "~{output_vcf_name}.tbi" - } - - runtime { - docker: "~{runtime_attributes.container_registry}/bcftools@sha256:36d91d5710397b6d836ff87dd2a924cd02fdf2ea73607f303a8544fbac2e691f" - cpu: threads - memory: "4 GB" - disk: disk_size + " GB" - disks: "local-disk " + disk_size + " HDD" - preemptible: runtime_attributes.preemptible_tries - maxRetries: runtime_attributes.max_retries - awsBatchRetryAttempts: runtime_attributes.max_retries - queueArn: runtime_attributes.queue_arn - zones: runtime_attributes.zones - } -} From 815c87f1e7f612395db7b9ab539228e074f6fc4b Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 16 Feb 2024 16:36:49 -0800 Subject: [PATCH 2/4] added wdl-common as submodule --- .gitmodules | 2 +- workflows/wdl-common | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) create mode 160000 workflows/wdl-common diff --git a/.gitmodules b/.gitmodules index 68d9e11..aca86b4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "workflows/wdl-common"] path = workflows/wdl-common - url = git@github.com:PacificBiosciences/wdl-common.git + url = https://github.com/PacificBiosciences/wdl-common.git diff --git a/workflows/wdl-common b/workflows/wdl-common new file mode 160000 index 0000000..7baa139 --- /dev/null +++ b/workflows/wdl-common @@ -0,0 +1 @@ +Subproject commit 7baa139678f470aa38181d59fb4ae664a9b6fa9d From 7a89918f518c61ce2efcfeb783673d0b93c6c811 Mon Sep 17 00:00:00 2001 From: William Rowell Date: Fri, 16 Feb 2024 16:41:49 -0800 Subject: [PATCH 3/4] removed pharmcat tests --- wdl-ci.config.json | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/wdl-ci.config.json b/wdl-ci.config.json index 624a6d9..14f53ca 100644 --- a/wdl-ci.config.json +++ b/wdl-ci.config.json @@ -611,33 +611,6 @@ } } }, - "workflows/wdl-common/wdl/tasks/pharmcat.wdl": { - "key": "workflows/wdl-common/wdl/tasks/pharmcat.wdl", - "name": "", - "description": "", - "tasks": { - "pangu_cyp2d6": { - "key": "pangu_cyp2d6", - "digest": "", - "tests": [] - }, - "pharmcat_preprocess": { - "key": "pharmcat_preprocess", - "digest": "", - "tests": [] - }, - "filter_preprocessed_vcf": { - "key": "filter_preprocessed_vcf", - "digest": "", - "tests": [] - }, - "run_pharmcat": { - "key": "run_pharmcat", - "digest": "", - "tests": [] - } - } - }, "workflows/wdl-common/wdl/tasks/pbsv_discover.wdl": { "key": "workflows/wdl-common/wdl/tasks/pbsv_discover.wdl", "name": "", "description": "", From 90c68e79c7951443b4bba741806b4e54f2f8cfea Mon Sep 17 00:00:00 2001 From: Billy Rowell Date: Tue, 20 Feb 2024 11:51:39 -0800 Subject: [PATCH 4/4] fix: Change submodule URL to relative --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index aca86b4..6bc7de6 100644
--- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "workflows/wdl-common"] path = workflows/wdl-common - url = https://github.com/PacificBiosciences/wdl-common.git + url = ../wdl-common.git