From cd07ffaa70d59921900ac12aa4212a358c1ef3aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?In=C3=AAs=20Mendes?= Date: Tue, 23 Jul 2024 20:34:41 +0100 Subject: [PATCH] [TheiaCoV and TheiaMeta - HRRT] Patch bug by removing unneeded awk verification (#550) * remove awk verification * DEBUG is not ERROR --- tasks/quality_control/read_filtering/task_ncbi_scrub.wdl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tasks/quality_control/read_filtering/task_ncbi_scrub.wdl b/tasks/quality_control/read_filtering/task_ncbi_scrub.wdl index fd070917d..fe3447700 100644 --- a/tasks/quality_control/read_filtering/task_ncbi_scrub.wdl +++ b/tasks/quality_control/read_filtering/task_ncbi_scrub.wdl @@ -27,6 +27,9 @@ task ncbi_scrub_pe { read1_count=$($cat_command ~{read1} | wc -l | awk '{print $1/4}') read2_count=$($cat_command ~{read2} | wc -l | awk '{print $1/4}') + echo "DEBUG: Number of reads in read1: $read1_count" + echo "DEBUG: Number of reads in read2: $read2_count" + if [[ $read1_count -ne $read2_count ]] then echo "ERROR: The number of reads in the two input files do not match." @@ -38,9 +41,8 @@ task ncbi_scrub_pe { # paste command takes 4 lines at a time and merges them into a single line with tabs # tr substitutes the tab separators from paste into new lines, effectively interleaving the reads and keeping the FASTQ format # Important: To ensure that the reads are interleaved correctly, the reads must be in the same order in both files - # Additionally, only print read pairs that have 8 fields (4 lines) to avoid interleaving unpaired reads echo "DEGUB: Interleaving reads with paste..." 
- paste <($cat_command ~{read1} | paste - - - -) <($cat_command ~{read2} | paste - - - -) | awk '{if (NF == 8) print $1"\n"$2"\n"$3"\n"$4"\n"$5"\n"$6"\n"$7"\n"$8}' | tr '\t' '\n' > interleaved.fastq + paste <($cat_command ~{read1} | paste - - - -) <($cat_command ~{read2} | paste - - - -) | tr '\t' '\n' > interleaved.fastq # dehost reads # -x Remove spots instead of default 'N' replacement.