Skip to content

Commit

Permalink
Align to polyA trimmed reference; fix calculation of n mapping reads
Browse files Browse the repository at this point in the history
  • Loading branch information
boydgreenfield committed Apr 1, 2020
1 parent 7fb9bb3 commit 53f3d15
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
13 changes: 9 additions & 4 deletions covid19_call_variants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ set -eou pipefail
: "${length_threshold:=600}" # max length to be considered "short read" sequencing
: "${threads:=4}"
: "${prefix:=results}"
: "${min_quality:=20}"

echo "reference=${reference}"
echo "input_fastq=${input_fastq}"
echo "primer_bed_file=${primer_bed_file}"
echo "length_threshold=${length_threshold}"
echo "threads=${threads}"
echo "min_quality=${min_quality}"


# detect if we have long or short reads to adjust minimap2 parameters
Expand All @@ -36,14 +38,18 @@ mapping_mode=$(
MINIMAP_OPTS="-K 20M -a -x ${mapping_mode} -t ${threads}"
echo "[1] Running minimap with options: ${MINIMAP_OPTS}"

# Trim polyA tail for alignment (33 bases)
seqtk trimfq -e 33 "${reference}" > "${reference}.trimmed.fa"

# shellcheck disable=SC2086
minimap2 ${MINIMAP_OPTS} \
"${reference}" \
"${reference}.trimmed.fa" \
"${input_fastq}" \
| samtools \
view \
-u \
-h \
-q $min_quality \
-F \
4 - \
| samtools \
Expand All @@ -52,6 +58,8 @@ minimap2 ${MINIMAP_OPTS} \
- \
> "${prefix}.sorted.bam"

rm "${reference}.trimmed.fa"

# Trim with ivar
echo "[2] Trimming with ivar"
samtools index "${prefix}.sorted.bam"
Expand Down Expand Up @@ -85,8 +93,6 @@ samtools \
--count-orphans \
--no-BAQ \
--min-BQ 0 \
--min-MQ 20 \
--region MN908947.3:1-29870 \
"${prefix}.sorted.bam" \
> "${prefix}.pileup"

Expand Down Expand Up @@ -118,7 +124,6 @@ mv "${prefix}.consensus.fa" "consensus.fa"
mv "${prefix}.ivar.tsv" "variants.tsv"
mv "${prefix}.sorted.bam" "covid19.bam"
mv "${prefix}.sorted.bam.bai" "covid19.bam.bai"
mv "${prefix}.pileup" "covid19.pileup"
rm "${prefix}"*

echo "[ ] Finished!"
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies:
- ivar=1.0.1
- samtools=1.9
- minimap2=2.17
- seqtk=1.3
- art=2016.06.05
- pango
- cairo
Expand Down
12 changes: 5 additions & 7 deletions report.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@
"# note our DB actually has v1 of this assembly as of Feb 2020\n",
"VARIANTS_TSV_PATH = \"variants.tsv\"\n",
"BAM_PATH = \"covid19.bam\"\n",
"PILEUP_PATH = \"covid19.pileup\"\n",
"REFERENCE_PATH = os.environ.get(\n",
" \"FASTA_REFERENCE\", \"reference/nCoV-2019.reference.fasta\"\n",
")\n",
Expand Down Expand Up @@ -93,8 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"# calculate mapping depth, trim another 20bp to avoid mapping to polyA tail in depth calcs\n",
"!samtools depth -r MN908947.3:1-29850 -Q 20 $BAM_PATH > snps.depth 2> /dev/null"
"!samtools depth $BAM_PATH > snps.depth 2> /dev/null"
]
},
{
Expand All @@ -112,8 +110,8 @@
"metadata": {},
"outputs": [],
"source": [
"pileup_lines = !wc -l $PILEUP_PATH\n",
"n_aligned_reads = int(pileup_lines[0].split(\" \")[0])"
"samtools_view_output = !samtools view -F 4 $BAM_PATH | wc -l\n",
"n_aligned_reads = int(samtools_view_output[0])"
]
},
{
Expand Down Expand Up @@ -141,7 +139,7 @@
"source": [
"# calculate genome coverage (what percent of bases are coveraged at X coverage)\n",
"# Use a fixed reference length that we use for `samtools depth` above\n",
"reference_length = 29850\n",
"reference_length = len(reference[0])\n",
"\n",
"covered_sites = set()\n",
"covered_sites_10x = set()\n",
Expand Down Expand Up @@ -363,7 +361,7 @@
"outputs": [],
"source": [
"# Clean up files\n",
"!rm -f {sample.filename} snps.depth variants.log covid19.pileup "
"!rm -f {sample.filename} snps.depth variants.log"
]
},
{
Expand Down

0 comments on commit 53f3d15

Please sign in to comment.