Skip to content
This repository has been archived by the owner on Feb 28, 2025. It is now read-only.

Commit

Permalink
fix seqtk module
Browse files Browse the repository at this point in the history
  • Loading branch information
georgiesamaha authored Dec 13, 2021
1 parent e0a7510 commit 9196bc7
Showing 1 changed file with 2 additions and 47 deletions.
49 changes: 2 additions & 47 deletions align_nonhuman.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@

# Abort the script as soon as a command exits with a non-zero status.
set -e

# Path to the seqtk executable used for interleaving the paired FASTQ files.
seqtk=/scratch/er01/apps/seqtk/seqtk #Compiled for Gadi

# align_split.sh for non-human samples
# align.sh for non-human samples

# Reference handed to `bwa mem`; empty in this version of the script.
# NOTE(review): presumably meant to be filled in per project - confirm.
ref=
# Directory that receives the name-sorted BAM files.
outdir=../Align_split
Expand All @@ -51,50 +49,7 @@ err=${errdir}/${outPrefix}.err

# Log every parsed field so a failed run can be traced back to its input row.
echo fqpair:$fqpair fq1:$fq1 fq2:$fq2 sampleID:$sampleID centre:$centre lib:$lib platform:$platform flowcell:$flowcell lane:$lane outPrefix:$outPrefix err:$err ref:$ref NCPUS:$NCPUS

# Interleave the paired FASTQs, align them with bwa mem (-p: interleaved
# input on stdin, -R: read-group header line) and name-sort the result to BAM.
# bwa's stderr is captured in a per-sample log. The log name must use
# ${outPrefix}: shell variable names are case-sensitive and ${outprefix} is
# never set, so it expanded to nothing and every sample overwrote the same
# "${logdir}/.log.bwamem" file.
"$seqtk" mergepe "$fq1" "$fq2" | bwa mem -p -t "$NCPUS" \
  -R "@RG\tID:${flowcell}.${lane}_${sampleID}_${lib}\tPL:${platform}\tPU:${flowcell}.${lane}\tSM:${sampleID}\tLB:${sampleID}_${lib}\tCN:${centre}" \
  -M "${ref}" - 2> "${logdir}/${outPrefix}.log.bwamem" \
  | samtools sort -n -@ "$NCPUS" -o "${outdir}/${outPrefix}.aln.bam"

# Leave a marker in the error-capture file when the BAM is missing or fails
# samtools' integrity check.
if ! samtools quickcheck "${outdir}/${outPrefix}.aln.bam"; then
  printf "Corrupted or missing BAM\n" > "$err"
fi

[gs5517@gadi-login-01 Scripts]$ cat align_nonhuman.sh
#!/bin/bash

# Takes one argument: a comma-separated record of the form
#   fqpair,sampleID,centre,lib,platform,flowcell,lane

# Stop at the first command that returns a non-zero status.
set -e

# ---- Tool location ----------------------------------------------------------
seqtk=/scratch/er01/apps/seqtk/seqtk #Compiled for Gadi

# align_split.sh for non-human samples

# ---- Reference and output/log directories -----------------------------------
ref=./Reference/canFam4.fasta
outdir=../Align_split
logdir=Logs/Align
errdir=Logs/Align_error_capture

# ---- Split the comma-separated record into its seven fields -----------------
fqpair=$(echo $1 | cut -d ',' -f 1)
sampleID=$(echo $1 | cut -d ',' -f 2)
centre=$(echo $1 | cut -d ',' -f 3)
lib=$(echo $1 | cut -d ',' -f 4)
platform=$(echo $1 | cut -d ',' -f 5)
flowcell=$(echo $1 | cut -d ',' -f 6)
lane=$(echo $1 | cut -d ',' -f 7)

# ---- Locate the R1/R2 files that share the fqpair prefix --------------------
# With `set -e` active, a pattern that matches nothing makes `ls` fail and
# ends the script here.
fq1=$(ls ${fqpair}*R1.f*q.gz) #Must check regex for each batch
fq2=$(ls ${fqpair}*R2.f*q.gz)

# ---- Names derived from the FASTQ prefix ------------------------------------
outPrefix=$(basename $fqpair)
err=${errdir}/${outPrefix}.err

# bwakit writes its default sort order (queryname, but without an SO tag in the
# header); it can also coordinate-sort through samtools, but offers no way to
# pass -n for a name sort. Sambamba needs queryname-sorted input carrying the
# SO tag, so the sort is forced explicitly further down.

# Log every parsed field so a failed run can be traced back to its input row.
echo fqpair:$fqpair fq1:$fq1 fq2:$fq2 sampleID:$sampleID centre:$centre lib:$lib platform:$platform flowcell:$flowcell lane:$lane outPrefix:$outPrefix err:$err ref:$ref NCPUS:$NCPUS

# Interleave the paired FASTQs with seqtk (resolved from PATH), align them with
# bwa mem (-p: interleaved input on stdin, -R: read-group header line) and
# name-sort the result to BAM.
# Only one pipeline head is kept: the superseded "$seqtk ..." line ended in a
# backslash and would otherwise be spliced onto this command.
# The bwa log name uses ${outPrefix}: shell variable names are case-sensitive
# and ${outprefix} is never set, so every sample used to write to the same
# "${logdir}/.log.bwamem" file.
seqtk mergepe "$fq1" "$fq2" | bwa mem -p -t "$NCPUS" \
  -R "@RG\tID:${flowcell}.${lane}_${sampleID}_${lib}\tPL:${platform}\tPU:${flowcell}.${lane}\tSM:${sampleID}\tLB:${sampleID}_${lib}\tCN:${centre}" \
  -M "${ref}" - 2> "${logdir}/${outPrefix}.log.bwamem" \
  | samtools sort -n -@ "$NCPUS" -o "${outdir}/${outPrefix}.aln.bam"
Expand Down

0 comments on commit 9196bc7

Please sign in to comment.