Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Recalculate coding TMB with updated Lancet BED file #459

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions analyses/snv-callers/run_caller_consensus_analysis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@ dbfile=scratch/snv_db.sqlite
consensus_file=analyses/snv-callers/results/consensus/pbta-snv-consensus-mutation.maf.tsv

# BED and GTF file paths
exon_file=scratch/gencode.v27.primary_assembly.annotation.bed
cds_file=scratch/gencode.v27.primary_assembly.annotation.bed
all_mut_wgs_bed=scratch/intersect_strelka_mutect_WGS.bed
all_mut_wxs_bed=data/WXS.hg38.100bp_padded.bed
coding_wgs_bed=scratch/intersect_exon_lancet_strelka_mutect_WGS.bed
coding_wxs_bed=scratch/intersect_exon_WXS.bed
coding_wgs_bed=scratch/intersect_cds_lancet_strelka_mutect_WGS.bed
coding_wxs_bed=scratch/intersect_cds_lancet_WXS.bed

# Set a default for the VAF filter if none is specified
vaf_cutoff=${OPENPBTA_VAF_CUTOFF:-0}
Expand Down Expand Up @@ -60,20 +60,21 @@ bedtools intersect \
gunzip -c data/gencode.v27.primary_assembly.annotation.gtf.gz \
| awk '$3 ~ /CDS/' \
| convert2bed --do-not-sort --input=gtf - \
> $exon_file
> $cds_file

# Make WGS coding BED file
bedtools intersect \
-a data/WGS.hg38.strelka2.unpadded.bed \
-b data/WGS.hg38.mutect2.vardict.unpadded.bed \
data/WGS.hg38.lancet.300bp_padded.bed \
$exon_file \
$cds_file \
> $coding_wgs_bed

# Make WXS coding BED file
bedtools intersect \
-a data/WXS.hg38.100bp_padded.bed \
-b $exon_file
-b data/WXS.hg38.lancet.400bp_padded.bed \
$cds_file \
> $coding_wxs_bed

######################### Calculate consensus TMB ##############################
Expand Down
6 changes: 3 additions & 3 deletions analyses/snv-callers/scripts/03-calculate_tmb.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@
# --metadata data/pbta-histologies.tsv \
# --all_bed_wgs scratch/intersect_strelka_mutect_WGS.bed \
# --all_bed_wxs data/WXS.hg38.100bp_padded.bed \
# --coding_bed_wgs scratch/intersect_exon_lancet_strelka_mutect_WGS.bed \
# --coding_bed_wxs scratch/intersect_exon_WXS.bed \
# --coding_bed_wgs scratch/intersect_cds_lancet_strelka_mutect_WGS.bed \
# --coding_bed_wxs scratch/intersect_cds_lancet_WXS.bed \
# --overwrite

################################ Initial Set Up ################################
Expand Down Expand Up @@ -205,7 +205,7 @@ if (file.exists(tmb_coding_file) && !opt$overwrite) {
# Calculate coding only TMBs and write to file
tmb_coding_df <- calculate_tmb(maf_df,
bed_wgs = opt$coding_bed_wgs,
bed_wxs = opt$coding_bed_wgs
bed_wxs = opt$coding_bed_wxs
)
readr::write_tsv(tmb_coding_df, tmb_coding_file)

Expand Down
6 changes: 3 additions & 3 deletions analyses/tp53_nf1_score/run_classifier.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ POLYA_PLOT=${OPENPBTA_POLYAPLOT:-1}
data_dir="data"
scratch_dir="scratch"
# BED file of CDS-only regions, generated below from the GENCODE GTF
exon_file="${scratch_dir}/gencode.v27.primary_assembly.annotation.bed"
cds_file="${scratch_dir}/gencode.v27.primary_assembly.annotation.bed"
consensus_file="${data_dir}/pbta-snv-consensus-mutation.maf.tsv.gz"
clinical_file="${data_dir}/pbta-histologies.tsv"
analysis_dir="analyses/tp53_nf1_score"
Expand All @@ -25,14 +25,14 @@ analysis_dir="analyses/tp53_nf1_score"
gunzip -c ${data_dir}/gencode.v27.primary_assembly.annotation.gtf.gz \
| awk '$3 ~ /CDS/' \
| convert2bed --do-not-sort --input=gtf - \
> $exon_file
> $cds_file

# Prep the SNV consensus data for evaluation downstream
Rscript --vanilla ${analysis_dir}/00-tp53-nf1-alterations.R \
--snvConsensus ${consensus_file} \
--clinicalFile ${clinical_file} \
--outputFolder ${analysis_dir}/results \
--gencode ${exon_file}
--gencode ${cds_file}

# expression files for prediction
collapsed_stranded="pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds"
Expand Down