Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Commit

Permalink
Update tp53 score for basesubtyping (#1037)
Browse files Browse the repository at this point in the history
* update run script for subtyping

* revamped for subtyping

Co-authored-by: Jo Lynne Rokita <jolynnerokita@d3b.center>
  • Loading branch information
kgaonkar6 and Jo Lynne Rokita authored Apr 27, 2021
1 parent 35384c7 commit f125d2f
Show file tree
Hide file tree
Showing 11 changed files with 210 additions and 168 deletions.
15 changes: 7 additions & 8 deletions analyses/tp53_nf1_score/01-apply-classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,10 @@
rownamesRDS = robjects.r["rownames"]

# Load gene expression data in rds format
file = os.path.join("data", inputfile)
name = os.path.basename(inputfile)
name = re.sub("\.rds$", "", name)

name = re.sub("\.rds$", "", inputfile)

exprs_rds = readRDS(file)
exprs_rds = readRDS(inputfile)
exprs_df = pandas2ri.ri2py(exprs_rds)
exprs_df.index = rownamesRDS(exprs_rds)

Expand All @@ -84,7 +83,7 @@

# Load RAS Classifier
file = os.path.join(
"analyses", "tp53_nf1_score", "reference", "ras_classifier_coefficients.tsv"
"reference", "ras_classifier_coefficients.tsv"
)
ras_coef_df = pd.read_table(file, index_col=0)
ras_coef_df = ras_coef_df.query("abs_val > 0")
Expand Down Expand Up @@ -119,7 +118,7 @@

# Load TP53 Classifier
file = os.path.join(
"analyses", "tp53_nf1_score", "reference", "tp53_classifier_coefficients.tsv"
"reference", "tp53_classifier_coefficients.tsv"
)
tp53_coef_df = pd.read_table(file, index_col=0)
tp53_coef_df = tp53_coef_df.query("abs_val > 0")
Expand Down Expand Up @@ -154,7 +153,7 @@

# Load NF1 Classifier
file = os.path.join(
"analyses", "tp53_nf1_score", "reference", "nf1_classifier_coefficients.tsv"
"reference", "nf1_classifier_coefficients.tsv"
)
nf1_coef_df = pd.read_table(file, index_col=0)
nf1_coef_df = nf1_coef_df.query("abs_val > 0")
Expand Down Expand Up @@ -210,5 +209,5 @@
filename = pd.Series([name, "classifier_scores.tsv"])
filename = filename.str.cat(sep="_")
print(filename)
file = os.path.join("analyses", "tp53_nf1_score", "results", filename)
file = os.path.join("results", filename)
all_results.to_csv(file, sep="\t", index=False)
12 changes: 10 additions & 2 deletions analyses/tp53_nf1_score/02-qc-rna_expression_score.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
title: "QC correlation of classifier score with RNA-Seq expression"
author: "K S Gaonkar (D3B)"
output: html_notebook

params:
base_run:
label: "1/0 to run with base histology"
value: 0
input: integer
---

In this notebook we will find correlation between RNA-Seq and classifier score
Expand All @@ -20,7 +24,11 @@ root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
data_dir <- file.path(root_dir, "data")
results_dir <- "results"
clinical <- read_tsv(file.path(data_dir,"pbta-histologies.tsv"))
if ( params$base_run ==0 ){
clinical<-read.delim(file.path(data_dir,"pbta-histologies.tsv"), stringsAsFactors = FALSE)
} else{
clinical<-read.delim(file.path(data_dir,"pbta-histologies-base.tsv"), stringsAsFactors = FALSE)
}
```

### Classifier scores
Expand Down
24 changes: 7 additions & 17 deletions analyses/tp53_nf1_score/02-qc-rna_expression_score.nb.html

Large diffs are not rendered by default.

15 changes: 12 additions & 3 deletions analyses/tp53_nf1_score/03-tp53-cnv-loss-domain.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
title: "Find CNV losses that overlap with TP53 domains"
author: "K S Gaonkar (D3B)"
output: html_notebook

params:
base_run:
label: "1/0 to run with base histology"
value: 0
input: integer
---

In this script we will find CNV losses that overlap with TP53 domains:
Expand Down Expand Up @@ -58,8 +62,13 @@ bioMartDataPfamTp53 <-
dplyr::filter(hgnc_symbol=="TP53")
# histology file
histologies_df <- read_tsv(file.path(data_dir,
"pbta-histologies.tsv")) %>%
if ( params$base_run ==0 ){
clinical<-read.delim(file.path(data_dir,"pbta-histologies.tsv"), stringsAsFactors = FALSE)
} else{
clinical<-read.delim(file.path(data_dir,"pbta-histologies-base.tsv"), stringsAsFactors = FALSE)
}
histologies_df <- clinical %>%
dplyr::select("Kids_First_Biospecimen_ID",
"sample_id")
Expand Down
36 changes: 13 additions & 23 deletions analyses/tp53_nf1_score/03-tp53-cnv-loss-domain.nb.html

Large diffs are not rendered by default.

18 changes: 14 additions & 4 deletions analyses/tp53_nf1_score/04-tp53-sv-loss.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
title: "Find CNV losses that overlap with TP53 domains"
author: "K S Gaonkar (D3B)"
output: html_notebook

params:
base_run:
label: "1/0 to run with base histology"
value: 0
input: integer
---

In this script we will find if there are a Structural Variant breakpoints within TP53 or covering the gene locus such that it is deleted and concurrently, there are either one or both of:
Expand Down Expand Up @@ -38,7 +42,13 @@ if (!dir.exists(results_dir)) {
manta_sv <- read_tsv(file.path(data_dir, "pbta-sv-manta.tsv.gz"))
stranded_expr <- readRDS(file.path(data_dir, "pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds"))
polya_expr <- readRDS(file.path(data_dir, "pbta-gene-expression-rsem-fpkm-collapsed.polya.rds"))
histology <- read_tsv(file.path(data_dir, "pbta-histologies.tsv"))
if ( params$base_run ==0 ){
clinical<-read.delim(file.path(data_dir,"pbta-histologies.tsv"), stringsAsFactors = FALSE)
} else{
clinical<-read.delim(file.path(data_dir,"pbta-histologies-base.tsv"), stringsAsFactors = FALSE)
}
histology <- select(clinical,c("Kids_First_Biospecimen_ID", "sample_id"))
# high confidence cnv losses from `03-tp53-cnv-loss-domain.Rmd`
cnv_tp53_loss <- read_tsv(file.path(results_dir, "loss_overlap_domains_tp53.tsv"))
Expand All @@ -58,7 +68,7 @@ manta_sv_tp53 <- manta_sv %>%
grepl("/TP53/", Gene.name),
SV.type %in% c("BND", "INV", "DEL")
) %>%
left_join(select(histology, c("Kids_First_Biospecimen_ID", "sample_id")),
left_join(histology,
by = c("Kids.First.Biospecimen.ID.Tumor" = "Kids_First_Biospecimen_ID")
)
Expand Down Expand Up @@ -99,7 +109,7 @@ polya_expr_tp53 <- polya_expr["TP53", ] %>%
lowexpr_tp53_with_sv <- bind_rows(stranded_expr_tp53, polya_expr_tp53) %>%
# add sample_id to expression df
left_join(select(histology, c("Kids_First_Biospecimen_ID", "sample_id")),
left_join(histology,
by = c("Kids_First_Biospecimen_ID")
) %>%
# filter to keep only low expressed TP53 sample_ids
Expand Down
Loading

0 comments on commit f125d2f

Please sign in to comment.