AlexsLemonade · jaclyn-taroni · Aug 11, 2021 · Aug 4, 2021 · Aug 11, 2021 · Aug 11, 2021
diff --git a/analyses/focal-cn-file-preparation/02-add-ploidy-consensus.Rmd b/analyses/focal-cn-file-preparation/02-add-ploidy-consensus.Rmd
@@ -6,6 +6,11 @@ output:
     toc_float: TRUE
 author: Chante Bethell and Jaclyn Taroni for ALSF CCDL
 date: 2020
+params:
+  base_run:
+    label: "1/0 to run with base histology"
+    value: 0
+    input: integer
 ---
 
 The `pbta-histologies.tsv` file contains a `tumor_ploidy` column, which is tumor ploidy as inferred by ControlFreeC.
@@ -41,11 +46,15 @@ if(!dir.exists(output_dir)) {
 ```
 
 ```{r}
-# TODO: the consensus SEG file is not currently in the data download -- when it
-# gets included we will have to change the file path here
-consensus_seg_file <- file.path("..", "copy_number_consensus_call", "results", 
-                                "pbta-cnv-consensus.seg.gz")
-histologies_file <- file.path("..", "..", "data", "pbta-histologies.tsv")
+if ( params$base_run ==0 ){
+  histologies_file <- file.path("..", "..", "data", "pbta-histologies.tsv")
+  consensus_seg_file <- file.path("..", "..", "data", 
+                                  "pbta-cnv-consensus.seg.gz")
+} else {
+  histologies_file <- file.path("..", "..", "data", "pbta-histologies-base.tsv")
+  consensus_seg_file <- file.path("..", "copy_number_consensus_call", "results", 
+                                  "pbta-cnv-consensus.seg.gz")
+}
 
 consensus_seg_df <- read_tsv(consensus_seg_file)
 histologies_df <- read_tsv(histologies_file,
@@ -215,4 +224,3 @@ temp_gain <- purrr::imap(bed_gain_list,
 ```{r}
 sessionInfo()
 ```
-
diff --git a/analyses/focal-cn-file-preparation/02-add-ploidy-consensus.nb.html b/analyses/focal-cn-file-preparation/02-add-ploidy-consensus.nb.html
diff --git a/analyses/focal-cn-file-preparation/03-add-cytoband-status-consensus.nb.html b/analyses/focal-cn-file-preparation/03-add-cytoband-status-consensus.nb.html
diff --git a/analyses/focal-cn-file-preparation/05-define-most-focal-cn-units.nb.html b/analyses/focal-cn-file-preparation/05-define-most-focal-cn-units.nb.html
diff --git a/analyses/focal-cn-file-preparation/06-find-recurrent-calls.nb.html b/analyses/focal-cn-file-preparation/06-find-recurrent-calls.nb.html
diff --git a/analyses/focal-cn-file-preparation/README.md b/analyses/focal-cn-file-preparation/README.md
@@ -8,11 +8,20 @@ The purpose of this module is to map from those ranges to gene identifiers for c
 ### Running this analysis
 *This analysis requires at least ~24 GB of RAM to run to completion*
 
-To run this analysis _only on consensus SEG file_, use the following (from the root directory of the repository):
+To run this analysis _only on the consensus SEG file_, use one of the following commands (from the root directory of the repository).
+
+Set `OPENPBTA_BASE_SUBTYPING=1` to run this module with `pbta-histologies-base.tsv` from the `data` folder and the consensus SEG file at the relative path `copy_number_consensus_call/results/pbta-cnv-consensus.seg.gz`. Use this option when running the molecular-subtyping modules for a release:
+
+```
+OPENPBTA_BASE_SUBTYPING=1 bash analyses/focal-cn-file-preparation/run-prepare-cn.sh
+```
+
+Otherwise, by default, the module runs using `pbta-histologies.tsv` and the files downloaded from the data release:
 
 ```
 bash analyses/focal-cn-file-preparation/run-prepare-cn.sh
 ```
+
 **Note**: The `run-bedtools.snakemake` script is implemented in `run-prepare-cn.sh` to run the bedtools coverage steps between the UCSC cytoband file and the samples in the copy number files produced in `02-add-ploidy-consensus.Rmd`.
 This script currently takes a while to run, and therefore slows down the processing speed of the main shell script `run-prepare-cn.sh`.
 

diff --git a/analyses/focal-cn-file-preparation/plots/all_histology_cn_counts.png b/analyses/focal-cn-file-preparation/plots/all_histology_cn_counts.png
diff --git a/...-preparation/plots/consensus_seg_annotated_cn_autosomes_polya_loss_cor_plot.png b/...-preparation/plots/consensus_seg_annotated_cn_autosomes_polya_loss_cor_plot.png
diff --git a/...e-preparation/plots/consensus_seg_annotated_cn_autosomes_polya_stacked_plot.png b/...e-preparation/plots/consensus_seg_annotated_cn_autosomes_polya_stacked_plot.png
diff --git a/...eparation/plots/consensus_seg_annotated_cn_autosomes_stranded_loss_cor_plot.png b/...eparation/plots/consensus_seg_annotated_cn_autosomes_stranded_loss_cor_plot.png
diff --git a/...reparation/plots/consensus_seg_annotated_cn_autosomes_stranded_stacked_plot.png b/...reparation/plots/consensus_seg_annotated_cn_autosomes_stranded_stacked_plot.png
diff --git a/...eparation/plots/consensus_seg_annotated_cn_autosomes_stranded_zero_cor_plot.png b/...eparation/plots/consensus_seg_annotated_cn_autosomes_stranded_zero_cor_plot.png
diff --git a/...le-preparation/plots/consensus_seg_annotated_cn_x_and_y_polya_loss_cor_plot.png b/...le-preparation/plots/consensus_seg_annotated_cn_x_and_y_polya_loss_cor_plot.png
diff --git a/...ile-preparation/plots/consensus_seg_annotated_cn_x_and_y_polya_stacked_plot.png b/...ile-preparation/plots/consensus_seg_annotated_cn_x_and_y_polya_stacked_plot.png
diff --git a/...preparation/plots/consensus_seg_annotated_cn_x_and_y_stranded_loss_cor_plot.png b/...preparation/plots/consensus_seg_annotated_cn_x_and_y_stranded_loss_cor_plot.png
diff --git a/...-preparation/plots/consensus_seg_annotated_cn_x_and_y_stranded_stacked_plot.png b/...-preparation/plots/consensus_seg_annotated_cn_x_and_y_stranded_stacked_plot.png
diff --git a/...preparation/plots/consensus_seg_annotated_cn_x_and_y_stranded_zero_cor_plot.png b/...preparation/plots/consensus_seg_annotated_cn_x_and_y_stranded_zero_cor_plot.png
diff --git a/analyses/focal-cn-file-preparation/plots/separate_histology_cn_counts.png b/analyses/focal-cn-file-preparation/plots/separate_histology_cn_counts.png
diff --git a/analyses/focal-cn-file-preparation/results/consensus_seg_annotated_cn_autosomes.tsv.gz b/analyses/focal-cn-file-preparation/results/consensus_seg_annotated_cn_autosomes.tsv.gz
diff --git a/analyses/focal-cn-file-preparation/results/consensus_seg_annotated_cn_x_and_y.tsv.gz b/analyses/focal-cn-file-preparation/results/consensus_seg_annotated_cn_x_and_y.tsv.gz
diff --git a/analyses/focal-cn-file-preparation/results/consensus_seg_focal_cn_recurrent_genes.tsv b/analyses/focal-cn-file-preparation/results/consensus_seg_focal_cn_recurrent_genes.tsv
@@ -1,39 +1,23 @@
 gene
-KDM7A
-KDM7A-DT
-PARP12
-SLC37A3
-TBXAS1
+FMC1
+FMC1-LUC7L2
+LUC7L2
 TTC26
 UBN2
 ZC3HAV1
+ZC3HAV1L
 CLEC2L
-FMC1
-FMC1-LUC7L2
 HIPK2
+KDM7A
+KIAA1549
 KLRG2
-LUC7L2
+PARP12
+TBXAS1
+KDM7A-DT
+SLC37A3
 MKRN1
 RAB19
-ZC3HAV1L
 DENND2A
-KIAA1549
 ADCK2
-BRAF
-CLEC2L
-FMC1
-FMC1-LUC7L2
-HIPK2
-KDM7A
-KDM7A-DT
-KLRG2
-LUC7L2
 NDUFB2
 NDUFB2-AS1
-PARP12
-SLC37A3
-TBXAS1
-TTC26
-UBN2
-ZC3HAV1
-ZC3HAV1L
diff --git a/analyses/focal-cn-file-preparation/results/consensus_seg_most_focal_cn_status.tsv.gz b/analyses/focal-cn-file-preparation/results/consensus_seg_most_focal_cn_status.tsv.gz