diff --git a/.circleci/config.yml b/.circleci/config.yml index 820a4b46af..dea6d3c3e3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -20,20 +20,47 @@ jobs: name: Sample Distribution Analyses command: ./scripts/run_in_ci.sh bash "analyses/sample-distribution-analysis/run-sample-distribution.sh" + # TODO: The data files for CI need to be fixed https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/527 + # - run: + # name: TP53 NF1 classifier run + # command: OPENPBTA_POLYAPLOT=0 ./scripts/run_in_ci.sh bash "analyses/tp53_nf1_score/run_classifier.sh" + # The analysis no longer needs to be tested as it has been retired and is better covered by 'SNV Caller Analysis' below. #- run: # name: Mutect2 vs Strelka2 # command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/mutect2-vs-strelka2/01-set-up.Rmd', clean = TRUE); # rmarkdown::render('analyses/mutect2-vs-strelka2/02-analyze-concordance.Rmd', clean = TRUE)" - - - run: - name: Collapse RSEM - command: ./scripts/run_in_ci.sh bash analyses/collapse-rnaseq/run-collapse-rnaseq.sh + + ### MOLECULAR SUBTYPING ### - run: name: Molecular Subtyping - HGG command: OPENPBTA_SUBSET=0 ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-HGG/run-molecular-subtyping-HGG.sh + - run: + name: Molecular subtyping - Non-MB/Non-ATRT Embryonal tumors + command: OPENPBTA_SUBSET=0 ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-embryonal/run-embryonal-subtyping.sh + + - run: + name: Molecular Subtyping and Plotting - ATRT + command: OPENPBTA_SUBSET=0 ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-ATRT/run-molecular-subtyping-ATRT.sh + + - run: + name: Molecular subtyping Chordoma + command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.Rmd', clean = TRUE)" + + + # Deprecated - these results do not include germline calls and therefore are insufficient by subtyping + # - run: + # name: SHH TP53 Molecular Subtyping + # 
command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/molecular-subtyping-SHH-tp53/SHH-tp53-molecular-subtyping-data-prep.Rmd', clean = TRUE)" + + ### END MOLECULAR SUBTYPING ### + + - run: + name: Collapse RSEM + command: ./scripts/run_in_ci.sh bash analyses/collapse-rnaseq/run-collapse-rnaseq.sh + - run: name: Immune deconvolution using xCell and MCP-Counter command: OPENPBTA_DECONV_METHOD="mcp_counter" ./scripts/run_in_ci.sh bash analyses/immune-deconv/run-immune-deconv.sh @@ -85,10 +112,6 @@ jobs: name: Tumor mutation burden with TCGA command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/tmb-compare-tcga/compare-tmb.Rmd', clean = TRUE)" - - run: - name: Molecular subtyping - Non-MB/Non-ATRT Embryonal tumors - command: OPENPBTA_SUBSET=0 ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-embryonal/run-embryonal-subtyping.sh - - run: name: Copy number consensus command: ./scripts/run_in_ci.sh bash "analyses/copy_number_consensus_call/run_consensus_call.sh" @@ -104,11 +127,7 @@ jobs: - run: name: Comparative RNASeq - generate correlation matrix - rsem-tpm.stranded command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/01-correlation-matrix.py ../../data/pbta-gene-expression-rsem-tpm.stranded.rds --output-prefix rsem-tpm-stranded- --verbose - - - run: - name: Molecular Subtyping and Plotting - ATRT - command: OPENPBTA_SUBSET=0 ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-ATRT/run-molecular-subtyping-ATRT.sh - + - run: name: Process SV file command: ./scripts/run_in_ci.sh Rscript analyses/sv-analysis/01-process-sv-file.R @@ -116,20 +135,11 @@ jobs: - run: name: Oncoprint plotting command: ./scripts/run_in_ci.sh bash "analyses/oncoprint-landscape/run-oncoprint.sh" - - - run: - name: TP53 NF1 classifier run - command: OPENPBTA_POLYAPLOT=0 ./scripts/run_in_ci.sh bash "analyses/tp53_nf1_score/run_classifier.sh" - run: name: GISTIC Plots command: ./scripts/run_in_ci.sh Rscript -e 
"rmarkdown::render('analyses/cnv-chrom-plot/gistic_plot.Rmd', clean = TRUE)" - # Deprecated - these results do not include germline calls and therefore are insufficient by subtyping - # - run: - # name: SHH TP53 Molecular Subtyping - # command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/molecular-subtyping-SHH-tp53/SHH-tp53-molecular-subtyping-data-prep.Rmd', clean = TRUE)" - - run: name: Gene set enrichment analysis to generate GSVA scores command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash "analyses/gene-set-enrichment-analysis/run-gsea.sh" @@ -142,10 +152,6 @@ jobs: name: Fusion Summary command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash "analyses/fusion-summary/run-new-analysis.sh" - - run: - name: Molecular subtyping Chordoma - command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/molecular-subtyping-chordoma/01-Subtype-chordoma.Rmd', clean = TRUE)" - - run: name: Telomerase activity command: ./scripts/run_in_ci.sh bash analyses/telomerase-activity-prediction/RUN-telomerase-activity-prediction.sh diff --git a/analyses/chromosomal-instability/01b-visualization-cnv-sv.Rmd b/analyses/chromosomal-instability/01b-visualization-cnv-sv.Rmd index a5cfeb7768..ffb277b82d 100644 --- a/analyses/chromosomal-instability/01b-visualization-cnv-sv.Rmd +++ b/analyses/chromosomal-instability/01b-visualization-cnv-sv.Rmd @@ -88,11 +88,8 @@ metadata <- readr::read_tsv(file.path(data_dir, "pbta-histologies.tsv")) Read in the CNV data. 
```{r} -# TODO: update file path when consensus is added to data release cnv_df <- data.table::fread( - file.path("..", - "copy_number_consensus_call", - "results", + file.path(data_dir, "pbta-cnv-consensus.seg.gz"), data.table = FALSE ) @@ -307,7 +304,7 @@ circos_map_plot( ```{r} circos_map_transloc(transloc_df, add_track = FALSE, # We change this to true to add on to our already existing plot - sample_names = samples_for_examples[1], + sample_names = sample(transloc_df$biospecimen_id1, 1), samples_col = "biospecimen_id1", chr_col_1 = "chrom1", # Need to specify which column is the first and second location for each chr_col_2 = "chrom2", @@ -326,7 +323,7 @@ png(file.path(plots_dir, "transloc_circos_plot.png"), width = 800, height = 800) # Run function per usual circos_map_transloc(transloc_df, add_track = FALSE, - sample_names = samples_for_examples[1], + sample_names = sample(transloc_df$biospecimen_id1, 1), samples_col = "biospecimen_id1", chr_col_1 = "chrom1", chr_col_2 = "chrom2", diff --git a/analyses/chromosomal-instability/01b-visualization-cnv-sv.nb.html b/analyses/chromosomal-instability/01b-visualization-cnv-sv.nb.html index 65218600e9..06c7f5209d 100644 --- a/analyses/chromosomal-instability/01b-visualization-cnv-sv.nb.html +++ b/analyses/chromosomal-instability/01b-visualization-cnv-sv.nb.html @@ -2986,8 +2986,8 @@

Function Arguments Descriptions

Set Up

- -
# Set seed so heatmaps turn out the same
+
+
# Set seed so plots turn out the same
 set.seed(2020)
 
 # Magrittr pipe
@@ -3027,7 +3027,7 @@ 

Read in data

# Read in the metadata
 metadata <- readr::read_tsv(file.path(data_dir, "pbta-histologies.tsv"))
- +
Parsed with column specification:
 cols(
   .default = col_character(),
@@ -3039,13 +3039,13 @@ 

Read in data

molecular_subtype = col_logical() ) See spec(...) for full column specifications. -221 parsing failures. +493 parsing failures. row col expected actual file -2606 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' -2607 molecular_subtype 1/0/T/F/TRUE/FALSE Group4 '../../data/pbta-histologies.tsv' -2608 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' -2609 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' -2610 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' +2334 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' +2335 molecular_subtype 1/0/T/F/TRUE/FALSE Group4 '../../data/pbta-histologies.tsv' +2336 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' +2337 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' +2338 molecular_subtype 1/0/T/F/TRUE/FALSE Group3 '../../data/pbta-histologies.tsv' .... ................. .................. ...... ................................. See problems(...) for more details.
@@ -3054,12 +3054,9 @@

Read in data

Read in the CNV data.

- -
# TODO: update file path when consensus is added to data release
-cnv_df <- data.table::fread(
-  file.path("..", 
-            "copy_number_consensus_call", 
-            "results",
+
+
cnv_df <- data.table::fread(
+  file.path(data_dir, 
             "pbta-cnv-consensus.seg.gz"),
   data.table = FALSE
 ) 
@@ -3310,10 +3307,10 @@

Example uses of circos_map_plot

Example 4 making a translocation plot.

- +
circos_map_transloc(transloc_df,
   add_track = FALSE, # We change this to true to add on to our already existing plot
-  sample_names = samples_for_examples[1],
+  sample_names = sample(transloc_df$biospecimen_id1, 1),
   samples_col = "biospecimen_id1",
   chr_col_1 = "chrom1", # Need to specify which column is the first and second location for each
   chr_col_2 = "chrom2",
@@ -3324,20 +3321,20 @@ 

Example uses of circos_map_plot

)
-

+

Example 5 Saving a plot.

- +
# Open up PNG file
 png(file.path(plots_dir, "transloc_circos_plot.png"), width = 800, height = 800)
 # Run function per usual
 circos_map_transloc(transloc_df,
   add_track = FALSE,
-  sample_names = samples_for_examples[1],
+  sample_names = sample(transloc_df$biospecimen_id1, 1),
   samples_col = "biospecimen_id1",
   chr_col_1 = "chrom1",
   chr_col_2 = "chrom2",
@@ -3365,7 +3362,7 @@ 

Session Info

sessionInfo()
- +
R version 3.6.0 (2019-04-26)
 Platform: x86_64-pc-linux-gnu (64-bit)
 Running under: Debian GNU/Linux 9 (stretch)
@@ -3374,61 +3371,84 @@ 

Session Info

BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so locale: - [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 - [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C LC_PAPER=en_US.UTF-8 LC_NAME=C - [9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C + [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C + [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 + [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=C + [7] LC_PAPER=en_US.UTF-8 LC_NAME=C + [9] LC_ADDRESS=C LC_TELEPHONE=C +[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats graphics grDevices utils datasets methods base -other attached packages: -[1] optparse_1.6.2 - loaded via a namespace (and not attached): - [1] colorspace_1.4-1 rjson_0.2.20 biovizBase_1.34.1 - [4] circlize_0.4.8 htmlTable_1.13.1 XVector_0.26.0 - [7] GenomicRanges_1.38.0 GlobalOptions_0.1.1 base64enc_0.1-3 - [10] dichromat_2.0-0 clue_0.3-57 rstudioapi_0.10 - [13] getopt_1.20.3 bit64_0.9-7 AnnotationDbi_1.48.0 - [16] R.methodsS3_1.7.1 splines_3.6.0 ggbio_1.34.0 - [19] knitr_1.23 Formula_1.2-3 jsonlite_1.6 - [22] Rsamtools_2.2.1 cluster_2.1.0 dbplyr_1.4.2 - [25] R.oo_1.22.0 png_0.1-7 graph_1.62.0 - [28] BiocManager_1.30.4 readr_1.3.1 compiler_3.6.0 - [31] httr_1.4.0 backports_1.1.4 assertthat_0.2.1 - [34] Matrix_1.2-17 lazyeval_0.2.2 cli_1.1.0 - [37] acepack_1.4.1 htmltools_0.3.6 prettyunits_1.0.2 - [40] tools_3.6.0 gtable_0.3.0 glue_1.3.1 - [43] GenomeInfoDbData_1.2.2 reshape2_1.4.3 dplyr_0.8.3 - [46] rappdirs_0.3.1 Rcpp_1.0.1 Biobase_2.46.0 - [49] styler_1.1.1 Biostrings_2.54.0 rtracklayer_1.46.0 - [52] xfun_0.8 stringr_1.4.0 ensembldb_2.10.2 - [55] XML_3.98-1.20 zlibbioc_1.32.0 scales_1.0.0 - [58] BSgenome_1.54.0 VariantAnnotation_1.32.0 hms_0.4.2 - [61] ProtGenerics_1.18.0 RBGL_1.62.1 parallel_3.6.0 - [64] SummarizedExperiment_1.16.0 rematch2_2.0.1 AnnotationFilter_1.10.0 - [67] RColorBrewer_1.1-2 ComplexHeatmap_2.2.0 yaml_2.2.0 - [70] curl_3.3 memoise_1.1.0 gridExtra_2.3 - [73] 
ggplot2_3.2.0 biomaRt_2.42.0 rpart_4.1-15 - [76] reshape_0.8.8 latticeExtra_0.6-28 stringi_1.4.3 - [79] RSQLite_2.1.1 S4Vectors_0.24.1 checkmate_1.9.4 - [82] GenomicFeatures_1.38.0 BiocGenerics_0.32.0 BiocParallel_1.20.0 - [85] shape_1.4.4 GenomeInfoDb_1.22.0 rlang_0.4.0 - [88] pkgconfig_2.0.2 matrixStats_0.55.0 bitops_1.0-6 - [91] evaluate_0.14 lattice_0.20-38 purrr_0.3.2 - [94] labeling_0.3 GenomicAlignments_1.22.1 htmlwidgets_1.3 - [97] cowplot_0.9.4 bit_1.1-14 tidyselect_0.2.5 -[100] GGally_1.4.0 plyr_1.8.4 magrittr_1.5 -[103] R6_2.4.0 IRanges_2.20.1 Hmisc_4.2-0 -[106] DelayedArray_0.12.0 DBI_1.0.0 withr_2.1.2 -[109] pillar_1.4.2 foreign_0.8-71 survival_2.44-1.1 -[112] RCurl_1.95-4.12 nnet_7.3-12 tibble_2.1.3 -[115] crayon_1.3.4 OrganismDbi_1.28.0 BiocFileCache_1.10.2 -[118] rmarkdown_1.13 GetoptLong_0.1.7 progress_1.2.2 -[121] grid_3.6.0 data.table_1.12.2 blob_1.1.1 -[124] digest_0.6.20 tidyr_0.8.3 R.utils_2.9.0 -[127] openssl_1.4 stats4_3.6.0 munsell_0.5.0 -[130] askpass_1.1
+ [1] colorspace_1.4-1 rjson_0.2.20 + [3] rprojroot_1.3-2 biovizBase_1.34.1 + [5] circlize_0.4.8 htmlTable_1.13.1 + [7] XVector_0.26.0 GenomicRanges_1.38.0 + [9] GlobalOptions_0.1.1 base64enc_0.1-3 + [11] dichromat_2.0-0 clue_0.3-57 + [13] rstudioapi_0.10 bit64_0.9-7 + [15] AnnotationDbi_1.48.0 R.methodsS3_1.7.1 + [17] splines_3.6.0 ggbio_1.34.0 + [19] knitr_1.23 Formula_1.2-3 + [21] jsonlite_1.6 colorblindr_0.1.0 + [23] Rsamtools_2.2.1 cluster_2.1.0 + [25] dbplyr_1.4.2 R.oo_1.22.0 + [27] png_0.1-7 graph_1.62.0 + [29] BiocManager_1.30.4 readr_1.3.1 + [31] compiler_3.6.0 httr_1.4.0 + [33] backports_1.1.4 assertthat_0.2.1 + [35] Matrix_1.2-17 lazyeval_0.2.2 + [37] cli_1.1.0 acepack_1.4.1 + [39] htmltools_0.3.6 prettyunits_1.0.2 + [41] tools_3.6.0 gtable_0.3.0 + [43] glue_1.3.1 GenomeInfoDbData_1.2.2 + [45] reshape2_1.4.3 dplyr_0.8.3 + [47] rappdirs_0.3.1 Rcpp_1.0.1 + [49] Biobase_2.46.0 styler_1.1.1 + [51] Biostrings_2.54.0 rtracklayer_1.46.0 + [53] xfun_0.8 stringr_1.4.0 + [55] ensembldb_2.10.2 XML_3.98-1.20 + [57] zlibbioc_1.32.0 scales_1.0.0 + [59] BSgenome_1.54.0 VariantAnnotation_1.32.0 + [61] hms_0.4.2 ProtGenerics_1.18.0 + [63] ggupset_0.1.0.9000 RBGL_1.62.1 + [65] parallel_3.6.0 SummarizedExperiment_1.16.1 + [67] AnnotationFilter_1.10.0 rematch2_2.0.1 + [69] RColorBrewer_1.1-2 ComplexHeatmap_2.2.0 + [71] yaml_2.2.0 curl_3.3 + [73] memoise_1.1.0 gridExtra_2.3 + [75] ggplot2_3.2.0 biomaRt_2.42.0 + [77] rpart_4.1-15 reshape_0.8.8 + [79] latticeExtra_0.6-28 stringi_1.4.3 + [81] RSQLite_2.1.1 S4Vectors_0.24.1 + [83] checkmate_1.9.4 GenomicFeatures_1.38.0 + [85] BiocGenerics_0.32.0 BiocParallel_1.20.1 + [87] shape_1.4.4 GenomeInfoDb_1.22.0 + [89] rlang_0.4.0 pkgconfig_2.0.2 + [91] matrixStats_0.55.0 bitops_1.0-6 + [93] evaluate_0.14 lattice_0.20-38 + [95] purrr_0.3.2 GenomicAlignments_1.22.1 + [97] htmlwidgets_1.3 labeling_0.3 + [99] cowplot_0.9.4 bit_1.1-14 +[101] tidyselect_0.2.5 GGally_1.4.0 +[103] plyr_1.8.4 magrittr_1.5 +[105] R6_2.4.0 IRanges_2.20.1 +[107] 
Hmisc_4.2-0 DelayedArray_0.12.2 +[109] DBI_1.0.0 pillar_1.4.2 +[111] foreign_0.8-71 withr_2.1.2 +[113] survival_2.44-1.1 RCurl_1.95-4.12 +[115] nnet_7.3-12 tibble_2.1.3 +[117] crayon_1.3.4 OrganismDbi_1.28.0 +[119] BiocFileCache_1.10.2 rmarkdown_1.13 +[121] GetoptLong_0.1.7 progress_1.2.2 +[123] grid_3.6.0 data.table_1.12.2 +[125] blob_1.1.1 forcats_0.4.0 +[127] digest_0.6.20 tidyr_0.8.3 +[129] R.utils_2.9.0 openssl_1.4 +[131] stats4_3.6.0 munsell_0.5.0 +[133] askpass_1.1
@@ -3436,7 +3456,7 @@

Session Info

-
---
title: "Circos Plots Examples for Visualizing SV and CNV data"
output:   
  html_notebook: 
    toc: true
    toc_float: true
author: Candace Savonen for ALSF - CCDL
date: 2020
---

This notebook shows examples of how to use the circos_map_plot function for 
mapping data that corresponds to chromosomal coordinates.

Corresponds to [issue 397](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/397).

### Usage

This notebook can be run via the command line from the top directory of the 
repository as follows:

```
Rscript -e "rmarkdown::render('analyses/chromosomal-instability/01b-visualization-cnv-sv.Rmd', 
                              clean = TRUE)"
```

## Circular plots functions: 
To use the functions, source the functions file: `source(file.path("util", "circos-plots.R"))`
This file includes three functions, but one is internal. 

**The two functions to use:**

`circos_map_plot`: Given a data.frame with chromosomal coordinates, and a corresponding data value to plot, make a circos plot or add a circular track to an existing plot.  
`circos_map_transloc`: Given a data.frame with two sets of coordinates, map the links between those coordinates on a new or existing circular plot.  

## Function Arguments Descriptions

- `df`: a data.frame with the chromosomal coordinates and y value to plot.  
- `add_track`: If `TRUE`, adds a track to the current plot; if `FALSE`, starts a new plot.  
- `samples_col`: a character string specifying the samples column which can be used to filter by.  
- `sample_names`: a character string that specifies values to keep from `samples_col` column. `"all"` keeps all samples in.   
- `chr_col`: a character string that specifies the chromosomes column name.  
- `start_col`: a character string that specifies the start coordinate column name.  
- `end_col`: a character string that specifies the end coordinate column name.  
- `y_val`: The column name of the value you would like to plot. 
- `color_col`: a column with color specifications for each data point. 
- `track_height`: a number between 0 and 1 that designates height, default is `.15`. 1 = the full diameter of the circular plot.  
- `type`: Type of plot the track should be. Options are `line`, `point`, `rect`.  
- `rect_height`: The added height (plus and minus y_val) that should be plotted. 
- `cytoband`: `TRUE/FALSE` indicating whether you want a cytoband on the 
outermost of the plot. Default is TRUE.  
- `single_color`: A single color to choose. An alternative to `color_col`.

### Set Up

```{r}
# Set seed so plots turn out the same
set.seed(2020)

# Magrittr pipe
`%>%` <- dplyr::`%>%`
```

Read in the custom functions.

```{r}
source(file.path("util", "circos-plots.R"))
```

### Directories and Files

```{r}
# Path to data directory
data_dir <- file.path("..", "..", "data")

# Path to output directory
plots_dir <- "plots"
```

### Read in data 

Set up metadata

```{r}
# Read in the metadata.
# Left to guess, readr types `molecular_subtype` as logical (see the parsing
# failures in the rendered notebook), which turns values such as "Group3"
# into NA. Pin that column to character; every other column is still guessed.
metadata <- readr::read_tsv(
  file.path(data_dir, "pbta-histologies.tsv"),
  col_types = readr::cols(
    molecular_subtype = readr::col_character()
  )
)
```

Read in the CNV data. 

```{r}
# TODO: update file path when consensus is added to data release
cnv_df <- data.table::fread(
  file.path("..", 
            "copy_number_consensus_call", 
            "results",
            "pbta-cnv-consensus.seg.gz"),
  data.table = FALSE
) 
```

Create a status variable based on copy number and tumor ploidy. 
This logic was borrowed from the [`focal-cn-file-preparation` module](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/86bd634a928db3f460328210e38c7dc1797d1304/analyses/focal-cn-file-preparation/02-add-ploidy-consensus.Rmd#L74).

```{r}
cnv_df <- cnv_df %>% 
  # Only keep data that has values for copy.num
  dplyr::filter(!is.na(copy.num)) %>%
  # Tack on the ploidy and sex estimate information from metadata
  dplyr::inner_join(
    dplyr::select(metadata, 
                  Kids_First_Biospecimen_ID, 
                  tumor_ploidy,
                  germline_sex_estimate), 
             by = c("ID" = "Kids_First_Biospecimen_ID")) %>%
  # Create a status variable
  dplyr::mutate(status = dplyr::case_when(
    # when the copy number is less than inferred ploidy, mark this as a loss
    copy.num < tumor_ploidy ~ "loss",
    # if copy number is higher than ploidy, mark as a gain
    copy.num > tumor_ploidy ~ "gain",
    copy.num == tumor_ploidy ~ "neutral"
  ), 
  status = factor(status, levels = c("loss", "neutral", "gain")))
```

Read in the SV data. 

```{r}
sv_df <- data.table::fread(
  file.path(data_dir, "pbta-sv-manta.tsv.gz"),
  data.table = FALSE
)
```

Make a translocation data.frame where both sets of coordinates for the translocation are in the same row.

```{r}
transloc_df <- sv_df %>%
  dplyr::filter(SV.type == "BND") %>%
  dplyr::mutate(
    match_id = stringr::str_sub(ID, 0, -3),
    pair_num = stringr::str_sub(ID, -1)
  ) %>%
  dplyr::select(
    biospecimen_id = Kids.First.Biospecimen.ID.Tumor,
    chrom = SV.chrom,
    start = SV.start,
    end = SV.end,
    width = SV.length,
    match_id,
    pair_num
  )

transloc_df <- transloc_df %>%
  dplyr::filter(pair_num == 0) %>%
  dplyr::inner_join(dplyr::filter(transloc_df, pair_num == 1),
    by = "match_id",
    suffix = c("1", "2")
  )
```

Let's get some sample IDs to play with in these examples. 

```{r}
# `cnv_df` has one row per segment, so sampling `cnv_df$ID` directly can return
# the same sample ID more than once. Draw from the distinct IDs instead so the
# multi-sample examples below really use different samples.
unique_ids <- unique(cnv_df$ID)

# Guard against data sets with fewer than 5 distinct samples
samples_for_examples <- sample(unique_ids, min(5, length(unique_ids)))
```

## Making color keys examples
The `circos_map_plot` functions take columns that specify the colors for each data row. 
Here are some examples of how to set up color columns that can be passed to the circos
functions as the `color_col` argument. 

**Numerically based color key:** 

```{r}
# Make color palette based on 5 colors
palette_col <- RColorBrewer::brewer.pal(5, 
                                        name = "Accent" # Can change this palette 
                                        # Use RColorBrewer::display.brewer.all() to see options
                                        )

# Make color ramp function based on quantiles of seg.mean and palette
color_fun <- circlize::colorRamp2(
  breaks = quantile(cnv_df$seg.mean, 
                    c(0.15, 0.35, 0.5, 0.65, 0.85), na.rm = TRUE), 
  colors = palette_col)
```

Add numerically based color column to `cnv_df`.

```{r}
cnv_df <- cnv_df %>%
  # Make column that specifies the color for each value
  dplyr::mutate(num_color_key = color_fun(copy.num))
```

Can run this to see other palettes besides `Accent`.

```{r}
RColorBrewer::display.brewer.all()
```

**Factor-based color keys:** 
Here we will add a column to `cnv_df` that color codes based on the `status` 
column which has three categories. 

```{r}
# Let's determine how many levels this factor column has
n_levels <- length(levels(cnv_df$status))

# Set up a palette based on number of factor levels
palette_col <- RColorBrewer::brewer.pal(n_levels, name = "Accent")

# Let's make a key to recode by based on levels
palette_key <- palette_col

# Have the factor levels be the names
names(palette_key) <- levels(cnv_df$status)

# Preview this
palette_key
```

Add factor based color column to cnv_df

```{r}
cnv_df <- cnv_df %>%
  # Make column that specifies the color for each factor level
  dplyr::mutate(fac_color_key = dplyr::recode(status, !!!palette_key))
```

## Example uses of `circos_map_plot`

**Example 1** making a circular scatter plot for a single sample.

```{r}
# Example 1: start a new circos plot showing copy number as points for one
# sample, colored by the factor-based color column
circos_map_plot(
  df = cnv_df,
  add_track = FALSE, # If set to TRUE we would add to an existing plot.
  samples_col = "ID", # Designate which column of `cnv_df` holds the sample IDs.
  sample_names = samples_for_examples[1], # What sample we are plotting.
  chr_col = "chrom", # Designate which columns of `cnv_df` hold the coordinates.
  start_col = "loc.start",
  end_col = "loc.end",
  y_val = "copy.num", # This is the data we want to map
  track_height = .15, # This is the default
  type = "point", # type is set to point
  color_col = "fac_color_key"
  )
```

**Example 2** making a plot with rectangles for multiple samples, color coded by numeric value and without the cytoband.

```{r}
# Example 2: start a new circos plot drawing copy number as rectangles for
# three samples, colored by the numeric color column, with no cytoband
circos_map_plot(
  df = cnv_df,
  add_track = FALSE,
  samples_col = "ID",
  sample_names = samples_for_examples[1:3], # What samples we are plotting.
  chr_col = "chrom",
  start_col = "loc.start",
  end_col = "loc.end",
  y_val = "copy.num",
  track_height = .15,
  type = "rect", # Changed this to rect
  rect_height = .2, # Optionally can change height with this argument. Default is +/- 0.4
  color_col = "num_color_key", 
  cytoband = FALSE # Turning off the cytoband here. Default is TRUE
)
```

**Example 3** Adding layers to the same plot.
Here we will plot two samples, each with their own track, and a different color for each. 

```{r}
# This first part is like Example 1, but it uses a single color instead of a
# color column, and we will add an extra layer on top of it
circos_map_plot(
  df = cnv_df,
  add_track = FALSE,
  samples_col = "ID",
  sample_names = samples_for_examples[1], # One sample here
  chr_col = "chrom",
  start_col = "loc.start",
  end_col = "loc.end",
  y_val = "copy.num",
  type = "point",
  single_color = "red" # A singular color
)
# We'll use a different color for the second sample so the tracks are easy to tell apart
# Add a second point layer
circos_map_plot(
  df = cnv_df,
  add_track = TRUE, # Set to TRUE so this adds to the plot called above.
  samples_col = "ID",
  sample_names = samples_for_examples[2], # Second sample here.
  chr_col = "chrom", 
  start_col = "loc.start",
  end_col = "loc.end",
  y_val = "copy.num",
  type = "point", 
  single_color = "blue" # A different singular color
)
```

**Example 4** making a translocation plot. 

```{r}
# Example 4: start a new circos plot linking the paired translocation
# coordinates in `transloc_df` for one sample.
# NOTE(review): `samples_for_examples` is drawn from the CNV data's IDs, so the
# chosen sample may have no rows in `transloc_df` -- confirm before relying on
# this example.
circos_map_transloc(transloc_df,
  add_track = FALSE, # FALSE starts a new plot; change this to TRUE to add on to an already existing plot
  sample_names = samples_for_examples[1],
  samples_col = "biospecimen_id1",
  chr_col_1 = "chrom1", # Need to specify which column is the first and second location for each
  chr_col_2 = "chrom2",
  start_col_1 = "start1",
  start_col_2 = "start2",
  end_col_1 = "end1",
  end_col_2 = "end2"
)
```

**Example 5** Saving a plot. 

```{r}
# Open up PNG file; everything drawn until dev.off() goes to this file
png(file.path(plots_dir, "transloc_circos_plot.png"), width = 800, height = 800)
# Run function per usual
# NOTE(review): `samples_for_examples` is drawn from the CNV data's IDs, so the
# chosen sample may have no rows in `transloc_df` -- confirm before relying on
# this example.
circos_map_transloc(transloc_df,
  add_track = FALSE,
  sample_names = samples_for_examples[1],
  samples_col = "biospecimen_id1",
  chr_col_1 = "chrom1",
  chr_col_2 = "chrom2",
  start_col_1 = "start1",
  start_col_2 = "start2",
  end_col_1 = "end1",
  end_col_2 = "end2"
)
dev.off() # Close the png device so the file is written out
```

![Print plot we saved to PNG:](plots/transloc_circos_plot.png)

### Session Info

```{r}
sessionInfo()
```


+
---
title: "Circos Plots Examples for Visualizing SV and CNV data"
output:   
  html_notebook: 
    toc: true
    toc_float: true
author: Candace Savonen for ALSF - CCDL
date: 2020
---

This notebook shows examples of how to use the circos_map_plot function for 
mapping data that corresponds to chromosomal coordinates.

Corresponds to [issue 397](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/397).

### Usage

This notebook can be run via the command line from the top directory of the 
repository as follows:

```
Rscript -e "rmarkdown::render('analyses/chromosomal-instability/01b-visualization-cnv-sv.Rmd', 
                              clean = TRUE)"
```

## Circular plots functions: 
To use the functions, source the functions file: `source(file.path("util", "circos-plots.R"))`
This file includes three functions, but one is internal. 

**The two functions to use:**

`circos_map_plot`: Given a data.frame with chromosomal coordinates, and a corresponding data value to plot, make a circos plot or add a circular track to an existing plot.  
`circos_map_transloc`: Given a data.frame with two sets of coordinates, map the links between those coordinates on a new or existing circular plot.  

## Function Arguments Descriptions

- `df`: a data.frame with the chromosomal coordinates and y value to plot.  
- `add_track`: If `TRUE`, adds a track to the current plot; if `FALSE`, starts a new plot.  
- `samples_col`: a character string specifying the samples column which can be used to filter by.  
- `sample_names`: a character string that specifies values to keep from `samples_col` column. `"all"` keeps all samples in.   
- `chr_col`: a character string that specifies the chromosomes column name.  
- `start_col`: a character string that specifies the start coordinate column name.  
- `end_col`: a character string that specifies the end coordinate column name.  
- `y_val`: The column name of the value you would like to plot. 
- `color_col`: a column with color specifications for each data point. 
- `track_height`: a number between 0 and 1 that designates height, default is `.15`. 1 = the full diameter of the circular plot.  
- `type`: Type of plot the track should be. Options are `line`, `point`, `rect`.  
- `rect_height`: The added height (plus and minus y_val) that should be plotted. 
- `cytoband`: `TRUE/FALSE` indicating whether you want a cytoband on the 
outermost of the plot. Default is TRUE.  
- `single_color`: A single color to choose. An alternative to `color_col`.

### Set Up

```{r}
# Make the magrittr pipe available without attaching dplyr
`%>%` <- dplyr::`%>%`

# Fix the random seed so the randomly drawn example samples, and therefore
# the plots, come out the same on every run
set.seed(2020)
```

Read in the custom functions.

```{r}
# Load the custom circos plotting functions used in the examples below.
# The path is relative, so it is resolved against the working directory the
# chunk is run from.
source(file.path("util", "circos-plots.R"))
```

### Directories and Files

```{r}
# Plots saved by this notebook are written to this directory
plots_dir <- "plots"

# Input files are read from the data directory two levels up
data_dir <- file.path("..", "..", "data")
```

### Read in data 

Set up metadata

```{r}
# Read in the metadata.
# Left to guess, readr types `molecular_subtype` as logical (see the parsing
# failures in the rendered notebook), which turns values such as "Group3"
# into NA. Pin that column to character; every other column is still guessed.
metadata <- readr::read_tsv(
  file.path(data_dir, "pbta-histologies.tsv"),
  col_types = readr::cols(
    molecular_subtype = readr::col_character()
  )
)
```

Read in the CNV data. 

```{r}
# Consensus copy number segment file in the data directory
cnv_file <- file.path(data_dir, "pbta-cnv-consensus.seg.gz")

# Read it in as a plain data.frame rather than a data.table
cnv_df <- data.table::fread(cnv_file, data.table = FALSE)
```

Create a status variable based on copy number and tumor ploidy. 
This logic was borrowed from the [`focal-cn-file-preparation` module](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/86bd634a928db3f460328210e38c7dc1797d1304/analyses/focal-cn-file-preparation/02-add-ploidy-consensus.Rmd#L74).

```{r}
# Ploidy and sex estimate for each biospecimen, taken from the metadata
ploidy_df <- metadata %>%
  dplyr::select(Kids_First_Biospecimen_ID, tumor_ploidy, germline_sex_estimate)

cnv_df <- cnv_df %>%
  # Segments without a copy number cannot be classified, so drop them
  dplyr::filter(!is.na(copy.num)) %>%
  # Keep only segments whose sample ID is present in the metadata
  dplyr::inner_join(ploidy_df, by = c("ID" = "Kids_First_Biospecimen_ID")) %>%
  dplyr::mutate(
    # Compare each segment's copy number to the sample's ploidy:
    # below is a loss, above is a gain, equal is neutral.
    # Rows where none of the comparisons is TRUE (e.g. ploidy is NA) stay NA.
    status = dplyr::case_when(
      copy.num < tumor_ploidy ~ "loss",
      copy.num > tumor_ploidy ~ "gain",
      copy.num == tumor_ploidy ~ "neutral"
    ),
    # Fix the level order so downstream color keys line up with it
    status = factor(status, levels = c("loss", "neutral", "gain"))
  )
```

Read in the SV data. 

```{r}
# SV file (`pbta-sv-manta.tsv.gz`) in the data directory
sv_file <- file.path(data_dir, "pbta-sv-manta.tsv.gz")

# Read it in as a plain data.frame rather than a data.table
sv_df <- data.table::fread(sv_file, data.table = FALSE)
```

Make a translocation data.frame where both sets of coordinates for the translocation are in the same row.

```{r}
# One row per breakend: keep only the "BND" SV calls and the columns we need
bnd_df <- sv_df %>%
  dplyr::filter(SV.type == "BND") %>%
  dplyr::mutate(
    # ID without its final two characters; shared by both ends of a pair
    # (presumably the trailing ":0" / ":1" suffix -- verify against the SV file)
    match_id = stringr::str_sub(ID, 0, -3),
    # Final character of the ID; tells the two ends of a pair apart
    pair_num = stringr::str_sub(ID, -1)
  ) %>%
  dplyr::select(
    biospecimen_id = Kids.First.Biospecimen.ID.Tumor,
    chrom = SV.chrom,
    start = SV.start,
    end = SV.end,
    width = SV.length,
    match_id,
    pair_num
  )

# Put both ends of each translocation on one row: columns from the `0` end get
# the suffix "1" and columns from the `1` end get the suffix "2"
transloc_df <- dplyr::inner_join(
  dplyr::filter(bnd_df, pair_num == 0),
  dplyr::filter(bnd_df, pair_num == 1),
  by = "match_id",
  suffix = c("1", "2")
)
```

Let's get some sample IDs to play with in these examples. 

```{r}
# `cnv_df` has one row per segment, so sampling `cnv_df$ID` directly can return
# the same sample ID more than once. Draw from the distinct IDs instead so the
# multi-sample examples below really use different samples.
unique_ids <- unique(cnv_df$ID)

# Guard against data sets with fewer than 5 distinct samples
samples_for_examples <- sample(unique_ids, min(5, length(unique_ids)))
```

## Making color keys examples
The `circos_map_plot` functions take columns that specify the colors for each data row. 
Here are some examples of how to set up color columns that can be passed to the circos
functions as the `color_col` argument. 

**Numerically based color key:** 

```{r}
# Break points for the color ramp: five quantiles of seg.mean
# NOTE(review): the ramp is built from `seg.mean` quantiles but is later applied
# to `copy.num` -- confirm that is the intended scale.
seg_mean_breaks <- quantile(
  cnv_df$seg.mean,
  probs = c(0.15, 0.35, 0.5, 0.65, 0.85),
  na.rm = TRUE
)

# Five colors from the "Accent" palette, one per break point.
# Run RColorBrewer::display.brewer.all() to see the other palette options.
palette_col <- RColorBrewer::brewer.pal(n = 5, name = "Accent")

# Function that maps a numeric value onto the palette via the break points
color_fun <- circlize::colorRamp2(
  breaks = seg_mean_breaks,
  colors = palette_col
)
```

Add numerically based color column to `cnv_df`.

```{r}
# Run every copy.num value through `color_fun` and store the resulting color in
# a new `num_color_key` column
# NOTE(review): the breaks of `color_fun` were computed from seg.mean quantiles
# but the ramp is applied to copy.num here -- confirm this is intended
cnv_df <- dplyr::mutate(cnv_df, num_color_key = color_fun(copy.num))
```

Run this to see the other available palettes besides `Accent`.

```{r}
# Draw every available RColorBrewer palette so an alternative can be picked
RColorBrewer::display.brewer.all()
```

**Factor-based color keys:** 
Here we will add a column to `cnv_df` that color codes based on the `status` 
column which has three categories. 

```{r}
# Number of levels in the `status` factor
n_levels <- nlevels(cnv_df$status)

# One "Accent" color per factor level
palette_col <- RColorBrewer::brewer.pal(n = n_levels, name = "Accent")

# Recoding key: the colors, named by the factor level each one stands for
palette_key <- stats::setNames(palette_col, levels(cnv_df$status))

# Print the key to check the level-to-color mapping
palette_key
```

Add factor based color column to cnv_df

```{r}
# Translate each `status` level into its color from `palette_key` and keep the
# result as a new `fac_color_key` column
cnv_df <- dplyr::mutate(
  cnv_df,
  fac_color_key = dplyr::recode(status, !!!palette_key)
)
```

## Example uses of `circos_map_plot`

**Example 1** making a circular scatter plot for a single sample.

```{r}
circos_map_plot(
  # Data and the sample to draw
  df = cnv_df,
  samples_col = "ID", # Column of `cnv_df` that holds the sample IDs
  sample_names = samples_for_examples[1], # The single sample being plotted
  # Columns of `cnv_df` that hold the genomic coordinates
  chr_col = "chrom",
  start_col = "loc.start",
  end_col = "loc.end",
  # What to draw and how
  y_val = "copy.num", # The data we want to map
  type = "point", # Draw each row as a point
  color_col = "fac_color_key", # Column holding the color for each row
  track_height = .15, # This is the default
  add_track = FALSE # If set to TRUE we would add to an existing plot
)
```

**Example 2** making a plot with rectangles for multiple samples, color coded by numeric value, and without the cytoband.

```{r}
circos_map_plot(
  # Data and the samples to draw
  df = cnv_df,
  samples_col = "ID",
  sample_names = samples_for_examples[1:3], # Plot the first three example samples
  # Genomic coordinate columns
  chr_col = "chrom",
  start_col = "loc.start",
  end_col = "loc.end",
  # What to draw and how
  y_val = "copy.num",
  type = "rect", # Rectangles instead of points
  rect_height = .2, # Optional rectangle height; the default is +/- 0.4
  color_col = "num_color_key", # Numerically based color column
  track_height = .15,
  cytoband = FALSE, # Leave out the cytoband; the default is TRUE
  add_track = FALSE
)
```

**Example 3** Adding layers to the same plot.
Here we will plot two samples, each with their own track, and a different color for each. 

```{r}
# First layer: like Example 1 (points for the first sample), but drawn in one
# fixed color instead of using a color column
circos_map_plot(
  df = cnv_df,
  samples_col = "ID",
  sample_names = samples_for_examples[1], # First sample
  chr_col = "chrom",
  start_col = "loc.start",
  end_col = "loc.end",
  y_val = "copy.num",
  type = "point",
  single_color = "red", # One color for every point
  add_track = FALSE # Start a new plot
)

# Second layer: another track of points, this time for the second sample and in
# a different color
circos_map_plot(
  df = cnv_df,
  samples_col = "ID",
  sample_names = samples_for_examples[2], # Second sample
  chr_col = "chrom",
  start_col = "loc.start",
  end_col = "loc.end",
  y_val = "copy.num",
  type = "point",
  single_color = "blue", # A different single color
  add_track = TRUE # Add onto the plot started by the call above
)
```

**Example 4** making a translocation plot. 

```{r}
circos_map_transloc(transloc_df,
  # Plot one randomly chosen sample that has translocation data
  samples_col = "biospecimen_id1",
  sample_names = sample(transloc_df$biospecimen_id1, 1),
  # Each translocation has two locations, so every coordinate needs a column for
  # the first location and a column for the second
  chr_col_1 = "chrom1",
  start_col_1 = "start1",
  end_col_1 = "end1",
  chr_col_2 = "chrom2",
  start_col_2 = "start2",
  end_col_2 = "end2",
  add_track = FALSE # Start a new plot; TRUE would add on to an already existing plot
)
```

**Example 5** Saving a plot. 

```{r}
# Open a PNG graphics device so that the next plot is written to a file
png(
  filename = file.path(plots_dir, "transloc_circos_plot.png"),
  width = 800,
  height = 800
)

# Call the plotting function exactly as when plotting to the screen
circos_map_transloc(transloc_df,
  samples_col = "biospecimen_id1",
  sample_names = sample(transloc_df$biospecimen_id1, 1),
  chr_col_1 = "chrom1",
  start_col_1 = "start1",
  end_col_1 = "end1",
  chr_col_2 = "chrom2",
  start_col_2 = "start2",
  end_col_2 = "end2",
  add_track = FALSE
)

# Close the PNG device, which finishes writing the file
dev.off()
```

![Print plot we saved to PNG:](plots/transloc_circos_plot.png)

### Session Info

```{r}
# Print the R version, platform and loaded packages used to run this notebook
sessionInfo()
```


diff --git a/analyses/chromosomal-instability/util/circos-plots.R b/analyses/chromosomal-instability/util/circos-plots.R index 05a9704d80..a4dd9cc469 100644 --- a/analyses/chromosomal-instability/util/circos-plots.R +++ b/analyses/chromosomal-instability/util/circos-plots.R @@ -135,6 +135,13 @@ circos_map_plot <- function(df, y_min <- min(bed_df$y_val, na.rm = TRUE) y_max <- max(bed_df$y_val, na.rm = TRUE) + # Can't have identical y_min and y_max, this is just so CircleCI runs even if + # the subset data is wonky + if (y_min == y_max) { + y_max <- y_max + 0.001 + warning("ymax and ymin are identical") + } + # Tell them only one color is allowed if (length(single_color) > 1) { warning("Only a single color is allowed for the `single_color` argument, diff --git a/analyses/fusion-summary/01-fusion-summary.Rmd b/analyses/fusion-summary/01-fusion-summary.Rmd index 933900a30b..3875306d81 100644 --- a/analyses/fusion-summary/01-fusion-summary.Rmd +++ b/analyses/fusion-summary/01-fusion-summary.Rmd @@ -151,36 +151,28 @@ specimensUnion<- union(arribaDF$tumor_id, starfusionDF$tumor_id) #### Write non-MB, non-ATRT embryonal fusions to file ```{r} -allFuseEmbry <- allFuseEmbry %>% - prepareOutput(specimensUnion) +if (!running_in_ci) { + allFuseEmbry <- allFuseEmbry %>% + prepareOutput(specimensUnion) + allFuseEmbry %>% + mutate( + `CIC--NUTM1` = 0, + `MN1--BEND2` = 0 + ) %>% + write_tsv(embryFile) +} ``` -```{r} -# Are there any missing fusions? 
-setdiff(embryFuses, colnames(allFuseEmbry)) -``` +#### Write ependymoma fusions to file ```{r} -allFuseEmbry %>% +allFuseEpend %>% + prepareOutput(specimensUnion) %>% mutate( - `CIC--NUTM1` = 0, - `MN1--BEND2` = 0 + `C11orf95--YAP1` = 0, + `LTBP3--RELA` = 0, + `PTEN--TAS2R1` = 0, + `YAP1--MAMLD2` = 0 ) %>% - write_tsv(embryFile) -``` - -#### Write ependymoma fusions to file - -```{r} -if (!running_in_ci) { - allFuseEpend %>% - prepareOutput(specimensUnion) %>% - mutate( - `C11orf95--YAP1` = 0, - `LTBP3--RELA` = 0, - `PTEN--TAS2R1` = 0, - `YAP1--MAMLD2` = 0 - ) %>% - write_tsv(ependFile) -} + write_tsv(ependFile) ``` diff --git a/analyses/fusion-summary/01-fusion-summary.nb.html b/analyses/fusion-summary/01-fusion-summary.nb.html index dd7b91804a..10193a8610 100644 --- a/analyses/fusion-summary/01-fusion-summary.nb.html +++ b/analyses/fusion-summary/01-fusion-summary.nb.html @@ -1789,19 +1789,6 @@

Libraries and functions

library(tidyverse)
- -
Registered S3 method overwritten by 'dplyr':
-  method           from
-  print.rowwise_df     
-── Attaching packages ────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
-✔ ggplot2 3.2.0     ✔ purrr   0.3.2
-✔ tibble  2.1.3     ✔ dplyr   0.8.3
-✔ tidyr   0.8.3     ✔ stringr 1.4.0
-✔ readr   1.3.1     ✔ forcats 0.4.0
-── Conflicts ───────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
-✖ dplyr::filter() masks stats::filter()
-✖ dplyr::lag()    masks stats::lag()
- @@ -1994,9 +1981,17 @@

Filter putative oncogenic fusions list

Write non-MB, non-ATRT embryonal fusions to file

- -
allFuseEmbry <- allFuseEmbry %>%
-  prepareOutput(specimensUnion)
+ +
if (!running_in_ci) {
+  allFuseEmbry <- allFuseEmbry %>%
+    prepareOutput(specimensUnion)
+  allFuseEmbry %>%
+    mutate(
+      `CIC--NUTM1` = 0,
+      `MN1--BEND2` = 0
+    ) %>%
+    write_tsv(embryFile)
+}
Using FusionName as value column: use value.var to override.
@@ -2006,46 +2001,21 @@ 

Write non-MB, non-ATRT embryonal fusions to file

- - - -
# Are there any missing fusions?
-setdiff(embryFuses, colnames(allFuseEmbry))
- - -
[1] "CIC--NUTM1" "MN1--BEND2"
- - - - - - -
allFuseEmbry %>%
-  mutate(
-    `CIC--NUTM1` = 0,
-    `MN1--BEND2` = 0
-  ) %>%
-  write_tsv(embryFile)
- - -

Write ependymoma fusions to file

- -
if (!running_in_ci) {
-  allFuseEpend %>%
-    prepareOutput(specimensUnion) %>%
-    mutate(
-      `C11orf95--YAP1` = 0,
-      `LTBP3--RELA` = 0,
-      `PTEN--TAS2R1` = 0,
-      `YAP1--MAMLD2` = 0
-    ) %>%
-    write_tsv(ependFile)
-}
+ +
allFuseEpend %>%
+  prepareOutput(specimensUnion) %>%
+  mutate(
+    `C11orf95--YAP1` = 0,
+    `LTBP3--RELA` = 0,
+    `PTEN--TAS2R1` = 0,
+    `YAP1--MAMLD2` = 0
+  ) %>%
+  write_tsv(ependFile)
Using FusionName as value column: use value.var to override.
@@ -2058,7 +2028,7 @@ 

Write ependymoma fusions to file

-
LS0tCnRpdGxlOiAiR2VuZXJhdGUgRnVzaW9uIFN1bW1hcnkgRmlsZXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawphdXRob3I6IERhbmllbCBNaWxsZXIgKEQzYikgYW5kIEphY2x5biBUYXJvbmkgKENDREwpCmRhdGU6IEphbnVhcnkgMjAyMApwYXJhbXM6CiAgaXNfY2k6IDAKLS0tCgpHZW5lcmF0ZSBmdXNpb24gZmlsZXMgc3BlY2lmaWNhbGx5IGZvciBjb25zdW1wdGlvbiBieSBtb2xlY3VsYXIgc3VidHlwaW5nIGFuYWx5c2VzCgojIyBTZXQgdXAKCmBgYHtyfQojIGlmIHJ1bm5pbmcgaW4gQ0ksIHdlIG5lZWQgdG8gc2tpcCB0aGUgRVBOIHN0ZXBzCmlmIChwYXJhbXMkaXNfY2kgPT0gMCkgcnVubmluZ19pbl9jaSA8LSBGQUxTRQppZiAocGFyYW1zJGlzX2NpID09IDEpIHJ1bm5pbmdfaW5fY2kgPC0gVFJVRQpgYGAKCiMjIyBMaWJyYXJpZXMgYW5kIGZ1bmN0aW9ucwoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCmBgYHtyfQojJyBHZW5lcmF0ZSBmaWx0ZXJlZCBmdXNpb24gZnJhbWUKIycgQHBhcmFtIGRmIFVuZmlsdGVyZWQgZnVzaW9uIGRhdGEgZnJhbWUKIycgQHBhcmFtIGJpb2lkIExpc3Qgb2YgYmlvc3BlY2ltZW4gSURzCiMnIEBwYXJhbSBmdXNlcyBMaXN0IG9mIGV4cGxpY2l0IGZ1c2lvbiBuYW1lcwojJyBAcGFyYW0gZ2VuZXMgTGlzdCBvZiBnZW5lIG5hbWVzCiMnIEByZXR1cm4gdGhlIGZpbHRlcmVkIGZ1c2lvbiBkYXRhIGZyYW1lCmZpbHRlckZ1c2lvbiA8LSBmdW5jdGlvbihkZiwgYmlvaWQsIGZ1c2VzLCBnZW5lcykgewogIGlmICghbWlzc2luZyhiaW9pZCkpIHsKICAgIGRmIDwtIGZpbHRlcihkZiwgU2FtcGxlICVpbiUgYmlvaWQpCiAgfQogIGlmICghbWlzc2luZyhmdXNlcykgJiAhbWlzc2luZyhnZW5lcykpIHsKICAgIGRmIDwtIGZpbHRlcihkZiwgRnVzaW9uTmFtZSAlaW4lIGZ1c2VzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUxQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUxQiAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQiAlaW4lIGdlbmVzKQogIH0gZWxzZSBpZiAoIW1pc3NpbmcoZnVzZXMpKSB7CiAgICBkZiA8LSBmaWx0ZXIoZGYsIEZ1c2lvbk5hbWUgJWluJSBmdXNlcykKICB9IGVsc2UgaWYgKCFtaXNzaW5nKGdlbmVzKSkgewogICAgZGYgPC0gZmlsdGVyKGRmLAogICAgICAgICAgICAgICAgIEdlbmUxQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUxQiAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQiAlaW4lIGdlbmVzKQogIH0KICByZXR1cm4oZGYgJT4lIHNlbGVjdChTYW1wbGUsIEZ1c2lvbk5hbWUpKQp9CgoKIycgR2VuZXJhdGUgbWF0cml4IHdpdGggZnVzaW9uIGNvdW50cwojJyBAcGFyYW0gZnVzZURGIEZpbHRlcmVkIGZ1c2lvbiBkYXRhIGZyYW1lCiMnIEBw
YXJhbSBiaW9pZCBMaXN0IG9mIGJpb3NwZWNpbWVuIElEcyB0aGF0IHNob3VsZCBiZSBpbmNsdWRlZCBpbiBmaW5hbCB0YWJsZQoKIycgQHJldHVybiBEYXRhIGZyYW1lIHRoYXQgY29udGFpbnMgZnVzaW9uIGNvdW50cwpwcmVwYXJlT3V0cHV0IDwtIGZ1bmN0aW9uKGZ1c2VERiwgYmlvaWQpIHsKICBmdXNlREYgJT4lIAogICAgcmVzaGFwZTI6OmRjYXN0KFNhbXBsZSB+IEZ1c2lvbk5hbWUpICU+JQogICAgcmlnaHRfam9pbihkYXRhLmZyYW1lKFNhbXBsZSA9IGJpb2lkKSkgJT4lCiAgICByZXBsYWNlKGlzLm5hKC4pLCAwKSAlPiUKICAgIHJlbmFtZShLaWRzX0ZpcnN0X0Jpb3NwZWNpbWVuX0lEID0gU2FtcGxlKQp9CmBgYAoKIyMjIFJlYWQgaW4gZGF0YQoKYGBge3J9CmRhdGFEaXIgPC0gZmlsZS5wYXRoKCIuLiIsICIuLiIsICJkYXRhIikKIycgVGhlIHB1dGF0aXZlIG9uY29nZW5pYyBmdXNpb24gZmlsZSBpcyB3aGF0IHdlJ2xsIHVzZSB0byBjaGVjayBmb3IgdGhlIAojJyBwcmVzZW5jZSBvciBhYnNlbmNlIG9mIHRoZSBmdXNpb25zLgpwdXRhdGl2ZU9uY29nZW5pY0RGIDwtIAogIHJlYWRfdHN2KGZpbGUucGF0aChkYXRhRGlyLCAicGJ0YS1mdXNpb24tcHV0YXRpdmUtb25jb2dlbmljLnRzdiIpKQojJyBIb3dldmVyLCBzb21lIGJpb3NwZWNpbWVucyBhcmUgbm90IHJlcHJlc2VudGVkIGluIHRoaXMgZmlsdGVyZWQsIHByaW9yaXRpemVkCiMnIGZpbGUgYnV0ICphcmUqIHByZXNlbnQgaW4gdGhlIG9yaWdpbmFsIGZpbGVzIC0tIHRoaXMgd2lsbCBjYXVzZSB0aGVtIHRvIGJlCiMnICJtaXNzaW5nIiBpbiB0aGUgZmluYWwgZmlsZXMgZm9yIGNvbnN1bXB0aW9uIHdoaWNoIGNvdWxkIG1pc2xlYWQgYW5hbHlzdHMuCmFycmliYURGIDwtIHJlYWRfdHN2KGZpbGUucGF0aChkYXRhRGlyLCAicGJ0YS1mdXNpb24tYXJyaWJhLnRzdi5neiIpKQpzdGFyZnVzaW9uREYgPC0gcmVhZF90c3YoZmlsZS5wYXRoKGRhdGFEaXIsICJwYnRhLWZ1c2lvbi1zdGFyZnVzaW9uLnRzdi5neiIpKQpgYGAKCiMjIyBPdXRwdXQKCmBgYHtyfQpyZXN1bHRzRGlyIDwtICJyZXN1bHRzIgppZiAoIWRpci5leGlzdHMocmVzdWx0c0RpcikpIHsKICBkaXIuY3JlYXRlKHJlc3VsdHNEaXIpCn0KZXBlbmRGaWxlIDwtIGZpbGUucGF0aChyZXN1bHRzRGlyLCAiZnVzaW9uX3N1bW1hcnlfZXBlbmR5bW9tYV9mb2kudHN2IikKZW1icnlGaWxlIDwtIGZpbGUucGF0aChyZXN1bHRzRGlyLCAiZnVzaW9uX3N1bW1hcnlfZW1icnlvbmFsX2ZvaS50c3YiKQpgYGAKCiMjIEZ1c2lvbnMgYW5kIGdlbmVzIG9mIGludGVyZXN0CgpUYWtlbiBmcm9tIFtgQWxleHNMZW1vbmFkZS9PcGVuUEJUQS1hbmFseXNpcyMyNDVgXShodHRwczovL2dpdGh1Yi5jb20vQWxleHNMZW1vbmFkZS9PcGVuUEJUQS1hbmFseXNpcy9pc3N1ZXMvMjQ1KSBhbmQgW2BBbGV4c0xlbW9uYWRlL09wZW5QQlRBLWFuYWx5c2lzIzI1MWBdKGh0dHBzOi8vZ2l0aHViLmNvbS9BbGV4c0xlbW9uYWRlL09wZW5QQlRB
LWFuYWx5c2lzL2lzc3Vlcy8yNTEpLCByZXNwZWN0aXZlbHkuCgpgYGB7cn0KIycgKipGaWx0ZXJzKioKIycKIycgKkZ1c2lvbnMgRmlsdGVycyoKIycgMTogRXhhY3QgbWF0Y2ggYSBsaXN0IG9mIGZ1c2lvbnMgY29tbW9uIGluIEVwZW5keW1vbWEgdHVtb3JzCmVwZW5kRnVzZXMgPC0gYygKICAiQzExb3JmOTUtLU1BTUwyIiwKICAiQzExb3JmOTUtLVJFTEEiLAogICJDMTFvcmY5NS0tWUFQMSIsCiAgIkxUQlAzLS1SRUxBIiwKICAiUFRFTi0tVEFTMlIxIiwKICAiWUFQMS0tRkFNMTE4QiIsCiAgIllBUDEtLU1BTUxEMSIsCiAgIllBUDEtLU1BTUxEMiIKKQplcGVuZEdlbmVzIDwtIGMoCiAgIlJFTEEiCikKIycgMjogRXhhY3QgbWF0Y2ggYSBsaXN0IG9mIGZ1c2lvbnMgY29tbW9uIGluIEVtYnJ5b25hbCB0dW1vcnMKIycgYXMgd2VsbCBhcyBmdXNpb25zIGNvbnRhaW5pbmcgYSBwYXJ0aWN1bGFyIGdlbmUgd2l0aCBhbnkgb3RoZXIgZ2VuZQplbWJyeUZ1c2VzIDwtIGMoCiAgIkNJQy0tTlVUTTEiLAogICJNTjEtLUJFTkQyIiwKICAiTU4xLS1DWFhDNSIKKQplbWJyeUdlbmVzIDwtIGMoCiAgIkZPWFIyIiwKICAiTU4xIiwKICAiVFRZSDEiCikKYGBgCgojIyMgRmlsdGVyIHB1dGF0aXZlIG9uY29nZW5pYyBmdXNpb25zIGxpc3QKCmBgYHtyfQphbGxGdXNlRXBlbmQgPC0gZmlsdGVyRnVzaW9uKGRmID0gcHV0YXRpdmVPbmNvZ2VuaWNERiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmdXNlcyA9IGVwZW5kRnVzZXMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZXMgPSBlcGVuZEdlbmVzKQphbGxGdXNlRW1icnkgPC0gZmlsdGVyRnVzaW9uKGRmID0gcHV0YXRpdmVPbmNvZ2VuaWNERiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmdXNlcyA9IGVtYnJ5RnVzZXMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZXMgPSBlbWJyeUdlbmVzKQpgYGAKCkdldCB0aGUgYmlvc3BlY2ltZW4gSURzIHRoYXQgYXJlIHByZXNlbnQgaW4gKmVpdGhlciogY2FsbGVyIGZpbGUgKEFycmliYSwgU1RBUkZ1c2lvbikuClRoZSBmdXNpb25zIGluIHRoZSBwdXRhdGl2ZSBvbmNvZ2VuaWMgZnVzaW9uIGZpbGUgY2FuIGJlIHJldGFpbmVkIGV2ZW4gaWYgdGhleSBhcmUgbm90IGluIGJvdGggY2FsbGVyczogaHR0cHM6Ly9naXRodWIuY29tL0FsZXhzTGVtb25hZGUvT3BlblBCVEEtYW5hbHlzaXMvYmxvYi84ZmJhMTc1MzYwOGQ4YWMwYWEzZDVkN2Q2M2M0ODBiOGYwMGZmMGU5L2FuYWx5c2VzL2Z1c2lvbl9maWx0ZXJpbmcvMDQtcHJvamVjdC1zcGVjaWZpYy1maWx0ZXJpbmcuUm1kI0wyNDIKV2UgdXNlIHRoZSBwdXRhdGl2ZSBvbmNvZ2VuaWMgZmlsZSBoZXJlLCB0aGVyZWZvcmUgYW55IHNhbXBsZSB0aGF0IGlzIGluIGVpdGhlciBmaWxlIGJ1dCBkb2VzIG5vdCBoYXZlIGEgZnVzaW9uIHRoYXQgaXMgcmVsZXZhbnQgdG8gdGhlIHN1YnR5cGluZyB0aWNrZXRzIGlzIG5vdCBfbWlzc2luZ18gYnV0IGluc3RlYWQgaGFz
IG5vIGV2aWRlbmNlIG9mIHRoZSByZWxldmFudCBmdXNpb25zLgoKYGBge3J9CnNwZWNpbWVuc1VuaW9uPC0gdW5pb24oYXJyaWJhREYkdHVtb3JfaWQsIHN0YXJmdXNpb25ERiR0dW1vcl9pZCkKYGBgCgojIyMjIFdyaXRlIG5vbi1NQiwgbm9uLUFUUlQgZW1icnlvbmFsIGZ1c2lvbnMgdG8gZmlsZQoKYGBge3J9CmFsbEZ1c2VFbWJyeSA8LSBhbGxGdXNlRW1icnkgJT4lCiAgcHJlcGFyZU91dHB1dChzcGVjaW1lbnNVbmlvbikKYGBgCgpgYGB7cn0KIyBBcmUgdGhlcmUgYW55IG1pc3NpbmcgZnVzaW9ucz8Kc2V0ZGlmZihlbWJyeUZ1c2VzLCBjb2xuYW1lcyhhbGxGdXNlRW1icnkpKQpgYGAKCmBgYHtyfQphbGxGdXNlRW1icnkgJT4lCiAgbXV0YXRlKAogICAgYENJQy0tTlVUTTFgID0gMCwKICAgIGBNTjEtLUJFTkQyYCA9IDAKICApICU+JQogIHdyaXRlX3RzdihlbWJyeUZpbGUpCmBgYAoKIyMjIyBXcml0ZSBlcGVuZHltb21hIGZ1c2lvbnMgdG8gZmlsZQoKYGBge3J9CmlmICghcnVubmluZ19pbl9jaSkgewogIGFsbEZ1c2VFcGVuZCAlPiUKICAgIHByZXBhcmVPdXRwdXQoc3BlY2ltZW5zVW5pb24pICU+JQogICAgbXV0YXRlKAogICAgICBgQzExb3JmOTUtLVlBUDFgID0gMCwKICAgICAgYExUQlAzLS1SRUxBYCA9IDAsCiAgICAgIGBQVEVOLS1UQVMyUjFgID0gMCwKICAgICAgYFlBUDEtLU1BTUxEMmAgPSAwCiAgICApICU+JQogICAgd3JpdGVfdHN2KGVwZW5kRmlsZSkKfQpgYGAK
+
LS0tCnRpdGxlOiAiR2VuZXJhdGUgRnVzaW9uIFN1bW1hcnkgRmlsZXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawphdXRob3I6IERhbmllbCBNaWxsZXIgKEQzYikgYW5kIEphY2x5biBUYXJvbmkgKENDREwpCmRhdGU6IEphbnVhcnkgMjAyMApwYXJhbXM6CiAgaXNfY2k6IDAKLS0tCgpHZW5lcmF0ZSBmdXNpb24gZmlsZXMgc3BlY2lmaWNhbGx5IGZvciBjb25zdW1wdGlvbiBieSBtb2xlY3VsYXIgc3VidHlwaW5nIGFuYWx5c2VzCgojIyBTZXQgdXAKCmBgYHtyfQojIGlmIHJ1bm5pbmcgaW4gQ0ksIHdlIG5lZWQgdG8gc2tpcCB0aGUgRVBOIHN0ZXBzCmlmIChwYXJhbXMkaXNfY2kgPT0gMCkgcnVubmluZ19pbl9jaSA8LSBGQUxTRQppZiAocGFyYW1zJGlzX2NpID09IDEpIHJ1bm5pbmdfaW5fY2kgPC0gVFJVRQpgYGAKCiMjIyBMaWJyYXJpZXMgYW5kIGZ1bmN0aW9ucwoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCmBgYHtyfQojJyBHZW5lcmF0ZSBmaWx0ZXJlZCBmdXNpb24gZnJhbWUKIycgQHBhcmFtIGRmIFVuZmlsdGVyZWQgZnVzaW9uIGRhdGEgZnJhbWUKIycgQHBhcmFtIGJpb2lkIExpc3Qgb2YgYmlvc3BlY2ltZW4gSURzCiMnIEBwYXJhbSBmdXNlcyBMaXN0IG9mIGV4cGxpY2l0IGZ1c2lvbiBuYW1lcwojJyBAcGFyYW0gZ2VuZXMgTGlzdCBvZiBnZW5lIG5hbWVzCiMnIEByZXR1cm4gdGhlIGZpbHRlcmVkIGZ1c2lvbiBkYXRhIGZyYW1lCmZpbHRlckZ1c2lvbiA8LSBmdW5jdGlvbihkZiwgYmlvaWQsIGZ1c2VzLCBnZW5lcykgewogIGlmICghbWlzc2luZyhiaW9pZCkpIHsKICAgIGRmIDwtIGZpbHRlcihkZiwgU2FtcGxlICVpbiUgYmlvaWQpCiAgfQogIGlmICghbWlzc2luZyhmdXNlcykgJiAhbWlzc2luZyhnZW5lcykpIHsKICAgIGRmIDwtIGZpbHRlcihkZiwgRnVzaW9uTmFtZSAlaW4lIGZ1c2VzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUxQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUxQiAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQiAlaW4lIGdlbmVzKQogIH0gZWxzZSBpZiAoIW1pc3NpbmcoZnVzZXMpKSB7CiAgICBkZiA8LSBmaWx0ZXIoZGYsIEZ1c2lvbk5hbWUgJWluJSBmdXNlcykKICB9IGVsc2UgaWYgKCFtaXNzaW5nKGdlbmVzKSkgewogICAgZGYgPC0gZmlsdGVyKGRmLAogICAgICAgICAgICAgICAgIEdlbmUxQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQSAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUxQiAlaW4lIGdlbmVzIHwKICAgICAgICAgICAgICAgICAgIEdlbmUyQiAlaW4lIGdlbmVzKQogIH0KICByZXR1cm4oZGYgJT4lIHNlbGVjdChTYW1wbGUsIEZ1c2lvbk5hbWUpKQp9CgoKIycgR2VuZXJhdGUgbWF0cml4IHdpdGggZnVzaW9uIGNvdW50cwojJyBAcGFyYW0gZnVzZURGIEZpbHRlcmVkIGZ1c2lvbiBkYXRhIGZyYW1lCiMnIEBw
YXJhbSBiaW9pZCBMaXN0IG9mIGJpb3NwZWNpbWVuIElEcyB0aGF0IHNob3VsZCBiZSBpbmNsdWRlZCBpbiBmaW5hbCB0YWJsZQoKIycgQHJldHVybiBEYXRhIGZyYW1lIHRoYXQgY29udGFpbnMgZnVzaW9uIGNvdW50cwpwcmVwYXJlT3V0cHV0IDwtIGZ1bmN0aW9uKGZ1c2VERiwgYmlvaWQpIHsKICBmdXNlREYgJT4lIAogICAgcmVzaGFwZTI6OmRjYXN0KFNhbXBsZSB+IEZ1c2lvbk5hbWUpICU+JQogICAgcmlnaHRfam9pbihkYXRhLmZyYW1lKFNhbXBsZSA9IGJpb2lkKSkgJT4lCiAgICByZXBsYWNlKGlzLm5hKC4pLCAwKSAlPiUKICAgIHJlbmFtZShLaWRzX0ZpcnN0X0Jpb3NwZWNpbWVuX0lEID0gU2FtcGxlKQp9CmBgYAoKIyMjIFJlYWQgaW4gZGF0YQoKYGBge3J9CmRhdGFEaXIgPC0gZmlsZS5wYXRoKCIuLiIsICIuLiIsICJkYXRhIikKIycgVGhlIHB1dGF0aXZlIG9uY29nZW5pYyBmdXNpb24gZmlsZSBpcyB3aGF0IHdlJ2xsIHVzZSB0byBjaGVjayBmb3IgdGhlIAojJyBwcmVzZW5jZSBvciBhYnNlbmNlIG9mIHRoZSBmdXNpb25zLgpwdXRhdGl2ZU9uY29nZW5pY0RGIDwtIAogIHJlYWRfdHN2KGZpbGUucGF0aChkYXRhRGlyLCAicGJ0YS1mdXNpb24tcHV0YXRpdmUtb25jb2dlbmljLnRzdiIpKQojJyBIb3dldmVyLCBzb21lIGJpb3NwZWNpbWVucyBhcmUgbm90IHJlcHJlc2VudGVkIGluIHRoaXMgZmlsdGVyZWQsIHByaW9yaXRpemVkCiMnIGZpbGUgYnV0ICphcmUqIHByZXNlbnQgaW4gdGhlIG9yaWdpbmFsIGZpbGVzIC0tIHRoaXMgd2lsbCBjYXVzZSB0aGVtIHRvIGJlCiMnICJtaXNzaW5nIiBpbiB0aGUgZmluYWwgZmlsZXMgZm9yIGNvbnN1bXB0aW9uIHdoaWNoIGNvdWxkIG1pc2xlYWQgYW5hbHlzdHMuCmFycmliYURGIDwtIHJlYWRfdHN2KGZpbGUucGF0aChkYXRhRGlyLCAicGJ0YS1mdXNpb24tYXJyaWJhLnRzdi5neiIpKQpzdGFyZnVzaW9uREYgPC0gcmVhZF90c3YoZmlsZS5wYXRoKGRhdGFEaXIsICJwYnRhLWZ1c2lvbi1zdGFyZnVzaW9uLnRzdi5neiIpKQpgYGAKCiMjIyBPdXRwdXQKCmBgYHtyfQpyZXN1bHRzRGlyIDwtICJyZXN1bHRzIgppZiAoIWRpci5leGlzdHMocmVzdWx0c0RpcikpIHsKICBkaXIuY3JlYXRlKHJlc3VsdHNEaXIpCn0KZXBlbmRGaWxlIDwtIGZpbGUucGF0aChyZXN1bHRzRGlyLCAiZnVzaW9uX3N1bW1hcnlfZXBlbmR5bW9tYV9mb2kudHN2IikKZW1icnlGaWxlIDwtIGZpbGUucGF0aChyZXN1bHRzRGlyLCAiZnVzaW9uX3N1bW1hcnlfZW1icnlvbmFsX2ZvaS50c3YiKQpgYGAKCiMjIEZ1c2lvbnMgYW5kIGdlbmVzIG9mIGludGVyZXN0CgpUYWtlbiBmcm9tIFtgQWxleHNMZW1vbmFkZS9PcGVuUEJUQS1hbmFseXNpcyMyNDVgXShodHRwczovL2dpdGh1Yi5jb20vQWxleHNMZW1vbmFkZS9PcGVuUEJUQS1hbmFseXNpcy9pc3N1ZXMvMjQ1KSBhbmQgW2BBbGV4c0xlbW9uYWRlL09wZW5QQlRBLWFuYWx5c2lzIzI1MWBdKGh0dHBzOi8vZ2l0aHViLmNvbS9BbGV4c0xlbW9uYWRlL09wZW5QQlRB
LWFuYWx5c2lzL2lzc3Vlcy8yNTEpLCByZXNwZWN0aXZlbHkuCgpgYGB7cn0KIycgKipGaWx0ZXJzKioKIycKIycgKkZ1c2lvbnMgRmlsdGVycyoKIycgMTogRXhhY3QgbWF0Y2ggYSBsaXN0IG9mIGZ1c2lvbnMgY29tbW9uIGluIEVwZW5keW1vbWEgdHVtb3JzCmVwZW5kRnVzZXMgPC0gYygKICAiQzExb3JmOTUtLU1BTUwyIiwKICAiQzExb3JmOTUtLVJFTEEiLAogICJDMTFvcmY5NS0tWUFQMSIsCiAgIkxUQlAzLS1SRUxBIiwKICAiUFRFTi0tVEFTMlIxIiwKICAiWUFQMS0tRkFNMTE4QiIsCiAgIllBUDEtLU1BTUxEMSIsCiAgIllBUDEtLU1BTUxEMiIKKQplcGVuZEdlbmVzIDwtIGMoCiAgIlJFTEEiCikKIycgMjogRXhhY3QgbWF0Y2ggYSBsaXN0IG9mIGZ1c2lvbnMgY29tbW9uIGluIEVtYnJ5b25hbCB0dW1vcnMKIycgYXMgd2VsbCBhcyBmdXNpb25zIGNvbnRhaW5pbmcgYSBwYXJ0aWN1bGFyIGdlbmUgd2l0aCBhbnkgb3RoZXIgZ2VuZQplbWJyeUZ1c2VzIDwtIGMoCiAgIkNJQy0tTlVUTTEiLAogICJNTjEtLUJFTkQyIiwKICAiTU4xLS1DWFhDNSIKKQplbWJyeUdlbmVzIDwtIGMoCiAgIkZPWFIyIiwKICAiTU4xIiwKICAiVFRZSDEiCikKYGBgCgojIyMgRmlsdGVyIHB1dGF0aXZlIG9uY29nZW5pYyBmdXNpb25zIGxpc3QKCmBgYHtyfQphbGxGdXNlRXBlbmQgPC0gZmlsdGVyRnVzaW9uKGRmID0gcHV0YXRpdmVPbmNvZ2VuaWNERiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmdXNlcyA9IGVwZW5kRnVzZXMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZXMgPSBlcGVuZEdlbmVzKQphbGxGdXNlRW1icnkgPC0gZmlsdGVyRnVzaW9uKGRmID0gcHV0YXRpdmVPbmNvZ2VuaWNERiwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmdXNlcyA9IGVtYnJ5RnVzZXMsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZXMgPSBlbWJyeUdlbmVzKQpgYGAKCkdldCB0aGUgYmlvc3BlY2ltZW4gSURzIHRoYXQgYXJlIHByZXNlbnQgaW4gKmVpdGhlciogY2FsbGVyIGZpbGUgKEFycmliYSwgU1RBUkZ1c2lvbikuClRoZSBmdXNpb25zIGluIHRoZSBwdXRhdGl2ZSBvbmNvZ2VuaWMgZnVzaW9uIGZpbGUgY2FuIGJlIHJldGFpbmVkIGV2ZW4gaWYgdGhleSBhcmUgbm90IGluIGJvdGggY2FsbGVyczogaHR0cHM6Ly9naXRodWIuY29tL0FsZXhzTGVtb25hZGUvT3BlblBCVEEtYW5hbHlzaXMvYmxvYi84ZmJhMTc1MzYwOGQ4YWMwYWEzZDVkN2Q2M2M0ODBiOGYwMGZmMGU5L2FuYWx5c2VzL2Z1c2lvbl9maWx0ZXJpbmcvMDQtcHJvamVjdC1zcGVjaWZpYy1maWx0ZXJpbmcuUm1kI0wyNDIKV2UgdXNlIHRoZSBwdXRhdGl2ZSBvbmNvZ2VuaWMgZmlsZSBoZXJlLCB0aGVyZWZvcmUgYW55IHNhbXBsZSB0aGF0IGlzIGluIGVpdGhlciBmaWxlIGJ1dCBkb2VzIG5vdCBoYXZlIGEgZnVzaW9uIHRoYXQgaXMgcmVsZXZhbnQgdG8gdGhlIHN1YnR5cGluZyB0aWNrZXRzIGlzIG5vdCBfbWlzc2luZ18gYnV0IGluc3RlYWQgaGFz
IG5vIGV2aWRlbmNlIG9mIHRoZSByZWxldmFudCBmdXNpb25zLgoKYGBge3J9CnNwZWNpbWVuc1VuaW9uPC0gdW5pb24oYXJyaWJhREYkdHVtb3JfaWQsIHN0YXJmdXNpb25ERiR0dW1vcl9pZCkKYGBgCgojIyMjIFdyaXRlIG5vbi1NQiwgbm9uLUFUUlQgZW1icnlvbmFsIGZ1c2lvbnMgdG8gZmlsZQoKYGBge3J9CmlmICghcnVubmluZ19pbl9jaSkgewogIGFsbEZ1c2VFbWJyeSA8LSBhbGxGdXNlRW1icnkgJT4lCiAgICBwcmVwYXJlT3V0cHV0KHNwZWNpbWVuc1VuaW9uKQogIGFsbEZ1c2VFbWJyeSAlPiUKICAgIG11dGF0ZSgKICAgICAgYENJQy0tTlVUTTFgID0gMCwKICAgICAgYE1OMS0tQkVORDJgID0gMAogICAgKSAlPiUKICAgIHdyaXRlX3RzdihlbWJyeUZpbGUpCn0KYGBgCgojIyMjIFdyaXRlIGVwZW5keW1vbWEgZnVzaW9ucyB0byBmaWxlCgpgYGB7cn0KYWxsRnVzZUVwZW5kICU+JQogIHByZXBhcmVPdXRwdXQoc3BlY2ltZW5zVW5pb24pICU+JQogIG11dGF0ZSgKICAgIGBDMTFvcmY5NS0tWUFQMWAgPSAwLAogICAgYExUQlAzLS1SRUxBYCA9IDAsCiAgICBgUFRFTi0tVEFTMlIxYCA9IDAsCiAgICBgWUFQMS0tTUFNTEQyYCA9IDAKICApICU+JQogIHdyaXRlX3RzdihlcGVuZEZpbGUpCmBgYAo=
diff --git a/analyses/tp53_nf1_score/run_classifier.sh b/analyses/tp53_nf1_score/run_classifier.sh index 851fac2770..3caba9d0c5 100644 --- a/analyses/tp53_nf1_score/run_classifier.sh +++ b/analyses/tp53_nf1_score/run_classifier.sh @@ -9,8 +9,9 @@ set -e set -o pipefail -# we want to skip the poly-A ROC plot in CI -POLYA_PLOT=${OPENPBTA_POLYAPLOT:-1} +# we want to skip the poly-A steps in CI +# if POLYA=1, poly-A steps will be run +POLYA=${OPENPBTA_POLYAPLOT:-1} data_dir="data" scratch_dir="scratch" @@ -38,16 +39,12 @@ Rscript --vanilla ${analysis_dir}/00-tp53-nf1-alterations.R \ collapsed_stranded="pbta-gene-expression-rsem-fpkm-collapsed.stranded.rds" collapsed_polya="pbta-gene-expression-rsem-fpkm-collapsed.polya.rds" -# Run classifier for stranded and polya +# Run classifier and ROC plotting for stranded data python3 ${analysis_dir}/01-apply-classifier.py -f ${collapsed_stranded} -python3 ${analysis_dir}/01-apply-classifier.py -f ${collapsed_polya} - - -# Run ROC plot step +python3 ${analysis_dir}/02-evaluate-classifier.py -s ${analysis_dir}/results/TP53_NF1_snv_alteration.tsv -f ${analysis_dir}/results/pbta-gene-expression-rsem-fpkm-collapsed.stranded_classifier_scores.tsv -c ${data_dir}/pbta-histologies.tsv -o stranded -# Skip poly-A plotting in CI -if [ "$POLYA_PLOT" -gt "0" ]; then - python3 ${analysis_dir}/02-evaluate-classifier.py -s ${analysis_dir}/results/TP53_NF1_snv_alteration.tsv -f ${analysis_dir}/results/pbta-gene-expression-rsem-fpkm-collapsed.polya_classifier_scores.tsv -c ${data_dir}/pbta-histologies.tsv -o polya +# Skip poly-A steps in CI +if [ "$POLYA" -gt "0" ]; then + python3 ${analysis_dir}/01-apply-classifier.py -f ${collapsed_polya} + python3 ${analysis_dir}/02-evaluate-classifier.py -s ${analysis_dir}/results/TP53_NF1_snv_alteration.tsv -f ${analysis_dir}/results/pbta-gene-expression-rsem-fpkm-collapsed.polya_classifier_scores.tsv -c ${data_dir}/pbta-histologies.tsv -o polya fi - -python3 ${analysis_dir}/02-evaluate-classifier.py -s 
${analysis_dir}/results/TP53_NF1_snv_alteration.tsv -f ${analysis_dir}/results/pbta-gene-expression-rsem-fpkm-collapsed.stranded_classifier_scores.tsv -c ${data_dir}/pbta-histologies.tsv -o stranded