Skip to content
This repository has been archived by the owner on Jun 21, 2023. It is now read-only.

Commit

Permalink
Updates to generate v15 CI files (#575)
Browse files Browse the repository at this point in the history
* Update to accommodate v15

* Add biospecimen.RDS

Co-authored-by: Candace Savonen <cansav09@gmail.com>
Co-authored-by: jashapiro <jashapiro@gmail.com>
  • Loading branch information
3 people authored Mar 2, 2020
1 parent 661f644 commit 988f12d
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 4 deletions.
7 changes: 5 additions & 2 deletions analyses/create-subset-files/01-get_biospecimen_identifiers.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# consideration. This number will be 10% of num_matched.
# - We include (and hardcode) a set of biospecimen IDs for samples that have
# TP53 and NF1 mutations that meet the criteria in the tp53_nf1_module and
# are represented in the stranded RNA-seq dataset.
# are represented in the stranded RNA-seq dataset.
# See 00-enrich-positive-examples for more information.
#
# EXAMPLE USAGE:
Expand Down Expand Up @@ -73,6 +73,9 @@ get_biospecimen_ids <- function(filename, id_mapping_df) {
} else {
biospecimen_ids <- unique(cnv_file$ID)
}
} else if (grepl("consensus_seg_annotated", filename)) {
annotated_cn_file <- read_tsv(filename)
biospecimen_ids <- unique(annotated_cn_file$biospecimen_id)
} else if (grepl("pbta-fusion", filename)) {
fusion_file <- read_tsv(filename)
# the biospecimen IDs in the filtered/prioritized fusion list included with
Expand Down Expand Up @@ -127,7 +130,7 @@ option_list <- list(
make_option(
c("-r", "--supported_string"),
type = "character",
default = "pbta-snv|pbta-cnv|pbta-fusion|pbta-isoform|pbta-sv|pbta-gene|cnv_consensus",
default = "pbta-snv|pbta-cnv|pbta-fusion|pbta-isoform|pbta-sv|pbta-gene|consensus_seg_annotated",
help = "string for pattern matching used to subset to only supported files"
),
make_option(
Expand Down
6 changes: 5 additions & 1 deletion analyses/create-subset-files/02-subset_files.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,11 @@ subset_files <- function(filename, biospecimen_ids, output_directory) {
cnv_file %>%
dplyr::filter(!!rlang::sym(biospecimen_column) %in% biospecimen_ids) %>%
readr::write_tsv(output_file)

} else if (grepl("consensus_seg_annotated", filename)) {
annotated_cn_file <- readr::read_tsv(filename)
annotated_cn_file %>%
dplyr::filter(biospecimen_id %in% biospecimen_ids) %>%
readr::write_tsv(output_file)
} else if (grepl("pbta-fusion", filename)) {
# the original files contain the biospecimen IDs in a column called 'tumor_id';
# in the filtered/prioritized list, the biospecimen IDs are in 'Sample'
Expand Down
Binary file modified analyses/create-subset-files/biospecimen_ids_for_subset.RDS
Binary file not shown.
2 changes: 1 addition & 1 deletion analyses/create-subset-files/create_subset_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set -o pipefail

# Set defaults for release and biospecimen file name
BIOSPECIMEN_FILE=${BIOSPECIMEN_FILE:-biospecimen_ids_for_subset.RDS}
RELEASE=${RELEASE:-release-v14-20200203}
RELEASE=${RELEASE:-release-v15-20200228}
NUM_MATCHED=${NUM_MATCHED:-15}

# This option controls whether or not the two larger MAF files are skipped as
Expand Down

0 comments on commit 988f12d

Please sign in to comment.