diff --git a/analyses/molecular-subtyping-MB/03-compare-classes.Rmd b/analyses/molecular-subtyping-MB/03-compare-classes.Rmd index 78f06610c4..8bbaa6af0a 100644 --- a/analyses/molecular-subtyping-MB/03-compare-classes.Rmd +++ b/analyses/molecular-subtyping-MB/03-compare-classes.Rmd @@ -330,7 +330,9 @@ consensus.corrected.out <- consensus.corrected.out %>% consensus.corrected.out <- consensus.corrected.out %>% # adding "MB" to be consistent with other subtyping values - dplyr::mutate(molecular_subtype = if_else(!is.na(molecular_subtype),paste("MB,",molecular_subtype),NA_character_)) %>% + dplyr::mutate(molecular_subtype = if_else(!is.na(molecular_subtype), + paste("MB,",molecular_subtype), + "MB, To be classified")) %>% write.table( file = file.path(results_dir, 'MB_batchcorrected_molecular_subtype.tsv'), row.names = F, quote = F, sep = "\t") @@ -358,7 +360,9 @@ consensus.uncorrected.out <- consensus.uncorrected.out %>% consensus.uncorrected.out <- consensus.uncorrected.out %>% # adding "MB" to be consistent with other subtyping values - dplyr::mutate(molecular_subtype = if_else(!is.na(molecular_subtype),paste("MB,",molecular_subtype),NA_character_)) %>% + dplyr::mutate(molecular_subtype = if_else(!is.na(molecular_subtype), + paste("MB,",molecular_subtype), + "MB, To be classified")) %>% write.table(file = file.path(results_dir, 'MB_molecular_subtype.tsv'), row.names = F, quote = F, sep = "\t") ``` diff --git a/analyses/molecular-subtyping-MB/03-compare-classes.html b/analyses/molecular-subtyping-MB/03-compare-classes.html index 46cf58c9df..ecb4f0815b 100644 --- a/analyses/molecular-subtyping-MB/03-compare-classes.html +++ b/analyses/molecular-subtyping-MB/03-compare-classes.html @@ -1,18 +1,31 @@ - + - + Comparison of Expected and Observed MB Subtype Classification + @@ -4166,35 +4329,6 @@ border-radius: 0.28571429rem; } - + - @@ -4390,7 +4585,6 @@ } img { max-width:100%; - height: auto; } .tabbed-pane { padding-top: 12px; @@ -4452,6 +4646,7 @@ border: none; display: inline-block; border-radius: 4px; + background-color: transparent; } .tabset-dropdown > .nav-tabs.nav-tabs-open > li { @@ -4464,18 +4659,6 @@ } - - @@ -4551,7 +4734,7 @@

Code:

# combine both clin.merged <- clin.wgs %>% - right_join(clin.rna, by = c('sample_id','Kids_First_Participant_ID','tumor_descriptor')) + right_join(clin.rna, by = c('sample_id','Kids_First_Participant_ID','tumor_descriptor'))

Data Table

@@ -4560,14 +4743,14 @@

Code:

DT::datatable(dat, rownames = FALSE, filter = "top", - class = 'cell-border stripe', + class = 'cell-border stripe', options = list(pageLength = 5, searchHighlight = TRUE, scrollX = TRUE, - dom = 'tpi', + dom = 'tpi', initComplete = JS("function(settings, json) {", - "$(this.api().table().header()).css({'background-color': - '#004467', 'color': '#fff'});","}")) + "$(this.api().table().header()).css({'background-color': + '#004467', 'color': '#fff'});","}")) ) }
@@ -4577,7 +4760,7 @@

Code:

compute.accuracy <- function(expected.input, observed.input){ # merge expected and observed subtypes merged.table <- expected.input %>% - left_join(observed.input, by = c('Kids_First_Biospecimen_ID' = 'sample')) %>% + left_join(observed.input, by = c('Kids_First_Biospecimen_ID' = 'sample')) %>% mutate(match = str_detect(pathology_subtype, best.fit)) merged.table$sample_id <- as.factor(merged.table$sample_id) @@ -4586,7 +4769,7 @@

Code:

filter(!is.na(pathology_subtype)) %>% group_by(match) %>% dplyr::summarise(n = n()) %>% - mutate(Accuracy = paste0(n/sum(n)*100, '%')) %>% + mutate(Accuracy = paste0(n/sum(n)*100, '%')) %>% filter(match) %>% .$Accuracy @@ -4642,8 +4825,8 @@

Code:

# join both consensus.mat <- mm2s %>% inner_join(medullo.classifier, - by = c('Kids_First_Biospecimen_ID', 'sample_id', - 'pathology_subtype', 'other_molecular_findings')) + by = c('Kids_First_Biospecimen_ID', 'sample_id', + 'pathology_subtype', 'other_molecular_findings')) # create consensus molecular subtype if both classifiers agree # match consensus molecular subtype to the pathology report @@ -4655,7 +4838,7 @@

Code:

consensus.mat.out <- clin.merged %>% inner_join(consensus.mat %>% mutate(Kids_First_Biospecimen_ID_RNA = Kids_First_Biospecimen_ID), - by = c('sample_id', 'Kids_First_Biospecimen_ID_RNA')) + by = c('sample_id', 'Kids_First_Biospecimen_ID_RNA')) # resolve discrepancy with multiple sample ids # apply prop.mb.subtype function on same event (`sample_id`) with the same `tumor_descriptor` @@ -4681,7 +4864,7 @@

Code:

filter(!is.na(pathology_subtype)) %>% group_by(match) %>% dplyr::summarise(n = n()) %>% - mutate(Accuracy = paste0(n/sum(n)*100, '%')) %>% + mutate(Accuracy = paste0(n/sum(n)*100, '%')) %>% filter(match) %>% .$Accuracy @@ -4707,14 +4890,14 @@

Details:

  • Pathology report has subtype information on 32/122 (26.2%) samples. Following is the breakdown of pathology identified subtypes:

  • -
    - +
    +

    Individual classifier and Consensus outputs:

    -

    +

    MM2S (batch-corrected)

    out <- compute.accuracy(expected.input = exp.class, 
    @@ -4727,8 +4910,8 @@ 

    MM2S (batch-corrected)

    # output table
     mm2s.corrected <- out$merged.table
     viewDataTable(mm2s.corrected)
    -
    - +
    +

    MM2S (uncorrected)

    @@ -4742,8 +4925,8 @@

    MM2S (uncorrected)

    # output table
     mm2s.uncorrected <- out$merged.table
     viewDataTable(mm2s.uncorrected)
    -
    - +
    +

    medulloPackage (batch-corrected)

    @@ -4757,8 +4940,8 @@

    medulloPackage (batch-corrected)

    # output table
     medullo.classifier.corrected <- out$merged.table
     viewDataTable(medullo.classifier.corrected)
    -
    - +
    +

    medulloPackage (uncorrected)

    @@ -4772,8 +4955,8 @@

    medulloPackage (uncorrected)

    # output table
     medullo.classifier.uncorrected <- out$merged.table
     viewDataTable(medullo.classifier.uncorrected)
    -
    - +
    +

    Consensus (batch-corrected)

    @@ -4788,14 +4971,20 @@

    Consensus (batch-corrected)

    # output table
     consensus.corrected <- out$consensus.mat
     viewDataTable(consensus.corrected)
    -
    - +
    +
    # merged output with clinical ids 
     consensus.corrected.out <- out$consensus.mat.out
     consensus.corrected.out <- consensus.corrected.out %>%
       arrange(Kids_First_Biospecimen_ID_RNA)
    -write.table(consensus.corrected.out, 
    -            file = file.path(results_dir, 'MB_batchcorrected_molecular_subtype.tsv'), 
    +
    +consensus.corrected.out <- consensus.corrected.out %>%
    +  # adding "MB" to be consistent with other subtyping values
    +  dplyr::mutate(molecular_subtype = if_else(!is.na(molecular_subtype),
    +                                            paste("MB,",molecular_subtype),
    +                                            "MB, To be classified")) %>%
    +  write.table(
    +            file = file.path(results_dir, 'MB_batchcorrected_molecular_subtype.tsv'), 
                 row.names = F, quote = F, sep = "\t")
    @@ -4811,14 +5000,19 @@

    Consensus (uncorrected)

    # output table
     consensus.uncorrected <- out$consensus.mat
     viewDataTable(consensus.uncorrected)
    -
    - +
    +
    # merged output with clinical ids 
     consensus.uncorrected.out <- out$consensus.mat.out
     consensus.uncorrected.out <- consensus.uncorrected.out %>%
       arrange(Kids_First_Biospecimen_ID_RNA)
    -write.table(consensus.uncorrected.out, 
    -            file = file.path(results_dir, 'MB_molecular_subtype.tsv'), 
    +
    +consensus.uncorrected.out <- consensus.uncorrected.out %>%
    +  # adding "MB" to be consistent with other subtyping values
    +  dplyr::mutate(molecular_subtype = if_else(!is.na(molecular_subtype),
    +                                            paste("MB,",molecular_subtype),
    +                                            "MB, To be classified")) %>%
    +  write.table(file = file.path(results_dir, 'MB_molecular_subtype.tsv'), 
                 row.names = F, quote = F, sep = "\t")
    @@ -4854,7 +5048,7 @@

    Consensus comparison

    .$Kids_First_Biospecimen_ID) print(length(matches))
    [1] 24
    -
    # what's the difference?
    +
    # what's the difference?
     consensus.corrected.matches <- consensus.corrected %>%
       filter(match) %>%
       .$Kids_First_Biospecimen_ID 
    @@ -4889,6 +5083,23 @@ 

    Consensus comparison

    + + + + + + +