From e33cdb52b6ad4f4eb52978c1de5f76ea32c21a43 Mon Sep 17 00:00:00 2001 From: Candace Savonen Date: Tue, 5 Jan 2021 09:40:19 -0500 Subject: [PATCH] Revert "Merge branch 'master' into v18-ci" This reverts commit 27dec43f1d030e2ebdcddc5d6a4dcb79bdfaac0f. --- .circleci/config.yml | 60 +- Dockerfile | 2 +- .../03-subgrouping_samples.html | 308 +- ...ession-rsem-fpkm-collapsed.stranded.tsv.gz | Bin 5283760 -> 5234912 bytes .../results/EPN_all_data.tsv | 187 +- .../results/EPN_all_data_withsubgroup.tsv | 187 +- .../01-compile-subtyping-results.nb.html | 2922 +----------- .../02-incorporate-clinical-feedback.Rmd | 30 +- .../02-incorporate-clinical-feedback.nb.html | 2837 +----------- .../03-incorporate-pathology-feedback.Rmd | 251 -- .../03-incorporate-pathology-feedback.nb.html | 3986 +++-------------- ...ecular_subtypes_with_clinical_feedback.tsv | 2437 ++++------ ...types_with_clinical_pathology_feedback.tsv | 1816 +++----- 13 files changed, 2772 insertions(+), 12251 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 522d776f4c..26232e3b7a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,7 +68,7 @@ jobs: name: Molecular Subtyping Neurocytoma command: ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-neurocytoma/run_subtyping.sh - # Commenting this out for now; the code is expected to change + # Commenting this out for now; the code is expected to change # - run: # name: Molecular Subtyping - Compile and incorporate pathology feedback # command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash analyses/molecular-subtyping-pathology/run-subtyping-aggregation.sh @@ -132,17 +132,17 @@ jobs: name: Independent samples command: ./scripts/run_in_ci.sh bash analyses/independent-samples/run-independent-samples.sh -# - run: -# name: Interaction plot -# command: OPENPBTA_ALL=0 ./scripts/run_in_ci.sh bash analyses/interaction-plots/01-create-interaction-plots.sh + - run: + name: Interaction plot + command: OPENPBTA_ALL=0 
./scripts/run_in_ci.sh bash analyses/interaction-plots/01-create-interaction-plots.sh - run: - name: Mutational Signatures + name: Mutational Signatures command: OPENPBTA_QUICK_MUTSIGS=1 ./scripts/run_in_ci.sh bash analyses/mutational-signatures/run_mutational_signatures.sh -# - run: -# name: Chromosomal instability breakpoints -# command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash analyses/chromosomal-instability/run_breakpoint_analysis.sh + - run: + name: Chromosomal instability breakpoints + command: OPENPBTA_TESTING=1 ./scripts/run_in_ci.sh bash analyses/chromosomal-instability/run_breakpoint_analysis.sh - run: name: Copy number consensus @@ -156,33 +156,33 @@ jobs: name: Survival analysis command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/survival-analysis/survival-analysis_template.Rmd', params = list(plot_ci = FALSE), clean = TRUE)" -# - run: -# name: Comparative RNASeq - generate correlation matrix - rsem-tpm.polya -# command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/01-correlation-matrix.py ../../data/pbta-gene-expression-rsem-tpm.polya.rds --clinical-path ../../data/pbta-histologies.tsv --qc-manifest-path ../../data/pbta-mend-qc-manifest.tsv --qc-results-path ../../data/pbta-mend-qc-results.tar.gz --prefix rsem-tpm-polya- --verbose + - run: + name: Comparative RNASeq - generate correlation matrix - rsem-tpm.polya + command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/01-correlation-matrix.py ../../data/pbta-gene-expression-rsem-tpm.polya.rds --clinical-path ../../data/pbta-histologies.tsv --qc-manifest-path ../../data/pbta-mend-qc-manifest.tsv --qc-results-path ../../data/pbta-mend-qc-results.tar.gz --prefix rsem-tpm-polya- --verbose -# - run: -# name: Comparative RNASeq - generate correlation matrix - rsem-tpm.stranded -# command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/01-correlation-matrix.py ../../data/pbta-gene-expression-rsem-tpm.stranded.rds 
--clinical-path ../../data/pbta-histologies.tsv --qc-manifest-path ../../data/pbta-mend-qc-manifest.tsv --qc-results-path ../../data/pbta-mend-qc-results.tar.gz --prefix rsem-tpm-stranded- --verbose + - run: + name: Comparative RNASeq - generate correlation matrix - rsem-tpm.stranded + command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/01-correlation-matrix.py ../../data/pbta-gene-expression-rsem-tpm.stranded.rds --clinical-path ../../data/pbta-histologies.tsv --qc-manifest-path ../../data/pbta-mend-qc-manifest.tsv --qc-results-path ../../data/pbta-mend-qc-results.tar.gz --prefix rsem-tpm-stranded- --verbose -# - run: -# name: Comparative RNASeq - generate thresholds and outliers - rsem-tpm.stranded -# command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/02-thresholds-and-outliers.py --prefix rsem-tpm-stranded- --results results --verbose + - run: + name: Comparative RNASeq - generate thresholds and outliers - rsem-tpm.stranded + command: ./scripts/run_in_ci.sh python3 analyses/comparative-RNASeq-analysis/02-thresholds-and-outliers.py --prefix rsem-tpm-stranded- --results results --verbose - run: name: Process SV file command: ./scripts/run_in_ci.sh Rscript analyses/sv-analysis/01-process-sv-file.R -# - run: -# name: Oncoprint plotting -# command: ./scripts/run_in_ci.sh bash "analyses/oncoprint-landscape/run-oncoprint.sh" + - run: + name: Oncoprint plotting + command: ./scripts/run_in_ci.sh bash "analyses/oncoprint-landscape/run-oncoprint.sh" - run: name: GISTIC Plots command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/gistic_plot.Rmd', clean = TRUE)" -# - run: -# name: CN Status Heatmap -# command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', clean = TRUE)" + - run: + name: CN Status Heatmap + command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/cnv-chrom-plot/cn_status_heatmap.Rmd', clean = TRUE)" - run: 
name: Gene set enrichment analysis to generate GSVA scores @@ -205,9 +205,9 @@ jobs: #### Add your analysis here #### ################################ -# - run: -# name: RNA-Seq composition -# command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/rna-seq-composition/rna-seq-composition.Rmd', clean = TRUE)" + - run: + name: RNA-Seq composition + command: ./scripts/run_in_ci.sh Rscript -e "rmarkdown::render('analyses/rna-seq-composition/rna-seq-composition.Rmd', clean = TRUE)" - run: name: TCGA SNV Caller Analysis @@ -224,7 +224,7 @@ jobs: - run: name: Exploration of nonsynonymous filter command: ./scripts/run_in_ci.sh bash analyses/snv-callers/explore_variant_classifications/run_explorations.sh - + # This analysis was used to explore the TCGA PBTA data when the BED files used to calculate TCGA # were incorrect https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/568 #- run: @@ -254,9 +254,9 @@ jobs: name: TCGA Capture Kit Investigation command: ./scripts/run_in_ci.sh bash analyses/tcga-capture-kit-investigation/run-investigation.sh -# - run: -# name: d3b TMB code -# command: ./scripts/run_in_ci.sh bash analyses/tmb-compare/TMB_d3b_code/run_tmb_d3b.sh + - run: + name: d3b TMB code + command: ./scripts/run_in_ci.sh bash analyses/tmb-compare/TMB_d3b_code/run_tmb_d3b.sh - run: name: Compare TMB calculations diff --git a/Dockerfile b/Dockerfile index 05efc24e22..abf51a0f2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -245,8 +245,8 @@ RUN pip3 install \ "cycler==0.10.0" "kiwisolver==1.1.0" "pyparsing==2.4.5" "python-dateutil==2.8.1" "pytz==2019.3" \ "cython==0.29.15" \ "ipykernel==4.8.1" \ + "jupyter==1.0.0" \ "matplotlib==3.0.3" \ - "notebook==6.0.0" \ "numpy==1.17.3" \ "pandas==0.25.3" \ "plotnine==0.3.0" \ diff --git a/analyses/molecular-subtyping-EPN/03-subgrouping_samples.html b/analyses/molecular-subtyping-EPN/03-subgrouping_samples.html index dfbb681285..7b8c15122c 100644 --- a/analyses/molecular-subtyping-EPN/03-subgrouping_samples.html +++ 
b/analyses/molecular-subtyping-EPN/03-subgrouping_samples.html @@ -12948,10 +12948,10 @@ ',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){u(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))};if(c.ajax=f,c.queue=d,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media *([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\([\s]*min\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/,maxw:/\([\s]*max\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var g,h,i,j=a.document,k=j.documentElement,l=[],m=[],n=[],o={},p=30,q=j.getElementsByTagName("head")[0]||k,r=j.getElementsByTagName("base")[0],s=q.getElementsByTagName("link"),t=function(){var a,b=j.createElement("div"),c=j.body,d=k.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=j.createElement("body"),c.style.background="none"),k.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&k.insertBefore(c,k.firstChild),a=b.offsetWidth,f?k.removeChild(c):c.removeChild(b),k.style.fontSize=d,e&&(c.style.fontSize=e),a=i=parseFloat(a)},u=function(b){var c="clientWidth",d=k[c],e="CSS1Compat"===j.compatMode&&d||j.body[c]||d,f={},o=s[s.length-1],r=(new Date).getTime();if(b&&g&&p>r-g)return a.clearTimeout(h),h=a.setTimeout(u,p),void 0;g=r;for(var v in 
l)if(l.hasOwnProperty(v)){var w=l[v],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?i||t():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?i||t():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(m[w.rules]))}for(var C in n)n.hasOwnProperty(C)&&n[C]&&n[C].parentNode===q&&q.removeChild(n[C]);n.length=0;for(var D in f)if(f.hasOwnProperty(D)){var E=j.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,q.insertBefore(E,o.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(j.createTextNode(F)),n.push(E)}},v=function(a,b,d){var e=a.replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var g=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},h=!f&&d;b.length&&(b+="/"),h&&(f=1);for(var i=0;f>i;i++){var j,k,n,o;h?(j=d,m.push(g(a))):(j=e[i].match(c.regex.findStyles)&&RegExp.$1,m.push(RegExp.$2&&g(RegExp.$2))),n=j.split(","),o=n.length;for(var p=0;o>p;p++)k=n[p],l.push({media:k.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:m.length-1,hasquery:k.indexOf("(")>-1,minw:k.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:k.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}u()},w=function(){if(d.length){var b=d.shift();f(b.href,function(c){v(c,b.href,b.media),o[b.href]=!0,a.setTimeout(function(){w()},0)})}},x=function(){for(var b=0;b - - - - - - - - - - - - - - - - - - ',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){u(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var 
c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))};if(c.ajax=f,c.queue=d,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media *([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\([\s]*min\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/,maxw:/\([\s]*max\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var g,h,i,j=a.document,k=j.documentElement,l=[],m=[],n=[],o={},p=30,q=j.getElementsByTagName("head")[0]||k,r=j.getElementsByTagName("base")[0],s=q.getElementsByTagName("link"),t=function(){var a,b=j.createElement("div"),c=j.body,d=k.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=j.createElement("body"),c.style.background="none"),k.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&k.insertBefore(c,k.firstChild),a=b.offsetWidth,f?k.removeChild(c):c.removeChild(b),k.style.fontSize=d,e&&(c.style.fontSize=e),a=i=parseFloat(a)},u=function(b){var c="clientWidth",d=k[c],e="CSS1Compat"===j.compatMode&&d||j.body[c]||d,f={},o=s[s.length-1],r=(new Date).getTime();if(b&&g&&p>r-g)return a.clearTimeout(h),h=a.setTimeout(u,p),void 0;g=r;for(var v in l)if(l.hasOwnProperty(v)){var w=l[v],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?i||t():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?i||t():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(m[w.rules]))}for(var C in n)n.hasOwnProperty(C)&&n[C]&&n[C].parentNode===q&&q.removeChild(n[C]);n.length=0;for(var D in f)if(f.hasOwnProperty(D)){var 
E=j.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,q.insertBefore(E,o.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(j.createTextNode(F)),n.push(E)}},v=function(a,b,d){var e=a.replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var g=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},h=!f&&d;b.length&&(b+="/"),h&&(f=1);for(var i=0;f>i;i++){var j,k,n,o;h?(j=d,m.push(g(a))):(j=e[i].match(c.regex.findStyles)&&RegExp.$1,m.push(RegExp.$2&&g(RegExp.$2))),n=j.split(","),o=n.length;for(var p=0;o>p;p++)k=n[p],l.push({media:k.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:m.length-1,hasquery:k.indexOf("(")>-1,minw:k.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:k.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}u()},w=function(){if(d.length){var b=d.shift();f(b.href,function(c){v(c,b.href,b.media),o[b.href]=!0,a.setTimeout(function(){w()},0)})}},x=function(){for(var b=0;b - - - - - - - - - - - - - - - - - - ',c.insertBefore(e,d),b=42===f.offsetWidth,c.removeChild(e),{matches:b,media:a}}}(a.document)}(this),function(a){"use strict";function b(){u(!0)}var c={};a.respond=c,c.update=function(){};var d=[],e=function(){var b=!1;try{b=new a.XMLHttpRequest}catch(c){b=new a.ActiveXObject("Microsoft.XMLHTTP")}return function(){return b}}(),f=function(a,b){var c=e();c&&(c.open("GET",a,!0),c.onreadystatechange=function(){4!==c.readyState||200!==c.status&&304!==c.status||b(c.responseText)},4!==c.readyState&&c.send(null))};if(c.ajax=f,c.queue=d,c.regex={media:/@media[^\{]+\{([^\{\}]*\{[^\}\{]*\})+/gi,keyframes:/@(?:\-(?:o|moz|webkit)\-)?keyframes[^\{]+\{(?:[^\{\}]*\{[^\}\{]*\})+[^\}]*\}/gi,urls:/(url\()['"]?([^\/\)'"][^:\)'"]+)['"]?(\))/g,findStyles:/@media 
*([^\{]+)\{([\S\s]+?)$/,only:/(only\s+)?([a-zA-Z]+)\s?/,minw:/\([\s]*min\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/,maxw:/\([\s]*max\-width\s*:[\s]*([\s]*[0-9\.]+)(px|em)[\s]*\)/},c.mediaQueriesSupported=a.matchMedia&&null!==a.matchMedia("only all")&&a.matchMedia("only all").matches,!c.mediaQueriesSupported){var g,h,i,j=a.document,k=j.documentElement,l=[],m=[],n=[],o={},p=30,q=j.getElementsByTagName("head")[0]||k,r=j.getElementsByTagName("base")[0],s=q.getElementsByTagName("link"),t=function(){var a,b=j.createElement("div"),c=j.body,d=k.style.fontSize,e=c&&c.style.fontSize,f=!1;return b.style.cssText="position:absolute;font-size:1em;width:1em",c||(c=f=j.createElement("body"),c.style.background="none"),k.style.fontSize="100%",c.style.fontSize="100%",c.appendChild(b),f&&k.insertBefore(c,k.firstChild),a=b.offsetWidth,f?k.removeChild(c):c.removeChild(b),k.style.fontSize=d,e&&(c.style.fontSize=e),a=i=parseFloat(a)},u=function(b){var c="clientWidth",d=k[c],e="CSS1Compat"===j.compatMode&&d||j.body[c]||d,f={},o=s[s.length-1],r=(new Date).getTime();if(b&&g&&p>r-g)return a.clearTimeout(h),h=a.setTimeout(u,p),void 0;g=r;for(var v in l)if(l.hasOwnProperty(v)){var w=l[v],x=w.minw,y=w.maxw,z=null===x,A=null===y,B="em";x&&(x=parseFloat(x)*(x.indexOf(B)>-1?i||t():1)),y&&(y=parseFloat(y)*(y.indexOf(B)>-1?i||t():1)),w.hasquery&&(z&&A||!(z||e>=x)||!(A||y>=e))||(f[w.media]||(f[w.media]=[]),f[w.media].push(m[w.rules]))}for(var C in n)n.hasOwnProperty(C)&&n[C]&&n[C].parentNode===q&&q.removeChild(n[C]);n.length=0;for(var D in f)if(f.hasOwnProperty(D)){var E=j.createElement("style"),F=f[D].join("\n");E.type="text/css",E.media=D,q.insertBefore(E,o.nextSibling),E.styleSheet?E.styleSheet.cssText=F:E.appendChild(j.createTextNode(F)),n.push(E)}},v=function(a,b,d){var e=a.replace(c.regex.keyframes,"").match(c.regex.media),f=e&&e.length||0;b=b.substring(0,b.lastIndexOf("/"));var g=function(a){return a.replace(c.regex.urls,"$1"+b+"$2$3")},h=!f&&d;b.length&&(b+="/"),h&&(f=1);for(var 
i=0;f>i;i++){var j,k,n,o;h?(j=d,m.push(g(a))):(j=e[i].match(c.regex.findStyles)&&RegExp.$1,m.push(RegExp.$2&&g(RegExp.$2))),n=j.split(","),o=n.length;for(var p=0;o>p;p++)k=n[p],l.push({media:k.split("(")[0].match(c.regex.only)&&RegExp.$2||"all",rules:m.length-1,hasquery:k.indexOf("(")>-1,minw:k.match(c.regex.minw)&&parseFloat(RegExp.$1)+(RegExp.$2||""),maxw:k.match(c.regex.maxw)&&parseFloat(RegExp.$1)+(RegExp.$2||"")})}u()},w=function(){if(d.length){var b=d.shift();f(b.href,function(c){v(c,b.href,b.media),o[b.href]=!0,a.setTimeout(function(){w()},0)})}},x=function(){for(var b=0;b - + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- - - -
-
-
-
-
- -
- - - - - +
@@ -3302,713 +674,305 @@

Pathology Feedback

ETMR Embryonal tumor -tumor -Updated via OpenPBTA subtyping -None documented -ETMR, C19MC-altered - - -BS_5P1TN10Z -hemispheric -7316-158 -746061 -PT_9BZETM0M -RNA-Seq -Supratentorial or Spinal Cord PNET -High-grade glioma -Path report PNET -reviewed - OK with new dx grade III-IV - -HGAT -Diffuse astrocytic and oligodendroglial tumor -tumor -from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping -None documented -HGG, H3 G35 - - -BS_STNH7YSX -hemispheric -7316-158 -746056 -PT_9BZETM0M -WGS -Supratentorial or Spinal Cord PNET -High-grade glioma -Path report PNET -reviewed - OK with new dx grade III-IV - -HGAT -Diffuse astrocytic and oligodendroglial tumor -tumor -from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping -None documented -HGG, H3 G35 - - -BS_TV5B86ZD -hemispheric -7316-158 -150593 -PT_9BZETM0M -RNA-Seq -Supratentorial or Spinal Cord PNET -High-grade glioma -Path report PNET -reviewed - OK with new dx grade III-IV - -HGAT -Diffuse astrocytic and oligodendroglial tumor -tumor -from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping -None documented -HGG, H3 G35 - - - -

And the notes:

-
-

Few notes:

-
    -
  1. PT_7E3V3JFX specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
  2. -
  3. PT_AQWDQW27 specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a YAP1 fusion.
  4. -
  5. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in HGG subtyping would convert this sample by default - how to handle this?
  6. -
  7. Pathology confirmed this HGG BRAF V600E mutant tumor, BS_H1XPVS9A, to be a LGAT (PXA). I updated molecular_subtype here based on what it would look like, but this should come through via the LGAT subtyping ticket. How should we add this info?
  8. -
-
-
-
-

Set up

- - - -
# This logic is the same as analyses/gene-set-enrichment-analysis/02-model-gsea.Rmd
-# Assigning params$is_ci to running_in_ci avoids a locked binding error
-running_in_ci <- params$is_ci
-
-# Are we testing? In case of a non 0/1 number, we recast as logical, and then 
-# ensure logical.
-if (running_in_ci %in% c(0,1)) running_in_ci <- as.logical(running_in_ci)
-if (!(is.logical(running_in_ci)))
-{
-  stop("\n\nERROR: The parameter `is_ci` should be FALSE/TRUE (or 0/1).")
-}
- - - - - - -
library(tidyverse)
- - - -
-

Directory and files

-
-

Directories

- - - -
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
-data_dir <- file.path(root_dir, "data")
-results_dir <- "results"
- - - -
-
-

Input

-

When we run this locally, we want to tie it to a specific version of the histologies file prior to feedback we received from pathology that is recorded on #609.

- - - -
if (running_in_ci) {
-  histologies_file <- file.path(data_dir, "pbta-histologies.tsv")
-} else {
-  histologies_file <- file.path(data_dir, "release-v15-20200228",
-                                "pbta-histologies.tsv")
-}
- - - - - - -
compiled_results_file <- file.path(results_dir, 
-                                   "compiled_molecular_subtypes_with_clinical_feedback.tsv")
-fusions_file <- file.path(data_dir,
-                          "pbta-fusion-putative-oncogenic.tsv")
- - - -
-
-

Output

- - - -
output_file <- file.path(results_dir,
-                         "compiled_molecular_subtypes_with_clinical_pathology_feedback.tsv")
- - - -
-
-
-
-

Read in data

- - - -
histologies_df <- read_tsv(histologies_file, guess_max = 10000)
-compiled_df <- read_tsv(compiled_results_file)
-fusions_df <- read_tsv(fusions_file)
- - - -
-
-

Revise subtyping calls based on pathology feedback

-
-

PT_7E3V3JFX

-

PT_7E3V3JFX was classified in molecular-subtyping-HGG as DMG, H3 K28 because the first step in that module was to identify any samples in the cohort with "defining lesions:" H3 K28 or H3 G35 (original subtyping issue: #249).

- - - -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_7E3V3JFX")
- - -
- -
- - - -

The relevant notes above are

+tumor +Updated via OpenPBTA subtyping +None documented +ETMR, C19MC-altered + + +BS_5P1TN10Z +hemispheric +7316-158 +746061 +PT_9BZETM0M +RNA-Seq +Supratentorial or Spinal Cord PNET +High-grade glioma +Path report PNET +reviewed - OK with new dx grade III-IV + +HGAT +Diffuse astrocytic and oligodendroglial tumor +tumor +from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping +None documented +HGG, H3 G35 + + +BS_STNH7YSX +hemispheric +7316-158 +746056 +PT_9BZETM0M +WGS +Supratentorial or Spinal Cord PNET +High-grade glioma +Path report PNET +reviewed - OK with new dx grade III-IV + +HGAT +Diffuse astrocytic and oligodendroglial tumor +tumor +from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping +None documented +HGG, H3 G35 + + +BS_TV5B86ZD +hemispheric +7316-158 +150593 +PT_9BZETM0M +RNA-Seq +Supratentorial or Spinal Cord PNET +High-grade glioma +Path report PNET +reviewed - OK with new dx grade III-IV + +HGAT +Diffuse astrocytic and oligodendroglial tumor +tumor +from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping +None documented +HGG, H3 G35 + + + +

And the notes:

+

Few notes:

  1. PT_7E3V3JFX specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
  2. -
  3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in HGG subtyping would convert this sample by default - how to handle this?
  4. -
-
-

So we will revise the molecular_subtype, integrated_diagnosis, short_histology, and broad_histology accordingly.

- - - -
compiled_df <- compiled_df %>%
-  mutate(
-    integrated_diagnosis = case_when(
-      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymoma",
-      TRUE ~ integrated_diagnosis 
-    ),
-    short_histology = case_when(
-      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymoma",
-      TRUE ~ short_histology
-    ),
-    broad_histology = case_when(
-      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymal tumor",
-      TRUE ~ broad_histology
-    ),
-    molecular_subtype = case_when(
-      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "EPN, H3 K28",
-      TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(
-      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  )
- - - -
-
-

BS_H1XPVS9A

-

BS_H1XPVS9A is from a sample with a BRAF V600E mutation that is H3 wildtype. This sample was originally labeled as a HGG. In low-dimensional transcriptomic space (e.g., UMAP, t-SNE), BS_H1XPVS9A clusters with LGG samples. (See molecular-subtyping-HGG.)

-

The relevant note from above:

-
-
    -
  1. Pathology confirmed this HGG BRAF V600E mutant tumor, BS_H1XPVS9A to be a LGAT (PXA). I updated molecular_subtype here based on what it would look like, but this should come through via the LGAT subtyping ticket. How should we add this info?
  2. -
-
-

The steps for LGAT subtyping are to classify LGG samples on the basis of the presence/absence of BRAF fusions and BRAF V600E mutations.

- - - -
compiled_df %>%
-  filter(Kids_First_Biospecimen_ID == "BS_H1XPVS9A") %>%
-  select(Kids_First_Participant_ID, sample_id) %>%
-  left_join(compiled_df)
- - -
Joining, by = c("Kids_First_Participant_ID", "sample_id")
- - -
- -
- - - -

We know from molecular-subtyping-HGG that a BRAF V600E mutation is present. Now we have to check for the presence of BRAF fusions.

- - - -
fusions_df %>%
-  filter(str_detect(FusionName, "BRAF"),
-         Sample == "BS_H1XPVS9A")
- - -
- -
- - - -

There are no BRAF fusions in BS_H1XPVS9A, so we are able to classify this sample (7316-1106) as LGG, BRAF V600E.

- - - -
compiled_df <- compiled_df %>%
-  mutate(
-    integrated_diagnosis = case_when(
-      sample_id == "7316-1106" ~ "Low-grade astrocytic tumor",
-      TRUE ~ integrated_diagnosis
-    ),
-    short_histology = case_when(
-      sample_id == "7316-1106" ~ "LGAT",
-      TRUE ~ short_histology
-    ),
-    broad_histology = case_when(
-      sample_id == "7316-1106" ~ "Low-grade astrocytic tumor",
-      TRUE ~ broad_histology
-    ),
-    molecular_subtype = case_when(
-      sample_id == "7316-1106" ~ "LGG, BRAF V600E",
-      TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(
-      sample_id == "7316-1106" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  )
- - - -
-
-

HGG BRAF V600E

-

The follow point comes from another issue #627 (comment):

-
-

We have a standalone BRAF V600E subtype, but this is HGG only, and should be labeled as such (will create an update HGG analysis ticket).

-
- - - -
compiled_df %>%
-  filter(molecular_subtype == "BRAF V600E")
- - -
- -
- - - -

Now that we have addressed the one sample that should have been reclassified as LGG, we are able to update molecular_subtype to HGG, BRAF V600E for the remaining samples.

- - - -
compiled_df <- compiled_df %>%
-  mutate(molecular_subtype = case_when(
-    molecular_subtype == "BRAF V600E" ~ "HGG, BRAF V600E",
-    TRUE ~ molecular_subtype
-  ))
- - - -
-
-

PT_AQWDQW27

-
-
  1. PT_AQWDQW27 specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a YAP1 fusion.
  2. +
  3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in HGG subtyping would convert this sample by default - how to handle this?
  4. +
  5. Pathology confirmed this HGG BRAF V600E mutant tumor, BS_H1XPVS9A, to be a LGAT (PXA). I updated molecular_subtype here based on what it would look like, but this should come through via the LGAT subtyping ticket. How should we add this info?
-

The molecular-subtyping-EPN module has not been completed yet, but the logic that is in that module may mean that we need to include revising the labels of PT_AQWDQW27.

- - - -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_AQWDQW27")
- - -
- -
- - -
# This sample is missing from the EPN table, but it should be there - will have to investigate and update this later.
- - - -
-
-

PT_6Q0NPVP3

-

The specimens for this patient, BS_5JM573JC and BS_E5H6CFYT, were classified as HGAT due to the presence of a histone mutation, but with the removal of LGAT from the HGAT module, this sample will no longer show up in two modules.

- - - -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_6Q0NPVP3")
- - -
- -
- - -
-
-

Are there any other duplicate subtypes?

+
+

Set up

- -
unique_subtypes <- compiled_df %>%
-  select(Kids_First_Participant_ID, sample_id, molecular_subtype) %>%
-  distinct()
+
+
# This logic is the same as analyses/gene-set-enrichment-analysis/02-model-gsea.Rmd
+# Assigning params$is_ci to running_in_ci avoids a locked binding error
+running_in_ci <- params$is_ci
 
-unique_subtypes[duplicated(unique_subtypes$sample_id),]
- - -
- -
- - -
#PT_KTRJ8TFY (fixed in clinical feedback) and PT_6Q0NPVP3 (fixed in HGG module removing LGAT)
- - - -
-
-

PT_00G007DM

-

7316-272 is subtyped as ETMR because a C19MC alteration was found and pathology review confirms the new diagnosis

- - - -
compiled_df <- compiled_df %>%
-  mutate(molecular_subtype= case_when(sample_id == "7316-272"~"ETMR, C19MC-altered",
-                                      TRUE ~ molecular_subtype),
-         short_histology= case_when(sample_id == "7316-272"~"ETMR",
-                                      TRUE ~ short_histology),
-         integrated_diagnosis = case_when(sample_id == "7316-272"~"Embryonal tumor with multilayer rosettes, C19MC-altered",
-                                      TRUE ~ integrated_diagnosis),
-         Notes = case_when(sample_id == "7316-272" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  ) %>%
-  unique()
- - - -

This sample has multiple specimens with different diagnoses and subtypes and this is actually expected, as there are two different diagnoses in the CBTN database and the C19MC alteration was only found in one of the samples.

- - - -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_00G007DM")
+# Are we testing? In case of a non 0/1 number, we recast as logical, and then +# ensure logical. +if (running_in_ci %in% c(0,1)) running_in_ci <- as.logical(running_in_ci) +if (!(is.logical(running_in_ci))) +{ + stop("\n\nERROR: The parameter `is_ci` should be FALSE/TRUE (or 0/1).") +}
- -
- -
- -
-
-

PT_5BWZA0NT

-

This sample was inspected by pathology and should be annotated as DMG, H3 K28, so let’s update that.

- -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_5BWZA0NT")
+ +
library(tidyverse)
- -
- -
- + +
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
+ + +
✔ ggplot2 3.3.0     ✔ purrr   0.3.4
+✔ tibble  3.0.0     ✔ dplyr   0.8.5
+✔ tidyr   1.0.2     ✔ stringr 1.4.0
+✔ readr   1.3.1     ✔ forcats 0.5.0
+ + +
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::filter() masks stats::filter()
+✖ dplyr::lag()    masks stats::lag()
+ +
+

Directory and files

+
+

Directories

- -
compiled_df <- compiled_df %>%
-  mutate(
-    integrated_diagnosis = case_when(
-      sample_id == "7316-1102" ~ "Diffuse midline glioma, H3 K28-mutant",
-      TRUE ~ integrated_diagnosis
-    ),
-    short_histology = case_when(
-      sample_id == "7316-1102" ~ "HGAT",
-      TRUE ~ short_histology
-    ),
-    broad_histology = case_when(
-      sample_id == "7316-1102" ~ "Diffuse astrocytic and oligodendroglial tumor",
-      TRUE ~ broad_histology
-    ),
-    molecular_subtype = case_when(
-      sample_id == "7316-1102" ~ "DMG, H3 K28",
-      TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(
-      sample_id == "7316-1102" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  )
+ +
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+data_dir <- file.path(root_dir, "data")
+results_dir <- "results"
-
-

PT_80NVYCBS

-

This sample was inspected by pathology and should be CNS HGNET-MN1, so we will update here.

+
+

Input

+

When we run this locally, we want to tie it to a specific version of the histologies file prior to feedback we received from pathology that is recorded on #609.

- -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_80NVYCBS")
+ +
if (running_in_ci) {
+  histologies_file <- file.path(data_dir, "pbta-histologies.tsv")
+} else {
+  histologies_file <- file.path(data_dir, "release-v15-20200228",
+                                "pbta-histologies.tsv")
+}
- -
- -
- + + - -
compiled_df <- compiled_df %>%
-  mutate(
-    integrated_diagnosis = case_when(
-      sample_id == "7316-765" ~ "CNS Embryonal Tumor, HGNET-MN1",
-      TRUE ~ integrated_diagnosis
-    ),
-    short_histology = case_when(
-      sample_id == "7316-765" ~ "Embryonal Tumor",
-      TRUE ~ short_histology
-    ),
-    broad_histology = case_when(
-      sample_id == "7316-765" ~ "Embryonal Tumor",
-      TRUE ~ broad_histology
-    ),
-    molecular_subtype = case_when(
-      sample_id == "7316-765" ~ "CNS HGNET-MN1",
-      TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(
-      sample_id == "7316-765" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  )
+ +
compiled_results_file <- file.path(results_dir, 
+                                   "compiled_molecular_subtypes_with_clinical_feedback.tsv")
+fusions_file <- file.path(data_dir,
+                          "pbta-fusion-putative-oncogenic.tsv")
-
-

PT_9BZETM0M

-

This sample was inspected by pathology and should be HGG, H3 G35, so we will update here.

+
+

Output

- -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_9BZETM0M")
- - -
- -
- - - - -
compiled_df <- compiled_df %>%
-  mutate(
-    integrated_diagnosis = case_when(
-      sample_id == "7316-158" ~ "High-grade glioma/astrocytoma, H3 G35-mutant",
-      TRUE ~ integrated_diagnosis
-    ),
-    short_histology = case_when(
-      sample_id == "7316-158" ~ "HGAT",
-      TRUE ~ short_histology
-    ),
-    broad_histology = case_when(
-      sample_id == "7316-158" ~ "Diffuse astrocytic and oligodendroglial tumor",
-      TRUE ~ broad_histology
-    ),
-    molecular_subtype = case_when(
-      sample_id == "7316-158" ~ "HGG, H3 G35",
-      TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(
-      sample_id == "7316-158" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  )
+ +
output_file <- file.path(results_dir,
+                         "compiled_molecular_subtypes_with_clinical_pathology_feedback.tsv")
-
-

PT_C2D4JXS1

-

This sample has one biospecimen that did not have the H3 variant captured in consensus calls. We will inspect this later; it may be fixed with #819.

+
+
+
+

Read in data

- -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_C2D4JXS1")
+ +
histologies_df <- read_tsv(histologies_file, guess_max = 10000)
+compiled_df <- read_tsv(compiled_results_file)
+fusions_df <- read_tsv(fusions_file)
- -
- -
-
-
-

PT_EHE800JJ

-

This sample has a hallmark EWSR1-FLI1 fusion, so could be re-classified as EWS once it goes through pathology review. For now, we will change it here to EWS.

+
+

Revise subtyping calls based on pathology feedback

+
+

PT_7E3V3JFX

+

PT_7E3V3JFX was classified in molecular-subtyping-HGG as DMG, H3 K28 because the first step in that module was to identify any samples in the cohort with "defining lesions:" H3 K28 or H3 G35 (original subtyping issue: #249).

- +
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_EHE800JJ")
+ filter(Kids_First_Participant_ID == "PT_7E3V3JFX") -
- + +

The relevant notes above are

+
+
    +
  1. PT_7E3V3JFX specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
  2. +
  3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in HGG subtyping would convert this sample by default - how to handle this?
  4. +
+
+

So we will revise the molecular_subtype, integrated_diagnosis, short_histology, and broad_histology accordingly.

+ - +
compiled_df <- compiled_df %>%
   mutate(
     integrated_diagnosis = case_when(
-      sample_id == "7316-229" ~ "Ewing sarcoma",
-      TRUE ~ integrated_diagnosis
+      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymoma",
+      TRUE ~ integrated_diagnosis 
     ),
     short_histology = case_when(
-      sample_id == "7316-229" ~ "EWS",
+      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymoma",
       TRUE ~ short_histology
     ),
     broad_histology = case_when(
-      sample_id == "7316-229" ~ "Mesenchymal non-meningothelial tumor",
+      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymal tumor",
       TRUE ~ broad_histology
     ),
     molecular_subtype = case_when(
-      sample_id == "7316-229" ~ "EWS",
+      Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "EPN, H3 K28",
       TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(
-      sample_id == "7316-229" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
     )
   )
-
-

PT_KTRJ8TFY

-

This sample has one biospecimen which did not have the H3 variant captured in consensus calls. The variant is present only in VarDict calls and in IGV, and we expect this will be fixed with #819.

+
+

BS_H1XPVS9A

+

BS_H1XPVS9A is from a sample with a BRAF V600E mutation that is H3 wildtype. This sample was originally labeled as a HGG. In low-dimensional transcriptomic space (e.g., UMAP, t-SNE), BS_H1XPVS9A clusters with LGG samples. (See molecular-subtyping-HGG.)

+

The relevant note from above:

+
+
    +
  1. Pathology confirmed this HGG BRAF V600E mutant tumor, BS_H1XPVS9A, to be a LGAT (PXA). I updated molecular_subtype here based on what it would look like, but this should come through via the LGAT subtyping ticket. How should we add this info?
  2. +
+
+

The steps for LGAT subtyping are to classify LGG samples on the basis of the presence/absence of BRAF fusions and BRAF V600E mutations.

- +
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_KTRJ8TFY")
+ filter(Kids_First_Biospecimen_ID == "BS_H1XPVS9A") %>% + select(Kids_First_Participant_ID, sample_id) %>% + left_join(compiled_df) - + +
Joining, by = c("Kids_First_Participant_ID", "sample_id")
+
- -
-
-

PT_X648RVMK

-

This sample was reviewed by pathology and confirmed to be ETMR, C19MC-altered. Will change here.

+

We know from molecular-subtyping-HGG that a BRAF V600E mutation is present. Now we have to check for the presence of BRAF fusions.

- -
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_X648RVMK")
+ +
fusions_df %>%
+  filter(str_detect(FusionName, "BRAF"),
+         Sample == "BS_H1XPVS9A")
-
- + +

There are no BRAF fusions in BS_H1XPVS9A, so we are able to classify this sample (7316-1106) as LGG, BRAF V600E.

+ - +
compiled_df <- compiled_df %>%
   mutate(
     integrated_diagnosis = case_when(
-      sample_id == "7316-238" ~ "Embryonal tumor with multilayer rosettes, C19MC-altered",
+      sample_id == "7316-1106" ~ "Low-grade astrocytic tumor",
       TRUE ~ integrated_diagnosis
     ),
     short_histology = case_when(
-      sample_id == "7316-238" ~ "ETMR",
+      sample_id == "7316-1106" ~ "LGAT",
       TRUE ~ short_histology
     ),
     broad_histology = case_when(
-      sample_id == "7316-238" ~ "Embryonal Tumor",
+      sample_id == "7316-1106" ~ "Low-grade astrocytic tumor",
       TRUE ~ broad_histology
     ),
     molecular_subtype = case_when(
-      sample_id == "7316-238" ~ "ETMR, C19MC-altered",
+      sample_id == "7316-1106" ~ "LGG, BRAF V600E",
       TRUE ~ molecular_subtype
     ),
     Notes = case_when(
-      sample_id == "7316-238" ~ "Updated via OpenPBTA subtyping",
+      sample_id == "7316-1106" ~ "Updated via OpenPBTA subtyping",
       TRUE ~ Notes
     )
   )
@@ -4016,56 +980,54 @@

PT_X648RVMK

-
-

PT_X71D7G5S

-

This sample was reviewed by pathology and confirmed to be H3 mutant. Will change here.

+
+

HGG BRAF V600E

+

The following point comes from another issue #627 (comment):

+
+

We have a standalone BRAF V600E subtype, but this is HGG only, and should be labeled as such (will create an update HGG analysis ticket).

+
- +
compiled_df %>%
-  filter(Kids_First_Participant_ID == "PT_X71D7G5S")
+ filter(molecular_subtype == "BRAF V600E") -
- + +

Now that we have addressed the one sample that should have been reclassified as LGG, we are able to update molecular_subtype to HGG, BRAF V600E for the remaining samples.

+ - +
compiled_df <- compiled_df %>%
-  mutate(
-    integrated_diagnosis = case_when(
-      sample_id == "7316-1105" ~ "High-grade glioma/astrocytoma, H3 G35-mutant",
-      TRUE ~ integrated_diagnosis
-    ),
-    short_histology = case_when(
-      sample_id == "7316-1105" ~ "HGAT",
-      TRUE ~ short_histology
-    ),
-    broad_histology = case_when(
-      sample_id == "7316-1105" ~ "Diffuse astrocytic and oligodendroglial tumor",
-      TRUE ~ broad_histology
-    ),
-    molecular_subtype = case_when(
-      sample_id == "7316-1105" ~ "HGG, H3 G35",
-      TRUE ~ molecular_subtype
-    ),
-    Notes = case_when(sample_id == "7316-1105" ~ "Updated via OpenPBTA subtyping",
-      TRUE ~ Notes
-    )
-  )
+ mutate(molecular_subtype = case_when( + molecular_subtype == "BRAF V600E" ~ "HGG, BRAF V600E", + TRUE ~ molecular_subtype + ))
+
+

PT_AQWDQW27

+
+
    +
  1. PT_AQWDQW27 specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a YAP1 fusion.
  2. +
+
+

The molecular-subtyping-EPN module has not been completed yet, but the logic that is in that module may mean that we need to include revising the labels of PT_AQWDQW27.

+
# TODO: do we need to update PT_AQWDQW27 once molecular-subtyping-EPN is 
+# complete?
+

Write revised table to file

- +
# sort first
 compiled_df <- compiled_df %>% 
   arrange(Kids_First_Participant_ID, sample_id)
@@ -4079,48 +1041,51 @@ 

Write revised table to file

Session Info

- +
sessionInfo()
- -
R version 3.6.0 (2019-04-26)
-Platform: x86_64-pc-linux-gnu (64-bit)
-Running under: Debian GNU/Linux 9 (stretch)
+
+
R version 3.5.1 (2018-07-02)
+Platform: x86_64-apple-darwin15.6.0 (64-bit)
+Running under: macOS Sierra 10.12.6
 
 Matrix products: default
-BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so
+BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
+LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
 
 locale:
- [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8       
- [4] LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
- [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C              
-[10] LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
+[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
 
 attached base packages:
 [1] stats     graphics  grDevices utils     datasets  methods   base     
 
 other attached packages:
-[1] forcats_0.4.0   stringr_1.4.0   dplyr_0.8.3     purrr_0.3.2     readr_1.3.1    
-[6] tidyr_0.8.3     tibble_2.1.3    ggplot2_3.2.0   tidyverse_1.2.1
+[1] forcats_0.5.0   stringr_1.4.0   dplyr_0.8.5     purrr_0.3.4    
+[5] readr_1.3.1     tidyr_1.0.2     tibble_3.0.0    ggplot2_3.3.0  
+[9] tidyverse_1.3.0
 
 loaded via a namespace (and not attached):
- [1] Rcpp_1.0.1       cellranger_1.1.0 pillar_1.4.2     compiler_3.6.0   base64enc_0.1-3 
- [6] tools_3.6.0      digest_0.6.20    evaluate_0.14    jsonlite_1.6     lubridate_1.7.4 
-[11] nlme_3.1-140     gtable_0.3.0     lattice_0.20-38  pkgconfig_2.0.2  rlang_0.4.0     
-[16] cli_1.1.0        rstudioapi_0.10  yaml_2.2.0       haven_2.1.1      xfun_0.8        
-[21] withr_2.1.2      xml2_1.2.0       httr_1.4.0       knitr_1.23       generics_0.0.2  
-[26] hms_0.4.2        rprojroot_1.3-2  grid_3.6.0       tidyselect_0.2.5 glue_1.3.1      
-[31] R6_2.4.0         readxl_1.3.1     rmarkdown_1.13   modelr_0.1.4     magrittr_1.5    
-[36] backports_1.1.4  scales_1.0.0     htmltools_0.3.6  rvest_0.3.4      assertthat_0.2.1
-[41] colorspace_1.4-1 stringi_1.4.3    lazyeval_0.2.2   munsell_0.5.0    broom_0.5.2     
-[46] crayon_1.3.4    
+ [1] tidyselect_1.0.0 xfun_0.12 haven_2.2.0 lattice_0.20-41 + [5] colorspace_1.4-1 vctrs_0.2.4 generics_0.0.2 htmltools_0.4.0 + [9] yaml_2.2.1 base64enc_0.1-3 rlang_0.4.6 pillar_1.4.3 +[13] withr_2.2.0 glue_1.4.0 DBI_1.1.0 dbplyr_1.4.2 +[17] modelr_0.1.6 readxl_1.3.1 lifecycle_0.2.0 munsell_0.5.0 +[21] gtable_0.3.0 cellranger_1.1.0 rvest_0.3.5 evaluate_0.14 +[25] knitr_1.28 fansi_0.4.1 broom_0.5.5 Rcpp_1.0.4 +[29] backports_1.1.6 scales_1.1.0 jsonlite_1.6.1 fs_1.3.1 +[33] hms_0.5.3 digest_0.6.25 stringi_1.4.6 rprojroot_1.3-2 +[37] grid_3.5.1 cli_2.0.2 tools_3.5.1 magrittr_1.5 +[41] crayon_1.3.4 pkgconfig_2.0.3 ellipsis_0.3.0 xml2_1.3.2 +[45] reprex_0.3.0 lubridate_1.7.8 rstudioapi_0.11 assertthat_0.2.1 +[49] rmarkdown_2.3 httr_1.4.1 R6_2.4.1 nlme_3.1-137 +[53] compiler_3.5.1
-
---
title: "Incorporating CHOP pathology input to molecular subtyping calls"
output: 
  html_notebook:
    toc: TRUE
    toc_float: TRUE
author: Jaclyn Taroni for ALSF CCDL, Jo Lynne Rokita for D3b
date: 2020
params:
  is_ci: FALSE
---

_Authorship above refers to this notebook **only**._

## Background

As part of this project, we have undertaken several analyses that use molecular data to subtype or reclassify biospecimens. 
In some cases, our analyses have resulted in an update of the `integrated_diagnosis` field included in the clinical file (`pbta-histologies.tsv` [[doc](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/master/doc/data-formats.md#data-caveats)]).
However, not all logic included in the code of the repository was ultimately in agreement with feedback from pathologists at CHOP.
In this notebook, we will make changes and document the cases where the logic in the modules of the repository deviates from the final labels.

## Pathology Feedback

Here, we're copying the table from [#609 (comment)](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/609#issuecomment-602821376):

| Kids_First_Biospecimen_ID | glioma_brain_region | sample_id | aliquot_id | Kids_First_Participant_ID | experimental_strategy | pathology_diagnosis                              | integrated_diagnosis                     | Notes from Path inspection                                                                  | Action                                                                                 | Given to Rita 3/13/20                                                                                      | short_histology | broad_histology                                                          | broad_composition | Notes                                                                 | cancer_predispositions | molecular_subtype   |
|---------------------------|---------------------|-----------|------------|---------------------------|-----------------------|--------------------------------------------------|------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|-----------------|--------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------|------------------------|---------------------|
| BS_C6ZZ3FY5               | hemispheric         | 7316-1105 | 470017     | PT_X71D7G5S               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | No path report available                                                                    | reviewed - OK with new dx                                                              | 1 slide, pathology report                                                                                  | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | HGG, H3 G35         |
| BS_P8WN8XEQ               | hemispheric         | 7316-1105 | 549712     | PT_X71D7G5S               | WGS                   | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | No path report available                                                                    | reviewed - OK with new dx                                                              | 1 slide, pathology report                                                                                  | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | HGG, H3 G35         |
| BS_23M72ABG               | NA                  | 7316-523  | 588343     | PT_AQWDQW27               | RNA-Seq               | Meningioma                                       | <s>Ependymoma</s> Meningioma                    | No path report available                                                                    | reviewed - keep as meningioma                                                          | 2 slides, pathology report; email request for radiology                                                    | tumor           | <s>Ependymal tumor</s>                                                          | tumor             |                                                                       | None documented        | <s>ST-EPN-YAP</s>          |
| BS_5JM573JC               | hemispheric         | 7316-2255 | 717166     | PT_6Q0NPVP3               | RNA-Seq               | Ganglioglioma                                    | Diffuse midline glioma                   | Path report ganglioglioma with extensive chondroid metaplasia                               | need to review radiology report; if not diffuse, ganglioglioma, H3 mut - cannot review | pathology report (no slides - from Pitt); email request for radiology - will not receive in the short term | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                         | None documented        | DMG, H3 K28         |
| BS_E5H6CFYT               | hemispheric         | 7316-2255 | 711134     | PT_6Q0NPVP3               | WGS                   | Ganglioglioma                                    | Diffuse midline glioma                   | Path report ganglioglioma with extensive chondroid metaplasia                               | need to review radiology report; if not diffuse, ganglioglioma, H3 mut - cannot review | pathology report (no slides - from Pitt); email request for radiology - will not receive in the short term | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                         | None documented        | DMG, H3 K28         |
| BS_BFDEZK1C               | other               | 7316-1102 | 549714     | PT_5BWZA0NT               | WGS                   | Supratentorial or Spinal Cord PNET               | Diffuse midline glioma                   | No path report available                                                                    | reviewed - OK with new dx                                                              | 3 slides, pathology report                                                                                 | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | DMG, H3 K28         |
| BS_MB7WN0ZB               | other               | 7316-1102 | 470031     | PT_5BWZA0NT               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | Diffuse midline glioma                   | No path report available                                                                    | reviewed - OK with new dx                                                              | 3 slides, pathology report                                                                                 | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | DMG, H3 K28         |
| BS_H1XPVS9A               | hemispheric         | 7316-1106 | 549575     | PT_T8V9ES93               | RNA-Seq               | High-grade glioma/astrocytoma (WHO grade III/IV) | <s>High-grade glioma</s> Low-grade glioma       | No path report available                                                                    | reviewed - OK with new dx (PXA/LGG)                                                    | 2 slides, pathology report                                                                                 | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Low-grade astrocytic tumor | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | LGG, BRAF V600E          |
| BS_J4E9SW51               | hemispheric         | 7316-1106 | 549708     | PT_T8V9ES93               | WGS                   | High-grade glioma/astrocytoma (WHO grade III/IV) | <s>High-grade glioma</s> Low-grade glioma       | No path report available                                                                    | reviewed - OK with new dx (PXA/LGG)                                                    | 2 slides, pathology report                                                                                 | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Low-grade astrocytic tumor | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | LGG, BRAF V600E          |
| BS_J8VX4D17               | mixed               | 7316-506  | 588299     | PT_7E3V3JFX               | RNA-Seq               | Ependymoma                                       | <s>Diffuse midline glioma</s> Ependymoma        | Path report posterior fossa ependymoma                                                      | reviewed - keep EPN with H3 mut- rare                                                  |                                                                                                            | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Ependymal tumor            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | <s>DMG, H3 K28</s>         |
| BS_G16NM5WJ               | mixed               | 7316-506  | 588011     | PT_7E3V3JFX               | WGS                   | Ependymoma                                       | <s>Diffuse midline glioma</s> Ependymoma        | Path report posterior fossa ependymoma                                                      | reviewed - keep EPN with H3 mut- rare                                                  |                                                                                                            | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Ependymal tumor            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | <s>DMG, H3 K28</s>         |
| BS_KSKZ9J7J               | NA                  | 7316-765  | 470731     | PT_80NVYCBS               | RNA-Seq               | Ependymoma                                       | CNS Embryonal Tumor                      | Path report anaplastic ependymoma                                                           | reviewed - OK with new dx                                                              |                                                                                                            | Embryonal Tumor | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | CNS HGNET-MN1       |
| BS_K07KNTFY               | NA                  | 7316-272  | 588001     | PT_00G007DM               | WGS                   | Medulloblastoma                                  | Embryonal tumor with multilayer rosettes | Path report had "medulloblastoma" handwritten, but final dx ETMR with multilayer rosettes   | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_QWNBZ9RJ               | NA                  | 7316-272  | 588287     | PT_00G007DM               | RNA-Seq               | Medulloblastoma                                  | Embryonal tumor with multilayer rosettes | Path report had "medulloblastoma" handwritten, but final dx ETMR with multilayer rosettes   | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_P39SQPTS               | NA                  | 7316-447  | 588300     | PT_01MZ62KG               | RNA-Seq               | Pineoblastoma                                    | Embryonal tumor with multilayer rosettes | Path report PNET/pineoblastoma; "there are scattered true rosettes (Flexner-Wintersteiner)" | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_TE8QFF7T               | NA                  | 7316-447  | 588012     | PT_01MZ62KG               | WGS                   | Pineoblastoma                                    | Embryonal tumor with multilayer rosettes | Path report PNET/pineoblastoma; "there are scattered true rosettes (Flexner-Wintersteiner)" | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_N6N147BY               | NA                  | 7316-238  | 232096     | PT_X648RVMK               | RNA-Seq               | High-grade glioma/astrocytoma (WHO grade III/IV) | Embryonal tumor with multilayer rosettes | Path report handwritten "high grade glioma"; final dx infiltrating glioneuronal neoplasm    | reviewed - OK with new dx (ETMR original tumor 7316-2975 and this one consistent)      |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_5P1TN10Z               | hemispheric         | 7316-158  | 746061     | PT_9BZETM0M               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | Path report PNET                                                                            | reviewed - OK with new dx grade III-IV                                                 |                                                                                                            | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping | None documented        | HGG, H3 G35         |
| BS_STNH7YSX               | hemispheric         | 7316-158  | 746056     | PT_9BZETM0M               | WGS                   | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | Path report PNET                                                                            | reviewed - OK with new dx grade III-IV                                                 |                                                                                                            | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping | None documented        | HGG, H3 G35         |
| BS_TV5B86ZD               | hemispheric         | 7316-158  | 150593     | PT_9BZETM0M               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | Path report PNET                                                                            | reviewed - OK with new dx grade III-IV                                                 |                                                                                                            | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping | None documented        | HGG, H3 G35         |

And the notes:

> #### Few notes:
> 1. `PT_7E3V3JFX` specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
> 2. `PT_AQWDQW27` specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a _YAP1_ fusion.
> 3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in [HGG subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/249) would convert this sample by default.
>  4. Pathology confirmed this HGG BRAF V600E mutant tumor, [`BS_H1XPVS9A`](https://cbethell.github.io/open-pbta-output/09-HGG-with-braf-clustering.nb.html#identify_sample_that_clusters_with_lgat), to be a LGAT (PXA). I updated `molecular_subtype` here based on what it would look like, but this should come through via the LGAT [subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/631) ticket. How should we add this info?

## Set up 

```{r}
# This logic is the same as analyses/gene-set-enrichment-analysis/02-model-gsea.Rmd
# Assigning params$is_ci to running_in_ci avoids a locked binding error
running_in_ci <- params$is_ci

# Are we testing? `is_ci` may arrive as TRUE/FALSE or as a single 0/1 value
# (numeric, or a string such as "1"), so recast a single 0/1 value to logical.
# Going through as.numeric() matters: as.logical("1") is NA, not TRUE.
if (length(running_in_ci) == 1 && running_in_ci %in% c(0, 1)) {
  running_in_ci <- as.logical(as.numeric(running_in_ci))
}

# Require exactly one non-missing logical value so that the later
# `if (running_in_ci)` cannot fail with an obscure error on NA, NULL, or a
# vector of length greater than one.
if (!(is.logical(running_in_ci) && length(running_in_ci) == 1 &&
      !is.na(running_in_ci))) {
  stop("\n\nERROR: The parameter `is_ci` should be FALSE/TRUE (or 0/1).")
}
```

```{r warning=FALSE}
library(tidyverse)
```

### Directory and files

#### Directories

```{r}
# Find the repository root using the presence of a `.git` directory
git_dir_criterion <- rprojroot::has_dir(".git")
root_dir <- rprojroot::find_root(git_dir_criterion)

# Shared input data lives under <root>/data
data_dir <- file.path(root_dir, "data")

# Results are addressed with a relative path, i.e. relative to the working
# directory the notebook is run from
results_dir <- "results"
```

#### Input

When we run this locally, we want to tie it to a specific version of the histologies file _prior_ to feedback we received from pathology that is recorded on [#609](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/609).

```{r}
# In CI, read the histologies file at the top of the data directory; otherwise
# read the copy inside the pinned release-v17-20200908 folder
histologies_file <- if (running_in_ci) {
  file.path(data_dir, "pbta-histologies.tsv")
} else {
  file.path(data_dir, "release-v17-20200908", "pbta-histologies.tsv")
}
```

```{r}
# Subtype calls that already incorporate clinical feedback
compiled_results_file <- file.path(
  results_dir,
  "compiled_molecular_subtypes_with_clinical_feedback.tsv"
)

# Putative oncogenic fusion calls
fusions_file <- file.path(data_dir, "pbta-fusion-putative-oncogenic.tsv")
```

#### Output

```{r}
# Path for this notebook's output table (clinical + pathology feedback)
output_file <- file.path(
  results_dir,
  "compiled_molecular_subtypes_with_clinical_pathology_feedback.tsv"
)
```

## Read in data

```{r message=FALSE}
# Use the same generous `guess_max` for every table so that column types are
# inferred from more than readr's default number of leading rows; sparsely
# populated columns could otherwise be guessed incorrectly.
histologies_df <- read_tsv(histologies_file, guess_max = 10000)
compiled_df <- read_tsv(compiled_results_file, guess_max = 10000)
fusions_df <- read_tsv(fusions_file, guess_max = 10000)
```

## Revise subtyping calls based on pathology feedback

### `PT_7E3V3JFX`

`PT_7E3V3JFX` was classified in `molecular-subtyping-HGG` as `DMG, H3 K28` because the first step in that module was to identify any samples in the cohort with "defining lesions:" H3 K28 or H3 G35 (original subtyping issue: [#249](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/249)).

```{r}
# Display the current rows for participant PT_7E3V3JFX
filter(compiled_df, Kids_First_Participant_ID == "PT_7E3V3JFX")
```

The relevant notes above are

> 1. `PT_7E3V3JFX` specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
> 3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in [HGG subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/249) would convert this sample by default - how to handle this? 

So we will revise the `molecular_subtype`, `integrated_diagnosis`, `short_histology`, and `broad_histology` accordingly.

```{r}
# Flag every row that belongs to participant PT_7E3V3JFX
is_h3_k28_epn <- compiled_df$Kids_First_Participant_ID == "PT_7E3V3JFX"

# Overwrite the labels on flagged rows only; all other rows keep their values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(is_h3_k28_epn ~ "Ependymoma",
                                   TRUE ~ integrated_diagnosis),
  short_histology = case_when(is_h3_k28_epn ~ "Ependymoma",
                              TRUE ~ short_histology),
  broad_histology = case_when(is_h3_k28_epn ~ "Ependymal tumor",
                              TRUE ~ broad_histology),
  molecular_subtype = case_when(is_h3_k28_epn ~ "EPN, H3 K28",
                                TRUE ~ molecular_subtype),
  Notes = case_when(is_h3_k28_epn ~ "Updated via OpenPBTA subtyping",
                    TRUE ~ Notes)
)
```

### `BS_H1XPVS9A`

`BS_H1XPVS9A` is from a sample with a BRAF V600E mutation that is H3 wildtype.
This sample was originally labeled as a HGG.
In low-dimensional transcriptomic space (e.g., UMAP, t-SNE), `BS_H1XPVS9A` clusters with LGG samples.
(See `molecular-subtyping-HGG`.)

The relevant note from above:

>  4. Pathology confirmed this HGG BRAF V600E mutant tumor, `BS_H1XPVS9A`
to be a LGAT (PXA). I updated `molecular_subtype` here based on what it would look like, but this should come through via the LGAT [subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/631) ticket. How should we add this info?

The steps for LGAT subtyping are to classify LGG samples on the basis of the presence/absence of _BRAF_ fusions and BRAF V600E mutations.

```{r}
# Show every row that shares a participant and sample ID with BS_H1XPVS9A.
# The join keys are spelled out rather than left to an implicit natural join.
compiled_df %>%
  filter(Kids_First_Biospecimen_ID == "BS_H1XPVS9A") %>%
  select(Kids_First_Participant_ID, sample_id) %>%
  left_join(compiled_df, by = c("Kids_First_Participant_ID", "sample_id"))
```

We know from `molecular-subtyping-HGG` that a BRAF V600E mutation is present.
Now we have to check for the presence of _BRAF_ fusions.

```{r}
# List any fusion recorded for BS_H1XPVS9A whose name contains "BRAF"
filter(fusions_df, Sample == "BS_H1XPVS9A", str_detect(FusionName, "BRAF"))
```

There are no _BRAF_ fusions in `BS_H1XPVS9A`, so we are able to classify this sample (`7316-1106`) as `LGG, BRAF V600E`.

```{r}
# Flag every row for sample 7316-1106
is_lgg_braf_sample <- compiled_df$sample_id == "7316-1106"

# Relabel the flagged rows as a low-grade tumor with a BRAF V600E subtype;
# all other rows keep their existing values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(is_lgg_braf_sample ~ "Low-grade astrocytic tumor",
                                   TRUE ~ integrated_diagnosis),
  short_histology = case_when(is_lgg_braf_sample ~ "LGAT",
                              TRUE ~ short_histology),
  broad_histology = case_when(is_lgg_braf_sample ~ "Low-grade astrocytic tumor",
                              TRUE ~ broad_histology),
  molecular_subtype = case_when(is_lgg_braf_sample ~ "LGG, BRAF V600E",
                                TRUE ~ molecular_subtype),
  Notes = case_when(is_lgg_braf_sample ~ "Updated via OpenPBTA subtyping",
                    TRUE ~ Notes)
)
```

### HGG BRAF V600E 

We will be removing this from subtyping, so this can be left for now

The following point comes from another issue [#627 (comment)](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/627#issuecomment-598789232):

> We have a standalone BRAF V600E subtype, but this is HGG only, and should be labeled as such (will create an update HGG analysis ticket).

```{r}
# Display the rows still carrying the standalone "BRAF V600E" subtype
filter(compiled_df, molecular_subtype == "BRAF V600E")
```

Now that we have addressed the one sample that should have been reclassified as LGG, we are able to update `molecular_subtype` to `HGG, BRAF V600E` for the remaining samples.

```{r}
# Rows whose subtype is exactly "BRAF V600E" get the "HGG, " prefix; every
# other row keeps its existing subtype
is_standalone_braf <- compiled_df$molecular_subtype == "BRAF V600E"
compiled_df <- mutate(
  compiled_df,
  molecular_subtype = case_when(is_standalone_braf ~ "HGG, BRAF V600E",
                                TRUE ~ molecular_subtype)
)
```

### `PT_AQWDQW27`

> 2. `PT_AQWDQW27` specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a _YAP1_ fusion.

The `molecular-subtyping-EPN` module has not been completed yet, but the logic that is in that module may mean that we need to include revising the labels of `PT_AQWDQW27`.

```{r}
# This sample is missing from the EPN table, but it should be there - will
# have to investigate and update this later.
filter(compiled_df, Kids_First_Participant_ID == "PT_AQWDQW27")
```

### `PT_6Q0NPVP3`

The specimens for this patient, BS_5JM573JC and BS_E5H6CFYT, were classified as HGAT due to the presence of a histone mutation, but with the removal of LGAT from the HGAT module, this sample will no longer show up in two modules.
```{r}
# Display the current rows for participant PT_6Q0NPVP3
filter(compiled_df, Kids_First_Participant_ID == "PT_6Q0NPVP3")
```

### Are there any other duplicate subtypes?
```{r}
# One row per distinct participant / sample / subtype combination
unique_subtypes <- distinct(
  select(compiled_df, Kids_First_Participant_ID, sample_id, molecular_subtype)
)

# A sample_id that repeats here has more than one participant/subtype
# combination; show the repeated occurrences.
# PT_KTRJ8TFY (fixed in clinical feedback) and PT_6Q0NPVP3 (fixed in HGG module removing LGAT)
filter(unique_subtypes, duplicated(sample_id))
```
### `PT_00G007DM`
7316-272 is subtyped as ETMR because a C19MC alteration was found and pathology review confirms the new diagnosis

```{r}
# Relabel sample 7316-272 as ETMR, C19MC-altered. Rows for any other sample
# keep their current values in all four columns.
compiled_df <- compiled_df %>%
  mutate(
    molecular_subtype = case_when(
      sample_id == "7316-272" ~ "ETMR, C19MC-altered",
      TRUE ~ molecular_subtype
    ),
    short_histology = case_when(
      sample_id == "7316-272" ~ "ETMR",
      TRUE ~ short_histology
    ),
    integrated_diagnosis = case_when(
      sample_id == "7316-272" ~ "Embryonal tumor with multilayer rosettes, C19MC-altered",
      TRUE ~ integrated_diagnosis
    ),
    Notes = case_when(
      sample_id == "7316-272" ~ "Updated via OpenPBTA subtyping",
      TRUE ~ Notes
    )
  ) %>%
  # Drop exact duplicate rows; distinct() is the de-duplication verb used by
  # the other chunks of this notebook, so use it here as well
  distinct()
```

This sample has multiple specimens with different diagnoses and subtypes and this is actually expected, as there are two different diagnoses in the CBTN database and the C19MC alteration was only found in one of the samples.
```{r}
# Display every row recorded for participant PT_00G007DM
filter(compiled_df, Kids_First_Participant_ID == "PT_00G007DM")
```

### `PT_5BWZA0NT`

This sample was inspected by pathology and should be annotated as DMG, H3 K28, so let's update that.
```{r}
# Display every row recorded for participant PT_5BWZA0NT
filter(compiled_df, Kids_First_Participant_ID == "PT_5BWZA0NT")
```

```{r}
# Relabel sample 7316-1102 as DMG, H3 K28 across the diagnosis, histology,
# subtype and Notes columns; all other rows keep their existing values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(
    sample_id == "7316-1102" ~ "Diffuse midline glioma, H3 K28-mutant",
    TRUE ~ integrated_diagnosis
  ),
  short_histology = case_when(
    sample_id == "7316-1102" ~ "HGAT",
    TRUE ~ short_histology
  ),
  broad_histology = case_when(
    sample_id == "7316-1102" ~ "Diffuse astrocytic and oligodendroglial tumor",
    TRUE ~ broad_histology
  ),
  molecular_subtype = case_when(
    sample_id == "7316-1102" ~ "DMG, H3 K28",
    TRUE ~ molecular_subtype
  ),
  Notes = case_when(
    sample_id == "7316-1102" ~ "Updated via OpenPBTA subtyping",
    TRUE ~ Notes
  )
)
```

### `PT_80NVYCBS`

This sample was inspected by pathology and should be CNS HGNET-MN1, so we will update here.
```{r}
# Display every row recorded for participant PT_80NVYCBS
filter(compiled_df, Kids_First_Participant_ID == "PT_80NVYCBS")
```
```{r}
# Relabel sample 7316-765 as CNS HGNET-MN1 across the diagnosis, histology,
# subtype and Notes columns; all other rows keep their existing values.
compiled_df <- compiled_df %>%
  mutate(
    integrated_diagnosis = case_when(
      sample_id == "7316-765" ~ "CNS Embryonal Tumor, HGNET-MN1",
      TRUE ~ integrated_diagnosis
    ),
    short_histology = case_when(
      sample_id == "7316-765" ~ "Embryonal Tumor",
      TRUE ~ short_histology
    ),
    # broad_histology is spelled "Embryonal tumor" (lower-case "t") in the
    # pathology feedback table; a different capitalization would be treated
    # as a separate category by any later grouping on this column.
    # NOTE(review): confirm the spelling against pbta-histologies.tsv.
    broad_histology = case_when(
      sample_id == "7316-765" ~ "Embryonal tumor",
      TRUE ~ broad_histology
    ),
    molecular_subtype = case_when(
      sample_id == "7316-765" ~ "CNS HGNET-MN1",
      TRUE ~ molecular_subtype
    ),
    Notes = case_when(
      sample_id == "7316-765" ~ "Updated via OpenPBTA subtyping",
      TRUE ~ Notes
    )
  )
```

### `PT_9BZETM0M`

This sample was inspected by pathology and should be HGG, H3 G35, so we will update here.
```{r}
# Display every row recorded for participant PT_9BZETM0M
filter(compiled_df, Kids_First_Participant_ID == "PT_9BZETM0M")
```
```{r}
# Relabel sample 7316-158 as HGG, H3 G35 across the diagnosis, histology,
# subtype and Notes columns; all other rows keep their existing values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(
    sample_id == "7316-158" ~ "High-grade glioma/astrocytoma, H3 G35-mutant",
    TRUE ~ integrated_diagnosis
  ),
  short_histology = case_when(
    sample_id == "7316-158" ~ "HGAT",
    TRUE ~ short_histology
  ),
  broad_histology = case_when(
    sample_id == "7316-158" ~ "Diffuse astrocytic and oligodendroglial tumor",
    TRUE ~ broad_histology
  ),
  molecular_subtype = case_when(
    sample_id == "7316-158" ~ "HGG, H3 G35",
    TRUE ~ molecular_subtype
  ),
  Notes = case_when(
    sample_id == "7316-158" ~ "Updated via OpenPBTA subtyping",
    TRUE ~ Notes
  )
)
```

### `PT_C2D4JXS1`

This sample has one biospecimen which did not have the H3 variant captured in consensus calls - we will inspect this later, and it will possibly be fixed with [#819](https://github.com/AlexsLemonade/OpenPBTA-analysis/pull/854).
```{r}
# Display every row recorded for participant PT_C2D4JXS1
filter(compiled_df, Kids_First_Participant_ID == "PT_C2D4JXS1")
```

### `PT_EHE800JJ`

This sample has a hallmark EWSR1-FLI1 fusion, so could be re-classified as EWS once it goes through pathology review. For now, we will change it here to EWS.
```{r}
# Display every row recorded for participant PT_EHE800JJ
filter(compiled_df, Kids_First_Participant_ID == "PT_EHE800JJ")
```
```{r}
# Relabel sample 7316-229 as EWS across the diagnosis, histology, subtype and
# Notes columns; all other rows keep their existing values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(
    sample_id == "7316-229" ~ "Ewing sarcoma",
    TRUE ~ integrated_diagnosis
  ),
  short_histology = case_when(
    sample_id == "7316-229" ~ "EWS",
    TRUE ~ short_histology
  ),
  broad_histology = case_when(
    sample_id == "7316-229" ~ "Mesenchymal non-meningothelial tumor",
    TRUE ~ broad_histology
  ),
  molecular_subtype = case_when(
    sample_id == "7316-229" ~ "EWS",
    TRUE ~ molecular_subtype
  ),
  Notes = case_when(
    sample_id == "7316-229" ~ "Updated via OpenPBTA subtyping",
    TRUE ~ Notes
  )
)
```

### `PT_KTRJ8TFY`

This sample has one biospecimen which did not have the H3 variant captured in consensus calls. The variant is present only in VarDict calls and in IGV, and we expect this will be fixed with [#819](https://github.com/AlexsLemonade/OpenPBTA-analysis/pull/854).
```{r}
# Display every row recorded for participant PT_KTRJ8TFY
filter(compiled_df, Kids_First_Participant_ID == "PT_KTRJ8TFY")
```

### `PT_X648RVMK`

This sample was reviewed by pathology and confirmed to be ETMR, C19MC-altered. Will change here.
```{r}
# Display every row recorded for participant PT_X648RVMK
filter(compiled_df, Kids_First_Participant_ID == "PT_X648RVMK")
```
```{r}
# Relabel sample 7316-238 as ETMR, C19MC-altered across the diagnosis,
# histology, subtype and Notes columns; all other rows keep their existing
# values.
compiled_df <- compiled_df %>%
  mutate(
    integrated_diagnosis = case_when(
      sample_id == "7316-238" ~ "Embryonal tumor with multilayer rosettes, C19MC-altered",
      TRUE ~ integrated_diagnosis
    ),
    short_histology = case_when(
      sample_id == "7316-238" ~ "ETMR",
      TRUE ~ short_histology
    ),
    # broad_histology is spelled "Embryonal tumor" (lower-case "t") in the
    # pathology feedback table; a different capitalization would be treated
    # as a separate category by any later grouping on this column.
    # NOTE(review): confirm the spelling against pbta-histologies.tsv.
    broad_histology = case_when(
      sample_id == "7316-238" ~ "Embryonal tumor",
      TRUE ~ broad_histology
    ),
    molecular_subtype = case_when(
      sample_id == "7316-238" ~ "ETMR, C19MC-altered",
      TRUE ~ molecular_subtype
    ),
    Notes = case_when(
      sample_id == "7316-238" ~ "Updated via OpenPBTA subtyping",
      TRUE ~ Notes
    )
  )
```

### `PT_X71D7G5S`

This sample was reviewed by pathology and confirmed to be H3 mutant. Will change here.
```{r}
# Display every row recorded for participant PT_X71D7G5S
filter(compiled_df, Kids_First_Participant_ID == "PT_X71D7G5S")
```
```{r}
# Relabel sample 7316-1105 as HGG, H3 G35 across the diagnosis, histology,
# subtype and Notes columns; all other rows keep their existing values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(
    sample_id == "7316-1105" ~ "High-grade glioma/astrocytoma, H3 G35-mutant",
    TRUE ~ integrated_diagnosis
  ),
  short_histology = case_when(
    sample_id == "7316-1105" ~ "HGAT",
    TRUE ~ short_histology
  ),
  broad_histology = case_when(
    sample_id == "7316-1105" ~ "Diffuse astrocytic and oligodendroglial tumor",
    TRUE ~ broad_histology
  ),
  molecular_subtype = case_when(
    sample_id == "7316-1105" ~ "HGG, H3 G35",
    TRUE ~ molecular_subtype
  ),
  Notes = case_when(
    sample_id == "7316-1105" ~ "Updated via OpenPBTA subtyping",
    TRUE ~ Notes
  )
)
```

### Write revised table to file

```{r}
# sort first
compiled_df <- compiled_df %>% 
  arrange(Kids_First_Participant_ID, sample_id) %>%
  distinct() # to remove duplicates from above
write_tsv(compiled_df, output_file)
```

## Session Info

```{r}
# Print the details of the current R session into the rendered notebook
sessionInfo()
```

+
---
title: "Incorporating CHOP pathology input to molecular subtyping calls"
output: 
  html_notebook:
    toc: TRUE
    toc_float: TRUE
author: Jaclyn Taroni for ALSF CCDL
date: 2020
params:
  is_ci: FALSE
---

_Authorship above refers to this notebook **only**._

## Background

As part of this project, we have undertaken several analyses that use molecular data to subtype or reclassify biospecimens. 
In some cases, our analyses have resulted in an update of the `integrated_diagnosis` field included in the clinical file (`pbta-histologies.tsv` [[doc](https://github.com/AlexsLemonade/OpenPBTA-analysis/blob/master/doc/data-formats.md#data-caveats)]).
However, not all logic included in the code of the repository was ultimately in agreement with feedback from pathologists at CHOP.
In this notebook, we will make changes and document the cases where the logic in the modules of the repository deviates from the final labels.

## Pathology Feedback

Here, we're copying the table from [#609 (comment)](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/609#issuecomment-602821376):

| Kids_First_Biospecimen_ID | glioma_brain_region | sample_id | aliquot_id | Kids_First_Participant_ID | experimental_strategy | pathology_diagnosis                              | integrated_diagnosis                     | Notes from Path inspection                                                                  | Action                                                                                 | Given to Rita 3/13/20                                                                                      | short_histology | broad_histology                                                          | broad_composition | Notes                                                                 | cancer_predispositions | molecular_subtype   |
|---------------------------|---------------------|-----------|------------|---------------------------|-----------------------|--------------------------------------------------|------------------------------------------|---------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------|-----------------|--------------------------------------------------------------------------|-------------------|-----------------------------------------------------------------------|------------------------|---------------------|
| BS_C6ZZ3FY5               | hemispheric         | 7316-1105 | 470017     | PT_X71D7G5S               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | No path report available                                                                    | reviewed - OK with new dx                                                              | 1 slide, pathology report                                                                                  | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | HGG, H3 G35         |
| BS_P8WN8XEQ               | hemispheric         | 7316-1105 | 549712     | PT_X71D7G5S               | WGS                   | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | No path report available                                                                    | reviewed - OK with new dx                                                              | 1 slide, pathology report                                                                                  | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | HGG, H3 G35         |
| BS_23M72ABG               | NA                  | 7316-523  | 588343     | PT_AQWDQW27               | RNA-Seq               | Meningioma                                       | <s>Ependymoma</s> Meningioma                    | No path report available                                                                    | reviewed - keep as meningioma                                                          | 2 slides, pathology report; email request for radiology                                                    | tumor           | <s>Ependymal tumor</s>                                                          | tumor             |                                                                       | None documented        | <s>ST-EPN-YAP</s>          |
| BS_5JM573JC               | hemispheric         | 7316-2255 | 717166     | PT_6Q0NPVP3               | RNA-Seq               | Ganglioglioma                                    | Diffuse midline glioma                   | Path report ganglioglioma with extensive chondroid metaplasia                               | need to review radiology report; if not diffuse, ganglioglioma, H3 mut - cannot review | pathology report (no slides - from Pitt); email request for radiology - will not receive in the short term | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                         | None documented        | DMG, H3 K28         |
| BS_E5H6CFYT               | hemispheric         | 7316-2255 | 711134     | PT_6Q0NPVP3               | WGS                   | Ganglioglioma                                    | Diffuse midline glioma                   | Path report ganglioglioma with extensive chondroid metaplasia                               | need to review radiology report; if not diffuse, ganglioglioma, H3 mut - cannot review | pathology report (no slides - from Pitt); email request for radiology - will not receive in the short term | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                         | None documented        | DMG, H3 K28         |
| BS_BFDEZK1C               | other               | 7316-1102 | 549714     | PT_5BWZA0NT               | WGS                   | Supratentorial or Spinal Cord PNET               | Diffuse midline glioma                   | No path report available                                                                    | reviewed - OK with new dx                                                              | 3 slides, pathology report                                                                                 | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | DMG, H3 K28         |
| BS_MB7WN0ZB               | other               | 7316-1102 | 470031     | PT_5BWZA0NT               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | Diffuse midline glioma                   | No path report available                                                                    | reviewed - OK with new dx                                                              | 3 slides, pathology report                                                                                 | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | DMG, H3 K28         |
| BS_H1XPVS9A               | hemispheric         | 7316-1106 | 549575     | PT_T8V9ES93               | RNA-Seq               | High-grade glioma/astrocytoma (WHO grade III/IV) | <s>High-grade glioma</s> Low-grade glioma       | No path report available                                                                    | reviewed - OK with new dx (PXA/LGG)                                                    | 2 slides, pathology report                                                                                 | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Low-grade astrocytic tumor | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | LGG, BRAF V600E          |
| BS_J4E9SW51               | hemispheric         | 7316-1106 | 549708     | PT_T8V9ES93               | WGS                   | High-grade glioma/astrocytoma (WHO grade III/IV) | <s>High-grade glioma</s> Low-grade glioma       | No path report available                                                                    | reviewed - OK with new dx (PXA/LGG)                                                    | 2 slides, pathology report                                                                                 | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Low-grade astrocytic tumor | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | LGG, BRAF V600E          |
| BS_J8VX4D17               | mixed               | 7316-506  | 588299     | PT_7E3V3JFX               | RNA-Seq               | Ependymoma                                       | <s>Diffuse midline glioma</s> Ependymoma        | Path report posterior fossa ependymoma                                                      | reviewed - keep EPN with H3 mut- rare                                                  |                                                                                                            | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Ependymal tumor            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | <s>DMG, H3 K28</s>         |
| BS_G16NM5WJ               | mixed               | 7316-506  | 588011     | PT_7E3V3JFX               | WGS                   | Ependymoma                                       | <s>Diffuse midline glioma</s> Ependymoma        | Path report posterior fossa ependymoma                                                      | reviewed - keep EPN with H3 mut- rare                                                  |                                                                                                            | HGAT            | <s>Diffuse astrocytic and oligodendroglial tumor</s> Ependymal tumor            | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | <s>DMG, H3 K28</s>         |
| BS_KSKZ9J7J               | NA                  | 7316-765  | 470731     | PT_80NVYCBS               | RNA-Seq               | Ependymoma                                       | CNS Embryonal Tumor                      | Path report anaplastic ependymoma                                                           | reviewed - OK with new dx                                                              |                                                                                                            | Embryonal Tumor | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | CNS HGNET-MN1       |
| BS_K07KNTFY               | NA                  | 7316-272  | 588001     | PT_00G007DM               | WGS                   | Medulloblastoma                                  | Embryonal tumor with multilayer rosettes | Path report had "medulloblastoma" handwritten, but final dx ETMR with multilayer rosettes   | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_QWNBZ9RJ               | NA                  | 7316-272  | 588287     | PT_00G007DM               | RNA-Seq               | Medulloblastoma                                  | Embryonal tumor with multilayer rosettes | Path report had "medulloblastoma" handwritten, but final dx ETMR with multilayer rosettes   | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_P39SQPTS               | NA                  | 7316-447  | 588300     | PT_01MZ62KG               | RNA-Seq               | Pineoblastoma                                    | Embryonal tumor with multilayer rosettes | Path report PNET/pineoblastoma; "there are scattered true rosettes (Flexner-Wintersteiner)" | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_TE8QFF7T               | NA                  | 7316-447  | 588012     | PT_01MZ62KG               | WGS                   | Pineoblastoma                                    | Embryonal tumor with multilayer rosettes | Path report PNET/pineoblastoma; "there are scattered true rosettes (Flexner-Wintersteiner)" | reviewed - OK with new dx                                                              |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_N6N147BY               | NA                  | 7316-238  | 232096     | PT_X648RVMK               | RNA-Seq               | High-grade glioma/astrocytoma (WHO grade III/IV) | Embryonal tumor with multilayer rosettes | Path report handwritten "high grade glioma"; final dx infiltrating glioneuronal neoplasm    | reviewed - OK with new dx (ETMR original tumor 7316-2975 and this one consistent)      |                                                                                                            | ETMR            | Embryonal tumor                                                          | tumor             | Updated via OpenPBTA subtyping                                        | None documented        | ETMR, C19MC-altered |
| BS_5P1TN10Z               | hemispheric         | 7316-158  | 746061     | PT_9BZETM0M               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | Path report PNET                                                                            | reviewed - OK with new dx grade III-IV                                                 |                                                                                                            | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping | None documented        | HGG, H3 G35         |
| BS_STNH7YSX               | hemispheric         | 7316-158  | 746056     | PT_9BZETM0M               | WGS                   | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | Path report PNET                                                                            | reviewed - OK with new dx grade III-IV                                                 |                                                                                                            | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping | None documented        | HGG, H3 G35         |
| BS_TV5B86ZD               | hemispheric         | 7316-158  | 150593     | PT_9BZETM0M               | RNA-Seq               | Supratentorial or Spinal Cord PNET               | High-grade glioma                        | Path report PNET                                                                            | reviewed - OK with new dx grade III-IV                                                 |                                                                                                            | HGAT            | Diffuse astrocytic and oligodendroglial tumor                            | tumor             | from PNET to HGG due to H3F3A mutation;Updated via OpenPBTA subtyping | None documented        | HGG, H3 G35         |

And the notes:

> #### Few notes:
> 1. `PT_7E3V3JFX` specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
> 2. `PT_AQWDQW27` specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a _YAP1_ fusion.
> 3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in [HGG subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/249) would convert this sample by default - how to handle this? 
>  4. Pathology confirmed this HGG BRAF V600E mutant tumor, [`BS_H1XPVS9A`](https://cbethell.github.io/open-pbta-output/09-HGG-with-braf-clustering.nb.html#identify_sample_that_clusters_with_lgat), to be a LGAT (PXA). I updated `molecular_subtype` here based on what it would look like, but this should come through via the LGAT [subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/631) ticket. How should we add this info?

## Set up 

```{r}
# This logic is the same as analyses/gene-set-enrichment-analysis/02-model-gsea.Rmd
# Assigning params$is_ci to running_in_ci avoids a locked binding error
running_in_ci <- params$is_ci

# Are we testing? When `is_ci` is supplied as a single 0/1 value, recast it as
# logical. Going through as.numeric() means a "0"/"1" string is converted to
# FALSE/TRUE as well, rather than to NA.
if (length(running_in_ci) == 1 && running_in_ci %in% c(0, 1)) {
  running_in_ci <- as.logical(as.numeric(running_in_ci))
}

# Anything that is not a single, non-missing logical value at this point is
# rejected here, so that a later `if (running_in_ci)` cannot fail on a
# zero-length or NA condition.
if (!(is.logical(running_in_ci) &&
      length(running_in_ci) == 1 &&
      !is.na(running_in_ci))) {
  stop("\n\nERROR: The parameter `is_ci` should be FALSE/TRUE (or 0/1).")
}
```

```{r warning=FALSE}
library(tidyverse)
```

### Directory and files

#### Directories

```{r}
# Repository root: rprojroot looks for the directory that contains `.git`
root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
# `data` directory directly under the repository root
data_dir <- file.path(root_dir, "data")
# Relative path, so it is resolved against the working directory at run time
results_dir <- "results"
```

#### Input

When we run this locally, we want to tie it to a specific version of the histologies file _prior_ to feedback we received from pathology that is recorded on [#609](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/609).

```{r}
# In CI read the histologies file directly under the data directory;
# otherwise read the copy inside the release-v15-20200228 folder
histologies_file <- if (running_in_ci) {
  file.path(data_dir, "pbta-histologies.tsv")
} else {
  file.path(data_dir, "release-v15-20200228", "pbta-histologies.tsv")
}
```

```{r}
# Compiled subtype table with clinical feedback, read from the results
# directory
compiled_results_file <- file.path(results_dir, 
                                   "compiled_molecular_subtypes_with_clinical_feedback.tsv")
# Putative oncogenic fusion calls, read from the data directory
fusions_file <- file.path(data_dir,
                          "pbta-fusion-putative-oncogenic.tsv")
```

#### Output

```{r}
# Destination of the revised table written at the end of this notebook
output_file <- file.path(results_dir,
                         "compiled_molecular_subtypes_with_clinical_pathology_feedback.tsv")
```

## Read in data

```{r message=FALSE}
# A larger guess_max makes readr inspect more rows before settling on column
# types, so a column that is empty in its first rows is less likely to be
# mis-typed.
histologies_df <- read_tsv(histologies_file, guess_max = 10000)
# The compiled table is read with the same guess_max as the histologies file.
# NOTE(review): presumably it shares the sparse free-text columns (e.g.
# `Notes`) of the histologies file - confirm.
compiled_df <- read_tsv(compiled_results_file, guess_max = 10000)
fusions_df <- read_tsv(fusions_file)
```

## Revise subtyping calls based on pathology feedback

### `PT_7E3V3JFX`

`PT_7E3V3JFX` was classified in `molecular-subtyping-HGG` as `DMG, H3 K28` because the first step in that module was to identify any samples in the cohort with "defining lesions:" H3 K28 or H3 G35 (original subtyping issue: [#249](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/249)).

```{r}
# Display every row recorded for participant PT_7E3V3JFX
filter(compiled_df, Kids_First_Participant_ID == "PT_7E3V3JFX")
```

The relevant notes above are

> 1. `PT_7E3V3JFX` specimens were consistent with the original EPN dx, so pathology would call this a rare EPN, H3 K28 mutated tumor, rather than DMG.
> 3. Because 1 is a rare tumor (maybe first seen), the logic of searching for all H3 K28 mutations in [HGG subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/249) would convert this sample by default - how to handle this? 

So we will revise the `molecular_subtype`, `integrated_diagnosis`, `short_histology`, and `broad_histology` accordingly.

```{r}
# Relabel every row of participant PT_7E3V3JFX as an ependymoma carrying an
# H3 K28 mutation; rows of other participants keep their existing values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(
    Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymoma",
    TRUE ~ integrated_diagnosis
  ),
  short_histology = case_when(
    Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymoma",
    TRUE ~ short_histology
  ),
  broad_histology = case_when(
    Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "Ependymal tumor",
    TRUE ~ broad_histology
  ),
  molecular_subtype = case_when(
    Kids_First_Participant_ID == "PT_7E3V3JFX" ~ "EPN, H3 K28",
    TRUE ~ molecular_subtype
  )
)
```

### `BS_H1XPVS9A`

`BS_H1XPVS9A` is from a sample with a BRAF V600E mutation that is H3 wildtype.
This sample was originally labeled as a HGG.
In low-dimensional transcriptomic space (e.g., UMAP, t-SNE), `BS_H1XPVS9A` clusters with LGG samples.
(See `molecular-subtyping-HGG`.)

The relevant note from above:

>  4. Pathology confirmed this HGG BRAF V600E mutant tumor, `BS_H1XPVS9A`
to be a LGAT (PXA). I updated `molecular_subtype` here based on what it would look like, but this should come through via the LGAT [subtyping](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/631) ticket. How should we add this info?

The steps for LGAT subtyping are to classify LGG samples on the basis of the presence/absence of _BRAF_ fusions and BRAF V600E mutations.

```{r}
# Look up the participant and sample of biospecimen BS_H1XPVS9A, then pull
# back every row of the compiled table that shares that participant/sample
# pair. The join keys are spelled out instead of being inferred from the
# shared column names.
compiled_df %>%
  filter(Kids_First_Biospecimen_ID == "BS_H1XPVS9A") %>%
  select(Kids_First_Participant_ID, sample_id) %>%
  left_join(compiled_df,
            by = c("Kids_First_Participant_ID", "sample_id"))
```

We know from `molecular-subtyping-HGG` that a BRAF V600E mutation is present.
Now we have to check for the presence of _BRAF_ fusions.

```{r}
# Display any fusion calls for biospecimen BS_H1XPVS9A whose fusion name
# contains "BRAF"
filter(
  fusions_df,
  Sample == "BS_H1XPVS9A",
  str_detect(FusionName, "BRAF")
)
```

There are no _BRAF_ fusions in `BS_H1XPVS9A`, so we are able to classify this sample (`7316-1106`) as `LGG, BRAF V600E`.

```{r}
# Relabel sample 7316-1106 as LGG, BRAF V600E across the diagnosis,
# histology, subtype and Notes columns; all other rows keep their existing
# values
compiled_df <- mutate(
  compiled_df,
  integrated_diagnosis = case_when(
    sample_id == "7316-1106" ~ "Low-grade astrocytic tumor",
    TRUE ~ integrated_diagnosis
  ),
  short_histology = case_when(
    sample_id == "7316-1106" ~ "LGAT",
    TRUE ~ short_histology
  ),
  broad_histology = case_when(
    sample_id == "7316-1106" ~ "Low-grade astrocytic tumor",
    TRUE ~ broad_histology
  ),
  molecular_subtype = case_when(
    sample_id == "7316-1106" ~ "LGG, BRAF V600E",
    TRUE ~ molecular_subtype
  ),
  Notes = case_when(
    sample_id == "7316-1106" ~ "Updated via OpenPBTA subtyping",
    TRUE ~ Notes
  )
)
```

### HGG BRAF V600E

The following point comes from another issue [#627 (comment)](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/627#issuecomment-598789232):

> We have a standalone BRAF V600E subtype, but this is HGG only, and should be labeled as such (will create an update HGG analysis ticket).

```{r}
# Display the rows that still carry the standalone "BRAF V600E" subtype label
filter(compiled_df, molecular_subtype == "BRAF V600E")
```

Now that we have addressed the one sample that should have been reclassified as LGG, we are able to update `molecular_subtype` to `HGG, BRAF V600E` for the remaining samples.

```{r}
# Rename the standalone "BRAF V600E" label to its HGG-specific form; every
# other subtype label is carried over unchanged
compiled_df <- mutate(
  compiled_df,
  molecular_subtype = case_when(
    molecular_subtype == "BRAF V600E" ~ "HGG, BRAF V600E",
    TRUE ~ molecular_subtype
  )
)
```

### `PT_AQWDQW27`

> 2. `PT_AQWDQW27` specimen was consistent with meningioma, even though it has a hallmark EPN fusion, so pathology would also call this a rare meningioma with a _YAP1_ fusion.

The `molecular-subtyping-EPN` module has not been completed yet, but the logic that is in that module may mean that we need to include revising the labels of `PT_AQWDQW27`.

```
# TODO: do we need to update PT_AQWDQW27 once molecular-subtyping-EPN is 
# complete?
```

### Write revised table to file

```{r}
# sort first
compiled_df <- compiled_df %>% 
  arrange(Kids_First_Participant_ID, sample_id)
write_tsv(compiled_df, output_file)
```

## Session Info

```{r}
# Print the details of the current R session into the rendered notebook
sessionInfo()
```


@@ -4152,6 +1117,55 @@

Session Info

+ + + + + + + + +