Merge pull request #981 from allyhawkins/allyhawkins/explore-cell-states-ewings

Explore expression of marker genes for tumor cell states in 2 samples
AlexsLemonade · Jan 14, 2025 · 40d6db1 · 40d6db1
2 parents a18421e + 7888eb9
commit 40d6db1
Show file tree

Hide file tree

Showing 8 changed files with 1,940 additions and 12 deletions.
diff --git a/analyses/cell-type-ewings/exploratory_analysis/06-tumor-cell-state-assignment.Rmd b/analyses/cell-type-ewings/exploratory_analysis/06-tumor-cell-state-assignment.Rmd
diff --git a/analyses/cell-type-ewings/exploratory_analysis/06-tumor-cell-state-assignment.html b/analyses/cell-type-ewings/exploratory_analysis/06-tumor-cell-state-assignment.html
diff --git a/analyses/cell-type-ewings/exploratory_analysis/README.md b/analyses/cell-type-ewings/exploratory_analysis/README.md
@@ -20,6 +20,9 @@ This includes comparing annotations to those obtained from marker gene annotatio
 5. `05-cluster-exploration.Rmd`: This notebook looks at clustering across different parameters to choose the most optimal clusters for `SCPCL000822` and `SCPCL000824`. 
 Additionally, we look at expression of marker genes from `references/visser-all-marker-genes.tsv` across all clusters and use that to refine tumor cell annotations obtained from running `aucell-singler-annotation.sh`. 
 
+6. `06-tumor-cell-state-assignment.Rmd`: This notebook looks at assigning tumor cell states in `SCPCL000822` and `SCPCL000824`. 
+Tumor cells are grouped into `EWS-FLI` high, `EWS-FLI` low, and `EWS-FLI` middle based on expression of marker genes. 
+
 ## Annotation notebooks
 
 The `annotation_notebooks` folder contains exploratory notebooks used to explore and validate annotations in individual samples. 
diff --git a/analyses/cell-type-ewings/plots/SCPCL000822_cluster-marker-gene-exp.png b/analyses/cell-type-ewings/plots/SCPCL000822_cluster-marker-gene-exp.png
diff --git a/analyses/cell-type-ewings/plots/SCPCL000824_cluster-marker-gene-exp.png b/analyses/cell-type-ewings/plots/SCPCL000824_cluster-marker-gene-exp.png
diff --git a/analyses/cell-type-ewings/references/README.md b/analyses/cell-type-ewings/references/README.md
@@ -54,7 +54,13 @@ Each library contains a folder with any annotations file used to run `InferCNV`
 
 The `tumor-cell-state-markers.tsv` file contains a list of marker genes that can be used to classify tumor cell states in Ewing samples. 
 The marker genes included here are specific to EWS-FLI1 high, EWS-FLI1 low, and proliferative tumor cells. 
-This list was obtained based on key genes mentioned in [Goodspeed _et al._](https://doi.org/10.1101/2024.01.18.576251), [Aynaud _et al._](https://doi.org/10.1016/j.celrep.2020.01.049), and [Wrenn _et al._](https://doi.org/10.1158/1078-0432.CCR-23-1111). 
+This list was obtained based on key genes mentioned in the following publications: 
+
+- [Goodspeed _et al._](https://doi.org/10.1101/2024.01.18.576251)
+- [Aynaud _et al._](https://doi.org/10.1016/j.celrep.2020.01.049)
+- [Wrenn _et al._](https://doi.org/10.1158/1078-0432.CCR-23-1111)
+- [Franzetti _et al._](https://doi.org/10.1038/onc.2016.498)
+- [Riggi _et al._](https://doi.org/10.1016/j.ccell.2014.10.004)
 
 ### Gene signatures 
 

diff --git a/analyses/cell-type-ewings/references/tumor-cell-state-markers.tsv b/analyses/cell-type-ewings/references/tumor-cell-state-markers.tsv
@@ -1,6 +1,7 @@
 cell_state	gene_symbol	ensembl_gene_id	source
 proliferative	MKI67	ENSG00000148773	https://doi.org/10.1101/2024.01.18.576251;https://doi.org/10.1158/1078-0432.CCR-23-1111
 proliferative	PCNA	ENSG00000132646	https://doi.org/10.1101/2024.01.18.576251
+proliferative	TOP2A	ENSG00000131747	https://doi.org/10.1101/2024.01.18.576251
 EWS-low	NT5E	ENSG00000135318	https://doi.org/10.1158/1078-0432.CCR-23-1111
 EWS-low	IGFBP3	ENSG00000146674	https://doi.org/10.1016/j.celrep.2020.01.049
 EWS-low	IL8	ENSG00000169429	https://doi.org/10.1016/j.celrep.2020.01.049
@@ -13,7 +14,9 @@ EWS-low	S100A10	ENSG00000197747	https://doi.org/10.1158/1078-0432.CCR-23-1111
 EWS-low	SPARC	ENSG00000113140	https://doi.org/10.1158/1078-0432.CCR-23-1111
 EWS-low	COL1A2	ENSG00000164692	https://doi.org/10.1158/1078-0432.CCR-23-1111
 EWS-low	CD44	ENSG00000026508	https://doi.org/10.1101/2024.01.18.576251
+EWS-low	ICAM1	ENSG00000090339	https://doi.org/10.1038/onc.2016.498
 EWS-high	PRKCB	ENSG00000166501	https://doi.org/10.1016/j.celrep.2020.01.049
 EWS-high	LIPI	ENSG00000188992	https://doi.org/10.1016/j.celrep.2020.01.049
 EWS-high	CCND1	ENSG00000110092	https://doi.org/10.1016/j.celrep.2020.01.049
 EWS-high	NR0B1	ENSG00000169297	https://doi.org/10.1016/j.celrep.2020.01.049
+EWS-high	VRK1	ENSG00000100749	https://doi.org/10.1016/j.ccell.2014.10.004
diff --git a/analyses/cell-type-ewings/scripts/utils/tumor-validation-helpers.R b/analyses/cell-type-ewings/scripts/utils/tumor-validation-helpers.R
@@ -123,10 +123,11 @@ create_marker_gene_df <- function(
 # output is a data frame with barcodes and `{cell_type}_sum`
 calculate_sum_markers <- function(marker_genes_df,
                                   sce,
-                                  type) {
+                                  type, 
+                                  cell_type_column = cell_type) {
   # get list of marker genes to use
   marker_genes <- marker_genes_df |>
-    dplyr::filter(cell_type == type) |>
+    dplyr::filter({{cell_type_column}} == type) |>
     dplyr::pull(ensembl_gene_id)
 
   # get the gene expression counts for all marker genes
@@ -153,6 +154,42 @@ calculate_sum_markers <- function(marker_genes_df,
 }
 
 
+# calculate the mean expression of all markers for a given cell type
+# takes as input the marker gene df, which must contain `ensembl_gene_id` and
+# the column passed (unquoted) as `cell_type_column`, `cell_type` by default
+# For any genes assigned to the specified `type`, the mean of the logcounts is
+# calculated for each cell
+# output is a data frame with `barcodes` and `{type}_mean`
+calculate_mean_markers <- function(marker_genes_df,
+                                   sce,
+                                   type,
+                                   cell_type_column = cell_type) {
+  # get list of marker genes to use
+  marker_genes <- marker_genes_df |>
+    dplyr::filter({{ cell_type_column }} == type) |>
+    dplyr::pull(ensembl_gene_id)
+
+  # get the mean of the logcounts across all marker genes
+  # the result is named by the columns of the logcounts matrix, so taking
+  # column means directly gives one value per barcode without a transpose
+  mean_exp <- logcounts(sce[marker_genes, ]) |>
+    as.matrix() |>
+    colMeans()
+
+  df <- data.frame(
+    barcodes = names(mean_exp),
+    mean_exp = mean_exp
+  )
+
+  # get rid of extra " cells" at end of some of the names
+  type <- stringr::str_remove(type, " cells")
+
+  # add type to the column name for easier joining
+  colnames(df) <- c("barcodes", glue::glue("{type}_mean"))
+
+  return(df)
+}
+
 
 
 # Heatmaps ---------------------------------------------------------------------
@@ -163,7 +200,8 @@ plot_gene_heatmap <- function(
     df,
     row_title = "",
     legend_title = "",
-    annotation = NULL) {
+    annotation = NULL,
+    cluster_columns = TRUE) {
   # plot heatmap of marker genes
   heatmap <- ComplexHeatmap::Heatmap(
     df,
@@ -179,7 +217,7 @@ plot_gene_heatmap <- function(
     row_dend_side = "right",
     row_names_gp = grid::gpar(fontsize = 10),
     ## Column parameters
-    cluster_columns = TRUE,
+    cluster_columns = cluster_columns,
     show_column_names = FALSE,
     bottom_annotation = annotation,
     heatmap_legend_param = list(
@@ -239,7 +277,8 @@ plot_cnv_heatmap <- function(
 # colors are automatically determined using the `Dark2` palette and assigning to cell types listed in the `annotation_column`
 full_celltype_heatmap <- function(classification_df,
                                   gene_exp_columns,
-                                  annotation_column) {
+                                  annotation_column,
+                                  cluster_columns = TRUE) {
   # get list of all cell types being plotted and assign colors from Dark2 palette
   cell_types <- unique(classification_df[[annotation_column]])
   num_cell_types <- length(cell_types)
@@ -249,25 +288,26 @@ full_celltype_heatmap <- function(classification_df,
 
   # create annotation for heatmap
   annotation <- ComplexHeatmap::columnAnnotation(
-    singler = classification_df[[annotation_column]],
+    annotation = classification_df[[annotation_column]],
     col = list(
-      singler = colors
+      annotation = colors
     )
   )
 
   # build matrix for heatmap cells x gene set sum or mean
   heatmap_mtx <- classification_df |>
-    dplyr::select(barcodes, gene_exp_columns) |>
+    dplyr::select(barcodes, all_of(gene_exp_columns)) |>
     tibble::column_to_rownames("barcodes") |>
     as.matrix() |>
     t()
-  rownames(heatmap_mtx) <- stringr::str_remove(rownames(heatmap_mtx), "_sum|mean-")
+  rownames(heatmap_mtx) <- stringr::str_remove(rownames(heatmap_mtx), "_sum|mean-|_mean")
 
   # plot heatmap of marker genes
   plot_gene_heatmap(heatmap_mtx,
     row_title = "",
     legend_title = "Marker gene \nexpression",
-    annotation = annotation
+    annotation = annotation,
+    cluster_columns = cluster_columns
   )
 }
 
@@ -293,7 +333,7 @@ plot_density <- function(classification_df,
     ) +
     labs(
       x = "Gene set expression",
-      y = "Cell type annotation",
+      y = annotation_column,
       title = geneset_name
     ) +
     scale_alpha_identity() +