|
| 1 | +library(ggrepel) |
| 2 | + |
| 3 | +# Compute the "least extreme" (l.e.) effect differences for a |
| 4 | +# non-negative effects matrix. |
| 5 | +compute_le_diff <- function (effects_matrix, |
| 6 | + compare_dims = seq(1,ncol(effects_matrix))) { |
| 7 | + m <- ncol(effects_matrix) |
| 8 | + out <- effects_matrix |
| 9 | + for (i in 1:m) { |
| 10 | + dims <- setdiff(compare_dims,i) |
| 11 | + out[,i] <- effects_matrix[,i] - apply(effects_matrix[,dims],1,max) |
| 12 | + } |
| 13 | + return(out) |
| 14 | +} |
| 15 | + |
| 16 | +# This creates a "distinctive genes plot"; this is a plot in which the |
| 17 | +# effect estimate is shown on the x axis and the "least extreme" |
| 18 | +# difference between the estimated effects is shown on the y axis. The |
| 19 | +# idea is that these scatterplots should better highlight the |
| 20 | +# "interesting" genes for a given dimension/factor. The "label_gene" |
| 21 | +# input argument is a function that returns TRUE when the gene should |
| 22 | +# be labeled in the plot; the default is that it always returns FALSE |
| 23 | +# (so that no genes are labeled in the plot). |
| 24 | +distinctive_genes_scatterplot <- function (effects_matrix, k, |
| 25 | + effect_quantile_prob = 0.999, |
| 26 | + lediff_quantile_prob = 0.999) { |
| 27 | + lediff <- compute_le_diff(effects_matrix) |
| 28 | + genes <- rownames(effects_matrix) |
| 29 | + pdat <- data.frame(gene = genes, |
| 30 | + effect = effects_matrix[,k], |
| 31 | + lediff = lediff[,k]) |
| 32 | + effect_quantile <- quantile(pdat$effect,effect_quantile_prob) |
| 33 | + lediff_quantile <- quantile(pdat$lediff,lediff_quantile_prob) |
| 34 | + i <- which(pdat$effect < effect_quantile & pdat$lediff < lediff_quantile) |
| 35 | + pdat[i,"gene"] <- NA |
| 36 | + return(ggplot(pdat,aes(x = effect,y = lediff,label = gene)) + |
| 37 | + geom_point(color = "dodgerblue") + |
| 38 | + geom_hline(yintercept = 0,color = "magenta",linetype = "dotted", |
| 39 | + linewidth = 0.5) + |
| 40 | + geom_text_repel(color = "black",size = 2.25, |
| 41 | + fontface = "italic",segment.color = "black", |
| 42 | + segment.size = 0.25,min.segment.length = 0, |
| 43 | + max.overlaps = Inf,na.rm = TRUE) + |
| 44 | + labs(x = "log-fold change",y = "l.e. difference") + |
| 45 | + theme_cowplot(font_size = 10)) |
| 46 | +} |
| 47 | + |
| 48 | +F <- fl_nmf_ldf$F |
| 49 | +colnames(F) <- paste0("k",1:9) |
| 50 | +kset <- paste0("k",4:6) |
| 51 | +p1 <- distinctive_genes_scatterplot(F[,kset],"k4") + ggtitle("factor k4") |
| 52 | +p2 <- distinctive_genes_scatterplot(F[,kset],"k5") + ggtitle("factor k5") |
| 53 | +p3 <- distinctive_genes_scatterplot(F[,kset],"k6") + ggtitle("factor k6") |
| 54 | +print(plot_grid(p1,p2,p3,nrow = 1,ncol = 3)) |
0 commit comments