diff --git a/.github/workflows/oma_nix_dev.yml b/.github/workflows/oma_nix_dev.yml deleted file mode 100644 index 18c4e4c56..000000000 --- a/.github/workflows/oma_nix_dev.yml +++ /dev/null @@ -1,31 +0,0 @@ -name: "Update cachix cache for Orchestrating Microbiome Analysis environment" - -on: - push: - branches: - - devel - - RELEASE_** -jobs: - r-update-cachix: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: cachix/install-nix-action@v25 - with: - nix_path: nixpkgs=https://github.com/rstats-on-nix/nixpkgs/archive/0a6b0ea0f895208a490ec7fb3fe63232117511b7.tar.gz - - - uses: cachix/cachix-action@v14 - with: - name: oma - authToken: '${{ secrets.CACHIX_AUTH }}' - - - run: | - if [ "$RUNNER_OS" == "Linux" ]; then - nix-build --argstr system x86_64-linux - nix-build --argstr system aarch64-linux - else - nix-build - fi - - - run: nix-store -qR --include-outputs $(nix-instantiate default.nix) | cachix push oma - - run: nix-shell --run "echo OK" diff --git a/.gitignore b/.gitignore index 23d1ae828..d15a3028d 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,6 @@ renv.lock cachix/.envrc cachix/.devenv cachix/.direnv + +# Nix stuff +*.nix diff --git a/DESCRIPTION b/DESCRIPTION index 8ed2a338c..1643b26cc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,19 +1,14 @@ Package: OMA Title: Orchestrating Microbiome Analysis with Bioconductor -Version: 0.98.29 +Version: 0.98.30 Date: 2024-10-04 Authors@R: - c(person("Leo", "Lahti", role = c("aut"), - comment = c(ORCID = "0000-0001-5537-637X")), - person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), - email = "tuomas.v.borman@utu.fi", - comment = c(ORCID = "0000-0002-8563-8884")), - person("Felix GM", "Ernst", email = "felix.gm.ernst@outlook.com", - role = c("aut"), - comment = c(ORCID = "0000-0001-5064-0928")), - person("and others", "(see the full list of contributors)", - role = c("ctb")) - ) + c( + person(given = "Tuomas", family = "Borman", role = c("aut", "cre"), email = 
"tuomas.v.borman@utu.fi", comment = c(ORCID = "0000-0002-8563-8884")), + person("Leo", "Lahti", role = c("aut"), comment = c(ORCID = "0000-0001-5537-637X")), + person("Felix GM", "Ernst", email = "felix.gm.ernst@outlook.com", role = c("aut"), comment = c(ORCID = "0000-0001-5064-0928")), + person("and others", "(see the full list of contributors)", role = c("ctb")) + ) Description: This is a reference cookbook for **Microbiome Data Science** with R and Bioconductor. @@ -43,6 +38,7 @@ Suggests: cobiclust, ComplexHeatmap, corpcor, + cowplot, curatedMetagenomicData, dada2, dendextend, @@ -51,16 +47,16 @@ Suggests: dplyr, DT, factoextra, - forcats, fido, + forcats, ggplot2, ggpubr, ggtree, glmnet, glue, grid, - gtools, gsEasy, + gtools, igraph, IntegratedLearner, knitr, @@ -75,12 +71,13 @@ Suggests: MMUPHin, MOFA2, multiview, - NetCoMi, NbClust, + NetCoMi, NMF, patchwork, phyloseq, plotly, + plotROC, purrr, qgraph, RColorBrewer, @@ -88,9 +85,9 @@ Suggests: reshape2, reticulate, rgl, - ROCR, scales, scater, + scuttle, sechm, sessioninfo, shadowtext, @@ -98,6 +95,7 @@ Suggests: SPRING, stats, stringr, + SuperLearner, tidyverse, topGO, vegan, @@ -112,7 +110,7 @@ Remotes: github::GraceYoon/SPRING, github::himelmallick/IntegratedLearner VignetteBuilder: knitr -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 BiocType: Book BiocBookTemplate: 1.0.5 SystemRequirements: quarto diff --git a/PackageInstallations_Troubleshoots.qmd b/PackageInstallations_Troubleshoots.qmd index 32cdc6b3d..0631f51ad 100644 --- a/PackageInstallations_Troubleshoots.qmd +++ b/PackageInstallations_Troubleshoots.qmd @@ -16,7 +16,7 @@ First of all, please ensure that you have an up-to-date version of R ## Mac M1 user When attempting to install miaverse packages, you may encounter -installation failures related to the 'scuttle' and 'scatter' +installation failures related to the 'scuttle' and 'scater' dependencies, which require a gcc compiler for installation. 
The error message might resemble the following: diff --git a/default.nix b/default.nix deleted file mode 100644 index 5892efe9e..000000000 --- a/default.nix +++ /dev/null @@ -1,276 +0,0 @@ -# Use a specific version of nixpkgs from an bleeding-edge fork of github.com/NixOS/nixpkgs -let - pkgs = - import - (fetchTarball "https://github.com/rstats-on-nix/nixpkgs/archive/0a6b0ea0f895208a490ec7fb3fe63232117511b7.tar.gz") - { }; - # Add generic R packages required for this build - rpkgs = with pkgs.rPackages; [ - BiocManager - BiocBook - ]; - - # Build mia package - mia = [ - (pkgs.rPackages.buildRPackage { - name = "mia"; - src = pkgs.fetchgit { - url = "https://github.com/microbiome/mia"; - branchName = "devel"; - rev = "b627edce620807af20d3ed85c6667f4b8ef8f2ea"; - sha256 = "sha256-VJVQfl7LQzooc1NHgu0qAstPFiC9d/8hH23KODV1V0Y="; - }; - # mia dependencies (see DESCRIPTION) - propagatedBuildInputs = builtins.attrValues { - inherit (pkgs.rPackages) - ape - BiocGenerics - BiocParallel - Biostrings - bluster - DECIPHER - decontam - DelayedArray - DelayedMatrixStats - DirichletMultinomial - dplyr - IRanges - MASS - MatrixGenerics - mediation - MultiAssayExperiment - rlang - S4Vectors - scater - scuttle - SingleCellExperiment - SummarizedExperiment - tibble - tidyr - TreeSummarizedExperiment - vegan - ; - }; - }) - ]; - - # Build miaTime package - miatime = [ - (pkgs.rPackages.buildRPackage { - name = "miaTime"; - src = pkgs.fetchgit { - url = "https://github.com/microbiome/miaTime"; - branchName = "master"; - rev = "9fe9771f7329fc991796eb79cc1e17ee06e1bc24"; - sha256 = "sha256-IL9CbL0HWKlpmMHq1Rxen7+utzpw4qfb4NuVvM0N0oA="; - }; - # miaTime dependencies (see DESCRIPTION) - propagatedBuildInputs = - builtins.attrValues { - inherit (pkgs.rPackages) - dplyr - S4Vectors - SummarizedExperiment - SingleCellExperiment - vegan - ; - } - ++ [ mia ]; - }) - ]; - - # Build SpiecEasi package - spieceasi = [ - (pkgs.rPackages.buildRPackage { - name = "SpiecEasi"; - src = pkgs.fetchgit { - 
url = "https://github.com/zdk123/SpiecEasi"; - branchName = "master"; - rev = "5f396da85baa114b31c13d9744c05387a1b04c23"; - sha256 = "sha256-Z3x7hK2ieLxjQVn94DCPJCDP86TK+k5no4/e5jb8ihg="; - }; - # SpiecEasi dependencies (see DESCRIPTION) - propagatedBuildInputs = builtins.attrValues { - inherit (pkgs.rPackages) - huge - pulsar - MASS - VGAM - Matrix - glmnet - RcppArmadillo - ; - }; - }) - ]; - - # Build SPRING package - spring = [ - (pkgs.rPackages.buildRPackage { - name = "SPRING"; - src = pkgs.fetchgit { - url = "https://github.com/GraceYoon/SPRING"; - branchName = "master"; - rev = "3d641a4b939b1b3cc042c064a05000aa48266af0"; - sha256 = "sha256-H1kEy5dPPjiUPFiQLFzbdsO5t204NSCPnQfqPQitMTs="; - }; - # SPRING dependencies (see DESCRIPTION) - propagatedBuildInputs = - builtins.attrValues { - inherit (pkgs.rPackages) - mixedCCA - huge - pulsar - rootSolve - mvtnorm - ; - } - ++ [ spieceasi ]; - }) - ]; - - # Build NetCoMi package - netcomi = [ - (pkgs.rPackages.buildRPackage { - name = "NetCoMi"; - src = pkgs.fetchgit { - url = "https://github.com/stefpeschel/NetCoMi"; - branchName = "main"; - rev = "0809c7a5e0f1e74cb9023fbf1186d477739cc6f7"; - sha256 = "sha256-X+isckPsojo2rfaICXXmydN1AcT1IzOJaMqGEe9CIxE="; - }; - # NetCoMi dependencies (see DESCRIPTION) - propagatedBuildInputs = - builtins.attrValues { - inherit (pkgs.rPackages) - Biobase - corrplot - doSNOW - fdrtool - filematrix - foreach - gtools - huge - igraph - MASS - Matrix - mixedCCA - orca - phyloseq - pulsar - qgraph - RColorBrewer - Rdpack - rlang - vegan - WGCNA - ; - } - ++ [ spring ]; - }) - ]; - - # Build OMA book/package - oma = [ - (pkgs.rPackages.buildRPackage { - name = "oma"; - src = pkgs.fetchgit { - url = "https://github.com/microbiome/OMA"; - branchName = "devel"; - rev = "67dd77ef36f7b90c416ef98ce5c8f2086f64fbdb"; - sha256 = "sha256-U4fHmVKvoCq6YNeSgoT1eY1ZD6AiX+xiAXtq9pQl5Ak="; - }; - # oma dependencies - propagatedBuildInputs = - builtins.attrValues { - inherit (pkgs.rPackages) - rebook - 
glue - sessioninfo - microbiomeDataSets - curatedMetagenomicData - microbiome - ggsignif - SummarizedExperiment - TreeSummarizedExperiment - kableExtra - dendextend - NbClust - randomcoloR - cobiclust - biclust - tidyverse - ALDEx2 - ANCOMBC - Maaslin2 - MicrobiomeStat - GUniFrac - devtools - ComplexHeatmap - mikropml - MLeval - sechm - ggpubr - fido - rgl - miaViz - SuperLearner - multiview - MMUPHin - gsEasy - topGO - ; - } - ++ [ - miatime - spieceasi - ]; - }) - ]; - - # System dependencies - system_packages = builtins.attrValues { inherit (pkgs) R glibcLocales quarto; }; - - # R wrapper for nix - R = pkgs.rWrapper.override { - packages = [ - rpkgs - miatime - mia - spieceasi - spring - netcomi - oma - ]; - }; - - # RStudio wrapper for nix - rstudio_pkgs = pkgs.rstudioWrapper.override { - packages = [ - rpkgs - miatime - mia - spieceasi - spring - netcomi - oma - ]; - }; -in -# Build R environment -pkgs.mkShell { - LOCALE_ARCHIVE = - if pkgs.system == "x86_64-linux" then "${pkgs.glibcLocalesUtf8}/lib/locale/locale-archive" else ""; - LANG = "en_US.UTF-8"; - LC_ALL = "en_US.UTF-8"; - LC_TIME = "en_US.UTF-8"; - LC_MONETARY = "en_US.UTF-8"; - LC_PAPER = "en_US.UTF-8"; - LC_MEASUREMENT = "en_US.UTF-8"; - - buildInputs = [ - R - rstudio_pkgs - system_packages - ]; -} diff --git a/inst/pages/integrated_learner.qmd b/inst/pages/integrated_learner.qmd index 734e3c701..5d1579175 100644 --- a/inst/pages/integrated_learner.qmd +++ b/inst/pages/integrated_learner.qmd @@ -196,7 +196,7 @@ provides us with the overall importance of each feature in the final model. 
```{r} #| label: feat_importance -library(ggplot2) +library(miaViz) # Get individual models models <- fit$model_fits$model_layers @@ -208,28 +208,18 @@ importances <- lapply(seq_len(length(models)), function(i){ temp <- temp * fit$weights[[i]] return(temp) }) -# Combine and order to most important features +# Combine the feature importances importances <- do.call(rbind, importances) -importances <- importances[ - order(importances, decreasing = TRUE), , drop = FALSE] -# Add features to column -importances <- importances |> as.data.frame() -importances[["Feature"]] <- factor( - rownames(importances), levels = rownames(importances)) -# Convert to 0-1 scale -importances[[1]] <- importances[[1]] / sum(importances[[1]]) -# Get top 20 importances -top_n <- 20 -importances <- importances[ seq_len(top_n), ] - -# Plot as a bar plot -p <- ggplot(importances, aes(x = MeanDecreaseGini, y = Feature)) + - geom_bar(stat = "identity") + +# Plot feature importances +p <- plotLoadings(importances, ncomponents = 1, n = 20, show.color = FALSE) p ``` -From the plot, we can observe that _`r importances[1, "Feature"]`_ and -_`r importances[1, "Feature"]`_ appear to have the greatest predictive power +From the plot, we can observe that +_`r rownames(importances)[order(importances, decreasing = TRUE)][[1]]`_ and +_`r rownames(importances)[order(importances, decreasing = TRUE)][[2]]`_ appear +to have the greatest predictive power among all the features in determining the outcome. However, the predictive power appears to be fairly evenly distributed across all features. 
diff --git a/inst/pages/machine_learning.qmd b/inst/pages/machine_learning.qmd index 3ffed514d..846299437 100644 --- a/inst/pages/machine_learning.qmd +++ b/inst/pages/machine_learning.qmd @@ -196,7 +196,8 @@ model <- train( tuneGrid = tune_grid, trControl = train_control, weights = class_weights, - max_delta_step = 1 + max_delta_step = 1, + verbosity = 0 ) # Get predictions @@ -211,41 +212,37 @@ technique for binary classification problems. ```{r} #| label: ROC -library(ROCR) +library(plotROC) -# Get positive class -pos_class <-levels(res[["obs"]])[[1]] -# Create ROC plot -pred <- prediction(res[[pos_class]], ifelse(res[["obs"]] == pos_class, 1, 0)) -perf <- performance(pred, measure = "tpr", x.measure = "fpr") -p <- plot(perf) +# Prepare data for ROC +roc_data <- data.frame( + observed_class = as.numeric(res[["obs"]] == "healthy"), + predicted_probability = res[["healthy"]] + ) + +# Plot ROC curve +p <- ggplot(roc_data, aes(m = predicted_probability, d = observed_class)) + + geom_roc() + + style_roc(theme = theme_minimal()) p ``` -XGBoost model returns also feature importances that can be visualized with bar +The XGBoost model also returns feature importances that can be visualized with a bar plot. 
```{r} #| label: xgboost_feat library(xgboost) +library(miaViz) + +# Get feature importance and convert to matrix +df <- xgb.importance(model = model$finalModel) |> as.data.frame() +rownames(df) <- df[["Feature"]] +df <- as.matrix(df[, "Gain", drop = FALSE]) -# Get feature importance -df <- xgb.importance(model = model$finalModel) -# Take top 20 features -df <- df[seq_len(20), ] -# Factorize to preserve order -df[["Feature"]] <- factor(df[["Feature"]], levels = df[["Feature"]]) -# Round values, add percentage symbol -df[["Percentage"]] <- paste0(round(df[["Gain"]], 3)*100, "%") - -# Create a plot -p <- ggplot(df, aes(x = Feature, y = Gain)) + - geom_bar(stat = "identity") + - geom_text(aes(label = Percentage), hjust = -0.1, size = 2.5) + - expand_limits(y = max(df[["Gain"]]) + 0.01) + - scale_y_continuous(labels = scales::percent) + - coord_flip() +# Create plot for top 20 features +p <- plotLoadings(df, ncomponents = 1, n = 20, show.color = FALSE) p ``` diff --git a/inst/pages/training.qmd b/inst/pages/training.qmd index 702fdfb47..4cea0b040 100644 --- a/inst/pages/training.qmd +++ b/inst/pages/training.qmd @@ -12,8 +12,10 @@ The page provides practical information to support training and self-study. Brief checklist to prepare for training (see below for links). - Install the recommended software + - If the time allows, watch the short online videos and familiarize yourself with the other available material + - Join Gitter online chat for support ## Recommended software {#sec-software} @@ -32,21 +34,22 @@ information. RStudio is optional. - Install key R packages (Section [@sec-ecosystem] provides an installation script) -- After a successful installation you can consider trying out examples -from Section [@sec-exercises] already before training. **You can run -the workflows by simply copy-pasting examples.** You can then test -further examples from this tutorial, modifying and applying these -techniques to your own data. 
Plain source code for the individual chapters -of this book are available via -[Github](https://github.com/microbiome/OMA/tree/master/R) - -- If you have access to CSC notebook you can find instructions from -[here](https://microbiome.github.io/outreach/). +Once you've successfully installed the software, consider exploring examples +from Section [@sec-exercises] even before starting the training. Running the +workflows is easy — just copy and paste the examples. You can then try +additional examples from the book, adapting and applying the techniques to your +own data. Source code for each chapter is available on +[GitHub](https://github.com/microbiome/OMA). ## Study material {#sec-material} We encourage you to familiarize yourself with the material and test examples -in advance but this is optional: +in advance but this is optional. If you're new to this topic, the following +resources may be particularly helpful. + +- [Interactive R tutorials for beginners](https://rstudio.github.io/learnr/articles/examples.html) + +- [Introduction to R](https://noppe.2.rahtiapp.fi/main/catalog) (available in CSC Noppe, see [@sec-vm]) - [Introduction to data analysis with R and Bioconductor](https://carpentries-incubator.github.io/bioc-intro/) (for beginners with R) @@ -61,3 +64,15 @@ in advance but this is optional: - @sec-exercises for self-study - @sec-resources and links to complementary external material + +## Virtual machines and learning environments {#sec-vm} + +In most training courses, we use learning environments with the required software pre-installed. Please check the course details to see if this applies to your course. + +- CSC Noppe (formerly Notebooks) is available for users with accounts at Finnish +higher education institutions or state research institutes. 
For more +information, visit [this page](https://microbiome.github.io/outreach/). + +- [Bioconductor workshops](https://workshop.bioconductor.org/) are accessible +with a Bioconductor account, providing pre-installed Bioconductor software and +workshops.