# phyloseq_OTUs

###########################
# Phyloseq analysis of rRNA OTUs
# Code written by Thanassis Zervas & Maria Scheel
# az@envs.au.dk, maria.scheel@ecos.au.dk
# Creative commons, 2022. 

###----Load packages----
library("phyloseq"); packageVersion("phyloseq")
library("ggplot2"); packageVersion("ggplot2")
library("tidyverse"); packageVersion("tidyverse")
library("RColorBrewer"); packageVersion("RColorBrewer")
library("MetBrewer"); packageVersion("MetBrewer")

###----Import data----
# NOTE(review): this absolute, user-specific path only resolves on the original
# author's machine; adjust it before re-running the script elsewhere.
setwd("C:/Users/au620434/OneDrive - Aarhus Universitet/PhD/bioinformatics/RNA/transcriptomics/phyloseq/")
set.seed(42)

# BEWARE: check whether the numbers in the CSV files use "." or "," as the
# decimal separator. If ",", use read.csv2() instead of read.csv().
otu <- read.csv(file = 'RNA_OTU.csv', sep = ';', header = TRUE, fileEncoding="UTF-8-BOM") # OTU table across 30 samples
tax <- as.matrix(read.csv2(file = 'RNA_TAX.csv', sep = ';', header = TRUE, fileEncoding="UTF-8-BOM")) # taxonomy, in the same order as the OTUs

# Create the metadata table: one row per column (sample) of the OTU table.
meta <- as.data.frame(colnames(otu))

# The 30 samples are laid out as 3 replicates x 10 depths, so every
# per-depth property is written once and repeated for each replicate.
n_replicates <- 3

meta$triplicate <- rep(c(1, 2, 3), each = 10)
meta$layer <- as.factor(rep(
  c("AL", "AL", "AL", "AL", "TZ1", "TZ1", "TZ1", "TZ2", "TZ2", "PF"),
  times = n_replicates
))
meta$age <- as.factor(rep(
  c("AY", "AY", "AY", "AY", "AM", "AM", "AO", "AO", "AO", "AO"),
  times = n_replicates
))
meta$depth <- rep(seq(10, 100, by = 10), times = n_replicates)
# Depth is used as a grouping variable further down, so store it as a factor.
# Converting from the numeric column keeps the levels in numeric order
# (10, 20, ..., 100). The original line referenced an undefined object
# (`depth_tripl`) and stopped the script here.
meta$depth <- as.factor(meta$depth)
meta$SOM <- rep(
  c(8.73, 5.39, 10.19, 13.68, 2.59, 2.93, 1.52, 1.1, 1, 1.04),
  times = n_replicates
)
meta$H2O <- rep(
  c(28.8, 22.52, 22.05, 26.57, 7.73, 15.61, 8.72, 6.35, 7.96, 7.8),
  times = n_replicates
)
meta$pH <- rep(
  c(4.22, 4.02, 4.29, 4.25, 4.64, 4.13, 4.86, 4.48, 4.57, 4.91),
  times = n_replicates
)

# From the relative abundance table: total abundance per sample of protozoa,
# prokaryotes and predatory prokaryotes (bacpred).
meta$protozoa <- as.numeric(c("3.89","1.71","2.18","3.72","8.39","3.26","6.17","5.66","4.49","2.42","2.89","1.79","2.10","2.59","5.02","6.98","5.72","5.67","5.34","4.91","3.89","1.41","2.16","3.28","4.58","6.43","9.29","4.48","4.85","3.41"))
meta$prokarya <- as.numeric(c("70.22","88.91","87.26","77.27","69.59","85.36","81.82","81.54","79.86","88.89","75.33","88.12","84.66","79.52","73.27","76.33","83.39","82.41","82.65","78.18","70.22","90.58","87.04","76.26","76.27","67.37","70.12","77.90","70.69","85.07"))
meta$bacpred <- as.numeric(c("7.71","5.35","7.20","13.43","13.43","3.45","4.05","1.97","1.02","0.56","6.44","5.28","7.22","13.78","13.39","4.92","2.74","1.72","1.21","4.78","7.71","4.98","7.16","13.92","7.57","15.55","6.25","2.74","0.96","1.20"))

meta
# Use the sample names as row names.
rownames(meta) <- meta[["colnames(otu)"]]

###----Make Phyloseq object----
# Wrap the raw tables in their phyloseq container classes and preview each.
OTU <- otu_table(otu, taxa_are_rows = TRUE)
head(OTU)
TAX <- tax_table(tax)
head(TAX)
META <- sample_data(meta)

head(META)

# Bundle OTU counts, taxonomy and sample metadata into one phyloseq object.
physeq <- phyloseq(OTU, TAX, META)

physeq

taxa_names(physeq)
# OTUnames <- read.delim("OTUs.txt")
# rownames(OTU)

# Keep an untouched copy before the object is modified below.
backup_physeq_all <- physeq

# Sanity checks: per-sample totals, names, ranks, variables, a corner of the
# count table and the first few taxa names.
sample_sums(physeq)
sample_names(physeq)
rank_names(physeq)
sample_variables(physeq)
otu_table(physeq)[1:3, 1:2]
taxa_names(physeq)[1:5]
# taxa_names(physeq) <- rownames(otu)
# otu <- otu[,-1]

###----Tidy up names----
# Give the seven taxonomy columns their rank names.
colnames(tax_table(physeq)) <- c(k = "Kingdom", p = "Phylum", c = "Class",
                                 o = "Order", f = "Family", g = "Genus", s = "Species")
rank_names(physeq)

# Blank out placeholder labels. This does not affect abundances, only the
# "name" shown at the species/genus/etc. level.
# Everything from "Unknown" to the end of the entry is removed.
tax_table(physeq) <- gsub("Unknown.*", "", tax_table(physeq))
# Same for "incertae sedis" labels. The pattern is spelled out literally
# (case-insensitive, space or underscore between the words): the previous
# glob-like pattern "incer*ae sedis*" had no "t" and so never matched.
tax_table(physeq) <- gsub("incertae[ _]sedis.*", "", tax_table(physeq),
                          ignore.case = TRUE)

###----Make unique OTU labels----
makeTaxLabel <- function(OTU, mydata) {

  # Makes a string label using the lowest informative tax level
  #
  # Args:
  #   OTU: OTU number (or name); coerced to character and used to look up
  #        the matching row of the tax table
  #   mydata: the phyloseq object with the tax table
  #
  # Returns:
  #   a length-one character vector: the entry of the lowest rank that is
  #   not NA, blank or a bare rank prefix ("g_", "g__", ...). If no rank is
  #   informative, the OTU identifier itself is returned so the caller
  #   always gets a usable label (previously this case raised an error).
  OTU <- as.character(OTU)  # the OTU numbers are stored as character not integer!
  taxstrings <- as.character(tax_table(mydata)[OTU])

  # An entry is uninformative when it is missing, blank, or only a rank
  # prefix with one or more underscores and no name behind it.
  uninformative <- is.na(taxstrings) |
    taxstrings %in% c(" ", "") |
    grepl("^[kpcofgs]_+$", taxstrings)

  informative <- which(!uninformative)
  if (length(informative) == 0L) {
    return(OTU)
  }

  # Ranks are ordered from highest to lowest, so the last informative
  # position is the lowest informative level.
  taxstrings[max(informative)]
}


# Build one label per OTU from its lowest informative taxonomic level.
# vapply() replaces the former rbind()-in-a-loop (which copied the growing
# result on every iteration and used 1:length(), unsafe for empty input) and
# guarantees a plain character vector with one entry per taxon.
mynames <- vapply(
  taxa_names(physeq),
  makeTaxLabel,
  FUN.VALUE = character(1),
  mydata = physeq,
  USE.NAMES = FALSE
)

# Several OTUs can share the same label; make.unique() appends ".1", ".2", ...
# so the labels can serve as taxa names.
mynames <- make.unique(mynames)

head(taxa_names(physeq))
taxa_names(physeq) <- mynames

# Inspect both ends of the count and taxonomy tables after renaming.
head(otu_table(physeq))
tail(otu_table(physeq))

head(tax_table(physeq))
tail(tax_table(physeq))

# Strip the rank prefixes from every entry of the tax table, one prefix at a
# time, from species up to kingdom.
for (prefix in c("s__", "g__", "f__", "o__", "c__", "p__", "k__")) {
  tax_table(physeq) <- gsub(prefix, "", tax_table(physeq))
}

# Replace spaces with underscores:
# tax_table(physeq) <- gsub(" ", "_", tax_table(physeq))

# Check the cleaned taxonomy and the count table.
head(tax_table(physeq), n = 30)
head(otu_table(physeq))


### ----Subsetting----
# based on https://david-barnett.github.io/microViz/reference/tax_select.html
#Make a phyloseq object that only contains OTUs belonging to a specific group or clade.

# Name lists used as search terms by tax_select() below.
prokarya <- c("Bacteria", "Archaea")
# NOTE(review): "Choanoflaggellida" is spelled with a double "g" here; if the
# taxonomy table uses "Choanoflagellida" this entry will not match - confirm
# against the tax table before changing it.
protozoa <- c(
  "LEMD255", "LKM74", "Lobosa", "Schizoplasmodiida", "Heteronematina",
  "Jakobida", "Kinetoplastea", "Telonemia", "Choanoflaggellida", "Rotifera",
  "Cercozoa", "Ciliophora"
)
bactpred <- c("Bdellovibrionales", "Myxococcales")
# pred = protozoa followed by the predatory bacteria
pred <- c(protozoa, bactpred)
# all = prokaryotes followed by the protozoa
all <- c(prokarya, protozoa)
# Global copy of `physeq` under the name `ps`. Inside tax_select() the `ps`
# parameter shadows this object, and no other visible line of the script reads
# the global - NOTE(review): looks like a debugging leftover; confirm before
# removing.
ps<- physeq
# Keep (or drop) the taxa of a phyloseq object whose taxonomy matches a list
# of names.
#
# Args:
#   ps: phyloseq object (a "ps_extra" object is unwrapped with ps_get() first)
#   tax_list: character vector of names to look for
#   ranks_searched: "all", or the rank names (columns of the tax table) to
#     restrict the search to
#   strict_matches: if TRUE, an entry must equal one of tax_list exactly
#   n_typos: if > 0 (and strict_matches is not TRUE), approximate matching
#     with agrepl() and this max.distance
#   deselect: if TRUE, the matching taxa are removed instead of kept
#
# Returns:
#   the pruned phyloseq object. Stops with an error when invalid rank names
#   are given, or when nothing matched and deselect is FALSE.
tax_select <- function(ps,
                       tax_list,
                       ranks_searched = "all",
                       strict_matches = FALSE,
                       n_typos = 0,
                       deselect = FALSE) {
  if (inherits(ps, "ps_extra")) ps <- ps_get(ps)

  # taxonomy table that will be searched
  tt <- phyloseq::tax_table(ps)

  # optionally narrow the search to the requested ranks
  if (!identical(ranks_searched, "all")) {
    if (!all(ranks_searched %in% phyloseq::rank_names(ps))) {
      stop(
        "Invalid rank names given: ", paste(ranks_searched, collapse = " "),
        "\n- Should be any/some of: ", paste(phyloseq::rank_names(ps), collapse = "/")
      )
    }
    tt <- tt[, ranks_searched, drop = FALSE]
  }

  # choose how a single row of the tax table is tested, then apply it once
  row_matches <- if (isTRUE(strict_matches)) {
    # exact equality with one of the listed names
    function(r) any(r %in% tax_list)
  } else if (n_typos > 0) {
    # approximate matching of every listed name against the row
    function(r) {
      any(sapply(tax_list, FUN = function(x) {
        agrepl(x, r, max.distance = n_typos)
      }))
    }
  } else {
    # all listed names joined into one regex of OR alternatives
    or_pattern <- paste(tax_list, collapse = "|")
    function(r) any(grepl(or_pattern, r))
  }
  keep <- apply(tt, MARGIN = 1, FUN = row_matches)

  # taxa whose own name (row name) is listed count as matches too
  keep <- keep | rownames(tt) %in% tax_list

  # nothing matched is an error, except when deselecting
  if (isFALSE(deselect) && !any(keep)) {
    stop("No taxa matched.")
  }

  # deselection: flip the mask so the matches are the ones removed
  if (isTRUE(deselect)) {
    keep <- !keep
  }

  phyloseq::prune_taxa(x = ps, taxa = keep)
}


# Convert the counts of one sample to percentages of that sample's total.
to_percent <- function(x) x/sum(x)*100

# --- Subsets (counts) ---
# everything except chloroplasts
physeq_all <- tax_select(physeq, tax_list = "Chloroplast", deselect = TRUE)
# prokaryotes, with and without the predatory bacteria
physeq_prok <- tax_select(physeq, tax_list = prokarya, n_typos = 0)
physeq_prok_wo <- tax_select(physeq_prok, tax_list = bactpred, n_typos = 0, deselect = TRUE)
# protozoa
physeq_prot <- tax_select(physeq, tax_list = protozoa)
# predatory bacteria, with "Sorangium" matches removed afterwards
physeq_bacpred <- tax_select(
  tax_select(physeq, tax_list = bactpred, n_typos = 0),
  tax_list = "Sorangium", strict_matches = FALSE, deselect = TRUE
)
# all predators (protozoa + predatory bacteria), again without "Sorangium"
physeq_pred <- tax_select(
  tax_select(physeq, tax_list = pred, n_typos = 0),
  tax_list = "Sorangium", strict_matches = FALSE, deselect = TRUE
)
# everything that is neither prokaryote nor chloroplast
physeq_euk <- tax_select(
  tax_select(physeq, tax_list = prokarya, n_typos = 0, deselect = TRUE),
  tax_list = "Chloroplast", deselect = TRUE
) # %>% tax_select(tax_list = protozoa, deselect = TRUE)

# --- Relative abundance [%] within each subset; the printed sample sums
# --- serve as a check on the transformation.
physeq_rel <- transform_sample_counts(physeq_all, to_percent)
sample_sums(physeq_rel)
physeq_prok_rel <- transform_sample_counts(physeq_prok, to_percent)
sample_sums(physeq_prok_rel)
physeq_prok_wo_rel <- transform_sample_counts(physeq_prok_wo, to_percent)
sample_sums(physeq_prok_wo_rel)
physeq_prot_rel <- transform_sample_counts(physeq_prot, to_percent)
sample_sums(physeq_prot_rel)
physeq_pred_rel <- transform_sample_counts(physeq_pred, to_percent)
sample_sums(physeq_pred_rel)
physeq_bacpred_rel <- transform_sample_counts(physeq_bacpred, to_percent)
sample_sums(physeq_bacpred_rel)
physeq_euk_rel <- transform_sample_counts(physeq_euk, to_percent)
sample_sums(physeq_euk_rel)


# Relative counts per depth: the samples of each depth are merged, then every
# merged sample is rescaled to percent.

# Merge the samples of `ps` by the "depth" variable and re-attach the
# categorical sample data (age, depth label, layer) for the ten merged
# samples, because these columns do not survive the merge (they were NA).
merge_by_depth <- function(ps) {
  merged <- merge_samples(ps, "depth")
  sample_data(merged)$age <- as.factor(c("AY","AY","AY","AY", "AM", "AM", "AO","AO", "AO", "AO"))
  sample_data(merged)$depth <- as.factor(c("0-10","10-20","20-30","30-40", "40-50", "50-60", "60-70","70-80", "80-90", "90-100"))
  sample_data(merged)$layer <- as.factor(c("AL","AL","AL","AL", "TZ1", "TZ1", "TZ1","TZ2", "TZ2", "PF"))
  merged
}

# all taxa (without chloroplasts)
physeq_all_mean <- merge_by_depth(physeq_all)
physeq_mean_rel <- transform_sample_counts(physeq_all_mean, function(x) x/sum(x)*100)
sample_sums(physeq_mean_rel)
head(tax_table(physeq_mean_rel))
head(sample_data(physeq_mean_rel))

# predatory bacteria
physeq_bacpred_mean <- merge_by_depth(physeq_bacpred)
physeq_bacpred_mean_rel <- transform_sample_counts(physeq_bacpred_mean, function(x) x/sum(x)*100)
sample_sums(physeq_bacpred_mean_rel)
head(tax_table(physeq_bacpred_mean_rel))

# protozoa
physeq_prot_mean <- merge_by_depth(physeq_prot)
physeq_prot_mean_rel <- transform_sample_counts(physeq_prot_mean, function(x) x/sum(x)*100)
sample_sums(physeq_prot_mean_rel)
head(tax_table(physeq_prot_mean_rel))

# prokaryotes
physeq_prok_mean <- merge_by_depth(physeq_prok)
physeq_prok_mean_rel <- transform_sample_counts(physeq_prok_mean, function(x) x/sum(x)*100)
sample_sums(physeq_prok_mean_rel)
head(tax_table(physeq_prok_mean_rel))

# prokaryotes without the predatory bacteria
physeq_prok_WO_mean <- merge_by_depth(physeq_prok_wo)
physeq_prok_WO_mean_rel <- transform_sample_counts(physeq_prok_WO_mean, function(x) x/sum(x)*100)
sample_sums(physeq_prok_WO_mean_rel)
head(tax_table(physeq_prok_WO_mean_rel))

# all predators
physeq_pred_mean <- merge_by_depth(physeq_pred)
physeq_pred_mean_rel <- transform_sample_counts(physeq_pred_mean, function(x) x/sum(x)*100)
sample_sums(physeq_pred_mean_rel)
head(tax_table(physeq_pred_mean_rel))

# non-prokaryotes (without chloroplasts)
physeq_euk_mean <- merge_by_depth(physeq_euk)
physeq_euk_mean_rel <- transform_sample_counts(physeq_euk_mean, function(x) x/sum(x)*100)
sample_sums(physeq_euk_mean_rel)
head(tax_table(physeq_euk_mean_rel))


# Agglomerate the chloroplast-free data set at three taxonomic ranks.
# Each call is made with NArm = FALSE; the object, the head of its tax table
# and the per-sample sums are printed afterwards as a check.

# Phylum level
physeq_p <- tax_glom(physeq = physeq_all, taxrank = "Phylum", NArm = FALSE)
physeq_p
head(tax_table(physeq_p))
sample_sums(physeq_p)

# Order level
physeq_o <- tax_glom(physeq = physeq_all, taxrank = "Order", NArm = FALSE)
physeq_o
head(tax_table(physeq_o))
sample_sums(physeq_o)

# Genus level
physeq_g <- tax_glom(physeq = physeq_all, taxrank = "Genus", NArm = FALSE)
physeq_g
head(tax_table(physeq_g))
sample_sums(physeq_g)


###----Richness all----
# I used this block of code for all subsets
# Alpha diversity (observed richness and Shannon index) along the depth axis.
# `x` is given as the name of the sample variable. The previous
# reorder("depth", "triplicate") operated on the two literal strings rather
# than on the sample data, yielding a one-level factor (and a warning)
# instead of a per-sample x variable.
p1 <- plot_richness(physeq, x = "depth", color = "age", shape = "layer",
                    measures = c("Observed", "Shannon"))
# Overlay a boxplot per x position; colour is unset so it is not split by age.
p1 <- p1 + geom_boxplot(data = p1$data, aes(color = NULL), alpha = 0.05) +
  theme_bw() +
  theme(legend.title = element_blank(),
        legend.position = "right",
        strip.background = element_rect(fill = "white")) +
  ggtitle("overall rRNA alpha diversity")
p1

# write the calculations in a file
write.table(p1$data, file='diversity_total.tsv', quote=FALSE, sep='\t')


###----Ordination_all----
# I then used this block of code for all subsets
# subset_AL <-subset_samples(physeq_rel, layer=="AL")
# subset_TZ1 <-subset_samples(physeq_rel, layer=="TZ1")
# subset_TZ2 <-subset_samples(physeq_rel, layer=="TZ2")
# subset_PF <-subset_samples(physeq_rel, layer=="PF")

# NMDS on Bray-Curtis distances of the relative-abundance data.
physeq_nmds <- ordinate(physeq_rel, method = "NMDS", distance = "bray")

# Shepard plot - all
vegan::stressplot(physeq_nmds)

# NMDS plot - all: samples coloured by layer, shaped by age, with an ellipse
# per colour group.
p1 <- plot_ordination(
  physeq_rel, physeq_nmds,
  type = "samples", color = "layer", shape = "age",
  title = "NMDS, Bray-curtis all rRNA contigs"
)
p1 <- p1 +
  geom_point(size = 3) +
  theme_bw() +
  scale_color_manual(values = met.brewer("Derain")) +
  theme(legend.title = element_blank()) +
  theme(strip.background = element_rect(fill = "white")) +
  stat_ellipse()
print(p1)

# p1 = plot_ordination(physeq_c, physeq_nmds, type="taxa", color="Phylum")
# p1 + facet_wrap(~Kingdom, 3)
#
# p4 = plot_ordination(physeq_c, physeq_nmds, type="split", color="Kingdom")
# p4
#

# PCoA on the same data; no distance is passed, so ordinate()'s default is
# used.
ordu <- ordinate(physeq_rel, "PCoA")
p <- plot_ordination(physeq_rel, ordu, color = "layer", shape = "age") +
  geom_point(size = 5, alpha = 0.75) +
  scale_colour_brewer(type = "qual", palette = "Set1")
# The titled plot is printed only; `p` itself stays untitled.
p + ggtitle("MDS/PCoA total rRNA community")


#### ANOVA all ####
# I then used this block of ANOVA for all subsets
library("microbiome")
packageVersion("microbiome")
library("vegan")
packageVersion("vegan")

# Abundance matrix and sample metadata of the depth-merged, relative data.
# NOTE(review): an earlier remark here said this "doesnt work on mean" because
# age and layer contain NAs; those columns are refilled by hand where the
# merged objects are built - confirm that the merged object (rather than the
# 30 replicate samples) is the intended input.
# NOTE(review): recent vegan releases deprecate adonis() in favour of
# adonis2(), whose result has no $aov.tab element - check the installed
# version.
otu1 <- microbiome::abundances(physeq_mean_rel)
meta1 <- microbiome::meta(physeq_mean_rel)

# One single-term PERMANOVA (Bray-Curtis, 999 permutations) per variable;
# the table of each fit is printed as a data frame.
permanova_age <- vegan::adonis(t(otu1) ~ age, data = meta1, method = "bray", permutations = 999)
print(as.data.frame(permanova_age[["aov.tab"]]))

permanova_layer <- vegan::adonis(t(otu1) ~ layer, data = meta1, method = "bray", permutations = 999)
print(as.data.frame(permanova_layer[["aov.tab"]]))

permanova <- vegan::adonis(t(otu1) ~ depth, data = meta1, method = "bray", permutations = 999)
print(as.data.frame(permanova[["aov.tab"]]))

permanova_SOM <- vegan::adonis(t(otu1) ~ SOM, data = meta1, method = "bray", permutations = 999)
print(as.data.frame(permanova_SOM[["aov.tab"]]))

permanova_H2O <- vegan::adonis(t(otu1) ~ H2O, data = meta1, method = "bray", permutations = 999)
print(as.data.frame(permanova_H2O[["aov.tab"]]))

permanova_pH <- vegan::adonis(t(otu1) ~ pH, data = meta1, method = "bray", permutations = 999)
print(as.data.frame(permanova_pH[["aov.tab"]]))


# Calculate beta dispersion (distance of each sample to its group centroid),
# first between the age groups and then between the layers, and compare the
# groups pairwise with Tukey's HSD.

dist <- vegan::vegdist(t(otu1), method = "bray")

# Dispersion by age
mod <- vegan::betadisper(dist, meta1$age, type = "centroid")
mod
# Attach the grouping taken from the same table the distances were computed
# for (meta1); the full `meta` table has a different number of rows.
mod$age <- meta1$age
TukeyHSD(mod)

# Dispersion by layer
mod <- vegan::betadisper(dist, meta1$layer, type = "centroid")
mod
mod$layer <- meta1$layer
TukeyHSD(mod)

# mod <- vegan::betadisper(dist, meta1$depth, type="centroid")
# mod
# mod$depth <- meta$depth

# write.table(TukeyHSD(mod), file='tukey_depth_all.tsv', quote=FALSE, sep='\t')

# plot(mod,  hull=FALSE, ellipse=TRUE, main = "PCoA, layer", sub=NULL, col=c("#2f85fe", "#e05436", "#009453"), cex=2, lwd=1) #+
# boxplot(mod$distances ~ mod$group, main= "Distance to Centroid", xlab="depth [cm]", ylab="Distance", col= c("#2f85fe", "#e05436", "#009453"))

# dev.off()

###----Stacked Barplots all----
# I then created barplots based on this code for all subsets
#check taxonomy level names
rank_names(physeq_rel)
str(physeq_rel)

# Agglomeration on the Phylum level
physeq_all_phylum <- tax_glom(physeq_rel, taxrank = "Phylum")

# Compare the number of taxa in the whole object with the number of phyla
# they were assigned to
physeq_all
physeq_all_phylum

# Check the sample sums after agglomeration (the input is in percent)
sample_sums(physeq_all_phylum)
# Rescale every sample so that the remaining phyla sum to 100 % again
physeq_all_phylum <- transform_sample_counts(physeq_all_phylum, function(x) x/sum(x)*100)

# There are too many phyla for a stacked barplot, so the low-abundance ones
# are pooled into one category: the phyloseq object is melted into a plain
# data frame and a column is added in which every entry with an abundance
# below 2 % is renamed to "< 2%".

# Transform the phyloseq object to a data frame (DF)
physeq_all_phylumDF <- psmelt(physeq_all_phylum)

# Inspect the data frame
str(physeq_all_phylumDF)

# Make the phyla characters, not factors
physeq_all_phylumDF$Phylum <- as.character(physeq_all_phylumDF$Phylum)

# Reads assigned only to the kingdom level have no phylum; rename them.
# Real missing values and empty strings are covered as well as the literal
# text "NA" (the former test `Phylum == "NA"` could not catch a real NA).
physeq_all_phylumDF <- physeq_all_phylumDF %>%
  mutate(Phylum = replace(Phylum,
                          is.na(Phylum) | Phylum %in% c("NA", ""),
                          "unassigned"))

# Check all phyla names
unique(physeq_all_phylumDF$Phylum)

# Add a new column with renamed low-abundance taxa
physeq_all_phylumDF <- physeq_all_phylumDF %>%
  mutate(Phylum2 = replace(Phylum, Abundance < 2, "< 2%"))

# Check all phyla names
unique(physeq_all_phylumDF$Phylum2)

# Reorder the phyla so that they are stacked according to abundance
physeq_all_phylumDF$Phylum <- reorder(physeq_all_phylumDF$Phylum2,
                                      physeq_all_phylumDF$Abundance)

# One palette colour per plotted category; met.brewer() is asked for exactly
# as many colours as there are levels.
n_phyla <- nlevels(physeq_all_phylumDF$Phylum)

# Samples are ordered by depth. `depth` is converted with as.numeric() because
# reorder() averages its second argument, which fails on a factor.
# The palette is attached to the fill scale: the bars are mapped to `fill`,
# so the former scale_color_manual() call had no effect.
all <- ggplot(physeq_all_phylumDF,
              aes(x = reorder(Sample, as.numeric(depth)), y = Abundance, fill = Phylum)) +
  geom_bar(stat = 'identity', position = 'stack') + facet_grid(~ depth) +
  scale_fill_manual(values = met.brewer("Derain", n_phyla)) +
  labs(x = "depth [cm]", y = "Relative abundance [%]", fill = "Phyla") +
  theme_bw()

all
all + facet_grid(cols = vars(layer), scales="free") #facet_wrap(vars(Kingdom), scales="free")

#more colours and combinations can be autogenerated here: https://medialab.github.io/iwanthue/
#this part was adapted from: https://mvuko.github.io/meta_phyloseq/


#### Network ####
# `physeq_c` is not created anywhere in this script. Following the naming of
# physeq_p / physeq_o / physeq_g it is taken to be the Class-level
# agglomeration and is built here only if it does not already exist.
# NOTE(review): confirm that Class level is what was intended.
if (!exists("physeq_c")) {
  physeq_c <- tax_glom(physeq_all, taxrank = "Class", NArm = FALSE)
}

# Per-layer subsets; both transition-zone layers are pooled into subset_TZ1.
subset_AL <- subset_samples(physeq_c, layer == "AL")
subset_TZ1 <- subset_samples(physeq_c, layer == "TZ1" | layer == "TZ2")
# subset_TZ2 <-subset_samples(physeq_c, layer=="TZ2")
subset_PF <- subset_samples(physeq_c, layer == "PF")

# Taxa networks: taxa are connected when their Bray-Curtis distance is at
# most 0.2.
ig_AL <- make_network(subset_AL, max.dist=0.2, type="taxa", distance="bray")
plot_network(ig_AL, subset_AL, color = "Kingdom", type="taxa", label = "Phylum", title="AL network, max.dist=0.2, BC")

ig_TZ1 <- make_network(subset_TZ1, max.dist=0.2, type="taxa", distance="bray")
plot_network(ig_TZ1, subset_TZ1, color = "Kingdom", type="taxa", label = "Phylum", title="TZ1 network, max.dist=0.2, BC")

# The separate TZ2 network is disabled together with subset_TZ2 above (TZ2 is
# part of subset_TZ1); running it would fail on the undefined object.
# ig_TZ2 <- make_network(subset_TZ2, max.dist=0.2, type="taxa", distance="bray")
# plot_network(ig_TZ2, subset_TZ2, color = "Kingdom", type="taxa", label = "Phylum", title="TZ2 network, max.dist=0.2, BC")

ig_PF <- make_network(subset_PF, max.dist=0.2, type="taxa", distance="bray")
plot_network(ig_PF, subset_PF, color = "Kingdom", type="taxa", label = "Phylum", title="PF network, max.dist=0.2, BC")

# Network over all samples at Order level
ig <- make_network(physeq_o, max.dist=0.2, type="taxa", distance="bray")
plot_network(ig, physeq_o, color = "Kingdom", type="taxa", label = "Order", title="Network, Order, max.dist=0.2, BC")