Cv_MS_BSCGanalysis

#Extract transcript info
library(DESeq2)
lib <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/initial_files/Cviolaceum_LIBRARIES.txt", sep="\t",header=TRUE)
counts <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/initial_files/Cvraw_counts_kallisoNCBI.txt",sep="\t",header=TRUE)
mapping <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/initial_files/Cv_diffExp_all.csv",header=TRUE,sep=",")

BSGC <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/LocusTags_forBSGCs.csv",header=TRUE,sep=",")

#Obtain genes of interest
ddsMat <- readRDS("diffExp/initial_files/Deseq2Results_kallistoNCBI.rds")
#obtain normalized counts
norm <- counts(ddsMat, normalized=TRUE)
#Extract loci of interest
BSGCloci <- norm[which(row.names(norm) %in% BSGC$Locus),]

#Replace col names with sample names
match(colnames(BSGCloci),mapping$Code) #looks to be in the same order. Good
#Now, rename columns
colnames(BSGCloci) <- mapping$Condition

library(reshape2)
BSGCloci_melt <- melt(BSGCloci,value.name = "Value", varnames=c('Locus', 'Sample'))

#Add additional metaData. Here, we'll add condition identifiers (Membership + Time)
library(tidyr)
BSGCloci_melt <- BSGCloci_melt %>% separate(Sample, c("Condition"), sep = "_",remove=FALSE)

#Now let's add BSGCs to the dataframe. Make sure Loci across dfs match
match(BSGCloci_melt$Locus,BSGC$Locus) #No

#Remove missing loci from BSGC (Because they were filtered out of the transcripts)
BSGCfilt <- BSGC[which(BSGC$Locus %in% BSGCloci_melt$Locus),]
match(BSGCloci_melt$Locus,BSGCfilt$Locus) #All good

#Add BSGCs (replicate by 94 because there are 91 samples)
BSGCloci_melt$BSGC <- rep(BSGCfilt$BSGC,91)

#Obtain mean transcripts across all biosynthetic genes involved in a BSGC
library(dplyr)
dataSum <- BSGCloci_melt %>%
  group_by(BSGC,Condition,Locus) %>%
  summarize(mean = mean(log10(Value)),sd=sd(log10(Value)))