-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCv_MS_BSCGanalysis
42 lines (33 loc) · 2.02 KB
/
Cv_MS_BSCGanalysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#Extract transcript info
library(DESeq2)
lib <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/initial_files/Cviolaceum_LIBRARIES.txt", sep="\t",header=TRUE)
counts <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/initial_files/Cvraw_counts_kallisoNCBI.txt",sep="\t",header=TRUE)
mapping <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/initial_files/Cv_diffExp_all.csv",header=TRUE,sep=",")
BSGC <- read.csv(file="/mnt/research/ShadeLab/Chodkowski/JGI_SynCom/RNAseq/Chromobacterium/final_analyses_Rev/LocusTags_forBSGCs.csv",header=TRUE,sep=",")
#Obtain genes of interest
ddsMat <- readRDS("diffExp/initial_files/Deseq2Results_kallistoNCBI.rds")
#obtain normalized counts
norm <- counts(ddsMat, normalized=TRUE)
#Extract loci of interest
BSGCloci <- norm[which(row.names(norm) %in% BSGC$Locus),]
#Replace col names with sample names
match(colnames(BSGCloci),mapping$Code) #looks to be in the same order. Good
#Now, rename columns
colnames(BSGCloci) <- mapping$Condition
library(reshape2)
BSGCloci_melt <- melt(BSGCloci,value.name = "Value", varnames=c('Locus', 'Sample'))
#Add additional metaData. Here, we'll add condition identifiers (Membership + Time)
library(tidyr)
BSGCloci_melt <- BSGCloci_melt %>% separate(Sample, c("Condition"), sep = "_",remove=FALSE)
#Now let's add BSGCs to the dataframe. Make sure Loci across dfs match
match(BSGCloci_melt$Locus,BSGC$Locus) #No
#Remove missing loci from BSGC (Because they were filtered out of the transcripts)
BSGCfilt <- BSGC[which(BSGC$Locus %in% BSGCloci_melt$Locus),]
match(BSGCloci_melt$Locus,BSGCfilt$Locus) #All good
#Add BSGCs (replicate by 94 because there are 91 samples)
BSGCloci_melt$BSGC <- rep(BSGCfilt$BSGC,91)
#Obtain mean transcripts across all biosynthetic genes involved in a BSGC
library(dplyr)
dataSum <- BSGCloci_melt %>%
group_by(BSGC,Condition,Locus) %>%
summarize(mean = mean(log10(Value)),sd=sd(log10(Value)))