-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathapp.R
2054 lines (1721 loc) · 92.6 KB
/
app.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## Author: Francisco J. Romero-Campero
## Contact: Francisco J. Romero-Campero - fran@us.es
## Input to test
## input <- list(selected.multiple.tfs = c("BRC1", "HEC1", "ABI5"), cluster = "1")
## options(repos = BiocInstaller::biocinstallRepos())
## getOption("repos")
# Load neccesary libraries
library(shiny)
library(shinythemes)
library(shinycssloaders)
library(shinyWidgets)
library(shinyjs)
## Load motif names
motif.names <- read.table(file = "data/motif_names.txt",header = F,as.is=T)[[1]]
## Transcription factors AGI ids and names
tfs.names <- c("ATAF1", "bZIP52", "PIF3", "MYB3", "ZAT10",
"ERF055", "VIP1", "ERF014", "NAC018", "NAP",
"ANAC032", "RGA", "HB21", "HB6", "SVP",
"ABI5", "IBH1", "BRC1", "ERF035", "GBF2",
"PDF2", "HSFB2B", "TCX2", "WRKY18", "ABF3",
"HB40", "ZAT6", "DREB2A", "MYB56", "ERF003",
"NAC6/ORE1", "HAT2", "SPCH", "DOF5_4", "NAC102",
"HB53", "HEC1","GBF3", "HSFB2A")
tf.ids <- c("AT1G01720", "AT1G06850" ,"AT1G09530", "AT1G22640", "AT1G27730",
"AT1G36060", "AT1G43700", "AT1G44830", "AT1G52880", "AT1G69490",
"AT1G77450", "AT2G01570", "AT2G18550", "AT2G22430", "AT2G22540",
"AT2G36270", "AT2G43060", "AT3G18550", "AT3G60490", "AT4G01120",
"AT4G04890", "AT4G11660", "AT4G14770", "AT4G31800", "AT4G34000",
"AT4G36740", "AT5G04340", "AT5G05410", "AT5G17800", "AT5G25190",
"AT5G39610", "AT5G47370", "AT5G53210", "AT5G60850", "AT5G63790",
"AT5G66700", "AT5G67060", "AT2G46270", "AT5G62020")
# Define UI for application that draws a histogram
ui <- fluidPage(
##shinythemes::themeSelector(),
theme = shinytheme("sandstone"),
##theme = shinytheme("simplex"),
##theme = shinytheme("journal"),
# Application title, introductory text and sidebar navigation
fluidRow(
column(
width = 2,
img(src='bud.jpg', align = "center", width=180),
tags$br(),
radioButtons(inputId = "navigation_bar", width="100%",selected="home",
label="",
choices=c(
"Home" = "home",
"Multiple transcription factor analysis" = "multiple_gene",
"Individual gene analysis" = "individual_gene",
"Tutorials" = "tutorials",
"GitHub repository" = "github",
"Citation and Contact" = "citation"
))),
column(
width = 8,
tags$div(align = "center",
tags$h1("The TCP transcription factor ",tags$i(tags$b("BRANCHED1,")),
" orchestrates several gene regulatory networks to promote axillary
bud dormancy in ", tags$i("Arabidopsis"))),
tags$br(),tags$br(),
conditionalPanel(condition = "input.navigation_bar == 'home'",
tags$div(align="justify", "This online tool allows researchers to explore the transcriptional
network downstream of", tags$i("BRC1"),". This network was constructed by combining genome-wide transcriptional
profiling of active/dormant buds and seedlings after BRC1 induction, with
genome-wide BRC1 binding sites determined by Chromatin Immunoprecipitation
sequencing (ChIP-seq). We identified nine co-expressed gene clusters strongly dependent on
BRC1 function and, within these clusters, a group of genes encoding TFs which are
direct targets of BRC1. This specific set of regulators probably plays a key role
together with BRC1 in mediating amplification and maintenance of the observed
transcriptional responses triggered by BRC1. Use the navigation bar on the left to obtain
insights into the molecular mechanisms that operate directly downstream of BRC1 to promote axillary
bud arrest."),
plotOutput("introPlot")
),
## Conditional for citation
conditionalPanel(condition = "input.navigation_bar == 'citation'",
tags$div(align = "justify", "We are strongly committed to", tags$b("open access software"),
"and", tags$b("open science."),"Following our philosophy we have deposited our GitHub code
into", tags$a(href="https://zenodo.org/record/4323584#.X9k6M8Io8ws", target="_blank",tags$b("Zenodo")), ", a
general-purpose open-access repository developed under the",
tags$a(href="https://www.openaire.eu/", target="_blank", tags$b("European OpenAIRE program.")),
"A preprint of our manuscript has also been deposited in bioRxiv:",
tags$br(),
tags$br(),
tags$div(
tags$a(href="https://www.biorxiv.org/content/10.1101/2020.12.14.394403v1", target="_blank", tags$b("Sam W. van Es, Aitor Munoz-Gasca, Francisco J. Romero-Campero, Eduardo Gonzalez-Grandio,
Pedro de los Reyes, Carlos Tarancon, Aalt D.J. van Dijk, Wilma van Esse, Gerco C. Angenent, Richard Immink, Pilar Cubas (2020)
A gene regulatory network critical for axillary bud dormancy directly controlled by Arabidopsis BRANCHED1, doi: https://doi.org/10.1101/2020.12.14.394403 ")
))),
tags$br(),
tags$br(),
#tags$div(align="center", img(src='smiley.png', align = "center", width=200,hight=200)),
tags$br()
),
## Conditional panel for video tutorial
conditionalPanel(condition = "input.navigation_bar == 'tutorials'",
tags$div(align="center",uiOutput("video_tutorial")),
tags$div(align = "justify",
tags$br(),
tags$br(),
tags$div(tags$h4(tags$b("Above you can find a video tutorial on how to use the different tools implemented
in BRC1NET to explore the transcriptional network downstream from the ", tags$i("Arabidopsis thaliana "),
"transcription factor BRANCHED1"))))),
conditionalPanel(condition = "input.navigation_bar == 'individual_gene'",
tags$div(align="justify", tags$b("BRC1NET"), "allows researchers to explore the coordinated regulation of several
BRC1 dependent transcription factors over an", tags$b("individually selected"), "gene. Follow the steps below:",
tags$ol(
tags$li("Select a specific gene from our network using the", tags$b("Target Gene"),
"dropdown menu on the left below. You can enter either the AGI identifier or primary symbol for the",
"gene of interest."),
tags$li("Select several transcription factors (TFs) to explore their regulation over the
previously selected target gene using the",tags$b("Select Transcription Factors"), "checkboxes on the left below."),
tags$li("Results will be depicted below showing the selected target gene genomic location and the
peaks detected in our analysis of the correspondig TFs ChIP-seq or DAP-seq data.
Here you can specify the length of the gene promoter and 3' region as well as DNA
TFs binding motifs to search for in the detected peak regions with the specified
score for identity.")
))
),
conditionalPanel(condition = "input.navigation_bar == 'multiple_gene'",
tags$div(align="justify", tags$b("BRC1NET"), "allows researchers to explore the coordinated regulation of several
BRC1-dependent transcription factors (TFs) over their common targets. The node representing BRC1 is located
at the center. Genes are classified into", tags$b("nine different co-expressed clusters"), "constituted by overexpressed (UP) or
underexpressed (DOWN) genes after BRC1 induction. Red and purple nodes are BRC1 direct targets, blue and green nodes
represent indirect BRC1 targets whereas purple and green nodes denote genes encoding TFs. Follow the steps below to
explore this network:",
tags$ol(
tags$li("Select your TFs of interest using the", tags$b("Select Transcription Factors"),
"checkbox menu on the left below."),
tags$li("You can also select a gene cluster using the dropdown menu", tags$b("Select a gene cluster:")),
tags$li("Check the box", tags$b("Visualize Edges"), "when you want to depict arrows from TFs to their target genes."),
tags$li("Choose between two", tags$b("Modes of selction"), ", either selecting all the target genes or only those common to the selected TFs."),
tags$li("Click on the", tags$b("SELECT GENES"), "to visualize your selected TFs target genes located
in the specified cluster. Explore the different tabs to
download a table with the selected genes, perform a signficance analysis of the overlap between the selected
TFs targets and the specified gene cluster as well as functional enrichment analysis.")
)
)
),
conditionalPanel(condition = "input.navigation_bar == 'github'",
tags$div(align = "justify", tags$b("BRC1NET,"), "is entirely developed using
the R package", tags$b( tags$a(href="https://shiny.rstudio.com/", "shiny.")), "The
source code is released under", tags$b("GNU General Public License v3.0"), "and is hosted at",
tags$b("GitHub."), "If you experience any problem using BRC1NET please create an", tags$b(tags$a(href="https://github.com/fran-romero-campero/BRC1TRANSNET/issues","issue")), "in GitHub and we will address it."),
tags$div(align="center",tags$h1(tags$b(tags$a(href="https://github.com/fran-romero-campero/BRC1NET", "BRC1NET at GitHub"))))
),
),
column(
width = 2,
img(src='cnb.jpg', align = "center", width=100),
img(src='logo_ibvf.jpg', align = "center", width=100),
img(src='logo_us.png', align = "center", width=100),
tags$br(),tags$br(),tags$br(),
img(src='logo_csic.jpg', align = "center", width=100),
tags$br(),
tags$br(),
tags$div(align = "center", width=60,
#HTML("<script type=\"text/javascript\" id=\"clstr_globe\" src=\"//clustrmaps.com/globe.js?d=444jQllIhH-hM07Zehu_i5ENUTxfV-dOb2uxYzFH43I\"></script>")
HTML("<script type=\"text/javascript\" src=\"//rf.revolvermaps.com/0/0/2.js?i=5lnqjnbsiy1&m=6&s=130&c=ff0000&t=1\" async=\"async\"></script>")
)
)
),
## Separation for different tools
tags$br(),tags$br(),
## Conditional panel for individual gene analysis
conditionalPanel(condition = "input.navigation_bar == 'individual_gene'",
fluidRow(
column(width = 3,
## Select target gene to study
selectizeInput(inputId = "target.gene",
label = "Target Gene:",
choices = NULL, #genes.selectize,
multiple = FALSE),
## Check box for the TF peaks to represent
checkboxGroupInput(inputId = "selected.tfs",
label = "Select Transcription Factors:",
choices = c("Select All Transcription Factors",
tfs.names),
inline = TRUE,width = "100%")
),
column(width = 9,
column(wellPanel(
## Numeric input for promoter length
numericInput(inputId = "promoter.length",
label = "Promoter Length",
value = 2000,
min = 500,
max = 2000,
step = 100),
## Numeric input for 3' length
numericInput(inputId = "threeprime.length",
label = "3' Length",
value = 500,
min = 100,
max = 500,
step = 100)),width=3),
column(wellPanel(
## Selectize to choose target gene to represent
selectizeInput(inputId = "selected.motifs",
label = "Select Motifs",
selected =c("G-box","BRC1"),
choices = motif.names,
multiple = TRUE),
## Checkbox to select all available motifs
checkboxInput(inputId = "all.motifs",
label = "Select All Motifs:",
value = FALSE),
## Numeric input for PWM score
numericInput(inputId = "min.score.pwm",
label = "Motif Identification Score:",
value = 100,
min = 80,
max = 100,
step = 5)),width=9),
fluidRow(
column(
plotOutput(outputId = "peak_plot"),
width=12)
)
)
)
),
## Conditional panel for multiple transcription factors and regulators analysis
conditionalPanel(condition = "input.navigation_bar == 'multiple_gene'",
fluidRow(
column(width = 3,
## Check box for the TFs to analyse
checkboxGroupInput(inputId = "selected.multiple.tfs",
label = "Select Transcription Factors:",
choices = tfs.names,
inline = TRUE,width = "100%"),
tags$b("Select a gene cluster:"),
selectInput(inputId = "cluster", label="",
choices = c("No selected cluster" = "any",
"Cluster UP_C1" = "1", "Cluster UP_C2" = "2", "Cluster UP_C3" = "3",
"Cluster UP_C4" = "4", "Cluster UP_C5" = "5", "Cluster UP_C6" = "6",
"Cluster DOWN_C1" = "7", "Cluster DOWN_C2" = "8", "Cluster DOWN_C3" = "9"),
selected = "any", multiple = FALSE, selectize = TRUE),
checkboxInput(inputId = "edges",label = "Visualize Edges",value = FALSE),
radioButtons(inputId = "selection.mode",label = "Mode of selection:",
choices = c("Common gene targets","All gene targets"),selected = "Common gene targets",inline = F),
actionButton(inputId = "go_multiple",label = "Select Genes")
),
column(width = 9,
tabsetPanel(type = "tabs",
tabPanel(title = "Network Visualization",
plotOutput("networkPlot", click="plot_click")
),
tabPanel(title = "Gene Table",
dataTableOutput(outputId = "outputTable"),
uiOutput(outputId = "download_ui_for_table")
),
tabPanel(title = "Overlap Significance",
tags$br(),
tags$div(align="justify", "In this section, we present the results of a significance analysis of the
overlap between the targets of the selected transcription factors and gene
with a specific expresion pattern."),
tags$br(),
textOutput("overlap.message"),
textOutput("overlap.significance.text"),
tags$br(),
tags$br(),
tags$div(align="center",
plotOutput("venn.diagram.plot"))
),
tabPanel(title = "Functional Enrichment",
tabsetPanel(type = "tabs",
tabPanel(title = "GO Enrichment",
tags$br(),
tags$div(align="justify", "In this section you can perform a GO term
enrichment analysis over the selected genes. First
of all, you need to choose the background set of genes between
the entire genome of", tags$i("Arabidopsis thaliana"), "or just the genes in BRC1NET:"),
tags$br(),
radioButtons(inputId = "go.background", width="100%",selected="allgenome",
label="",
choices=c(
"Complete genome" = "allgenome",
"Genes in network" = "onlynet"
)), tags$br(),
actionButton(inputId = "goterm",label = "GO terms analysis"),
tags$br(),
tags$br(),
shinyjs::useShinyjs(),
hidden(div(id='loading.div',h3('Please be patient, computing GO enrichment ...'))),
tags$br(),
tabsetPanel(type = "tabs",
tabPanel(title = "GO table",
tags$br(), tags$br(),
htmlOutput(outputId = "textGOTable"),
tags$br(), tags$br(),
dataTableOutput(outputId = "output_go_table"),
htmlOutput(outputId = "revigo"),
uiOutput(outputId = "download_ui_for_go_table")
),
tabPanel(title = "GO map",
tags$br(), tags$br(),
htmlOutput(outputId = "gomap_text"),
tags$br(),
div(style= "overflow:scroll; height:500px;",
addSpinner(plotOutput("gomap"), spin = "circle", color = "#E41A1C"))),
tabPanel(title = "GO barplot",
tags$br(),
htmlOutput(outputId = "barplot_text"),
tags$br(),
addSpinner(plotOutput("bar.plot"), spin = "circle", color = "#E41A1C")),
tabPanel(title = "GO Enrichment Map",
tags$br(),
htmlOutput(outputId = "emapplot_text"),
tags$br(),
addSpinner(plotOutput(outputId = "emap.plot",inline=TRUE))),
tabPanel(title = "GO concept network",
tags$br(),
htmlOutput(outputId = "cnetplot_text"),
tags$br(),
addSpinner(plotOutput(outputId = "cnet.plot",inline=TRUE)))
)
),
tabPanel(title = "KEGG Pathway Enrichment",
tags$br(),
tags$div(align="justify", "In this section you can perform a KEGG pathways and modules
enrichment analysis over the selected genes. First
of all, you need to choose the background set of genes between
the entire genome of", tags$i("Arabidopsis thaliana"), "or just the genes in BRC1NET:"),
tags$br(),
radioButtons(inputId = "pathway_background", width="100%",selected="allgenome",
label="",
choices=c(
"Complete genome" = "allgenome",
"Genes in network" = "onlynet"
)),
actionButton(inputId = "pathway_button",label = "KEGG pathway analysis"),
tags$br(),
tags$br(),
shinyjs::useShinyjs(),
hidden(div(id='loading.div.kegg',h3('Please be patient, computing KEGG pathway enrichment ...'))),
tags$br(),
tabsetPanel(type = "tabs",
tabPanel(title = "Enriched Pathway Table",
tags$br(), tags$br(),
htmlOutput(outputId = "no_kegg_enrichment"),
dataTableOutput(outputId = "output_pathway_table"),
uiOutput(outputId = "download_ui_for_kegg_table")
),
tabPanel(title = "Enriched Pathway Visualization",
uiOutput(outputId = "kegg_selectize"),
imageOutput("kegg_image"),
br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(),
br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(), br(),
br(), br(), br(), br(), br()
),
tabPanel(title = "Enriched Module Table",
htmlOutput(outputId = "text_module_kegg"),
br(), br(),
dataTableOutput(outputId = "output_module_table")
)
)
)
)
)
)
)
)
)
)
library(ChIPpeakAnno)
library(rtracklayer)
library(TxDb.Athaliana.BioMart.plantsmart28)
library(Biostrings)
library(seqinr)
library(org.At.tair.db)
library(igraph)
library(ggplot2)
library(ggrepel)
library(stringr)
library(clusterProfiler)
library(pathview)
library(SuperExactTest)
library(VennDiagram)
##Load the network data
network.data <- read.table(file="data/brc1_network.tsv",header = TRUE,as.is=TRUE,sep="\t",quote = "",comment.char = "%",check.names=F)
rownames(network.data) <- network.data$names
## Tranforming coordinates for a better visualization
x.coord <- as.numeric(network.data$y.pos)
y.coord <- as.numeric(network.data$x.pos)
network.data$x.pos <- x.coord
network.data$y.pos <- y.coord
pos.data <- t(matrix(data=c(x.coord,y.coord),ncol=2))
names(tf.ids) <- tfs.names
tfs.order <- c("BRC1", "ABI5", "ABF3", "GBF3", "GBF2", "bZIP52",
"ANAC032", "ATAF1", "NAC6/ORE1","NAC102", "NAP", "NAC018",
"HB53","HB40","HB21","HAT2","HB6","PDF2",
"MYB56", "MYB3",
"ERF003", "DREB2A", "ERF035", "ERF014", "ERF055",
"ZAT6", "ZAT10",
"SVP", "WRKY18","HSFB2A","HSFB2B", "DOF5_4", "RGA", "TCX2","VIP1")
tf.ids <- tf.ids[tfs.order]
tfs.names <- tfs.order
tf.colors <- c("gold", "firebrick1", "firebrick2", "firebrick3", "firebrick", "firebrick4",
"springgreen1","springgreen2","springgreen3","springgreen4","seagreen3","seagreen4",
"salmon", "salmon1", "salmon2", "salmon3", "salmon4","brown",
"red","red4",
"deepskyblue1", "deepskyblue2", "deepskyblue3", "deepskyblue4",
"plum", "plum1", "plum2", "plum3", "plum4",
"peachpuff3", "peachpuff4",
"darkgreen", "paleturquoise3", "rosybrown", "rosybrown4", "purple", "navyblue", "limegreen", "lightsteelblue3")
names(tf.colors) <- tfs.names
cluster.names <- c("Cluster UP_C1", "Cluster UP_C2", "Cluster UP_C3", "Cluster UP_C4",
"Cluster UP_C5", "Cluster UP_C6", "Cluster DOWN_C1", "Cluster DOWN_C2",
"Cluster DOWN_C3")
# btf.ids <- c("AT4G36740","AT5G66700","AT2G18550","AT4G01120","AT2G46270","AT4G34000",
# "AT2G36270","AT1G77450","AT1G01720","AT1G70000","AT2G40970","AT5G13330",
# "AT3G61630","AT5G49700","AT5G44260","AT5G62020","AT1G19050","AT4G31800",
# "AT1G07090")
tfs.network.data <- subset(network.data, names %in% tf.ids)
cluster.pos <- data.frame(x.pos = c(-1700,-900,-200,700,900,700,-100,-1100,-1800),
y.pos = c(1200,1900,2050,1300,400,-1500,-2500,-2600,-1000),
cluster.name=cluster.names[1:9])
## Extract gene ids
genes <- sort(network.data$name)
## Load and extract Arabidopsis thaliana annotation regarding genes, exons and cds
txdb <- TxDb.Athaliana.BioMart.plantsmart28
genes.data <- subset(genes(txdb), seqnames %in% c("1","2","3","4","5")) ## only nuclear genes are considered
genes.data <- as.data.frame(genes.data)
exons.data <- as.data.frame(exons(txdb))
cds.data <- as.data.frame(cds(txdb))
## Load all and circadian genes
alias2symbol.table <- AnnotationDbi::select(org.At.tair.db,
keys=keys(org.At.tair.db, keytype="ENTREZID"),
columns=c("SYMBOL", "TAIR"), keytype="ENTREZID")
ath.universe <- alias2symbol.table$TAIR
alias2symbol.table <- subset(alias2symbol.table, TAIR %in% genes)
alias <- alias2symbol.table$SYMBOL
names(alias) <- alias2symbol.table$TAIR
alias[is.na(alias)] <- ""
genes.selectize <- paste(names(alias), alias, sep=" - ")
## Setting conversion between alias and agis
agis <-alias2symbol.table$TAIR
names(agis) <- alias2symbol.table$SYMBOL
agis[is.na(agis)] <- ""
## Color vectors
line.colors <- c("blue","red", "darkgreen","black","#663300","#99003d","#b3b300","#4d0039","#4d2600","#006666","#000066","#003300","#333300","#660066")
area.colors <- c("skyblue","salmon", "lightgreen","lightgrey","#ffcc99","#ff99c2","#ffffb3","#ffe6f9","#ffe6cc","#80ffff","#b3b3ff","#99ff99","#e6e600","#ffb3ff")
## Load chromosome sequences
chr1 <- readDNAStringSet(filepath = "data/athaliana_genome/chr1.fa")[[1]]
chr2 <- readDNAStringSet(filepath = "data/athaliana_genome/chr2.fa")[[1]]
chr3 <- readDNAStringSet(filepath = "data/athaliana_genome/chr3.fa")[[1]]
chr4 <- readDNAStringSet(filepath = "data/athaliana_genome/chr4.fa")[[1]]
chr5 <- readDNAStringSet(filepath = "data/athaliana_genome/chr5.fa")[[1]]
## Function to compute the reverse complement
reverse.complement <- function(dna.sequence)
{
return(c2s(comp(rev(s2c(dna.sequence)),forceToLower = FALSE)))
}
## Load Position Weight Matrices
## Open file connection
con <- file("data/jaspar_motifs/pfm_plants_20180911.txt",open = "r")
## Empty list for storing PWM
motifs.pwm <- vector(mode="list",length = 453)
motif.ids <- vector(mode="character",length=453)
motif.names <- vector(mode="character",length=453)
## Load 64 PWM
for(j in 1:454)
{
## First line contains motif id and name
first.line <- readLines(con,1)
motif.ids[j] <- strsplit(first.line,split=" ")[[1]][1]
motif.names[j] <- strsplit(first.line,split=" ")[[1]][2]
## Next four line contians probabilites for each nucleotide
a.row <- as.numeric(strsplit(readLines(con,1),split="( )+")[[1]])
c.row <- as.numeric(strsplit(readLines(con,1),split="( )+")[[1]])
g.row <- as.numeric(strsplit(readLines(con,1),split="( )+")[[1]])
t.row <- as.numeric(strsplit(readLines(con,1),split="( )+")[[1]])
## Construct PWM
motif.pwm <- matrix(nrow = 4,ncol=length(a.row))
motif.pwm[1,] <- a.row
motif.pwm[2,] <- c.row
motif.pwm[3,] <- g.row
motif.pwm[4,] <- t.row
rownames(motif.pwm) <- c("A","C","G","T")
motifs.pwm[[j]] <- prop.table(motif.pwm,2)
}
## Close file connection
close(con)
## Naming list with PWM
names(motifs.pwm) <- motif.names
names(motif.ids) <- motif.names
## Load bed files for each transcription factor
bed.file.names <- c("data/bed_files/ABF3_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/ABI5_col_v3h.narrowPeak",
"data/bed_files/ANAC032_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/ANAC092_ORE1_col_v3a",
"data/bed_files/ANAC102_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/ATAF1.narrowPeak",
"data/bed_files/ATHB21 col_a.narrowPeak",
"data/bed_files/ATHB40_col_a.narrowPeak",
"data/bed_files/ATHB53_col_a.narrowPeak",
"data/bed_files/brc1_peaks.bed",
"data/bed_files/bZIP52.narrowPeak",
"data/bed_files/DREB2A_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/erf55.narrowPeak",
"data/bed_files/ERF14.narrowPeak",
"data/bed_files/ERF035 AT3G60490.narrowPeak",
"data/bed_files/ESE3.narrowPeak",
"data/bed_files/GBF2_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/HAT2.narrowPeak",
"data/bed_files/HB6_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/HSF6.narrowPeak",
"data/bed_files/HSF7.narrowPeak",
"data/bed_files/MYB3_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/MYB56.narrowPeak",
"data/bed_files/NAM.narrowPeak",
"data/bed_files/NAP.narrowPeak",
"data/bed_files/OBP4.narrowPeak",
"data/bed_files/PDF2.narrowPeak",
"data/bed_files/SVP.narrowPeak",
"data/bed_files/TCX2.narrowPeak",
"data/bed_files/VIP.narrowPeak",
"data/bed_files/WRKY18.narrowPeak",
"data/bed_files/ZAT6_ABA_optimal_narrowPeak_p16.bed",
"data/bed_files/ZAT10 STZ_col.narrowPeak",
"data/bed_files/hec1.narrowPeak",
"data/bed_files/PIF3.narrowPeak",
"data/bed_files/RGA.bed",
"data/bed_files/IBH1.bed",
"data/bed_files/SPCH.bed",
"data/bed_files/GBF3.bed")
## Load bed files
bed.files <- vector(mode = "list",length = length(bed.file.names))
for(i in 1:length(bed.file.names))
{
bed.files[[i]] <- read.table(file=bed.file.names[i],header = F, as.is = T)
}
names(bed.files) <- c("ABF3", "ABI5", "ANAC032", "NAC6/ORE1", "NAC102", "ATAF1", "HB21",
"HB40", "HB53", "BRC1", "bZIP52", "DREB2A", "ERF055", "ERF014",
"ERF035", "ERF003", "GBF2", "HAT2", "HB6", "HSFB2A","HSFB2B", "MYB3", "MYB56",
"NAC018", "NAP", "DOF5_4", "PDF2", "SVP", "TCX2", "VIP1", "WRKY18", "ZAT6",
"ZAT10", "HEC1", "PIF3","RGA","IBH1", "SPCH","GBF3")
## TF binding sites colors and symbol shapes
symbol.shapes <- c(17, 18, 19, 15)
symbol.color <- c("blue", "red", "darkgreen", "magenta")
## Node colors to represent in the global transcriptional network
node.colors <- network.data$color
## Extract adjacency matrix
adj.global.matrix <- as.matrix(network.data[,tfs.names])
rownames(adj.global.matrix) <- network.data$names
## Function to determine cluster membership
cluster.member <- function(clusters.vector,cluster)
{
return(cluster %in% clusters.vector)
}
## Function to generate output table
create.output.table <- function(input.gene.df,alias,tfs.names)
{
output.selected.genes.df <- data.frame(matrix(nrow=nrow(input.gene.df), ncol=6))
colnames(output.selected.genes.df) <- c("AGI ID", "Gene Name", "Gene Description", "Regulators","log2FC","Cluster")
output.selected.genes.df$`Gene Description` <- input.gene.df$annotation
output.selected.genes.df$log2FC <- input.gene.df$FC
output.selected.genes.df$Cluster <- input.gene.df$cluster
i <- 1
for(i in 1:nrow(output.selected.genes.df))
{
tair.link <- paste0("https://www.arabidopsis.org/servlets/TairObject?type=locus&name=",input.gene.df[i,1])
output.selected.genes.df[i,1] <- paste(c("<a href=\"",
tair.link,
"\" target=\"_blank\">",
input.gene.df[i,1], "</a>"),
collapse="")
output.selected.genes.df[i,2] <- alias[input.gene.df[i,1]]
output.selected.genes.df[i,4] <- paste(tfs.names[which(input.gene.df[i,tfs.names] == 1)],collapse=", ")
}
return(output.selected.genes.df)
}
## Function to generate output table to download
create.downloadable.output.table <- function(input.gene.df,alias,tfs.names)
{
output.selected.genes.df <- data.frame(matrix(nrow=nrow(input.gene.df), ncol=6))
colnames(output.selected.genes.df) <- c("AGI ID", "Gene Name", "Gene Description", "Regulators","log2FC","Cluster")
output.selected.genes.df$`Gene Description` <- input.gene.df$annotation
output.selected.genes.df$log2FC <- input.gene.df$FC
output.selected.genes.df$Cluster <- input.gene.df$cluster
i <- 1
for(i in 1:nrow(output.selected.genes.df))
{
output.selected.genes.df[i,1] <- input.gene.df[i,1]
output.selected.genes.df[i,2] <- alias[input.gene.df[i,1]]
output.selected.genes.df[i,4] <- paste(tfs.names[which(input.gene.df[i,tfs.names] == 1)],collapse=", ")
}
return(output.selected.genes.df)
}
## Auxiliary function to compute enrichments for GO table
compute.enrichments <- function(gene.ratios, bg.ratios)
{
gene.ratios.eval <- sapply(parse(text=gene.ratios),FUN = eval)
bg.ratios.eval <- sapply(parse(text=bg.ratios),FUN = eval)
enrichments <- round(x=gene.ratios.eval/bg.ratios.eval,digits = 2)
enrichments.text <- paste(enrichments, " (", gene.ratios, "; ", bg.ratios, ")",sep="")
return(enrichments.text)
}
#GO links and tair link functions
go.link <- function(go.term)
{
link <- paste0("http://amigo.geneontology.org/amigo/term/", go.term)
complete.link <- paste(c("<a href=\"",
link,
"\" target=\"_blank\">",
go.term, "</a>"),
collapse = "")
return(complete.link)
}
gene.link.function <- function(gene.name)
{
tair.link <- paste(c("https://www.arabidopsis.org/servlets/TairObject?name=",
gene.name,
"&type=locus"),collapse="")
gene.link <- paste(c("<a href=\"",
tair.link,
"\" target=\"_blank\">",
gene.name, "</a>"),
collapse="")
return(gene.link)
}
## KEGG pathway link
kegg.pathway.link <- function(kegg.pathway)
{
link <- paste0("https://www.genome.jp/kegg-bin/show_pathway?",kegg.pathway)
complete.link <- paste(c("<a href=\"",
link,
"\" target=\"_blank\">",
kegg.pathway, "</a>"),
collapse = "")
return(complete.link)
}
## KEGG module link
kegg.module.link <- function(kegg.module)
{
link <- paste0("https://www.genome.jp/kegg-bin/show_module?",kegg.module)
complete.link <- paste(c("<a href=\"",
link,
"\" target=\"_blank\">",
kegg.module, "</a>"),
collapse = "")
return(complete.link)
}
## TFBS jaspar link
tfbs.link <- function(motif.id)
{
link <- paste0("http://jaspar.genereg.net/matrix/",motif.id)
complete.link <- paste(c("<a href=\"",
link,
"\" target=\"_blank\">",
motif.id, "</a>"),
collapse = "")
return(complete.link)
}
server <- function(input, output, session) {
# ## Select all code
# observe({
# if ("Select All Transcription Factors" %in% input$selected.tfs) {
# # choose all the choices _except_ "Select All"
# updateSelectInput(session, "selected.tfs", selected = tfs.names)
# }
# })
## video tutorial
output$video_tutorial <- renderUI({
HTML("<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/aOQ9cZPy1l4\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture\" allowfullscreen></iframe>")
})
## Server side for target.gene selectize
updateSelectizeInput(session,inputId = "target.gene", choices = genes.selectize, server = TRUE)
## Peak visualizer code
output$peak_plot <- renderPlot({
## Select all code
if ("Select All Transcription Factors" %in% input$selected.tfs)
{
# choose all the choices _except_ "Select All"
updateSelectInput(session, "selected.tfs", selected = tfs.names)
currently.selected.tfs <- tfs.names
} else
{
currently.selected.tfs <- input$selected.tfs
}
## Sanity checks
validate(
need(length(input$selected.tfs) > 0 , "Please select a set of transcription factors"),
need(input$target.gene, "Please select a target gene")
)
## Extract target gene annotation
gene.name <- strsplit(input$target.gene,split=" - ")[[1]][1]
target.gene.body <- genes.data[gene.name,]
target.gene.chr <- as.character(target.gene.body$seqnames)
target.gene.start <- target.gene.body$start
target.gene.end <- target.gene.body$end
target.gene.strand <- as.character(target.gene.body$strand)
## Extract cds annotation
cds.data.target.gene <- subset(cds.data, seqnames == target.gene.chr & (start >= target.gene.start & end <= target.gene.end))
## Extract exons annotation
exons.data.target.gene <- subset(exons.data, seqnames == target.gene.chr & (start >= target.gene.start & end <= target.gene.end))
## Determine the genome range to plot including promoter, gene body and 3' UTR
## This depends on whether the gene is on the forward or reverse strand
range.to.plot <- target.gene.body
if(target.gene.strand == "+")
{
range.to.plot$start <- range.to.plot$start - input$promoter.length
range.to.plot$end <- range.to.plot$end + input$threeprime.length
} else if (target.gene.strand == "-")
{
range.to.plot$end <- range.to.plot$end + input$promoter.length
range.to.plot$start <- range.to.plot$start - input$threeprime.length
}
## Compute the length of the genome range to represent
current.length <- range.to.plot$end - range.to.plot$start
## Determine upper limit of the graph
number.tfs <- length(currently.selected.tfs) #length(input$selected.tfs)
upper.lim <- 25 * length(currently.selected.tfs) #length(input$selected.tfs)
## Draw DNA strand
gene.height <- -25
cord.x <- 1:current.length
plot(cord.x, rep(gene.height,length(cord.x)),type="l",col="black",lwd=3,ylab="",
cex.lab=2,axes=FALSE,xlab="",main="",cex.main=2,
ylim=c(-30,25 * length(currently.selected.tfs)), #length(input$selected.tfs)
xlim=c(-3000,max(cord.x)))
## Extract exons for target gene
exons.data.target.gene <- subset(exons.data, seqnames == target.gene.chr & (start >= target.gene.start & end <= target.gene.end))
## Transform exon coordinates to current range
min.pos <- min(exons.data.target.gene$start)
if(target.gene.strand == "+")
{
exons.data.target.gene$start <- exons.data.target.gene$start - min.pos + input$promoter.length
exons.data.target.gene$end <- exons.data.target.gene$end - min.pos + input$promoter.length
} else if(target.gene.strand == "-")
{
exons.data.target.gene$start <- exons.data.target.gene$start - min.pos + input$threeprime.length
exons.data.target.gene$end <- exons.data.target.gene$end - min.pos + input$threeprime.length
}
## Represent exons
exon.width <- 2
for(i in 1:nrow(exons.data.target.gene))
{
# Determine start/end for each exon
current.exon.start <- exons.data.target.gene$start[i]
current.exon.end <- exons.data.target.gene$end[i]
## Determine coordinates for each exon polygon and represent it
exon.x <- c(current.exon.start,current.exon.end,current.exon.end,current.exon.start)
exon.y <- c(gene.height + exon.width, gene.height + exon.width, gene.height - exon.width, gene.height - exon.width)
polygon(x = exon.x, y = exon.y, col = "blue",border = "blue")
}
## Extract cds for target gene
cds.data.target.gene <- subset(cds.data, seqnames == target.gene.chr & (start >= target.gene.start & end <= target.gene.end))
## Transform cds coordinates to current range
if(target.gene.strand == "+")
{
cds.data.target.gene$start <- cds.data.target.gene$start - min.pos + input$promoter.length
cds.data.target.gene$end <- cds.data.target.gene$end - min.pos + input$promoter.length
} else if (target.gene.strand == "-")
{
cds.data.target.gene$start <- cds.data.target.gene$start - min.pos + input$threeprime.length
cds.data.target.gene$end <- cds.data.target.gene$end - min.pos + input$threeprime.length
}
cds.width <- 3
for(i in 1:nrow(cds.data.target.gene))
{
# Determine current cds start/end
current.cds.start <- cds.data.target.gene$start[i]
current.cds.end <- cds.data.target.gene$end[i]
# Determine curret cds coordinates for the polygon and represent it
cds.x <- c(current.cds.start,current.cds.end,current.cds.end,current.cds.start)
cds.y <- c(gene.height + cds.width, gene.height + cds.width, gene.height - cds.width, gene.height - cds.width)
polygon(x = cds.x, y = cds.y, col = "blue",border = "blue")
}
## Draw arrow to represent transcription direction
if(target.gene.strand == "+")
{
lines(c(input$promoter.length,input$promoter.length,input$promoter.length+100),y=c(gene.height,gene.height+5,gene.height+5),lwd=3)
lines(c(input$promoter.length+50,input$promoter.length+100),y=c(gene.height+6,gene.height+5),lwd=3)
lines(c(input$promoter.length+50,input$promoter.length+100),y=c(gene.height+4,gene.height+5),lwd=3)
} else if (target.gene.strand == "-")
{
lines(c(current.length - input$promoter.length, current.length - input$promoter.length, current.length - input$promoter.length-100),y=c(gene.height,gene.height+5,gene.height+5),lwd=3)
lines(c(current.length - input$promoter.length-50, current.length - input$promoter.length - 100),y=c(gene.height + 6, gene.height + 5),lwd=3)
lines(c(current.length - input$promoter.length-50, current.length - input$promoter.length - 100),y=c(gene.height + 4, gene.height + 5),lwd=3)
}
## Draw promoter range
if(target.gene.strand == "+")
{
axis(side = 1,labels = c(- input$promoter.length, - input$promoter.length / 2,"TSS"),at = c(1,input$promoter.length/2,input$promoter.length),lwd=2,cex=1.5,las=2,cex=2)
} else if(target.gene.strand == "-")
{
axis(side = 1,labels = c("TSS",- input$promoter.length / 2,- input$promoter.length),at = c(current.length-input$promoter.length,current.length-input$promoter.length/2, current.length),lwd=2,cex=1.5,las=2,cex=2)
}
selected.bed.files <- bed.files[currently.selected.tfs]#input$selected.tfs]
## Since ChIPpeakAnno needs more than one region to plot our region
## is duplicated
regions.plot <- GRanges(rbind(range.to.plot,range.to.plot))
## Draw peak regions for each TF and determing TF binding sequences
## Determine TFBS motifs to search for and Selecting an
## example motif if the user does not select any of them
if(input$all.motifs)
{
selected.motifs.pwm <- motifs.pwm
} else if(length(input$selected.motifs)==0)
{
selected.motifs.pwm <- motifs.pwm[c("G-box","BRC1")]
}else
{
selected.motifs.pwm <- motifs.pwm[input$selected.motifs]
}
selected.motif.names <- names(selected.motifs.pwm)
selected.motif.ids <- motif.ids[selected.motif.names]
## Initialize data frame containing TF binding sequences in the peak regions
df.hits <- data.frame(0,0,"","","")
colnames(df.hits) <- c("tf_number","position","id","name","seq")
## Width of the rectangle representing the peak region
peak.width <- 1
for(i in 1:length(currently.selected.tfs))#length(input$selected.tfs))
{
current.tf <- currently.selected.tfs[i] #input$selected.tfs[i]
## Extract bed file name 1 and read it
current.peaks <- bed.files[[current.tf]]
#current.peaks <- read.table(file=current.bed.file,header = F, as.is = T)
peak.coordinates <- subset(current.peaks, V1 == range.to.plot$seqnames & V2 >= range.to.plot$start & V3 <= range.to.plot$end)
current.peaks.to.plot <- peak.coordinates[,2:3]
## Transform coordinates
current.peaks.to.plot <- current.peaks.to.plot - range.to.plot$start
## Check if there are peaks for the target gene
if(nrow(current.peaks.to.plot) > 0)
{
## Normalization
#chip.signal.means[i, ] <- 10 * chip.signal.means[i, ] / max(chip.signal.means[i, ])
#motifs.in.peaks <- vector(mode="list", length=nrow(current.peaks.to.plot))
for(j in 1:nrow(current.peaks.to.plot))
{
## Extract start and end point of each peak region
current.peak.start <- current.peaks.to.plot[j,1]
current.peak.end <- current.peaks.to.plot[j,2]
## Computer coordinates for polygon and draw it
peak.x <- c(current.peak.start,current.peak.end,
current.peak.end,current.peak.start)
peak.y <- c(25*(i - 1) - 5 + peak.width, 25*(i - 1) - 5 + peak.width,
25*(i - 1) - 5 - peak.width, 25*(i - 1) - 5 - peak.width)
#polygon(x = peak.x, y = peak.y, col = area.colors[i], border = line.colors[i],lwd=2)
polygon(x = peak.x, y = peak.y, col = tf.colors[current.tf], border = tf.colors[current.tf],lwd=2)
## Identify TF binding DNA motifs
peak.chr <- peak.coordinates[j, 1]
peak.start <- peak.coordinates[j, 2]
peak.end <- peak.coordinates[j, 3]
## Extract peak sequence
if(peak.chr == "1")
{
peak.sequence <- as.character(chr1[peak.start:peak.end])
} else if(peak.chr == "2")
{
peak.sequence <- as.character(chr2[peak.start:peak.end])
} else if(peak.chr == "3")
{
peak.sequence <- as.character(chr3[peak.start:peak.end])
} else if(peak.chr == "4")
{
peak.sequence <- as.character(chr4[peak.start:peak.end])
} else if(peak.chr == "5")
{
peak.sequence <- as.character(chr5[peak.start:peak.end])
}
peak.rev.comp.sequence <- reverse.complement(peak.sequence)