-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHTMLizer.py
160 lines (131 loc) · 7.13 KB
/
HTMLizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!bin/usr/python
# -*- coding: utf-8 -*-.
# Adrian Garcia Moreno
# HTMLizer
import io
import re
import os
import subprocess
import sys
from input_checker import input_checker
def tabloider(go_result_table, summary_warning = ""):
handle = go_result_table.split("\n")
go_result_table_htmlizer = ""
for line in handle:
if line[0] != "#":
bytabs = line.split("\t")
bytabs[0] = "<tr><td>{}".format(bytabs[0])
bytabs[-1] = "{}</td></tr>\n".format(bytabs[-1])
go_result_table_htmlizer += "</td><td>".join(bytabs)
else:
go_result_table_htmlizer += line+"\n"
if summary_warning != "":
summary_warning = "<tr><td colspan=6>{}</td></tr>\n".format(summary_warning)
go_result_table_htmlizer = "{}{}</table>".format(go_result_table_htmlizer, summary_warning)
return go_result_table_htmlizer
def urlizer(full_text, input_folder_or_file, GOus_dict):
regions_urlbase = '<a href= "https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&position={}%3A{}-{}" target=new>{}</a>'#.format(chro,start,end, region)
HP_urlbase = '<h1><a href= "http://www.human-phenotype-ontology.org/hpoweb?id={}" target=new>{}</a></h1>'#.format(HP, HP)
GO_urlbase = '<a href= "http://amigo.geneontology.org/amigo/term/{}" target=new>{}</a>'#.format(GOID, GOID)
geneID_urlbase = '<a href= "https://www.ncbi.nlm.nih.gov/gene/{}" style="color:#7a1919;background-color:#ffffa0" target=new>{}</a>'#.format(GeneID, GeneID)
full_graph_base = '<a href= "http://amigo.geneontology.org/visualize?format=png&term_data_type=string&mode=amigo&term_data={}" target=new>AmiGO GRAPH</a>'#.format(GOus_dict[lines[line][3]])
caption_base = '<table>\n<caption>{} {} <a href=..{} target=new>TogGO GRAPH</a></caption>\n'#.format(full_graph, lines[line], full_path_name)
sig_genes_base = '<a href=..{} target=new>{}</a>'
regions_match = re.compile(r"^chr.+")
GeneID_match = re.compile(r"^(\d+)")
caption_match = re.compile(r"^###+.*")
HP_match = re.compile(r"(HP:[0-9]+)")
GOID_match = re.compile(r"(GO:[0-9]+).*")
lines = full_text.split("\n")
for line in xrange(len(lines)):
if regions_match.search(lines[line]) is not None:
chro, start, end, score = lines[line].split("\t")
region = "\t".join([chro, start, end])
lines[line] = regions_urlbase.format(chro,start,end, region)+"\t"+score
elif caption_match.search(lines[line]) is not None:
full_path_name = subprocess.check_output("find {}../*GOanalysis/{}/{}*.pdf".format(input_folder_or_file, feature_name, lines[line][3]), shell=True).rstrip("\n")
full_path_name = full_path_name.split("..")[-1] # this is to normalise the path and later i will add the .. in the substitution
full_graph = full_graph_base.format(GOus_dict[lines[line][3]])
lines[line] = caption_base.format(full_graph, lines[line], full_path_name)
elif GeneID_match.match(lines[line]) is not None:
GeneID = GeneID_match.match(lines[line]).group(1)
lines[line] = geneID_urlbase.format(GeneID,GeneID)
elif HP_match.search(lines[line]) is not None:
HP = HP_match.search(lines[line]).group(1)
lines[line] = re.sub(HP, HP_urlbase.format(HP, HP), lines[line])
elif GOID_match.search(lines[line]) is not None:
GOID = GOID_match.search(lines[line]).group(1)
lines[line] = re.sub(GOID, GO_urlbase.format(GOID, GOID), lines[line])
full_path_name = subprocess.check_output("find {}/../*GOanalysis/{}/htmls/{}.html".format(input_folder_or_file, feature_name, GOID), shell=True).rstrip("\n")
full_path_name = full_path_name.split("..")[-1]
GO_elements = lines[line].split("</td><td>")
GO_elements[3] = sig_genes_base.format(full_path_name, GO_elements[3])
lines[line] = "</td><td>".join(GO_elements)
full_text = "\n".join(lines)
return full_text
input_folder_or_file = sys.argv[1]
try:
os.mkdir("{}../htmls".format(input_folder_or_file))
except:
answer = raw_input("Folder already exists, some files could be overwritten\nDo you want to continue? (Y/N)")
if answer == "N":
sys.exit()
paths, input_files = input_checker(input_folder_or_file)
if os.path.isfile(input_files[0]) and len(input_files) == 1:
path = paths[0]
else:
path = paths[0]
input_files = [paths[0]+bed_file for bed_file in input_files]
files_toprocess = len(input_files)
counter = 1
for input_summedup in input_files:
print "HTMLizing {}\t{} of {}".format(input_summedup, counter, files_toprocess)
counter += 1
output_html = os.path.splitext(os.path.basename(input_summedup))[0]
results_handle = open(input_summedup, "r")
feature_name = results_handle.readline().rstrip()
results_handle.seek(0)
full_text = results_handle.read()
parts = full_text.split("\n\n")
GOus_dict = {}
GOID_match = re.compile(r"(GO:[0-9]+).*")
if len(parts) > 1 and parts[-2][0] == "#":
GOus = GOID_match.findall(parts[-4])
if len(GOus) == 0:
summary_warning = "No GO.ID passed the summary threshold, you can still check the results of the TopGO GRAPH"
else:
summary_warning = ""
GO_category = parts[-4][3]
GOus_dict[GO_category] = re.sub(":", "%3A", " ".join(GOus))
parts[-4] = tabloider(parts[-4], summary_warning)
GOus = GOID_match.findall(parts[-3])
if len(GOus) == 0:
summary_warning = "No GO.ID passed the summary threshold, you can still check the results of the TopGO GRAPH"
else:
summary_warning = ""
GO_category = parts[-3][3]
GOus_dict[GO_category] = re.sub(":", "%3A", " ".join(GOus))
parts[-3] = tabloider(parts[-3], summary_warning)
GOus = GOID_match.findall(parts[-2])
if len(GOus) == 0:
summary_warning = "No GO.ID passed the summary threshold, you can still check the results of the TopGO GRAPH"
else:
summary_warning = ""
GO_category = parts[-2][3]
GOus_dict[GO_category] = re.sub(":", "%3A", " ".join(GOus))
parts[-2] = tabloider(parts[-2], summary_warning)
full_text = "\n\n".join(parts)
#print GOus_dict
full_text = urlizer(full_text, path, GOus_dict)
HTML_heading = "<!DOCTYPE html>\n<html>\n<head>\n<title>{}</title>\
\n<style>\ntable, th, td {{border:1px solid black; border-collapse:\
collapse;}}\ncapth, td {{padding: 5px;}}\n</style></head>\n<body>\n<pre>".format(feature_name)
HTML_footing = "</pre>\n</body>\n</html>"
htmlized = open("{}../htmls/{}.html".format(path, output_html), "w")
htmlized.write(HTML_heading)
htmlized.write(full_text)
htmlized.write(HTML_footing)
htmlized.close()
# file:///home/bioinfo/Desktop/TFM/Study/hyI/Study/hyI/hg19_ncbi_run/annotation_results/hg19_enrichment_GOanalysis/results/HP:0100704/MFgraph_HP:0100704_classic_10_all.pdf
# ../Study/hyI/hg19_ncbi_run/summed_up_annotation/
#os.system("mv *.html htmls/")