forked from ajeyone/codes2html
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcodes2html.py
executable file
·197 lines (169 loc) · 7.07 KB
/
codes2html.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/env python
import os
import sys
import re
import fnmatch
import argparse
import time
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import get_lexer_for_filename
from pygments.formatters.html import DOC_HEADER
from pygments.formatters.html import DOC_FOOTER
def _parse_args():
    """Parse and validate the command-line arguments.

    Besides the raw argparse fields, the returned namespace carries:

    * ``sources`` -- the given paths with ``~`` expanded and trailing path
      separators removed;
    * ``output`` / ``ignore_file`` -- with ``~`` expanded;
    * ``ignore_patterns`` -- the patterns loaded from the ignore file;
    * ``extension_patterns`` -- glob patterns built from ``--extensions``;
    * ``lines`` -- the line limit; a non-positive value is replaced by
      ``2 ** 31`` so that it acts as "unlimited".

    Returns:
        The populated ``argparse.Namespace``, or ``None`` when at least one
        source path does not exist (an error is printed for each of them).
    """
    parser = argparse.ArgumentParser(
        description='A tool to collect codes and highlight syntax in a single html document.')
    parser.add_argument('sources', metavar='source', nargs='+',
                        type=str, help='source code directory or file')
    parser.add_argument(
        '-e', '--extensions', help='file extensions to be considered as source files. separated with comma or a single "*" indicates all. e.g. "c,cpp,h,m,mm". default is "*"', default='*')
    # The default is a real int rather than a string that argparse has to
    # convert through ``type``.
    parser.add_argument('-l', '--lines', help='limit the lines of source codes. but the content of a file is always complete, so the final lines may exceed this value. 0 for unlimited. default is 3500', type=int, default=3500)
    parser.add_argument(
        '-o', '--output', help='output file path. default is output.html', default='output.html')
    parser.add_argument('-i', '--ignore', help='path of the ignore file, similar to .gitignore. default is ignore.txt',
                        default='ignore.txt', dest='ignore_file')
    parser.add_argument('-f', '--footer', help='file footer string, you can use </br> to insert lines. default is </br>',
                        default='</br>', dest='file_footer')
    args = parser.parse_args()

    sources = []
    for s in args.sources:
        expanded = os.path.expanduser(s)
        # Drop trailing separators, but keep a path that consists only of
        # separators (the filesystem root) instead of reducing it to ''.
        s = expanded.rstrip(os.path.sep) or expanded
        if os.path.exists(s):
            sources.append(s)
        else:
            print('error: "', s, '" does not exists!', sep='')
    # Any missing source aborts the run; the errors were printed above.
    if len(sources) != len(args.sources):
        return None

    args.sources = sources
    args.output = os.path.expanduser(args.output)
    args.ignore_file = os.path.expanduser(args.ignore_file)
    args.ignore_patterns = _parse_ignore_file(args.ignore_file)
    args.extension_patterns = _parse_extensions(args.extensions)
    if args.lines <= 0:
        # 0 (or a negative value) means "no limit".
        args.lines = 2 ** 31
    return args
def _parse_ignore_file(path):
    """Load ignore patterns from *path*, one pattern per line.

    Surrounding whitespace is stripped from every line and lines that are
    empty afterwards are dropped, so blank or whitespace-only lines never
    become patterns.

    Args:
        path: Location of the ignore file.

    Returns:
        A list of pattern strings in file order. The list is empty when the
        file does not exist or cannot be read; a warning is printed in both
        cases.
    """
    if not os.path.exists(path):
        print('warning: no ignore file specified, all codes will be collected')
        return []
    try:
        with open(path, 'r') as fd:
            stripped = [line.strip() for line in fd]
    except (OSError, ValueError) as err:
        # Unreadable or undecodable file: keep the best-effort behaviour of
        # ignoring nothing, but say why instead of failing silently.
        print('warning: cannot read ignore file "', path, '": ', err, sep='')
        return []
    return [pattern for pattern in stripped if pattern]
def _parse_extensions(extensions_string):
    """Turn a comma-separated extension list into ``fnmatch`` patterns.

    Each entry is trimmed, an optional leading dot is removed (so ``py`` and
    ``.py`` are equivalent) and the entry is turned into ``*.<ext>``. Empty
    entries are skipped and duplicates are removed while keeping the order in
    which the extensions were first given.

    Args:
        extensions_string: For example ``"c,cpp,h"``; a single ``"*"`` means
            "every file".

    Returns:
        A list of glob patterns. An empty list means that no extension
        filter applies.
    """
    if extensions_string.strip() == '*':
        return []
    patterns = []
    for entry in extensions_string.split(','):
        extension = entry.strip().lstrip('.')
        if not extension:
            continue
        pattern = '*.' + extension
        if pattern not in patterns:
            patterns.append(pattern)
    return patterns
def _short_class_name(obj):
    """Return the unqualified name of the class of *obj*.

    The name is read directly from the type object instead of being parsed
    out of its ``repr``, so it also works for classes whose qualified name is
    not a plain dotted identifier (for example classes defined inside a
    function).
    """
    return type(obj).__name__
def _match_any_pattern(name, patterns):
    """Report whether *name* matches at least one glob in *patterns*.

    Args:
        name: The string to test, matched with ``fnmatch.fnmatch``.
        patterns: An iterable of shell-style patterns.

    Returns:
        ``True`` on the first matching pattern, ``False`` when none matches
        (including when *patterns* is empty).
    """
    return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)
class Codes2HtmlTool:
    """Collect source files and write them, highlighted, into one HTML file.

    All the work is done by the constructor: it opens ``args.output``, writes
    the document header, walks every entry of ``args.sources`` and appends
    the highlighted content of each accepted file, then writes the document
    footer. ``written_lines`` holds the number of source lines written so
    far (after filtering, not counting the per-file name line).
    """

    # Encoding of the output file; also announced in the document header.
    OUTPUT_ENCODING = 'utf-8'

    # Extensions that are looked up under another extension when choosing a
    # lexer: (extension of the file, extension used for the lookup).
    LEXER_ALIASES = (('.wxml', '.xml'), ('.wxss', '.css'))

    def __init__(self, args):
        """Run the whole conversion described by *args*.

        Args:
            args: The namespace produced by ``_parse_args()``; the attributes
                ``output``, ``sources``, ``lines``, ``file_footer``,
                ``ignore_patterns`` and ``extension_patterns`` are read.
        """
        self.args = args
        self.written_lines = 0
        self.hf = HtmlFormatter()
        with open(args.output, 'w', encoding=self.OUTPUT_ENCODING) as write_fd:
            self.write_fd = write_fd
            write_fd.write(self._header())
            for path in args.sources:
                self._collect_files(path)
            print('total lines: {0}'.format(self.written_lines))
            write_fd.write(self._footer())

    def _should_ignore_file(self, name):
        """Return True when *name* matches one of the ignore patterns."""
        return _match_any_pattern(name, self.args.ignore_patterns)

    def _accept_extension(self, name):
        """Return True when *name* passes the extension filter.

        An empty pattern list means that every file is accepted.
        """
        patterns = self.args.extension_patterns
        return len(patterns) == 0 or _match_any_pattern(name, patterns)

    def _header(self):
        """Build the opening part of the HTML document, style sheet included."""
        return DOC_HEADER % dict(
            title=self.hf.title,
            styledefs=self.hf.get_style_defs('body'),
            # Announce the encoding the file is really written in, not the
            # formatter's own ``encoding`` attribute.
            encoding=self.OUTPUT_ENCODING)

    def _footer(self):
        """Return the closing part of the HTML document."""
        return DOC_FOOTER

    def _collect_files(self, path):
        """Write the file at *path*, or every accepted file below it.

        A directory is walked recursively in sorted order. Entries whose name
        starts with a dot are skipped, entries matching an ignore pattern are
        reported and skipped, and only files passing the extension filter are
        written. The line limit is checked before each entry, so a file that
        has been started is always written completely.

        Args:
            path: A directory, or a single file given as a source.
        """
        if os.path.isfile(path):
            # A source may be a single file; os.listdir() would fail on it.
            if self.written_lines >= self.args.lines:
                return
            name = os.path.basename(path)
            if self._should_ignore_file(name):
                print('ignore "', path, '"', sep='')
            elif self._accept_extension(name):
                self._highlight_and_write_file(path)
            return

        for subfile in sorted(os.listdir(path)):
            if self.written_lines >= self.args.lines:
                break
            if subfile.startswith('.'):
                continue
            full_path = os.path.join(path, subfile)
            if self._should_ignore_file(subfile):
                print('ignore "', full_path, '"', sep='')
                continue
            if os.path.isdir(full_path):
                self._collect_files(full_path)
            elif self._accept_extension(subfile):
                self._highlight_and_write_file(full_path)

    def _filter_lines(self, lines):
        """Drop blank lines and comments from *lines*.

        The filter is purely textual and is applied the same way to every
        language:

        * blank lines and lines starting with ``//`` or ``#`` are dropped;
        * a line starting with ``/*`` or ``<!--`` opens a block comment and
          a line ending with ``*/`` or ``-->`` closes it; the opening line,
          the closing line and everything in between are dropped;
        * on the remaining lines, the text from the first ``//`` onwards is
          removed, unless that ``//`` directly follows a ``:`` (as in the
          ``://`` of a URL).

        Args:
            lines: The lines of a file, line endings included.

        Returns:
            A new list with the lines that are kept.
        """
        kept = []
        in_block_comment = False
        for line in lines:
            stripped = line.strip()
            if not stripped or stripped.startswith(('//', '#')):
                continue
            if stripped.startswith(('/*', '<!--')):
                in_block_comment = True
            if stripped.endswith(('*/', '-->')):
                in_block_comment = False
                continue
            if in_block_comment:
                continue
            # Strip a trailing inline comment. Only look at the preceding
            # character when '//' was actually found, and leave '://' alone.
            idx = line.find('//')
            if idx > 0 and line[idx - 1] != ':':
                # Keep the line ending so that the next line is not glued to
                # this one when the lines are joined.
                ending = '\n' if line.endswith('\n') else ''
                line = line[:idx].rstrip() + ending
            kept.append(line)
        return kept

    def _lexer_lookup_name(self, full_path):
        """Return the file name to use when looking up the lexer of *full_path*.

        Only the extension at the very end of the path is replaced, according
        to ``LEXER_ALIASES``; any other path is returned unchanged.
        """
        for extension, alias in self.LEXER_ALIASES:
            if full_path.endswith(extension):
                return full_path[:-len(extension)] + alias
        return full_path

    def _highlight_and_write_file(self, full_path):
        """Highlight one file and append the result to the output document.

        The file content is filtered with ``_filter_lines`` and prefixed with
        a ``>>>>`` line naming the file, then highlighted and written,
        followed by the configured file footer. When any step fails the file
        is reported as "not source code" and skipped.

        Args:
            full_path: Path of the file to process.
        """
        write_fd = self.write_fd
        try:
            lexer = get_lexer_for_filename(self._lexer_lookup_name(full_path))
            with open(full_path) as fd:
                lines = self._filter_lines(fd.readlines())
            # Put the file name on the first line of the listing.
            lines.insert(0, '>>>>' + os.path.basename(full_path) + '\n')
            content = ''.join(lines)
            if full_path.endswith('.h'):
                # ".h" file is possible to be objective-c.
                # guess again with file content to determine the actual syntax
                lexer = get_lexer_for_filename(full_path, code=content)
            formatted = highlight(content, lexer, self.hf)
            write_fd.write(formatted)
            write_fd.write(self.args.file_footer)
            # Count the lines only once they have really been written; the
            # inserted file-name line is not counted.
            self.written_lines += len(lines) - 1
            print('highlighted with ', _short_class_name(
                lexer), ': "', full_path, '"', sep='')
        except Exception:
            # Best effort: skip this file and go on with the next one.
            # KeyboardInterrupt and SystemExit are deliberately not caught.
            print('not source code: "', full_path, '"', sep='')
if __name__ == "__main__":
    # Time the whole run, argument parsing included.
    started = time.time()
    parsed = _parse_args()
    # _parse_args() yields None when a source path is missing; nothing is
    # converted in that case, but the elapsed time is still reported.
    if parsed is not None:
        Codes2HtmlTool(parsed)
    finished = time.time()
    print('total time: {0:.1f}s'.format(finished - started))