-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathez_epub_example.py
63 lines (58 loc) · 2 KB
/
ez_epub_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# -*- coding: utf-8 -*-
import re
import ez_epub
def formatParagraph(paragraph):
    """Clean up one plain-text paragraph and split it into styled segments.

    Three substitutions are applied, in this order, before segmentation:

    1. every ``--`` becomes an em dash;
    2. each run of spaces collapses to a single space;
    3. ``_text_`` becomes ``<em>text</em>`` (non-greedy, so several
       emphasised spans in one paragraph stay separate).

    Args:
        paragraph: The paragraph text as a single string.

    Returns:
        The result of ``segmentParagraph`` applied to the marked-up text.
    """
    # The replacement is U+2014 EM DASH. It used to be the two characters
    # U+00A1 U+00AA, which is how the GBK bytes for an em dash (A1 AA) read
    # when mis-decoded as Latin-1, so mojibake ended up in the output.
    paragraph = paragraph.replace('--', '—')
    paragraph = re.sub(r' +', ' ', paragraph)
    paragraph = re.sub(r'_(.+?)_', r'<em>\1</em>', paragraph)
    return segmentParagraph(paragraph)
def segmentParagraph(paragraph):
    """Split tag-marked text into ``(text, style)`` segments.

    Anything of the form ``<name>`` opens a style and ``</name>`` closes
    it. The tags themselves are dropped; each stretch of text between
    them is paired with the space-joined names of the styles open at that
    point, in the order they were opened.

    Args:
        paragraph: Text that may contain ``<name>`` / ``</name>`` markers.

    Returns:
        A list of ``(text, style)`` tuples. ``style`` is ``''`` when no
        tag is open. Empty stretches of text produce no tuple, so an empty
        input yields ``[]``.
    """
    segments = []
    openTags = []
    textStart = 0
    for tag in re.finditer(r'<(/?)([^>]+)>', paragraph):
        if tag.start() > textStart:
            segments.append(
                (paragraph[textStart:tag.start()], ' '.join(openTags)))
        closing, name = tag.groups()
        if not closing:
            openTags.append(name)
        elif name in openTags:
            # Removes the earliest still-open tag of that name.
            openTags.remove(name)
        # else: a closing tag with no matching opener is ignored, so that
        # one stray "</x>" in the input cannot abort the whole conversion
        # with a ValueError from list.remove().
        textStart = tag.end()
    if textStart < len(paragraph):
        segments.append((paragraph[textStart:], ' '.join(openTags)))
    return segments
def parseBook(path, startLineNum, endLineNum):
    """Read a plain-text book and split it into chapter sections.

    Lines are stripped of surrounding whitespace. A line matching
    ``Chapter <digits>`` starts a new ``ez_epub.Section`` whose title is
    that line. Consecutive non-blank lines are joined with single spaces
    into one paragraph; a blank line (or a chapter heading, or the end of
    the range) ends the paragraph, which is passed through
    ``formatParagraph`` and appended to the current section's ``text``.

    Text that appears before the first chapter heading has no section to
    belong to and is discarded.

    Args:
        path: Path of the text file to read.
        startLineNum: 1-based number of the first line to process; earlier
            lines are skipped.
        endLineNum: 1-based number of the last line to process. A value
            of 0 or less means "read to the end of the file".

    Returns:
        The list of sections in the order their headings were found.
    """
    chapterHeading = re.compile(r'Chapter \d+$')
    sections = []
    section = None  # no chapter heading seen yet
    pending = []    # stripped, non-empty lines of the paragraph in progress

    def flushParagraph():
        # Emit the paragraph collected so far into the current section.
        # Reads the enclosing ``section`` at call time, so it always targets
        # the most recently started chapter.
        if pending and section is not None:
            section.text.append(formatParagraph(' '.join(pending)))
        del pending[:]

    # ``with`` guarantees the file is closed, including on early ``break``
    # or an exception while parsing.
    with open(path) as fin:
        for lineNum, line in enumerate(fin, 1):
            if lineNum < startLineNum:
                continue
            if endLineNum > 0 and lineNum > endLineNum:
                break
            line = line.strip()
            if chapterHeading.match(line):
                # Finish the previous chapter's last paragraph first so it
                # is not carried over into the new chapter.
                flushParagraph()
                section = ez_epub.Section()
                section.css = """.em { font-style: italic; }"""
                section.title = line
                sections.append(section)
            elif line == '':
                flushParagraph()
            else:
                pending.append(line)
    # The selected range may end without a trailing blank line.
    flushParagraph()
    return sections
if __name__ == '__main__':
    # Example run: turn a local plain-text copy of the novel into an EPUB.
    # Only lines 38..13061 of the text file are parsed; the output location
    # is derived from the book title.
    sourceText = r'D:\epub\1342.txt'
    firstLine, lastLine = 38, 13061

    epub = ez_epub.Book()
    epub.title = 'Pride and Prejudice'
    epub.authors = ['Jane Austen']
    epub.sections = parseBook(sourceText, firstLine, lastLine)
    epub.make(r'D:\epub\%s' % epub.title)