-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdb.py
169 lines (139 loc) · 4.77 KB
/
db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import json
import logging
import pathlib
import sqlite3
from collections import namedtuple
from typing import Dict, List, Tuple
logger = logging.getLogger(__name__)
KoboHighlight = namedtuple(
"KoboHighlight",
["start_path", "end_path", "start_offset", "end_offset", "text", "content_path"],
)
CalibreHighlight = namedtuple(
"CalibreHighlight",
[
"book",
"format",
"user_type",
"user",
"timestamp",
"annot_id",
"annot_type",
"highlight",
"searchable_text",
],
)
def get_calibre_book_id(kobo_volume: pathlib.Path, lpath: str) -> int:
"""Get the calibre book id from the lpath of the book in the kobo db."""
calibre_device_metadata = kobo_volume.resolve() / "metadata.calibre"
with open(calibre_device_metadata) as f:
metadata = json.load(f)
target_book = list(
filter(lambda x: x.get("lpath").split("/")[-1] == lpath, metadata)
)[0]
return target_book["application_id"]
def get_likely_book_path_from_calibre(
calibre_db_path: pathlib.Path, kobo_volume: pathlib.Path, book_lpath: str
) -> Tuple[int, str]:
"""Get the likely book path from the calibre db."""
con = sqlite3.connect(calibre_db_path)
cur = con.cursor()
book_lpath = book_lpath.split("/")[-1]
book_id = get_calibre_book_id(kobo_volume, book_lpath)
book_path = cur.execute(f"SELECT path FROM books WHERE id = {book_id}").fetchone()[
0
]
con.close()
return book_id, book_path
def get_dictinct_highlights_from_kobo(
input_kobo_db: pathlib.Path,
) -> Dict[str, List[KoboHighlight]]:
"""Get distinct highlights from the kobo db."""
con = sqlite3.connect(input_kobo_db)
cur = con.cursor()
result = {}
for distinct_name in cur.execute("SELECT DISTINCT `VolumeID` FROM `Bookmark`"):
name = distinct_name[0]
if name not in result:
result[name] = []
cur1 = con.cursor()
for row in cur1.execute(
f"SELECT * FROM `Bookmark` WHERE `VolumeID` = '{name}' AND text !=''"
):
content_path = row[2].split("epub!")[-1]
content_path = content_path.lstrip("!")
content_path = content_path.replace("!", "/")
if "#" in content_path:
content_path = content_path.split("#")[-2]
if not content_path:
continue
highlight = KoboHighlight(
row[3], row[6], row[5], row[8], row[9], content_path
)
result[name].append(highlight)
con.close()
return result
def get_highlights_from_kobo_by_book(
input_kobo_db: pathlib.Path, book: str
) -> List[KoboHighlight]:
"""Get highlights from the kobo db for a specific book."""
con = sqlite3.connect(input_kobo_db)
result = []
cur1 = con.cursor()
for row in cur1.execute(
f"SELECT * FROM `Bookmark` WHERE `VolumeID` = '{book}' AND text !=''"
):
content_path = row[2].split("epub!")[-1]
content_path = content_path.lstrip("!")
content_path = content_path.replace("!", "/")
if "#" in content_path:
content_path = content_path.split("#")[-2]
if not content_path:
continue
highlight = KoboHighlight(row[3], row[6], row[5], row[8], row[9], content_path)
result.append(highlight)
con.close()
return result
def insert_highlights_into_calibre(
output_calibre_db: pathlib.Path, books_highlights: List[CalibreHighlight]
) -> int:
"""Insert highlights into the calibre db."""
con = sqlite3.connect(output_calibre_db)
cur = con.cursor()
actually_inserted_count = 0
for h in books_highlights:
# check for duplicates
if cur.execute(
f"SELECT id from annotations where annot_id = '{h.annot_id}'"
).fetchone():
logger.debug(
f"Annotation with id {h.annot_id} (from {h.book}) already exists in db"
)
continue
query = (
"INSERT INTO annotations( "
"book, format, user_type, user, "
"timestamp, annot_id, annot_type, "
"annot_data, searchable_text) VALUES("
"?, ?, ?, ?, ?, ?, ?, ?, ?)"
)
logger.debug(f"Query: {query}")
cur.execute(
query,
(
h.book,
h.format,
h.user_type,
h.user,
h.timestamp,
h.highlight["uuid"],
h.annot_type,
json.dumps(h.highlight),
h.searchable_text,
),
)
actually_inserted_count += 1
logger.info(f"Inserted {actually_inserted_count} new highlights")
con.commit()
con.close()
return actually_inserted_count