Skip to content

Commit

Permalink
Make many converters more robust, i.e. don't fail if a single note conversion fails
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Dec 1, 2024
1 parent 7f9869d commit c58d5e8
Show file tree
Hide file tree
Showing 20 changed files with 439 additions and 387 deletions.
24 changes: 24 additions & 0 deletions src/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import tarfile
import tempfile
import time
from typing import Any, Callable, TypeVar, cast
import uuid
import zipfile

Expand All @@ -28,6 +29,29 @@
###########################################################


F = TypeVar("F", bound=Callable[..., Any])


def catch_all_exceptions(func: F) -> F:
    """
    Decorator to catch all exceptions.
    Useful if many individual notes are converted.

    The return value of the wrapped function is passed through unchanged.
    If the wrapped function raises an exception derived from "Exception",
    a warning is logged, the exception including its traceback is logged at
    debug level and "None" is returned instead.
    """
    # Local import: keeps this block independent of the module-level imports.
    import functools  # pylint: disable=import-outside-toplevel

    # Preserve name, docstring and other metadata of the wrapped function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            LOGGER.warning(
                "Failed to convert note. "
                'Enable extended log by "--stdout-log-level DEBUG".'
            )
            # https://stackoverflow.com/a/52466005/7410886
            LOGGER.debug(exc, exc_info=True)
            return None

    # The wrapper accepts the same arguments as "func", so the decorated
    # callable is presented to type checkers with the original type.
    return cast(F, wrapper)


def safe_path(path: Path | str, max_name_length: int = 50) -> Path | str:
r"""
Return a safe version of the provided path or string.
Expand Down
1 change: 1 addition & 0 deletions src/formats/cherrytree.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.bookmarked_nodes = []

@common.catch_all_exceptions
def convert_to_markdown(self, node, root_notebook):
# TODO
# pylint: disable=too-many-locals
Expand Down
2 changes: 1 addition & 1 deletion src/formats/day_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import markdown_lib.common


def guess_title(body):
def guess_title(body: str) -> str:
for line in body.split("\n"):
if line.startswith("!["):
continue
Expand Down
50 changes: 25 additions & 25 deletions src/formats/dynalist.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,36 +30,36 @@ def handle_markdown_links(body: str, root_folder: Path) -> imf.NoteLinks:
class Converter(converter.BaseConverter):
    """Converter that accepts ".zip" input and turns ".txt" files into notes."""

    accepted_extensions = [".zip"]

    @common.catch_all_exceptions
    def convert_file(self, item: Path, parent: imf.Notebook):
        """
        Convert a single file to a note and append it to "parent".

        Files with a suffix other than ".txt" are ignored. Exceptions are
        handled by the decorator, so one broken file doesn't abort the
        conversion of the remaining files.
        """
        # We get a zip with opml and txt. Only advantage of opml over txt is
        # the owner attribute. So just use txt, because it's simpler.
        # opml is supported by pandoc, but the import is not working properly.
        if item.suffix.lower() != ".txt":
            return
        title = item.stem
        self.logger.debug(f'Converting note "{title}"')

        note_imf = imf.Note(
            title,
            item.read_text(encoding="utf-8"),
            source_application=self.format,
        )
        note_imf.tags = [
            imf.Tag(tag)
            for tag in markdown_lib.common.get_inline_tags(note_imf.body, ["#", "@"])
        ]
        note_imf.note_links = handle_markdown_links(note_imf.body, self.root_path)
        parent.child_notes.append(note_imf)

    def convert_folder(self, folder: Path, parent: imf.Notebook):
        """
        Convert the content of "folder" recursively.

        Files are delegated to "convert_file". Each subfolder becomes a child
        notebook of "parent". Items are processed in sorted order.
        """
        for item in sorted(folder.iterdir()):
            if item.is_file():
                # Convert each file exactly once. The per-file logic lives in
                # "convert_file" only.
                self.convert_file(item, parent)
            else:
                new_parent = imf.Notebook(item.name)
                self.convert_folder(item, new_parent)
                parent.child_notebooks.append(new_parent)

    def convert(self, file_or_folder: Path):
        """Convert everything below "self.root_path" into the root notebook."""
        self.convert_folder(self.root_path, self.root_notebook)
1 change: 1 addition & 0 deletions src/formats/evernote.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def link_notes_by_title(self, root_notebook: imf.Notebook | None = None):
for notebook in root_notebook.child_notebooks:
self.link_notes_by_title(notebook)

@common.catch_all_exceptions
def convert_single_enex(self, file_or_folder: Path, parent_notebook: imf.Notebook):
self.logger.debug(f'Converting file "{file_or_folder.name}"')
try:
Expand Down
86 changes: 43 additions & 43 deletions src/formats/google_keep.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,49 +11,49 @@
class Converter(converter.BaseConverter):
    """Converter that creates one note per exported JSON file."""

    accepted_extensions = [".tgz", ".zip"]

    @common.catch_all_exceptions
    def convert_file(self, file_: Path):
        """
        Convert a single JSON file to a note in the root notebook.

        Exceptions are handled by the decorator, so one broken file doesn't
        abort the conversion of the remaining files.
        """
        note_keep = json.loads(file_.read_text(encoding="utf-8"))

        title = note_keep.get("title", "")
        self.logger.debug(f'Converting note "{title}"')

        tags_keep = [
            label["name"] for label in note_keep.get("labels", []) if "name" in label
        ]
        if note_keep.get("isPinned"):
            tags_keep.append("google-keep-pinned")

        # Attachment paths are resolved relative to the folder of the JSON file.
        resources_keep = [
            imf.Resource(file_.parent.absolute() / resource_keep["filePath"])
            for resource_keep in note_keep.get("attachments", [])
        ]

        # fall back to HTML if there is no plain text
        body = note_keep.get("textContent", note_keep.get("textContentHtml", ""))
        if (annotations := note_keep.get("annotations")) is not None:
            annotations_md = ["", "", "## Annotations", ""]
            # Single quotes inside the f-string: reusing the outer quote type
            # is only valid syntax since Python 3.12.
            annotations_md.extend(
                f"- <{annotation['url']}>: {annotation['title']}"
                for annotation in annotations
            )
            annotations_md.append("")  # newline at the end
            body += "\n".join(annotations_md)

        note_imf = imf.Note(
            title,
            body,
            source_application=self.format,
            # Labels / tags don't have a separate id. Just use the name as id.
            tags=[imf.Tag(tag) for tag in tags_keep],
            resources=resources_keep,
        )
        # The "...Usec" values are divided by 10**6 before the conversion.
        if (value := note_keep.get("createdTimestampUsec")) is not None:
            note_imf.created = common.timestamp_to_datetime(value // (10**6))
        if (value := note_keep.get("userEditedTimestampUsec")) is not None:
            note_imf.updated = common.timestamp_to_datetime(value // (10**6))
        self.root_notebook.child_notes.append(note_imf)

    def convert(self, file_or_folder: Path):
        """Convert all JSON files below "self.root_path" in sorted order."""
        # take only the exports in json format
        for file_ in sorted(self.root_path.rglob("*.json")):
            # Convert each file exactly once. The per-file logic lives in
            # "convert_file" only.
            self.convert_file(file_)
45 changes: 25 additions & 20 deletions src/formats/jrnl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,38 @@
from pathlib import Path
import json

import common
import converter
import intermediate_format as imf


class Converter(converter.BaseConverter):
    """Converter that creates one note per entry of a JSON file."""

    accepted_extensions = [".json"]

    @common.catch_all_exceptions
    def convert_note(self, note_jrnl):
        """
        Convert a single entry to a note in the root notebook.

        The entry is accessed by the keys "date", "time", "title", "body",
        "tags" and "starred". Exceptions (for example a missing key) are
        handled by the decorator, so one broken entry doesn't abort the
        conversion of the remaining entries.
        """
        title = f"{note_jrnl['date']} {note_jrnl['time']} {note_jrnl['title']}"
        self.logger.debug(f'Converting note "{title}"')

        # There is only one timestamp per entry. It is used for "created" and
        # "updated".
        timestamp = dt.datetime.fromisoformat(
            f"{note_jrnl['date']}T{note_jrnl['time']}"
        )

        tags = [tag.lstrip("@") for tag in note_jrnl["tags"]]
        if note_jrnl["starred"]:
            tags.append("jrnl-starred")

        note_imf = imf.Note(
            title,
            note_jrnl["body"],
            created=timestamp,
            updated=timestamp,
            source_application=self.format,
            tags=[imf.Tag(tag) for tag in tags],
        )
        self.root_notebook.child_notes.append(note_imf)

    def convert(self, file_or_folder: Path):
        """Convert all items of the "entries" list in the given JSON file."""
        file_dict = json.loads(file_or_folder.read_text(encoding="utf-8"))
        for note_jrnl in file_dict.get("entries", []):
            # Convert each entry exactly once. The per-entry logic lives in
            # "convert_note" only.
            self.convert_note(note_jrnl)
50 changes: 26 additions & 24 deletions src/formats/nimbus_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,31 +93,33 @@ def handle_markdown_links(self, note_body: str, root_folder: Path) -> imf.Resour
)
return resources

@common.catch_all_exceptions
def convert_file(self, file_: Path, temp_folder: Path):
    """
    Convert a single zipped note to a note in the root notebook.

    The zip is extracted to a new subfolder of "temp_folder" that is named
    after the file stem. The contained "note.html" is converted to Markdown.
    Exceptions are handled by the decorator.
    """
    note_name = file_.stem
    self.logger.debug(f'Converting note "{note_name}"')

    extract_dir = temp_folder / file_.stem
    extract_dir.mkdir()
    common.extract_zip(file_, temp_folder=extract_dir)

    # HTML note seems to have the name "note.html" always
    html_path = extract_dir / "note.html"
    document = BeautifulSoup(html_path.read_text(encoding="utf-8"), "html.parser")
    streamline_tables(document)
    streamline_lists(document)

    markdown_body = markdown_lib.common.markup_to_markdown(str(document))
    # The links are resolved relative to the extracted note folder.
    linked_resources = self.handle_markdown_links(markdown_body, extract_dir)
    converted_note = imf.Note(
        note_name,
        markdown_body.strip(),
        source_application=self.format,
        resources=linked_resources,
    )
    self.root_notebook.child_notes.append(converted_note)

def convert(self, file_or_folder: Path):
    """
    Convert all zip files below "file_or_folder" in sorted order.

    Every zip file is handed to "convert_file" together with a shared
    temporary folder.
    """
    temp_folder = common.get_temp_folder()

    for file_ in sorted(file_or_folder.rglob("*.zip")):
        # Convert each file exactly once. "convert_file" creates the per-note
        # temp folder itself, so it must not be created here as well.
        self.convert_file(file_, temp_folder)
1 change: 1 addition & 0 deletions src/formats/notion.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def handle_markdown_links(
self.logger.debug(f'Unhandled link "{link}"')
return resources, note_links

@common.catch_all_exceptions
def convert_directory(self, parent_notebook):
relative_parent_path = self.id_path_map[parent_notebook.original_id]

Expand Down
Loading

0 comments on commit c58d5e8

Please sign in to comment.