make many converters more robust, i.e. don't fail if a single note conversion fails
marph91 · Dec 1, 2024 · c58d5e8 · c58d5e8
1 parent 7f9869d
commit c58d5e8
Show file tree

Hide file tree

Showing 20 changed files with 439 additions and 387 deletions.
diff --git a/src/common.py b/src/common.py
@@ -11,6 +11,7 @@
 import tarfile
 import tempfile
 import time
+from typing import Any, Callable, TypeVar, cast
 import uuid
 import zipfile
 
@@ -28,6 +29,29 @@
 ###########################################################
 
 
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def catch_all_exceptions(func: F) -> F:
+    """
+    Decorator to catch all exceptions.
+    Useful if many individual notes are converted.
+
+    The result of the wrapped callable is passed through unchanged. If the
+    callable raises an exception, a short warning is logged, the traceback
+    is logged at debug level and None is returned instead.
+    """
+    # Local import: keeps this function independent of the module's import block.
+    import functools  # pylint: disable=import-outside-toplevel
+
+    # Preserve name and docstring of the wrapped callable.
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            # Forward the result. Dropping it would turn every decorated
+            # callable into one that always returns None.
+            return func(*args, **kwargs)
+        except Exception as exc:  # pylint: disable=broad-except
+            LOGGER.warning(
+                "Failed to convert note. "
+                'Enable extended log by "--stdout-log-level DEBUG".'
+            )
+            # https://stackoverflow.com/a/52466005/7410886
+            LOGGER.debug(exc, exc_info=True)
+            return None
+
+    return cast(F, wrapper)
+
+
 def safe_path(path: Path | str, max_name_length: int = 50) -> Path | str:
     r"""
     Return a safe version of the provided path or string.

diff --git a/src/formats/cherrytree.py b/src/formats/cherrytree.py
@@ -184,6 +184,7 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.bookmarked_nodes = []
 
+    @common.catch_all_exceptions
     def convert_to_markdown(self, node, root_notebook):
         # TODO
         # pylint: disable=too-many-locals

diff --git a/src/formats/day_one.py b/src/formats/day_one.py
@@ -9,7 +9,7 @@
 import markdown_lib.common
 
 
-def guess_title(body):
+def guess_title(body: str) -> str:
     for line in body.split("\n"):
         if line.startswith("!["):
             continue

diff --git a/src/formats/dynalist.py b/src/formats/dynalist.py
@@ -30,36 +30,36 @@ def handle_markdown_links(body: str, root_folder: Path) -> imf.NoteLinks:
 class Converter(converter.BaseConverter):
     accepted_extensions = [".zip"]
 
-    def convert(self, file_or_folder: Path):
-        self.convert_folder(self.root_path, self.root_notebook)
+    @common.catch_all_exceptions
+    def convert_file(self, item: Path, parent: imf.Notebook):
+        """
+        Convert a single exported ".txt" file to a note and append it to the
+        child notes of "parent". Files with any other suffix are ignored.
+        """
+        # We get a zip with opml and txt. Only advantage of opml over txt is
+        # the owner attribute. So just use txt, because it's simpler.
+        # opml is supported by pandoc, but the import is not working properly.
+        is_text_export = item.suffix.lower() == ".txt"
+        if not is_text_export:
+            return
+
+        note_title = item.stem
+        self.logger.debug(f'Converting note "{note_title}"')
+
+        note = imf.Note(
+            note_title,
+            item.read_text(encoding="utf-8"),
+            source_application=self.format,
+        )
+        # Tags and links are derived from the body stored in the note.
+        inline_tags = markdown_lib.common.get_inline_tags(note.body, ["#", "@"])
+        note.tags = [imf.Tag(tag_name) for tag_name in inline_tags]
+        note.note_links = handle_markdown_links(note.body, self.root_path)
+        parent.child_notes.append(note)
 
     def convert_folder(self, folder: Path, parent: imf.Notebook):
         for item in sorted(folder.iterdir()):
             if item.is_file():
-                # We get a zip with opml and txt. Only advantage of opml over txt is
-                # the owner attribute. So just use txt, because it's simpler.
-                # opml is supported by pandoc, but the import is not working properly.
-                if item.suffix.lower() != ".txt":
-                    continue
-                title = item.stem
-                self.logger.debug(f'Converting note "{title}"')
-
-                note_imf = imf.Note(
-                    title,
-                    item.read_text(encoding="utf-8"),
-                    source_application=self.format,
-                )
-                note_imf.tags = [
-                    imf.Tag(tag)
-                    for tag in markdown_lib.common.get_inline_tags(
-                        note_imf.body, ["#", "@"]
-                    )
-                ]
-                note_imf.note_links = handle_markdown_links(
-                    note_imf.body, self.root_path
-                )
-                parent.child_notes.append(note_imf)
+                self.convert_file(item, parent)
             else:
                 new_parent = imf.Notebook(item.name)
                 self.convert_folder(item, new_parent)
                 parent.child_notebooks.append(new_parent)
+
+    def convert(self, file_or_folder: Path):
+        """
+        Convert the export by walking the folder tree, starting at
+        "self.root_path" and filling "self.root_notebook".
+
+        NOTE(review): "file_or_folder" is not used in this method. Presumably
+        "self.root_path" already points at the extracted export - confirm in
+        the base converter.
+        """
+        self.convert_folder(self.root_path, self.root_notebook)
diff --git a/src/formats/evernote.py b/src/formats/evernote.py
@@ -87,6 +87,7 @@ def link_notes_by_title(self, root_notebook: imf.Notebook | None = None):
         for notebook in root_notebook.child_notebooks:
             self.link_notes_by_title(notebook)
 
+    @common.catch_all_exceptions
     def convert_single_enex(self, file_or_folder: Path, parent_notebook: imf.Notebook):
         self.logger.debug(f'Converting file "{file_or_folder.name}"')
         try:

diff --git a/src/formats/google_keep.py b/src/formats/google_keep.py
@@ -11,49 +11,49 @@
 class Converter(converter.BaseConverter):
     accepted_extensions = [".tgz", ".zip"]
 
+    @common.catch_all_exceptions
+    def convert_file(self, file_: Path):
+        """
+        Convert a single JSON file of the export to a note and append it to
+        the child notes of the root notebook.
+        """
+        keep_data = json.loads(file_.read_text(encoding="utf-8"))
+
+        title = keep_data.get("title", "")
+        self.logger.debug(f'Converting note "{title}"')
+
+        # Labels without a name are skipped.
+        tag_names = [
+            label["name"] for label in keep_data.get("labels", []) if "name" in label
+        ]
+        if keep_data.get("isPinned"):
+            tag_names.append("google-keep-pinned")
+
+        # Attachment paths are resolved against the folder of the JSON file.
+        resources = [
+            imf.Resource(file_.parent.absolute() / attachment["filePath"])
+            for attachment in keep_data.get("attachments", [])
+        ]
+
+        # fall back to HTML if there is no plain text
+        html_fallback = keep_data.get("textContentHtml", "")
+        body = keep_data.get("textContent", html_fallback)
+        annotations = keep_data.get("annotations")
+        if annotations is not None:
+            annotation_lines = [
+                f"- <{annotation["url"]}>: {annotation["title"]}"
+                for annotation in annotations
+            ]
+            # The trailing empty string yields a newline at the end.
+            body += "\n".join(["", "", "## Annotations", "", *annotation_lines, ""])
+
+        note = imf.Note(
+            title,
+            body,
+            source_application=self.format,
+            # Labels / tags don't have a separate id. Just use the name as id.
+            tags=[imf.Tag(name) for name in tag_names],
+            resources=resources,
+        )
+        # The timestamps are floor-divided by 10**6 before the conversion.
+        created_usec = keep_data.get("createdTimestampUsec")
+        if created_usec is not None:
+            note.created = common.timestamp_to_datetime(created_usec // (10**6))
+        updated_usec = keep_data.get("userEditedTimestampUsec")
+        if updated_usec is not None:
+            note.updated = common.timestamp_to_datetime(updated_usec // (10**6))
+        self.root_notebook.child_notes.append(note)
+
     def convert(self, file_or_folder: Path):
         # take only the exports in json format
         for file_ in sorted(self.root_path.rglob("*.json")):
-            note_keep = json.loads(Path(file_).read_text(encoding="utf-8"))
-
-            title = note_keep.get("title", "")
-            self.logger.debug(f'Converting note "{title}"')
-
-            tags_keep = [
-                label["name"]
-                for label in note_keep.get("labels", [])
-                if "name" in label
-            ]
-            if note_keep.get("isPinned"):
-                tags_keep.append("google-keep-pinned")
-
-            resources_keep = []
-            for resource_keep in note_keep.get("attachments", []):
-                resources_keep.append(
-                    imf.Resource(file_.parent.absolute() / resource_keep["filePath"])
-                )
-
-            # fall back to HTML if there is no plain text
-            body = note_keep.get("textContent", note_keep.get("textContentHtml", ""))
-            if (annotations := note_keep.get("annotations")) is not None:
-                annotations_md = ["", "", "## Annotations", ""]
-                for annotation in annotations:
-                    annotations_md.append(
-                        f"- <{annotation["url"]}>: {annotation["title"]}"
-                    )
-                annotations_md.append("")  # newline at the end
-                body += "\n".join(annotations_md)
-
-            note_imf = imf.Note(
-                title,
-                body,
-                source_application=self.format,
-                # Labels / tags don't have a separate id. Just use the name as id.
-                tags=[imf.Tag(tag) for tag in tags_keep],
-                resources=resources_keep,
-            )
-            if (value := note_keep.get("createdTimestampUsec")) is not None:
-                note_imf.created = common.timestamp_to_datetime(value // (10**6))
-            if (value := note_keep.get("userEditedTimestampUsec")) is not None:
-                note_imf.updated = common.timestamp_to_datetime(value // (10**6))
-            self.root_notebook.child_notes.append(note_imf)
+            self.convert_file(file_)
diff --git a/src/formats/jrnl.py b/src/formats/jrnl.py
@@ -4,33 +4,38 @@
 from pathlib import Path
 import json
 
+import common
 import converter
 import intermediate_format as imf
 
 
 class Converter(converter.BaseConverter):
     accepted_extensions = [".json"]
 
+    @common.catch_all_exceptions
+    def convert_note(self, note_jrnl):
+        """
+        Convert a single journal entry to a note and append it to the child
+        notes of the root notebook.
+
+        The entry is read by the keys "date", "time", "title", "tags",
+        "starred" and "body". The combined date and time is used as both
+        creation and update time.
+        """
+        date_str = note_jrnl["date"]
+        time_str = note_jrnl["time"]
+        title = f"{date_str} {time_str} {note_jrnl['title']}"
+        self.logger.debug(f'Converting note "{title}"')
+
+        timestamp = dt.datetime.fromisoformat(f"{date_str}T{time_str}")
+
+        # Tags are stored without their leading "@".
+        tag_names = [raw_tag.lstrip("@") for raw_tag in note_jrnl["tags"]]
+        if note_jrnl["starred"]:
+            tag_names.append("jrnl-starred")
+
+        note = imf.Note(
+            title,
+            note_jrnl["body"],
+            created=timestamp,
+            updated=timestamp,
+            source_application=self.format,
+            tags=[imf.Tag(name) for name in tag_names],
+        )
+        self.root_notebook.child_notes.append(note)
+
     def convert(self, file_or_folder: Path):
         file_dict = json.loads(file_or_folder.read_text(encoding="utf-8"))
         for note_jrnl in file_dict.get("entries", []):
-            title = f"{note_jrnl['date']} {note_jrnl['time']} {note_jrnl['title']}"
-            self.logger.debug(f'Converting note "{title}"')
-
-            unix_time = dt.datetime.fromisoformat(
-                f"{note_jrnl['date']}T{note_jrnl['time']}"
-            )
-
-            tags = [tag.lstrip("@") for tag in note_jrnl["tags"]]
-            if note_jrnl["starred"]:
-                tags.append("jrnl-starred")
-
-            note_imf = imf.Note(
-                title,
-                note_jrnl["body"],
-                created=unix_time,
-                updated=unix_time,
-                source_application=self.format,
-                tags=[imf.Tag(tag) for tag in tags],
-            )
-            self.root_notebook.child_notes.append(note_imf)
+            self.convert_note(note_jrnl)
diff --git a/src/formats/nimbus_note.py b/src/formats/nimbus_note.py
@@ -93,31 +93,33 @@ def handle_markdown_links(self, note_body: str, root_folder: Path) -> imf.Resour
                 )
         return resources
 
+    @common.catch_all_exceptions
+    def convert_file(self, file_: Path, temp_folder: Path):
+        """
+        Convert a single zipped note: extract it to a subfolder of
+        "temp_folder", convert its HTML body to Markdown and append the
+        resulting note to the child notes of the root notebook.
+        """
+        note_title = file_.stem
+        self.logger.debug(f'Converting note "{note_title}"')
+
+        # Each note is extracted to a folder named after the zip file.
+        extract_folder = temp_folder / note_title
+        extract_folder.mkdir()
+        common.extract_zip(file_, temp_folder=extract_folder)
+
+        # HTML note seems to have the name "note.html" always
+        html_file = extract_folder / "note.html"
+        soup = BeautifulSoup(html_file.read_text(encoding="utf-8"), "html.parser")
+        streamline_tables(soup)
+        streamline_lists(soup)
+
+        markdown_body = markdown_lib.common.markup_to_markdown(str(soup))
+        # Links are collected from the unstripped Markdown body.
+        linked_resources = self.handle_markdown_links(markdown_body, extract_folder)
+        note = imf.Note(
+            note_title,
+            markdown_body.strip(),
+            source_application=self.format,
+            resources=linked_resources,
+        )
+        self.root_notebook.child_notes.append(note)
+
     def convert(self, file_or_folder: Path):
         temp_folder = common.get_temp_folder()
 
         for file_ in sorted(file_or_folder.rglob("*.zip")):
-            title = file_.stem
-            self.logger.debug(f'Converting note "{title}"')
-            temp_folder_note = temp_folder / file_.stem
-            temp_folder_note.mkdir()
-            common.extract_zip(file_, temp_folder=temp_folder_note)
-
-            # HTML note seems to have the name "note.html" always
-            note_body_html = (temp_folder_note / "note.html").read_text(
-                encoding="utf-8"
-            )
-
-            soup = BeautifulSoup(note_body_html, "html.parser")
-            streamline_tables(soup)
-            streamline_lists(soup)
-
-            note_body_markdown = markdown_lib.common.markup_to_markdown(str(soup))
-            resources = self.handle_markdown_links(note_body_markdown, temp_folder_note)
-            note_imf = imf.Note(
-                title,
-                note_body_markdown.strip(),
-                source_application=self.format,
-                resources=resources,
-            )
-            self.root_notebook.child_notes.append(note_imf)
+            self.convert_file(file_, temp_folder)
diff --git a/src/formats/notion.py b/src/formats/notion.py
@@ -70,6 +70,7 @@ def handle_markdown_links(
                 self.logger.debug(f'Unhandled link "{link}"')
         return resources, note_links
 
+    @common.catch_all_exceptions
     def convert_directory(self, parent_notebook):
         relative_parent_path = self.id_path_map[parent_notebook.original_id]