F = TypeVar("F", bound=Callable[..., Any])


def catch_all_exceptions(func: F) -> F:
    """
    Decorator that logs any exception raised by ``func`` instead of raising it.

    Useful if many individual notes are converted: a single failing note is
    reported and skipped, the remaining notes are still processed.

    Args:
        func: The callable to guard.

    Returns:
        A wrapper carrying the metadata of ``func``. The wrapper returns the
        result of ``func``, or ``None`` if ``func`` raised an ``Exception``.
    """
    # Function-scope import keeps this block self-contained.
    import functools  # pylint: disable=import-outside-toplevel

    @functools.wraps(func)  # keep __name__/__doc__ of the decorated callable
    def wrapper(*args, **kwargs):
        try:
            # Hand the result back to the caller. It was dropped before,
            # which turned every decorated function into one returning None.
            return func(*args, **kwargs)
        except Exception as exc:  # pylint: disable=broad-except
            LOGGER.warning(
                "Failed to convert note. "
                'Enable extended log by "--stdout-log-level DEBUG".'
            )
            # https://stackoverflow.com/a/52466005/7410886
            LOGGER.debug(exc, exc_info=True)
            return None

    return cast(F, wrapper)
@common.catch_all_exceptions
def convert_file(self, item: Path, parent: imf.Notebook):
    """
    Convert one exported ".txt" file and attach it to ``parent``.

    Files with any other suffix are skipped without a message.
    """
    # We get a zip with opml and txt. Only advantage of opml over txt is
    # the owner attribute. So just use txt, because it's simpler.
    # opml is supported by pandoc, but the import is not working properly.
    is_text_export = item.suffix.lower() == ".txt"
    if not is_text_export:
        return

    note_title = item.stem
    self.logger.debug(f'Converting note "{note_title}"')

    raw_text = item.read_text(encoding="utf-8")
    note = imf.Note(note_title, raw_text, source_application=self.format)

    # Tags and links are derived from the body stored on the note.
    inline_tags = markdown_lib.common.get_inline_tags(note.body, ["#", "@"])
    note.tags = list(map(imf.Tag, inline_tags))
    note.note_links = handle_markdown_links(note.body, self.root_path)

    parent.child_notes.append(note)
@common.catch_all_exceptions
def convert_file(self, file_: Path):
    """
    Convert one Google Keep JSON export to a note in the root notebook.

    Labels become tags, a pinned note gets the extra tag
    "google-keep-pinned", attachments become resources and annotations are
    appended to the body as a markdown list.

    Args:
        file_: Path to the exported JSON file. Attachment paths are resolved
            relative to its parent folder.
    """
    note_keep = json.loads(file_.read_text(encoding="utf-8"))

    title = note_keep.get("title", "")
    self.logger.debug(f'Converting note "{title}"')

    tags_keep = [
        label["name"] for label in note_keep.get("labels", []) if "name" in label
    ]
    if note_keep.get("isPinned"):
        tags_keep.append("google-keep-pinned")

    resources_keep = [
        imf.Resource(file_.parent.absolute() / resource_keep["filePath"])
        for resource_keep in note_keep.get("attachments", [])
    ]

    # fall back to HTML if there is no plain text
    body = note_keep.get("textContent", note_keep.get("textContentHtml", ""))
    if (annotations := note_keep.get("annotations")) is not None:
        annotations_md = ["", "", "## Annotations", ""]
        # Single quotes inside the f-string: reusing the outer quote type is
        # only valid syntax from Python 3.12 on.
        annotations_md.extend(
            f"- <{annotation['url']}>: {annotation['title']}"
            for annotation in annotations
        )
        annotations_md.append("")  # newline at the end
        body += "\n".join(annotations_md)

    note_imf = imf.Note(
        title,
        body,
        source_application=self.format,
        # Labels / tags don't have a separate id. Just use the name as id.
        tags=[imf.Tag(tag) for tag in tags_keep],
        resources=resources_keep,
    )
    # Timestamps are given in microseconds; convert to whole seconds.
    if (value := note_keep.get("createdTimestampUsec")) is not None:
        note_imf.created = common.timestamp_to_datetime(value // (10**6))
    if (value := note_keep.get("userEditedTimestampUsec")) is not None:
        note_imf.updated = common.timestamp_to_datetime(value // (10**6))
    self.root_notebook.child_notes.append(note_imf)
@common.catch_all_exceptions
def convert_note(self, note_jrnl):
    """
    Convert one jrnl entry to a note in the root notebook.

    The title is built from the entry's date, time and title. The same
    date and time are used for the created and updated timestamps.
    """
    date_part = note_jrnl["date"]
    time_part = note_jrnl["time"]
    title = f"{date_part} {time_part} {note_jrnl['title']}"
    self.logger.debug(f'Converting note "{title}"')

    timestamp = dt.datetime.fromisoformat(f"{date_part}T{time_part}")

    # Tags are stored with a leading "@"; starred entries get an extra tag.
    tag_names = [raw_tag.lstrip("@") for raw_tag in note_jrnl["tags"]]
    if note_jrnl["starred"]:
        tag_names.append("jrnl-starred")

    converted = imf.Note(
        title,
        note_jrnl["body"],
        created=timestamp,
        updated=timestamp,
        source_application=self.format,
        tags=[imf.Tag(name) for name in tag_names],
    )
    self.root_notebook.child_notes.append(converted)
@common.catch_all_exceptions
def convert_file(self, file_: Path, temp_folder: Path):
    """
    Convert one zipped Nimbus Note export to a note in the root notebook.

    The archive is extracted into a subfolder of ``temp_folder`` named after
    the archive, then "note.html" is converted to markdown.
    """
    note_name = file_.stem
    self.logger.debug(f'Converting note "{note_name}"')

    extraction_dir = temp_folder / note_name
    extraction_dir.mkdir()
    common.extract_zip(file_, temp_folder=extraction_dir)

    # HTML note seems to have the name "note.html" always
    html_path = extraction_dir / "note.html"
    parsed = BeautifulSoup(html_path.read_text(encoding="utf-8"), "html.parser")
    for streamline in (streamline_tables, streamline_lists):
        streamline(parsed)

    markdown = markdown_lib.common.markup_to_markdown(str(parsed))
    linked_resources = self.handle_markdown_links(markdown, extraction_dir)
    converted = imf.Note(
        note_name,
        markdown.strip(),
        source_application=self.format,
        resources=linked_resources,
    )
    self.root_notebook.child_notes.append(converted)
@common.catch_all_exceptions
def convert_file(self, item: Path, parent: imf.Notebook):
    """
    Convert one ".md" file of the vault and attach it to ``parent``.

    Files with any other suffix are skipped without a message.
    """
    is_markdown = item.suffix.lower() == ".md"
    if not is_markdown:
        return

    note_title = item.stem
    self.logger.debug(f'Converting note "{note_title}"')

    raw_text = item.read_text(encoding="utf-8")
    linked_resources, linked_notes = self.handle_links(raw_text)

    # https://help.obsidian.md/Editing+and+formatting/Tags
    tags_inline = markdown_lib.common.get_inline_tags(raw_text, ["#"])

    # frontmatter tags
    # https://help.obsidian.md/Editing+and+formatting/Properties#Default+properties
    properties, content = frontmatter.parse(raw_text)
    tags_frontmatter = properties.get("tags", [])

    # aliases seem to be only used in the link description
    # frontmatter_.get("aliases", [])

    all_tags = tags_inline + tags_frontmatter
    converted = imf.Note(
        note_title,
        content,
        source_application=self.format,
        tags=[imf.Tag(name) for name in all_tags],
        resources=linked_resources,
        note_links=linked_notes,
    )
    parent.child_notes.append(converted)
@common.catch_all_exceptions
def convert_note(self, note_simplenote):
    """
    Convert one Simplenote entry to a note in the root notebook.

    The first line of the content is used as title. Links starting with
    "simplenote://" are recorded as note links, web and mail links are kept
    untouched. Pinned notes get the extra tag "simplenote-pinned".

    Args:
        note_simplenote: Parsed JSON entry. It is not modified.
    """
    # title is the first line
    title, body = markdown_lib.common.split_h1_title_from_body(
        note_simplenote["content"]
    )
    self.logger.debug(f'Converting note "{title}"')

    note_links = []
    for link in markdown_lib.common.get_markdown_links(body):
        if link.is_web_link or link.is_mail_link:
            continue  # keep the original links
        if link.url.startswith("simplenote://"):
            # internal link
            _, linked_note_id = link.url.rsplit("/", 1)
            note_links.append(imf.NoteLink(str(link), linked_note_id, link.text))

    # Work on a copy: appending to the list returned by .get() would modify
    # the caller's parsed JSON entry.
    tags = list(note_simplenote.get("tags", []))
    if note_simplenote.get("pinned"):
        tags.append("simplenote-pinned")

    note_imf = imf.Note(
        title.strip(),
        body.lstrip(),
        created=dt.datetime.fromisoformat(note_simplenote["creationDate"]),
        updated=dt.datetime.fromisoformat(note_simplenote["lastModified"]),
        source_application=self.format,
        # Tags don't have a separate id. Just use the name as id.
        tags=[imf.Tag(tag) for tag in tags],
        note_links=note_links,
        original_id=note_simplenote["id"],
    )
    self.root_notebook.child_notes.append(note_imf)
@common.catch_all_exceptions
def convert_note(self, note_id, note_id_title_map):
    """
    Convert the note stored under ``note_id`` and attach it to its notebook.

    Notes whose parent id ends in "#00000000" are treated as trashed and
    skipped. ``note_id_title_map`` is passed on to the link handling.
    """
    raw = json.loads((self.root_path / note_id).read_text(encoding="utf-8"))

    in_trash = raw["parent_id"].rsplit("_")[-1] == "#00000000"
    if in_trash:
        self.logger.debug(f"Ignoring note in trash \"{raw['title']}\"")
        return
    title = raw["title"]
    self.logger.debug(f'Converting note "{title}"')

    # resources / attachments
    attachments = self.map_resources_by_hash(raw)

    links: imf.NoteLinks = []
    markdown_body = ""
    html = raw.get("content")
    if html is not None:
        markdown_body = markdown_lib.common.markup_to_markdown(
            streamline_html(html)
        )
        # note title only needed for debug message
        referenced, links = self.handle_markdown_links(
            raw["title"], markdown_body, note_id_title_map
        )
        attachments.extend(referenced)

    converted = imf.Note(
        title,
        markdown_body,
        created=common.timestamp_to_datetime(raw["ctime"]),
        updated=common.timestamp_to_datetime(raw["mtime"]),
        source_application=self.format,
        tags=[imf.Tag(name) for name in raw.get("tag", [])],
        resources=attachments,
        note_links=links,
        original_id=note_id,
    )
    # Coordinates are only set when present in the export.
    for coordinate in ("latitude", "longitude"):
        if (value := raw.get(coordinate)) is not None:
            setattr(converted, coordinate, value)

    target_notebook = self.find_parent_notebook(raw["parent_id"])
    target_notebook.child_notes.append(converted)
@common.catch_all_exceptions
def convert_file(self, file_: Path):
    """
    Convert one markdown file of a textbundle to a note in the root notebook.

    Anything without a ".md" or ".markdown" suffix is logged and skipped.
    """
    is_markdown = file_.suffix.lower() in (".md", ".markdown")
    if not is_markdown:
        # take only the exports in markdown format
        self.logger.debug(f"Ignoring folder or file {file_.name}")
        return

    # Filename from textbundle name seems to be more robust
    # than taking the first line of the body.
    note_title = file_.parent.stem
    self.logger.debug(f'Converting note "{note_title}"')

    raw_text = file_.read_text(encoding="utf-8")
    converted = imf.Note(note_title, raw_text, source_application=self.format)

    inline_tags = markdown_lib.common.get_inline_tags(converted.body, ["#"])
    converted.tags = [imf.Tag(name) for name in inline_tags]
    converted.resources = self.handle_markdown_links(converted.body)
    converted.time_from_file(file_)

    self.root_notebook.child_notes.append(converted)
- title = file_.parent.stem - self.logger.debug(f'Converting note "{title}"') - - note_imf = imf.Note( - title, file_.read_text(encoding="utf-8"), source_application=self.format - ) - note_imf.tags = [ - imf.Tag(tag) - for tag in markdown_lib.common.get_inline_tags(note_imf.body, ["#"]) - ] - note_imf.resources = self.handle_markdown_links(note_imf.body) - note_imf.time_from_file(file_) - - self.root_notebook.child_notes.append(note_imf) + self.convert_file(file_) diff --git a/src/formats/tiddlywiki.py b/src/formats/tiddlywiki.py index 1218ddf..215c0ba 100644 --- a/src/formats/tiddlywiki.py +++ b/src/formats/tiddlywiki.py @@ -201,6 +201,7 @@ def convert_json(self, file_or_folder: Path): continue # skip notes with special tags self.root_notebook.child_notes.append(note_imf) + @common.catch_all_exceptions def convert_tid(self, file_or_folder: Path): # pylint: disable=too-many-locals tiddler = file_or_folder.read_text(encoding="utf-8") diff --git a/src/formats/tomboy_ng.py b/src/formats/tomboy_ng.py index 3990b61..22737f5 100644 --- a/src/formats/tomboy_ng.py +++ b/src/formats/tomboy_ng.py @@ -4,6 +4,7 @@ from pathlib import Path import xml.etree.ElementTree as ET # noqa: N817 +import common import converter import intermediate_format as imf @@ -61,6 +62,7 @@ def parse_content(self, node): md_content.append(node.tail) return "".join(md_content).strip(), note_links + @common.catch_all_exceptions def convert_note(self, note_file: Path): # Format: https://wiki.gnome.org/Apps/Tomboy/NoteXmlFormat root_node = ET.parse(note_file).getroot() diff --git a/src/formats/zettelkasten.py b/src/formats/zettelkasten.py index d657a1e..453cd94 100644 --- a/src/formats/zettelkasten.py +++ b/src/formats/zettelkasten.py @@ -4,6 +4,7 @@ from pathlib import Path import xml.etree.ElementTree as ET # noqa: N817 +import common import converter import intermediate_format as imf import markdown_lib.common @@ -51,22 +52,91 @@ def handle_markdown_links( ) return resources, note_links + 
@common.catch_all_exceptions + def convert_zettel(self, id_: int, zettel, file_or_folder: Path, tag_id_name_map): + # pylint: disable=too-many-locals + title = item.text if (item := zettel.find("title")) is not None else "" + assert title is not None + self.logger.debug(f'Converting note "{title}"') + note_imf = imf.Note(title, original_id=str(id_)) + + self.parse_attributes(zettel, note_imf) + + for item in zettel: + match item.tag: + case "title": + pass # handled already + case "content": + body = bbcode_to_md(item.text if item.text else "") + note_imf.body = body + resources, note_links = self.handle_markdown_links( + body, file_or_folder.parent + ) + note_imf.resources.extend(resources) + note_imf.note_links.extend(note_links) + + # if self.images_available: + # for image in images: + # image.filename = self.images_folder / image.filename + # # Set manually, because with invalid path it's + # # set to False. + # image.is_image = True + # note_imf.resources.extend(resources) + case "author": + note_imf.author = item.text + case "keywords": + if item.text is not None: + for tag_id in item.text.split(","): + tag_name = tag_id_name_map.get(tag_id, tag_id) + assert tag_name is not None + note_imf.tags.append(imf.Tag(tag_name)) + case "links": + if not self.attachments_available: + continue + # links = resources are always attached at the end + for link in item.findall("link"): + if link.text is None: + continue + note_imf.resources.append( + imf.Resource(self.attachments_folder / link.text) + ) + case "luhmann": # folgezettel + if item.text is None: + continue + # TODO: Ensure that this is called always + # after the initial note content is parsed. 
+ sequences = [] + for note_id in item.text.split(","): + text = f"[{note_id}]({note_id})" + sequences.append(text) + note_imf.note_links.append(imf.NoteLink(text, note_id, note_id)) + note_imf.body += ( + "\n\n## Note Sequences\n\n" + ", ".join(sequences) + "\n" + ) + case "misc" | "zettel": + pass # always None + case "manlinks": + pass # TODO: Should correspond to the parsed note links. + case _: + self.logger.warning(f"ignoring item {item.tag}={item.text}") + self.root_notebook.child_notes.append(note_imf) + def convert(self, file_or_folder: Path): # TODO - # pylint: disable=too-many-branches,too-many-locals - attachments_folder = file_or_folder.parent / "attachments" - attachments_available = attachments_folder.is_dir() - if not attachments_available: + # pylint: disable=attribute-defined-outside-init + self.attachments_folder = file_or_folder.parent / "attachments" + self.attachments_available = self.attachments_folder.is_dir() + if not self.attachments_available: self.logger.warning( - f"No attachments folder found at {attachments_folder}. " + f"No attachments folder found at {self.attachments_folder}. " "Attachments are not converted." ) - images_folder = file_or_folder.parent / "img" - images_available = images_folder.is_dir() - if not images_available: + self.images_folder = file_or_folder.parent / "img" + self.images_available = self.images_folder.is_dir() + if not self.images_available: self.logger.warning( - f"No images folder found at {images_folder}. " + f"No images folder found at {self.images_folder}. " "Images are not converted." 
) @@ -78,70 +148,4 @@ def convert(self, file_or_folder: Path): root_node = ET.parse(self.root_path / "zknFile.xml").getroot() for id_, zettel in enumerate(root_node.findall("zettel"), start=1): - title = item.text if (item := zettel.find("title")) is not None else "" - assert title is not None - self.logger.debug(f'Converting note "{title}"') - note_imf = imf.Note(title, original_id=str(id_)) - - self.parse_attributes(zettel, note_imf) - - for item in zettel: - match item.tag: - case "title": - pass # handled already - case "content": - body = bbcode_to_md(item.text if item.text else "") - note_imf.body = body - resources, note_links = self.handle_markdown_links( - body, file_or_folder.parent - ) - note_imf.resources.extend(resources) - note_imf.note_links.extend(note_links) - - # if images_available: - # for image in images: - # image.filename = images_folder / image.filename - # # Set manually, because with invalid path it's - # # set to False. - # image.is_image = True - # note_imf.resources.extend(resources) - case "author": - note_imf.author = item.text - case "keywords": - if item.text is not None: - for tag_id in item.text.split(","): - tag_name = tag_id_name_map.get(tag_id, tag_id) - assert tag_name is not None - note_imf.tags.append(imf.Tag(tag_name)) - case "links": - if not attachments_available: - continue - # links = resources are always attached at the end - for link in item.findall("link"): - if link.text is None: - continue - note_imf.resources.append( - imf.Resource(attachments_folder / link.text) - ) - case "luhmann": # folgezettel - if item.text is None: - continue - # TODO: Ensure that this is called always - # after the initial note content is parsed. 
@common.catch_all_exceptions
def convert_note(self, item: Path, parent: imf.Notebook):
    """
    Convert one Zim page file and attach it to ``parent``.

    "notebook.zim" and files without a ".txt" suffix are skipped. The text
    before the first blank line is treated as metadata header; only its
    "Creation-Date" entry is evaluated.

    Args:
        item: Path of the page file. Resources are looked up in the folder
            next to it that has the same name as the file stem.
        parent: Notebook that receives the converted note.
    """
    if item.name == "notebook.zim" or item.suffix.lower() != ".txt":
        return

    # note
    title = item.stem.replace("_", " ")  # underscores seem to be replaced
    self.logger.debug(f'Converting note "{title}"')

    imf_note = imf.Note(title, source_application=self.format, original_id=title)

    item_content = item.read_text(encoding="utf-8")
    try:
        metadata, _, body = item_content.split("\n\n", maxsplit=2)
    except ValueError:
        # Fewer than three blank-line separated parts: use everything as body.
        body = item_content
        metadata = ""

    for line in metadata.split("\n"):
        # partition() never raises. The former two-value unpacking of
        # split(": ") failed on every line without a separator, including
        # the empty metadata of the fallback above.
        key, separator, value = line.partition(": ")
        if separator and key == "Creation-Date":
            imf_note.created = dt.datetime.fromisoformat(value)

    imf_note.body = zim_to_md(body)

    resource_path = item.parent / item.stem
    resources, note_links = self.handle_zim_links(imf_note.body, resource_path)
    imf_note.resources = resources
    imf_note.note_links = note_links

    imf_note.resources.extend(self.handle_zim_images(imf_note.body, resource_path))

    # tags: https://zim-wiki.org/manual/Help/Tags.html
    # TODO: exclude invalid characters
    imf_note.tags = [
        imf.Tag(tag) for tag in markdown_lib.common.get_inline_tags(body, ["@"])
    ]

    parent.child_notes.append(imf_note)
parent.child_notebooks.append(new_parent) + continue + self.convert_note(item, parent) def convert(self, file_or_folder: Path): self.root_path = file_or_folder diff --git a/src/formats/zoho_notebook.py b/src/formats/zoho_notebook.py index cfc9c97..dc5f1dc 100644 --- a/src/formats/zoho_notebook.py +++ b/src/formats/zoho_notebook.py @@ -70,6 +70,7 @@ def handle_markdown_links( ) return resources, note_links + @common.catch_all_exceptions def convert_note(self, file_: Path): soup = BeautifulSoup(file_.read_text(encoding="utf-8"), "html.parser") diff --git a/src/importer.py b/src/importer.py index 6b640d7..144cf07 100644 --- a/src/importer.py +++ b/src/importer.py @@ -219,6 +219,7 @@ def write_note(self, note: imf.Note): case _: note.path.write_text(note.body, encoding="utf-8") + @common.catch_all_exceptions def import_note(self, note: imf.Note): assert note.path is not None self.progress_bars["notes"].update(1) @@ -253,11 +254,7 @@ def import_notebook(self, notebook: imf.Notebook): # Don't overwrite existing suffices. if note.path.suffix != ".md": note.path = note.path.with_suffix(note.path.suffix + ".md") - try: - self.import_note(note) - except Exception as exc: # pylint: disable=broad-except - LOGGER.error(f'Failed to write note "{note.title}"') - LOGGER.debug(exc, exc_info=True) + self.import_note(note) for child_notebook in notebook.child_notebooks: child_notebook.path = notebook.path / common.safe_path(child_notebook.title) self.import_notebook(child_notebook)