-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
285 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
This page describes how to convert notes from Zim Wiki to Markdown. | ||
|
||
## General Information | ||
|
||
- [Website](https://zim-wiki.org/) | ||
- Typical extension: Folder with `.txt` files | ||
|
||
## Instructions | ||
|
||
1. [Install jimmy](../index.md#installation) | ||
2. Convert to Markdown. Example: `jimmy-cli-linux zim/folder --format zim` | ||
3. [Import to your app](../import_instructions.md) | ||
|
||
## Import Structure | ||
|
||
Zim does a good job in [exporting to Markdown](https://zim-wiki.org/manual/Help/Export.html). If the built-in export is fine for you, you don't need to use Jimmy. | ||
|
||
Jimmy doesn't use pandoc for conversion and applies some additional tweaks: | ||
|
||
- Consistently use ATX style headings (starting with `#`). | ||
- Consistently use spaces instead of tabs. | ||
- Page title and creation date are removed from the note body. They are stored in the metadata and the filename instead. The metadata can be included as front matter. | ||
- Convert Zim checklists to Markdown checklists (`- [ ]`) instead of Markdown lists with signs (`- ☐`). The checklist states are converted as described below: | ||
- Done and not done are converted to `- [x]`. | ||
- All other states are converted to `- [ ]`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
"""Convert Zim Wiki notes to the intermediate format.""" | ||
|
||
import datetime as dt | ||
from pathlib import Path | ||
import re | ||
|
||
import common | ||
import converter | ||
import intermediate_format as imf | ||
import markdown_lib.common | ||
from markdown_lib.zim import zim_to_md | ||
|
||
|
||
# Matches markup of the form "{{...}}", which is treated as an image embed.
# Group 1 is the complete markup including the braces, group 2 is the text
# between the braces (non-greedy, so it stops at the first "}}").
ZIM_IMAGE_REGEX = re.compile(r"(\{\{(.*?)\}\})")
|
||
|
||
class Converter(converter.BaseConverter):
    """Convert a Zim Wiki notebook folder to the intermediate format.

    Every sub-folder becomes a notebook and every ``.txt`` file (except for
    ``notebook.zim``) becomes a note.
    """

    accept_folder = True

    def handle_zim_links(self, body: str) -> tuple[list, list]:
        """Collect the wikilinks found in ``body``.

        Args:
            body: Note body that is searched for wikilinks.

        Returns:
            A tuple ``(resources, note_links)``. Links containing a ``/`` are
            looked up below ``self.root_path`` and returned as resources if
            a file was found. Interwiki links (containing ``?``) and anchors
            (starting with ``#``) are ignored. Everything else is returned as
            a note link.
        """
        # https://zim-wiki.org/manual/Help/Links.html
        # https://zim-wiki.org/manual/Help/Wiki_Syntax.html
        note_links = []
        resources = []
        for _, url, description in markdown_lib.common.get_wikilink_links(body):
            # NOTE(review): this text does not contain the description part.
            # Confirm that it still matches links written with a description.
            original_text = f"[[{url}]]"
            if "/" in url:
                # resource
                # Links containing a '/' are considered links to external files
                resource_path = common.find_file_recursively(self.root_path, url)
                if resource_path is None:
                    # Best effort: keep going, but leave a trace for debugging.
                    self.logger.debug(f'Could not find resource "{url}"')
                    continue
                resources.append(
                    imf.Resource(resource_path, original_text, description or url)
                )
            elif "?" in url:
                # Links that contain a '?' are interwiki links
                pass  # interwiki links can't be resolved
            elif url.startswith("#"):
                # Links that start with a '#' are resolved as links
                # within the page to a heading or an object
                pass  # they don't need to be resolved
            else:
                # Ignore other directives for now.
                # TODO: Find a way to map them. Right now we only map by
                # matching the original_id.
                # Only the last ":"-separated segment is used as ID. Leading
                # "+" characters are dropped.
                original_id = url.split(":")[-1].lstrip("+")
                note_links.append(
                    imf.NoteLink(original_text, original_id, description or original_id)
                )
        return resources, note_links

    def handle_zim_images(self, body: str) -> list[imf.Resource]:
        """Return a resource for every ``{{...}}`` image embed in ``body``.

        The text between the braces is used as path without any further
        resolution. The file name of that path is used as resource title.
        """
        images = []
        for original_text, image_link in ZIM_IMAGE_REGEX.findall(body):
            image_path = Path(image_link)
            images.append(imf.Resource(image_path, original_text, image_path.name))
        return images

    @staticmethod
    def _split_page(text: str) -> tuple[str, str]:
        """Split the raw text of a page into ``(metadata, body)``.

        The text is split at the first two blank lines. The first chunk is the
        metadata header. The second chunk is dropped on purpose and the rest
        is the body. Pages with fewer than three chunks get an empty body
        instead of raising a ValueError.
        """
        chunks = text.split("\n\n", maxsplit=2)
        metadata = chunks[0]
        body = chunks[2] if len(chunks) == 3 else ""
        return metadata, body

    def convert_folder(self, folder: Path, parent: imf.Notebook) -> None:
        """Convert ``folder`` recursively and attach the result to ``parent``.

        Args:
            folder: Folder to convert.
            parent: Notebook that receives the converted notes and notebooks.
        """
        # Sort the entries, since the order of iterdir() is arbitrary.
        for item in sorted(folder.iterdir()):
            if item.is_dir():
                # notebook
                new_parent = imf.Notebook(item.name)
                self.convert_folder(item, new_parent)
                parent.child_notebooks.append(new_parent)
                continue
            if item.name == "notebook.zim" or item.suffix.lower() != ".txt":
                continue

            # note
            title = item.stem.replace("_", " ")  # underscores seem to be replaced
            self.logger.debug(f'Converting note "{title}"')

            imf_note = imf.Note(
                title, source_application=self.format, original_id=title
            )

            metadata, body = self._split_page(item.read_text(encoding="utf-8"))
            for line in metadata.split("\n"):
                # Lines without ": " are not "key: value" pairs. Skip them.
                key, separator, value = line.partition(": ")
                if not separator or key != "Creation-Date":
                    continue
                try:
                    imf_note.created = dt.datetime.fromisoformat(value)
                except ValueError:
                    # A broken date shouldn't abort the whole conversion.
                    self.logger.debug(f'Could not parse creation date "{value}"')

            imf_note.body = zim_to_md(body)

            resources, note_links = self.handle_zim_links(imf_note.body)
            imf_note.resources = resources
            imf_note.note_links = note_links

            imf_note.resources.extend(self.handle_zim_images(imf_note.body))

            # tags: https://zim-wiki.org/manual/Help/Tags.html
            # TODO: exclude invalid characters
            imf_note.tags = [
                imf.Tag(tag) for tag in markdown_lib.common.get_inline_tags(body, ["@"])
            ]

            parent.child_notes.append(imf_note)

    def convert(self, file_or_folder: Path) -> None:
        """Convert the Zim notebook located at ``file_or_folder``.

        The path is stored as ``self.root_path``, because it is needed to
        look up linked files.
        """
        self.root_path = file_or_folder
        self.convert_folder(file_or_folder, self.root_notebook)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
"""Convert Zim Wiki to Markdown.""" | ||
|
||
import re | ||
|
||
import pyparsing as pp | ||
|
||
import markdown_lib.common | ||
|
||
|
||
# Prevent spaces, tabs and newlines from being stripped.
# This is a module-level side effect: the default is set on the
# pp.ParserElement class as soon as this module is imported.
pp.ParserElement.set_default_whitespace_chars("")
|
||
|
||
# A heading occupies a complete line: "== title ==". The pattern is anchored
# to the line start and end. Otherwise inline text like "a = b = c" would be
# converted to a heading. Group 1: opening "=" characters, group 2: title.
heading_re = re.compile(r"^(={1,6}) (.*?) ={1,6}[ \t]*$", re.MULTILINE)
# Checklist item at the start of a line. Group 1: indentation (spaces only),
# group 2: state character between the brackets.
checklist_re = re.compile(r"^( *)\[([ <>*x])\] ", re.MULTILINE)
|
||
|
||
def quote(source_tag, target_tag):
    """Build a parser for text enclosed in identical start and end tags.

    The enclosed text is emitted again, wrapped in ``target_tag`` on both
    sides.
    """

    def wrap_in_target(_, tokens):  # noqa
        inner = tokens[0]
        return "".join((target_tag, inner, target_tag))

    parser = pp.QuotedString(source_tag)
    return parser.set_parse_action(wrap_in_target)
|
||
|
||
def subscript():
    """Build a parser that converts ``_{text}`` to ``~text~``."""

    def wrap_in_tildes(_, tokens):  # noqa
        inner = tokens[0]
        return "".join(("~", inner, "~"))

    parser = pp.QuotedString("_{", endQuoteChar="}")
    return parser.set_parse_action(wrap_in_tildes)
|
||
|
||
def superscript():
    """Build a parser that converts ``^{text}`` to ``^text^``."""

    def wrap_in_carets(_, tokens):  # noqa
        inner = tokens[0]
        return "".join(("^", inner, "^"))

    parser = pp.QuotedString("^{", endQuoteChar="}")
    return parser.set_parse_action(wrap_in_carets)
|
||
|
||
def italic():
    """Build a parser that wraps the first group of the shared
    ``double_slash_re`` pattern in ``*``.
    """

    def wrap_in_asterisks(_, tokens):  # noqa
        # The regex groups arrive as a list in the first token.
        groups = tokens[0]
        return "".join(("*", groups[0], "*"))

    parser = pp.Regex(markdown_lib.common.double_slash_re, as_group_list=True)
    return parser.set_parse_action(wrap_in_asterisks)
|
||
|
||
def horizontal_line():
    """Build a parser that replaces every match of the shared
    ``horizontal_line_re`` pattern with ``---`` on a line of its own.
    """

    def to_md():
        return "\n---\n"

    parser = pp.Regex(markdown_lib.common.horizontal_line_re)
    return parser.set_parse_action(to_md)
|
||
|
||
def heading():
    """Build a parser that converts ``== title ==`` to an ATX heading.

    The levels are inverted: the more ``=`` characters open the heading, the
    fewer ``#`` characters are emitted (six ``=`` yield a single ``#``).
    """

    def to_atx(_, tokens):  # noqa
        groups = tokens[0]
        opening = groups[0]
        title = groups[1]
        hashes = "#" * (7 - len(opening))
        return hashes + " " + title

    parser = pp.Regex(heading_re, as_group_list=True)
    return parser.set_parse_action(to_atx)
|
||
|
||
def checklist():
    """Build a parser that converts checklist items to Markdown task items.

    The states ``*`` and ``x`` become ``- [x]``, all other states become
    ``- [ ]``. The indentation is kept.
    """

    def to_task_item(_, tokens):  # noqa
        groups = tokens[0]
        indentation = groups[0]
        state = groups[1]
        if state in ("*", "x"):
            list_char = "x"
        else:
            list_char = " "
        return f"{indentation}- [{list_char}] "

    parser = pp.Regex(checklist_re, as_group_list=True)
    return parser.set_parse_action(to_task_item)
|
||
|
||
def zim_to_md(zim_text: str) -> str:
    r"""
    Convert Zim Wiki markup to Markdown.

    Each tab is replaced by four spaces first. Then all supported markup
    elements are transformed. Any other text is kept as it is.

    >>> zim_to_md("''monospace'' **bold**")
    '`monospace` **bold**'
    >>> zim_to_md("super^{script}, sub_{script}")
    'super^script^, sub~script~'
    >>> zim_to_md("====== heading 1 ======")
    '# heading 1'
    >>> zim_to_md("== heading5 ==")
    '##### heading5'
    >>> zim_to_md("'''\nsome code\nblock\n'''")
    '```\nsome code\nblock\n```'
    >>> zim_to_md("[ ] unchecked\n[x] not done")
    '- [ ] unchecked\n- [x] not done'
    >>> zim_to_md("[ ] u\n [>] np\n [*] nd\n[x] nd")
    '- [ ] u\n - [ ] np\n - [x] nd\n- [x] nd'
    >>> zim_to_md("* lvl1\n\t* lvl2\n\t* lvl2\n* lvl1")
    '* lvl1\n    * lvl2\n    * lvl2\n* lvl1'
    """
    # The code fence has to be tried before the inline code quote, since
    # both start with two single quotes.
    code_fence = pp.Literal("'''").set_parse_action(lambda: "```")
    text_formatting = quote("''", "`") | italic() | subscript() | superscript()
    line_elements = horizontal_line() | heading() | checklist()
    zim_markup = code_fence | text_formatting | line_elements

    # TODO: str.translate() seems to be fastest
    # https://stackoverflow.com/a/8958372
    four_spaces = " " * 4
    without_tabs = four_spaces.join(zim_text.split("\t"))
    return zim_markup.transform_string(without_tabs)
Submodule data
updated
14 files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters