Skip to content

Commit

Permalink
implement sample lua filter
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Jan 4, 2025
1 parent 205b311 commit b25c7b0
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 17 deletions.
5 changes: 3 additions & 2 deletions jimmy_cli.spec
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_data_files

# pypandoc: https://github.com/orgs/pyinstaller/discussions/8387
datas = [(".version", ".")]
# https://pyinstaller.org/en/stable/spec-files.html#adding-data-files
datas = [(".version", "."), ("src/pandoc_filter/*.lua", "src/pandoc_filter")]
datas += collect_data_files("anyblock_exporter")
# pypandoc: https://github.com/orgs/pyinstaller/discussions/8387
datas += collect_data_files("pypandoc")


Expand Down
7 changes: 7 additions & 0 deletions src/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pathlib import Path
import pkgutil
import random
import sys
import tarfile
import tempfile
import time
Expand Down Expand Up @@ -52,6 +53,12 @@ def wrapper(*args, **kwargs):
return cast(F, wrapper)


# Pyinstaller has different root path than module.
# https://stackoverflow.com/a/44352931/7410886
# If "sys" has a "_MEIPASS" attribute, its value is used as the root.
# Otherwise the root falls back to the directory two levels above this file.
ROOT_PATH = Path(getattr(sys, "_MEIPASS", Path(__file__).parent.parent))
# Folder below the root that holds the pandoc filter files.
PANDOC_FILTER_PATH = ROOT_PATH / "src/pandoc_filter"


def safe_path(path: Path | str, max_name_length: int = 50) -> Path | str:
r"""
Return a safe version of the provided path or string.
Expand Down
10 changes: 3 additions & 7 deletions src/formats/synology_note_station.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,12 +71,6 @@ def streamline_html(content_html: str) -> str:
iframe.string = iframe.attrs["src"]
iframe.attrs = {"href": iframe.attrs["src"]}

# hack: In the original data, the attachment_id is stored in the
# "ref" attribute. Mitigate by storing it in the "src" attribute.
for img in soup.find_all("img"):
if (new_src := img.attrs.get("ref")) is not None:
img.attrs["src"] = new_src

return str(soup)


Expand Down Expand Up @@ -203,7 +197,9 @@ def convert_note(self, note_id, note_id_title_map):
note_links: imf.NoteLinks = []
if (content_html := note.get("content")) is not None:
content_html = streamline_html(content_html)
content_markdown = markdown_lib.common.markup_to_markdown(content_html)
content_markdown = markdown_lib.common.markup_to_markdown(
content_html, filters=["synology_note_station.lua"]
)
content_markdown = content_markdown.replace("{TEMPORARYNEWLINE}", "<br>")
# note title only needed for debug message
body, resources_referenced, note_links = self.handle_markdown_links(
Expand Down
7 changes: 1 addition & 6 deletions src/jimmy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import importlib
import logging
from pathlib import Path
import sys

import pypandoc
from rich import print # pylint: disable=redefined-builtin
Expand Down Expand Up @@ -97,10 +95,7 @@ def get_tree(root_notebooks: imf.Notebooks, root_tree: Tree) -> Tree:


def get_jimmy_version():
# Pyinstaller has different path than module.
# https://stackoverflow.com/a/44352931/7410886
base_path = getattr(sys, "_MEIPASS", Path(__file__).parent)
version_file = Path(base_path) / ".version"
version_file = common.ROOT_PATH / ".version"
return (
version_file.read_text().lstrip("v").rstrip()
if version_file.is_file()
Expand Down
15 changes: 13 additions & 2 deletions src/markdown_lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from markdown.extensions import Extension
import pypandoc

import common

LOGGER = logging.getLogger("jimmy")

Expand Down Expand Up @@ -218,20 +219,29 @@ def get_inline_tags(text: str, start_characters: list[str]) -> list[str]:
# fmt:on


def markup_to_markdown(text: str, format_: str = "html") -> str:
def markup_to_markdown(
    text: str, format_: str = "html", filters: list[str] | None = None
) -> str:
    """
    Convert markup text to markdown using pandoc.

    Args:
        text: The markup to convert.
        format_: Input format handed to pandoc.
        filters: File names of pandoc filters. Each name is resolved
            relative to ``common.PANDOC_FILTER_PATH``. ``None`` means that
            no filter is applied.

    Returns:
        The converted text with leading and trailing whitespace removed.
    """
    filter_names = [] if filters is None else filters
    filter_paths = [str(common.PANDOC_FILTER_PATH / name) for name in filter_names]

    converted = pypandoc.convert_text(
        text,
        PANDOC_OUTPUT_FORMAT,
        format=format_,
        sandbox=True,
        extra_args=["--wrap=none"],
        filters=filter_paths,
    )

    # A "[TABLE]" marker in the output means the table could not be converted.
    if "[TABLE]" in converted:
        LOGGER.warning("Table is too complex and can't be converted to markdown.")
    return converted.strip()


def file_to_markdown(file_: Path, resource_folder: Path) -> str:
def file_to_markdown(
file_: Path, resource_folder: Path, filters: list[str] | None = None
) -> str:
if filters is None:
filters = []
file_md = pypandoc.convert_file(
file_,
PANDOC_OUTPUT_FORMAT,
Expand All @@ -242,6 +252,7 @@ def file_to_markdown(file_: Path, resource_folder: Path) -> str:
# don't create artificial line breaks
"--wrap=none",
],
filters=[str(common.PANDOC_FILTER_PATH / f) for f in filters],
)
if "[TABLE]" in file_md:
LOGGER.warning("Table is too complex and can't be converted to markdown.")
Expand Down
7 changes: 7 additions & 0 deletions src/pandoc_filter/synology_note_station.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- Filter function for image elements.
-- In the original data, the "src" is stored in the "ref" attribute.
-- Copy it to "src", where it belongs. Images without a "ref" attribute
-- are returned unchanged.
function Image(img)
  local ref = img.attributes["ref"]
  if not ref then
    return img
  end
  img.src = ref
  return img
end

0 comments on commit b25c7b0

Please sign in to comment.