Skip to content

Commit

Permalink
nimbusnote: improve list conversion (indentation; checklist, numbered and unnumbered lists)
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Oct 20, 2024
1 parent 1c42ff3 commit c5b5084
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 16 deletions.
46 changes: 37 additions & 9 deletions src/formats/nimbus_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,51 @@
import markdown_lib.common


def clean_tables(soup):
def streamline_tables(soup: BeautifulSoup):
    """Strip wrapper markup from the content of every table.

    Inside each ``table`` all ``div`` tags and afterwards all ``span`` tags
    are unwrapped: the tag itself is removed, its children stay in place.
    The soup is modified in place and nothing is returned.
    """
    for table in soup.find_all("table"):
        for tag_name in ("div", "span"):
            wrappers = table.find_all(tag_name)
            for wrapper in wrappers:
                wrapper.unwrap()


def assign_lists(_soup):
# TODO:
def streamline_lists(soup: BeautifulSoup):
    """Rebuild the flat lists of a note as typed and nested lists.

    In the input all lists are unnumbered lists (``ul``). The metadata of
    each item (``li``) is stored in its ``class`` attribute:

    - type: ``list-item-number``, ``list-item-bullet``, ``list-item-checkbox``
    - indentation: ``indent-<n>``, for example ``indent-0``

    Every ``ul`` is restructured in place:

    - Top-level items decide about the root list: it is renamed to ``ol``
      for numbered items and gets the single class ``checklist`` for
      checkbox items.
    - Items with a deeper indentation are moved into newly created child
      lists (``ol`` for numbered items, ``ul`` otherwise).
    - Checkbox items get an ``<input type="checkbox">`` prepended.
    - All attributes of the items are removed.

    Items without a type class (or with an unknown type) are handled like
    bullets. Items without a numeric indent class are handled like top-level
    items.

    Args:
        soup: Parsed note body. It is modified in place.
    """
    type_prefix = "list-item-"
    indent_prefix = "indent-"
    list_tag_by_type = {"checkbox": "ul", "bullet": "ul", "number": "ol"}

    for list_ in soup.find_all("ul"):
        # Lists that are currently "open", from the root list down to the
        # list that receives the next item. Each entry is (indent, list).
        # An explicit stack keeps list and indentation in sync, even when
        # the indentation jumps by more than one level.
        open_lists = [(0, list_)]
        for item in list_.find_all("li"):
            classes = item.get("class") or []

            item_type = next(
                (
                    class_[len(type_prefix):]
                    for class_ in classes
                    if class_.startswith(type_prefix)
                ),
                "bullet",
            )
            list_tag = list_tag_by_type.get(item_type, "ul")
            if item_type == "checkbox":
                item.insert(0, soup.new_tag("input", type="checkbox"))

            indent = next(
                (
                    int(class_[len(indent_prefix):])
                    for class_ in classes
                    if class_.startswith(indent_prefix)
                    and class_[len(indent_prefix):].isdecimal()
                ),
                0,
            )

            # Close all lists that are deeper than the current item.
            # The root list always stays open.
            while len(open_lists) > 1 and open_lists[-1][0] > indent:
                open_lists.pop()
            current_indent, current_list = open_lists[-1]

            if indent > current_indent:
                # new child list
                child_list = soup.new_tag(list_tag)
                current_list.append(child_list)
                open_lists.append((indent, child_list))
                current_list = child_list

            if indent == 0:
                # Only the root list is open at this point.
                # It would be sufficient to do this only one time.
                list_.name = list_tag
                root_classes = list_.get("class") or []
                if item_type == "checkbox" and "checklist" not in root_classes:
                    list_["class"] = ["checklist"]  # drop the other classes

            item.attrs = {}  # remove all attributes
            # Appending moves the item from its old position to the end of
            # the target list, so the document order of the items is kept.
            current_list.append(item)


class Converter(converter.BaseConverter):
Expand Down Expand Up @@ -81,8 +109,8 @@ def convert(self, file_or_folder: Path):
)

soup = BeautifulSoup(note_body_html, "html.parser")
clean_tables(soup)
assign_lists(soup)
streamline_tables(soup)
streamline_lists(soup)

note_body_markdown = markdown_lib.common.markup_to_markdown(str(soup))
resources = self.handle_markdown_links(note_body_markdown, temp_folder_note)
Expand Down
10 changes: 4 additions & 6 deletions src/formats/zoho_notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import markdown_lib


def clean_tables(soup):
def streamline_tables(soup: BeautifulSoup):
for table in soup.find_all("table"):
for row in table.find_all("tr"):
for td in row.find_all("td"):
Expand All @@ -22,9 +22,7 @@ def clean_tables(soup):
td.append(text_only)


def clean_task_lists(soup):
# TODO: Not sure why the cleaned task lists still don't work.
# It works online. Maybe caused by an old pandoc version.
def streamline_checklists(soup: BeautifulSoup):
for task_list in soup.find_all("div", class_="checklist"):
task_list.name = "ul"
# remove the spans
Expand Down Expand Up @@ -132,8 +130,8 @@ def convert_note(self, file_: Path):

# convert the note body to Markdown
if soup.body is not None:
clean_tables(soup)
clean_task_lists(soup)
streamline_tables(soup)
streamline_checklists(soup)
body = markdown_lib.common.markup_to_markdown(str(soup))

# resources and internal links
Expand Down
2 changes: 1 addition & 1 deletion test/data

0 comments on commit c5b5084

Please sign in to comment.