Skip to content

Commit

Permalink
Fix broken links in lists, indented text and multiline
Browse files Browse the repository at this point in the history
When replacing links, we parse the markdown document line by line. When
the markdown parser encounters an indented line without the context of
the surrounding lines, it assumes a code block and does not render the
links to HTML, which means we do not rewrite those links.

If the link is broken over two lines we also fail to discover it.
This patch fixes the issue by changing the processing from line by line
to the document as a whole.

Fixes: #229

Signed-off-by: Andrea Frittoli <andrea.frittoli@gmail.com>
  • Loading branch information
afrittoli committed Feb 19, 2021
1 parent 1a2e500 commit a3bee90
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 23 deletions.
34 changes: 17 additions & 17 deletions sync/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,15 +174,17 @@ def transform_doc(doc, source_folder, target, target_folder, header,
site_target_folder = os.path.normpath(os.path.join(site_folder, target_folder))
safe_makedirs(site_target_folder)
target = os.path.join(site_target_folder, target)
with open(target, 'w+') as target_doc:
# If there is an header configured, write it (in YAML)
if header:
# If there is an header configured, write it (in YAML)
if header:
with open(target, 'w+') as target_doc:
target_doc.write(YAML_SEPARATOR)
YAML().dump(header, target_doc)
target_doc.write(YAML_SEPARATOR)
for line in decode(doc.data_stream.read()).splitlines():
target_doc.write(
f'{transform_line(line, source_folder, local_files, base_path, base_url)}\n')
with open(target, 'ab+') as target_doc:
doc_markdown = doc.data_stream.read()
doc_markdown = transform_links_doc(
doc_markdown, source_folder, local_files, base_path, base_url)
target_doc.write(doc_markdown)
return target


Expand All @@ -195,17 +197,15 @@ def decode(s, encodings=('utf8', 'latin1', 'ascii')):
return s.decode('ascii', 'ignore')


def transform_line(line, base_path, local_files, rewrite_path, rewrite_url):
    """Rewrite the href of every link found in a single line.

    Trailing whitespace is stripped from ``line`` first. Each link
    returned by ``get_links`` has its ``href`` passed through
    ``transform_link`` and every occurrence of the original href in the
    line is substituted with the result.

    Returns the stripped line with the substitutions applied.
    """
    rewritten = line.rstrip()
    # Links are discovered once, on the stripped line, before any rewriting.
    for anchor in get_links(rewritten):
        original_href = anchor.get("href")
        new_href = transform_link(
            original_href, base_path, local_files, rewrite_path, rewrite_url)
        rewritten = rewritten.replace(original_href, new_href)
    return rewritten
def transform_links_doc(text, base_path, local_files, rewrite_path, rewrite_url):
    """Rewrite the href of every link found in ``text``.

    The whole text is handed to ``get_links`` at once rather than line by
    line. Each distinct, non-empty ``href`` is passed through
    ``transform_link`` and every occurrence of the original href in the
    text is substituted with the result.

    Returns the text with the substitutions applied.
    """
    # Rewrite map: original href -> rewritten href. Links without an href
    # are skipped, and each distinct href is transformed only once.
    rewrite_map = {}
    for link in get_links(text):
        href = link.get("href")
        if href and href not in rewrite_map:
            rewrite_map[href] = transform_link(
                href, base_path, local_files, rewrite_path, rewrite_url)
    for source, rewritten in rewrite_map.items():
        # replace() returns a new object instead of modifying ``text`` in
        # place, so the result must be assigned back or the rewrite is lost.
        text = text.replace(source, rewritten)
    return text


def get_links(md):
Expand Down
16 changes: 10 additions & 6 deletions sync/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from sync import (
doc_config, docs_from_tree, get_links, is_absolute_url,
is_fragment, get_tags, load_config, save_config,
get_files_in_path, transform_link, transform_line,
get_files_in_path, transform_link, transform_links_doc,
transform_doc, transform_docs)


Expand Down Expand Up @@ -257,7 +257,7 @@ def test_transform_link(self):
transform_link(case, base_path, local_files, rewrite_path, rewrite_url),
expected)

def test_transform_line(self):
def test_transform_links_doc(self):
self.maxDiff = None

# Links are in a page stored undrer base_path
Expand All @@ -282,7 +282,9 @@ def test_transform_line(self):
"[notfound-relative-link-dotdot](../examples/notfound.txt)",
"[invalid-absolute-link](www.github.com)",
("[valid-absolute-link](https://website-random321.net#FRagment) "
"[valid-ref-link](#fooTEr)")
"[valid-ref-link](#fooTEr)"),
("Valid link broken on two lines [exists-link-in-list]("
"./test.txt)")
]
expected_results = [
"[exists-relative-link](/docs/test/test.txt)",
Expand All @@ -295,12 +297,14 @@ def test_transform_line(self):
"[notfound-relative-link-dotdot](http://test.com/tree/docs/examples/notfound.txt)",
"[invalid-absolute-link](http://test.com/tree/docs/www.github.com)",
("[valid-absolute-link](https://website-random321.net#FRagment) "
"[valid-ref-link](#footer)")
"[valid-ref-link](#footer)"),
("Valid link broken on two lines [exists-link-in-list]("
"/docs/test/test.txt)")
]

for case, expected in zip(cases, expected_results):
actual = transform_line(
line=case, base_path=base_path, local_files=local_files,
actual = transform_links_doc(
text=case, base_path=base_path, local_files=local_files,
rewrite_path='/docs/test', rewrite_url='http://test.com/tree/docs/test'
)

Expand Down

0 comments on commit a3bee90

Please sign in to comment.