Skip to content

Commit

Permalink
Fix two parser bugs involving wikitable error handling.
Browse files Browse the repository at this point in the history
  • Loading branch information
earwig committed Dec 5, 2015
1 parent 2ec35e2 commit 61b6b98
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 12 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
v0.5 (unreleased):

- -
+ - Fixed parsing bugs involving:
+ - wikitables nested in templates;
+ - wikitable error recovery when unable to recurse.

v0.4.3 (released October 29, 2015):

Expand Down
5 changes: 4 additions & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ v0.5
Unreleased
(`changes <https://github.com/earwig/mwparserfromhell/compare/v0.4.3...develop>`__):

- -
+ - Fixed parsing bugs involving:
+
+ - wikitables nested in templates;
+ - wikitable error recovery when unable to recurse.

v0.4.3
------
Expand Down
10 changes: 4 additions & 6 deletions mwparserfromhell/parser/ctokenizer/tok_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -2190,7 +2190,7 @@ static PyObject* Tokenizer_handle_table_style(Tokenizer* self, Unicode end_token
*/
static int Tokenizer_parse_table(Tokenizer* self)
{
- Py_ssize_t reset = self->head + 1;
+ Py_ssize_t reset = self->head;
PyObject *style, *padding;
PyObject *table = NULL;
self->head += 2;
Expand All @@ -2201,7 +2201,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
if (BAD_ROUTE) {
RESET_ROUTE();
self->head = reset;
- if (Tokenizer_emit_text(self, "{|"))
+ if (Tokenizer_emit_char(self, '{'))
return -1;
return 0;
}
Expand All @@ -2220,7 +2220,7 @@ static int Tokenizer_parse_table(Tokenizer* self)
Py_DECREF(padding);
Py_DECREF(style);
self->head = reset;
- if (Tokenizer_emit_text(self, "{|"))
+ if (Tokenizer_emit_char(self, '{'))
return -1;
return 0;
}
Expand Down Expand Up @@ -2689,10 +2689,8 @@ PyObject* Tokenizer_parse(Tokenizer* self, uint64_t context, int push)
if (Tokenizer_parse_table(self))
return NULL;
}
- else if (Tokenizer_emit_char(self, this) || Tokenizer_emit_char(self, next))
+ else if (Tokenizer_emit_char(self, this))
return NULL;
- else
- self->head++;
}
else if (this_context & LC_TABLE_OPEN) {
if (this == '|' && next == '|' && this_context & LC_TABLE_TD_LINE) {
Expand Down
8 changes: 4 additions & 4 deletions mwparserfromhell/parser/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,14 +1074,14 @@ def _handle_table_style(self, end_token):

def _parse_table(self):
"""Parse a wikicode table by starting with the first line."""
- reset = self._head + 1
+ reset = self._head
self._head += 2
self._push(contexts.TABLE_OPEN)
try:
padding = self._handle_table_style("\n")
except BadRoute:
self._head = reset
- self._emit_text("{|")
+ self._emit_text("{")
return
style = self._pop()

Expand All @@ -1090,7 +1090,7 @@ def _parse_table(self):
table = self._parse(contexts.TABLE_OPEN)
except BadRoute:
self._head = reset
- self._emit_text("{|")
+ self._emit_text("{")
return

self._emit_table_tag("{|", "table", style, padding, None, table, "|}")
Expand Down Expand Up @@ -1352,7 +1352,7 @@ def _parse(self, context=0, push=True):
if self._can_recurse():
self._parse_table()
else:
- self._emit_text("{|")
+ self._emit_text("{")
elif self._context & contexts.TABLE_OPEN:
if this == next == "|" and self._context & contexts.TABLE_TD_LINE:
if self._context & contexts.TABLE_CELL_OPEN:
Expand Down
14 changes: 14 additions & 0 deletions tests/tokenizer/integration.mwtest
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,17 @@ name: wikilink_to_external_link_fallback_2
label: an external link enclosed in an extra pair of brackets (see issue #120)
input: "[[http://example.com]]"
output: [Text(text="["), ExternalLinkOpen(brackets=True), Text(text="http://example.com"), ExternalLinkClose(), Text(text="]")]

---

name: tables_in_templates
label: catch error handling mistakes when wikitables are inside templates
input: "{{hello|test\n{|\n|} }}"
output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n"), TagOpenOpen(wiki_markup="{|"), Text(text="table"), TagCloseOpen(padding="\n"), TagOpenClose(wiki_markup="|}"), Text(text="table"), TagCloseClose(), Text(text=" "), TemplateClose()]

---

name: tables_in_templates_2
label: catch error handling mistakes when wikitables are inside templates
input: "{{hello|test\n{|\n| }}"
output: [TemplateOpen(), Text(text="hello"), TemplateParamSeparator(), Text(text="test\n{"), TemplateParamSeparator(), Text(text="\n"), TemplateParamSeparator(), Text(text=" "), TemplateClose()]

0 comments on commit 61b6b98

Please sign in to comment.