Skip to content

Commit

Permalink
template is a special tag
Browse files Browse the repository at this point in the history
Also updated isSpecial to current spec

Fixes #2258
  • Loading branch information
jhy committed Jan 14, 2025
1 parent a62c7f3 commit 72fb596
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
exceeded. [2250](https://github.com/jhy/jsoup/pull/2250)
* For backwards compatibility, allow `null` InputStream inputs to `Jsoup.parse(InputStream stream, ...)`, by returning
an empty `Document`. [2252](https://github.com/jhy/jsoup/issues/2252)
* A `template` tag containing an `li` within an open `li` would be parsed incorrectly, as `template` was not recognized as a "special" tag (special tags have additional processing rules). Also added the SVG and MathML namespace tags to the list of special tags. [2258](https://github.com/jhy/jsoup/issues/2258)

## 1.18.3 (2024-Dec-02)

Expand Down
31 changes: 20 additions & 11 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,15 @@ public class HtmlTreeBuilder extends TreeBuilder {
// Tag name lookup tables. Each array is searched with inSorted(), so the entries must stay in sorted order
// (HtmlTreeBuilderTest.ensureSearchArraysAreSorted checks this).
static final String[] TagSearchSelectScope = new String[]{"optgroup", "option"};
static final String[] TagSearchEndTags = new String[]{"dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc"};
static final String[] TagThoroughSearchEndTags = new String[]{"caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"};
// HTML namespace "special" tags. Declared exactly once: a second declaration of the same field does not compile.
// This list includes "template", which is required for <li> handling inside a <template> (issue #2258).
static final String[] TagSearchSpecial = new String[]{
    "address", "applet", "area", "article", "aside", "base", "basefont", "bgsound", "blockquote", "body", "br",
    "button", "caption", "center", "col", "colgroup", "dd", "details", "dir", "div", "dl", "dt", "embed",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6",
    "head", "header", "hgroup", "hr", "html", "iframe", "img", "input", "keygen", "li", "link", "listing", "main",
    "marquee", "menu", "meta", "nav", "noembed", "noframes", "noscript", "object", "ol", "p", "param", "plaintext",
    "pre", "script", "search", "section", "select", "source", "style", "summary", "table", "tbody", "td",
    "template", "textarea", "tfoot", "th", "thead", "title", "tr", "track", "ul", "wbr", "xmp"};
// MathML namespace "special" tags. Differs from TagMathMlTextIntegration in that it also includes annotation-xml.
// Final like the other lookup tables, so the reference cannot be reassigned.
static final String[] TagSearchSpecialMath = new String[]{"annotation-xml", "mi", "mn", "mo", "ms", "mtext"};
static final String[] TagMathMlTextIntegration = new String[]{"mi", "mn", "mo", "ms", "mtext"};
static final String[] TagSvgHtmlIntegration = new String[]{"desc", "foreignObject", "title"};

Expand Down Expand Up @@ -818,10 +819,18 @@ void closeElement(String name) {
}

/**
 * Tests if the element is a "special" tag, using the special-tag list for the element's own namespace.
 * HTML elements are checked against {@code TagSearchSpecial}, MathML elements against
 * {@code TagSearchSpecialMath}, and SVG elements against {@code TagSvgHtmlIntegration}.
 *
 * @param el the element to test
 * @return true if the element's normal name is in the list for its namespace; false otherwise, including for
 *     elements in any other namespace
 */
static boolean isSpecial(Element el) {
    String namespace = el.tag().namespace();
    String name = el.normalName();
    // No unconditional return ahead of the switch: that would make the namespace dispatch unreachable.
    switch (namespace) {
        case NamespaceHtml:
            return inSorted(name, TagSearchSpecial);
        case Parser.NamespaceMathml:
            return inSorted(name, TagSearchSpecialMath);
        case Parser.NamespaceSvg:
            return inSorted(name, TagSvgHtmlIntegration);
        default:
            return false;
    }
}

Element lastFormattingElement() {
Expand Down
8 changes: 8 additions & 0 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1656,6 +1656,14 @@ private boolean didAddElements(String input) {
TextUtil.stripNewlines(doc.head().html()));
}

@Test void templateInLi() {
    // https://github.com/jhy/jsoup/issues/2258
    // li elements inside a template must stay within it, and not close the li that holds the template
    Document parsed = Jsoup.parse("<ul><li>L1</li><li>L2 <template><li>T1</li><li>T2</template></li><li>L3</ul>");
    String expected = "<ul><li>L1</li><li>L2 <template><li>T1</li><li>T2</li></template></li><li>L3</li></ul>";
    String actual = TextUtil.stripNewlines(parsed.body().html());
    assertEquals(expected, actual);
}

@Test void errorsBeforeHtml() {
Parser parser = Parser.htmlParser();
parser.setTrackErrors(10);
Expand Down
24 changes: 23 additions & 1 deletion src/test/java/org/jsoup/parser/HtmlTreeBuilderTest.java
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
package org.jsoup.parser;


import org.jsoup.nodes.Element;
import org.jspecify.annotations.NullMarked;
import org.junit.jupiter.api.Test;
import java.io.Reader;
import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.util.List;

import static org.jsoup.parser.Parser.NamespaceHtml;
import static org.junit.jupiter.api.Assertions.*;

public class HtmlTreeBuilderTest {
@Test
public void ensureSearchArraysAreSorted() {
    List<Object[]> constants = HtmlTreeBuilderStateTest.findConstantArrays(HtmlTreeBuilder.class);
    HtmlTreeBuilderStateTest.ensureSorted(constants);
    // A single expected count: asserting both 10 and 11 on the same value can never pass.
    // 11 accounts for the TagSearchSpecialMath array; update this when adding or removing a search array.
    assertEquals(11, constants.size());
}

@Test
Expand All @@ -37,6 +39,26 @@ public void nonnull() {

// would need to rework this if/when that annotation moves from the method to the class / package.
assertTrue(seen);
}

@Test void isSpecial() {
    ParseSettings defaults = ParseSettings.htmlDefault;

    // HTML namespace: a listed tag is special, an unknown tag is not
    Element div = new Element(Tag.valueOf("div", NamespaceHtml, defaults), "");
    Element unknownHtml = new Element(Tag.valueOf("not-html", NamespaceHtml, defaults), "");
    assertTrue(HtmlTreeBuilder.isSpecial(div));
    assertFalse(HtmlTreeBuilder.isSpecial(unknownHtml));

    // MathML namespace
    Element mi = new Element(Tag.valueOf("mi", Parser.NamespaceMathml, defaults), "");
    Element unknownMath = new Element(Tag.valueOf("not-math", Parser.NamespaceMathml, defaults), "");
    assertTrue(HtmlTreeBuilder.isSpecial(mi));
    assertFalse(HtmlTreeBuilder.isSpecial(unknownMath));

    // SVG namespace
    Element svgTitle = new Element(Tag.valueOf("title", Parser.NamespaceSvg, defaults), "");
    Element unknownSvg = new Element(Tag.valueOf("not-svg", Parser.NamespaceSvg, defaults), "");
    assertTrue(HtmlTreeBuilder.isSpecial(svgTitle));
    assertFalse(HtmlTreeBuilder.isSpecial(unknownSvg));
}
}

0 comments on commit 72fb596

Please sign in to comment.