diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java index 05c28ccd47..20dc31f65e 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java @@ -250,6 +250,9 @@ private boolean anythingElse(Token t, HtmlTreeBuilder tb) { }, InBody { boolean process(Token t, HtmlTreeBuilder tb) { + ArrayList stack; + Element el; + switch (t.type) { case Character: { Token.Character c = t.asCharacter(); @@ -277,485 +280,546 @@ boolean process(Token t, HtmlTreeBuilder tb) { } case StartTag: Token.StartTag startTag = t.asStartTag(); - // todo - refactor to a switch statement String name = startTag.normalName(); - if (name.equals("a")) { - if (tb.getActiveFormattingElement("a") != null) { - tb.error(this); - tb.processEndTag("a"); - // still on stack? - Element remainingA = tb.getFromStack("a"); - if (remainingA != null) { - tb.removeFromActiveFormattingElements(remainingA); - tb.removeFromStack(remainingA); - } - } - tb.reconstructFormattingElements(); - Element a = tb.insert(startTag); - tb.pushActiveFormattingElements(a); - } else if (StringUtil.inSorted(name, Constants.InBodyStartEmptyFormatters)) { - tb.reconstructFormattingElements(); - tb.insertEmpty(startTag); - tb.framesetOk(false); - } else if (StringUtil.inSorted(name, Constants.InBodyStartPClosers)) { - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insert(startTag); - } else if (name.equals("span")) { - // same as final else, but short circuits lots of checks - tb.reconstructFormattingElements(); - tb.insert(startTag); - } else if (name.equals("li")) { - tb.framesetOk(false); - ArrayList stack = tb.getStack(); - for (int i = stack.size() - 1; i > 0; i--) { - Element el = stack.get(i); - if (el.normalName().equals("li")) { - tb.processEndTag("li"); - break; + switch (name) { + case "a": + if (tb.getActiveFormattingElement("a") != null) { + tb.error(this); + 
tb.processEndTag("a"); + + // still on stack? + Element remainingA = tb.getFromStack("a"); + if (remainingA != null) { + tb.removeFromActiveFormattingElements(remainingA); + tb.removeFromStack(remainingA); + } } - if (tb.isSpecial(el) && !StringUtil.inSorted(el.normalName(), Constants.InBodyStartLiBreakers)) - break; - } - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insert(startTag); - } else if (name.equals("html")) { - tb.error(this); - // merge attributes onto real html - Element html = tb.getStack().get(0); - for (Attribute attribute : startTag.getAttributes()) { - if (!html.hasAttr(attribute.getKey())) - html.attributes().put(attribute); - } - } else if (StringUtil.inSorted(name, Constants.InBodyStartToHead)) { - return tb.process(t, InHead); - } else if (name.equals("body")) { - tb.error(this); - ArrayList stack = tb.getStack(); - if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).normalName().equals("body"))) { - // only in fragment case - return false; // ignore - } else { + tb.reconstructFormattingElements(); + Element a = tb.insert(startTag); + tb.pushActiveFormattingElements(a); + break; + case "span": + // same as final else, but short circuits lots of checks + tb.reconstructFormattingElements(); + tb.insert(startTag); + break; + case "li": tb.framesetOk(false); - Element body = stack.get(1); - for (Attribute attribute : startTag.getAttributes()) { - if (!body.hasAttr(attribute.getKey())) - body.attributes().put(attribute); + stack = tb.getStack(); + for (int i = stack.size() - 1; i > 0; i--) { + el = stack.get(i); + if (el.normalName().equals("li")) { + tb.processEndTag("li"); + break; + } + if (tb.isSpecial(el) && !StringUtil.inSorted(el.normalName(), Constants.InBodyStartLiBreakers)) + break; + } + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); } - } - } else if (name.equals("frameset")) { - tb.error(this); - ArrayList stack = tb.getStack(); - if (stack.size() == 1 || (stack.size() > 2 && 
!stack.get(1).normalName().equals("body"))) { - // only in fragment case - return false; // ignore - } else if (!tb.framesetOk()) { - return false; // ignore frameset - } else { - Element second = stack.get(1); - if (second.parent() != null) - second.remove(); - // pop up to html element - while (stack.size() > 1) - stack.remove(stack.size()-1); tb.insert(startTag); - tb.transition(InFrameset); - } - } else if (StringUtil.inSorted(name, Constants.Headings)) { - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - if (StringUtil.inSorted(tb.currentElement().normalName(), Constants.Headings)) { + break; + case "html": tb.error(this); - tb.pop(); - } - tb.insert(startTag); - } else if (StringUtil.inSorted(name, Constants.InBodyStartPreListing)) { - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insert(startTag); - tb.reader.matchConsume("\n"); // ignore LF if next token - tb.framesetOk(false); - } else if (name.equals("form")) { - if (tb.getFormElement() != null) { + // merge attributes onto real html + Element html = tb.getStack().get(0); + for (Attribute attribute : startTag.getAttributes()) { + if (!html.hasAttr(attribute.getKey())) + html.attributes().put(attribute); + } + break; + case "body": tb.error(this); - return false; - } - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insertForm(startTag, true); - } else if (StringUtil.inSorted(name, Constants.DdDt)) { - tb.framesetOk(false); - ArrayList stack = tb.getStack(); - for (int i = stack.size() - 1; i > 0; i--) { - Element el = stack.get(i); - if (StringUtil.inSorted(el.normalName(), Constants.DdDt)) { - tb.processEndTag(el.normalName()); - break; + stack = tb.getStack(); + if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).normalName().equals("body"))) { + // only in fragment case + return false; // ignore + } else { + tb.framesetOk(false); + Element body = stack.get(1); + for (Attribute attribute : startTag.getAttributes()) { + if 
(!body.hasAttr(attribute.getKey())) + body.attributes().put(attribute); + } } - if (tb.isSpecial(el) && !StringUtil.inSorted(el.normalName(), Constants.InBodyStartLiBreakers)) - break; - } - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insert(startTag); - } else if (name.equals("plaintext")) { - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insert(startTag); - tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once in, never gets out - } else if (name.equals("button")) { - if (tb.inButtonScope("button")) { - // close and reprocess + break; + case "frameset": tb.error(this); - tb.processEndTag("button"); - tb.process(startTag); - } else { + stack = tb.getStack(); + if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).normalName().equals("body"))) { + // only in fragment case + return false; // ignore + } else if (!tb.framesetOk()) { + return false; // ignore frameset + } else { + Element second = stack.get(1); + if (second.parent() != null) + second.remove(); + // pop up to html element + while (stack.size() > 1) + stack.remove(stack.size() - 1); + tb.insert(startTag); + tb.transition(InFrameset); + } + break; + case "form": + if (tb.getFormElement() != null) { + tb.error(this); + return false; + } + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.insertForm(startTag, true); + break; + case "plaintext": + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.insert(startTag); + tb.tokeniser.transition(TokeniserState.PLAINTEXT); // once in, never gets out + break; + case "button": + if (tb.inButtonScope("button")) { + // close and reprocess + tb.error(this); + tb.processEndTag("button"); + tb.process(startTag); + } else { + tb.reconstructFormattingElements(); + tb.insert(startTag); + tb.framesetOk(false); + } + break; + case "nobr": tb.reconstructFormattingElements(); + if (tb.inScope("nobr")) { + tb.error(this); + tb.processEndTag("nobr"); + tb.reconstructFormattingElements(); + } + el = 
tb.insert(startTag); + tb.pushActiveFormattingElements(el); + break; + case "table": + if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) { + tb.processEndTag("p"); + } tb.insert(startTag); tb.framesetOk(false); - } - } else if (StringUtil.inSorted(name, Constants.Formatters)) { - tb.reconstructFormattingElements(); - Element el = tb.insert(startTag); - tb.pushActiveFormattingElements(el); - } else if (name.equals("nobr")) { - tb.reconstructFormattingElements(); - if (tb.inScope("nobr")) { - tb.error(this); - tb.processEndTag("nobr"); + tb.transition(InTable); + break; + case "input": tb.reconstructFormattingElements(); - } - Element el = tb.insert(startTag); - tb.pushActiveFormattingElements(el); - } else if (StringUtil.inSorted(name, Constants.InBodyStartApplets)) { - tb.reconstructFormattingElements(); - tb.insert(startTag); - tb.insertMarkerToFormattingElements(); - tb.framesetOk(false); - } else if (name.equals("table")) { - if (tb.getDocument().quirksMode() != Document.QuirksMode.quirks && tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insert(startTag); - tb.framesetOk(false); - tb.transition(InTable); - } else if (name.equals("input")) { - tb.reconstructFormattingElements(); - Element el = tb.insertEmpty(startTag); - if (!el.attr("type").equalsIgnoreCase("hidden")) + el = tb.insertEmpty(startTag); + if (!el.attr("type").equalsIgnoreCase("hidden")) + tb.framesetOk(false); + break; + case "hr": + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.insertEmpty(startTag); tb.framesetOk(false); - } else if (StringUtil.inSorted(name, Constants.InBodyStartMedia)) { - tb.insertEmpty(startTag); - } else if (name.equals("hr")) { - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.insertEmpty(startTag); - tb.framesetOk(false); - } else if (name.equals("image")) { - if (tb.getFromStack("svg") == null) - return tb.process(startTag.name("img")); // change to , unless in svg - else - 
tb.insert(startTag); - } else if (name.equals("isindex")) { - // how much do we care about the early 90s? - tb.error(this); - if (tb.getFormElement() != null) - return false; + break; + case "image": + if (tb.getFromStack("svg") == null) + return tb.process(startTag.name("img")); // change to , unless in svg + else + tb.insert(startTag); + break; + case "isindex": + // how much do we care about the early 90s? + tb.error(this); + if (tb.getFormElement() != null) + return false; - tb.processStartTag("form"); - if (startTag.attributes.hasKey("action")) { - Element form = tb.getFormElement(); - form.attr("action", startTag.attributes.get("action")); - } - tb.processStartTag("hr"); - tb.processStartTag("label"); - // hope you like english. - String prompt = startTag.attributes.hasKey("prompt") ? + tb.processStartTag("form"); + if (startTag.attributes.hasKey("action")) { + Element form = tb.getFormElement(); + form.attr("action", startTag.attributes.get("action")); + } + tb.processStartTag("hr"); + tb.processStartTag("label"); + // hope you like english. + String prompt = startTag.attributes.hasKey("prompt") ? startTag.attributes.get("prompt") : "This is a searchable index. 
Enter search keywords: "; - tb.process(new Token.Character().data(prompt)); + tb.process(new Token.Character().data(prompt)); - // input - Attributes inputAttribs = new Attributes(); - for (Attribute attr : startTag.attributes) { - if (!StringUtil.inSorted(attr.getKey(), Constants.InBodyStartInputAttribs)) - inputAttribs.put(attr); - } - inputAttribs.put("name", "isindex"); - tb.processStartTag("input", inputAttribs); - tb.processEndTag("label"); - tb.processStartTag("hr"); - tb.processEndTag("form"); - } else if (name.equals("textarea")) { - tb.insert(startTag); - if (!startTag.isSelfClosing()) { - tb.tokeniser.transition(TokeniserState.Rcdata); - tb.markInsertionMode(); + // input + Attributes inputAttribs = new Attributes(); + for (Attribute attr : startTag.attributes) { + if (!StringUtil.inSorted(attr.getKey(), Constants.InBodyStartInputAttribs)) + inputAttribs.put(attr); + } + inputAttribs.put("name", "isindex"); + tb.processStartTag("input", inputAttribs); + tb.processEndTag("label"); + tb.processStartTag("hr"); + tb.processEndTag("form"); + break; + case "textarea": + tb.insert(startTag); + if (!startTag.isSelfClosing()) { + tb.tokeniser.transition(TokeniserState.Rcdata); + tb.markInsertionMode(); + tb.framesetOk(false); + tb.transition(Text); + } + break; + case "xmp": + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.reconstructFormattingElements(); + tb.framesetOk(false); + handleRawtext(startTag, tb); + break; + case "iframe": + tb.framesetOk(false); + handleRawtext(startTag, tb); + break; + case "noembed": + // also handle noscript if script enabled + handleRawtext(startTag, tb); + break; + case "select": + tb.reconstructFormattingElements(); + tb.insert(startTag); tb.framesetOk(false); - tb.transition(Text); - } - } else if (name.equals("xmp")) { - if (tb.inButtonScope("p")) { - tb.processEndTag("p"); - } - tb.reconstructFormattingElements(); - tb.framesetOk(false); - handleRawtext(startTag, tb); - } else if (name.equals("iframe")) { - 
tb.framesetOk(false); - handleRawtext(startTag, tb); - } else if (name.equals("noembed")) { - // also handle noscript if script enabled - handleRawtext(startTag, tb); - } else if (name.equals("select")) { - tb.reconstructFormattingElements(); - tb.insert(startTag); - tb.framesetOk(false); - HtmlTreeBuilderState state = tb.state(); - if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTableBody) || state.equals(InRow) || state.equals(InCell)) - tb.transition(InSelectInTable); - else - tb.transition(InSelect); - } else if (StringUtil.inSorted(name, Constants.InBodyStartOptions)) { - if (tb.currentElement().normalName().equals("option")) - tb.processEndTag("option"); - tb.reconstructFormattingElements(); - tb.insert(startTag); - } else if (StringUtil.inSorted(name, Constants.InBodyStartRuby)) { - if (tb.inScope("ruby")) { - tb.generateImpliedEndTags(); - if (!tb.currentElement().normalName().equals("ruby")) { + HtmlTreeBuilderState state = tb.state(); + if (state.equals(InTable) || state.equals(InCaption) || state.equals(InTableBody) || state.equals(InRow) || state.equals(InCell)) + tb.transition(InSelectInTable); + else + tb.transition(InSelect); + break; + case "math": + tb.reconstructFormattingElements(); + // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml) + tb.insert(startTag); + break; + case "svg": + tb.reconstructFormattingElements(); + // todo: handle A start tag whose tag name is "svg" (xlink, svg) + tb.insert(startTag); + break; + // static final String[] Headings = new String[]{"h1", "h2", "h3", "h4", "h5", "h6"}; + case "h1": + case "h2": + case "h3": + case "h4": + case "h5": + case "h6": + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + if (StringUtil.inSorted(tb.currentElement().normalName(), Constants.Headings)) { tb.error(this); - tb.popStackToBefore("ruby"); // i.e. 
close up to but not include name + tb.pop(); } tb.insert(startTag); - } - } else if (name.equals("math")) { - tb.reconstructFormattingElements(); - // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml) - tb.insert(startTag); - } else if (name.equals("svg")) { - tb.reconstructFormattingElements(); - // todo: handle A start tag whose tag name is "svg" (xlink, svg) - tb.insert(startTag); - } else if (StringUtil.inSorted(name, Constants.InBodyStartDrop)) { - tb.error(this); - return false; - } else { - tb.reconstructFormattingElements(); - tb.insert(startTag); + break; + // static final String[] InBodyStartPreListing = new String[]{"listing", "pre"}; + case "pre": + case "listing": + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.insert(startTag); + tb.reader.matchConsume("\n"); // ignore LF if next token + tb.framesetOk(false); + break; + // static final String[] DdDt = new String[]{"dd", "dt"}; + case "dd": + case "dt": + tb.framesetOk(false); + stack = tb.getStack(); + for (int i = stack.size() - 1; i > 0; i--) { + el = stack.get(i); + if (StringUtil.inSorted(el.normalName(), Constants.DdDt)) { + tb.processEndTag(el.normalName()); + break; + } + if (tb.isSpecial(el) && !StringUtil.inSorted(el.normalName(), Constants.InBodyStartLiBreakers)) + break; + } + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.insert(startTag); + break; + // static final String[] InBodyStartOptions = new String[]{"optgroup", "option"}; + case "optgroup": + case "option": + if (tb.currentElement().normalName().equals("option")) + tb.processEndTag("option"); + tb.reconstructFormattingElements(); + tb.insert(startTag); + break; + // static final String[] InBodyStartRuby = new String[]{"rp", "rt"}; + case "rp": + case "rt": + if (tb.inScope("ruby")) { + tb.generateImpliedEndTags(); + if (!tb.currentElement().normalName().equals("ruby")) { + tb.error(this); + tb.popStackToBefore("ruby"); // i.e. 
close up to but not include name + } + tb.insert(startTag); + } + // todo - is this right? drops rp, rt if ruby not in scope? + break; + default: + // todo - bring scan groups in if desired + if (StringUtil.inSorted(name, Constants.InBodyStartEmptyFormatters)) { + tb.reconstructFormattingElements(); + tb.insertEmpty(startTag); + tb.framesetOk(false); + } else if (StringUtil.inSorted(name, Constants.InBodyStartPClosers)) { + if (tb.inButtonScope("p")) { + tb.processEndTag("p"); + } + tb.insert(startTag); + } else if (StringUtil.inSorted(name, Constants.InBodyStartToHead)) { + return tb.process(t, InHead); + } else if (StringUtil.inSorted(name, Constants.Formatters)) { + tb.reconstructFormattingElements(); + el = tb.insert(startTag); + tb.pushActiveFormattingElements(el); + } else if (StringUtil.inSorted(name, Constants.InBodyStartApplets)) { + tb.reconstructFormattingElements(); + tb.insert(startTag); + tb.insertMarkerToFormattingElements(); + tb.framesetOk(false); + } else if (StringUtil.inSorted(name, Constants.InBodyStartMedia)) { + tb.insertEmpty(startTag); + } else if (StringUtil.inSorted(name, Constants.InBodyStartDrop)) { + tb.error(this); + return false; + } else { + tb.reconstructFormattingElements(); + tb.insert(startTag); + } } break; case EndTag: Token.EndTag endTag = t.asEndTag(); name = endTag.normalName(); - if (StringUtil.inSorted(name, Constants.InBodyEndAdoptionFormatters)) { - // Adoption Agency Algorithm. 
- for (int i = 0; i < 8; i++) { - Element formatEl = tb.getActiveFormattingElement(name); - if (formatEl == null) - return anyOtherEndTag(t, tb); - else if (!tb.onStack(formatEl)) { + switch (name) { + case "sarcasm": // *sigh* + case "span": + // same as final fall through, but saves short circuit + return anyOtherEndTag(t, tb); + case "li": + if (!tb.inListItemScope(name)) { tb.error(this); - tb.removeFromActiveFormattingElements(formatEl); - return true; - } else if (!tb.inScope(formatEl.normalName())) { + return false; + } else { + tb.generateImpliedEndTags(name); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + tb.popStackToClose(name); + } + break; + case "body": + if (!tb.inScope("body")) { tb.error(this); return false; - } else if (tb.currentElement() != formatEl) + } else { + // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html + tb.transition(AfterBody); + } + break; + case "html": + boolean notIgnored = tb.processEndTag("body"); + if (notIgnored) + return tb.process(endTag); + break; + case "form": + Element currentForm = tb.getFormElement(); + tb.setFormElement(null); + if (currentForm == null || !tb.inScope(name)) { tb.error(this); - - Element furthestBlock = null; - Element commonAncestor = null; - boolean seenFormattingElement = false; - ArrayList stack = tb.getStack(); - // the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents - // run-aways - final int stackSize = stack.size(); - for (int si = 0; si < stackSize && si < 64; si++) { - Element el = stack.get(si); - if (el == formatEl) { - commonAncestor = stack.get(si - 1); - seenFormattingElement = true; - } else if (seenFormattingElement && tb.isSpecial(el)) { - furthestBlock = el; - break; - } + return false; + } else { + tb.generateImpliedEndTags(); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + // remove currentForm from stack. 
will shift anything under up. + tb.removeFromStack(currentForm); } - if (furthestBlock == null) { - tb.popStackToClose(formatEl.normalName()); - tb.removeFromActiveFormattingElements(formatEl); - return true; + break; + case "p": + if (!tb.inButtonScope(name)) { + tb.error(this); + tb.processStartTag(name); // if no p to close, creates an empty <p></p>
+ return tb.process(endTag); + } else { + tb.generateImpliedEndTags(name); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + tb.popStackToClose(name); } + break; + case "dd": + case "dt": + if (!tb.inScope(name)) { + tb.error(this); + return false; + } else { + tb.generateImpliedEndTags(name); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + tb.popStackToClose(name); + } + break; + case "h1": + case "h2": + case "h3": + case "h4": + case "h5": + case "h6": + if (!tb.inScope(Constants.Headings)) { + tb.error(this); + return false; + } else { + tb.generateImpliedEndTags(name); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + tb.popStackToClose(Constants.Headings); + } + break; + case "br": + tb.error(this); + tb.processStartTag("br"); + return false; + default: + // todo - move rest to switch if desired + if (StringUtil.inSorted(name, Constants.InBodyEndAdoptionFormatters)) { + // Adoption Agency Algorithm. + for (int i = 0; i < 8; i++) { + Element formatEl = tb.getActiveFormattingElement(name); + if (formatEl == null) + return anyOtherEndTag(t, tb); + else if (!tb.onStack(formatEl)) { + tb.error(this); + tb.removeFromActiveFormattingElements(formatEl); + return true; + } else if (!tb.inScope(formatEl.normalName())) { + tb.error(this); + return false; + } else if (tb.currentElement() != formatEl) + tb.error(this); - // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list. - // does that mean: int pos of format el in list? 
- Element node = furthestBlock; - Element lastNode = furthestBlock; - for (int j = 0; j < 3; j++) { - if (tb.onStack(node)) - node = tb.aboveOnStack(node); - if (!tb.isInActiveFormattingElements(node)) { // note no bookmark check - tb.removeFromStack(node); - continue; - } else if (node == formatEl) - break; + Element furthestBlock = null; + Element commonAncestor = null; + boolean seenFormattingElement = false; + stack = tb.getStack(); + // the spec doesn't limit to < 64, but in degenerate cases (9000+ stack depth) this prevents + // run-aways + final int stackSize = stack.size(); + for (int si = 0; si < stackSize && si < 64; si++) { + el = stack.get(si); + if (el == formatEl) { + commonAncestor = stack.get(si - 1); + seenFormattingElement = true; + } else if (seenFormattingElement && tb.isSpecial(el)) { + furthestBlock = el; + break; + } + } + if (furthestBlock == null) { + tb.popStackToClose(formatEl.normalName()); + tb.removeFromActiveFormattingElements(formatEl); + return true; + } - Element replacement = new Element(Tag.valueOf(node.nodeName(), ParseSettings.preserveCase), tb.getBaseUri()); - // case will follow the original node (so honours ParseSettings) - tb.replaceActiveFormattingElement(node, replacement); - tb.replaceOnStack(node, replacement); - node = replacement; + // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list. + // does that mean: int pos of format el in list? + Element node = furthestBlock; + Element lastNode = furthestBlock; + for (int j = 0; j < 3; j++) { + if (tb.onStack(node)) + node = tb.aboveOnStack(node); + if (!tb.isInActiveFormattingElements(node)) { // note no bookmark check + tb.removeFromStack(node); + continue; + } else if (node == formatEl) + break; - if (lastNode == furthestBlock) { - // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements. 
- // not getting how this bookmark both straddles the element above, but is inbetween here... - } - if (lastNode.parent() != null) - lastNode.remove(); - node.appendChild(lastNode); + Element replacement = new Element(Tag.valueOf(node.nodeName(), ParseSettings.preserveCase), tb.getBaseUri()); + // case will follow the original node (so honours ParseSettings) + tb.replaceActiveFormattingElement(node, replacement); + tb.replaceOnStack(node, replacement); + node = replacement; - lastNode = node; - } + //noinspection StatementWithEmptyBody + if (lastNode == furthestBlock) { + // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements. + // not getting how this bookmark both straddles the element above, but is inbetween here... + } + if (lastNode.parent() != null) + lastNode.remove(); + node.appendChild(lastNode); - if (StringUtil.inSorted(commonAncestor.normalName(), Constants.InBodyEndTableFosters)) { - if (lastNode.parent() != null) - lastNode.remove(); - tb.insertInFosterParent(lastNode); - } else { - if (lastNode.parent() != null) - lastNode.remove(); - commonAncestor.appendChild(lastNode); - } + lastNode = node; + } - Element adopter = new Element(formatEl.tag(), tb.getBaseUri()); - adopter.attributes().addAll(formatEl.attributes()); - Node[] childNodes = furthestBlock.childNodes().toArray(new Node[0]); - for (Node childNode : childNodes) { - adopter.appendChild(childNode); // append will reparent. thus the clone to avoid concurrent mod. - } - furthestBlock.appendChild(adopter); - tb.removeFromActiveFormattingElements(formatEl); - // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark. 
- tb.removeFromStack(formatEl); - tb.insertOnStackAfter(furthestBlock, adopter); - } - } else if (StringUtil.inSorted(name, Constants.InBodyEndClosers)) { - if (!tb.inScope(name)) { - // nothing to close - tb.error(this); - return false; - } else { - tb.generateImpliedEndTags(); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - tb.popStackToClose(name); - } - } else if (name.equals("span")) { - // same as final fall through, but saves short circuit - return anyOtherEndTag(t, tb); - } else if (name.equals("li")) { - if (!tb.inListItemScope(name)) { - tb.error(this); - return false; - } else { - tb.generateImpliedEndTags(name); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - tb.popStackToClose(name); - } - } else if (name.equals("body")) { - if (!tb.inScope("body")) { - tb.error(this); - return false; - } else { - // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html - tb.transition(AfterBody); - } - } else if (name.equals("html")) { - boolean notIgnored = tb.processEndTag("body"); - if (notIgnored) - return tb.process(endTag); - } else if (name.equals("form")) { - Element currentForm = tb.getFormElement(); - tb.setFormElement(null); - if (currentForm == null || !tb.inScope(name)) { - tb.error(this); - return false; - } else { - tb.generateImpliedEndTags(); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - // remove currentForm from stack. will shift anything under up. - tb.removeFromStack(currentForm); - } - } else if (name.equals("p")) { - if (!tb.inButtonScope(name)) { - tb.error(this); - tb.processStartTag(name); // if no p to close, creates an empty <p></p>
- return tb.process(endTag); - } else { - tb.generateImpliedEndTags(name); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - tb.popStackToClose(name); - } - } else if (StringUtil.inSorted(name, Constants.DdDt)) { - if (!tb.inScope(name)) { - tb.error(this); - return false; - } else { - tb.generateImpliedEndTags(name); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - tb.popStackToClose(name); - } - } else if (StringUtil.inSorted(name, Constants.Headings)) { - if (!tb.inScope(Constants.Headings)) { - tb.error(this); - return false; - } else { - tb.generateImpliedEndTags(name); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - tb.popStackToClose(Constants.Headings); - } - } else if (name.equals("sarcasm")) { - // *sigh* - return anyOtherEndTag(t, tb); - } else if (StringUtil.inSorted(name, Constants.InBodyStartApplets)) { - if (!tb.inScope("name")) { - if (!tb.inScope(name)) { - tb.error(this); - return false; + if (StringUtil.inSorted(commonAncestor.normalName(), Constants.InBodyEndTableFosters)) { + if (lastNode.parent() != null) + lastNode.remove(); + tb.insertInFosterParent(lastNode); + } else { + if (lastNode.parent() != null) + lastNode.remove(); + commonAncestor.appendChild(lastNode); + } + + Element adopter = new Element(formatEl.tag(), tb.getBaseUri()); + adopter.attributes().addAll(formatEl.attributes()); + Node[] childNodes = furthestBlock.childNodes().toArray(new Node[0]); + for (Node childNode : childNodes) { + adopter.appendChild(childNode); // append will reparent. thus the clone to avoid concurrent mod. + } + furthestBlock.appendChild(adopter); + tb.removeFromActiveFormattingElements(formatEl); + // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark. 
+ tb.removeFromStack(formatEl); + tb.insertOnStackAfter(furthestBlock, adopter); + } + } else if (StringUtil.inSorted(name, Constants.InBodyEndClosers)) { + if (!tb.inScope(name)) { + // nothing to close + tb.error(this); + return false; + } else { + tb.generateImpliedEndTags(); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + tb.popStackToClose(name); + } + } else if (StringUtil.inSorted(name, Constants.InBodyStartApplets)) { + if (!tb.inScope(name)) { + // no matching element in scope: parse error, ignore the end tag + tb.error(this); + return false; + } else { + tb.generateImpliedEndTags(); + if (!tb.currentElement().normalName().equals(name)) + tb.error(this); + tb.popStackToClose(name); + tb.clearFormattingElementsToLastMarker(); + } + } else { + return anyOtherEndTag(t, tb); } - tb.generateImpliedEndTags(); - if (!tb.currentElement().normalName().equals(name)) - tb.error(this); - tb.popStackToClose(name); - tb.clearFormattingElementsToLastMarker(); - } - } else if (name.equals("br")) { - tb.error(this); - tb.processStartTag("br"); - return false; - } else { - return anyOtherEndTag(t, tb); } - break; case EOF: // todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html @@ -907,37 +971,34 @@ boolean anythingElse(Token t, HtmlTreeBuilder tb) { }, InTableText { boolean process(Token t, HtmlTreeBuilder tb) { - switch (t.type) { - case Character: - Token.Character c = t.asCharacter(); - if (c.getData().equals(nullString)) { - tb.error(this); - return false; - } else { - tb.getPendingTableCharacters().add(c.getData()); - } - break; - default: - // todo - don't really like the way these table character data lists are built - if (tb.getPendingTableCharacters().size() > 0) { - for (String character : tb.getPendingTableCharacters()) { - if (!isWhitespace(character)) { - // InTable anything else section: - tb.error(this); - if (StringUtil.in(tb.currentElement().normalName(), "table", "tbody", "tfoot", "thead", "tr")) { - 
tb.setFosterInserts(true); - tb.process(new Token.Character().data(character), InBody); - tb.setFosterInserts(false); - } else { - tb.process(new Token.Character().data(character), InBody); - } - } else - tb.insert(new Token.Character().data(character)); - } - tb.newPendingTableCharacters(); + if (t.type == Token.TokenType.Character) { + Token.Character c = t.asCharacter(); + if (c.getData().equals(nullString)) { + tb.error(this); + return false; + } else { + tb.getPendingTableCharacters().add(c.getData()); + } + } else {// todo - don't really like the way these table character data lists are built + if (tb.getPendingTableCharacters().size() > 0) { + for (String character : tb.getPendingTableCharacters()) { + if (!isWhitespace(character)) { + // InTable anything else section: + tb.error(this); + if (StringUtil.in(tb.currentElement().normalName(), "table", "tbody", "tfoot", "thead", "tr")) { + tb.setFosterInserts(true); + tb.process(new Token.Character().data(character), InBody); + tb.setFosterInserts(false); + } else { + tb.process(new Token.Character().data(character), InBody); + } + } else + tb.insert(new Token.Character().data(character)); } - tb.transition(tb.originalState()); - return tb.process(t); + tb.newPendingTableCharacters(); + } + tb.transition(tb.originalState()); + return tb.process(t); } return true; } @@ -1466,7 +1527,7 @@ boolean process(Token t, HtmlTreeBuilder tb) { } }; - private static String nullString = String.valueOf('\u0000'); + private static final String nullString = String.valueOf('\u0000'); abstract boolean process(Token t, HtmlTreeBuilder tb);