From f9777aa1fedcf1958e67eee99b3af1d289b141a8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 25 Oct 2024 22:51:44 +0200 Subject: [PATCH 01/24] Add failing test case --- .../html-api/wpHtmlProcessor-bookmark.php | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php new file mode 100644 index 0000000000000..e34e01f7cda55 --- /dev/null +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php @@ -0,0 +1,34 @@ +
' ); + $this->assertTrue( $processor->next_tag( 'BODY' ) ); + $this->assertTrue( $processor->set_bookmark( $bookmark_name ), 'Failed to set bookmark.' ); + $this->assertTrue( $processor->has_bookmark( $bookmark_name ), 'Failed has_bookmark check.' ); + + // Move past the bookmark so it must scan backwards. + $this->assertTrue( $processor->next_tag( 'DIV' ) ); + + // Confirm the bookmark works. + $this->assertTrue( $processor->seek( $bookmark_name ), 'Failed to seek to bookmark.' ); + $this->assertSame( 'BODY', $processor->get_tag() ); + } +} From 5d75bb7ed8553955d1c9caeac50817b61c6b8368 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 25 Oct 2024 23:15:51 +0200 Subject: [PATCH 02/24] Work on fix --- .../html-api/class-wp-html-processor.php | 72 ++++++++++++------- .../html-api/class-wp-html-tag-processor.php | 5 +- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 4003cf48c1b69..7d3a3d56329bd 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5286,6 +5286,8 @@ public function seek( $bookmark_name ): bool { // Flush any pending updates to the document before beginning. $this->get_updated_html(); + echo 'Seeking to ' . $bookmark_name . "\n"; + $actual_bookmark_name = "_{$bookmark_name}"; $processor_started_at = $this->state->current_token ? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start @@ -5323,45 +5325,64 @@ public function seek( $bookmark_name ): bool { * and computation time. */ if ( 'backward' === $direction ) { - /* - * Instead of clearing the parser state and starting fresh, calling the stack methods - * maintains the proper flags in the parser. - */ - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; - } + echo 'back: ' . "\n"; + // Reset necessary parser state + $this->state->frameset_ok = true; + $this->state->stack_of_template_insertion_modes = array(); + $this->state->head_element = null; + $this->state->form_element = null; + $this->state->current_token = null; + $this->element_queue = array(); + $this->current_element = null; + + if ( null === $this->context_node ) { + echo 'full parser' . "\n"; + /* + * In the case of a full parser, the processor needs to be + * reset to a clean state and start over. + */ + $this->state->stack_of_open_elements = new WP_HTML_Open_Elements(); + $this->state->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); - $this->state->stack_of_open_elements->remove_node( $item ); - } + $this->breadcrumbs = array(); + $this->bytes_already_parsed = 0; + $this->parser_state = self::STATE_READY; + } else { + /* + * In case the parser is a fragment parser, instead of clearing the + * parser state and starting fresh, calling the stack methods + * maintains the proper flags in the parser. + */ + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + if ( 'context-node' === $item->bookmark_name ) { + break; + } - foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; + $this->state->stack_of_open_elements->remove_node( $item ); } - $this->state->active_formatting_elements->remove_node( $item ); - } + foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { + if ( 'context-node' === $item->bookmark_name ) { + break; + } - parent::seek( 'context-node' ); - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - $this->state->frameset_ok = true; - $this->element_queue = array(); - $this->current_element = null; + $this->state->active_formatting_elements->remove_node( $item ); + } - if ( isset( $this->context_node ) ) { - $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); - } else { - $this->breadcrumbs = array(); + parent::seek( 'context-node' ); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; + $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); } } + // When moving forwards, reparse the document until reaching the same location as the original bookmark. - if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { + if ( null !== $this->state->current_token && $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { return true; } while ( $this->next_token() ) { + echo 'bm starts at ' . $bookmark_starts_at . ' and current token starts at ' . $this->bookmarks[ $this->state->current_token->bookmark_name ]->start . "\n"; if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { $this->current_element = array_shift( $this->element_queue ); @@ -5370,6 +5391,7 @@ public function seek( $bookmark_name ): bool { } } + echo 'false' . "\n"; return false; } diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index e2632c80f6da5..ecce9b285c779 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -591,7 +591,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var int */ - private $bytes_already_parsed = 0; + protected $bytes_already_parsed = 0; /** * Byte offset in input document where current token starts. @@ -946,6 +946,8 @@ private function base_class_next_token(): bool { $was_at = $this->bytes_already_parsed; $this->after_tag(); + var_dump( $was_at, $this->parser_state ); + // Don't proceed if there's nothing more to scan. if ( self::STATE_COMPLETE === $this->parser_state || @@ -2542,6 +2544,7 @@ public function has_bookmark( $bookmark_name ): bool { * @return bool Whether the internal cursor was successfully moved to the bookmark's location. */ public function seek( $bookmark_name ): bool { + var_dump( $bookmark_name, $this->bookmarks ); if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) { _doing_it_wrong( __METHOD__, From 541b4f690e4f10d6e519b6d3059be5c15ec586c1 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Fri, 25 Oct 2024 23:19:19 +0200 Subject: [PATCH 03/24] wip --- src/wp-includes/html-api/class-wp-html-processor.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 7d3a3d56329bd..46bac0abc68f3 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5343,6 +5343,7 @@ public function seek( $bookmark_name ): bool { */ $this->state->stack_of_open_elements = new WP_HTML_Open_Elements(); $this->state->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; $this->breadcrumbs = array(); $this->bytes_already_parsed = 0; @@ -5381,7 +5382,7 @@ public function seek( $bookmark_name ): bool { return true; } - while ( $this->next_token() ) { + while ( var_dump( $this->next_token() ) ) { echo 'bm starts at ' . $bookmark_starts_at . ' and current token starts at ' . $this->bookmarks[ $this->state->current_token->bookmark_name ]->start . "\n"; if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { From dc3f1e65792f49da2351de3665cd282f83fe76fd Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 5 Nov 2024 19:06:23 +0100 Subject: [PATCH 04/24] try fixes --- ...ass-wp-html-active-formatting-elements.php | 2 +- .../html-api/class-wp-html-open-elements.php | 2 +- .../html-api/class-wp-html-processor.php | 63 ++++++++----------- .../html-api/class-wp-html-tag-processor.php | 2 - 4 files changed, 28 insertions(+), 41 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index 2f51482eee052..f343c5f38f010 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -41,7 +41,7 @@ class WP_HTML_Active_Formatting_Elements { * * @var WP_HTML_Token[] */ - private $stack = array(); + public $stack = array(); /** * Reports if a specific node is in the stack of active formatting elements. diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index cb913853f0ee9..03bc904113f9e 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -49,7 +49,7 @@ class WP_HTML_Open_Elements { * * @var bool */ - private $has_p_in_button_scope = false; + public $has_p_in_button_scope = false; /** * A function that will be called when an item is popped off the stack of open elements. diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 46bac0abc68f3..88e1b69fd4b86 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5286,8 +5286,6 @@ public function seek( $bookmark_name ): bool { // Flush any pending updates to the document before beginning. $this->get_updated_html(); - echo 'Seeking to ' . $bookmark_name . "\n"; - $actual_bookmark_name = "_{$bookmark_name}"; $processor_started_at = $this->state->current_token ? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start @@ -5325,8 +5323,8 @@ public function seek( $bookmark_name ): bool { * and computation time. */ if ( 'backward' === $direction ) { - echo 'back: ' . "\n"; // Reset necessary parser state + $this->change_parsing_namespace( 'html' ); $this->state->frameset_ok = true; $this->state->stack_of_template_insertion_modes = array(); $this->state->head_element = null; @@ -5335,56 +5333,48 @@ public function seek( $bookmark_name ): bool { $this->element_queue = array(); $this->current_element = null; - if ( null === $this->context_node ) { - echo 'full parser' . "\n"; - /* - * In the case of a full parser, the processor needs to be - * reset to a clean state and start over. - */ - $this->state->stack_of_open_elements = new WP_HTML_Open_Elements(); - $this->state->active_formatting_elements = new WP_HTML_Active_Formatting_Elements(); - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; + /* + * In case the parser is a fragment parser, instead of clearing the + * parser state and starting fresh, calling the stack methods + * maintains the proper flags in the parser. + */ + foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { + if ( 'context-node' === $item->bookmark_name ) { + break; + } - $this->breadcrumbs = array(); - $this->bytes_already_parsed = 0; - $this->parser_state = self::STATE_READY; - } else { - /* - * In case the parser is a fragment parser, instead of clearing the - * parser state and starting fresh, calling the stack methods - * maintains the proper flags in the parser. - */ - foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; - } + $this->state->stack_of_open_elements->remove_node( $item ); + } - $this->state->stack_of_open_elements->remove_node( $item ); + foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { + if ( 'context-node' === $item->bookmark_name ) { + break; } - foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; - } + $this->state->active_formatting_elements->remove_node( $item ); + } - $this->state->active_formatting_elements->remove_node( $item ); - } + if ( null === $this->context_node ) { + $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; + $this->breadcrumbs = array(); + $this->bytes_already_parsed = 0; + $this->parser_state = self::STATE_READY; + } else { parent::seek( 'context-node' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); } } - // When moving forwards, reparse the document until reaching the same location as the original bookmark. if ( null !== $this->state->current_token && $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { return true; } - while ( var_dump( $this->next_token() ) ) { - echo 'bm starts at ' . $bookmark_starts_at . ' and current token starts at ' . $this->bookmarks[ $this->state->current_token->bookmark_name ]->start . "\n"; - if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { + while ( $this->next_token() ) { + $bm = $this->bookmarks[ $this->current_element->token->bookmark_name ]; + if ( $bookmark_starts_at === $bm->start ) { while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { $this->current_element = array_shift( $this->element_queue ); } @@ -5392,7 +5382,6 @@ public function seek( $bookmark_name ): bool { } } - echo 'false' . "\n"; return false; } diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index ecce9b285c779..b3f6a6de9c0d8 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -946,7 +946,6 @@ private function base_class_next_token(): bool { $was_at = $this->bytes_already_parsed; $this->after_tag(); - var_dump( $was_at, $this->parser_state ); // Don't proceed if there's nothing more to scan. if ( @@ -2544,7 +2543,6 @@ public function has_bookmark( $bookmark_name ): bool { * @return bool Whether the internal cursor was successfully moved to the bookmark's location. */ public function seek( $bookmark_name ): bool { - var_dump( $bookmark_name, $this->bookmarks ); if ( ! array_key_exists( $bookmark_name, $this->bookmarks ) ) { _doing_it_wrong( __METHOD__, From 89aa01b820231b697edaca069313cfffe173d02b Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 5 Nov 2024 19:47:16 +0100 Subject: [PATCH 05/24] Fix the issue --- .../html-api/class-wp-html-processor.php | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 88e1b69fd4b86..16acd3771904b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5323,15 +5323,6 @@ public function seek( $bookmark_name ): bool { * and computation time. */ if ( 'backward' === $direction ) { - // Reset necessary parser state - $this->change_parsing_namespace( 'html' ); - $this->state->frameset_ok = true; - $this->state->stack_of_template_insertion_modes = array(); - $this->state->head_element = null; - $this->state->form_element = null; - $this->state->current_token = null; - $this->element_queue = array(); - $this->current_element = null; /* * In case the parser is a fragment parser, instead of clearing the @@ -5354,6 +5345,16 @@ public function seek( $bookmark_name ): bool { $this->state->active_formatting_elements->remove_node( $item ); } + // Reset necessary parser state + $this->change_parsing_namespace( 'html' ); + $this->state->frameset_ok = true; + $this->state->stack_of_template_insertion_modes = array(); + $this->state->head_element = null; + $this->state->form_element = null; + $this->state->current_token = null; + $this->current_element = null; + $this->element_queue = array(); + if ( null === $this->context_node ) { $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; $this->breadcrumbs = array(); @@ -5373,6 +5374,9 @@ public function seek( $bookmark_name ): bool { } while ( $this->next_token() ) { + if ( $this->is_virtual() ) { + continue; + } $bm = $this->bookmarks[ $this->current_element->token->bookmark_name ]; if ( $bookmark_starts_at === $bm->start ) { while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { From 737bf92b9908793c231512842c8306101a85c894 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 5 Nov 2024 19:49:01 +0100 Subject: [PATCH 06/24] Restore original bookmark matching --- src/wp-includes/html-api/class-wp-html-processor.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 16acd3771904b..841a024701661 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5377,8 +5377,7 @@ public function seek( $bookmark_name ): bool { if ( $this->is_virtual() ) { continue; } - $bm = $this->bookmarks[ $this->current_element->token->bookmark_name ]; - if ( $bookmark_starts_at === $bm->start ) { + if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { $this->current_element = array_shift( $this->element_queue ); } From cb63bbc0ebc851030b0806804fc7c4cc1e98b0f8 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Tue, 5 Nov 2024 19:51:29 +0100 Subject: [PATCH 07/24] Lint: remove empty line --- src/wp-includes/html-api/class-wp-html-tag-processor.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index b3f6a6de9c0d8..a5b7645e122ee 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -946,7 +946,6 @@ private function base_class_next_token(): bool { $was_at = $this->bytes_already_parsed; $this->after_tag(); - // Don't proceed if there's nothing more to scan. if ( self::STATE_COMPLETE === $this->parser_state || From 744cf0115a1bcee2c9a312c0b5a38bcddb5bd302 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 11:42:42 +0100 Subject: [PATCH 08/24] Add commentary around state resetting in seek --- src/wp-includes/html-api/class-wp-html-processor.php | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 841a024701661..e5cb7d27ed01b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5323,11 +5323,8 @@ public function seek( $bookmark_name ): bool { * and computation time. */ if ( 'backward' === $direction ) { - /* - * In case the parser is a fragment parser, instead of clearing the - * parser state and starting fresh, calling the stack methods - * maintains the proper flags in the parser. + * When moving backward, stateful stacks should be cleared. */ foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { if ( 'context-node' === $item->bookmark_name ) { @@ -5345,7 +5342,11 @@ public function seek( $bookmark_name ): bool { $this->state->active_formatting_elements->remove_node( $item ); } - // Reset necessary parser state + /* + * **After** clearing stacks, more processor state can be reset. + * This must be done after clearing the stack because those stacks generate events that + * would appear on a subsequent call to `next_token()`. + */ $this->change_parsing_namespace( 'html' ); $this->state->frameset_ok = true; $this->state->stack_of_template_insertion_modes = array(); @@ -5355,6 +5356,7 @@ public function seek( $bookmark_name ): bool { $this->current_element = null; $this->element_queue = array(); + // The presence or absence of a context node indicates a full or fragment parser. if ( null === $this->context_node ) { $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; $this->breadcrumbs = array(); From 6106a560db2666ab0ca1a91e6c3a745f04e9bc4f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 11:46:49 +0100 Subject: [PATCH 09/24] Use do-while loop to iterate after first check --- .../html-api/class-wp-html-processor.php | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index e5cb7d27ed01b..1c96cab510816 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5363,6 +5363,7 @@ public function seek( $bookmark_name ): bool { $this->bytes_already_parsed = 0; $this->parser_state = self::STATE_READY; + $this->next_token(); } else { parent::seek( 'context-node' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; @@ -5370,12 +5371,12 @@ public function seek( $bookmark_name ): bool { } } - // When moving forwards, reparse the document until reaching the same location as the original bookmark. - if ( null !== $this->state->current_token && $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { - return true; - } - - while ( $this->next_token() ) { + /* + * Here, the processor moves forward through the document until it matches the bookmark. + * do-while is used here because the processor is expected to already be stopped on + * a token than may match the bookmarked location. + */ + do { if ( $this->is_virtual() ) { continue; } @@ -5385,7 +5386,7 @@ public function seek( $bookmark_name ): bool { } return true; } - } + } while ( $this->next_token() ); return false; } From 851df382b09b735ee22498cecdd753ad72daa010 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 11:47:17 +0100 Subject: [PATCH 10/24] Fix fragment parser bug that should reset breadcrumbs before seeking --- src/wp-includes/html-api/class-wp-html-processor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 1c96cab510816..de3f3e1604840 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5365,9 +5365,9 @@ public function seek( $bookmark_name ): bool { $this->parser_state = self::STATE_READY; $this->next_token(); } else { - parent::seek( 'context-node' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); + parent::seek( $this->context_node->bookmark_name ); } } From 9af204c1e838d1accb16dd220b52ccd33a90ff38 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 11:47:56 +0100 Subject: [PATCH 11/24] Assert setting bookmark two This is consisten with the assertion on bookmark one --- tests/phpunit/tests/html-api/wpHtmlProcessor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index 7e568286ccdf9..a8d239bc9b74a 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -133,7 +133,7 @@ public function test_clear_to_navigate_after_seeking() { // Create a bookmark inside of that stack. if ( null !== $processor->get_attribute( 'two' ) ) { - $processor->set_bookmark( 'two' ); + $this->assertTrue( $processor->set_bookmark( 'two' ) ); break; } } From 660dc85ae3c48718517faa77d748904b02e29e55 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 11:49:21 +0100 Subject: [PATCH 12/24] Remove condition that may not be necessary This seems related to virutal tokens and it likely covered by the virtual token condition --- src/wp-includes/html-api/class-wp-html-processor.php | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index de3f3e1604840..62a67ffc86e97 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5381,9 +5381,6 @@ public function seek( $bookmark_name ): bool { continue; } if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) { - while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) { - $this->current_element = array_shift( $this->element_queue ); - } return true; } } while ( $this->next_token() ); From 9a0c01798fd6e34676b7d090563c7868bdb24a11 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 12:05:43 +0100 Subject: [PATCH 13/24] Set parsing namespace correctly when seeking --- src/wp-includes/html-api/class-wp-html-processor.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 62a67ffc86e97..2482dc53d3d47 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5347,7 +5347,6 @@ public function seek( $bookmark_name ): bool { * This must be done after clearing the stack because those stacks generate events that * would appear on a subsequent call to `next_token()`. */ - $this->change_parsing_namespace( 'html' ); $this->state->frameset_ok = true; $this->state->stack_of_template_insertion_modes = array(); $this->state->head_element = null; @@ -5358,6 +5357,7 @@ public function seek( $bookmark_name ): bool { // The presence or absence of a context node indicates a full or fragment parser. if ( null === $this->context_node ) { + $this->change_parsing_namespace( 'html' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; $this->breadcrumbs = array(); @@ -5365,6 +5365,11 @@ public function seek( $bookmark_name ): bool { $this->parser_state = self::STATE_READY; $this->next_token(); } else { + $this->change_parsing_namespace( + $this->context_node->integration_node_type + ? 'html' + : $this->context_node->namespace + ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); parent::seek( $this->context_node->bookmark_name ); From d1819382a99f289a1ff1590723397f25f46631e0 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 12:13:48 +0100 Subject: [PATCH 14/24] Fix fragment state reset for different context nodes --- src/wp-includes/html-api/class-wp-html-processor.php | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 2482dc53d3d47..ed6c8fc42a1a8 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5370,8 +5370,13 @@ public function seek( $bookmark_name ): bool { ? 'html' : $this->context_node->namespace ); - $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; - $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); + + if ( 'TEMPLATE' === $this->context_node->node_name ) { + $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; + } + + $this->reset_insertion_mode_appropriately(); + $this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 ); parent::seek( $this->context_node->bookmark_name ); } } From d5bf14cf3d55be1d10a420f866d713e06b6f110f Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 12:17:47 +0100 Subject: [PATCH 15/24] Add and improve comments --- src/wp-includes/html-api/class-wp-html-processor.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index ed6c8fc42a1a8..41d3b28f22177 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5355,7 +5355,10 @@ public function seek( $bookmark_name ): bool { $this->current_element = null; $this->element_queue = array(); - // The presence or absence of a context node indicates a full or fragment parser. + /* + * The absence of a context node indicates a full parse. + * The presence of a context node indicates a fragment parser. + */ if ( null === $this->context_node ) { $this->change_parsing_namespace( 'html' ); $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; @@ -5387,6 +5390,10 @@ public function seek( $bookmark_name ): bool { * a token than may match the bookmarked location. */ do { + /* + * The processor will stop on virtual tokens, but bookmarks may not be set on them. + * They should not be matched when seeking a bookmark, skip them. + */ if ( $this->is_virtual() ) { continue; } From 388bf1998b2467e26509e80344d6363fe46a8882 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 13:05:34 +0100 Subject: [PATCH 16/24] Add and improve HTML processor bookmark tests --- .../html-api/wpHtmlProcessor-bookmark.php | 51 +++++++++++++++++-- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php index e34e01f7cda55..4d2581b70e4c1 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php @@ -13,11 +13,27 @@ */ class Tests_HtmlApi_WpHtmlProcessor_Bookmark extends WP_UnitTestCase { /** - * Ensures that bookmarks can be set and seeked to for the full processor. - * * @ticket 62290 */ - public function test_full_processor_seek() { + public function test_full_processor_seek_same_location() { + $bookmark_name = 'the-bookmark'; + $processor = WP_HTML_Processor::create_full_parser( '
' ); + $this->assertTrue( $processor->next_tag( 'BODY' ) ); + $this->assertTrue( $processor->set_bookmark( $bookmark_name ), 'Failed to set bookmark.' ); + $this->assertTrue( $processor->has_bookmark( $bookmark_name ), 'Failed has_bookmark check.' ); + + // Confirm the bookmark works. + $this->assertTrue( $processor->seek( $bookmark_name ), 'Failed to seek to bookmark.' ); + $this->assertSame( 'BODY', $processor->get_tag() ); + $this->assertTrue( $processor->next_tag() ); + $this->assertSame( 'DIV', $processor->get_tag() ); + $this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs() ); + } + + /** + * @ticket 62290 + */ + public function test_full_processor_seek_backward() { $bookmark_name = 'the-bookmark'; $processor = WP_HTML_Processor::create_full_parser( '
' ); $this->assertTrue( $processor->next_tag( 'BODY' ) ); @@ -31,4 +47,33 @@ public function test_full_processor_seek() { $this->assertTrue( $processor->seek( $bookmark_name ), 'Failed to seek to bookmark.' ); $this->assertSame( 'BODY', $processor->get_tag() ); } + + /** + * @ticket 62290 + */ + public function test_full_processor_seek_forward() { + $processor = WP_HTML_Processor::create_full_parser( '
' ); + $this->assertTrue( $processor->next_tag( 'BODY' ) ); + $this->assertTrue( $processor->set_bookmark( 'body' ), 'Failed to set bookmark "body".' ); + $this->assertTrue( $processor->has_bookmark( 'body' ), 'Failed "body" has_bookmark check.' ); + + // Move past the bookmark so it must scan backwards. + $this->assertTrue( $processor->next_tag( 'DIV' ) ); + $this->assertTrue( $processor->get_attribute( 'one' ) ); + $this->assertTrue( $processor->set_bookmark( 'one' ), 'Failed to set bookmark "one".' ); + $this->assertTrue( $processor->has_bookmark( 'one' ), 'Failed "one" has_bookmark check.' ); + + // Seek back. + $this->assertTrue( $processor->seek( 'body' ), 'Failed to seek to bookmark "body".' ); + $this->assertSame( 'BODY', $processor->get_tag() ); + + // Seek forward and continue processing. + $this->assertTrue( $processor->seek( 'one' ), 'Failed to seek to bookmark "one".' ); + $this->assertSame( 'DIV', $processor->get_tag() ); + $this->assertTrue( $processor->get_attribute( 'one' ) ); + $this->assertTrue( $processor->next_tag() ); + + $this->assertSame( 'DIV', $processor->get_tag() ); + $this->assertTrue( $processor->get_attribute( 'two' ) ); + } } From f7af6e31073c56146544fc1ed8b8ab184a0a6eb7 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 13:19:58 +0100 Subject: [PATCH 17/24] Add test seeking from SVG namespace --- .../html-api/wpHtmlProcessor-bookmark.php | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php index 4d2581b70e4c1..35a268c43af89 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php @@ -76,4 +76,94 @@ public function test_full_processor_seek_forward() { $this->assertSame( 'DIV', $processor->get_tag() ); $this->assertTrue( $processor->get_attribute( 'two' ) ); } + + /** + * Seeking into and out of foreign content requires updating the parsing namespace correctly. + * + * @ticket 62290 + */ + public function test_full_processor_seek_back_from_foreign_content() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $this->assertTrue( $processor->next_tag( 'CUSTOM-ELEMENT' ) ); + $this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark "mark".' ); + $this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed "mark" has_bookmark check.' ); + + /* + * has self-closing flag, but HTML elements (that are not void elements) cannot self-close, + * they must be closed by some means, usually a closing tag. + * + * If the div were interpreted as foreign content, it would self-close. + */ + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); + + // Proceed into foreign content. + $this->assertTrue( $processor->next_tag( 'RECT' ) ); + $this->assertSame( 'svg', $processor->get_namespace() ); + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertFalse( $processor->expects_closer() ); + $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); + + // Seek back. + $this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark "mark".' ); + $this->assertSame( 'CUSTOM-ELEMENT', $processor->get_tag() ); + // If the parsing namespace were not correct here (html), + // then the self-closing flag would be misinterpreted. + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); + + // Proceed into foreign content again. + $this->assertTrue( $processor->next_tag( 'RECT' ) ); + $this->assertSame( 'svg', $processor->get_namespace() ); + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertFalse( $processor->expects_closer() ); + + // The RECT should still descend from the CUSTOM-ELEMENT despite its self-closing flag. + $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); + } + + /** + * Seeking into and out of foreign content requires updating the parsing namespace correctly. + * + * @ticket 62290 + */ + public function test_pragment_processor_seek_back_from_foreign_content() { + $processor = WP_HTML_Processor::create_fragment( '' ); + $this->assertTrue( $processor->next_tag( 'CUSTOM-ELEMENT' ) ); + $this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark "mark".' ); + $this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed "mark" has_bookmark check.' ); + + /* + * has self-closing flag, but HTML elements (that are not void elements) cannot self-close, + * they must be closed by some means, usually a closing tag. + * + * If the div were interpreted as foreign content, it would self-close. + */ + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); + + // Proceed into foreign content. + $this->assertTrue( $processor->next_tag( 'RECT' ) ); + $this->assertSame( 'svg', $processor->get_namespace() ); + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertFalse( $processor->expects_closer() ); + $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); + + // Seek back. + $this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark "mark".' ); + $this->assertSame( 'CUSTOM-ELEMENT', $processor->get_tag() ); + // If the parsing namespace were not correct here (html), + // then the self-closing flag would be misinterpreted. + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); + + // Proceed into foreign content again. + $this->assertTrue( $processor->next_tag( 'RECT' ) ); + $this->assertSame( 'svg', $processor->get_namespace() ); + $this->assertTrue( $processor->has_self_closing_flag() ); + $this->assertFalse( $processor->expects_closer() ); + + // The RECT should still descend from the CUSTOM-ELEMENT despite its self-closing flag. + $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); + } } From d5a7d5c883cea52e1512f497250df9aa48d87361 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 13:40:18 +0100 Subject: [PATCH 18/24] Rework with tests for fragment and full processors Use a dataprovider to get a factory function for processors --- .../html-api/wpHtmlProcessor-bookmark.php | 132 +++++++----------- 1 file changed, 53 insertions(+), 79 deletions(-) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php index 35a268c43af89..eafb03c0cbaec 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php @@ -13,77 +13,84 @@ */ class Tests_HtmlApi_WpHtmlProcessor_Bookmark extends WP_UnitTestCase { /** + * @dataProvider data_processor_constructors + * * @ticket 62290 */ - public function test_full_processor_seek_same_location() { - $bookmark_name = 'the-bookmark'; - $processor = WP_HTML_Processor::create_full_parser( '
' ); - $this->assertTrue( $processor->next_tag( 'BODY' ) ); - $this->assertTrue( $processor->set_bookmark( $bookmark_name ), 'Failed to set bookmark.' ); - $this->assertTrue( $processor->has_bookmark( $bookmark_name ), 'Failed has_bookmark check.' ); + public function test_processor_seek_same_location( callable $factory ) { + $processor = $factory( '
' ); + $this->assertTrue( $processor->next_tag( 'DIV' ) ); + $this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark.' ); + $this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed has_bookmark check.' ); - // Confirm the bookmark works. - $this->assertTrue( $processor->seek( $bookmark_name ), 'Failed to seek to bookmark.' ); - $this->assertSame( 'BODY', $processor->get_tag() ); - $this->assertTrue( $processor->next_tag() ); + // Confirm the bookmark works and processing continues normally. + $this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark.' ); $this->assertSame( 'DIV', $processor->get_tag() ); $this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs() ); + $this->assertTrue( $processor->next_tag() ); + $this->assertSame( 'SPAN', $processor->get_tag() ); + $this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN' ), $processor->get_breadcrumbs() ); } /** + * @dataProvider data_processor_constructors + * * @ticket 62290 */ - public function test_full_processor_seek_backward() { - $bookmark_name = 'the-bookmark'; - $processor = WP_HTML_Processor::create_full_parser( '
' ); - $this->assertTrue( $processor->next_tag( 'BODY' ) ); - $this->assertTrue( $processor->set_bookmark( $bookmark_name ), 'Failed to set bookmark.' ); - $this->assertTrue( $processor->has_bookmark( $bookmark_name ), 'Failed has_bookmark check.' ); + public function test_processor_seek_backward( callable $factory ) { + $processor = $factory( '
' ); + $this->assertTrue( $processor->next_tag( 'DIV' ) ); + $this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark.' ); + $this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed has_bookmark check.' ); // Move past the bookmark so it must scan backwards. - $this->assertTrue( $processor->next_tag( 'DIV' ) ); + $this->assertTrue( $processor->next_tag( 'SPAN' ) ); // Confirm the bookmark works. - $this->assertTrue( $processor->seek( $bookmark_name ), 'Failed to seek to bookmark.' ); - $this->assertSame( 'BODY', $processor->get_tag() ); + $this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark.' ); + $this->assertSame( 'DIV', $processor->get_tag() ); } /** + * @dataProvider data_processor_constructors + * * @ticket 62290 */ - public function test_full_processor_seek_forward() { - $processor = WP_HTML_Processor::create_full_parser( '
' ); - $this->assertTrue( $processor->next_tag( 'BODY' ) ); - $this->assertTrue( $processor->set_bookmark( 'body' ), 'Failed to set bookmark "body".' ); - $this->assertTrue( $processor->has_bookmark( 'body' ), 'Failed "body" has_bookmark check.' ); - - // Move past the bookmark so it must scan backwards. + public function test_processor_seek_forward( callable $factory ) { + $processor = $factory( '
' ); $this->assertTrue( $processor->next_tag( 'DIV' ) ); - $this->assertTrue( $processor->get_attribute( 'one' ) ); $this->assertTrue( $processor->set_bookmark( 'one' ), 'Failed to set bookmark "one".' ); $this->assertTrue( $processor->has_bookmark( 'one' ), 'Failed "one" has_bookmark check.' ); - // Seek back. - $this->assertTrue( $processor->seek( 'body' ), 'Failed to seek to bookmark "body".' ); - $this->assertSame( 'BODY', $processor->get_tag() ); + // Move past the bookmark so it must scan backwards. + $this->assertTrue( $processor->next_tag( 'SPAN' ) ); + $this->assertTrue( $processor->get_attribute( 'two' ) ); + $this->assertTrue( $processor->set_bookmark( 'two' ), 'Failed to set bookmark "two".' ); + $this->assertTrue( $processor->has_bookmark( 'two' ), 'Failed "two" has_bookmark check.' ); - // Seek forward and continue processing. + // Seek back. $this->assertTrue( $processor->seek( 'one' ), 'Failed to seek to bookmark "one".' ); $this->assertSame( 'DIV', $processor->get_tag() ); - $this->assertTrue( $processor->get_attribute( 'one' ) ); - $this->assertTrue( $processor->next_tag() ); - $this->assertSame( 'DIV', $processor->get_tag() ); + // Seek forward and continue processing. + $this->assertTrue( $processor->seek( 'two' ), 'Failed to seek to bookmark "two".' ); + $this->assertSame( 'SPAN', $processor->get_tag() ); $this->assertTrue( $processor->get_attribute( 'two' ) ); + + $this->assertTrue( $processor->next_tag() ); + $this->assertSame( 'A', $processor->get_tag() ); + $this->assertTrue( $processor->get_attribute( 'three' ) ); } /** - * Seeking into and out of foreign content requires updating the parsing namespace correctly. + * Ensure the parsing namespace is handled when seeking from foreign content. + * + * @dataProvider data_processor_constructors * * @ticket 62290 */ - public function test_full_processor_seek_back_from_foreign_content() { - $processor = WP_HTML_Processor::create_full_parser( '' ); + public function test_seek_back_from_foreign_content( callable $factory ) { + $processor = $factory( '' ); $this->assertTrue( $processor->next_tag( 'CUSTOM-ELEMENT' ) ); $this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark "mark".' ); $this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed "mark" has_bookmark check.' ); @@ -95,7 +102,7 @@ public function test_full_processor_seek_back_from_foreign_content() { * If the div were interpreted as foreign content, it would self-close. */ $this->assertTrue( $processor->has_self_closing_flag() ); - $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); + $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted HTML custom-element with self-closing flag as self-closing element.' ); // Proceed into foreign content. $this->assertTrue( $processor->next_tag( 'RECT' ) ); @@ -110,7 +117,7 @@ public function test_full_processor_seek_back_from_foreign_content() { // If the parsing namespace were not correct here (html), // then the self-closing flag would be misinterpreted. $this->assertTrue( $processor->has_self_closing_flag() ); - $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); + $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted HTML custom-element with self-closing flag as self-closing element.' ); // Proceed into foreign content again. $this->assertTrue( $processor->next_tag( 'RECT' ) ); @@ -123,47 +130,14 @@ public function test_full_processor_seek_back_from_foreign_content() { } /** - * Seeking into and out of foreign content requires updating the parsing namespace correctly. + * Data provider. * - * @ticket 62290 + * @return array */ - public function test_pragment_processor_seek_back_from_foreign_content() { - $processor = WP_HTML_Processor::create_fragment( '' ); - $this->assertTrue( $processor->next_tag( 'CUSTOM-ELEMENT' ) ); - $this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark "mark".' ); - $this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed "mark" has_bookmark check.' ); - - /* - * has self-closing flag, but HTML elements (that are not void elements) cannot self-close, - * they must be closed by some means, usually a closing tag. - * - * If the div were interpreted as foreign content, it would self-close. - */ - $this->assertTrue( $processor->has_self_closing_flag() ); - $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); - - // Proceed into foreign content. - $this->assertTrue( $processor->next_tag( 'RECT' ) ); - $this->assertSame( 'svg', $processor->get_namespace() ); - $this->assertTrue( $processor->has_self_closing_flag() ); - $this->assertFalse( $processor->expects_closer() ); - $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); - - // Seek back. - $this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark "mark".' ); - $this->assertSame( 'CUSTOM-ELEMENT', $processor->get_tag() ); - // If the parsing namespace were not correct here (html), - // then the self-closing flag would be misinterpreted. - $this->assertTrue( $processor->has_self_closing_flag() ); - $this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted custom-element with self-closing flag as self-closing element.' ); - - // Proceed into foreign content again. - $this->assertTrue( $processor->next_tag( 'RECT' ) ); - $this->assertSame( 'svg', $processor->get_namespace() ); - $this->assertTrue( $processor->has_self_closing_flag() ); - $this->assertFalse( $processor->expects_closer() ); - - // The RECT should still descend from the CUSTOM-ELEMENT despite its self-closing flag. - $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); + public static function data_processor_constructors(): array { + return array( + 'Full parser' => array( array( WP_HTML_Processor::class, 'create_full_parser' ) ), + 'Fragment parser' => array( array( WP_HTML_Processor::class, 'create_fragment' ) ), + ); } } From b95e4020fae5dc6f45155e03f3d18ce67abb8acd Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 13:45:13 +0100 Subject: [PATCH 19/24] Restore private stack properties to private --- .../html-api/class-wp-html-active-formatting-elements.php | 2 +- src/wp-includes/html-api/class-wp-html-open-elements.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php index f343c5f38f010..2f51482eee052 100644 --- a/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php +++ b/src/wp-includes/html-api/class-wp-html-active-formatting-elements.php @@ -41,7 +41,7 @@ class WP_HTML_Active_Formatting_Elements { * * @var WP_HTML_Token[] */ - public $stack = array(); + private $stack = array(); /** * Reports if a specific node is in the stack of active formatting elements. diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index 03bc904113f9e..cb913853f0ee9 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -49,7 +49,7 @@ class WP_HTML_Open_Elements { * * @var bool */ - public $has_p_in_button_scope = false; + private $has_p_in_button_scope = false; /** * A function that will be called when an item is popped off the stack of open elements. From 90eb6e27ad282de34e9d2a21d2f7217a6d0a8454 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 13:53:15 +0100 Subject: [PATCH 20/24] Use a temporary bookmark to avoid modifying tag processor internal state --- src/wp-includes/html-api/class-wp-html-processor.php | 6 +++--- src/wp-includes/html-api/class-wp-html-tag-processor.php | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 41d3b28f22177..41036943c21c6 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5364,9 +5364,9 @@ public function seek( $bookmark_name ): bool { $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL; $this->breadcrumbs = array(); - $this->bytes_already_parsed = 0; - $this->parser_state = self::STATE_READY; - $this->next_token(); + $this->bookmarks['initial'] = new WP_HTML_Span( 0, 0 ); + parent::seek( 'initial' ); + unset( $this->bookmarks['initial'] ); } else { $this->change_parsing_namespace( $this->context_node->integration_node_type diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index a5b7645e122ee..e2632c80f6da5 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -591,7 +591,7 @@ class WP_HTML_Tag_Processor { * @since 6.2.0 * @var int */ - protected $bytes_already_parsed = 0; + private $bytes_already_parsed = 0; /** * Byte offset in input document where current token starts. From 75ab9c25ab533bbdad08b25e559e3d0ff4e52a3c Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 20:27:57 +0100 Subject: [PATCH 21/24] Break at root-node when popping elements on seek The context node is not pushed to the stack of open elements, this code was either doing nothing or incorrect. --- src/wp-includes/html-api/class-wp-html-processor.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 41036943c21c6..2009abc7a1c9d 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5327,7 +5327,11 @@ public function seek( $bookmark_name ): bool { * When moving backward, stateful stacks should be cleared. */ foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { + /* + * Fragment parsers always start with an HTML root node at the top of the stack. + * Do not remove it. + */ + if ( 'root-node' === $item->bookmark_name ) { break; } @@ -5335,10 +5339,6 @@ public function seek( $bookmark_name ): bool { } foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { - if ( 'context-node' === $item->bookmark_name ) { - break; - } - $this->state->active_formatting_elements->remove_node( $item ); } From 05ca2a4130f82639db692f15453f3c3bd08f0414 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Wed, 6 Nov 2024 20:29:36 +0100 Subject: [PATCH 22/24] Remove context-node cases from open elements The context node is not pushed to the stack of open elements, and therefore does not need special handling. --- src/wp-includes/html-api/class-wp-html-open-elements.php | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index cb913853f0ee9..210492ab9af08 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -520,11 +520,6 @@ public function pop(): bool { return false; } - if ( 'context-node' === $item->bookmark_name ) { - $this->stack[] = $item; - return false; - } - $this->after_element_pop( $item ); return true; } @@ -585,10 +580,6 @@ public function push( WP_HTML_Token $stack_item ): void { * @return bool Whether the node was found and removed from the stack of open elements. */ public function remove_node( WP_HTML_Token $token ): bool { - if ( 'context-node' === $token->bookmark_name ) { - return false; - } - foreach ( $this->walk_up() as $position_from_end => $item ) { if ( $token->bookmark_name !== $item->bookmark_name ) { continue; From c48be709141b0b37255174a7cdb6b9c41a16e5b6 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 11 Nov 2024 20:26:18 +0100 Subject: [PATCH 23/24] Add test for root-node regression --- .../tests/html-api/wpHtmlProcessor-bookmark.php | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php index eafb03c0cbaec..91cc17898f77d 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php @@ -129,6 +129,23 @@ public function test_seek_back_from_foreign_content( callable $factory ) { $this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() ); } + /** + * Covers a regression where the root node may not be present on the stack of open elements. + * + * Heading elements (h1, h2, etc.) check the current node on the stack of open elements + * and expect it to be defined. If the root-node has been popped, pushing a new heading + * onto the stack will create a warning and fail the test. + * + * @ticket 62290 + */ + public function test_fragment_starts_with_h1() { + $processor = WP_HTML_Processor::create_fragment( '

' ); + $this->assertTrue( $processor->next_tag( 'H1' ) ); + $this->assertTrue( $processor->set_bookmark( 'mark' ) ); + $this->assertTrue( $processor->next_token() ); + $this->assertTrue( $processor->seek( 'mark' ) ); + } + /** * Data provider. * From 710c5f7dd1af13950e87b5f82c088742f9c25435 Mon Sep 17 00:00:00 2001 From: Jon Surrell Date: Mon, 11 Nov 2024 20:20:45 +0100 Subject: [PATCH 24/24] Fix root-node pop regression in fragment parser seek --- .../html-api/class-wp-html-processor.php | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 3e4f91683ef50..6a2c7d6fbeaf7 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -5328,18 +5328,11 @@ public function seek( $bookmark_name ): bool { * and computation time. */ if ( 'backward' === $direction ) { + /* * When moving backward, stateful stacks should be cleared. */ foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) { - /* - * Fragment parsers always start with an HTML root node at the top of the stack. - * Do not remove it. - */ - if ( 'root-node' === $item->bookmark_name ) { - break; - } - $this->state->stack_of_open_elements->remove_node( $item ); } @@ -5373,6 +5366,21 @@ public function seek( $bookmark_name ): bool { parent::seek( 'initial' ); unset( $this->bookmarks['initial'] ); } else { + + /* + * Push the root-node (HTML) back onto the stack of open elements. + * + * Fragment parsers require this extra bit of setup. + * It's handled in full parsers by advancing the processor state. + */ + $this->state->stack_of_open_elements->push( + new WP_HTML_Token( + 'root-node', + 'HTML', + false + ) + ); + $this->change_parsing_namespace( $this->context_node->integration_node_type ? 'html'