Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML API: Refactor wp_strip_all_tags() #6196

Draft
wants to merge 3 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 23 additions & 10 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -5517,17 +5517,19 @@ function normalize_whitespace( $str ) {
* will return 'something'. wp_strip_all_tags will return ''
*
* @since 2.9.0
* @since 6.5.0 Relies on the HTML API for processing.
*
* @param string $html String potentially containing HTML tags, comments, etc...
* @param bool $remove_breaks Optional. Whether to remove left over line breaks and white space chars.
*
* @param string $text String containing HTML tags
* @param bool $remove_breaks Optional. Whether to remove left over line breaks and white space chars
* @return string The processed string.
*/
function wp_strip_all_tags( $text, $remove_breaks = false ) {
if ( is_null( $text ) ) {
function wp_strip_all_tags( $html, $remove_breaks = false ) {
if ( ! isset( $html ) ) {
return '';
}

if ( ! is_scalar( $text ) ) {
if ( ! is_scalar( $html ) ) {
/*
* To maintain consistency with pre-PHP 8 error levels,
* trigger_error() is used to trigger an E_USER_WARNING,
Expand All @@ -5541,22 +5543,33 @@ function wp_strip_all_tags( $text, $remove_breaks = false ) {
'#1',
'$html',
'string',
gettype( $text )
gettype( $html )
),
E_USER_WARNING
);

return '';
}

$text = preg_replace( '@<(script|style)[^>]*?>.*?</\\1>@si', '', $text );
$text = strip_tags( $text );
$processor = new WP_Unsafe_Internal_Tag_Processor( (string) $html );
$text_content = '';

while ( $processor->next_token() ) {
switch ( $processor->get_token_name() ) {
case '#text':
case 'TEXTAREA':
case 'TITLE':
case 'XMP':
$text_content .= $processor->unsafe_get_raw_modifiable_text();
break;
}
}

if ( $remove_breaks ) {
$text = preg_replace( '/[\r\n\t ]+/', ' ', $text );
$text_content = preg_replace( '/[\f\r\n\t ]+/', ' ', $text_content );
}

return trim( $text );
return trim( $text_content );
}

/**
Expand Down
36 changes: 35 additions & 1 deletion src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2810,7 +2810,7 @@ public function get_modifiable_text() {

$decoded = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );

if ( empty( $decoded ) ) {
if ( 0 === strlen( $decoded ) ) {
return '';
}

Expand All @@ -2834,6 +2834,40 @@ public function get_modifiable_text() {
return $decoded;
}

/**
 * Reports where the current token sits inside the input document.
 *
 * The span is built from the processor's recorded token start offset and
 * token length. No span is produced while the start offset is still `null`.
 *
 * @since 6.6.0
 *
 * @access private
 *
 * @return WP_HTML_Span|null Span of the current token, or `null` when no
 *                           token start offset has been recorded.
 */
protected function unsafe_get_token_extents() {
	return null !== $this->token_starts_at
		? new WP_HTML_Span( $this->token_starts_at, $this->token_length )
		: null;
}

/**
 * Reports where the current token's modifiable text sits inside the input document.
 *
 * The span is built from the processor's recorded text start offset and
 * text length. No span is produced while the start offset is still `null`.
 *
 * @since 6.6.0
 *
 * @access private
 *
 * @return WP_HTML_Span|null Span of the current modifiable text, or `null`
 *                           when no text start offset has been recorded.
 */
protected function unsafe_get_modifiable_text_extents() {
	return null !== $this->text_starts_at
		? new WP_HTML_Span( $this->text_starts_at, $this->text_length )
		: null;
}

/**
* Updates or creates a new attribute on the currently matched tag with the passed value.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<?php

/**
 * Tag Processor subclass that exposes raw slices of the input HTML.
 *
 * Both public methods return substrings of the original document exactly as
 * written: the spans come from the parent's protected extent helpers and no
 * decoding or other transformation is applied to the sliced bytes here.
 *
 * @access private
 */
class WP_Unsafe_Internal_Tag_Processor extends WP_HTML_Tag_Processor {
	/**
	 * Returns the current modifiable text exactly as it appears in the input HTML.
	 *
	 * @return string Raw substring covered by the modifiable-text span, or an
	 *                empty string when the parent reports no such span.
	 */
	public function unsafe_get_raw_modifiable_text() {
		$span = $this->unsafe_get_modifiable_text_extents();

		if ( null === $span ) {
			return '';
		}

		return substr( $this->html, $span->start, $span->length );
	}

	/**
	 * Returns the current token exactly as it appears in the input HTML.
	 *
	 * @return string Raw substring covered by the token span, or an empty
	 *                string when the parent reports no such span.
	 */
	public function unsafe_get_raw_token() {
		$span = $this->unsafe_get_token_extents();

		if ( null === $span ) {
			return '';
		}

		return substr( $this->html, $span->start, $span->length );
	}
}
1 change: 1 addition & 0 deletions src/wp-settings.php
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@
require ABSPATH . WPINC . '/html-api/class-wp-html-span.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-text-replacement.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-tag-processor.php';
require ABSPATH . WPINC . '/html-api/class-wp-unsafe-internal-tag-processor.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-unsupported-exception.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-active-formatting-elements.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-open-elements.php';
Expand Down
Loading