Skip to content

Commit

Permalink
More docs, remove a binary op in UTF-8 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed Apr 28, 2024
1 parent e0ef94a commit b80e784
Showing 1 changed file with 12 additions and 7 deletions.
19 changes: 12 additions & 7 deletions src/wp-includes/html-api/class-wp-html-decoder.php
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ public static function read_character_reference( $context, $text, $at, &$skip_by
*/

/*
* Code points in the C1 controls area need to be remapped as if they
* were stored in Windows-1252. Note! This transformation only happens
* for numeric character references. The raw code points in the byte
* stream are not translated.
*
* > If the number is one of the numbers in the first column of
* > the following table, then find the row with that number in
* > the first column, and set the character reference code to
Expand Down Expand Up @@ -455,27 +460,27 @@ public static function code_point_to_utf8_bytes( $code_point ) {
return '';
}

if ( $code_point < 0x80 ) {
if ( $code_point <= 0x7F ) {
return chr( $code_point );
}

if ( $code_point < 0x800 ) {
$byte1 = ( $code_point >> 6 ) & 0x1F | 0xC0;
if ( $code_point <= 0x7FF ) {
$byte1 = ( $code_point >> 6 ) | 0xC0;
$byte2 = $code_point & 0x3F | 0x80;

return pack( 'CC', $byte1, $byte2 );
}

if ( $code_point < 0x10000 ) {
$byte1 = ( $code_point >> 12 ) & 0x0F | 0xE0;
if ( $code_point <= 0xFFFF ) {
$byte1 = ( $code_point >> 12 ) | 0xE0;
$byte2 = ( $code_point >> 6 ) & 0x3F | 0x80;
$byte3 = $code_point & 0x3F | 0x80;

return pack( 'CCC', $byte1, $byte2, $byte3 );
}

if ( $code_point < 0x110000 ) {
$byte1 = ( $code_point >> 18 ) & 0x07 | 0xF0;
if ( $code_point <= 0x10FFFF ) {
$byte1 = ( $code_point >> 18 ) | 0xF0;
$byte2 = ( $code_point >> 12 ) & 0x3F | 0x80;
$byte3 = ( $code_point >> 6 ) & 0x3F | 0x80;
$byte4 = $code_point & 0x3F | 0x80;
Expand Down

0 comments on commit b80e784

Please sign in to comment.