Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 50 additions & 24 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,15 @@ class WP_HTML_Tag_Processor {
*/
private $token_length;

/**
* Whether the current tag token has the self-closing flag.
*
* @since 7.1.0
*
* @var bool
*/
private $has_self_closing_flag = false;

/**
* Byte offset in input document where current tag name starts.
*
Expand Down Expand Up @@ -1074,11 +1083,12 @@ private function base_class_next_token(): bool {
* the closing tag to point to the opening of the special atomic
* tag sequence.
*/
$tag_name_starts_at = $this->tag_name_starts_at;
$tag_name_length = $this->tag_name_length;
$tag_ends_at = $this->token_starts_at + $this->token_length;
$attributes = $this->attributes;
$duplicate_attributes = $this->duplicate_attributes;
$tag_name_starts_at = $this->tag_name_starts_at;
$tag_name_length = $this->tag_name_length;
$tag_ends_at = $this->token_starts_at + $this->token_length;
$has_self_closing_flag = $this->has_self_closing_flag;
$attributes = $this->attributes;
$duplicate_attributes = $this->duplicate_attributes;

// Find the closing tag if necessary.
switch ( $tag_name ) {
Expand Down Expand Up @@ -1128,14 +1138,15 @@ private function base_class_next_token(): bool {
* functions that skip the contents have moved all the internal cursors past
* the inner content of the tag.
*/
$this->token_starts_at = $was_at;
$this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
$this->text_starts_at = $tag_ends_at;
$this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
$this->tag_name_starts_at = $tag_name_starts_at;
$this->tag_name_length = $tag_name_length;
$this->attributes = $attributes;
$this->duplicate_attributes = $duplicate_attributes;
$this->token_starts_at = $was_at;
$this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
$this->text_starts_at = $tag_ends_at;
$this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
$this->tag_name_starts_at = $tag_name_starts_at;
$this->tag_name_length = $tag_name_length;
$this->has_self_closing_flag = $has_self_closing_flag;
$this->attributes = $attributes;
$this->duplicate_attributes = $duplicate_attributes;

return true;
}
Expand Down Expand Up @@ -2134,19 +2145,41 @@ private function parse_next_tag(): bool {
* @since 6.2.0
* @ignore
*
* @return bool Whether an attribute was found before the end of the document.
* @return bool True to indicate attribute parsing should continue. False if a stop condition
* was reached.
*/
private function parse_next_attribute(): bool {
$doc_length = strlen( $this->html );

// Skip whitespace and slashes.
$this->bytes_already_parsed += strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
$skipped_length = strspn( $this->html, " \t\f\r\n/", $this->bytes_already_parsed );
$this->bytes_already_parsed += $skipped_length;
if ( $this->bytes_already_parsed >= $doc_length ) {
$this->parser_state = self::STATE_INCOMPLETE_INPUT;

return false;
}

/**
* This block serves two purposes:
*
* - A fast path for common tag-ending `>`.
* - A check for the self-closing flag which must appear as `/>`.
*
* In a tag like `<g attr=/>`, `/` is the attribute value, not a self-closing
* flag. When it appears in this form, the parser has already consumed the
* attribute value, `$skipped_length` is 0, and the checks below correctly
* identify whether there is a self-closing flag.
*
* Note: Both start and end tags may have the self-closing flag.
*/
if ( '>' === $this->html[ $this->bytes_already_parsed ] ) {
if ( $skipped_length > 0 && '/' === $this->html[ $this->bytes_already_parsed - 1 ] ) {
$this->has_self_closing_flag = true;
}
return false;
}

/*
* Treat the equal sign as a part of the attribute
* name if it is the first encountered byte.
Expand Down Expand Up @@ -2324,6 +2357,7 @@ private function after_tag(): void {

$this->token_starts_at = null;
$this->token_length = null;
$this->has_self_closing_flag = false;
$this->tag_name_starts_at = null;
$this->tag_name_length = null;
$this->text_starts_at = 0;
Expand Down Expand Up @@ -3332,15 +3366,7 @@ public function has_self_closing_flag(): bool {
return false;
}

/*
* The self-closing flag is the solidus at the _end_ of the tag, not the beginning.
*
* Example:
*
* <figure />
* ^ this appears one character before the end of the closing ">".
*/
return '/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ];
return $this->has_self_closing_flag;
}

/**
Expand Down
23 changes: 23 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,29 @@ public function test_expects_closer_foreign_content_self_closing() {
$this->assertTrue( $processor->expects_closer() );
}

/**
* Ensures a slash-only unquoted attribute value does not close foreign content.
*
* @ticket 65372
*/
public function test_unquoted_slash_attribute_does_not_self_close_foreign_content(): void {
	// The solidus in `<mi a=/>` is expected to be read as the unquoted value of `a`.
	$html               = '<math><mi a=/>math:mi is not self-closing, it has [a="/"] attribute.';
	$fragment_processor = WP_HTML_Processor::create_fragment( $html );

	$found_mi = $fragment_processor->next_tag( 'MI' );
	$this->assertTrue( $found_mi, 'Failed to find the MI tag: check test setup.' );

	$attribute_value = $fragment_processor->get_attribute( 'a' );
	$this->assertSame( '/', $attribute_value, 'Failed to treat the slash as the unquoted attribute value.' );

	$is_self_closing = $fragment_processor->has_self_closing_flag();
	$this->assertFalse(
		$is_self_closing,
		'Failed to avoid interpreting the slash-only unquoted attribute value as a self-closing flag.'
	);

	// Advance to the trailing text, which must still be nested inside the open MI element.
	$found_text = $fragment_processor->next_token();
	$this->assertTrue( $found_text, 'Failed to find text following the MI tag: check test setup.' );

	$expected_breadcrumbs = array( 'HTML', 'BODY', 'MATH', 'MI', '#text' );
	$this->assertSame(
		$expected_breadcrumbs,
		$fragment_processor->get_breadcrumbs(),
		'Failed to keep text following the MI tag inside the MI element.'
	);
}

/**
* Ensures that expects_closer works for void-like elements in foreign content.
*
Expand Down
7 changes: 5 additions & 2 deletions tests/phpunit/tests/html-api/wpHtmlTagProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,13 @@ public static function data_has_self_closing_flag() {
'No self-closing flag on a foreign element' => array( '<circle>', false ),
// These involve syntax peculiarities.
'Self-closing flag after extra spaces' => array( '<div />', true ),
'Self-closing flag after attribute' => array( '<div id=test/>', true ),
'Self-closing flag after attribute' => array( '<div id=test />', true ),
'Slash inside unquoted attribute value' => array( '<div id=test/>', false ),
'Slash only unquoted attribute value' => array( '<div attr=/>', false ),
'Attribute "=" with value ""' => array( '<div =/>', false ),

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'Self-closing flag after quoted attribute' => array( '<div id="test"/>', true ),
'Self-closing flag after boolean attribute' => array( '<div enabled/>', true ),
'Boolean attribute that looks like a self-closer' => array( '<div / >', false ),
'Ignored "/" and whitespace' => array( '<div / >', false ),
);
}

Expand Down
Loading