diff --git a/src/wp-includes/html-api/class-wp-html-processor-state.php b/src/wp-includes/html-api/class-wp-html-processor-state.php index 16875c4ac1b2b..b7cdd347ca85b 100644 --- a/src/wp-includes/html-api/class-wp-html-processor-state.php +++ b/src/wp-includes/html-api/class-wp-html-processor-state.php @@ -299,31 +299,6 @@ class WP_HTML_Processor_State { */ const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset'; - /** - * No-quirks mode document compatability mode. - * - * > In no-quirks mode, the behavior is (hopefully) the desired behavior - * > described by the modern HTML and CSS specifications. - * - * @since 6.7.0 - * - * @var string - */ - const NO_QUIRKS_MODE = 'no-quirks-mode'; - - /** - * Quirks mode document compatability mode. - * - * > In quirks mode, layout emulates behavior in Navigator 4 and Internet - * > Explorer 5. This is essential in order to support websites that were - * > built before the widespread adoption of web standards. - * - * @since 6.7.0 - * - * @var string - */ - const QUIRKS_MODE = 'quirks-mode'; - /** * The stack of template insertion modes. * @@ -381,30 +356,6 @@ class WP_HTML_Processor_State { */ public $insertion_mode = self::INSERTION_MODE_INITIAL; - /** - * Indicates if the document is in quirks mode or no-quirks mode. - * - * Impact on HTML parsing: - * - * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte - * manner, otherwise for backwards compatability, class selectors are to - * match in an ASCII case-insensitive manner. - * - * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag - * if one is in scope and open, otherwise the TABLE becomes a child of the P. - * - * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but - * none of the other changes modifies how the HTML is parsed or selected. 
- * - * @see self::QUIRKS_MODE - * @see self::NO_QUIRKS_MODE - * - * @since 6.7.0 - * - * @var string - */ - public $document_mode = self::NO_QUIRKS_MODE; - /** * Context node initializing fragment parser, if created as a fragment parser. * diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index 661b9c712ad49..55b906136820f 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -1080,7 +1080,7 @@ private function step_initial(): bool { case 'html': $doctype = $this->get_doctype_info(); if ( null !== $doctype && 'quirks' === $doctype->indicated_compatability_mode ) { - $this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; } /* @@ -1095,7 +1095,7 @@ private function step_initial(): bool { * > Anything else */ initial_anything_else: - $this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE; + $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; return $this->step( self::REPROCESS_CURRENT_NODE ); } @@ -2448,7 +2448,7 @@ private function step_in_body(): bool { * > has a p element in button scope, then close a p element. */ if ( - WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode && + WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode && $this->state->stack_of_open_elements->has_p_in_button_scope() ) { $this->close_a_p_element(); @@ -4938,6 +4938,10 @@ public function remove_class( $class_name ): bool { * * @since 6.6.0 Subclassed for the HTML Processor. * + * @todo When reconstructing active formatting elements with attributes, find a way + * to indicate if the virtually-reconstructed formatting elements contain the + * wanted class name. + * * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. 
* @return bool|null Whether the matched tag contains the given class name, or null if not matched. */ diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index e8572935a64ce..1ea8066d97ade 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -511,6 +511,32 @@ class WP_HTML_Tag_Processor { */ protected $parser_state = self::STATE_READY; + /** + * Indicates if the document is in quirks mode or no-quirks mode. + * + * Impact on HTML parsing: + * + * - In `NO_QUIRKS_MODE` (also known as "standard mode"): + * - CSS class and ID selectors match byte-for-byte (case-sensitively). + * - A TABLE start tag `<table>` implicitly closes any open `P` element. + * + * - In `QUIRKS_MODE`: + * - CSS class and ID selectors match in an ASCII case-insensitive manner. + * - A TABLE start tag `<table>
` opens a `TABLE` element as a child of a `P` + * element if one is open. + * + * Quirks and no-quirks mode are thus mostly about styling, but have an impact when + * tables are found inside paragraph elements. + * + * @see self::QUIRKS_MODE + * @see self::NO_QUIRKS_MODE + * + * @since 6.7.0 + * + * @var string + */ + protected $compat_mode = self::NO_QUIRKS_MODE; + /** * Indicates whether the parser is inside foreign content, * e.g. inside an SVG or MathML element. @@ -1155,6 +1181,8 @@ public function class_list() { $seen = array(); + $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + $at = 0; while ( $at < strlen( $class ) ) { // Skip past any initial boundary characters. @@ -1169,13 +1197,11 @@ public function class_list() { return; } - /* - * CSS class names are case-insensitive in the ASCII range. - * - * @see https://www.w3.org/TR/CSS2/syndata.html#x1 - */ - $name = str_replace( "\x00", "\u{FFFD}", strtolower( substr( $class, $at, $length ) ) ); - $at += $length; + $name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) ); + if ( $is_quirks ) { + $name = strtolower( $name ); + } + $at += $length; /* * It's expected that the number of class names for a given tag is relatively small. 
@@ -1205,10 +1231,14 @@ public function has_class( $wanted_class ): ?bool { return null; } - $wanted_class = strtolower( $wanted_class ); + $case_insensitive = self::QUIRKS_MODE === $this->compat_mode; + $wanted_length = strlen( $wanted_class ); foreach ( $this->class_list() as $class_name ) { - if ( $class_name === $wanted_class ) { + if ( + strlen( $class_name ) === $wanted_length && + 0 === substr_compare( $class_name, $wanted_class, 0, strlen( $wanted_class ), $case_insensitive ) + ) { return true; } } @@ -2296,6 +2326,23 @@ private function class_name_updates_to_attributes_updates(): void { */ $modified = false; + $seen = array(); + $to_remove = array(); + $is_quirks = self::QUIRKS_MODE === $this->compat_mode; + if ( $is_quirks ) { + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( self::REMOVE_CLASS === $action ) { + $to_remove[] = strtolower( $updated_name ); + } + } + } else { + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( self::REMOVE_CLASS === $action ) { + $to_remove[] = $updated_name; + } + } + } + // Remove unwanted classes by only copying the new ones. $existing_class_length = strlen( $existing_class ); while ( $at < $existing_class_length ) { @@ -2311,25 +2358,23 @@ private function class_name_updates_to_attributes_updates(): void { break; } - $name = substr( $existing_class, $at, $name_length ); - $at += $name_length; - - // If this class is marked for removal, start processing the next one. - $remove_class = ( - isset( $this->classname_updates[ $name ] ) && - self::REMOVE_CLASS === $this->classname_updates[ $name ] - ); + $name = substr( $existing_class, $at, $name_length ); + $comparable_class_name = $is_quirks ? strtolower( $name ) : $name; + $at += $name_length; - // If a class has already been seen then skip it; it should not be added twice. - if ( ! 
$remove_class ) { - $this->classname_updates[ $name ] = self::SKIP_CLASS; + // If this class is marked for removal, remove it and move on to the next one. + if ( in_array( $comparable_class_name, $to_remove, true ) ) { + $modified = true; + continue; } - if ( $remove_class ) { - $modified = true; + // If a class has already been seen then skip it; it should not be added twice. + if ( in_array( $comparable_class_name, $seen, true ) ) { continue; } + $seen[] = $comparable_class_name; + /* * Otherwise, append it to the new "class" attribute value. * @@ -2350,7 +2395,8 @@ private function class_name_updates_to_attributes_updates(): void { // Add new classes by appending those which haven't already been seen. foreach ( $this->classname_updates as $name => $operation ) { - if ( self::ADD_CLASS === $operation ) { + $comparable_name = $is_quirks ? strtolower( $name ) : $name; + if ( self::ADD_CLASS === $operation && ! in_array( $comparable_name, $seen, true ) ) { $modified = true; $class .= strlen( $class ) > 0 ? ' ' : ''; @@ -3932,8 +3978,29 @@ public function add_class( $class_name ): bool { return false; } - $this->classname_updates[ $class_name ] = self::ADD_CLASS; + if ( self::QUIRKS_MODE !== $this->compat_mode ) { + $this->classname_updates[ $class_name ] = self::ADD_CLASS; + return true; + } + /* + * Because class names are matched ASCII-case-insensitively in quirks mode, + * this needs to see if a case variant of the given class name is already + * enqueued and update that existing entry, if so. This picks the casing of + * the first-provided class name for all lexical variations. 
+ */ + $class_name_length = strlen( $class_name ); + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( + strlen( $updated_name ) === $class_name_length && + 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) + ) { + $this->classname_updates[ $updated_name ] = self::ADD_CLASS; + return true; + } + } + + $this->classname_updates[ $class_name ] = self::ADD_CLASS; return true; } @@ -3953,10 +4020,29 @@ public function remove_class( $class_name ): bool { return false; } - if ( null !== $this->tag_name_starts_at ) { + if ( self::QUIRKS_MODE !== $this->compat_mode ) { $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; + return true; + } + + /* + * Because class names are matched ASCII-case-insensitively in quirks mode, + * this needs to see if a case variant of the given class name is already + * enqueued and update that existing entry, if so. This picks the casing of + * the first-provided class name for all lexical variations. + */ + $class_name_length = strlen( $class_name ); + foreach ( $this->classname_updates as $updated_name => $action ) { + if ( + strlen( $updated_name ) === $class_name_length && + 0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true ) + ) { + $this->classname_updates[ $updated_name ] = self::REMOVE_CLASS; + return true; + } } + $this->classname_updates[ $class_name ] = self::REMOVE_CLASS; return true; } @@ -4350,6 +4436,37 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info { */ const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML'; + /** + * No-quirks mode document compatibility mode. + * + * > In no-quirks mode, the behavior is (hopefully) the desired behavior + * > described by the modern HTML and CSS specifications.
+ * + * @see self::$compat_mode + * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode + * + * @since 6.7.0 + * + * @var string + */ + const NO_QUIRKS_MODE = 'no-quirks-mode'; + + /** + * Quirks mode document compatibility mode. + * + * > In quirks mode, layout emulates behavior in Navigator 4 and Internet + * > Explorer 5. This is essential in order to support websites that were + * > built before the widespread adoption of web standards. + * + * @see self::$compat_mode + * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode + * + * @since 6.7.0 + * + * @var string + */ + const QUIRKS_MODE = 'quirks-mode'; + /** * Indicates that a span of text may contain any combination of significant * kinds of characters: NULL bytes, whitespace, and others. diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php index ebc41aef9b5ef..e9b9063f77a7b 100644 --- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php @@ -519,4 +519,155 @@ public function test_foreign_content_script_self_closing() { $processor = WP_HTML_Processor::create_fragment( '' ); $this->assertTrue( $processor->next_tag( 'script' ) ); } + + /** + * Ensures that the tag processor is case sensitive when removing CSS classes in no-quirks mode. + * + * @ticket 61531 + * + * @covers ::remove_class + */ + public function test_remove_class_no_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); + $processor->remove_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->remove_class( 'UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case sensitive when adding CSS classes in no-quirks mode.
+ * + * @ticket 61531 + * + * @covers ::add_class + */ + public function test_add_class_no_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); + $processor->add_class( 'UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->add_class( 'upper' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case sensitive when checking has CSS classes in no-quirks mode. + * + * @ticket 61531 + * + * @covers ::has_class + */ + public function test_has_class_no_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); + $this->assertFalse( $processor->has_class( 'upper' ) ); + $this->assertTrue( $processor->has_class( 'UPPER' ) ); + } + + /** + * Ensures that the tag processor lists unique CSS class names in no-quirks mode. + * + * @ticket 61531 + * + * @covers ::class_list + */ + public function test_class_list_no_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( + /* + * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE + * U+0045 is LATIN CAPITAL LETTER E + * U+0301 is COMBINING ACUTE ACCENT + * + * This tests not only that the class matching deduplicates the É, but also + * that it treats the same character in different normalization forms as + * distinct, since matching occurs on a byte-for-byte basis. + */ + "" + ); + $processor->next_tag( 'SPAN' ); + $class_list = iterator_to_array( $processor->class_list() ); + $this->assertSame( + array( 'A', 'a', 'B', 'b', 'É', "E\u{0301}", 'é' ), + $class_list + ); + } + + /** + * Ensures that the tag processor is case insensitive when removing CSS classes in quirks mode. 
+ * + * @ticket 61531 + * + * @covers ::remove_class + */ + public function test_remove_class_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); + $processor->remove_class( 'upPer' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case insensitive when adding CSS classes in quirks mode. + * + * @ticket 61531 + * + * @covers ::add_class + */ + public function test_add_class_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); + $processor->add_class( 'upper' ); + + $this->assertSame( '', $processor->get_updated_html() ); + + $processor->add_class( 'ANOTHER-UPPER' ); + $this->assertSame( '', $processor->get_updated_html() ); + } + + /** + * Ensures that the tag processor is case insensitive when checking has CSS classes in quirks mode. + * + * @ticket 61531 + * + * @covers ::has_class + */ + public function test_has_class_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( '' ); + $processor->next_tag( 'SPAN' ); + $this->assertTrue( $processor->has_class( 'upper' ) ); + $this->assertTrue( $processor->has_class( 'UPPER' ) ); + } + + /** + * Ensures that the tag processor lists unique CSS class names in quirks mode. + * + * @ticket 61531 + * + * @covers ::class_list + */ + public function test_class_list_quirks_mode() { + $processor = WP_HTML_Processor::create_full_parser( + /* + * U+00C9 is LATIN CAPITAL LETTER E WITH ACUTE + * U+0045 is LATIN CAPITAL LETTER E + * U+0065 is LATIN SMALL LETTER E + * U+0301 is COMBINING ACUTE ACCENT + * + * This tests not only that the class matching deduplicates the É, but also + * that it treats the same character in different normalization forms as + * distinct, since matching occurs on a byte-for-byte basis.
+ */ + "" + ); + $processor->next_tag( 'SPAN' ); + $class_list = iterator_to_array( $processor->class_list() ); + $this->assertSame( + array( 'a', 'b', 'É', "e\u{301}", 'é' ), + $class_list + ); + } }