@@ -511,6 +511,32 @@ class WP_HTML_Tag_Processor {
511511 */
512512 protected $ parser_state = self ::STATE_READY ;
513513
514+ /**
515+ * Indicates if the document is in quirks mode or no-quirks mode.
516+ *
517+ * Impact on HTML parsing:
518+ *
519+ * - In `NO_QUIRKS_MODE` (also known as "standard mode"):
520+ * - CSS class and ID selectors match byte-for-byte (case-sensitively).
521+ * - A TABLE start tag `<table>` implicitly closes any open `P` element.
522+ *
523+ * - In `QUIRKS_MODE`:
524+ * - CSS class and ID selectors match in an ASCII case-insensitive manner.
525+ * - A TABLE start tag `<table>` opens a `TABLE` element as a child of a `P`
526+ * element if one is open.
527+ *
528+ * Quirks and no-quirks mode are thus mostly about styling, but have an impact when
529+ * tables are found inside paragraph elements.
530+ *
531+ * @see self::QUIRKS_MODE
532+ * @see self::NO_QUIRKS_MODE
533+ *
534+ * @since 6.7.0
535+ *
536+ * @var string
537+ */
538+ protected $ compat_mode = self ::NO_QUIRKS_MODE ;
539+
514540 /**
515541 * Indicates whether the parser is inside foreign content,
516542 * e.g. inside an SVG or MathML element.
@@ -1155,6 +1181,8 @@ public function class_list() {
11551181
11561182 $ seen = array ();
11571183
1184+ $ is_quirks = self ::QUIRKS_MODE === $ this ->compat_mode ;
1185+
11581186 $ at = 0 ;
11591187 while ( $ at < strlen ( $ class ) ) {
11601188 // Skip past any initial boundary characters.
@@ -1169,13 +1197,11 @@ public function class_list() {
11691197 return ;
11701198 }
11711199
1172- /*
1173- * CSS class names are case-insensitive in the ASCII range.
1174- *
1175- * @see https://www.w3.org/TR/CSS2/syndata.html#x1
1176- */
1177- $ name = str_replace ( "\x00" , "\u{FFFD}" , strtolower ( substr ( $ class , $ at , $ length ) ) );
1178- $ at += $ length ;
1200+ $ name = str_replace ( "\x00" , "\u{FFFD}" , substr ( $ class , $ at , $ length ) );
1201+ if ( $ is_quirks ) {
1202+ $ name = strtolower ( $ name );
1203+ }
1204+ $ at += $ length ;
11791205
11801206 /*
11811207 * It's expected that the number of class names for a given tag is relatively small.
@@ -1205,10 +1231,14 @@ public function has_class( $wanted_class ): ?bool {
12051231 return null ;
12061232 }
12071233
1208- $ wanted_class = strtolower ( $ wanted_class ) ;
1234+ $ case_insensitive = self :: QUIRKS_MODE === $ this -> compat_mode ;
12091235
1236+ $ wanted_length = strlen ( $ wanted_class );
12101237 foreach ( $ this ->class_list () as $ class_name ) {
1211- if ( $ class_name === $ wanted_class ) {
1238+ if (
1239+ strlen ( $ class_name ) === $ wanted_length &&
1240+ 0 === substr_compare ( $ class_name , $ wanted_class , 0 , strlen ( $ wanted_class ), $ case_insensitive )
1241+ ) {
12121242 return true ;
12131243 }
12141244 }
@@ -2296,6 +2326,23 @@ private function class_name_updates_to_attributes_updates(): void {
22962326 */
22972327 $ modified = false ;
22982328
2329+ $ seen = array ();
2330+ $ to_remove = array ();
2331+ $ is_quirks = self ::QUIRKS_MODE === $ this ->compat_mode ;
2332+ if ( $ is_quirks ) {
2333+ foreach ( $ this ->classname_updates as $ updated_name => $ action ) {
2334+ if ( self ::REMOVE_CLASS === $ action ) {
2335+ $ to_remove [] = strtolower ( $ updated_name );
2336+ }
2337+ }
2338+ } else {
2339+ foreach ( $ this ->classname_updates as $ updated_name => $ action ) {
2340+ if ( self ::REMOVE_CLASS === $ action ) {
2341+ $ to_remove [] = $ updated_name ;
2342+ }
2343+ }
2344+ }
2345+
22992346 // Remove unwanted classes by only copying the new ones.
23002347 $ existing_class_length = strlen ( $ existing_class );
23012348 while ( $ at < $ existing_class_length ) {
@@ -2311,25 +2358,23 @@ private function class_name_updates_to_attributes_updates(): void {
23112358 break ;
23122359 }
23132360
2314- $ name = substr ( $ existing_class , $ at , $ name_length );
2315- $ at += $ name_length ;
2316-
2317- // If this class is marked for removal, start processing the next one.
2318- $ remove_class = (
2319- isset ( $ this ->classname_updates [ $ name ] ) &&
2320- self ::REMOVE_CLASS === $ this ->classname_updates [ $ name ]
2321- );
2361+ $ name = substr ( $ existing_class , $ at , $ name_length );
2362+ $ comparable_class_name = $ is_quirks ? strtolower ( $ name ) : $ name ;
2363+ $ at += $ name_length ;
23222364
2323- // If a class has already been seen then skip it; it should not be added twice.
2324- if ( ! $ remove_class ) {
2325- $ this ->classname_updates [ $ name ] = self ::SKIP_CLASS ;
2365+ // If this class is marked for removal, remove it and move on to the next one.
2366+ if ( in_array ( $ comparable_class_name , $ to_remove , true ) ) {
2367+ $ modified = true ;
2368+ continue ;
23262369 }
23272370
2328- if ( $ remove_class ) {
2329- $ modified = true ;
2371+ // If a class has already been seen then skip it; it should not be added twice.
2372+ if ( in_array ( $ comparable_class_name , $ seen , true ) ) {
23302373 continue ;
23312374 }
23322375
2376+ $ seen [] = $ comparable_class_name ;
2377+
23332378 /*
23342379 * Otherwise, append it to the new "class" attribute value.
23352380 *
@@ -2350,7 +2395,8 @@ private function class_name_updates_to_attributes_updates(): void {
23502395
23512396 // Add new classes by appending those which haven't already been seen.
23522397 foreach ( $ this ->classname_updates as $ name => $ operation ) {
2353- if ( self ::ADD_CLASS === $ operation ) {
2398+ $ comparable_name = $ is_quirks ? strtolower ( $ name ) : $ name ;
2399+ if ( self ::ADD_CLASS === $ operation && ! in_array ( $ comparable_name , $ seen , true ) ) {
23542400 $ modified = true ;
23552401
23562402 $ class .= strlen ( $ class ) > 0 ? ' ' : '' ;
@@ -3932,8 +3978,29 @@ public function add_class( $class_name ): bool {
39323978 return false ;
39333979 }
39343980
3935- $ this ->classname_updates [ $ class_name ] = self ::ADD_CLASS ;
3981+ if ( self ::QUIRKS_MODE !== $ this ->compat_mode ) {
3982+ $ this ->classname_updates [ $ class_name ] = self ::ADD_CLASS ;
3983+ return true ;
3984+ }
39363985
3986+ /*
3987+ * Because class names are matched ASCII-case-insensitively in quirks mode,
3988+ * this needs to see if a case variant of the given class name is already
3989+ * enqueued and update that existing entry, if so. This picks the casing of
3990+ * the first-provided class name for all lexical variations.
3991+ */
3992+ $ class_name_length = strlen ( $ class_name );
3993+ foreach ( $ this ->classname_updates as $ updated_name => $ action ) {
3994+ if (
3995+ strlen ( $ updated_name ) === $ class_name_length &&
3996+ 0 === substr_compare ( $ updated_name , $ class_name , 0 , $ class_name_length , true )
3997+ ) {
3998+ $ this ->classname_updates [ $ updated_name ] = self ::ADD_CLASS ;
3999+ return true ;
4000+ }
4001+ }
4002+
4003+ $ this ->classname_updates [ $ class_name ] = self ::ADD_CLASS ;
39374004 return true ;
39384005 }
39394006
@@ -3953,10 +4020,29 @@ public function remove_class( $class_name ): bool {
39534020 return false ;
39544021 }
39554022
3956- if ( null !== $ this ->tag_name_starts_at ) {
4023+ if ( self :: QUIRKS_MODE !== $ this ->compat_mode ) {
39574024 $ this ->classname_updates [ $ class_name ] = self ::REMOVE_CLASS ;
4025+ return true ;
4026+ }
4027+
4028+ /*
4029+ * Because class names are matched ASCII-case-insensitively in quirks mode,
4030+ * this needs to see if a case variant of the given class name is already
4031+ * enqueued and update that existing entry, if so. This picks the casing of
4032+ * the first-provided class name for all lexical variations.
4033+ */
4034+ $ class_name_length = strlen ( $ class_name );
4035+ foreach ( $ this ->classname_updates as $ updated_name => $ action ) {
4036+ if (
4037+ strlen ( $ updated_name ) === $ class_name_length &&
4038+ 0 === substr_compare ( $ updated_name , $ class_name , 0 , $ class_name_length , true )
4039+ ) {
4040+ $ this ->classname_updates [ $ updated_name ] = self ::REMOVE_CLASS ;
4041+ return true ;
4042+ }
39584043 }
39594044
4045+ $ this ->classname_updates [ $ class_name ] = self ::REMOVE_CLASS ;
39604046 return true ;
39614047 }
39624048
@@ -4350,6 +4436,37 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info {
43504436 */
43514437 const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML ' ;
43524438
4439+ /**
4440+ * No-quirks mode document compatibility mode.
4441+ *
4442+ * > In no-quirks mode, the behavior is (hopefully) the desired behavior
4443+ * > described by the modern HTML and CSS specifications.
4444+ *
4445+ * @see self::$compat_mode
4446+ * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode
4447+ *
4448+ * @since 6.7.0
4449+ *
4450+ * @var string
4451+ */
4452+ const NO_QUIRKS_MODE = 'no-quirks-mode ' ;
4453+
4454+ /**
4455+ * Quirks mode document compatibility mode.
4456+ *
4457+ * > In quirks mode, layout emulates behavior in Navigator 4 and Internet
4458+ * > Explorer 5. This is essential in order to support websites that were
4459+ * > built before the widespread adoption of web standards.
4460+ *
4461+ * @see self::$compat_mode
4462+ * @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode
4463+ *
4464+ * @since 6.7.0
4465+ *
4466+ * @var string
4467+ */
4468+ const QUIRKS_MODE = 'quirks-mode ' ;
4469+
43534470 /**
43544471 * Indicates that a span of text may contain any combination of significant
43554472 * kinds of characters: NULL bytes, whitespace, and others.
0 commit comments