Skip to content

Commit 153f076

Browse files
committed
HTML API: Respect document compat mode when handling CSS class names.
The HTML API has been behaving as if CSS class name selectors matched class names in an ASCII case-insensitive manner. This is only true if the document in question is set to quirks mode. Unfortunately, most documents processed will be set to no-quirks mode, meaning that some CSS behaviors have been matching incorrectly when provided with case variants of class names. In this patch, the CSS methods have been audited and updated to adhere to the rules governing ASCII case sensitivity when matching classes. This includes `add_class()`, `remove_class()`, `has_class()`, and `class_list()`. Now, it is assumed that a document is in no-quirks mode unless a full HTML parser infers quirks mode, and these methods will treat class names in a byte-for-byte manner. Otherwise, when a document is in quirks mode, the methods will compare the provided class names against existing class names for the tag in an ASCII case-insensitive way, while `class_list()` will return a lower-cased version of the existing class names. The lower-casing in `class_list()` is performed for consistency, since it's possible that multiple case variants of the same comparable class name exist on a tag in the input HTML. Developed in WordPress/wordpress-develop#7169. Discussed in https://core.trac.wordpress.org/ticket/61531. Props dmsnell, jonsurrell. See #61531. Built from https://develop.svn.wordpress.org/trunk@58985 git-svn-id: https://core.svn.wordpress.org/trunk@58381 1a063a9b-81f0-0310-95a4-ce76da25c4cd
1 parent fe51709 commit 153f076

File tree

4 files changed

+150
-78
lines changed

4 files changed

+150
-78
lines changed

wp-includes/html-api/class-wp-html-processor-state.php

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -299,31 +299,6 @@ class WP_HTML_Processor_State {
299299
*/
300300
const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset';
301301

302-
/**
303-
* No-quirks mode document compatability mode.
304-
*
305-
* > In no-quirks mode, the behavior is (hopefully) the desired behavior
306-
* > described by the modern HTML and CSS specifications.
307-
*
308-
* @since 6.7.0
309-
*
310-
* @var string
311-
*/
312-
const NO_QUIRKS_MODE = 'no-quirks-mode';
313-
314-
/**
315-
* Quirks mode document compatability mode.
316-
*
317-
* > In quirks mode, layout emulates behavior in Navigator 4 and Internet
318-
* > Explorer 5. This is essential in order to support websites that were
319-
* > built before the widespread adoption of web standards.
320-
*
321-
* @since 6.7.0
322-
*
323-
* @var string
324-
*/
325-
const QUIRKS_MODE = 'quirks-mode';
326-
327302
/**
328303
* The stack of template insertion modes.
329304
*
@@ -381,30 +356,6 @@ class WP_HTML_Processor_State {
381356
*/
382357
public $insertion_mode = self::INSERTION_MODE_INITIAL;
383358

384-
/**
385-
* Indicates if the document is in quirks mode or no-quirks mode.
386-
*
387-
* Impact on HTML parsing:
388-
*
389-
* - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte
390-
* manner, otherwise for backwards compatability, class selectors are to
391-
* match in an ASCII case-insensitive manner.
392-
*
393-
* - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag
394-
* if one is in scope and open, otherwise the TABLE becomes a child of the P.
395-
*
396-
* `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but
397-
* none of the other changes modifies how the HTML is parsed or selected.
398-
*
399-
* @see self::QUIRKS_MODE
400-
* @see self::NO_QUIRKS_MODE
401-
*
402-
* @since 6.7.0
403-
*
404-
* @var string
405-
*/
406-
public $document_mode = self::NO_QUIRKS_MODE;
407-
408359
/**
409360
* Context node initializing fragment parser, if created as a fragment parser.
410361
*

wp-includes/html-api/class-wp-html-processor.php

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,7 +1080,7 @@ private function step_initial(): bool {
10801080
case 'html':
10811081
$doctype = $this->get_doctype_info();
10821082
if ( null !== $doctype && 'quirks' === $doctype->indicated_compatability_mode ) {
1083-
$this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE;
1083+
$this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE;
10841084
}
10851085

10861086
/*
@@ -1095,7 +1095,7 @@ private function step_initial(): bool {
10951095
* > Anything else
10961096
*/
10971097
initial_anything_else:
1098-
$this->state->document_mode = WP_HTML_Processor_State::QUIRKS_MODE;
1098+
$this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE;
10991099
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML;
11001100
return $this->step( self::REPROCESS_CURRENT_NODE );
11011101
}
@@ -2448,7 +2448,7 @@ private function step_in_body(): bool {
24482448
* > has a p element in button scope, then close a p element.
24492449
*/
24502450
if (
2451-
WP_HTML_Processor_State::QUIRKS_MODE !== $this->state->document_mode &&
2451+
WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode &&
24522452
$this->state->stack_of_open_elements->has_p_in_button_scope()
24532453
) {
24542454
$this->close_a_p_element();
@@ -4938,6 +4938,10 @@ public function remove_class( $class_name ): bool {
49384938
*
49394939
* @since 6.6.0 Subclassed for the HTML Processor.
49404940
*
4941+
* @todo When reconstructing active formatting elements with attributes, find a way
4942+
* to indicate if the virtually-reconstructed formatting elements contain the
4943+
* wanted class name.
4944+
*
49414945
* @param string $wanted_class Look for this CSS class name; matched ASCII case-insensitively only in quirks mode, byte-for-byte otherwise.
49424946
* @return bool|null Whether the matched tag contains the given class name, or null if not matched.
49434947
*/

wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 142 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,32 @@ class WP_HTML_Tag_Processor {
511511
*/
512512
protected $parser_state = self::STATE_READY;
513513

514+
/**
515+
* Indicates if the document is in quirks mode or no-quirks mode.
516+
*
517+
* Impact on HTML parsing:
518+
*
519+
* - In `NO_QUIRKS_MODE` (also known as "standard mode"):
520+
* - CSS class and ID selectors match byte-for-byte (case-sensitively).
521+
* - A TABLE start tag `<table>` implicitly closes any open `P` element.
522+
*
523+
* - In `QUIRKS_MODE`:
524+
* - CSS class and ID selectors match in an ASCII case-insensitive manner.
525+
* - A TABLE start tag `<table>` opens a `TABLE` element as a child of a `P`
526+
* element if one is open.
527+
*
528+
* Quirks and no-quirks mode are thus mostly about styling, but have an impact when
529+
* tables are found inside paragraph elements.
530+
*
531+
* @see self::QUIRKS_MODE
532+
* @see self::NO_QUIRKS_MODE
533+
*
534+
* @since 6.7.0
535+
*
536+
* @var string
537+
*/
538+
protected $compat_mode = self::NO_QUIRKS_MODE;
539+
514540
/**
515541
* Indicates whether the parser is inside foreign content,
516542
* e.g. inside an SVG or MathML element.
@@ -1155,6 +1181,8 @@ public function class_list() {
11551181

11561182
$seen = array();
11571183

1184+
$is_quirks = self::QUIRKS_MODE === $this->compat_mode;
1185+
11581186
$at = 0;
11591187
while ( $at < strlen( $class ) ) {
11601188
// Skip past any initial boundary characters.
@@ -1169,13 +1197,11 @@ public function class_list() {
11691197
return;
11701198
}
11711199

1172-
/*
1173-
* CSS class names are case-insensitive in the ASCII range.
1174-
*
1175-
* @see https://www.w3.org/TR/CSS2/syndata.html#x1
1176-
*/
1177-
$name = str_replace( "\x00", "\u{FFFD}", strtolower( substr( $class, $at, $length ) ) );
1178-
$at += $length;
1200+
$name = str_replace( "\x00", "\u{FFFD}", substr( $class, $at, $length ) );
1201+
if ( $is_quirks ) {
1202+
$name = strtolower( $name );
1203+
}
1204+
$at += $length;
11791205

11801206
/*
11811207
* It's expected that the number of class names for a given tag is relatively small.
@@ -1205,10 +1231,14 @@ public function has_class( $wanted_class ): ?bool {
12051231
return null;
12061232
}
12071233

1208-
$wanted_class = strtolower( $wanted_class );
1234+
$case_insensitive = self::QUIRKS_MODE === $this->compat_mode;
12091235

1236+
$wanted_length = strlen( $wanted_class );
12101237
foreach ( $this->class_list() as $class_name ) {
1211-
if ( $class_name === $wanted_class ) {
1238+
if (
1239+
strlen( $class_name ) === $wanted_length &&
1240+
0 === substr_compare( $class_name, $wanted_class, 0, strlen( $wanted_class ), $case_insensitive )
1241+
) {
12121242
return true;
12131243
}
12141244
}
@@ -2296,6 +2326,23 @@ private function class_name_updates_to_attributes_updates(): void {
22962326
*/
22972327
$modified = false;
22982328

2329+
$seen = array();
2330+
$to_remove = array();
2331+
$is_quirks = self::QUIRKS_MODE === $this->compat_mode;
2332+
if ( $is_quirks ) {
2333+
foreach ( $this->classname_updates as $updated_name => $action ) {
2334+
if ( self::REMOVE_CLASS === $action ) {
2335+
$to_remove[] = strtolower( $updated_name );
2336+
}
2337+
}
2338+
} else {
2339+
foreach ( $this->classname_updates as $updated_name => $action ) {
2340+
if ( self::REMOVE_CLASS === $action ) {
2341+
$to_remove[] = $updated_name;
2342+
}
2343+
}
2344+
}
2345+
22992346
// Remove unwanted classes by only copying the new ones.
23002347
$existing_class_length = strlen( $existing_class );
23012348
while ( $at < $existing_class_length ) {
@@ -2311,25 +2358,23 @@ private function class_name_updates_to_attributes_updates(): void {
23112358
break;
23122359
}
23132360

2314-
$name = substr( $existing_class, $at, $name_length );
2315-
$at += $name_length;
2316-
2317-
// If this class is marked for removal, start processing the next one.
2318-
$remove_class = (
2319-
isset( $this->classname_updates[ $name ] ) &&
2320-
self::REMOVE_CLASS === $this->classname_updates[ $name ]
2321-
);
2361+
$name = substr( $existing_class, $at, $name_length );
2362+
$comparable_class_name = $is_quirks ? strtolower( $name ) : $name;
2363+
$at += $name_length;
23222364

2323-
// If a class has already been seen then skip it; it should not be added twice.
2324-
if ( ! $remove_class ) {
2325-
$this->classname_updates[ $name ] = self::SKIP_CLASS;
2365+
// If this class is marked for removal, remove it and move on to the next one.
2366+
if ( in_array( $comparable_class_name, $to_remove, true ) ) {
2367+
$modified = true;
2368+
continue;
23262369
}
23272370

2328-
if ( $remove_class ) {
2329-
$modified = true;
2371+
// If a class has already been seen then skip it; it should not be added twice.
2372+
if ( in_array( $comparable_class_name, $seen, true ) ) {
23302373
continue;
23312374
}
23322375

2376+
$seen[] = $comparable_class_name;
2377+
23332378
/*
23342379
* Otherwise, append it to the new "class" attribute value.
23352380
*
@@ -2350,7 +2395,8 @@ private function class_name_updates_to_attributes_updates(): void {
23502395

23512396
// Add new classes by appending those which haven't already been seen.
23522397
foreach ( $this->classname_updates as $name => $operation ) {
2353-
if ( self::ADD_CLASS === $operation ) {
2398+
$comparable_name = $is_quirks ? strtolower( $name ) : $name;
2399+
if ( self::ADD_CLASS === $operation && ! in_array( $comparable_name, $seen, true ) ) {
23542400
$modified = true;
23552401

23562402
$class .= strlen( $class ) > 0 ? ' ' : '';
@@ -3932,8 +3978,29 @@ public function add_class( $class_name ): bool {
39323978
return false;
39333979
}
39343980

3935-
$this->classname_updates[ $class_name ] = self::ADD_CLASS;
3981+
if ( self::QUIRKS_MODE !== $this->compat_mode ) {
3982+
$this->classname_updates[ $class_name ] = self::ADD_CLASS;
3983+
return true;
3984+
}
39363985

3986+
/*
3987+
* Because class names are matched ASCII-case-insensitively in quirks mode,
3988+
* this needs to see if a case variant of the given class name is already
3989+
* enqueued and update that existing entry, if so. This picks the casing of
3990+
* the first-provided class name for all lexical variations.
3991+
*/
3992+
$class_name_length = strlen( $class_name );
3993+
foreach ( $this->classname_updates as $updated_name => $action ) {
3994+
if (
3995+
strlen( $updated_name ) === $class_name_length &&
3996+
0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true )
3997+
) {
3998+
$this->classname_updates[ $updated_name ] = self::ADD_CLASS;
3999+
return true;
4000+
}
4001+
}
4002+
4003+
$this->classname_updates[ $class_name ] = self::ADD_CLASS;
39374004
return true;
39384005
}
39394006

@@ -3953,10 +4020,29 @@ public function remove_class( $class_name ): bool {
39534020
return false;
39544021
}
39554022

3956-
if ( null !== $this->tag_name_starts_at ) {
4023+
if ( self::QUIRKS_MODE !== $this->compat_mode ) {
39574024
$this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
4025+
return true;
4026+
}
4027+
4028+
/*
4029+
* Because class names are matched ASCII-case-insensitively in quirks mode,
4030+
* this needs to see if a case variant of the given class name is already
4031+
* enqueued and update that existing entry, if so. This picks the casing of
4032+
* the first-provided class name for all lexical variations.
4033+
*/
4034+
$class_name_length = strlen( $class_name );
4035+
foreach ( $this->classname_updates as $updated_name => $action ) {
4036+
if (
4037+
strlen( $updated_name ) === $class_name_length &&
4038+
0 === substr_compare( $updated_name, $class_name, 0, $class_name_length, true )
4039+
) {
4040+
$this->classname_updates[ $updated_name ] = self::REMOVE_CLASS;
4041+
return true;
4042+
}
39584043
}
39594044

4045+
$this->classname_updates[ $class_name ] = self::REMOVE_CLASS;
39604046
return true;
39614047
}
39624048

@@ -4350,6 +4436,37 @@ public function get_doctype_info(): ?WP_HTML_Doctype_Info {
43504436
*/
43514437
const COMMENT_AS_INVALID_HTML = 'COMMENT_AS_INVALID_HTML';
43524438

4439+
/**
4440+
* No-quirks mode document compatibility mode.
4441+
*
4442+
* > In no-quirks mode, the behavior is (hopefully) the desired behavior
4443+
* > described by the modern HTML and CSS specifications.
4444+
*
4445+
* @see self::$compat_mode
4446+
* @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode
4447+
*
4448+
* @since 6.7.0
4449+
*
4450+
* @var string
4451+
*/
4452+
const NO_QUIRKS_MODE = 'no-quirks-mode';
4453+
4454+
/**
4455+
* Quirks mode document compatibility mode.
4456+
*
4457+
* > In quirks mode, layout emulates behavior in Navigator 4 and Internet
4458+
* > Explorer 5. This is essential in order to support websites that were
4459+
* > built before the widespread adoption of web standards.
4460+
*
4461+
* @see self::$compat_mode
4462+
* @see https://developer.mozilla.org/en-US/docs/Web/HTML/Quirks_Mode_and_Standards_Mode
4463+
*
4464+
* @since 6.7.0
4465+
*
4466+
* @var string
4467+
*/
4468+
const QUIRKS_MODE = 'quirks-mode';
4469+
43534470
/**
43544471
* Indicates that a span of text may contain any combination of significant
43554472
* kinds of characters: NULL bytes, whitespace, and others.

wp-includes/version.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
*
1717
* @global string $wp_version
1818
*/
19-
$wp_version = '6.7-alpha-58984';
19+
$wp_version = '6.7-alpha-58985';
2020

2121
/**
2222
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

0 commit comments

Comments
 (0)