diff --git a/src/wp-includes/compat.php b/src/wp-includes/compat.php
index 84e611faee5e8..0cf4edd91613f 100644
--- a/src/wp-includes/compat.php
+++ b/src/wp-includes/compat.php
@@ -581,6 +581,101 @@ function array_last( array $array ) { // phpcs:ignore Universal.NamingConvention
 	}
 }
 
+if ( ! function_exists( 'mb_trim' ) ) {
+	/**
+	 * Polyfill for `mb_trim()` function added in PHP 8.4.
+	 *
+	 * Trims whitespace (or the given characters) from the beginning and end of a string.
+	 *
+	 * The work is done in UTF-8: for any other encoding the input is converted to UTF-8,
+	 * trimmed with a Unicode-aware regular expression, and converted back.
+	 *
+	 * @since 6.9.0
+	 *
+	 * @param string      $str        The string to trim.
+	 * @param string|null $characters Optional. The characters to trim from the string, expressed in `$encoding`.
+	 *                                An empty string trims nothing.
+	 *                                Without the second parameter, mb_trim() will strip these characters:
+	 *                                - " " (Unicode U+0020), an ordinary space.
+	 *                                - "\t" (Unicode U+0009), a tab.
+	 *                                - "\n" (Unicode U+000A), a new line (line feed).
+	 *                                - "\r" (Unicode U+000D), a carriage return.
+	 *                                - "\0" (Unicode U+0000), the NUL-byte.
+	 *                                - "\v" (Unicode U+000B), a vertical tab.
+	 *                                - "\f" (Unicode U+000C), a form feed.
+	 *                                - "\u00A0" (Unicode U+00A0), a NO-BREAK SPACE.
+	 *                                - "\u1680" (Unicode U+1680), an OGHAM SPACE MARK.
+	 *                                - "\u2000" (Unicode U+2000), an EN QUAD.
+	 *                                - "\u2001" (Unicode U+2001), an EM QUAD.
+	 *                                - "\u2002" (Unicode U+2002), an EN SPACE.
+	 *                                - "\u2003" (Unicode U+2003), an EM SPACE.
+	 *                                - "\u2004" (Unicode U+2004), a THREE-PER-EM SPACE.
+	 *                                - "\u2005" (Unicode U+2005), a FOUR-PER-EM SPACE.
+	 *                                - "\u2006" (Unicode U+2006), a SIX-PER-EM SPACE.
+	 *                                - "\u2007" (Unicode U+2007), a FIGURE SPACE.
+	 *                                - "\u2008" (Unicode U+2008), a PUNCTUATION SPACE.
+	 *                                - "\u2009" (Unicode U+2009), a THIN SPACE.
+	 *                                - "\u200A" (Unicode U+200A), a HAIR SPACE.
+	 *                                - "\u2028" (Unicode U+2028), a LINE SEPARATOR.
+	 *                                - "\u2029" (Unicode U+2029), a PARAGRAPH SEPARATOR.
+	 *                                - "\u202F" (Unicode U+202F), a NARROW NO-BREAK SPACE.
+	 *                                - "\u205F" (Unicode U+205F), a MEDIUM MATHEMATICAL SPACE.
+	 *                                - "\u3000" (Unicode U+3000), an IDEOGRAPHIC SPACE.
+	 *                                - "\u0085" (Unicode U+0085), a NEXT LINE (NEL).
+	 *                                - "\u180E" (Unicode U+180E), a MONGOLIAN VOWEL SEPARATOR.
+	 * @param string|null $encoding   Optional. The character encoding. If it is omitted or null, the internal
+	 *                                character encoding value will be used.
+	 * @return string The trimmed string, or the original string when trimming is not possible.
+	 */
+	function mb_trim( string $str, ?string $characters = null, ?string $encoding = null ) {
+		if ( null === $encoding ) {
+			$encoding = mb_internal_encoding();
+		}
+
+		if ( ! mb_check_encoding( '', $encoding ) ) {
+			return $str; // If the encoding is invalid, return the original string.
+		}
+
+		if ( '' === $characters ) {
+			return $str; // An empty character list trims nothing.
+		}
+
+		$is_utf8 = in_array( strtolower( $encoding ), array( 'utf-8', 'utf8' ), true );
+
+		if ( null === $characters ) {
+			// The default list is written as UTF-8 literals, so it must never be transcoded.
+			$characters = " \t\n\r\0\v\f\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
+		} elseif ( ! $is_utf8 ) {
+			// Caller-supplied characters arrive in `$encoding`; the regex below needs UTF-8.
+			$characters = mb_convert_encoding( $characters, 'UTF-8', $encoding );
+		}
+
+		// Keep `$str` untouched so every failure path can return the caller's original bytes.
+		$subject = $is_utf8 ? $str : mb_convert_encoding( $str, 'UTF-8', $encoding );
+
+		$char_class = preg_quote( $characters, '/' );
+		if ( PHP_VERSION_ID < 70300 ) {
+			// preg_quote() only escapes "-" as of PHP 7.3; unescaped it would form a range inside [].
+			$char_class = str_replace( '-', '\-', $char_class );
+		}
+
+		// Trim the characters from both ends of the string.
+		$pattern        = '/^[' . $char_class . ']+|[' . $char_class . ']+$/uD';
+		$trimmed_string = preg_replace( $pattern, '', $subject );
+
+		if ( null === $trimmed_string ) {
+			return $str; // preg_replace() signals failure with null (e.g. malformed UTF-8 input).
+		}
+
+		// Convert back to the original encoding if it was not UTF-8.
+		if ( ! $is_utf8 ) {
+			$trimmed_string = mb_convert_encoding( $trimmed_string, $encoding, 'UTF-8' );
+		}
+
+		return $trimmed_string;
+	}
+}
+
 // IMAGETYPE_AVIF constant is only defined in PHP 8.x or later.
 if ( ! 
defined( 'IMAGETYPE_AVIF' ) ) {
 	define( 'IMAGETYPE_AVIF', 19 );
diff --git a/src/wp-includes/formatting.php b/src/wp-includes/formatting.php
index 72fcb4e554c38..20542a8103951 100644
--- a/src/wp-includes/formatting.php
+++ b/src/wp-includes/formatting.php
@@ -6443,3 +6443,39 @@ function maybe_hash_hex_color( $color ) {
 
 	return $color;
 }
+
+// Declared explicitly so the variable stays global even if this file is included from within a function.
+global $js_trimmables;
+
+/**
+ * Global variable containing the characters to trim from the beginning and end of a string.
+ *
+ * This variable is used by the `js_trim()` function to define which characters
+ * should be trimmed from a string. It lists the code points that JavaScript's
+ * `String.prototype.trim()` treats as whitespace or line terminators.
+ *
+ * @since 6.9.0
+ *
+ * @var string
+ */
+$js_trimmables = "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{FEFF}";
+
+/**
+ * Trims whitespace from the beginning and end of a string the way JavaScript does.
+ *
+ * This function is similar to `trim()`, but it uses the custom set of characters
+ * defined in the global `$js_trimmables` variable and treats the input as UTF-8.
+ * Unlike `trim()`, it leaves NUL bytes in place.
+ *
+ * @since 6.9.0
+ *
+ * @global string $js_trimmables The characters to trim.
+ *
+ * @param string $string The string to trim.
+ * @return string The trimmed string.
+ */
+function js_trim( $string ) {
+	global $js_trimmables;
+
+	return mb_trim( $string, $js_trimmables, 'UTF-8' );
+}
diff --git a/tests/phpunit/tests/compat/mbTrim.php b/tests/phpunit/tests/compat/mbTrim.php
new file mode 100644
index 0000000000000..6f51bdccf0d9c
--- /dev/null
+++ b/tests/phpunit/tests/compat/mbTrim.php
@@ -0,0 +1,101 @@
+assertTrue( function_exists( 'mb_trim' ) );
+	}
+
+	/**
+	 * @ticket 63804
+	 *
+	 * @dataProvider data_mb_trim
+	 *
+	 * @param string      $input      The input string to be trimmed.
+	 * @param string      $expected   The expected trimmed result.
+	 * @param string|null $characters Optional. The characters to trim. Default null (whitespace).
+	 * @param string|null $encoding   Optional. The character encoding. 
Default null (internal encoding).
+	 */
+	public function test_mb_trim( $input, $expected, $characters = null, $encoding = null ): void {
+		$this->assertSame(
+			$expected,
+			mb_trim( $input, $characters, $encoding )
+		);
+	}
+
+	/**
+	 * Data provider for mb_trim tests.
+	 *
+	 * @return array[]
+	 */
+	public static function data_mb_trim(): array {
+		return array(
+			// Basic ASCII whitespace.
+			array( ' hello ', 'hello' ),
+			array( "\t\n\rhello\n\r\t", 'hello' ),
+			// Unicode whitespace.
+			array( "\u{00A0}hello\u{00A0}", 'hello' ),
+			array( "\u{3000}hello\u{3000}", 'hello' ),
+			array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ),
+			// Custom characters.
+			array( 'xxhelloxx', 'hello', 'x' ),
+			array( 'xyhelloyx', 'hello', 'xy' ),
+			// No trimming needed.
+			array( 'hello', 'hello' ),
+			// Empty string.
+			array( '', '' ),
+			// With encoding.
+			array( ' hello ', 'hello', null, 'UTF-8' ),
+			// Null characters.
+			array( "\0hello\0", 'hello' ),
+			// Vertical tab and form feed.
+			array( "\v\fhello\f\v", 'hello' ),
+		);
+	}
+
+	/**
+	 * @ticket 63804
+	 *
+	 * @dataProvider data_mb_trim_non_utf8
+	 *
+	 * @param string $input    The input string to be trimmed.
+	 * @param string $expected The expected trimmed result.
+	 * @param string $encoding The character encoding.
+	 */
+	public function test_mb_trim_non_utf8_encodings( $input, $expected, $encoding ): void {
+		$this->assertSame(
+			$expected,
+			mb_trim( $input, null, $encoding )
+		);
+	}
+
+	/**
+	 * Data provider for non-UTF-8 encoding tests.
+	 *
+	 * @return array[]
+	 */
+	public static function data_mb_trim_non_utf8(): array {
+		// Japanese "ヒス" (HIS) in Shift_JIS, with ASCII spaces around.
+		$shift_jis_str      = mb_convert_encoding( ' ヒス ', 'SJIS', 'UTF-8' );
+		$shift_jis_expected = mb_convert_encoding( 'ヒス', 'SJIS', 'UTF-8' );
+
+		// Latin1 example with spaces.
+		$latin1_str      = mb_convert_encoding( ' café ', 'ISO-8859-1', 'UTF-8' );
+		$latin1_expected = mb_convert_encoding( 'café', 'ISO-8859-1', 'UTF-8' );
+
+		return array(
+			array( $shift_jis_str, $shift_jis_expected, 'SJIS' ),
+			array( $latin1_str, $latin1_expected, 'ISO-8859-1' ),
+		);
+	}
+}
diff --git a/tests/phpunit/tests/formatting/jsTrim.php b/tests/phpunit/tests/formatting/jsTrim.php
new file mode 100644
index 0000000000000..ff19cdbe7a0bd
--- /dev/null
+++ b/tests/phpunit/tests/formatting/jsTrim.php
@@ -0,0 +1,55 @@
+assertTrue( function_exists( 'js_trim' ) );
+	}
+
+	/**
+	 * @ticket 63804
+	 *
+	 * @dataProvider data_js_trim
+	 *
+	 * @param string $input    The input string to be trimmed.
+	 * @param string $expected The expected trimmed result.
+	 */
+	public function test_js_trim( $input, $expected ): void {
+		$this->assertSame( $expected, js_trim( $input ) );
+	}
+
+	/**
+	 * Data provider for js_trim tests.
+	 *
+	 * @return array[]
+	 */
+	public static function data_js_trim(): array {
+		return array(
+			// Basic ASCII whitespace.
+			array( ' hello ', 'hello' ),
+			array( "\t\n\rhello\n\r\t", 'hello' ),
+			// Unicode whitespace.
+			array( "\u{00A0}hello\u{00A0}", 'hello' ),
+			array( "\u{3000}hello\u{3000}", 'hello' ),
+			array( "\u{00A0}\u{3000} hello \u{3000}\u{00A0}", 'hello' ),
+			// Null characters should not be trimmed by js_trim().
+			array( "\0hello\0", "\0hello\0" ),
+			// Vertical tab and form feed are trimmed.
+			array( "\v\fhello\f\v", 'hello' ),
+			// No trimming needed.
+			array( 'hello', 'hello' ),
+			// Empty string.
+			array( '', '' ),
+		);
+	}
+}