@@ -399,7 +399,7 @@ private RegExpTree parseCharset() {
399399 CharRanges ieExplicits = CharRanges .EMPTY ;
400400 while (pos < limit && pattern .charAt (pos ) != ']' ) {
401401 char ch = pattern .charAt (pos );
402- char start ;
402+ int start ;
403403 if (ch == '\\' ) {
404404 ++pos ;
405405 char possibleGroupName = pattern .charAt (pos );
@@ -414,7 +414,7 @@ private RegExpTree parseCharset() {
414414 start = ch ;
415415 ++pos ;
416416 }
417- char end = start ;
417+ int end = start ;
418418 if (pos + 1 < limit && pattern .charAt (pos ) == '-'
419419 && pattern .charAt (pos + 1 ) != ']' ) {
420420 ++pos ;
@@ -459,20 +459,22 @@ private RegExpTree parseCharset() {
459459 }
460460
461461 /**
462- * Parses an escape to a code point.
463- * Some of the characters parsed here have special meanings in various
464- * contexts, so contexts must filter those instead.
465- * E.g. '\b' means a different thing inside a charset than without.
462+ * Parses an escape to a code point. Some of the characters parsed here have special meanings
463+ * in various contexts, so contexts must filter those instead. E.g. '\b' means a different
464+ * thing inside a charset than without.
466465 */
467- private char parseEscapeChar () {
466+ private int parseEscapeChar () {
468467 char ch = pattern .charAt (pos ++);
469468 switch (ch ) {
470469 case 'b' : return '\b' ;
471470 case 'f' : return '\f' ;
472471 case 'n' : return '\n' ;
473472 case 'r' : return '\r' ;
474473 case 't' : return '\t' ;
475- case 'u' : return parseHex (4 );
474+ case 'u' :
475+ return (flags .contains ("u" ) && pos < limit && pattern .charAt (pos ) == '{' )
476+ ? parseBracedUnicodeEscape ()
477+ : parseHex (4 );
476478 case 'v' : return '\u000b' ;
477479 case 'x' : return parseHex (2 );
478480 default :
@@ -599,18 +601,23 @@ private RegExpTree parseEscape() {
599601 ++pos ;
600602 return new Charset (charGroup , CharRanges .EMPTY );
601603 }
602- return new Text ("" + parseEscapeChar ());
604+ return new Text (new String ( Character . toChars ( parseEscapeChar ()) ));
603605 }
604606 }
605607
606- /**
607- * Parses n hex digits to a code-unit.
608- */
609- private char parseHex (int n ) {
608+ /** Parses n hex digits to an integer value (a code unit, or a code point for braced escapes). */
609+ private int parseHex (int n ) {
610610 if (pos + n > limit ) {
611611 throw new IllegalArgumentException (
612612 "Abbreviated hex escape " + pattern .substring (pos ));
613613 }
614+ if (n > 7 ) {
615+ // We need to guard the MSB to prevent overflow.
616+ throw new IllegalArgumentException (
617+ "Cannot parse hexadecimal encoding wider than 28 bits: "
618+ + pattern .substring (pos , pos + n ));
619+ }
620+
614621 int result = 0 ;
615622 while (--n >= 0 ) {
616623 char ch = pattern .charAt (pos );
@@ -627,7 +634,31 @@ private char parseHex(int n) {
627634 ++pos ;
628635 result = (result << 4 ) | digit ;
629636 }
630- return (char ) result ;
637+ return result ;
638+ }
639+
640+ private int parseBracedUnicodeEscape () {
641+ int openBrace = pos ;
642+ checkState (pattern .charAt (pos ++) == '{' );
643+
644+ int closeBrace = pos ;
645+ while (closeBrace < limit && pattern .charAt (closeBrace ) != '}' ) {
646+ closeBrace ++;
647+ }
648+ if (closeBrace == limit ) {
649+ throw new IllegalArgumentException (
650+ "Malformed unicode escape: expected '}' after " + pattern .substring (openBrace ));
651+ } else if (closeBrace == pos ) {
652+ throw new IllegalArgumentException ("Empty unicode escape" );
653+ }
654+
655+ int result = parseHex (closeBrace - pos );
656+ if (result > 0x10FFFF ) {
657+ throw new IllegalArgumentException (
658+ "Unicode must be at most 0x10FFFF: " + pattern .substring (openBrace + 1 , pos ));
659+ }
660+ pos ++; // Consume the close brace.
661+ return result ;
631662 }
632663
633664 private boolean isRepetitionStart (char ch ) {
0 commit comments