@@ -252,6 +252,10 @@ class UnicodeSet::Lexer {
252252 return U_SUCCESS (errorCode_) && category_ == STRING_LITERAL;
253253 }
254254
// Reports whether this lexical element is a named-element: true only when errorCode_ is a
// success code and category_ equals NAMED_ELEMENT.
255+ bool isNamedElement () const {
256+ return U_SUCCESS (errorCode_) && category_ == NAMED_ELEMENT;
257+ }
258+
// Reports whether this lexical element is a bracketed-element: true only when errorCode_ is a
// success code and category_ equals BRACKETED_ELEMENT.
255259 bool isBracketedElement () const {
256260 return U_SUCCESS (errorCode_) && category_ == BRACKETED_ELEMENT;
257261 }
@@ -267,20 +271,19 @@ class UnicodeSet::Lexer {
267271
// Returns the result of string_.char32At(0) when errorCode_ is a success code and category_ is
// LITERAL_ELEMENT, ESCAPED_ELEMENT, or BRACKETED_ELEMENT; the added (`+`) condition line also
// accepts NAMED_ELEMENT. Returns std::nullopt in every other case.
268272 std::optional<UChar32> codePoint () const {
269273 if (U_SUCCESS (errorCode_) && (category_ == LITERAL_ELEMENT || category_ == ESCAPED_ELEMENT ||
270- category_ == BRACKETED_ELEMENT)) {
274+ category_ == BRACKETED_ELEMENT || category_ == NAMED_ELEMENT )) {
271275 return string_.char32At (0 );
272276 }
273277 return std::nullopt ;
274278 }
275279
276- // If `*this` is a valid property-query, named-element, or set-valued-variable, returns the
277- // set represented by this lexical element, which lives at least as long as `*this`. Null
278- // otherwise.
280+ // If `*this` is a valid property-query or set-valued-variable, returns the set represented
281+ // by this lexical element, which lives at least as long as `*this`. Null otherwise.
279282 const UnicodeSet *set () const {
280283 if (U_FAILURE (errorCode_)) {
281284 return nullptr ;
282285 }
283- if (category_ == PROPERTY_QUERY || category_ == NAMED_ELEMENT || category_ == VARIABLE) {
286+ if (category_ == PROPERTY_QUERY || category_ == VARIABLE) {
284287 if (precomputedSet_ != nullptr ) {
285288 return precomputedSet_;
286289 } else {
@@ -453,11 +456,11 @@ class UnicodeSet::Lexer {
453456 if ((first == u' [' && second == u' :' ) ||
454457 (first == u' \\ ' && (second == u' p' || second == u' P' || second == u' N' ))) {
455458 if (second == u' N' ) {
456- UnicodeSet const queryResult = scanNamedElementBrackets (errorCode);
459+ UChar32 const queryResult = scanNamedElementBrackets (errorCode);
457460 return LexicalElement (
458- LexicalElement::NAMED_ELEMENT, {} , getPos (), errorCode,
461+ LexicalElement::NAMED_ELEMENT, UnicodeString (queryResult) , getPos (), errorCode,
459462 /* precomputedSet=*/ nullptr ,
460- /* set=*/ std::move (queryResult) ,
463+ /* set=*/ {} ,
461464 std::u16string_view (pattern_).substr (start, parsePosition_.getIndex () - start));
462465 } else {
463466 UnicodeSet queryResult = scanPropertyQueryAfterStart (first, second, start, errorCode);
@@ -523,8 +526,24 @@ class UnicodeSet::Lexer {
523526 UChar32 next;
524527 int32_t codePointCount = 0 ;
525528 while (!chars_.atEnd () && U_SUCCESS (errorCode)) {
526- // TODO(egg): Propose making this space-sensitive and recognizing named-element.
527- next = chars_.next (charsOptions_, escaped, errorCode);
529+ // TODO(egg): Propose making this space-sensitive.
530+ const RuleCharacterIterator::Pos beforeNext = getPos ();
531+ next = chars_.next (charsOptions_ & ~RuleCharacterIterator::PARSE_ESCAPES,
532+ unusedEscaped, errorCode);
533+ if (next == u' \\ ' ) {
534+ if (chars_.next (charsOptions_ & ~(RuleCharacterIterator::PARSE_ESCAPES |
535+ RuleCharacterIterator::SKIP_WHITESPACE),
536+ unusedEscaped, errorCode) == u' N' ) {
537+ next = scanNamedElementBrackets (errorCode);
538+ escaped = true ;
539+ } else {
540+ chars_.setPos (beforeNext);
541+ // Parse the escape.
542+ next = chars_.next (charsOptions_, escaped, errorCode);
543+ }
544+ } else {
545+ escaped = false ;
546+ }
528547 if (!escaped && next == u' }' ) {
529548 return LexicalElement (
530549 codePointCount == 1 ? LexicalElement::BRACKETED_ELEMENT
@@ -551,8 +570,7 @@ class UnicodeSet::Lexer {
551570 }
552571 }
553572
554- // TODO(egg): Return UChar32 per ICU-TC 2025-12-11.
555- UnicodeSet scanNamedElementBrackets (UErrorCode &errorCode) {
573+ UChar32 scanNamedElementBrackets (UErrorCode &errorCode) {
556574 UBool unusedEscaped;
557575 const UChar32 open = chars_.next (charsOptions_ & ~(RuleCharacterIterator::PARSE_ESCAPES |
558576 RuleCharacterIterator::SKIP_WHITESPACE),
@@ -571,7 +589,7 @@ class UnicodeSet::Lexer {
571589 errorCode);
572590 result.setPattern (
573591 pattern_.tempSubStringBetween (nameStart - 3 , parsePosition_.getIndex ()));
574- return result;
592+ return result. charAt ( 0 ) ;
575593 }
576594 }
577595 }
@@ -1058,6 +1076,11 @@ void UnicodeSet::parseElements(Lexer &lexer,
10581076 // Element ::= RangeElement
10591077 // | string-literal
10601078 // | bracketed-element
1079+ // RangeElement ::= literal-element
1080+ // | escaped-element
1081+ // | named-element
1082+ // codePoint().has_value() is true for a lexical element if it is a RangeElement or a bracketed-element
1083+ // (which should become a RangeElement, but this will take another ICU proposal).
10611084 if (lexer.lookahead ().isBracketedElement () || lexer.lookahead ().isStringLiteral ()) {
10621085 add (*lexer.lookahead ().element ());
10631086 rebuiltPat.append (u' {' );
@@ -1107,7 +1130,7 @@ void UnicodeSet::parseElements(Lexer &lexer,
11071130 lexer.lookahead2 ().debugString (),
11081131 lexer, ec);
11091132 }
1110- } else if (lexer.lookahead ().codePoint ().has_value ()) {
1133+ } else if (lexer.lookahead ().codePoint ().has_value () && !lexer. lookahead (). isBracketedElement () ) {
11111134 last = *lexer.lookahead ().codePoint ();
11121135 } else {
11131136 U_UNICODESET_RETURN_WITH_PARSE_ERROR (" RangeElement" , lexer.lookahead ().debugString (), lexer, ec);
0 commit comments