1212- http://userguide.icu-project.org/formatparse/numbers/rbnf-examples
1313- http://source.icu-project.org/repos/icu/trunk/icu4j/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
1414
15-
15+
1616"""
1717# Dev notes
1818#
1919# Reloading cldr:
2020# python ./scripts/import_cldr.py ./cldr/cldr-core-35.1/common/ -f
21- #
21+ #
2222# Tokenization is inspired by Ka-Ping Yee's tokenize library
2323
2424# Undocumented syntax (←%rule-name←←)
3030# http://bugs.icu-project.org/trac/ticket/4039
3131
3232
33- from dataclasses import dataclass , field
34- import re
35- import math
36- import decimal
3733import collections
34+ import decimal
35+ import math
36+ import re
3837import warnings
38+ from dataclasses import dataclass , field
3939
4040from babel .core import Locale , get_global
4141
@@ -109,17 +109,13 @@ class RulesetSubstitutionWarning(UserWarning):
109109 pass
110110
111111
112- class RuleMalformationWarning (UserWarning ):
113- pass
114-
115-
116112TokenInfo = collections .namedtuple ('TokenInfo' , 'type reference optional' )
117113
118114
119115def tokenize (text ):
120116 """
121117 Each rule has a list of tokens
122-
118+
123119 Text parsed by matching a list of regular expressions
124120 against the beginning of the text. If the regex matches,
125121 a token is generated, and we continue with the rest of
@@ -130,7 +126,7 @@ def tokenize(text):
130126 end of the optional section no tokens are generated.
131127 Instead, all the tokens inside the optional section are
132128 flagged as optional.
133-
129+
134130 Some of the tokens are referencing other rulesets by name.
135131 This information is stored in the token along with the type
136132 of reference.
@@ -168,7 +164,7 @@ def _gen_token(tok, match, optional):
168164 # remove this if CLDR is updated based on ticket
169165 # http://unicode.org/cldr/trac/ticket/10544
170166 if tok == INTEGRAL_TOKEN and match .group (2 ) == '←' :
171- warnings .warn ('Unsupported syntax ←...←←' , SyntaxWarning )
167+ warnings .warn ('Unsupported syntax ←...←←' , SyntaxWarning , stacklevel = 2 )
172168
173169 if tok in REFERENCE_TOKENS :
174170 reference = _parse_reference (match .group (1 ))
@@ -195,11 +191,12 @@ def _parse_reference(string):
195191 return PUBLIC_REF , string [1 :]
196192 if string [0 ] in '0#' :
197193 return DECIMAL_REF , string
198- warnings .warn (f'Reference parsing error: { string } ' , SyntaxWarning )
194+ warnings .warn (f'Reference parsing error: { string } ' , SyntaxWarning , stacklevel = 3 )
199195 return INTERNAL_REF , "" # defaults to this
200196
201197
202198def compute_divisor (value , radix ):
199+ # compute the highest exponent of radix less than or equal to the rule's base
203200 ctx = decimal .Context (prec = 20 )
204201 if isinstance (value , int ):
205202 if value == 0 :
@@ -227,7 +224,7 @@ class RuleBasedNumberFormat:
227224 :nothing:
228225 Perform the mathematical operation on the number, and format the
229226 result using the rule set containing the current rule, except:
230-
227+
231228 - You can't have an empty substitution descriptor with
232229 a == substitution.
233230 - If you omit the substitution descriptor in a >> substitution
@@ -273,14 +270,14 @@ def match_ruleset(self, ruleset):
273270 if not ruleset :
274271 raise RulesetNotFound (f"No ordinal ruleset is available for { self ._locale } " )
275272 if not exact_match :
276- warnings .warn (f"Using non-specific ordinal ruleset { ruleset } " , RulesetSubstitutionWarning )
273+ warnings .warn (f"Using non-specific ordinal ruleset { ruleset } " , RulesetSubstitutionWarning , stacklevel = 2 )
277274 if not ruleset .startswith ("spellout-" ):
278275 ruleset = "spellout-" + ruleset
279276 ruleset_obj = self .get_ruleset (ruleset )
280277 if not ruleset_obj :
281278 raise RulesetNotFound (
282279 f"Ruleset { ruleset !r} is not one of the ones available for "
283- f"{ self ._locale } : { self .available_rulesets !r} "
280+ f"{ self ._locale } : { self .available_rulesets !r} " ,
284281 )
285282 return ruleset_obj
286283
@@ -297,8 +294,8 @@ def format(self, number, ruleset=None):
297294
298295 try :
299296 return ruleset .apply (number , self )
300- except RecursionError :
301- raise RBNFError (f"Infinite recursion formatting { number } with { ruleset .name } , potentially malformed ruleset!" )
297+ except RecursionError as e :
298+ raise RBNFError (f"Infinite recursion formatting { number } with { ruleset .name } , potentially malformed ruleset!" ) from e
302299
303300 def get_ruleset (self , name ):
304301 for r in self .rulesets :
@@ -338,19 +335,19 @@ class Ruleset:
338335 REGULAR (NON-FRACTION) PROCESSING
339336 ---------------------------------
340337 If the rule set is a regular rule set, do the following:
341-
338+
342339 MASTER_RULE
343340 If the rule set includes a master rule (and the number was passed in as a
344341 double), use the master rule. (If the number being formatted was passed
345342 in as a long, the master rule is ignored.)
346-
343+
347344 NEGATIVE_NUMBER_RULE
348345 If the number is negative, use the negative-number rule.
349-
346+
350347 IMPROPER_FRACTION_RULE
351348 If the number has a fractional part and is greater than 1, use
352349 the improper fraction rule.
353-
350+
354351 PROPER_FRACTION_RULE
355352 If the number has a fractional part and is between 0 and 1, use
356353 the proper fraction rule.
@@ -360,17 +357,17 @@ class Ruleset:
360357 its base value is not an even multiple of its divisor, and the number
361358 is an even multiple of the rule's divisor, use the rule that precedes
362359 it in the rule list. Otherwise, use the rule itself.
363-
360+
364361 FRACTION PROCESSING
365362 -------------------
366363 If the rule set is a fraction rule set, do the following:
367364
368365 Ignore negative-number and fraction rules.
369-
366+
370367 For each rule in the list, multiply the number being formatted (which
371368 will always be between 0 and 1) by the rule's base value. Keep track
372369 of the distance between the result and the nearest integer.
373-
370+
374371 Use the rule that produced the result closest to zero in the above
375372 calculation. In the event of a tie or a direct hit, use the first
376373 matching rule encountered. (The idea here is to try each rule's base
@@ -403,7 +400,7 @@ class Ruleset:
403400 in the original rule text.
404401
405402 The meanings of the substitution token characters are as follows:
406-
403+
407404 →→ REMAINDER_TOKEN
408405 :in normal rule:
409406 Divide the number by the rule's divisor and format the remainder
@@ -413,7 +410,7 @@ class Ruleset:
413410 Isolate the number's fractional part and format it.
414411 :in rule in fraction rule set:
415412 Not allowed.
416-
413+
417414 →→→ PREVIOUS_TOKEN
418415 :in normal rule:
419416 Divide the number by the rule's divisor and format the
@@ -422,7 +419,7 @@ class Ruleset:
422419 rule list.
423420 :in all other rules:
424421 Not allowed.
425-
422+
426423 ←← INTEGRAL_TOKEN
427424 :in normal rule:
428425 Divide the number by the rule's divisor and format the quotient
@@ -432,11 +429,11 @@ class Ruleset:
432429 Isolate the number's integral part and format it.
433430 :in rule in fraction rule set:
434431 Multiply the number by the rule's base value and format the result.
435-
432+
436433 == SUBSTITUTION_TOKEN
437434 :in all rule sets:
438435 Format the number unchanged
439-
436+
440437 [] OPT_START, OPT_END
441438 :in normal rule:
442439 Omit the optional text if the number is an even
@@ -455,7 +452,7 @@ class Ruleset:
455452 :in rule in fraction rule set:
456453 Omit the optional text if multiplying the number by the
457454 rule's base value yields 1.
458-
455+
459456 $(cardinal,plural syntax)$ PLURAL_TOKEN
460457 :in all rule sets:
461458 This provides the ability to choose a word based on the
@@ -464,7 +461,7 @@ class Ruleset:
464461 normally equivalent to the ←← value. This uses the cardinal
465462 plural rules from PluralFormat. All strings used in the
466463 plural format are treated as the same base value for parsing.
467-
464+
468465 $(ordinal,plural syntax)$ PLURAL_TOKEN
469466 :in all rule sets:
470467 This provides the ability to choose a word based on the
@@ -473,11 +470,11 @@ class Ruleset:
473470 normally equivalent to the ←← value. This uses the ordinal
474471 plural rules from PluralFormat. All strings used in the
475472 plural format are treated as the same base value for parsing.
476-
473+
477474 INFINITY_RULE = 'Inf'
478-
475+
479476 NOT_A_NUMBER_RULE = 'NaN'
480-
477+
481478 SPECIAL_FRACTION_RULE = 'x,x' # there are other options but not existent in CLDR
482479 """
483480
@@ -549,7 +546,7 @@ def apply(self, raw_number, parent, fractional=False, index=None):
549546 if index is None :
550547 # not coming from a PREVIOUS TOKEN
551548 index = self .get_rule_integral (integral )
552-
549+
553550 if index is None :
554551 raise RuleNotFound (f"normal rule for { integral } " )
555552 rule = self .rules [index ]
@@ -662,7 +659,7 @@ class Rule:
662659
663660 def __init__ (self , value , text , radix = None ):
664661 """
665- divisor : iterator of literal, back_sub, fwd_sub, lit_exact elements parsed from rule
662+ divisor : iterator of literal, back_sub, fwd_sub, lit_exact elements parsed from rule
666663 """
667664 # TODO handle specials separately?
668665 if value in self .specials :
@@ -673,13 +670,14 @@ def __init__(self, value, text, radix=None):
673670 self .divisor = compute_divisor (self .value , int (radix or 10 ))
674671 self .tokens = list (tokenize (text ))
675672 # undecided whether the number of substitutions should be counted with or without optional ones
676- self .substitutions = len ([t for t in self .tokens if t .type in REFERENCE_TOKENS ])
673+ # counting optional causes infinite recursion in the `lt` locale
674+ self .substitutions = len ([t for t in self .tokens if t .type in REFERENCE_TOKENS if not t .optional ])
677675
678676 def apply (self , number , context ):
679677 """
680678 """
681679 # print(f"RULE {self.value} - divisor: {self.divisor}")
682-
680+
683681 res = []
684682 for t in self .tokens :
685683 if t .optional and not context .omit_optional :
@@ -716,7 +714,7 @@ def apply(self, number, context):
716714 res .append (ruleset .apply (
717715 context .REMAINDER , # number
718716 context .speller ,
719- index = context .previous_rule_index
717+ index = context .previous_rule_index ,
720718 ))
721719
722720 elif t .type == PLURAL_TOKEN :
@@ -729,7 +727,7 @@ def apply(self, number, context):
729727
730728 else :
731729 raise ValueError (f'unknown token { t } ' , t )
732-
730+
733731 return '' .join (res )
734732
735733 # TODO create simpler repr and move logic to testing utils
@@ -761,4 +759,3 @@ def return_value_by_type(self, typ: int):
761759 REMAINDER_TOKEN : self .REMAINDER ,
762760 SUBSTITUTION_TOKEN : self .SUBSTITUTION ,
763761 }[typ ]
764-
0 commit comments