Skip to content

Commit 7a56053

Browse files
committed
add extensive smoke testing with generated TOML files for all RBNF locales, fix linting errors
1 parent a4692a9 commit 7a56053

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

95 files changed

+29177
-144
lines changed

babel/numbers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,7 +1026,7 @@ def __init__(self, message: str, suggestions: list[str] | None = None) -> None:
10261026

10271027
def spell_number(number, locale=LC_NUMERIC, ruleset=None):
10281028
"""Return value spelled out for a specific locale
1029-
1029+
10301030
:param number: the number to format
10311031
:param locale: the `Locale` object or locale identifier
10321032
:param ruleset: the ruleset to use; defaults to regular numbers.

babel/rbnf.py

Lines changed: 40 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@
1212
- http://userguide.icu-project.org/formatparse/numbers/rbnf-examples
1313
- http://source.icu-project.org/repos/icu/trunk/icu4j/demos/src/com/ibm/icu/dev/demo/rbnf/RbnfSampleRuleSets.java
1414
15-
15+
1616
"""
1717
# Dev notes
1818
#
1919
# Reloading cldr:
2020
# python ./scripts/import_cldr.py ./cldr/cldr-core-35.1/common/ -f
21-
#
21+
#
2222
# Tokenization is inspired by Ka-Ping Yee's tokenize library
2323

2424
# Undocumented syntax (←%rule-name←←)
@@ -30,12 +30,12 @@
3030
# http://bugs.icu-project.org/trac/ticket/4039
3131

3232

33-
from dataclasses import dataclass, field
34-
import re
35-
import math
36-
import decimal
3733
import collections
34+
import decimal
35+
import math
36+
import re
3837
import warnings
38+
from dataclasses import dataclass, field
3939

4040
from babel.core import Locale, get_global
4141

@@ -109,17 +109,13 @@ class RulesetSubstitutionWarning(UserWarning):
109109
pass
110110

111111

112-
class RuleMalformationWarning(UserWarning):
113-
pass
114-
115-
116112
TokenInfo = collections.namedtuple('TokenInfo', 'type reference optional')
117113

118114

119115
def tokenize(text):
120116
"""
121117
Each rule has a list of tokens
122-
118+
123119
Text is parsed by matching a list of regular expressions
124120
against the beginning of the text. If a regex matches,
125121
a token is generated, and we continue with the rest of
@@ -130,7 +126,7 @@ def tokenize(text):
130126
end of the optional section no tokens are generated.
131127
Instead, all the tokens inside the optional section are
132128
flagged as optional.
133-
129+
134130
Some of the tokens are referencing other rulesets by name.
135131
This information is stored in the token along with the type
136132
of reference.
@@ -168,7 +164,7 @@ def _gen_token(tok, match, optional):
168164
# remove this if CLDR is updated based on ticket
169165
# http://unicode.org/cldr/trac/ticket/10544
170166
if tok == INTEGRAL_TOKEN and match.group(2) == '←':
171-
warnings.warn('Unsupported syntax ←...←←', SyntaxWarning)
167+
warnings.warn('Unsupported syntax ←...←←', SyntaxWarning, stacklevel=2)
172168

173169
if tok in REFERENCE_TOKENS:
174170
reference = _parse_reference(match.group(1))
@@ -195,11 +191,12 @@ def _parse_reference(string):
195191
return PUBLIC_REF, string[1:]
196192
if string[0] in '0#':
197193
return DECIMAL_REF, string
198-
warnings.warn(f'Reference parsing error: {string}', SyntaxWarning)
194+
warnings.warn(f'Reference parsing error: {string}', SyntaxWarning, stacklevel=3)
199195
return INTERNAL_REF, "" # defaults to this
200196

201197

202198
def compute_divisor(value, radix):
199+
# compute the highest exponent of radix less than or equal to the rule's base
203200
ctx = decimal.Context(prec=20)
204201
if isinstance(value, int):
205202
if value == 0:
@@ -227,7 +224,7 @@ class RuleBasedNumberFormat:
227224
:nothing:
228225
Perform the mathematical operation on the number, and format the
229226
result using the rule set containing the current rule, except:
230-
227+
231228
- You can't have an empty substitution descriptor with
232229
a == substitution.
233230
- If you omit the substitution descriptor in a >> substitution
@@ -273,14 +270,14 @@ def match_ruleset(self, ruleset):
273270
if not ruleset:
274271
raise RulesetNotFound(f"No ordinal ruleset is available for {self._locale}")
275272
if not exact_match:
276-
warnings.warn(f"Using non-specific ordinal ruleset {ruleset}", RulesetSubstitutionWarning)
273+
warnings.warn(f"Using non-specific ordinal ruleset {ruleset}", RulesetSubstitutionWarning, stacklevel=2)
277274
if not ruleset.startswith("spellout-"):
278275
ruleset = "spellout-" + ruleset
279276
ruleset_obj = self.get_ruleset(ruleset)
280277
if not ruleset_obj:
281278
raise RulesetNotFound(
282279
f"Ruleset {ruleset!r} is not one of the ones available for "
283-
f"{self._locale}: {self.available_rulesets!r}"
280+
f"{self._locale}: {self.available_rulesets!r}",
284281
)
285282
return ruleset_obj
286283

@@ -297,8 +294,8 @@ def format(self, number, ruleset=None):
297294

298295
try:
299296
return ruleset.apply(number, self)
300-
except RecursionError:
301-
raise RBNFError(f"Infinite recursion formatting {number} with {ruleset.name}, potentially malformed ruleset!")
297+
except RecursionError as e:
298+
raise RBNFError(f"Infinite recursion formatting {number} with {ruleset.name}, potentially malformed ruleset!") from e
302299

303300
def get_ruleset(self, name):
304301
for r in self.rulesets:
@@ -338,19 +335,19 @@ class Ruleset:
338335
REGULAR (NON-FRACTION) PROCESSING
339336
---------------------------------
340337
If the rule set is a regular rule set, do the following:
341-
338+
342339
MASTER_RULE
343340
If the rule set includes a master rule (and the number was passed in as a
344341
double), use the master rule. (If the number being formatted was passed
345342
in as a long, the master rule is ignored.)
346-
343+
347344
NEGATIVE_NUMBER_RULE
348345
If the number is negative, use the negative-number rule.
349-
346+
350347
IMPROPER_FRACTION_RULE
351348
If the number has a fractional part and is greater than 1, use
352349
the improper fraction rule.
353-
350+
354351
PROPER_FRACTION_RULE
355352
If the number has a fractional part and is between 0 and 1, use
356353
the proper fraction rule.
@@ -360,17 +357,17 @@ class Ruleset:
360357
its base value is not an even multiple of its divisor, and the number
361358
is an even multiple of the rule's divisor, use the rule that precedes
362359
it in the rule list. Otherwise, use the rule itself.
363-
360+
364361
FRACTION PROCESSING
365362
-------------------
366363
If the rule set is a fraction rule set, do the following:
367364
368365
Ignore negative-number and fraction rules.
369-
366+
370367
For each rule in the list, multiply the number being formatted (which
371368
will always be between 0 and 1) by the rule's base value. Keep track
372369
of the distance between the result and the nearest integer.
373-
370+
374371
Use the rule that produced the result closest to zero in the above
375372
calculation. In the event of a tie or a direct hit, use the first
376373
matching rule encountered. (The idea here is to try each rule's base
@@ -403,7 +400,7 @@ class Ruleset:
403400
in the original rule text.
404401
405402
The meanings of the substitution token characters are as follows:
406-
403+
407404
→→ REMAINDER_TOKEN
408405
:in normal rule:
409406
Divide the number by the rule's divisor and format the remainder
@@ -413,7 +410,7 @@ class Ruleset:
413410
Isolate the number's fractional part and format it.
414411
:in rule in fraction rule set:
415412
Not allowed.
416-
413+
417414
→→→ PREVIOUS_TOKEN
418415
:in normal rule:
419416
Divide the number by the rule's divisor and format the
@@ -422,7 +419,7 @@ class Ruleset:
422419
rule list.
423420
:in all other rules:
424421
Not allowed.
425-
422+
426423
←← INTEGRAL_TOKEN
427424
:in normal rule:
428425
Divide the number by the rule's divisor and format the quotient
@@ -432,11 +429,11 @@ class Ruleset:
432429
Isolate the number's integral part and format it.
433430
:in rule in fraction rule set:
434431
Multiply the number by the rule's base value and format the result.
435-
432+
436433
== SUBSTITUTION_TOKEN
437434
:in all rule sets:
438435
Format the number unchanged
439-
436+
440437
[] OPT_START, OPT_END
441438
:in normal rule:
442439
Omit the optional text if the number is an even
@@ -455,7 +452,7 @@ class Ruleset:
455452
:in rule in fraction rule set:
456453
Omit the optional text if multiplying the number by the
457454
rule's base value yields 1.
458-
455+
459456
$(cardinal,plural syntax)$ PLURAL_TOKEN
460457
:in all rule sets:
461458
This provides the ability to choose a word based on the
@@ -464,7 +461,7 @@ class Ruleset:
464461
normally equivalent to the ←← value. This uses the cardinal
465462
plural rules from PluralFormat. All strings used in the
466463
plural format are treated as the same base value for parsing.
467-
464+
468465
$(ordinal,plural syntax)$ PLURAL_TOKEN
469466
:in all rule sets:
470467
This provides the ability to choose a word based on the
@@ -473,11 +470,11 @@ class Ruleset:
473470
normally equivalent to the ←← value. This uses the ordinal
474471
plural rules from PluralFormat. All strings used in the
475472
plural format are treated as the same base value for parsing.
476-
473+
477474
INFINITY_RULE = 'Inf'
478-
475+
479476
NOT_A_NUMBER_RULE = 'NaN'
480-
477+
481478
SPECIAL_FRACTION_RULE = 'x,x' # other options exist, but none of them occur in CLDR
482479
"""
483480

@@ -549,7 +546,7 @@ def apply(self, raw_number, parent, fractional=False, index=None):
549546
if index is None:
550547
# not coming from a PREVIOUS TOKEN
551548
index = self.get_rule_integral(integral)
552-
549+
553550
if index is None:
554551
raise RuleNotFound(f"normal rule for {integral}")
555552
rule = self.rules[index]
@@ -662,7 +659,7 @@ class Rule:
662659

663660
def __init__(self, value, text, radix=None):
664661
"""
665-
divisor : iterator of literal, back_sub, fwd_sub, lit_exact elements parsed from rule
662+
divisor : iterator of literal, back_sub, fwd_sub, lit_exact elements parsed from rule
666663
"""
667664
# TODO handle specials separately?
668665
if value in self.specials:
@@ -673,13 +670,14 @@ def __init__(self, value, text, radix=None):
673670
self.divisor = compute_divisor(self.value, int(radix or 10))
674671
self.tokens = list(tokenize(text))
675672
# could not decide if number of substitutions counted with or without optional ones
676-
self.substitutions = len([t for t in self.tokens if t.type in REFERENCE_TOKENS])
673+
# counting optional causes infinite recursion in the `lt` locale
674+
self.substitutions = len([t for t in self.tokens if t.type in REFERENCE_TOKENS if not t.optional])
677675

678676
def apply(self, number, context):
679677
"""
680678
"""
681679
# print(f"RULE {self.value} - divisor: {self.divisor}")
682-
680+
683681
res = []
684682
for t in self.tokens:
685683
if t.optional and not context.omit_optional:
@@ -716,7 +714,7 @@ def apply(self, number, context):
716714
res.append(ruleset.apply(
717715
context.REMAINDER, # number
718716
context.speller,
719-
index=context.previous_rule_index
717+
index=context.previous_rule_index,
720718
))
721719

722720
elif t.type == PLURAL_TOKEN:
@@ -729,7 +727,7 @@ def apply(self, number, context):
729727

730728
else:
731729
raise ValueError(f'unknown token {t}', t)
732-
730+
733731
return ''.join(res)
734732

735733
# TODO create simpler repr and move logic to testing utils
@@ -761,4 +759,3 @@ def return_value_by_type(self, typ: int):
761759
REMAINDER_TOKEN: self.REMAINDER,
762760
SUBSTITUTION_TOKEN: self.SUBSTITUTION,
763761
}[typ]
764-

scripts/import_cldr.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,7 @@ def parse_global(srcdir, sup):
344344
}
345345
territory_languages[territory.attrib['type']] = languages
346346

347-
347+
348348
# To help the negotiation in `babel.numbers.spell_number`
349349
# add all locales with rbnf rules to a list under `rbnf_locales`
350350
filenames = os.listdir(os.path.join(srcdir, 'rbnf'))
@@ -1077,7 +1077,7 @@ def parse_rbnf_rules(data, tree):
10771077
try:
10781078
rule_obj = rbnf.Rule(rule.attrib['value'], rule.text, radix)
10791079
ruleset_obj.rules.append(rule_obj)
1080-
except rbnf.TokenizationError as e:
1080+
except rbnf.TokenizationError:
10811081
log('%s: Unable to parse rule "%s%s: %s "' % (
10821082
data['locale_id'],
10831083
rule.attrib['value'],

tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pytest
21

32
try:
43
import zoneinfo

0 commit comments

Comments
 (0)