Skip to content

Commit 192fecf

Browse files
committed
rbnf: light clean up
1 parent 6499b12 commit 192fecf

2 files changed

Lines changed: 62 additions & 80 deletions

File tree

babel/rbnf.py

Lines changed: 57 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from __future__ import unicode_literals
3434

3535
import re
36-
import sys
3736
import math
3837
import decimal
3938
import collections
@@ -50,15 +49,18 @@
5049
OPT_START = 7
5150
OPT_END = 8
5251

53-
regex = [
54-
(PLURAL_TOKEN, r"\$\((.+)\)\$"),
55-
(INTEGRAL_TOKEN, r"←([^←[]*)←(←?)"),
56-
(PREVIOUS_TOKEN, r"→→→"),
57-
(REMAINDER_TOKEN, r"→([^→[]*)→"),
58-
(SUBSTITUTION_TOKEN, r"=([^=[]+)="),
59-
(OPT_START, r"\["),
60-
(OPT_END, r"\]"),
61-
(TEXT_TOKEN, r"[^[\]=→←]+"),
52+
token_regexes = [
53+
(t, re.compile(r))
54+
for (t, r) in [
55+
(PLURAL_TOKEN, r"\$\((.+)\)\$"),
56+
(INTEGRAL_TOKEN, r"←([^←[]*)←(←?)"),
57+
(PREVIOUS_TOKEN, r"→→→"),
58+
(REMAINDER_TOKEN, r"→([^→[]*)→"),
59+
(SUBSTITUTION_TOKEN, r"=([^=[]+)="),
60+
(OPT_START, r"\["),
61+
(OPT_END, r"\]"),
62+
(TEXT_TOKEN, r"[^[\]=→←]+"),
63+
]
6264
]
6365

6466
INTERNAL_REF = 1
@@ -76,6 +78,8 @@
7678
INFINITY_RULE = 'Inf'
7779
NOT_A_NUMBER_RULE = 'NaN'
7880
SPECIAL_FRACTION_RULE = 'x,x' # there are other options but not existent in CLDR
81+
82+
7983
# locale.number_symbols['decimal']
8084
# normal rule means a number is specified
8185

@@ -86,9 +90,6 @@ class RulesetNotFound(RBNFError): pass
8690
class RuleNotFound(RBNFError): pass
8791

8892
TokenInfo = collections.namedtuple('TokenInfo', 'type reference optional')
89-
90-
# compile regex
91-
regex_comp = [(t, re.compile(r)) for t, r in regex]
9293

9394

9495
def tokenize(text):
@@ -97,37 +98,41 @@ def tokenize(text):
9798
9899
Text is parsed by matching a list of regular expressions
99100
against the beginning of the text. If the regex matches,
100-
a token is generated and we continue with the rest of
101+
a token is generated, and we continue with the rest of
101102
the text.
102103
103-
Some of the tokens are optional if they are in squared
104-
brackets. From regular expressions for the begining and
104+
Some tokens are optional if they are in square
105+
brackets. From regular expressions for the beginning and
105106
end of the optional section no tokens are generated.
106-
Instead all the tokens inside the optional section are
107-
flaged as optional.
107+
Instead, all the tokens inside the optional section are
108+
flagged as optional.
108109
109-
Some of them tokens are referencing other rulesets by name
110-
this information is stored in the token along with the type
110+
Some of the tokens reference other rulesets by name.
111+
This information is stored in the token along with the type
111112
of reference.
112113
113114
"""
114-
# remove uneccesarry syntax (only used in the non-xml form)
115-
if text.endswith(";"): text = text[:-1]
116-
if text.startswith("'"): text = text[1:]
115+
# remove unnecessary syntax (only used in the non-xml form)
116+
if text.endswith(";"):
117+
text = text[:-1]
118+
if text.startswith("'"):
119+
text = text[1:]
117120

118121
optional = False
119122

120123
while text:
121124
stop = True
122125
# print("TEXT: ", text)
123-
for tok, regex in regex_comp:
126+
for tok, regex in token_regexes:
124127
# print(token, regex)
125128
match = regex.match(text)
126129
if match:
127130
stop = False
128131
text = text[match.end():]
129-
if tok == OPT_START: optional = True
130-
elif tok == OPT_END: optional = False
132+
if tok == OPT_START:
133+
optional = True
134+
elif tok == OPT_END:
135+
optional = False
131136
else:
132137
token = _gen_token(tok, match, optional)
133138
if token:
@@ -138,7 +143,7 @@ def tokenize(text):
138143

139144

140145
def _gen_token(tok, match, optional):
141-
# remove this if CLCR is updated based on ticket
146+
# remove this if CLDR is updated based on ticket
142147
# http://unicode.org/cldr/trac/ticket/10544
143148
if tok == INTEGRAL_TOKEN and match.group(2) == '←':
144149
warnings.warn('Unsupported syntax ←...←←', SyntaxWarning)
@@ -171,13 +176,6 @@ def _parse_reference(string):
171176
return INTERNAL_REF, "" # defaults to this
172177

173178

174-
def untokenize_ICU():
175-
"""
176-
TODO implement ICU style representation
177-
rather make Ruleset.format_icu()
178-
"""
179-
180-
181179
class RuleBasedNumberFormat(object):
182180
"""
183181
RuleBasedNumberFormat's behavior consists of one or more rule sets
@@ -206,6 +204,7 @@ class RuleBasedNumberFormat(object):
206204
the default rule set for this formatter.
207205
"""
208206
group_types = ('SpelloutRules', 'OrdinalRules', 'NumberingSystemRules')
207+
209208
# spell number should go for Spelloutrules
210209
# make interface for the other two groups
211210

@@ -434,15 +433,15 @@ class Ruleset(object):
434433
435434
SPECIAL_FRACTION_RULE = 'x,x' # there are other options but not existent in CLDR
436435
"""
436+
437437
def __init__(self, name, private=False):
438438
self.name = name
439439
self.private = private
440440
self.rules = []
441441

442-
443442
def apply(self, number, parent, fractional=False):
444443
number = decimal.Decimal(str(number))
445-
# str is needed to avoid unecessary precision
444+
# str is needed to avoid unnecessary precision
446445
# decimal is necessary for exact representation in fraction rules
447446

448447
context = {
@@ -451,7 +450,7 @@ def apply(self, number, parent, fractional=False):
451450
'fractional': fractional,
452451
'omit_optional': False, # no default value is defined in the spec
453452
SUBSTITUTION_TOKEN: number,
454-
'remainder_as_fractional': False # format remainder as fractional rule?
453+
'remainder_as_fractional': False # format remainder as fractional rule?
455454
}
456455
integral, remainder = divmod(number, 1)
457456

@@ -469,7 +468,7 @@ def apply(self, number, parent, fractional=False):
469468

470469
# negative number rule
471470
if number < 0:
472-
rule = self.get_rule_special(NEGATIVE_NUMBER_RULE)
471+
rule = self.get_rule_special(NEGATIVE_NUMBER_RULE)
473472
if rule is None:
474473
raise RuleNotFound("negative number rule (%s)" % NEGATIVE_NUMBER_RULE)
475474
context[REMAINDER_TOKEN] = abs(number)
@@ -507,22 +506,20 @@ def apply(self, number, parent, fractional=False):
507506
i, r = divmod(integral, rule.divisor)
508507
context[REMAINDER_TOKEN] = r
509508
context[INTEGRAL_TOKEN] = i
510-
context[PREVIOUS_TOKEN] = index-1 # get rule using ruleset
511-
context['omit_optional'] = r != 0 # only if not even multiple (TODO no need to store separatelly)
509+
context[PREVIOUS_TOKEN] = index - 1 # get rule using ruleset
510+
context['omit_optional'] = r != 0 # only if not even multiple (TODO no need to store separately)
512511
return rule.apply(number, context)
513512

514-
515513
def get_rule_special(self, val, strict=False):
516514
if val in Rule.specials:
517515
for r in self.rules:
518516
if r.value == val:
519517
return r
520-
521-
# return last rule if no match occured and strict is false
518+
519+
# return last rule if no match occurred and strict is false
522520
if not strict:
523521
return self.rules[-1]
524522

525-
526523
def get_rule_integral(self, val):
527524
"""
528525
Binary-search the rule list for the rule with the highest base value
@@ -534,13 +531,13 @@ def get_rule_integral(self, val):
534531
it in the rule list. Otherwise, use the rule itself.
535532
"""
536533
# automatically return last rule if no range matched
537-
ret = len(self.rules)-1
534+
ret = len(self.rules) - 1
538535

539-
for i in range(len(self.rules)-1):
536+
for i in range(len(self.rules) - 1):
540537
if self.rules[i].value in Rule.specials:
541538
continue
542-
543-
if self.rules[i].value <= val < self.rules[i+1].value:
539+
540+
if self.rules[i].value <= val < self.rules[i + 1].value:
544541
ret = i
545542
break
546543

@@ -553,16 +550,15 @@ def get_rule_integral(self, val):
553550

554551
return ret
555552

556-
557553
def get_rule_fractional(self, val):
558554
"""If the rule set is a fraction rule set, do the following:
559555
560556
Ignore negative-number and fraction rules.
561-
557+
562558
For each rule in the list, multiply the number being formatted (which
563559
will always be between 0 and 1) by the rule's base value. Keep track
564560
of the distance between the result and the nearest integer.
565-
561+
566562
Use the rule that produced the result closest to zero in the above
567563
calculation. In the event of a tie or a direct hit, use the first
568564
matching rule encountered. (The idea here is to try each rule's base
@@ -582,35 +578,33 @@ def get_rule_fractional(self, val):
582578
for i, rule in enumerate(self.rules):
583579
if rule.value in Rule.specials or rule.value == 0: # ignore specials and 0 rules
584580
continue
585-
d = abs(round(val*rule.value) - val*rule.value)
581+
d = abs(round(val * rule.value) - val * rule.value)
586582
dists.append((i, d))
587583

588584
# get the index of the closest 0 match
589585
bst = min(dists, key=lambda x: x[1])[0]
590586

591587
# there is a following rule
592-
if len(self.rules) > bst+1 and \
593-
self.rules[bst].value == self.rules[bst+1].value and \
594-
val*self.rules[bst].value > 1:
588+
if len(self.rules) > bst + 1 and \
589+
self.rules[bst].value == self.rules[bst + 1].value and \
590+
val * self.rules[bst].value > 1:
595591
bst += 1
596592

597593
return bst
598594

599-
600595
def __repr__(self):
601-
return 'Ruleset %s %s\n%s\n' % (self.name, self.private, '\n'.join(['\t'+str(r) for r in self.rules]))
596+
return 'Ruleset %s %s\n%s\n' % (self.name, self.private, '\n'.join(['\t' + str(r) for r in self.rules]))
602597

603598

604599
class Rule(object):
605600
"""
606601
A rule consists of a base value, a divisor, rule text, and zero, one, or two substitutions.
607602
"""
608-
specials = (
603+
specials = {
609604
NEGATIVE_NUMBER_RULE, IMPROPER_FRACTION_RULE,
610605
PROPER_FRACTION_RULE, MASTER_RULE, INFINITY_RULE,
611606
NOT_A_NUMBER_RULE, SPECIAL_FRACTION_RULE,
612-
)
613-
607+
}
614608

615609
def __init__(self, value, text, radix=None):
616610
"""
@@ -647,8 +641,7 @@ def apply(self, number, context):
647641
ruleset = context['search_at'].get_ruleset(ref)
648642
elif ref_type == DECIMAL_REF:
649643
loc = context['search_at']._locale
650-
x = numbers.format_decimal(number, format=ref, locale=loc)
651-
res.append(x)
644+
res.append(format_decimal(number, format=ref, locale=loc))
652645

653646
if ruleset:
654647
if t.type == REMAINDER_TOKEN and context['remainder_as_fractional']:
@@ -671,10 +664,8 @@ def apply(self, number, context):
671664
else:
672665
raise ValueError('unknown token %s', t)
673666

674-
675667
return ''.join(res)
676668

677-
678669
@property
679670
def divisor(self):
680671
"""it is the highest power of the radix less than or equal to the rule's base value"""
@@ -688,16 +679,14 @@ def divisor(self):
688679
def substitutions(self):
689680
return len([t for t in self.tokens if t.type in REFERENCE_TOKENS])
690681

691-
692682
def _parse(self, text):
693683
try:
694684
self.tokens = [t for t in tokenize(text)]
695685
except ValueError:
696-
raise TokenizationError(self.text)
697-
686+
raise TokenizationError(text)
698687

699688
def __repr__(self):
700689
return 'Rule %s (%s) - %s\n%s\n' % (
701690
self.value, self.text,
702691
self.radix,
703-
'\n'.join(['\t\t'+str(t) for t in self.tokens]))
692+
'\n'.join(['\t\t' + str(t) for t in self.tokens]))

0 commit comments

Comments
 (0)