3333from __future__ import unicode_literals
3434
3535import re
36- import sys
3736import math
3837import decimal
3938import collections
5049OPT_START = 7
5150OPT_END = 8
5251
# (token type, compiled pattern) pairs that tokenize() tries, in this
# order, against the beginning of the remaining rule text.
# PREVIOUS_TOKEN has to be listed before REMAINDER_TOKEN: the remainder
# pattern would otherwise match the first two arrows of "→→→" as a
# remainder token with an empty reference.
token_regexes = [
    (token_type, re.compile(pattern))
    for token_type, pattern in (
        (PLURAL_TOKEN, r"\$\((.+)\)\$"),
        (INTEGRAL_TOKEN, r"←([^←[]*)←(←?)"),
        (PREVIOUS_TOKEN, r"→→→"),
        (REMAINDER_TOKEN, r"→([^→[]*)→"),
        (SUBSTITUTION_TOKEN, r"=([^=[]+)="),
        (OPT_START, r"\["),
        (OPT_END, r"\]"),
        (TEXT_TOKEN, r"[^[\]=→←]+"),
    )
]
6365
6466INTERNAL_REF = 1
7678INFINITY_RULE = 'Inf'
7779NOT_A_NUMBER_RULE = 'NaN'
7880SPECIAL_FRACTION_RULE = 'x,x' # there are other options but not existent in CLDR
81+
82+
7983# locale.number_symbols['decimal']
8084# normal rule means a number is specified
8185
@@ -86,9 +90,6 @@ class RulesetNotFound(RBNFError): pass
8690class RuleNotFound (RBNFError ): pass
8791
8892TokenInfo = collections .namedtuple ('TokenInfo' , 'type reference optional' )
89-
90- # compile regex
91- regex_comp = [(t , re .compile (r )) for t , r in regex ]
9293
9394
9495def tokenize (text ):
@@ -97,37 +98,41 @@ def tokenize(text):
9798
9899 Text parsed by matching a list of regular expressions
99100 against the beginning of the text. If the regex match
100- a token is generated and we continue with the rest of
101+ a token is generated, and we continue with the rest of
101102 the text.
102103
103- Some of the tokens are optional if they are in squared
104- brackets. From regular expressions for the begining and
104+ Some tokens are optional if they are in squared
105+ brackets. From regular expressions for the beginning and
105106 end of the optional section no tokens are generated.
106- Instead all the tokens inside the optional section are
107- flaged as optional.
107+ Instead, all the tokens inside the optional section are
108+ flagged as optional.
108109
109- Some of them tokens are referencing other rulesets by name
110- this information is stored in the token along with the type
110+ Some of the tokens are referencing other rulesets by name.
111+ This information is stored in the token along with the type
111112 of reference.
112113
113114 """
114- # remove uneccesarry syntax (only used in the non-xml form)
115- if text .endswith (";" ): text = text [:- 1 ]
116- if text .startswith ("'" ): text = text [1 :]
115+ # remove unnecessary syntax (only used in the non-xml form)
116+ if text .endswith (";" ):
117+ text = text [:- 1 ]
118+ if text .startswith ("'" ):
119+ text = text [1 :]
117120
118121 optional = False
119122
120123 while text :
121124 stop = True
122125 # print("TEXT: ", text)
123- for tok , regex in regex_comp :
126+ for tok , regex in token_regexes :
124127 # print(token, regex)
125128 match = regex .match (text )
126129 if match :
127130 stop = False
128131 text = text [match .end ():]
129- if tok == OPT_START : optional = True
130- elif tok == OPT_END : optional = False
132+ if tok == OPT_START :
133+ optional = True
134+ elif tok == OPT_END :
135+ optional = False
131136 else :
132137 token = _gen_token (tok , match , optional )
133138 if token :
@@ -138,7 +143,7 @@ def tokenize(text):
138143
139144
140145def _gen_token (tok , match , optional ):
141- # remove this if CLCR is updated based on ticket
146+ # remove this if CLDR is updated based on ticket
142147 # http://unicode.org/cldr/trac/ticket/10544
143148 if tok == INTEGRAL_TOKEN and match .group (2 ) == '←' :
144149 warnings .warn ('Unsupported syntax ←...←←' , SyntaxWarning )
@@ -171,13 +176,6 @@ def _parse_reference(string):
171176 return INTERNAL_REF , "" # defaults to this
172177
173178
174- def untokenize_ICU ():
175- """
176- TODO implement ICU style representation
177- rather make Ruleset.format_icu()
178- """
179-
180-
181179class RuleBasedNumberFormat (object ):
182180 """
183181 RuleBasedNumberFormat's behavior consists of one or more rule sets
@@ -206,6 +204,7 @@ class RuleBasedNumberFormat(object):
206204 the default rule set for this formatter.
207205 """
208206 group_types = ('SpelloutRules' , 'OrdinalRules' , 'NumberingSystemRules' )
207+
209208 # spell number should go for Spelloutrules
210209 # make interface for the other two groups
211210
@@ -434,15 +433,15 @@ class Ruleset(object):
434433
435434 SPECIAL_FRACTION_RULE = 'x,x' # there are other options but not existent in CLDR
436435 """
436+
437437 def __init__ (self , name , private = False ):
438438 self .name = name
439439 self .private = private
440440 self .rules = []
441441
442-
443442 def apply (self , number , parent , fractional = False ):
444443 number = decimal .Decimal (str (number ))
445- # str is needed to avoid unecessary precision
444+ # str is needed to avoid unnecessary precision
446445 # decimal is necessary for exact representation in fraction rules
447446
448447 context = {
@@ -451,7 +450,7 @@ def apply(self, number, parent, fractional=False):
451450 'fractional' : fractional ,
452451 'omit_optional' : False , # no default value is defined in the spec
453452 SUBSTITUTION_TOKEN : number ,
454- 'remainder_as_fractional' : False # format remainder as fractional rule?
453+ 'remainder_as_fractional' : False # format remainder as fractional rule?
455454 }
456455 integral , remainder = divmod (number , 1 )
457456
@@ -469,7 +468,7 @@ def apply(self, number, parent, fractional=False):
469468
470469 # negative number rule
471470 if number < 0 :
472- rule = self .get_rule_special (NEGATIVE_NUMBER_RULE )
471+ rule = self .get_rule_special (NEGATIVE_NUMBER_RULE )
473472 if rule is None :
474473 raise RuleNotFound ("negative number rule (%s)" % NEGATIVE_NUMBER_RULE )
475474 context [REMAINDER_TOKEN ] = abs (number )
@@ -507,22 +506,20 @@ def apply(self, number, parent, fractional=False):
507506 i , r = divmod (integral , rule .divisor )
508507 context [REMAINDER_TOKEN ] = r
509508 context [INTEGRAL_TOKEN ] = i
510- context [PREVIOUS_TOKEN ] = index - 1 # get rule using ruleset
511- context ['omit_optional' ] = r != 0 # only if not even multiple (TODO no need to store separatelly )
509+ context [PREVIOUS_TOKEN ] = index - 1 # get rule using ruleset
510+ context ['omit_optional' ] = r != 0 # only if not even multiple (TODO no need to store separately )
512511 return rule .apply (number , context )
513512
514-
def get_rule_special(self, val, strict=False):
    """Look up the special rule whose value equals ``val``.

    The rule list is only searched when ``val`` is one of
    ``Rule.specials``.

    :param val: identifier of the special rule to look for
    :param strict: when false (the default) the last rule of the ruleset
        is returned if no rule matched; when true nothing is substituted
    :return: the matching rule, the last rule (non-strict fallback), or
        ``None`` when there is nothing to return
    """
    if val in Rule.specials:
        for rule in self.rules:
            if rule.value == val:
                return rule

    # return the last rule if no match occurred and strict is false; an
    # empty ruleset has no last rule, so report "not found" instead of
    # failing with IndexError
    if not strict and self.rules:
        return self.rules[-1]
    return None
524522
525-
526523 def get_rule_integral (self , val ):
527524 """
528525 Binary-search the rule list for the rule with the highest base value
@@ -534,13 +531,13 @@ def get_rule_integral(self, val):
534531 it in the rule list. Otherwise, use the rule itself.
535532 """
536533 # automatically return last rule if no range matched
537- ret = len (self .rules )- 1
534+ ret = len (self .rules ) - 1
538535
539- for i in range (len (self .rules )- 1 ):
536+ for i in range (len (self .rules ) - 1 ):
540537 if self .rules [i ].value in Rule .specials :
541538 continue
542-
543- if self .rules [i ].value <= val < self .rules [i + 1 ].value :
539+
540+ if self .rules [i ].value <= val < self .rules [i + 1 ].value :
544541 ret = i
545542 break
546543
@@ -553,16 +550,15 @@ def get_rule_integral(self, val):
553550
554551 return ret
555552
556-
557553 def get_rule_fractional (self , val ):
558554 """If the rule set is a fraction rule set, do the following:
559555
560556 Ignore negative-number and fraction rules.
561-
557+
562558 For each rule in the list, multiply the number being formatted (which
563559 will always be between 0 and 1) by the rule's base value. Keep track
564560 of the distance between the result and the nearest integer.
565-
561+
566562 Use the rule that produced the result closest to zero in the above
567563 calculation. In the event of a tie or a direct hit, use the first
568564 matching rule encountered. (The idea here is to try each rule's base
@@ -582,35 +578,33 @@ def get_rule_fractional(self, val):
582578 for i , rule in enumerate (self .rules ):
583579 if rule .value in Rule .specials or rule .value == 0 : # ignore specials and 0 rules
584580 continue
585- d = abs (round (val * rule .value ) - val * rule .value )
581+ d = abs (round (val * rule .value ) - val * rule .value )
586582 dists .append ((i , d ))
587583
588584 # get the index of the closest 0 match
589585 bst = min (dists , key = lambda x : x [1 ])[0 ]
590586
591587 # there is a following rule
592- if len (self .rules ) > bst + 1 and \
593- self .rules [bst ].value == self .rules [bst + 1 ].value and \
594- val * self .rules [bst ].value > 1 :
588+ if len (self .rules ) > bst + 1 and \
589+ self .rules [bst ].value == self .rules [bst + 1 ].value and \
590+ val * self .rules [bst ].value > 1 :
595591 bst += 1
596592
597593 return bst
598594
599-
600595 def __repr__ (self ):
601- return 'Ruleset %s %s\n %s\n ' % (self .name , self .private , '\n ' .join (['\t ' + str (r ) for r in self .rules ]))
596+ return 'Ruleset %s %s\n %s\n ' % (self .name , self .private , '\n ' .join (['\t ' + str (r ) for r in self .rules ]))
602597
603598
604599class Rule (object ):
605600 """
606601 base value, a divisor, rule text, and zero, one, or two substitutions.
607602 """
# identifiers of the rules that are selected by name instead of by a
# numeric base value; a set, since lookups only test membership
specials = {
    NEGATIVE_NUMBER_RULE, IMPROPER_FRACTION_RULE,
    PROPER_FRACTION_RULE, MASTER_RULE, INFINITY_RULE,
    NOT_A_NUMBER_RULE, SPECIAL_FRACTION_RULE,
}
614608
615609 def __init__ (self , value , text , radix = None ):
616610 """
@@ -647,8 +641,7 @@ def apply(self, number, context):
647641 ruleset = context ['search_at' ].get_ruleset (ref )
648642 elif ref_type == DECIMAL_REF :
649643 loc = context ['search_at' ]._locale
650- x = numbers .format_decimal (number , format = ref , locale = loc )
651- res .append (x )
644+ res .append (format_decimal (number , format = ref , locale = loc ))
652645
653646 if ruleset :
654647 if t .type == REMAINDER_TOKEN and context ['remainder_as_fractional' ]:
@@ -671,10 +664,8 @@ def apply(self, number, context):
671664 else :
672665 raise ValueError ('unknown token %s' , t )
673666
674-
675667 return '' .join (res )
676668
677-
678669 @property
679670 def divisor (self ):
680671 """it is the highest exponent of radix less than or equal to the rule's base"""
@@ -688,16 +679,14 @@ def divisor(self):
def substitutions(self):
    """Return how many tokens of this rule have a type in ``REFERENCE_TOKENS``."""
    # count directly instead of materialising a list just to take its length
    return sum(1 for token in self.tokens if token.type in REFERENCE_TOKENS)
690681
691-
def _parse(self, text):
    """Tokenize ``text`` and store the tokens in ``self.tokens``.

    :param text: rule text handed to ``tokenize``
    :raises TokenizationError: when ``tokenize`` raises ``ValueError``;
        the error carries the text that could not be tokenized
    """
    try:
        self.tokens = list(tokenize(text))
    except ValueError:
        # report the argument that was actually being parsed
        raise TokenizationError(text)
698687
699688 def __repr__ (self ):
700689 return 'Rule %s (%s) - %s\n %s\n ' % (
701690 self .value , self .text ,
702691 self .radix ,
703- '\n ' .join (['\t \t ' + str (t ) for t in self .tokens ]))
692+ '\n ' .join (['\t \t ' + str (t ) for t in self .tokens ]))
0 commit comments