]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ****************************************************************************** | |
2ca993e8 | 3 | * Copyright (C) 1997-2015, International Business Machines |
b75a7d8f A |
4 | * Corporation and others. All Rights Reserved. |
5 | ****************************************************************************** | |
6 | * file name: nfrs.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * Modification history | |
12 | * Date Name Comments | |
13 | * 10/11/2001 Doug Ported from ICU4J | |
14 | */ | |
15 | ||
16 | #include "nfrs.h" | |
17 | ||
18 | #if U_HAVE_RBNF | |
19 | ||
20 | #include "unicode/uchar.h" | |
21 | #include "nfrule.h" | |
22 | #include "nfrlist.h" | |
4388f060 | 23 | #include "patternprops.h" |
b75a7d8f A |
24 | |
25 | #ifdef RBNF_DEBUG | |
26 | #include "cmemory.h" | |
27 | #endif | |
28 | ||
2ca993e8 A |
/**
 * Slot numbers used to index the nonNumericalRules array of an NFRuleSet.
 * The short pattern next to each entry is the shape of number the slot's
 * rule is meant for.
 */
enum {
    /** Negative numbers: -x */
    NEGATIVE_RULE_INDEX = 0,

    /** Improper fractions: x.x */
    IMPROPER_FRACTION_RULE_INDEX = 1,

    /** Proper fractions: 0.x */
    PROPER_FRACTION_RULE_INDEX = 2,

    /** Master rule: x.0 */
    MASTER_RULE_INDEX = 3,

    /** Infinity: Inf */
    INFINITY_RULE_INDEX = 4,

    /** Not-a-number: NaN */
    NAN_RULE_INDEX = 5,

    /** Number of slots; this is the array length, not a slot itself. */
    NON_NUMERICAL_RULE_LENGTH = 6
};
44 | ||
b75a7d8f A |
45 | U_NAMESPACE_BEGIN |
46 | ||
#if 0
// Disabled legacy variant, kept for reference only (it does not compile as
// written: int64_t has no abs() member).
//
// euclid's algorithm works with doubles
// note, doubles only get us up to one quadrillion or so, which
// isn't as much range as we get with longs.  We probably still
// want either 64-bit math, or BigInteger.

static int64_t
util_lcm(int64_t x, int64_t y)
{
    x.abs();
    y.abs();

    if (x == 0 || y == 0) {
        return 0;
    } else {
        do {
            if (x < y) {
                int64_t t = x; x = y; y = t;
            }
            x -= y * (x/y);
        } while (x != 0);

        return y;
    }
}

#else
/**
 * Calculates the least common multiple of x and y.
 *
 * @param x First value.
 * @param y Second value.
 * @return The least common multiple of x and y, or 0 if either argument
 *         is 0.  The product is not checked for overflow.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // Zero has to be rejected up front: with x == y == 0 the power-of-two
    // stripping loop below never terminates, and with exactly one zero
    // argument the computed gcd can be 0, making the final division fault.
    if (x == 0 || y == 0) {
        return 0;
    }

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    int64_t x1 = x;
    int64_t y1 = y;

    // strip (and count) the factors of two shared by both values
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    // the sign of t records which operand it replaces:
    // positive -> x1, negative -> y1
    while (t != 0) {
        while ((t & 1) == 0) {
            t = t >> 1;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    // put the shared factors of two back
    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    // (divide first to keep the intermediate value small)
    return x / gcd * y;
}
#endif
117 | ||
118 | static const UChar gPercent = 0x0025; | |
119 | static const UChar gColon = 0x003a; | |
120 | static const UChar gSemicolon = 0x003b; | |
121 | static const UChar gLineFeed = 0x000a; | |
122 | ||
b75a7d8f A |
123 | static const UChar gPercentPercent[] = |
124 | { | |
125 | 0x25, 0x25, 0 | |
126 | }; /* "%%" */ | |
127 | ||
4388f060 A |
128 | static const UChar gNoparse[] = |
129 | { | |
130 | 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0 | |
131 | }; /* "@noparse" */ | |
132 | ||
2ca993e8 | 133 | NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status) |
b75a7d8f A |
134 | : name() |
135 | , rules(0) | |
2ca993e8 A |
136 | , owner(_owner) |
137 | , fractionRules() | |
b75a7d8f A |
138 | , fIsFractionRuleSet(FALSE) |
139 | , fIsPublic(FALSE) | |
4388f060 | 140 | , fIsParseable(TRUE) |
b75a7d8f | 141 | { |
2ca993e8 A |
142 | for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
143 | nonNumericalRules[i] = NULL; | |
b75a7d8f A |
144 | } |
145 | ||
146 | if (U_FAILURE(status)) { | |
147 | return; | |
148 | } | |
149 | ||
150 | UnicodeString& description = descriptions[index]; // !!! make sure index is valid | |
151 | ||
374ca955 A |
152 | if (description.length() == 0) { |
153 | // throw new IllegalArgumentException("Empty rule set description"); | |
154 | status = U_PARSE_ERROR; | |
73c04bcf | 155 | return; |
374ca955 A |
156 | } |
157 | ||
b75a7d8f A |
158 | // if the description begins with a rule set name (the rule set |
159 | // name can be omitted in formatter descriptions that consist | |
160 | // of only one rule set), copy it out into our "name" member | |
161 | // and delete it from the description | |
162 | if (description.charAt(0) == gPercent) { | |
163 | int32_t pos = description.indexOf(gColon); | |
164 | if (pos == -1) { | |
165 | // throw new IllegalArgumentException("Rule set name doesn't end in colon"); | |
166 | status = U_PARSE_ERROR; | |
167 | } else { | |
168 | name.setTo(description, 0, pos); | |
4388f060 | 169 | while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) { |
b75a7d8f A |
170 | } |
171 | description.remove(0, pos); | |
172 | } | |
173 | } else { | |
374ca955 | 174 | name.setTo(UNICODE_STRING_SIMPLE("%default")); |
b75a7d8f A |
175 | } |
176 | ||
177 | if (description.length() == 0) { | |
178 | // throw new IllegalArgumentException("Empty rule set description"); | |
179 | status = U_PARSE_ERROR; | |
180 | } | |
181 | ||
4388f060 A |
182 | fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0; |
183 | ||
184 | if ( name.endsWith(gNoparse,8) ) { | |
185 | fIsParseable = FALSE; | |
186 | name.truncate(name.length()-8); // remove the @noparse from the name | |
187 | } | |
b75a7d8f A |
188 | |
189 | // all of the other members of NFRuleSet are initialized | |
190 | // by parseRules() | |
191 | } | |
192 | ||
193 | void | |
2ca993e8 | 194 | NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status) |
b75a7d8f A |
195 | { |
196 | // start by creating a Vector whose elements are Strings containing | |
197 | // the descriptions of the rules (one rule per element). The rules | |
198 | // are separated by semicolons (there's no escape facility: ALL | |
199 | // semicolons are rule delimiters) | |
200 | ||
201 | if (U_FAILURE(status)) { | |
202 | return; | |
203 | } | |
204 | ||
4388f060 A |
205 | // ensure we are starting with an empty rule list |
206 | rules.deleteAll(); | |
207 | ||
b75a7d8f A |
208 | // dlf - the original code kept a separate description array for no reason, |
209 | // so I got rid of it. The loop was too complex so I simplified it. | |
210 | ||
211 | UnicodeString currentDescription; | |
212 | int32_t oldP = 0; | |
213 | while (oldP < description.length()) { | |
214 | int32_t p = description.indexOf(gSemicolon, oldP); | |
215 | if (p == -1) { | |
216 | p = description.length(); | |
217 | } | |
218 | currentDescription.setTo(description, oldP, p - oldP); | |
219 | NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status); | |
220 | oldP = p + 1; | |
221 | } | |
222 | ||
223 | // for rules that didn't specify a base value, their base values | |
224 | // were initialized to 0. Make another pass through the list and | |
225 | // set all those rules' base values. We also remove any special | |
226 | // rules from the list and put them into their own member variables | |
227 | int64_t defaultBaseValue = 0; | |
228 | ||
229 | // (this isn't a for loop because we might be deleting items from | |
230 | // the vector-- we want to make sure we only increment i when | |
231 | // we _didn't_ delete aything from the vector) | |
2ca993e8 A |
232 | int32_t rulesSize = rules.size(); |
233 | for (int32_t i = 0; i < rulesSize; i++) { | |
b75a7d8f | 234 | NFRule* rule = rules[i]; |
2ca993e8 | 235 | int64_t baseValue = rule->getBaseValue(); |
b75a7d8f | 236 | |
2ca993e8 | 237 | if (baseValue == 0) { |
b75a7d8f A |
238 | // if the rule's base value is 0, fill in a default |
239 | // base value (this will be 1 plus the preceding | |
240 | // rule's base value for regular rule sets, and the | |
241 | // same as the preceding rule's base value in fraction | |
242 | // rule sets) | |
374ca955 | 243 | rule->setBaseValue(defaultBaseValue, status); |
2ca993e8 A |
244 | } |
245 | else { | |
b75a7d8f A |
246 | // if it's a regular rule that already knows its base value, |
247 | // check to make sure the rules are in order, and update | |
248 | // the default base value for the next rule | |
2ca993e8 | 249 | if (baseValue < defaultBaseValue) { |
b75a7d8f A |
250 | // throw new IllegalArgumentException("Rules are not in order"); |
251 | status = U_PARSE_ERROR; | |
252 | return; | |
253 | } | |
2ca993e8 A |
254 | defaultBaseValue = baseValue; |
255 | } | |
256 | if (!fIsFractionRuleSet) { | |
257 | ++defaultBaseValue; | |
258 | } | |
259 | } | |
260 | } | |
261 | ||
262 | /** | |
263 | * Set one of the non-numerical rules. | |
264 | * @param rule The rule to set. | |
265 | */ | |
266 | void NFRuleSet::setNonNumericalRule(NFRule *rule) { | |
267 | int64_t baseValue = rule->getBaseValue(); | |
268 | if (baseValue == NFRule::kNegativeNumberRule) { | |
269 | delete nonNumericalRules[NEGATIVE_RULE_INDEX]; | |
270 | nonNumericalRules[NEGATIVE_RULE_INDEX] = rule; | |
271 | } | |
272 | else if (baseValue == NFRule::kImproperFractionRule) { | |
273 | setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE); | |
274 | } | |
275 | else if (baseValue == NFRule::kProperFractionRule) { | |
276 | setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE); | |
277 | } | |
278 | else if (baseValue == NFRule::kMasterRule) { | |
279 | setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE); | |
280 | } | |
281 | else if (baseValue == NFRule::kInfinityRule) { | |
282 | delete nonNumericalRules[INFINITY_RULE_INDEX]; | |
283 | nonNumericalRules[INFINITY_RULE_INDEX] = rule; | |
284 | } | |
285 | else if (baseValue == NFRule::kNaNRule) { | |
286 | delete nonNumericalRules[NAN_RULE_INDEX]; | |
287 | nonNumericalRules[NAN_RULE_INDEX] = rule; | |
288 | } | |
289 | } | |
290 | ||
291 | /** | |
292 | * Determine the best fraction rule to use. Rules matching the decimal point from | |
293 | * DecimalFormatSymbols become the main set of rules to use. | |
294 | * @param originalIndex The index into nonNumericalRules | |
295 | * @param newRule The new rule to consider | |
296 | * @param rememberRule Should the new rule be added to fractionRules. | |
297 | */ | |
298 | void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) { | |
299 | if (rememberRule) { | |
300 | fractionRules.add(newRule); | |
301 | } | |
302 | NFRule *bestResult = nonNumericalRules[originalIndex]; | |
303 | if (bestResult == NULL) { | |
304 | nonNumericalRules[originalIndex] = newRule; | |
305 | } | |
306 | else { | |
307 | // We have more than one. Which one is better? | |
308 | const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols(); | |
309 | if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0) | |
310 | == newRule->getDecimalPoint()) | |
311 | { | |
312 | nonNumericalRules[originalIndex] = newRule; | |
b75a7d8f | 313 | } |
2ca993e8 | 314 | // else leave it alone |
b75a7d8f A |
315 | } |
316 | } | |
317 | ||
318 | NFRuleSet::~NFRuleSet() | |
319 | { | |
2ca993e8 A |
320 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { |
321 | if (i != IMPROPER_FRACTION_RULE_INDEX | |
322 | && i != PROPER_FRACTION_RULE_INDEX | |
323 | && i != MASTER_RULE_INDEX) | |
324 | { | |
325 | delete nonNumericalRules[i]; | |
326 | } | |
327 | // else it will be deleted via NFRuleList fractionRules | |
328 | } | |
b75a7d8f A |
329 | } |
330 | ||
374ca955 | 331 | static UBool |
b75a7d8f A |
332 | util_equalRules(const NFRule* rule1, const NFRule* rule2) |
333 | { | |
334 | if (rule1) { | |
335 | if (rule2) { | |
336 | return *rule1 == *rule2; | |
337 | } | |
338 | } else if (!rule2) { | |
339 | return TRUE; | |
340 | } | |
341 | return FALSE; | |
342 | } | |
343 | ||
344 | UBool | |
345 | NFRuleSet::operator==(const NFRuleSet& rhs) const | |
346 | { | |
347 | if (rules.size() == rhs.rules.size() && | |
348 | fIsFractionRuleSet == rhs.fIsFractionRuleSet && | |
2ca993e8 A |
349 | name == rhs.name) { |
350 | ||
351 | // ...then compare the non-numerical rule lists... | |
352 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { | |
353 | if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) { | |
354 | return FALSE; | |
355 | } | |
356 | } | |
b75a7d8f | 357 | |
2ca993e8 | 358 | // ...then compare the rule lists... |
b75a7d8f A |
359 | for (uint32_t i = 0; i < rules.size(); ++i) { |
360 | if (*rules[i] != *rhs.rules[i]) { | |
361 | return FALSE; | |
362 | } | |
363 | } | |
364 | return TRUE; | |
365 | } | |
366 | return FALSE; | |
367 | } | |
368 | ||
2ca993e8 A |
369 | void |
370 | NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) { | |
371 | for (uint32_t i = 0; i < rules.size(); ++i) { | |
372 | rules[i]->setDecimalFormatSymbols(newSymbols, status); | |
373 | } | |
374 | // Switch the fraction rules to mirror the DecimalFormatSymbols. | |
375 | for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) { | |
376 | if (nonNumericalRules[nonNumericalIdx]) { | |
377 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { | |
378 | NFRule *fractionRule = fractionRules[fIdx]; | |
379 | if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) { | |
380 | setBestFractionRule(nonNumericalIdx, fractionRule, FALSE); | |
381 | } | |
382 | } | |
383 | } | |
384 | } | |
385 | ||
386 | for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) { | |
387 | NFRule *rule = nonNumericalRules[nnrIdx]; | |
388 | if (rule) { | |
389 | rule->setDecimalFormatSymbols(newSymbols, status); | |
390 | } | |
391 | } | |
392 | } | |
393 | ||
394 | #define RECURSION_LIMIT 64 | |
374ca955 | 395 | |
b75a7d8f | 396 | void |
2ca993e8 | 397 | NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
b75a7d8f | 398 | { |
2ca993e8 A |
399 | if (recursionCount >= RECURSION_LIMIT) { |
400 | // stop recursion | |
401 | status = U_INVALID_STATE_ERROR; | |
402 | return; | |
403 | } | |
404 | const NFRule *rule = findNormalRule(number); | |
374ca955 | 405 | if (rule) { // else error, but can't report it |
2ca993e8 | 406 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
374ca955 | 407 | } |
b75a7d8f A |
408 | } |
409 | ||
410 | void | |
2ca993e8 | 411 | NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const |
b75a7d8f | 412 | { |
2ca993e8 A |
413 | if (recursionCount >= RECURSION_LIMIT) { |
414 | // stop recursion | |
415 | status = U_INVALID_STATE_ERROR; | |
416 | return; | |
417 | } | |
418 | const NFRule *rule = findDoubleRule(number); | |
374ca955 | 419 | if (rule) { // else error, but can't report it |
2ca993e8 | 420 | rule->doFormat(number, toAppendTo, pos, ++recursionCount, status); |
374ca955 | 421 | } |
b75a7d8f A |
422 | } |
423 | ||
2ca993e8 | 424 | const NFRule* |
b75a7d8f A |
425 | NFRuleSet::findDoubleRule(double number) const |
426 | { | |
427 | // if this is a fraction rule set, use findFractionRuleSetRule() | |
428 | if (isFractionRuleSet()) { | |
429 | return findFractionRuleSetRule(number); | |
430 | } | |
431 | ||
2ca993e8 A |
432 | if (uprv_isNaN(number)) { |
433 | const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX]; | |
434 | if (!rule) { | |
435 | rule = owner->getDefaultNaNRule(); | |
436 | } | |
437 | return rule; | |
438 | } | |
439 | ||
b75a7d8f A |
440 | // if the number is negative, return the negative number rule |
441 | // (if there isn't a negative-number rule, we pretend it's a | |
442 | // positive number) | |
443 | if (number < 0) { | |
2ca993e8 A |
444 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
445 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; | |
b75a7d8f A |
446 | } else { |
447 | number = -number; | |
448 | } | |
449 | } | |
450 | ||
2ca993e8 A |
451 | if (uprv_isInfinite(number)) { |
452 | const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX]; | |
453 | if (!rule) { | |
454 | rule = owner->getDefaultInfinityRule(); | |
455 | } | |
456 | return rule; | |
457 | } | |
458 | ||
b75a7d8f A |
459 | // if the number isn't an integer, we use one of the fraction rules... |
460 | if (number != uprv_floor(number)) { | |
461 | // if the number is between 0 and 1, return the proper | |
462 | // fraction rule | |
2ca993e8 A |
463 | if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) { |
464 | return nonNumericalRules[PROPER_FRACTION_RULE_INDEX]; | |
b75a7d8f A |
465 | } |
466 | // otherwise, return the improper fraction rule | |
2ca993e8 A |
467 | else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) { |
468 | return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]; | |
b75a7d8f A |
469 | } |
470 | } | |
471 | ||
472 | // if there's a master rule, use it to format the number | |
2ca993e8 A |
473 | if (nonNumericalRules[MASTER_RULE_INDEX]) { |
474 | return nonNumericalRules[MASTER_RULE_INDEX]; | |
729e4ab9 A |
475 | } |
476 | ||
b75a7d8f A |
477 | // and if we haven't yet returned a rule, use findNormalRule() |
478 | // to find the applicable rule | |
479 | int64_t r = util64_fromDouble(number + 0.5); | |
480 | return findNormalRule(r); | |
481 | } | |
482 | ||
2ca993e8 | 483 | const NFRule * |
b75a7d8f A |
484 | NFRuleSet::findNormalRule(int64_t number) const |
485 | { | |
486 | // if this is a fraction rule set, use findFractionRuleSetRule() | |
487 | // to find the rule (we should only go into this clause if the | |
488 | // value is 0) | |
489 | if (fIsFractionRuleSet) { | |
490 | return findFractionRuleSetRule((double)number); | |
491 | } | |
492 | ||
493 | // if the number is negative, return the negative-number rule | |
494 | // (if there isn't one, pretend the number is positive) | |
495 | if (number < 0) { | |
2ca993e8 A |
496 | if (nonNumericalRules[NEGATIVE_RULE_INDEX]) { |
497 | return nonNumericalRules[NEGATIVE_RULE_INDEX]; | |
b75a7d8f A |
498 | } else { |
499 | number = -number; | |
500 | } | |
501 | } | |
502 | ||
503 | // we have to repeat the preceding two checks, even though we | |
504 | // do them in findRule(), because the version of format() that | |
505 | // takes a long bypasses findRule() and goes straight to this | |
506 | // function. This function does skip the fraction rules since | |
507 | // we know the value is an integer (it also skips the master | |
508 | // rule, since it's considered a fraction rule. Skipping the | |
509 | // master rule in this function is also how we avoid infinite | |
510 | // recursion) | |
511 | ||
512 | // {dlf} unfortunately this fails if there are no rules except | |
513 | // special rules. If there are no rules, use the master rule. | |
514 | ||
515 | // binary-search the rule list for the applicable rule | |
516 | // (a rule is used for all values from its base value to | |
517 | // the next rule's base value) | |
518 | int32_t hi = rules.size(); | |
519 | if (hi > 0) { | |
520 | int32_t lo = 0; | |
521 | ||
522 | while (lo < hi) { | |
523 | int32_t mid = (lo + hi) / 2; | |
524 | if (rules[mid]->getBaseValue() == number) { | |
525 | return rules[mid]; | |
526 | } | |
527 | else if (rules[mid]->getBaseValue() > number) { | |
528 | hi = mid; | |
529 | } | |
530 | else { | |
531 | lo = mid + 1; | |
532 | } | |
533 | } | |
374ca955 A |
534 | if (hi == 0) { // bad rule set, minimum base > 0 |
535 | return NULL; // want to throw exception here | |
536 | } | |
537 | ||
b75a7d8f A |
538 | NFRule *result = rules[hi - 1]; |
539 | ||
540 | // use shouldRollBack() to see whether we need to invoke the | |
541 | // rollback rule (see shouldRollBack()'s documentation for | |
542 | // an explanation of the rollback rule). If we do, roll back | |
543 | // one rule and return that one instead of the one we'd normally | |
544 | // return | |
545 | if (result->shouldRollBack((double)number)) { | |
374ca955 A |
546 | if (hi == 1) { // bad rule set, no prior rule to rollback to from this base |
547 | return NULL; | |
548 | } | |
b75a7d8f A |
549 | result = rules[hi - 2]; |
550 | } | |
551 | return result; | |
552 | } | |
553 | // else use the master rule | |
2ca993e8 | 554 | return nonNumericalRules[MASTER_RULE_INDEX]; |
b75a7d8f A |
555 | } |
556 | ||
557 | /** | |
558 | * If this rule is a fraction rule set, this function is used by | |
559 | * findRule() to select the most appropriate rule for formatting | |
560 | * the number. Basically, the base value of each rule in the rule | |
561 | * set is treated as the denominator of a fraction. Whichever | |
562 | * denominator can produce the fraction closest in value to the | |
563 | * number passed in is the result. If there's a tie, the earlier | |
564 | * one in the list wins. (If there are two rules in a row with the | |
565 | * same base value, the first one is used when the numerator of the | |
566 | * fraction would be 1, and the second rule is used the rest of the | |
567 | * time. | |
568 | * @param number The number being formatted (which will always be | |
569 | * a number between 0 and 1) | |
570 | * @return The rule to use to format this number | |
571 | */ | |
2ca993e8 | 572 | const NFRule* |
b75a7d8f A |
573 | NFRuleSet::findFractionRuleSetRule(double number) const |
574 | { | |
575 | // the obvious way to do this (multiply the value being formatted | |
576 | // by each rule's base value until you get an integral result) | |
577 | // doesn't work because of rounding error. This method is more | |
578 | // accurate | |
579 | ||
580 | // find the least common multiple of the rules' base values | |
581 | // and multiply this by the number being formatted. This is | |
582 | // all the precision we need, and we can do all of the rest | |
583 | // of the math using integer arithmetic | |
584 | int64_t leastCommonMultiple = rules[0]->getBaseValue(); | |
585 | int64_t numerator; | |
586 | { | |
587 | for (uint32_t i = 1; i < rules.size(); ++i) { | |
588 | leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue()); | |
589 | } | |
590 | numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5); | |
591 | } | |
592 | // for each rule, do the following... | |
593 | int64_t tempDifference; | |
594 | int64_t difference = util64_fromDouble(uprv_maxMantissa()); | |
595 | int32_t winner = 0; | |
596 | for (uint32_t i = 0; i < rules.size(); ++i) { | |
597 | // "numerator" is the numerator of the fraction if the | |
598 | // denominator is the LCD. The numerator if the rule's | |
599 | // base value is the denominator is "numerator" times the | |
600 | // base value divided bythe LCD. Here we check to see if | |
601 | // that's an integer, and if not, how close it is to being | |
602 | // an integer. | |
603 | tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple; | |
604 | ||
605 | ||
606 | // normalize the result of the above calculation: we want | |
607 | // the numerator's distance from the CLOSEST multiple | |
608 | // of the LCD | |
609 | if (leastCommonMultiple - tempDifference < tempDifference) { | |
610 | tempDifference = leastCommonMultiple - tempDifference; | |
611 | } | |
612 | ||
613 | // if this is as close as we've come, keep track of how close | |
614 | // that is, and the line number of the rule that did it. If | |
615 | // we've scored a direct hit, we don't have to look at any more | |
616 | // rules | |
617 | if (tempDifference < difference) { | |
618 | difference = tempDifference; | |
619 | winner = i; | |
620 | if (difference == 0) { | |
621 | break; | |
622 | } | |
623 | } | |
624 | } | |
625 | ||
626 | // if we have two successive rules that both have the winning base | |
627 | // value, then the first one (the one we found above) is used if | |
628 | // the numerator of the fraction is 1 and the second one is used if | |
629 | // the numerator of the fraction is anything else (this lets us | |
630 | // do things like "one third"/"two thirds" without haveing to define | |
631 | // a whole bunch of extra rule sets) | |
632 | if ((unsigned)(winner + 1) < rules.size() && | |
633 | rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) { | |
634 | double n = ((double)rules[winner]->getBaseValue()) * number; | |
635 | if (n < 0.5 || n >= 2) { | |
636 | ++winner; | |
637 | } | |
638 | } | |
639 | ||
640 | // finally, return the winning rule | |
641 | return rules[winner]; | |
642 | } | |
643 | ||
644 | /** | |
645 | * Parses a string. Matches the string to be parsed against each | |
646 | * of its rules (with a base value less than upperBound) and returns | |
647 | * the value produced by the rule that matched the most characters | |
648 | * in the source string. | |
649 | * @param text The string to parse | |
650 | * @param parsePosition The initial position is ignored and assumed | |
651 | * to be 0. On exit, this object has been updated to point to the | |
652 | * first character position this rule set didn't consume. | |
653 | * @param upperBound Limits the rules that can be allowed to match. | |
654 | * Only rules whose base values are strictly less than upperBound | |
655 | * are considered. | |
656 | * @return The numerical result of parsing this string. This will | |
657 | * be the matching rule's base value, composed appropriately with | |
658 | * the results of matching any of its substitutions. The object | |
659 | * will be an instance of Long if it's an integral value; otherwise, | |
660 | * it will be an instance of Double. This function always returns | |
661 | * a valid object: If nothing matched the input string at all, | |
662 | * this function returns new Long(0), and the parse position is | |
663 | * left unchanged. | |
664 | */ | |
665 | #ifdef RBNF_DEBUG | |
666 | #include <stdio.h> | |
667 | ||
668 | static void dumpUS(FILE* f, const UnicodeString& us) { | |
669 | int len = us.length(); | |
670 | char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; | |
46f4442e A |
671 | if (buf != NULL) { |
672 | us.extract(0, len, buf); | |
673 | buf[len] = 0; | |
674 | fprintf(f, "%s", buf); | |
675 | uprv_free(buf); //delete[] buf; | |
676 | } | |
b75a7d8f A |
677 | } |
678 | #endif | |
679 | ||
680 | UBool | |
729e4ab9 | 681 | NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result, UBool lenient) const |
b75a7d8f A |
682 | { |
683 | // try matching each rule in the rule set against the text being | |
684 | // parsed. Whichever one matches the most characters is the one | |
685 | // that determines the value we return. | |
686 | ||
687 | result.setLong(0); | |
688 | ||
689 | // dump out if there's no text to parse | |
690 | if (text.length() == 0) { | |
691 | return 0; | |
692 | } | |
693 | ||
694 | ParsePosition highWaterMark; | |
695 | ParsePosition workingPos = pos; | |
696 | ||
697 | #ifdef RBNF_DEBUG | |
698 | fprintf(stderr, "<nfrs> %x '", this); | |
699 | dumpUS(stderr, name); | |
700 | fprintf(stderr, "' text '"); | |
701 | dumpUS(stderr, text); | |
702 | fprintf(stderr, "'\n"); | |
703 | fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0); | |
704 | #endif | |
2ca993e8 A |
705 | // Try each of the negative rules, fraction rules, infinity rules and NaN rules |
706 | for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) { | |
707 | if (nonNumericalRules[i]) { | |
708 | Formattable tempResult; | |
709 | UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult, lenient || isDecimalFormatRuleParseable() ); | |
710 | if (success && (workingPos.getIndex() > highWaterMark.getIndex())) { | |
711 | result = tempResult; | |
712 | highWaterMark = workingPos; | |
b75a7d8f | 713 | } |
2ca993e8 | 714 | workingPos = pos; |
b75a7d8f A |
715 | } |
716 | } | |
717 | #ifdef RBNF_DEBUG | |
718 | fprintf(stderr, "<nfrs> continue other with text '"); | |
719 | dumpUS(stderr, text); | |
720 | fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex()); | |
721 | #endif | |
722 | ||
723 | // finally, go through the regular rules one at a time. We start | |
724 | // at the end of the list because we want to try matching the most | |
725 | // sigificant rule first (this helps ensure that we parse | |
726 | // "five thousand three hundred six" as | |
727 | // "(five thousand) (three hundred) (six)" rather than | |
728 | // "((five thousand three) hundred) (six)"). Skip rules whose | |
729 | // base values are higher than the upper bound (again, this helps | |
730 | // limit ambiguity by making sure the rules that match a rule's | |
731 | // are less significant than the rule containing the substitutions)/ | |
732 | { | |
733 | int64_t ub = util64_fromDouble(upperBound); | |
734 | #ifdef RBNF_DEBUG | |
735 | { | |
736 | char ubstr[64]; | |
737 | util64_toa(ub, ubstr, 64); | |
738 | char ubstrhex[64]; | |
739 | util64_toa(ub, ubstrhex, 64, 16); | |
740 | fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex); | |
741 | } | |
742 | #endif | |
743 | for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) { | |
744 | if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) { | |
745 | continue; | |
746 | } | |
747 | Formattable tempResult; | |
748 | UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult); | |
749 | if (success && workingPos.getIndex() > highWaterMark.getIndex()) { | |
750 | result = tempResult; | |
751 | highWaterMark = workingPos; | |
752 | } | |
753 | workingPos = pos; | |
754 | } | |
755 | } | |
756 | #ifdef RBNF_DEBUG | |
757 | fprintf(stderr, "<nfrs> exit\n"); | |
758 | #endif | |
759 | // finally, update the parse postion we were passed to point to the | |
760 | // first character we didn't use, and return the result that | |
761 | // corresponds to that string of characters | |
762 | pos = highWaterMark; | |
763 | ||
764 | return 1; | |
765 | } | |
766 | ||
767 | void | |
768 | NFRuleSet::appendRules(UnicodeString& result) const | |
769 | { | |
2ca993e8 A |
770 | uint32_t i; |
771 | ||
b75a7d8f A |
772 | // the rule set name goes first... |
773 | result.append(name); | |
774 | result.append(gColon); | |
775 | result.append(gLineFeed); | |
776 | ||
777 | // followed by the regular rules... | |
2ca993e8 | 778 | for (i = 0; i < rules.size(); i++) { |
73c04bcf | 779 | rules[i]->_appendRuleText(result); |
b75a7d8f A |
780 | result.append(gLineFeed); |
781 | } | |
782 | ||
783 | // followed by the special rules (if they exist) | |
2ca993e8 A |
784 | for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) { |
785 | NFRule *rule = nonNumericalRules[i]; | |
786 | if (nonNumericalRules[i]) { | |
787 | if (rule->getBaseValue() == NFRule::kImproperFractionRule | |
788 | || rule->getBaseValue() == NFRule::kProperFractionRule | |
789 | || rule->getBaseValue() == NFRule::kMasterRule) | |
790 | { | |
791 | for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) { | |
792 | NFRule *fractionRule = fractionRules[fIdx]; | |
793 | if (fractionRule->getBaseValue() == rule->getBaseValue()) { | |
794 | fractionRule->_appendRuleText(result); | |
795 | result.append(gLineFeed); | |
796 | } | |
797 | } | |
798 | } | |
799 | else { | |
800 | rule->_appendRuleText(result); | |
b75a7d8f A |
801 | result.append(gLineFeed); |
802 | } | |
803 | } | |
804 | } | |
805 | } | |
806 | ||
807 | // utility functions | |
808 | ||
809 | int64_t util64_fromDouble(double d) { | |
810 | int64_t result = 0; | |
811 | if (!uprv_isNaN(d)) { | |
812 | double mant = uprv_maxMantissa(); | |
813 | if (d < -mant) { | |
814 | d = -mant; | |
815 | } else if (d > mant) { | |
816 | d = mant; | |
817 | } | |
818 | UBool neg = d < 0; | |
819 | if (neg) { | |
820 | d = -d; | |
821 | } | |
822 | result = (int64_t)uprv_floor(d); | |
823 | if (neg) { | |
824 | result = -result; | |
825 | } | |
826 | } | |
827 | return result; | |
828 | } | |
829 | ||
/**
 * Raises r to the power e using 64-bit integer arithmetic.
 *
 * A zero base always yields 0 (including for e == 0); any other base
 * raised to the power 0 yields 1.
 *
 * The product is accumulated with square-and-multiply in unsigned 64-bit
 * arithmetic, so the cost is logarithmic in e and a result that does not
 * fit wraps modulo 2^64 instead of overflowing a signed integer.
 *
 * @param r the base
 * @param e the exponent
 * @return r to the power e, reduced modulo 2^64 if it does not fit
 */
int64_t util64_pow(int32_t r, uint32_t e) {
    if (r == 0) {
        return 0;
    }

    // sign-extend the base first, then reinterpret as unsigned so that
    // negative bases multiply correctly modulo 2^64
    uint64_t factor = static_cast<uint64_t>(static_cast<int64_t>(r));
    uint64_t product = 1;
    while (e != 0) {
        if ((e & 1u) != 0) {
            product *= factor;
        }
        e >>= 1;
        if (e != 0) {
            factor *= factor;
        }
    }
    return static_cast<int64_t>(product);
}
843 | ||
// Output digit table for radixes up to 36: entry i is the code point used
// to write the digit with value i.  The entries are spelled as hex code
// points rather than character literals so that the table does not depend
// on the compiler's execution character set.
static const uint8_t asciiDigits[] = {
    // digit values 0-9: code points 0x30-0x39
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 0x38u, 0x39u,
    // digit values 10-22: code points 0x61-0x6d
    0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 0x67u, 0x68u, 0x69u, 0x6au,
    0x6bu, 0x6cu, 0x6du,
    // digit values 23-35: code points 0x6e-0x7a
    0x6eu, 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 0x77u,
    0x78u, 0x79u, 0x7au,
};
851 | ||
852 | static const UChar kUMinus = (UChar)0x002d; | |
853 | ||
73c04bcf | 854 | #ifdef RBNF_DEBUG |
b75a7d8f A |
// Minus sign used by the char-based conversions.
static const char kMinus = '-';

// Input digit table, indexed by code point 0x00-0x7f.  A zero entry means
// "not a digit"; otherwise the entry is 0x80 | digit value, so a digit
// whose value is 0 can still be told apart from "not a digit".  Upper- and
// lower-case letters map to the same values (10-35).
static const uint8_t digitInfo[] = {
    // 0x00-0x2f: no digits
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x30-0x3f: digit values 0-9, then six non-digits
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
    // 0x40-0x4f: one non-digit, then digit values 10-24
    0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    // 0x50-0x5f: digit values 25-35, then five non-digits
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
    // 0x60-0x6f: one non-digit, then digit values 10-24
    0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    // 0x70-0x7f: digit values 25-35, then five non-digits
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
};
875 | ||
b75a7d8f A |
876 | int64_t util64_atoi(const char* str, uint32_t radix) |
877 | { | |
878 | if (radix > 36) { | |
879 | radix = 36; | |
880 | } else if (radix < 2) { | |
881 | radix = 2; | |
882 | } | |
883 | int64_t lradix = radix; | |
884 | ||
885 | int neg = 0; | |
886 | if (*str == kMinus) { | |
887 | ++str; | |
888 | neg = 1; | |
889 | } | |
890 | int64_t result = 0; | |
891 | uint8_t b; | |
892 | while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) { | |
893 | result *= lradix; | |
894 | result += (int32_t)b; | |
895 | } | |
896 | if (neg) { | |
897 | result = -result; | |
898 | } | |
899 | return result; | |
900 | } | |
b75a7d8f A |
901 | |
902 | int64_t util64_utoi(const UChar* str, uint32_t radix) | |
903 | { | |
904 | if (radix > 36) { | |
905 | radix = 36; | |
906 | } else if (radix < 2) { | |
907 | radix = 2; | |
908 | } | |
909 | int64_t lradix = radix; | |
910 | ||
911 | int neg = 0; | |
912 | if (*str == kUMinus) { | |
913 | ++str; | |
914 | neg = 1; | |
915 | } | |
916 | int64_t result = 0; | |
917 | UChar c; | |
918 | uint8_t b; | |
919 | while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) { | |
920 | result *= lradix; | |
921 | result += (int32_t)b; | |
922 | } | |
923 | if (neg) { | |
924 | result = -result; | |
925 | } | |
926 | return result; | |
927 | } | |
928 | ||
b75a7d8f A |
929 | uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw) |
930 | { | |
931 | if (radix > 36) { | |
932 | radix = 36; | |
933 | } else if (radix < 2) { | |
934 | radix = 2; | |
935 | } | |
936 | int64_t base = radix; | |
937 | ||
938 | char* p = buf; | |
939 | if (len && (w < 0) && (radix == 10) && !raw) { | |
940 | w = -w; | |
941 | *p++ = kMinus; | |
942 | --len; | |
943 | } else if (len && (w == 0)) { | |
944 | *p++ = (char)raw ? 0 : asciiDigits[0]; | |
945 | --len; | |
946 | } | |
947 | ||
948 | while (len && w != 0) { | |
949 | int64_t n = w / base; | |
950 | int64_t m = n * base; | |
951 | int32_t d = (int32_t)(w-m); | |
952 | *p++ = raw ? (char)d : asciiDigits[d]; | |
953 | w = n; | |
954 | --len; | |
955 | } | |
956 | if (len) { | |
957 | *p = 0; // null terminate if room for caller convenience | |
958 | } | |
959 | ||
960 | len = p - buf; | |
961 | if (*buf == kMinus) { | |
962 | ++buf; | |
963 | } | |
964 | while (--p > buf) { | |
965 | char c = *p; | |
966 | *p = *buf; | |
967 | *buf = c; | |
968 | ++buf; | |
969 | } | |
970 | ||
971 | return len; | |
972 | } | |
973 | #endif | |
974 | ||
975 | uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw) | |
976 | { | |
977 | if (radix > 36) { | |
978 | radix = 36; | |
979 | } else if (radix < 2) { | |
980 | radix = 2; | |
981 | } | |
982 | int64_t base = radix; | |
983 | ||
984 | UChar* p = buf; | |
985 | if (len && (w < 0) && (radix == 10) && !raw) { | |
986 | w = -w; | |
987 | *p++ = kUMinus; | |
988 | --len; | |
989 | } else if (len && (w == 0)) { | |
990 | *p++ = (UChar)raw ? 0 : asciiDigits[0]; | |
991 | --len; | |
992 | } | |
993 | ||
994 | while (len && (w != 0)) { | |
995 | int64_t n = w / base; | |
996 | int64_t m = n * base; | |
997 | int32_t d = (int32_t)(w-m); | |
998 | *p++ = (UChar)(raw ? d : asciiDigits[d]); | |
999 | w = n; | |
1000 | --len; | |
1001 | } | |
1002 | if (len) { | |
1003 | *p = 0; // null terminate if room for caller convenience | |
1004 | } | |
1005 | ||
1006 | len = (uint32_t)(p - buf); | |
1007 | if (*buf == kUMinus) { | |
1008 | ++buf; | |
1009 | } | |
1010 | while (--p > buf) { | |
1011 | UChar c = *p; | |
1012 | *p = *buf; | |
1013 | *buf = c; | |
1014 | ++buf; | |
1015 | } | |
1016 | ||
1017 | return len; | |
1018 | } | |
1019 | ||
1020 | ||
1021 | U_NAMESPACE_END | |
1022 | ||
1023 | /* U_HAVE_RBNF */ | |
1024 | #endif | |
1025 |