]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/nfrs.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / i18n / nfrs.cpp
1 /*
2 ******************************************************************************
3 * Copyright (C) 1997-2001, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * file name: nfrs.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Modification history
12 * Date Name Comments
13 * 10/11/2001 Doug Ported from ICU4J
14 */
15
16 #include "nfrs.h"
17
18 #if U_HAVE_RBNF
19
20 #include "unicode/uchar.h"
21 #include "nfrule.h"
22 #include "nfrlist.h"
23
24 #ifdef RBNF_DEBUG
25 #include "cmemory.h"
26 #endif
27
28 #include "uprops.h"
29
30 U_NAMESPACE_BEGIN
31
32 #if 0
33 // euclid's algorithm works with doubles
34 // note, doubles only get us up to one quadrillion or so, which
35 // isn't as much range as we get with longs. We probably still
36 // want either 64-bit math, or BigInteger.
37
38 static int64_t
39 util_lcm(int64_t x, int64_t y)
40 {
41 x.abs();
42 y.abs();
43
44 if (x == 0 || y == 0) {
45 return 0;
46 } else {
47 do {
48 if (x < y) {
49 int64_t t = x; x = y; y = t;
50 }
51 x -= y * (x/y);
52 } while (x != 0);
53
54 return y;
55 }
56 }
57
58 #else
59 /**
60 * Calculates the least common multiple of x and y.
61 */
62 static int64_t
63 util_lcm(int64_t x, int64_t y)
64 {
65 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
66 // vol. 2, 1st ed., pp. 298-299
67 int64_t x1 = x;
68 int64_t y1 = y;
69
70 int p2 = 0;
71 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
72 ++p2;
73 x1 >>= 1;
74 y1 >>= 1;
75 }
76
77 int64_t t;
78 if ((x1 & 1) == 1) {
79 t = -y1;
80 } else {
81 t = x1;
82 }
83
84 while (t != 0) {
85 while ((t & 1) == 0) {
86 t = t >> 1;
87 }
88 if (t > 0) {
89 x1 = t;
90 } else {
91 y1 = -t;
92 }
93 t = x1 - y1;
94 }
95
96 int64_t gcd = x1 << p2;
97
98 // x * y == gcd(x, y) * lcm(x, y)
99 return x / gcd * y;
100 }
101 #endif
102
103 static const UChar gPercent = 0x0025;
104 static const UChar gColon = 0x003a;
105 static const UChar gSemicolon = 0x003b;
106 static const UChar gLineFeed = 0x000a;
107
108 static const UChar gFourSpaces[] =
109 {
110 0x20, 0x20, 0x20, 0x20, 0
111 }; /* " " */
112 static const UChar gPercentPercent[] =
113 {
114 0x25, 0x25, 0
115 }; /* "%%" */
116
117 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
118 : name()
119 , rules(0)
120 , negativeNumberRule(NULL)
121 , fIsFractionRuleSet(FALSE)
122 , fIsPublic(FALSE)
123 {
124 for (int i = 0; i < 3; ++i) {
125 fractionRules[i] = NULL;
126 }
127
128 if (U_FAILURE(status)) {
129 return;
130 }
131
132 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
133
134 // if the description begins with a rule set name (the rule set
135 // name can be omitted in formatter descriptions that consist
136 // of only one rule set), copy it out into our "name" member
137 // and delete it from the description
138 if (description.charAt(0) == gPercent) {
139 int32_t pos = description.indexOf(gColon);
140 if (pos == -1) {
141 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
142 status = U_PARSE_ERROR;
143 } else {
144 name.setTo(description, 0, pos);
145 while (pos < description.length() && uprv_isRuleWhiteSpace(description.charAt(++pos))) {
146 }
147 description.remove(0, pos);
148 }
149 } else {
150 name.setTo("%default");
151 }
152
153 if (description.length() == 0) {
154 // throw new IllegalArgumentException("Empty rule set description");
155 status = U_PARSE_ERROR;
156 }
157
158 fIsPublic = name.indexOf(gPercentPercent) != 0;
159
160 // all of the other members of NFRuleSet are initialized
161 // by parseRules()
162 }
163
164 void
165 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
166 {
167 // start by creating a Vector whose elements are Strings containing
168 // the descriptions of the rules (one rule per element). The rules
169 // are separated by semicolons (there's no escape facility: ALL
170 // semicolons are rule delimiters)
171
172 if (U_FAILURE(status)) {
173 return;
174 }
175
176 // dlf - the original code kept a separate description array for no reason,
177 // so I got rid of it. The loop was too complex so I simplified it.
178
179 UnicodeString currentDescription;
180 int32_t oldP = 0;
181 while (oldP < description.length()) {
182 int32_t p = description.indexOf(gSemicolon, oldP);
183 if (p == -1) {
184 p = description.length();
185 }
186 currentDescription.setTo(description, oldP, p - oldP);
187 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
188 oldP = p + 1;
189 }
190
191 // for rules that didn't specify a base value, their base values
192 // were initialized to 0. Make another pass through the list and
193 // set all those rules' base values. We also remove any special
194 // rules from the list and put them into their own member variables
195 int64_t defaultBaseValue = 0;
196
197 // (this isn't a for loop because we might be deleting items from
198 // the vector-- we want to make sure we only increment i when
199 // we _didn't_ delete aything from the vector)
200 uint32_t i = 0;
201 while (i < rules.size()) {
202 NFRule* rule = rules[i];
203
204 switch (rule->getType()) {
205 // if the rule's base value is 0, fill in a default
206 // base value (this will be 1 plus the preceding
207 // rule's base value for regular rule sets, and the
208 // same as the preceding rule's base value in fraction
209 // rule sets)
210 case NFRule::kNoBase:
211 rule->setBaseValue(defaultBaseValue);
212 if (!isFractionRuleSet()) {
213 ++defaultBaseValue;
214 }
215 ++i;
216 break;
217
218 // if it's the negative-number rule, copy it into its own
219 // data member and delete it from the list
220 case NFRule::kNegativeNumberRule:
221 negativeNumberRule = rules.remove(i);
222 break;
223
224 // if it's the improper fraction rule, copy it into the
225 // correct element of fractionRules
226 case NFRule::kImproperFractionRule:
227 fractionRules[0] = rules.remove(i);
228 break;
229
230 // if it's the proper fraction rule, copy it into the
231 // correct element of fractionRules
232 case NFRule::kProperFractionRule:
233 fractionRules[1] = rules.remove(i);
234 break;
235
236 // if it's the master rule, copy it into the
237 // correct element of fractionRules
238 case NFRule::kMasterRule:
239 fractionRules[2] = rules.remove(i);
240 break;
241
242 // if it's a regular rule that already knows its base value,
243 // check to make sure the rules are in order, and update
244 // the default base value for the next rule
245 default:
246 if (rule->getBaseValue() < defaultBaseValue) {
247 // throw new IllegalArgumentException("Rules are not in order");
248 status = U_PARSE_ERROR;
249 return;
250 }
251 defaultBaseValue = rule->getBaseValue();
252 if (!isFractionRuleSet()) {
253 ++defaultBaseValue;
254 }
255 ++i;
256 break;
257 }
258 }
259 }
260
261 NFRuleSet::~NFRuleSet()
262 {
263 delete negativeNumberRule;
264 delete fractionRules[0];
265 delete fractionRules[1];
266 delete fractionRules[2];
267 }
268
269 UBool
270 util_equalRules(const NFRule* rule1, const NFRule* rule2)
271 {
272 if (rule1) {
273 if (rule2) {
274 return *rule1 == *rule2;
275 }
276 } else if (!rule2) {
277 return TRUE;
278 }
279 return FALSE;
280 }
281
282 UBool
283 NFRuleSet::operator==(const NFRuleSet& rhs) const
284 {
285 if (rules.size() == rhs.rules.size() &&
286 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
287 name == rhs.name &&
288 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
289 util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
290 util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
291 util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
292
293 for (uint32_t i = 0; i < rules.size(); ++i) {
294 if (*rules[i] != *rhs.rules[i]) {
295 return FALSE;
296 }
297 }
298 return TRUE;
299 }
300 return FALSE;
301 }
302
303 void
304 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
305 {
306 NFRule *rule = findNormalRule(number);
307 rule->doFormat(number, toAppendTo, pos);
308 }
309
310 void
311 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
312 {
313 NFRule *rule = findDoubleRule(number);
314 rule->doFormat(number, toAppendTo, pos);
315 }
316
317 NFRule*
318 NFRuleSet::findDoubleRule(double number) const
319 {
320 // if this is a fraction rule set, use findFractionRuleSetRule()
321 if (isFractionRuleSet()) {
322 return findFractionRuleSetRule(number);
323 }
324
325 // if the number is negative, return the negative number rule
326 // (if there isn't a negative-number rule, we pretend it's a
327 // positive number)
328 if (number < 0) {
329 if (negativeNumberRule) {
330 return negativeNumberRule;
331 } else {
332 number = -number;
333 }
334 }
335
336 // if the number isn't an integer, we use one of the fraction rules...
337 if (number != uprv_floor(number)) {
338 // if the number is between 0 and 1, return the proper
339 // fraction rule
340 if (number < 1 && fractionRules[1]) {
341 return fractionRules[1];
342 }
343 // otherwise, return the improper fraction rule
344 else if (fractionRules[0]) {
345 return fractionRules[0];
346 }
347 }
348
349 // if there's a master rule, use it to format the number
350 if (fractionRules[2]) {
351 return fractionRules[2];
352 }
353
354 // and if we haven't yet returned a rule, use findNormalRule()
355 // to find the applicable rule
356 int64_t r = util64_fromDouble(number + 0.5);
357 return findNormalRule(r);
358 }
359
360 NFRule *
361 NFRuleSet::findNormalRule(int64_t number) const
362 {
363 // if this is a fraction rule set, use findFractionRuleSetRule()
364 // to find the rule (we should only go into this clause if the
365 // value is 0)
366 if (fIsFractionRuleSet) {
367 return findFractionRuleSetRule((double)number);
368 }
369
370 // if the number is negative, return the negative-number rule
371 // (if there isn't one, pretend the number is positive)
372 if (number < 0) {
373 if (negativeNumberRule) {
374 return negativeNumberRule;
375 } else {
376 number = -number;
377 }
378 }
379
380 // we have to repeat the preceding two checks, even though we
381 // do them in findRule(), because the version of format() that
382 // takes a long bypasses findRule() and goes straight to this
383 // function. This function does skip the fraction rules since
384 // we know the value is an integer (it also skips the master
385 // rule, since it's considered a fraction rule. Skipping the
386 // master rule in this function is also how we avoid infinite
387 // recursion)
388
389 // {dlf} unfortunately this fails if there are no rules except
390 // special rules. If there are no rules, use the master rule.
391
392 // binary-search the rule list for the applicable rule
393 // (a rule is used for all values from its base value to
394 // the next rule's base value)
395 int32_t hi = rules.size();
396 if (hi > 0) {
397 int32_t lo = 0;
398
399 while (lo < hi) {
400 int32_t mid = (lo + hi) / 2;
401 if (rules[mid]->getBaseValue() == number) {
402 return rules[mid];
403 }
404 else if (rules[mid]->getBaseValue() > number) {
405 hi = mid;
406 }
407 else {
408 lo = mid + 1;
409 }
410 }
411 NFRule *result = rules[hi - 1];
412
413 // use shouldRollBack() to see whether we need to invoke the
414 // rollback rule (see shouldRollBack()'s documentation for
415 // an explanation of the rollback rule). If we do, roll back
416 // one rule and return that one instead of the one we'd normally
417 // return
418 if (result->shouldRollBack((double)number)) {
419 result = rules[hi - 2];
420 }
421 return result;
422 }
423 // else use the master rule
424 return fractionRules[2];
425 }
426
427 /**
428 * If this rule is a fraction rule set, this function is used by
429 * findRule() to select the most appropriate rule for formatting
430 * the number. Basically, the base value of each rule in the rule
431 * set is treated as the denominator of a fraction. Whichever
432 * denominator can produce the fraction closest in value to the
433 * number passed in is the result. If there's a tie, the earlier
434 * one in the list wins. (If there are two rules in a row with the
435 * same base value, the first one is used when the numerator of the
436 * fraction would be 1, and the second rule is used the rest of the
437 * time.
438 * @param number The number being formatted (which will always be
439 * a number between 0 and 1)
440 * @return The rule to use to format this number
441 */
442 NFRule*
443 NFRuleSet::findFractionRuleSetRule(double number) const
444 {
445 // the obvious way to do this (multiply the value being formatted
446 // by each rule's base value until you get an integral result)
447 // doesn't work because of rounding error. This method is more
448 // accurate
449
450 // find the least common multiple of the rules' base values
451 // and multiply this by the number being formatted. This is
452 // all the precision we need, and we can do all of the rest
453 // of the math using integer arithmetic
454 int64_t leastCommonMultiple = rules[0]->getBaseValue();
455 int64_t numerator;
456 {
457 for (uint32_t i = 1; i < rules.size(); ++i) {
458 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
459 }
460 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
461 }
462 // for each rule, do the following...
463 int64_t tempDifference;
464 int64_t difference = util64_fromDouble(uprv_maxMantissa());
465 int32_t winner = 0;
466 for (uint32_t i = 0; i < rules.size(); ++i) {
467 // "numerator" is the numerator of the fraction if the
468 // denominator is the LCD. The numerator if the rule's
469 // base value is the denominator is "numerator" times the
470 // base value divided bythe LCD. Here we check to see if
471 // that's an integer, and if not, how close it is to being
472 // an integer.
473 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
474
475
476 // normalize the result of the above calculation: we want
477 // the numerator's distance from the CLOSEST multiple
478 // of the LCD
479 if (leastCommonMultiple - tempDifference < tempDifference) {
480 tempDifference = leastCommonMultiple - tempDifference;
481 }
482
483 // if this is as close as we've come, keep track of how close
484 // that is, and the line number of the rule that did it. If
485 // we've scored a direct hit, we don't have to look at any more
486 // rules
487 if (tempDifference < difference) {
488 difference = tempDifference;
489 winner = i;
490 if (difference == 0) {
491 break;
492 }
493 }
494 }
495
496 // if we have two successive rules that both have the winning base
497 // value, then the first one (the one we found above) is used if
498 // the numerator of the fraction is 1 and the second one is used if
499 // the numerator of the fraction is anything else (this lets us
500 // do things like "one third"/"two thirds" without haveing to define
501 // a whole bunch of extra rule sets)
502 if ((unsigned)(winner + 1) < rules.size() &&
503 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
504 double n = ((double)rules[winner]->getBaseValue()) * number;
505 if (n < 0.5 || n >= 2) {
506 ++winner;
507 }
508 }
509
510 // finally, return the winning rule
511 return rules[winner];
512 }
513
514 /**
515 * Parses a string. Matches the string to be parsed against each
516 * of its rules (with a base value less than upperBound) and returns
517 * the value produced by the rule that matched the most charcters
518 * in the source string.
519 * @param text The string to parse
520 * @param parsePosition The initial position is ignored and assumed
521 * to be 0. On exit, this object has been updated to point to the
522 * first character position this rule set didn't consume.
523 * @param upperBound Limits the rules that can be allowed to match.
524 * Only rules whose base values are strictly less than upperBound
525 * are considered.
526 * @return The numerical result of parsing this string. This will
527 * be the matching rule's base value, composed appropriately with
528 * the results of matching any of its substitutions. The object
529 * will be an instance of Long if it's an integral value; otherwise,
530 * it will be an instance of Double. This function always returns
531 * a valid object: If nothing matched the input string at all,
532 * this function returns new Long(0), and the parse position is
533 * left unchanged.
534 */
535 #ifdef RBNF_DEBUG
536 #include <stdio.h>
537
538 static void dumpUS(FILE* f, const UnicodeString& us) {
539 int len = us.length();
540 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
541 us.extract(0, len, buf);
542 buf[len] = 0;
543 fprintf(f, "%s", buf);
544 uprv_free(buf); //delete[] buf;
545 }
546 #endif
547
548 UBool
549 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
550 {
551 // try matching each rule in the rule set against the text being
552 // parsed. Whichever one matches the most characters is the one
553 // that determines the value we return.
554
555 result.setLong(0);
556
557 // dump out if there's no text to parse
558 if (text.length() == 0) {
559 return 0;
560 }
561
562 ParsePosition highWaterMark;
563 ParsePosition workingPos = pos;
564
565 #ifdef RBNF_DEBUG
566 fprintf(stderr, "<nfrs> %x '", this);
567 dumpUS(stderr, name);
568 fprintf(stderr, "' text '");
569 dumpUS(stderr, text);
570 fprintf(stderr, "'\n");
571 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
572 #endif
573
574 // start by trying the negative number rule (if there is one)
575 if (negativeNumberRule) {
576 Formattable tempResult;
577 #ifdef RBNF_DEBUG
578 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
579 #endif
580 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
581 #ifdef RBNF_DEBUG
582 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
583 #endif
584 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
585 result = tempResult;
586 highWaterMark = workingPos;
587 }
588 workingPos = pos;
589 }
590 #ifdef RBNF_DEBUG
591 fprintf(stderr, "<nfrs> continue fractional with text '");
592 dumpUS(stderr, text);
593 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
594 #endif
595 // then try each of the fraction rules
596 {
597 for (int i = 0; i < 3; i++) {
598 if (fractionRules[i]) {
599 Formattable tempResult;
600 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
601 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
602 result = tempResult;
603 highWaterMark = workingPos;
604 }
605 workingPos = pos;
606 }
607 }
608 }
609 #ifdef RBNF_DEBUG
610 fprintf(stderr, "<nfrs> continue other with text '");
611 dumpUS(stderr, text);
612 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
613 #endif
614
615 // finally, go through the regular rules one at a time. We start
616 // at the end of the list because we want to try matching the most
617 // sigificant rule first (this helps ensure that we parse
618 // "five thousand three hundred six" as
619 // "(five thousand) (three hundred) (six)" rather than
620 // "((five thousand three) hundred) (six)"). Skip rules whose
621 // base values are higher than the upper bound (again, this helps
622 // limit ambiguity by making sure the rules that match a rule's
623 // are less significant than the rule containing the substitutions)/
624 {
625 int64_t ub = util64_fromDouble(upperBound);
626 #ifdef RBNF_DEBUG
627 {
628 char ubstr[64];
629 util64_toa(ub, ubstr, 64);
630 char ubstrhex[64];
631 util64_toa(ub, ubstrhex, 64, 16);
632 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
633 }
634 #endif
635 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
636 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
637 continue;
638 }
639 Formattable tempResult;
640 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
641 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
642 result = tempResult;
643 highWaterMark = workingPos;
644 }
645 workingPos = pos;
646 }
647 }
648 #ifdef RBNF_DEBUG
649 fprintf(stderr, "<nfrs> exit\n");
650 #endif
651 // finally, update the parse postion we were passed to point to the
652 // first character we didn't use, and return the result that
653 // corresponds to that string of characters
654 pos = highWaterMark;
655
656 return 1;
657 }
658
659 void
660 NFRuleSet::appendRules(UnicodeString& result) const
661 {
662 // the rule set name goes first...
663 result.append(name);
664 result.append(gColon);
665 result.append(gLineFeed);
666
667 // followed by the regular rules...
668 for (uint32_t i = 0; i < rules.size(); i++) {
669 result.append(gFourSpaces);
670 rules[i]->appendRuleText(result);
671 result.append(gLineFeed);
672 }
673
674 // followed by the special rules (if they exist)
675 if (negativeNumberRule) {
676 result.append(gFourSpaces);
677 negativeNumberRule->appendRuleText(result);
678 result.append(gLineFeed);
679 }
680
681 {
682 for (uint32_t i = 0; i < 3; ++i) {
683 if (fractionRules[i]) {
684 result.append(gFourSpaces);
685 fractionRules[i]->appendRuleText(result);
686 result.append(gLineFeed);
687 }
688 }
689 }
690 }
691
692 // utility functions
693
694 int64_t util64_fromDouble(double d) {
695 int64_t result = 0;
696 if (!uprv_isNaN(d)) {
697 double mant = uprv_maxMantissa();
698 if (d < -mant) {
699 d = -mant;
700 } else if (d > mant) {
701 d = mant;
702 }
703 UBool neg = d < 0;
704 if (neg) {
705 d = -d;
706 }
707 result = (int64_t)uprv_floor(d);
708 if (neg) {
709 result = -result;
710 }
711 }
712 return result;
713 }
714
715 int64_t util64_pow(int32_t r, uint32_t e) {
716 if (r == 0) {
717 return 0;
718 } else if (e == 0) {
719 return 1;
720 } else {
721 int64_t n = r;
722 while (--e > 0) {
723 n *= r;
724 }
725 return n;
726 }
727 }
728
729 static const uint8_t asciiDigits[] = {
730 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
731 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
732 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
733 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
734 0x77u, 0x78u, 0x79u, 0x7au,
735 };
736
737 static const UChar kUMinus = (UChar)0x002d;
738
739 static const char kMinus = '-';
740
741 static const uint8_t digitInfo[] = {
742 0, 0, 0, 0, 0, 0, 0, 0,
743 0, 0, 0, 0, 0, 0, 0, 0,
744 0, 0, 0, 0, 0, 0, 0, 0,
745 0, 0, 0, 0, 0, 0, 0, 0,
746 0, 0, 0, 0, 0, 0, 0, 0,
747 0, 0, 0, 0, 0, 0, 0, 0,
748 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
749 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
750 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
751 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
752 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
753 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
754 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
755 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
756 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
757 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
758 };
759
760 #ifdef RBNF_DEBUG
761 int64_t util64_atoi(const char* str, uint32_t radix)
762 {
763 if (radix > 36) {
764 radix = 36;
765 } else if (radix < 2) {
766 radix = 2;
767 }
768 int64_t lradix = radix;
769
770 int neg = 0;
771 if (*str == kMinus) {
772 ++str;
773 neg = 1;
774 }
775 int64_t result = 0;
776 uint8_t b;
777 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
778 result *= lradix;
779 result += (int32_t)b;
780 }
781 if (neg) {
782 result = -result;
783 }
784 return result;
785 }
786 #endif
787
788 int64_t util64_utoi(const UChar* str, uint32_t radix)
789 {
790 if (radix > 36) {
791 radix = 36;
792 } else if (radix < 2) {
793 radix = 2;
794 }
795 int64_t lradix = radix;
796
797 int neg = 0;
798 if (*str == kUMinus) {
799 ++str;
800 neg = 1;
801 }
802 int64_t result = 0;
803 UChar c;
804 uint8_t b;
805 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
806 result *= lradix;
807 result += (int32_t)b;
808 }
809 if (neg) {
810 result = -result;
811 }
812 return result;
813 }
814
815 #ifdef RBNF_DEBUG
816 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
817 {
818 if (radix > 36) {
819 radix = 36;
820 } else if (radix < 2) {
821 radix = 2;
822 }
823 int64_t base = radix;
824
825 char* p = buf;
826 if (len && (w < 0) && (radix == 10) && !raw) {
827 w = -w;
828 *p++ = kMinus;
829 --len;
830 } else if (len && (w == 0)) {
831 *p++ = (char)raw ? 0 : asciiDigits[0];
832 --len;
833 }
834
835 while (len && w != 0) {
836 int64_t n = w / base;
837 int64_t m = n * base;
838 int32_t d = (int32_t)(w-m);
839 *p++ = raw ? (char)d : asciiDigits[d];
840 w = n;
841 --len;
842 }
843 if (len) {
844 *p = 0; // null terminate if room for caller convenience
845 }
846
847 len = p - buf;
848 if (*buf == kMinus) {
849 ++buf;
850 }
851 while (--p > buf) {
852 char c = *p;
853 *p = *buf;
854 *buf = c;
855 ++buf;
856 }
857
858 return len;
859 }
860 #endif
861
862 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
863 {
864 if (radix > 36) {
865 radix = 36;
866 } else if (radix < 2) {
867 radix = 2;
868 }
869 int64_t base = radix;
870
871 UChar* p = buf;
872 if (len && (w < 0) && (radix == 10) && !raw) {
873 w = -w;
874 *p++ = kUMinus;
875 --len;
876 } else if (len && (w == 0)) {
877 *p++ = (UChar)raw ? 0 : asciiDigits[0];
878 --len;
879 }
880
881 while (len && (w != 0)) {
882 int64_t n = w / base;
883 int64_t m = n * base;
884 int32_t d = (int32_t)(w-m);
885 *p++ = (UChar)(raw ? d : asciiDigits[d]);
886 w = n;
887 --len;
888 }
889 if (len) {
890 *p = 0; // null terminate if room for caller convenience
891 }
892
893 len = (uint32_t)(p - buf);
894 if (*buf == kUMinus) {
895 ++buf;
896 }
897 while (--p > buf) {
898 UChar c = *p;
899 *p = *buf;
900 *buf = c;
901 ++buf;
902 }
903
904 return len;
905 }
906
907
908 U_NAMESPACE_END
909
910 /* U_HAVE_RBNF */
911 #endif
912