]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/rbnf.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10 #include "unicode/utypes.h"
11 #include "utypeinfo.h" // for 'typeid' to work
12
13 #include "unicode/rbnf.h"
14
15 #if U_HAVE_RBNF
16
17 #include "unicode/normlzr.h"
18 #include "unicode/plurfmt.h"
19 #include "unicode/tblcoll.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ucol.h"
22 #include "unicode/uloc.h"
23 #include "unicode/unum.h"
24 #include "unicode/ures.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf16.h"
27 #include "unicode/udata.h"
28 #include "unicode/udisplaycontext.h"
29 #include "unicode/brkiter.h"
30 #include "unicode/ucasemap.h"
31
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "patternprops.h"
35 #include "uresimp.h"
36 #include "nfrs.h"
37 #include "number_decimalquantity.h"
38
39 // debugging
40 // #define RBNF_DEBUG
41
42 #ifdef RBNF_DEBUG
43 #include <stdio.h>
44 #endif
45
46 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
47
48 static const UChar gPercentPercent[] =
49 {
50 0x25, 0x25, 0
51 }; /* "%%" */
52
53 // All urbnf objects are created through openRules, so we init all of the
54 // Unicode string constants required by rbnf, nfrs, or nfr here.
55 static const UChar gLenientParse[] =
56 {
57 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
58 }; /* "%%lenient-parse:" */
59 static const UChar gSemiColon = 0x003B;
60 static const UChar gSemiPercent[] =
61 {
62 0x3B, 0x25, 0
63 }; /* ";%" */
64
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its
// square. Each expansion is wrapped in its own parentheses so that it stays a
// single operand wherever the macro is used.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
68
69 U_NAMESPACE_BEGIN
70
71 using number::impl::DecimalQuantity;
72
73 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
74
75 /*
76 This is a utility class. It does not use ICU's RTTI.
77 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
78 Please make sure that intltest passes on Windows in Release mode,
79 since the string pooling per compilation unit will mess up how RTTI works.
80 The RTTI code was also removed due to lack of code coverage.
81 */
82 class LocalizationInfo : public UMemory {
83 protected:
84 virtual ~LocalizationInfo();
85 uint32_t refcount;
86
87 public:
88 LocalizationInfo() : refcount(0) {}
89
90 LocalizationInfo* ref(void) {
91 ++refcount;
92 return this;
93 }
94
95 LocalizationInfo* unref(void) {
96 if (refcount && --refcount == 0) {
97 delete this;
98 }
99 return NULL;
100 }
101
102 virtual UBool operator==(const LocalizationInfo* rhs) const;
103 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
104
105 virtual int32_t getNumberOfRuleSets(void) const = 0;
106 virtual const UChar* getRuleSetName(int32_t index) const = 0;
107 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
108 virtual const UChar* getLocaleName(int32_t index) const = 0;
109 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
110
111 virtual int32_t indexForLocale(const UChar* locale) const;
112 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
113
114 // virtual UClassID getDynamicClassID() const = 0;
115 // static UClassID getStaticClassID(void);
116 };
117
118 LocalizationInfo::~LocalizationInfo() {}
119
120 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
121
122 // if both strings are NULL, this returns TRUE
123 static UBool
124 streq(const UChar* lhs, const UChar* rhs) {
125 if (rhs == lhs) {
126 return TRUE;
127 }
128 if (lhs && rhs) {
129 return u_strcmp(lhs, rhs) == 0;
130 }
131 return FALSE;
132 }
133
134 UBool
135 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
136 if (rhs) {
137 if (this == rhs) {
138 return TRUE;
139 }
140
141 int32_t rsc = getNumberOfRuleSets();
142 if (rsc == rhs->getNumberOfRuleSets()) {
143 for (int i = 0; i < rsc; ++i) {
144 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
145 return FALSE;
146 }
147 }
148 int32_t dlc = getNumberOfDisplayLocales();
149 if (dlc == rhs->getNumberOfDisplayLocales()) {
150 for (int i = 0; i < dlc; ++i) {
151 const UChar* locale = getLocaleName(i);
152 int32_t ix = rhs->indexForLocale(locale);
153 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
154 if (!streq(locale, rhs->getLocaleName(ix))) {
155 return FALSE;
156 }
157 for (int j = 0; j < rsc; ++j) {
158 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
159 return FALSE;
160 }
161 }
162 }
163 return TRUE;
164 }
165 }
166 }
167 return FALSE;
168 }
169
170 int32_t
171 LocalizationInfo::indexForLocale(const UChar* locale) const {
172 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
173 if (streq(locale, getLocaleName(i))) {
174 return i;
175 }
176 }
177 return -1;
178 }
179
180 int32_t
181 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
182 if (ruleset) {
183 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
184 if (streq(ruleset, getRuleSetName(i))) {
185 return i;
186 }
187 }
188 }
189 return -1;
190 }
191
192
193 typedef void (*Fn_Deleter)(void*);
194
195 class VArray {
196 void** buf;
197 int32_t cap;
198 int32_t size;
199 Fn_Deleter deleter;
200 public:
201 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
202
203 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
204
205 ~VArray() {
206 if (deleter) {
207 for (int i = 0; i < size; ++i) {
208 (*deleter)(buf[i]);
209 }
210 }
211 uprv_free(buf);
212 }
213
214 int32_t length() {
215 return size;
216 }
217
218 void add(void* elem, UErrorCode& status) {
219 if (U_SUCCESS(status)) {
220 if (size == cap) {
221 if (cap == 0) {
222 cap = 1;
223 } else if (cap < 256) {
224 cap *= 2;
225 } else {
226 cap += 256;
227 }
228 if (buf == NULL) {
229 buf = (void**)uprv_malloc(cap * sizeof(void*));
230 } else {
231 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
232 }
233 if (buf == NULL) {
234 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
235 status = U_MEMORY_ALLOCATION_ERROR;
236 return;
237 }
238 void* start = &buf[size];
239 size_t count = (cap - size) * sizeof(void*);
240 uprv_memset(start, 0, count); // fill with nulls, just because
241 }
242 buf[size++] = elem;
243 }
244 }
245
246 void** release(void) {
247 void** result = buf;
248 buf = NULL;
249 cap = 0;
250 size = 0;
251 return result;
252 }
253 };
254
255 class LocDataParser;
256
257 class StringLocalizationInfo : public LocalizationInfo {
258 UChar* info;
259 UChar*** data;
260 int32_t numRuleSets;
261 int32_t numLocales;
262
263 friend class LocDataParser;
264
265 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
266 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
267 {
268 }
269
270 public:
271 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
272
273 virtual ~StringLocalizationInfo();
274 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
275 virtual const UChar* getRuleSetName(int32_t index) const;
276 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
277 virtual const UChar* getLocaleName(int32_t index) const;
278 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
279
280 // virtual UClassID getDynamicClassID() const;
281 // static UClassID getStaticClassID(void);
282
283 private:
284 void init(UErrorCode& status) const;
285 };
286
287
// Code points of the delimiter characters recognised by LocDataParser.
enum {
    OPEN_ANGLE  = 0x003C, /* '<' */
    CLOSE_ANGLE = 0x003E, /* '>' */
    COMMA       = 0x002C, /* ',' */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"' */
    SPACE       = 0x0020  /* ' ' */
};
296
297 /**
298 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
299 */
300 class LocDataParser {
301 UChar* data;
302 const UChar* e;
303 UChar* p;
304 UChar ch;
305 UParseError& pe;
306 UErrorCode& ec;
307
308 public:
309 LocDataParser(UParseError& parseError, UErrorCode& status)
310 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
311 ~LocDataParser() {}
312
313 /*
314 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
315 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
316 */
317 StringLocalizationInfo* parse(UChar* data, int32_t len);
318
319 private:
320
321 inline void inc(void) {
322 ++p;
323 ch = 0xffff;
324 }
325 inline UBool checkInc(UChar c) {
326 if (p < e && (ch == c || *p == c)) {
327 inc();
328 return TRUE;
329 }
330 return FALSE;
331 }
332 inline UBool check(UChar c) {
333 return p < e && (ch == c || *p == c);
334 }
335 inline void skipWhitespace(void) {
336 while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) {
337 inc();
338 }
339 }
340 inline UBool inList(UChar c, const UChar* list) const {
341 if (*list == SPACE && PatternProps::isWhiteSpace(c)) {
342 return TRUE;
343 }
344 while (*list && *list != c) {
345 ++list;
346 }
347 return *list == c;
348 }
349 void parseError(const char* msg);
350
351 StringLocalizationInfo* doParse(void);
352
353 UChar** nextArray(int32_t& requiredLength);
354 UChar* nextString(void);
355 };
356
// ERROR(msg) reports a parse error and returns NULL from the enclosing
// function. It expands to two statements, so use it only inside braces.
// The message text reaches parseError() only in RBNF_DEBUG builds, where
// EXPLANATION_ARG also names the otherwise unnamed parameter.
#ifndef RBNF_DEBUG
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#else
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#endif
364
365
366 static const UChar DQUOTE_STOPLIST[] = {
367 QUOTE, 0
368 };
369
370 static const UChar SQUOTE_STOPLIST[] = {
371 TICK, 0
372 };
373
374 static const UChar NOQUOTE_STOPLIST[] = {
375 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
376 };
377
378 static void
379 DeleteFn(void* p) {
380 uprv_free(p);
381 }
382
383 StringLocalizationInfo*
384 LocDataParser::parse(UChar* _data, int32_t len) {
385 if (U_FAILURE(ec)) {
386 if (_data) uprv_free(_data);
387 return NULL;
388 }
389
390 pe.line = 0;
391 pe.offset = -1;
392 pe.postContext[0] = 0;
393 pe.preContext[0] = 0;
394
395 if (_data == NULL) {
396 ec = U_ILLEGAL_ARGUMENT_ERROR;
397 return NULL;
398 }
399
400 if (len <= 0) {
401 ec = U_ILLEGAL_ARGUMENT_ERROR;
402 uprv_free(_data);
403 return NULL;
404 }
405
406 data = _data;
407 e = data + len;
408 p = _data;
409 ch = 0xffff;
410
411 return doParse();
412 }
413
414
415 StringLocalizationInfo*
416 LocDataParser::doParse(void) {
417 skipWhitespace();
418 if (!checkInc(OPEN_ANGLE)) {
419 ERROR("Missing open angle");
420 } else {
421 VArray array(DeleteFn);
422 UBool mightHaveNext = TRUE;
423 int32_t requiredLength = -1;
424 while (mightHaveNext) {
425 mightHaveNext = FALSE;
426 UChar** elem = nextArray(requiredLength);
427 skipWhitespace();
428 UBool haveComma = check(COMMA);
429 if (elem) {
430 array.add(elem, ec);
431 if (haveComma) {
432 inc();
433 mightHaveNext = TRUE;
434 }
435 } else if (haveComma) {
436 ERROR("Unexpected character");
437 }
438 }
439
440 skipWhitespace();
441 if (!checkInc(CLOSE_ANGLE)) {
442 if (check(OPEN_ANGLE)) {
443 ERROR("Missing comma in outer array");
444 } else {
445 ERROR("Missing close angle bracket in outer array");
446 }
447 }
448
449 skipWhitespace();
450 if (p != e) {
451 ERROR("Extra text after close of localization data");
452 }
453
454 array.add(NULL, ec);
455 if (U_SUCCESS(ec)) {
456 int32_t numLocs = array.length() - 2; // subtract first, NULL
457 UChar*** result = (UChar***)array.release();
458
459 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
460 }
461 }
462
463 ERROR("Unknown error");
464 }
465
466 UChar**
467 LocDataParser::nextArray(int32_t& requiredLength) {
468 if (U_FAILURE(ec)) {
469 return NULL;
470 }
471
472 skipWhitespace();
473 if (!checkInc(OPEN_ANGLE)) {
474 ERROR("Missing open angle");
475 }
476
477 VArray array;
478 UBool mightHaveNext = TRUE;
479 while (mightHaveNext) {
480 mightHaveNext = FALSE;
481 UChar* elem = nextString();
482 skipWhitespace();
483 UBool haveComma = check(COMMA);
484 if (elem) {
485 array.add(elem, ec);
486 if (haveComma) {
487 inc();
488 mightHaveNext = TRUE;
489 }
490 } else if (haveComma) {
491 ERROR("Unexpected comma");
492 }
493 }
494 skipWhitespace();
495 if (!checkInc(CLOSE_ANGLE)) {
496 if (check(OPEN_ANGLE)) {
497 ERROR("Missing close angle bracket in inner array");
498 } else {
499 ERROR("Missing comma in inner array");
500 }
501 }
502
503 array.add(NULL, ec);
504 if (U_SUCCESS(ec)) {
505 if (requiredLength == -1) {
506 requiredLength = array.length() + 1;
507 } else if (array.length() != requiredLength) {
508 ec = U_ILLEGAL_ARGUMENT_ERROR;
509 ERROR("Array not of required length");
510 }
511
512 return (UChar**)array.release();
513 }
514 ERROR("Unknown Error");
515 }
516
517 UChar*
518 LocDataParser::nextString() {
519 UChar* result = NULL;
520
521 skipWhitespace();
522 if (p < e) {
523 const UChar* terminators;
524 UChar c = *p;
525 UBool haveQuote = c == QUOTE || c == TICK;
526 if (haveQuote) {
527 inc();
528 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
529 } else {
530 terminators = NOQUOTE_STOPLIST;
531 }
532 UChar* start = p;
533 while (p < e && !inList(*p, terminators)) ++p;
534 if (p == e) {
535 ERROR("Unexpected end of data");
536 }
537
538 UChar x = *p;
539 if (p > start) {
540 ch = x;
541 *p = 0x0; // terminate by writing to data
542 result = start; // just point into data
543 }
544 if (haveQuote) {
545 if (x != c) {
546 ERROR("Missing matching quote");
547 } else if (p == start) {
548 ERROR("Empty string");
549 }
550 inc();
551 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
552 ERROR("Unexpected character in string");
553 }
554 }
555
556 // ok for there to be no next string
557 return result;
558 }
559
560 void LocDataParser::parseError(const char* EXPLANATION_ARG)
561 {
562 if (!data) {
563 return;
564 }
565
566 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
567 if (start < data) {
568 start = data;
569 }
570 for (UChar* x = p; --x >= start;) {
571 if (!*x) {
572 start = x+1;
573 break;
574 }
575 }
576 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
577 if (limit > e) {
578 limit = e;
579 }
580 u_strncpy(pe.preContext, start, (int32_t)(p-start));
581 pe.preContext[p-start] = 0;
582 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
583 pe.postContext[limit-p] = 0;
584 pe.offset = (int32_t)(p - data);
585
586 #ifdef RBNF_DEBUG
587 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
588
589 UnicodeString msg;
590 msg.append(start, p - start);
591 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
592 msg.append(p, limit-p);
593 msg.append(UNICODE_STRING_SIMPLE("'"));
594
595 char buf[128];
596 int32_t len = msg.extract(0, msg.length(), buf, 128);
597 if (len >= 128) {
598 buf[127] = 0;
599 } else {
600 buf[len] = 0;
601 }
602 fprintf(stderr, "%s\n", buf);
603 fflush(stderr);
604 #endif
605
606 uprv_free(data);
607 data = NULL;
608 p = NULL;
609 e = NULL;
610
611 if (U_SUCCESS(ec)) {
612 ec = U_PARSE_ERROR;
613 }
614 }
615
616 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
617
618 StringLocalizationInfo*
619 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
620 if (U_FAILURE(status)) {
621 return NULL;
622 }
623
624 int32_t len = info.length();
625 if (len == 0) {
626 return NULL; // no error;
627 }
628
629 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
630 if (!p) {
631 status = U_MEMORY_ALLOCATION_ERROR;
632 return NULL;
633 }
634 info.extract(p, len, status);
635 if (!U_FAILURE(status)) {
636 status = U_ZERO_ERROR; // clear warning about non-termination
637 }
638
639 LocDataParser parser(perror, status);
640 return parser.parse(p, len);
641 }
642
643 StringLocalizationInfo::~StringLocalizationInfo() {
644 for (UChar*** p = (UChar***)data; *p; ++p) {
645 // remaining data is simply pointer into our unicode string data.
646 if (*p) uprv_free(*p);
647 }
648 if (data) uprv_free(data);
649 if (info) uprv_free(info);
650 }
651
652
653 const UChar*
654 StringLocalizationInfo::getRuleSetName(int32_t index) const {
655 if (index >= 0 && index < getNumberOfRuleSets()) {
656 return data[0][index];
657 }
658 return NULL;
659 }
660
661 const UChar*
662 StringLocalizationInfo::getLocaleName(int32_t index) const {
663 if (index >= 0 && index < getNumberOfDisplayLocales()) {
664 return data[index+1][0];
665 }
666 return NULL;
667 }
668
669 const UChar*
670 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
671 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
672 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
673 return data[localeIndex+1][ruleIndex+1];
674 }
675 return NULL;
676 }
677
678 // ----------
679
680 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
681 const UnicodeString& locs,
682 const Locale& alocale, UParseError& perror, UErrorCode& status)
683 : fRuleSets(NULL)
684 , ruleSetDescriptions(NULL)
685 , numRuleSets(0)
686 , defaultRuleSet(NULL)
687 , locale(alocale)
688 , collator(NULL)
689 , decimalFormatSymbols(NULL)
690 , defaultInfinityRule(NULL)
691 , defaultNaNRule(NULL)
692 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
693 , lenient(FALSE)
694 , lenientParseRules(NULL)
695 , localizations(NULL)
696 , capitalizationInfoSet(FALSE)
697 , capitalizationForUIListMenu(FALSE)
698 , capitalizationForStandAlone(FALSE)
699 , capitalizationBrkIter(NULL)
700 {
701 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
702 init(description, locinfo, perror, status);
703 }
704
705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
706 const UnicodeString& locs,
707 UParseError& perror, UErrorCode& status)
708 : fRuleSets(NULL)
709 , ruleSetDescriptions(NULL)
710 , numRuleSets(0)
711 , defaultRuleSet(NULL)
712 , locale(Locale::getDefault())
713 , collator(NULL)
714 , decimalFormatSymbols(NULL)
715 , defaultInfinityRule(NULL)
716 , defaultNaNRule(NULL)
717 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
718 , lenient(FALSE)
719 , lenientParseRules(NULL)
720 , localizations(NULL)
721 , capitalizationInfoSet(FALSE)
722 , capitalizationForUIListMenu(FALSE)
723 , capitalizationForStandAlone(FALSE)
724 , capitalizationBrkIter(NULL)
725 {
726 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
727 init(description, locinfo, perror, status);
728 }
729
730 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
731 LocalizationInfo* info,
732 const Locale& alocale, UParseError& perror, UErrorCode& status)
733 : fRuleSets(NULL)
734 , ruleSetDescriptions(NULL)
735 , numRuleSets(0)
736 , defaultRuleSet(NULL)
737 , locale(alocale)
738 , collator(NULL)
739 , decimalFormatSymbols(NULL)
740 , defaultInfinityRule(NULL)
741 , defaultNaNRule(NULL)
742 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
743 , lenient(FALSE)
744 , lenientParseRules(NULL)
745 , localizations(NULL)
746 , capitalizationInfoSet(FALSE)
747 , capitalizationForUIListMenu(FALSE)
748 , capitalizationForStandAlone(FALSE)
749 , capitalizationBrkIter(NULL)
750 {
751 init(description, info, perror, status);
752 }
753
754 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
755 UParseError& perror,
756 UErrorCode& status)
757 : fRuleSets(NULL)
758 , ruleSetDescriptions(NULL)
759 , numRuleSets(0)
760 , defaultRuleSet(NULL)
761 , locale(Locale::getDefault())
762 , collator(NULL)
763 , decimalFormatSymbols(NULL)
764 , defaultInfinityRule(NULL)
765 , defaultNaNRule(NULL)
766 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
767 , lenient(FALSE)
768 , lenientParseRules(NULL)
769 , localizations(NULL)
770 , capitalizationInfoSet(FALSE)
771 , capitalizationForUIListMenu(FALSE)
772 , capitalizationForStandAlone(FALSE)
773 , capitalizationBrkIter(NULL)
774 {
775 init(description, NULL, perror, status);
776 }
777
778 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
779 const Locale& aLocale,
780 UParseError& perror,
781 UErrorCode& status)
782 : fRuleSets(NULL)
783 , ruleSetDescriptions(NULL)
784 , numRuleSets(0)
785 , defaultRuleSet(NULL)
786 , locale(aLocale)
787 , collator(NULL)
788 , decimalFormatSymbols(NULL)
789 , defaultInfinityRule(NULL)
790 , defaultNaNRule(NULL)
791 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
792 , lenient(FALSE)
793 , lenientParseRules(NULL)
794 , localizations(NULL)
795 , capitalizationInfoSet(FALSE)
796 , capitalizationForUIListMenu(FALSE)
797 , capitalizationForStandAlone(FALSE)
798 , capitalizationBrkIter(NULL)
799 {
800 init(description, NULL, perror, status);
801 }
802
803 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
804 : fRuleSets(NULL)
805 , ruleSetDescriptions(NULL)
806 , numRuleSets(0)
807 , defaultRuleSet(NULL)
808 , locale(alocale)
809 , collator(NULL)
810 , decimalFormatSymbols(NULL)
811 , defaultInfinityRule(NULL)
812 , defaultNaNRule(NULL)
813 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
814 , lenient(FALSE)
815 , lenientParseRules(NULL)
816 , localizations(NULL)
817 , capitalizationInfoSet(FALSE)
818 , capitalizationForUIListMenu(FALSE)
819 , capitalizationForStandAlone(FALSE)
820 , capitalizationBrkIter(NULL)
821 {
822 if (U_FAILURE(status)) {
823 return;
824 }
825
826 const char* rules_tag = "RBNFRules";
827 const char* fmt_tag = "";
828 switch (tag) {
829 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
830 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
831 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
832 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
833 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
834 }
835
836 // TODO: read localization info from resource
837 LocalizationInfo* locinfo = NULL;
838
839 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
840 if (U_SUCCESS(status)) {
841 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
842 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
843
844 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
845 if (U_FAILURE(status)) {
846 ures_close(nfrb);
847 }
848 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
849 if (U_FAILURE(status)) {
850 ures_close(rbnfRules);
851 ures_close(nfrb);
852 return;
853 }
854
855 UnicodeString desc;
856 while (ures_hasNext(ruleSets)) {
857 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
858 }
859 UParseError perror;
860
861 init(desc, locinfo, perror, status);
862
863 ures_close(ruleSets);
864 ures_close(rbnfRules);
865 }
866 ures_close(nfrb);
867 }
868
869 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
870 : NumberFormat(rhs)
871 , fRuleSets(NULL)
872 , ruleSetDescriptions(NULL)
873 , numRuleSets(0)
874 , defaultRuleSet(NULL)
875 , locale(rhs.locale)
876 , collator(NULL)
877 , decimalFormatSymbols(NULL)
878 , defaultInfinityRule(NULL)
879 , defaultNaNRule(NULL)
880 , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary)
881 , lenient(FALSE)
882 , lenientParseRules(NULL)
883 , localizations(NULL)
884 , capitalizationInfoSet(FALSE)
885 , capitalizationForUIListMenu(FALSE)
886 , capitalizationForStandAlone(FALSE)
887 , capitalizationBrkIter(NULL)
888 {
889 this->operator=(rhs);
890 }
891
892 // --------
893
894 RuleBasedNumberFormat&
895 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
896 {
897 if (this == &rhs) {
898 return *this;
899 }
900 NumberFormat::operator=(rhs);
901 UErrorCode status = U_ZERO_ERROR;
902 dispose();
903 locale = rhs.locale;
904 lenient = rhs.lenient;
905
906 UParseError perror;
907 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
908 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
909 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
910 setRoundingMode(rhs.getRoundingMode());
911
912 capitalizationInfoSet = rhs.capitalizationInfoSet;
913 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
914 capitalizationForStandAlone = rhs.capitalizationForStandAlone;
915 #if !UCONFIG_NO_BREAK_ITERATION
916 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
917 #endif
918
919 return *this;
920 }
921
922 RuleBasedNumberFormat::~RuleBasedNumberFormat()
923 {
924 dispose();
925 }
926
927 Format*
928 RuleBasedNumberFormat::clone(void) const
929 {
930 return new RuleBasedNumberFormat(*this);
931 }
932
933 UBool
934 RuleBasedNumberFormat::operator==(const Format& other) const
935 {
936 if (this == &other) {
937 return TRUE;
938 }
939
940 if (typeid(*this) == typeid(other)) {
941 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
942 // test for capitalization info equality is adequately handled
943 // by the NumberFormat test for fCapitalizationContext equality;
944 // the info here is just derived from that.
945 if (locale == rhs.locale &&
946 lenient == rhs.lenient &&
947 (localizations == NULL
948 ? rhs.localizations == NULL
949 : (rhs.localizations == NULL
950 ? FALSE
951 : *localizations == rhs.localizations))) {
952
953 NFRuleSet** p = fRuleSets;
954 NFRuleSet** q = rhs.fRuleSets;
955 if (p == NULL) {
956 return q == NULL;
957 } else if (q == NULL) {
958 return FALSE;
959 }
960 while (*p && *q && (**p == **q)) {
961 ++p;
962 ++q;
963 }
964 return *q == NULL && *p == NULL;
965 }
966 }
967
968 return FALSE;
969 }
970
971 UnicodeString
972 RuleBasedNumberFormat::getRules() const
973 {
974 UnicodeString result;
975 if (fRuleSets != NULL) {
976 for (NFRuleSet** p = fRuleSets; *p; ++p) {
977 (*p)->appendRules(result);
978 }
979 }
980 return result;
981 }
982
983 UnicodeString
984 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
985 {
986 if (localizations) {
987 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
988 return string;
989 }
990 else if (fRuleSets) {
991 UnicodeString result;
992 for (NFRuleSet** p = fRuleSets; *p; ++p) {
993 NFRuleSet* rs = *p;
994 if (rs->isPublic()) {
995 if (--index == -1) {
996 rs->getName(result);
997 return result;
998 }
999 }
1000 }
1001 }
1002 UnicodeString empty;
1003 return empty;
1004 }
1005
1006 int32_t
1007 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
1008 {
1009 int32_t result = 0;
1010 if (localizations) {
1011 result = localizations->getNumberOfRuleSets();
1012 }
1013 else if (fRuleSets) {
1014 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1015 if ((**p).isPublic()) {
1016 ++result;
1017 }
1018 }
1019 }
1020 return result;
1021 }
1022
1023 int32_t
1024 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
1025 if (localizations) {
1026 return localizations->getNumberOfDisplayLocales();
1027 }
1028 return 0;
1029 }
1030
1031 Locale
1032 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1033 if (U_FAILURE(status)) {
1034 return Locale("");
1035 }
1036 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1037 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1038 char buffer[64];
1039 int32_t cap = name.length() + 1;
1040 char* bp = buffer;
1041 if (cap > 64) {
1042 bp = (char *)uprv_malloc(cap);
1043 if (bp == NULL) {
1044 status = U_MEMORY_ALLOCATION_ERROR;
1045 return Locale("");
1046 }
1047 }
1048 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1049 Locale retLocale(bp);
1050 if (bp != buffer) {
1051 uprv_free(bp);
1052 }
1053 return retLocale;
1054 }
1055 status = U_ILLEGAL_ARGUMENT_ERROR;
1056 Locale retLocale;
1057 return retLocale;
1058 }
1059
1060 UnicodeString
1061 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1062 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1063 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1064 int32_t len = localeName.length();
1065 UChar* localeStr = localeName.getBuffer(len + 1);
1066 while (len >= 0) {
1067 localeStr[len] = 0;
1068 int32_t ix = localizations->indexForLocale(localeStr);
1069 if (ix >= 0) {
1070 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1071 return name;
1072 }
1073
1074 // trim trailing portion, skipping over ommitted sections
1075 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1076 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1077 }
1078 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1079 return name;
1080 }
1081 UnicodeString bogus;
1082 bogus.setToBogus();
1083 return bogus;
1084 }
1085
1086 UnicodeString
1087 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1088 if (localizations) {
1089 UnicodeString rsn(ruleSetName);
1090 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1091 return getRuleSetDisplayName(ix, localeParam);
1092 }
1093 UnicodeString bogus;
1094 bogus.setToBogus();
1095 return bogus;
1096 }
1097
1098 NFRuleSet*
1099 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1100 {
1101 if (U_SUCCESS(status) && fRuleSets) {
1102 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1103 NFRuleSet* rs = *p;
1104 if (rs->isNamed(name)) {
1105 return rs;
1106 }
1107 }
1108 status = U_ILLEGAL_ARGUMENT_ERROR;
1109 }
1110 return NULL;
1111 }
1112
1113 UnicodeString&
1114 RuleBasedNumberFormat::format(const DecimalQuantity &number,
1115 UnicodeString &appendTo,
1116 FieldPositionIterator *posIter,
1117 UErrorCode &status) const {
1118 if (U_FAILURE(status)) {
1119 return appendTo;
1120 }
1121 DecimalQuantity copy(number);
1122 if (copy.fitsInLong()) {
1123 format(number.toLong(), appendTo, posIter, status);
1124 }
1125 else {
1126 copy.roundToMagnitude(0, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status);
1127 if (copy.fitsInLong()) {
1128 format(number.toDouble(), appendTo, posIter, status);
1129 }
1130 else {
1131 // We're outside of our normal range that this framework can handle.
1132 // The DecimalFormat will provide more accurate results.
1133
1134 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1135 LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status);
1136 if (decimalFormat.isNull()) {
1137 return appendTo;
1138 }
1139 Formattable f;
1140 LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity(number), status);
1141 if (decimalQuantity.isNull()) {
1142 return appendTo;
1143 }
1144 f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity.
1145 decimalFormat->format(f, appendTo, posIter, status);
1146 }
1147 }
1148 return appendTo;
1149 }
1150
1151
1152 UnicodeString&
1153 RuleBasedNumberFormat::format(const DecimalQuantity &number,
1154 UnicodeString& appendTo,
1155 FieldPosition& pos,
1156 UErrorCode &status) const {
1157 if (U_FAILURE(status)) {
1158 return appendTo;
1159 }
1160 DecimalQuantity copy(number);
1161 if (copy.fitsInLong()) {
1162 format(number.toLong(), appendTo, pos, status);
1163 }
1164 else {
1165 copy.roundToMagnitude(0, number::impl::RoundingMode::UNUM_ROUND_HALFEVEN, status);
1166 if (copy.fitsInLong()) {
1167 format(number.toDouble(), appendTo, pos, status);
1168 }
1169 else {
1170 // We're outside of our normal range that this framework can handle.
1171 // The DecimalFormat will provide more accurate results.
1172
1173 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1174 LocalPointer<NumberFormat> decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status);
1175 if (decimalFormat.isNull()) {
1176 return appendTo;
1177 }
1178 Formattable f;
1179 LocalPointer<DecimalQuantity> decimalQuantity(new DecimalQuantity(number), status);
1180 if (decimalQuantity.isNull()) {
1181 return appendTo;
1182 }
1183 f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity.
1184 decimalFormat->format(f, appendTo, pos, status);
1185 }
1186 }
1187 return appendTo;
1188 }
1189
1190 UnicodeString&
1191 RuleBasedNumberFormat::format(int32_t number,
1192 UnicodeString& toAppendTo,
1193 FieldPosition& pos) const
1194 {
1195 return format((int64_t)number, toAppendTo, pos);
1196 }
1197
1198
1199 UnicodeString&
1200 RuleBasedNumberFormat::format(int64_t number,
1201 UnicodeString& toAppendTo,
1202 FieldPosition& /* pos */) const
1203 {
1204 if (defaultRuleSet) {
1205 UErrorCode status = U_ZERO_ERROR;
1206 format(number, defaultRuleSet, toAppendTo, status);
1207 }
1208 return toAppendTo;
1209 }
1210
1211
1212 UnicodeString&
1213 RuleBasedNumberFormat::format(double number,
1214 UnicodeString& toAppendTo,
1215 FieldPosition& /* pos */) const
1216 {
1217 UErrorCode status = U_ZERO_ERROR;
1218 if (defaultRuleSet) {
1219 format(number, *defaultRuleSet, toAppendTo, status);
1220 }
1221 return toAppendTo;
1222 }
1223
1224
1225 UnicodeString&
1226 RuleBasedNumberFormat::format(int32_t number,
1227 const UnicodeString& ruleSetName,
1228 UnicodeString& toAppendTo,
1229 FieldPosition& pos,
1230 UErrorCode& status) const
1231 {
1232 return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1233 }
1234
1235
1236 UnicodeString&
1237 RuleBasedNumberFormat::format(int64_t number,
1238 const UnicodeString& ruleSetName,
1239 UnicodeString& toAppendTo,
1240 FieldPosition& /* pos */,
1241 UErrorCode& status) const
1242 {
1243 if (U_SUCCESS(status)) {
1244 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1245 // throw new IllegalArgumentException("Can't use internal rule set");
1246 status = U_ILLEGAL_ARGUMENT_ERROR;
1247 } else {
1248 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1249 if (rs) {
1250 format(number, rs, toAppendTo, status);
1251 }
1252 }
1253 }
1254 return toAppendTo;
1255 }
1256
1257
1258 UnicodeString&
1259 RuleBasedNumberFormat::format(double number,
1260 const UnicodeString& ruleSetName,
1261 UnicodeString& toAppendTo,
1262 FieldPosition& /* pos */,
1263 UErrorCode& status) const
1264 {
1265 if (U_SUCCESS(status)) {
1266 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1267 // throw new IllegalArgumentException("Can't use internal rule set");
1268 status = U_ILLEGAL_ARGUMENT_ERROR;
1269 } else {
1270 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1271 if (rs) {
1272 format(number, *rs, toAppendTo, status);
1273 }
1274 }
1275 }
1276 return toAppendTo;
1277 }
1278
1279 void
1280 RuleBasedNumberFormat::format(double number,
1281 NFRuleSet& rs,
1282 UnicodeString& toAppendTo,
1283 UErrorCode& status) const
1284 {
1285 int32_t startPos = toAppendTo.length();
1286 if (getRoundingMode() != DecimalFormat::ERoundingMode::kRoundUnnecessary && !uprv_isNaN(number) && !uprv_isInfinite(number)) {
1287 DecimalQuantity digitList;
1288 digitList.setToDouble(number);
1289 digitList.roundToMagnitude(
1290 -getMaximumFractionDigits(),
1291 static_cast<UNumberFormatRoundingMode>(getRoundingMode()),
1292 status);
1293 number = digitList.toDouble();
1294 }
1295 rs.format(number, toAppendTo, toAppendTo.length(), 0, status);
1296 adjustForCapitalizationContext(startPos, toAppendTo, status);
1297 }
1298
1299 /**
1300 * Bottleneck through which all the public format() methods
1301 * that take a long pass. By the time we get here, we know
1302 * which rule set we're using to do the formatting.
1303 * @param number The number to format
1304 * @param ruleSet The rule set to use to format the number
1305 * @return The text that resulted from formatting the number
1306 */
1307 UnicodeString&
1308 RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const
1309 {
1310 // all API format() routines that take a double vector through
1311 // here. We have these two identical functions-- one taking a
1312 // double and one taking a long-- the couple digits of precision
1313 // that long has but double doesn't (both types are 8 bytes long,
1314 // but double has to borrow some of the mantissa bits to hold
1315 // the exponent).
1316 // Create an empty string buffer where the result will
1317 // be built, and pass it to the rule set (along with an insertion
1318 // position of 0 and the number being formatted) to the rule set
1319 // for formatting
1320
1321 if (U_SUCCESS(status)) {
1322 if (number == U_INT64_MIN) {
1323 // We can't handle this value right now. Provide an accurate default value.
1324
1325 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1326 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1327 if (decimalFormat == nullptr) {
1328 return toAppendTo;
1329 }
1330 Formattable f;
1331 FieldPosition pos(FieldPosition::DONT_CARE);
1332 DecimalQuantity *decimalQuantity = new DecimalQuantity();
1333 if (decimalQuantity == nullptr) {
1334 status = U_MEMORY_ALLOCATION_ERROR;
1335 delete decimalFormat;
1336 return toAppendTo;
1337 }
1338 decimalQuantity->setToLong(number);
1339 f.adoptDecimalQuantity(decimalQuantity); // f now owns decimalQuantity.
1340 decimalFormat->format(f, toAppendTo, pos, status);
1341 delete decimalFormat;
1342 }
1343 else {
1344 int32_t startPos = toAppendTo.length();
1345 ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1346 adjustForCapitalizationContext(startPos, toAppendTo, status);
1347 }
1348 }
1349 return toAppendTo;
1350 }
1351
1352 UnicodeString&
1353 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1354 UnicodeString& currentResult,
1355 UErrorCode& status) const
1356 {
1357 #if !UCONFIG_NO_BREAK_ITERATION
1358 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1359 if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) {
1360 // capitalize currentResult according to context
1361 UChar32 ch = currentResult.char32At(0);
1362 if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL &&
1363 ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1364 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1365 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1366 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1367 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1368 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1369 }
1370 }
1371 #endif
1372 return currentResult;
1373 }
1374
1375
1376 void
1377 RuleBasedNumberFormat::parse(const UnicodeString& text,
1378 Formattable& result,
1379 ParsePosition& parsePosition) const
1380 {
1381 if (!fRuleSets) {
1382 parsePosition.setErrorIndex(0);
1383 return;
1384 }
1385
1386 UnicodeString workingText(text, parsePosition.getIndex());
1387 ParsePosition workingPos(0);
1388
1389 ParsePosition high_pp(0);
1390 Formattable high_result;
1391
1392 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1393 NFRuleSet *rp = *p;
1394 if (rp->isPublic() && rp->isParseable()) {
1395 ParsePosition working_pp(0);
1396 Formattable working_result;
1397
1398 rp->parse(workingText, working_pp, kMaxDouble, 0, working_result, lenient);
1399 if (working_pp.getIndex() > high_pp.getIndex()) {
1400 high_pp = working_pp;
1401 high_result = working_result;
1402
1403 if (high_pp.getIndex() == workingText.length()) {
1404 break;
1405 }
1406 }
1407 }
1408 }
1409
1410 int32_t startIndex = parsePosition.getIndex();
1411 parsePosition.setIndex(startIndex + high_pp.getIndex());
1412 if (high_pp.getIndex() > 0) {
1413 parsePosition.setErrorIndex(-1);
1414 } else {
1415 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1416 parsePosition.setErrorIndex(startIndex + errorIndex);
1417 }
1418 result = high_result;
1419 if (result.getType() == Formattable::kDouble) {
1420 double d = result.getDouble();
1421 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1422 // Note: casting a double to an int when the double is too large or small
1423 // to fit the destination is undefined behavior. The explicit range checks,
1424 // above, are required. Just casting and checking the result value is undefined.
1425 result.setLong(static_cast<int32_t>(d));
1426 }
1427 }
1428 }
1429
1430 #if !UCONFIG_NO_COLLATION
1431
1432 void
1433 RuleBasedNumberFormat::setLenient(UBool enabled)
1434 {
1435 lenient = enabled;
1436 if (!enabled && collator) {
1437 delete collator;
1438 collator = NULL;
1439 }
1440 }
1441
1442 #endif
1443
1444 void
1445 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1446 if (U_SUCCESS(status)) {
1447 if (ruleSetName.isEmpty()) {
1448 if (localizations) {
1449 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1450 defaultRuleSet = findRuleSet(name, status);
1451 } else {
1452 initDefaultRuleSet();
1453 }
1454 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1455 status = U_ILLEGAL_ARGUMENT_ERROR;
1456 } else {
1457 NFRuleSet* result = findRuleSet(ruleSetName, status);
1458 if (result != NULL) {
1459 defaultRuleSet = result;
1460 }
1461 }
1462 }
1463 }
1464
1465 UnicodeString
1466 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1467 UnicodeString result;
1468 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1469 defaultRuleSet->getName(result);
1470 } else {
1471 result.setToBogus();
1472 }
1473 return result;
1474 }
1475
1476 void
1477 RuleBasedNumberFormat::initDefaultRuleSet()
1478 {
1479 defaultRuleSet = NULL;
1480 if (!fRuleSets) {
1481 return;
1482 }
1483
1484 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1485 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1486 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1487
1488 NFRuleSet**p = &fRuleSets[0];
1489 while (*p) {
1490 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1491 defaultRuleSet = *p;
1492 return;
1493 } else {
1494 ++p;
1495 }
1496 }
1497
1498 defaultRuleSet = *--p;
1499 if (!defaultRuleSet->isPublic()) {
1500 while (p != fRuleSets) {
1501 if ((*--p)->isPublic()) {
1502 defaultRuleSet = *p;
1503 break;
1504 }
1505 }
1506 }
1507 }
1508
1509
1510 void
1511 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1512 UParseError& pErr, UErrorCode& status)
1513 {
1514 // TODO: implement UParseError
1515 uprv_memset(&pErr, 0, sizeof(UParseError));
1516 // Note: this can leave ruleSets == NULL, so remaining code should check
1517 if (U_FAILURE(status)) {
1518 return;
1519 }
1520
1521 initializeDecimalFormatSymbols(status);
1522 initializeDefaultInfinityRule(status);
1523 initializeDefaultNaNRule(status);
1524 if (U_FAILURE(status)) {
1525 return;
1526 }
1527
1528 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1529
1530 UnicodeString description(rules);
1531 if (!description.length()) {
1532 status = U_MEMORY_ALLOCATION_ERROR;
1533 return;
1534 }
1535
1536 // start by stripping the trailing whitespace from all the rules
1537 // (this is all the whitespace follwing each semicolon in the
1538 // description). This allows us to look for rule-set boundaries
1539 // by searching for ";%" without having to worry about whitespace
1540 // between the ; and the %
1541 stripWhitespace(description);
1542
1543 // check to see if there's a set of lenient-parse rules. If there
1544 // is, pull them out into our temporary holding place for them,
1545 // and delete them from the description before the real desciption-
1546 // parsing code sees them
1547 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1548 if (lp != -1) {
1549 // we've got to make sure we're not in the middle of a rule
1550 // (where "%%lenient-parse" would actually get treated as
1551 // rule text)
1552 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1553 // locate the beginning and end of the actual collation
1554 // rules (there may be whitespace between the name and
1555 // the first token in the description)
1556 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1557
1558 if (lpEnd == -1) {
1559 lpEnd = description.length() - 1;
1560 }
1561 int lpStart = lp + u_strlen(gLenientParse);
1562 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1563 ++lpStart;
1564 }
1565
1566 // copy out the lenient-parse rules and delete them
1567 // from the description
1568 lenientParseRules = new UnicodeString();
1569 /* test for NULL */
1570 if (lenientParseRules == nullptr) {
1571 status = U_MEMORY_ALLOCATION_ERROR;
1572 return;
1573 }
1574 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1575
1576 description.remove(lp, lpEnd + 1 - lp);
1577 }
1578 }
1579
1580 // pre-flight parsing the description and count the number of
1581 // rule sets (";%" marks the end of one rule set and the beginning
1582 // of the next)
1583 numRuleSets = 0;
1584 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1585 ++numRuleSets;
1586 ++p;
1587 }
1588 ++numRuleSets;
1589
1590 // our rule list is an array of the appropriate size
1591 fRuleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1592 /* test for NULL */
1593 if (fRuleSets == 0) {
1594 status = U_MEMORY_ALLOCATION_ERROR;
1595 return;
1596 }
1597
1598 for (int i = 0; i <= numRuleSets; ++i) {
1599 fRuleSets[i] = NULL;
1600 }
1601
1602 // divide up the descriptions into individual rule-set descriptions
1603 // and store them in a temporary array. At each step, we also
1604 // new up a rule set, but all this does is initialize its name
1605 // and remove it from its description. We can't actually parse
1606 // the rest of the descriptions and finish initializing everything
1607 // because we have to know the names and locations of all the rule
1608 // sets before we can actually set everything up
1609 if(!numRuleSets) {
1610 status = U_ILLEGAL_ARGUMENT_ERROR;
1611 return;
1612 }
1613
1614 ruleSetDescriptions = new UnicodeString[numRuleSets];
1615 if (ruleSetDescriptions == nullptr) {
1616 status = U_MEMORY_ALLOCATION_ERROR;
1617 return;
1618 }
1619
1620 {
1621 int curRuleSet = 0;
1622 int32_t start = 0;
1623 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1624 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1625 fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1626 if (fRuleSets[curRuleSet] == nullptr) {
1627 status = U_MEMORY_ALLOCATION_ERROR;
1628 return;
1629 }
1630 ++curRuleSet;
1631 start = p + 1;
1632 }
1633 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1634 fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1635 if (fRuleSets[curRuleSet] == nullptr) {
1636 status = U_MEMORY_ALLOCATION_ERROR;
1637 return;
1638 }
1639 }
1640
1641 // now we can take note of the formatter's default rule set, which
1642 // is the last public rule set in the description (it's the last
1643 // rather than the first so that a user can create a new formatter
1644 // from an existing formatter and change its default behavior just
1645 // by appending more rule sets to the end)
1646
1647 // {dlf} Initialization of a fraction rule set requires the default rule
1648 // set to be known. For purposes of initialization, this is always the
1649 // last public rule set, no matter what the localization data says.
1650 initDefaultRuleSet();
1651
1652 // finally, we can go back through the temporary descriptions
1653 // list and finish setting up the substructure (and we throw
1654 // away the temporary descriptions as we go)
1655 {
1656 for (int i = 0; i < numRuleSets; i++) {
1657 fRuleSets[i]->parseRules(ruleSetDescriptions[i], status);
1658 }
1659 }
1660
1661 // Now that the rules are initialized, the 'real' default rule
1662 // set can be adjusted by the localization data.
1663
1664 // The C code keeps the localization array as is, rather than building
1665 // a separate array of the public rule set names, so we have less work
1666 // to do here-- but we still need to check the names.
1667
1668 if (localizationInfos) {
1669 // confirm the names, if any aren't in the rules, that's an error
1670 // it is ok if the rules contain public rule sets that are not in this list
1671 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1672 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1673 NFRuleSet* rs = findRuleSet(name, status);
1674 if (rs == NULL) {
1675 break; // error
1676 }
1677 if (i == 0) {
1678 defaultRuleSet = rs;
1679 }
1680 }
1681 } else {
1682 defaultRuleSet = getDefaultRuleSet();
1683 }
1684 originalDescription = rules;
1685 }
1686
1687 // override the NumberFormat implementation in order to
1688 // lazily initialize relevant items
1689 void
1690 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1691 {
1692 NumberFormat::setContext(value, status);
1693 if (U_SUCCESS(status)) {
1694 if (!capitalizationInfoSet &&
1695 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1696 initCapitalizationContextInfo(locale);
1697 capitalizationInfoSet = TRUE;
1698 }
1699 #if !UCONFIG_NO_BREAK_ITERATION
1700 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1701 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1702 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1703 status = U_ZERO_ERROR;
1704 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1705 if (U_FAILURE(status)) {
1706 delete capitalizationBrkIter;
1707 capitalizationBrkIter = NULL;
1708 }
1709 }
1710 #endif
1711 }
1712 }
1713
1714 void
1715 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1716 {
1717 #if !UCONFIG_NO_BREAK_ITERATION
1718 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1719 UErrorCode status = U_ZERO_ERROR;
1720 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1721 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1722 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1723 if (U_SUCCESS(status) && rb != NULL) {
1724 int32_t len = 0;
1725 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1726 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1727 capitalizationForUIListMenu = static_cast<UBool>(intVector[0]);
1728 capitalizationForStandAlone = static_cast<UBool>(intVector[1]);
1729 }
1730 }
1731 ures_close(rb);
1732 #endif
1733 }
1734
1735 void
1736 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1737 {
1738 // iterate through the characters...
1739 UnicodeString result;
1740
1741 int start = 0;
1742 while (start != -1 && start < description.length()) {
1743 // seek to the first non-whitespace character...
1744 while (start < description.length()
1745 && PatternProps::isWhiteSpace(description.charAt(start))) {
1746 ++start;
1747 }
1748
1749 // locate the next semicolon in the text and copy the text from
1750 // our current position up to that semicolon into the result
1751 int32_t p = description.indexOf(gSemiColon, start);
1752 if (p == -1) {
1753 // or if we don't find a semicolon, just copy the rest of
1754 // the string into the result
1755 result.append(description, start, description.length() - start);
1756 start = -1;
1757 }
1758 else if (p < description.length()) {
1759 result.append(description, start, p + 1 - start);
1760 start = p + 1;
1761 }
1762
1763 // when we get here, we've seeked off the end of the string, and
1764 // we terminate the loop (we continue until *start* is -1 rather
1765 // than until *p* is -1, because otherwise we'd miss the last
1766 // rule in the description)
1767 else {
1768 start = -1;
1769 }
1770 }
1771
1772 description.setTo(result);
1773 }
1774
1775
1776 void
1777 RuleBasedNumberFormat::dispose()
1778 {
1779 if (fRuleSets) {
1780 for (NFRuleSet** p = fRuleSets; *p; ++p) {
1781 delete *p;
1782 }
1783 uprv_free(fRuleSets);
1784 fRuleSets = NULL;
1785 }
1786
1787 if (ruleSetDescriptions) {
1788 delete [] ruleSetDescriptions;
1789 ruleSetDescriptions = NULL;
1790 }
1791
1792 #if !UCONFIG_NO_COLLATION
1793 delete collator;
1794 #endif
1795 collator = NULL;
1796
1797 delete decimalFormatSymbols;
1798 decimalFormatSymbols = NULL;
1799
1800 delete defaultInfinityRule;
1801 defaultInfinityRule = NULL;
1802
1803 delete defaultNaNRule;
1804 defaultNaNRule = NULL;
1805
1806 delete lenientParseRules;
1807 lenientParseRules = NULL;
1808
1809 #if !UCONFIG_NO_BREAK_ITERATION
1810 delete capitalizationBrkIter;
1811 capitalizationBrkIter = NULL;
1812 #endif
1813
1814 if (localizations) {
1815 localizations = localizations->unref();
1816 }
1817 }
1818
1819
1820 //-----------------------------------------------------------------------
1821 // package-internal API
1822 //-----------------------------------------------------------------------
1823
1824 /**
1825 * Returns the collator to use for lenient parsing. The collator is lazily created:
1826 * this function creates it the first time it's called.
1827 * @return The collator to use for lenient parsing, or null if lenient parsing
1828 * is turned off.
1829 */
1830 const RuleBasedCollator*
1831 RuleBasedNumberFormat::getCollator() const
1832 {
1833 #if !UCONFIG_NO_COLLATION
1834 if (!fRuleSets) {
1835 return NULL;
1836 }
1837
1838 // lazy-evaluate the collator
1839 if (collator == NULL && lenient) {
1840 // create a default collator based on the formatter's locale,
1841 // then pull out that collator's rules, append any additional
1842 // rules specified in the description, and create a _new_
1843 // collator based on the combination of those rules
1844
1845 UErrorCode status = U_ZERO_ERROR;
1846
1847 Collator* temp = Collator::createInstance(locale, status);
1848 RuleBasedCollator* newCollator;
1849 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1850 if (lenientParseRules) {
1851 UnicodeString rules(newCollator->getRules());
1852 rules.append(*lenientParseRules);
1853
1854 newCollator = new RuleBasedCollator(rules, status);
1855 // Exit if newCollator could not be created.
1856 if (newCollator == NULL) {
1857 return NULL;
1858 }
1859 } else {
1860 temp = NULL;
1861 }
1862 if (U_SUCCESS(status)) {
1863 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1864 // cast away const
1865 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1866 } else {
1867 delete newCollator;
1868 }
1869 }
1870 delete temp;
1871 }
1872 #endif
1873
1874 // if lenient-parse mode is off, this will be null
1875 // (see setLenientParseMode())
1876 return collator;
1877 }
1878
1879
1880 DecimalFormatSymbols*
1881 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1882 {
1883 // lazy-evaluate the DecimalFormatSymbols object. This object
1884 // is shared by all DecimalFormat instances belonging to this
1885 // formatter
1886 if (decimalFormatSymbols == nullptr) {
1887 LocalPointer<DecimalFormatSymbols> temp(new DecimalFormatSymbols(locale, status), status);
1888 if (U_SUCCESS(status)) {
1889 decimalFormatSymbols = temp.orphan();
1890 }
1891 }
1892 return decimalFormatSymbols;
1893 }
1894
1895 /**
1896 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1897 * instances owned by this formatter.
1898 */
1899 const DecimalFormatSymbols*
1900 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1901 {
1902 return decimalFormatSymbols;
1903 }
1904
1905 NFRule*
1906 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1907 {
1908 if (U_FAILURE(status)) {
1909 return nullptr;
1910 }
1911 if (defaultInfinityRule == NULL) {
1912 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1913 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1914 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status);
1915 if (U_SUCCESS(status)) {
1916 defaultInfinityRule = temp.orphan();
1917 }
1918 }
1919 return defaultInfinityRule;
1920 }
1921
1922 const NFRule*
1923 RuleBasedNumberFormat::getDefaultInfinityRule() const
1924 {
1925 return defaultInfinityRule;
1926 }
1927
1928 NFRule*
1929 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1930 {
1931 if (U_FAILURE(status)) {
1932 return nullptr;
1933 }
1934 if (defaultNaNRule == nullptr) {
1935 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1936 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1937 LocalPointer<NFRule> temp(new NFRule(this, rule, status), status);
1938 if (U_SUCCESS(status)) {
1939 defaultNaNRule = temp.orphan();
1940 }
1941 }
1942 return defaultNaNRule;
1943 }
1944
1945 const NFRule*
1946 RuleBasedNumberFormat::getDefaultNaNRule() const
1947 {
1948 return defaultNaNRule;
1949 }
1950
1951 // De-owning the current localized symbols and adopt the new symbols.
1952 void
1953 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1954 {
1955 if (symbolsToAdopt == NULL) {
1956 return; // do not allow caller to set decimalFormatSymbols to NULL
1957 }
1958
1959 if (decimalFormatSymbols != NULL) {
1960 delete decimalFormatSymbols;
1961 }
1962
1963 decimalFormatSymbols = symbolsToAdopt;
1964
1965 {
1966 // Apply the new decimalFormatSymbols by reparsing the rulesets
1967 UErrorCode status = U_ZERO_ERROR;
1968
1969 delete defaultInfinityRule;
1970 defaultInfinityRule = NULL;
1971 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1972
1973 delete defaultNaNRule;
1974 defaultNaNRule = NULL;
1975 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1976
1977 if (fRuleSets) {
1978 for (int32_t i = 0; i < numRuleSets; i++) {
1979 fRuleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1980 }
1981 }
1982 }
1983 }
1984
1985 // Setting the symbols is equivalent to adopting a newly created localized symbols.
1986 void
1987 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1988 {
1989 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1990 }
1991
1992 PluralFormat *
1993 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1994 const UnicodeString &pattern,
1995 UErrorCode& status) const
1996 {
1997 auto *pf = new PluralFormat(locale, pluralType, pattern, status);
1998 if (pf == nullptr) {
1999 status = U_MEMORY_ALLOCATION_ERROR;
2000 }
2001 return pf;
2002 }
2003
2004 /**
2005 * Get the rounding mode.
2006 * @return A rounding mode
2007 */
2008 DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const {
2009 return fRoundingMode;
2010 }
2011
2012 /**
2013 * Set the rounding mode. This has no effect unless the rounding
2014 * increment is greater than zero.
2015 * @param roundingMode A rounding mode
2016 */
2017 void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) {
2018 fRoundingMode = roundingMode;
2019 }
2020
2021 U_NAMESPACE_END
2022
2023 /* U_HAVE_RBNF */
2024 #endif