]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/i18n/rbnf.cpp
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
... / ...
CommitLineData
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 1997-2015, International Business Machines Corporation
6* and others. All Rights Reserved.
7*******************************************************************************
8*/
9
10#include "unicode/utypes.h"
11#include "utypeinfo.h" // for 'typeid' to work
12
13#include "unicode/rbnf.h"
14
15#if U_HAVE_RBNF
16
17#include "unicode/normlzr.h"
18#include "unicode/plurfmt.h"
19#include "unicode/tblcoll.h"
20#include "unicode/uchar.h"
21#include "unicode/ucol.h"
22#include "unicode/uloc.h"
23#include "unicode/unum.h"
24#include "unicode/ures.h"
25#include "unicode/ustring.h"
26#include "unicode/utf16.h"
27#include "unicode/udata.h"
28#include "unicode/udisplaycontext.h"
29#include "unicode/brkiter.h"
30#include "unicode/ucasemap.h"
31
32#include "cmemory.h"
33#include "cstring.h"
34#include "patternprops.h"
35#include "uresimp.h"
36#include "nfrs.h"
37#include "digitlst.h"
38
39// debugging
40// #define RBNF_DEBUG
41
42#ifdef RBNF_DEBUG
43#include <stdio.h>
44#endif
45
46#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
47
48static const UChar gPercentPercent[] =
49{
50 0x25, 0x25, 0
51}; /* "%%" */
52
53// All urbnf objects are created through openRules, so we init all of the
54// Unicode string constants required by rbnf, nfrs, or nfr here.
55static const UChar gLenientParse[] =
56{
57 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
58}; /* "%%lenient-parse:" */
59static const UChar gSemiColon = 0x003B;
60static const UChar gSemiPercent[] =
61{
62 0x3B, 0x25, 0
63}; /* ";%" */
64
65#define kSomeNumberOfBitsDiv2 22
66#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
67#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
68
69U_NAMESPACE_BEGIN
70
71UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
72
73/*
74This is a utility class. It does not use ICU's RTTI.
75If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
76Please make sure that intltest passes on Windows in Release mode,
77since the string pooling per compilation unit will mess up how RTTI works.
78The RTTI code was also removed due to lack of code coverage.
79*/
80class LocalizationInfo : public UMemory {
81protected:
82 virtual ~LocalizationInfo();
83 uint32_t refcount;
84
85public:
86 LocalizationInfo() : refcount(0) {}
87
88 LocalizationInfo* ref(void) {
89 ++refcount;
90 return this;
91 }
92
93 LocalizationInfo* unref(void) {
94 if (refcount && --refcount == 0) {
95 delete this;
96 }
97 return NULL;
98 }
99
100 virtual UBool operator==(const LocalizationInfo* rhs) const;
101 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
102
103 virtual int32_t getNumberOfRuleSets(void) const = 0;
104 virtual const UChar* getRuleSetName(int32_t index) const = 0;
105 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
106 virtual const UChar* getLocaleName(int32_t index) const = 0;
107 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
108
109 virtual int32_t indexForLocale(const UChar* locale) const;
110 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
111
112// virtual UClassID getDynamicClassID() const = 0;
113// static UClassID getStaticClassID(void);
114};
115
116LocalizationInfo::~LocalizationInfo() {}
117
118//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
119
120// if both strings are NULL, this returns TRUE
121static UBool
122streq(const UChar* lhs, const UChar* rhs) {
123 if (rhs == lhs) {
124 return TRUE;
125 }
126 if (lhs && rhs) {
127 return u_strcmp(lhs, rhs) == 0;
128 }
129 return FALSE;
130}
131
132UBool
133LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
134 if (rhs) {
135 if (this == rhs) {
136 return TRUE;
137 }
138
139 int32_t rsc = getNumberOfRuleSets();
140 if (rsc == rhs->getNumberOfRuleSets()) {
141 for (int i = 0; i < rsc; ++i) {
142 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
143 return FALSE;
144 }
145 }
146 int32_t dlc = getNumberOfDisplayLocales();
147 if (dlc == rhs->getNumberOfDisplayLocales()) {
148 for (int i = 0; i < dlc; ++i) {
149 const UChar* locale = getLocaleName(i);
150 int32_t ix = rhs->indexForLocale(locale);
151 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
152 if (!streq(locale, rhs->getLocaleName(ix))) {
153 return FALSE;
154 }
155 for (int j = 0; j < rsc; ++j) {
156 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
157 return FALSE;
158 }
159 }
160 }
161 return TRUE;
162 }
163 }
164 }
165 return FALSE;
166}
167
168int32_t
169LocalizationInfo::indexForLocale(const UChar* locale) const {
170 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
171 if (streq(locale, getLocaleName(i))) {
172 return i;
173 }
174 }
175 return -1;
176}
177
178int32_t
179LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
180 if (ruleset) {
181 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
182 if (streq(ruleset, getRuleSetName(i))) {
183 return i;
184 }
185 }
186 }
187 return -1;
188}
189
190
191typedef void (*Fn_Deleter)(void*);
192
193class VArray {
194 void** buf;
195 int32_t cap;
196 int32_t size;
197 Fn_Deleter deleter;
198public:
199 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
200
201 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
202
203 ~VArray() {
204 if (deleter) {
205 for (int i = 0; i < size; ++i) {
206 (*deleter)(buf[i]);
207 }
208 }
209 uprv_free(buf);
210 }
211
212 int32_t length() {
213 return size;
214 }
215
216 void add(void* elem, UErrorCode& status) {
217 if (U_SUCCESS(status)) {
218 if (size == cap) {
219 if (cap == 0) {
220 cap = 1;
221 } else if (cap < 256) {
222 cap *= 2;
223 } else {
224 cap += 256;
225 }
226 if (buf == NULL) {
227 buf = (void**)uprv_malloc(cap * sizeof(void*));
228 } else {
229 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
230 }
231 if (buf == NULL) {
232 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
233 status = U_MEMORY_ALLOCATION_ERROR;
234 return;
235 }
236 void* start = &buf[size];
237 size_t count = (cap - size) * sizeof(void*);
238 uprv_memset(start, 0, count); // fill with nulls, just because
239 }
240 buf[size++] = elem;
241 }
242 }
243
244 void** release(void) {
245 void** result = buf;
246 buf = NULL;
247 cap = 0;
248 size = 0;
249 return result;
250 }
251};
252
253class LocDataParser;
254
255class StringLocalizationInfo : public LocalizationInfo {
256 UChar* info;
257 UChar*** data;
258 int32_t numRuleSets;
259 int32_t numLocales;
260
261friend class LocDataParser;
262
263 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
264 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
265 {
266 }
267
268public:
269 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
270
271 virtual ~StringLocalizationInfo();
272 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
273 virtual const UChar* getRuleSetName(int32_t index) const;
274 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
275 virtual const UChar* getLocaleName(int32_t index) const;
276 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
277
278// virtual UClassID getDynamicClassID() const;
279// static UClassID getStaticClassID(void);
280
281private:
282 void init(UErrorCode& status) const;
283};
284
285
// Code points of the syntax characters recognized by LocDataParser.
enum {
    OPEN_ANGLE  = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA       = 0x002c, /* ',' */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"' */
    SPACE       = 0x0020  /* ' ' */
};
294
295/**
296 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
297 */
298class LocDataParser {
299 UChar* data;
300 const UChar* e;
301 UChar* p;
302 UChar ch;
303 UParseError& pe;
304 UErrorCode& ec;
305
306public:
307 LocDataParser(UParseError& parseError, UErrorCode& status)
308 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
309 ~LocDataParser() {}
310
311 /*
312 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
313 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
314 */
315 StringLocalizationInfo* parse(UChar* data, int32_t len);
316
317private:
318
319 void inc(void) { ++p; ch = 0xffff; }
320 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
321 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
322 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
323 UBool inList(UChar c, const UChar* list) const {
324 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
325 while (*list && *list != c) ++list; return *list == c;
326 }
327 void parseError(const char* msg);
328
329 StringLocalizationInfo* doParse(void);
330
331 UChar** nextArray(int32_t& requiredLength);
332 UChar* nextString(void);
333};
334
// ERROR(msg): record a parse error and return NULL from the enclosing
// LocDataParser method. The message is only forwarded in RBNF_DEBUG builds.
// The body is wrapped in do { } while (0) so the macro expands to a single
// statement and stays correct inside an unbraced if/else; every use must be
// followed by a semicolon.
// EXPLANATION_ARG names parseError's parameter in debug builds and expands to
// nothing otherwise, leaving the parameter unnamed.
#ifdef RBNF_DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#define EXPLANATION_ARG
#endif
342
343
344static const UChar DQUOTE_STOPLIST[] = {
345 QUOTE, 0
346};
347
348static const UChar SQUOTE_STOPLIST[] = {
349 TICK, 0
350};
351
352static const UChar NOQUOTE_STOPLIST[] = {
353 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
354};
355
356static void
357DeleteFn(void* p) {
358 uprv_free(p);
359}
360
361StringLocalizationInfo*
362LocDataParser::parse(UChar* _data, int32_t len) {
363 if (U_FAILURE(ec)) {
364 if (_data) uprv_free(_data);
365 return NULL;
366 }
367
368 pe.line = 0;
369 pe.offset = -1;
370 pe.postContext[0] = 0;
371 pe.preContext[0] = 0;
372
373 if (_data == NULL) {
374 ec = U_ILLEGAL_ARGUMENT_ERROR;
375 return NULL;
376 }
377
378 if (len <= 0) {
379 ec = U_ILLEGAL_ARGUMENT_ERROR;
380 uprv_free(_data);
381 return NULL;
382 }
383
384 data = _data;
385 e = data + len;
386 p = _data;
387 ch = 0xffff;
388
389 return doParse();
390}
391
392
393StringLocalizationInfo*
394LocDataParser::doParse(void) {
395 skipWhitespace();
396 if (!checkInc(OPEN_ANGLE)) {
397 ERROR("Missing open angle");
398 } else {
399 VArray array(DeleteFn);
400 UBool mightHaveNext = TRUE;
401 int32_t requiredLength = -1;
402 while (mightHaveNext) {
403 mightHaveNext = FALSE;
404 UChar** elem = nextArray(requiredLength);
405 skipWhitespace();
406 UBool haveComma = check(COMMA);
407 if (elem) {
408 array.add(elem, ec);
409 if (haveComma) {
410 inc();
411 mightHaveNext = TRUE;
412 }
413 } else if (haveComma) {
414 ERROR("Unexpected character");
415 }
416 }
417
418 skipWhitespace();
419 if (!checkInc(CLOSE_ANGLE)) {
420 if (check(OPEN_ANGLE)) {
421 ERROR("Missing comma in outer array");
422 } else {
423 ERROR("Missing close angle bracket in outer array");
424 }
425 }
426
427 skipWhitespace();
428 if (p != e) {
429 ERROR("Extra text after close of localization data");
430 }
431
432 array.add(NULL, ec);
433 if (U_SUCCESS(ec)) {
434 int32_t numLocs = array.length() - 2; // subtract first, NULL
435 UChar*** result = (UChar***)array.release();
436
437 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
438 }
439 }
440
441 ERROR("Unknown error");
442}
443
444UChar**
445LocDataParser::nextArray(int32_t& requiredLength) {
446 if (U_FAILURE(ec)) {
447 return NULL;
448 }
449
450 skipWhitespace();
451 if (!checkInc(OPEN_ANGLE)) {
452 ERROR("Missing open angle");
453 }
454
455 VArray array;
456 UBool mightHaveNext = TRUE;
457 while (mightHaveNext) {
458 mightHaveNext = FALSE;
459 UChar* elem = nextString();
460 skipWhitespace();
461 UBool haveComma = check(COMMA);
462 if (elem) {
463 array.add(elem, ec);
464 if (haveComma) {
465 inc();
466 mightHaveNext = TRUE;
467 }
468 } else if (haveComma) {
469 ERROR("Unexpected comma");
470 }
471 }
472 skipWhitespace();
473 if (!checkInc(CLOSE_ANGLE)) {
474 if (check(OPEN_ANGLE)) {
475 ERROR("Missing close angle bracket in inner array");
476 } else {
477 ERROR("Missing comma in inner array");
478 }
479 }
480
481 array.add(NULL, ec);
482 if (U_SUCCESS(ec)) {
483 if (requiredLength == -1) {
484 requiredLength = array.length() + 1;
485 } else if (array.length() != requiredLength) {
486 ec = U_ILLEGAL_ARGUMENT_ERROR;
487 ERROR("Array not of required length");
488 }
489
490 return (UChar**)array.release();
491 }
492 ERROR("Unknown Error");
493}
494
495UChar*
496LocDataParser::nextString() {
497 UChar* result = NULL;
498
499 skipWhitespace();
500 if (p < e) {
501 const UChar* terminators;
502 UChar c = *p;
503 UBool haveQuote = c == QUOTE || c == TICK;
504 if (haveQuote) {
505 inc();
506 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
507 } else {
508 terminators = NOQUOTE_STOPLIST;
509 }
510 UChar* start = p;
511 while (p < e && !inList(*p, terminators)) ++p;
512 if (p == e) {
513 ERROR("Unexpected end of data");
514 }
515
516 UChar x = *p;
517 if (p > start) {
518 ch = x;
519 *p = 0x0; // terminate by writing to data
520 result = start; // just point into data
521 }
522 if (haveQuote) {
523 if (x != c) {
524 ERROR("Missing matching quote");
525 } else if (p == start) {
526 ERROR("Empty string");
527 }
528 inc();
529 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
530 ERROR("Unexpected character in string");
531 }
532 }
533
534 // ok for there to be no next string
535 return result;
536}
537
538void LocDataParser::parseError(const char* EXPLANATION_ARG)
539{
540 if (!data) {
541 return;
542 }
543
544 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
545 if (start < data) {
546 start = data;
547 }
548 for (UChar* x = p; --x >= start;) {
549 if (!*x) {
550 start = x+1;
551 break;
552 }
553 }
554 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
555 if (limit > e) {
556 limit = e;
557 }
558 u_strncpy(pe.preContext, start, (int32_t)(p-start));
559 pe.preContext[p-start] = 0;
560 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
561 pe.postContext[limit-p] = 0;
562 pe.offset = (int32_t)(p - data);
563
564#ifdef RBNF_DEBUG
565 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
566
567 UnicodeString msg;
568 msg.append(start, p - start);
569 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
570 msg.append(p, limit-p);
571 msg.append(UNICODE_STRING_SIMPLE("'"));
572
573 char buf[128];
574 int32_t len = msg.extract(0, msg.length(), buf, 128);
575 if (len >= 128) {
576 buf[127] = 0;
577 } else {
578 buf[len] = 0;
579 }
580 fprintf(stderr, "%s\n", buf);
581 fflush(stderr);
582#endif
583
584 uprv_free(data);
585 data = NULL;
586 p = NULL;
587 e = NULL;
588
589 if (U_SUCCESS(ec)) {
590 ec = U_PARSE_ERROR;
591 }
592}
593
594//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
595
596StringLocalizationInfo*
597StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
598 if (U_FAILURE(status)) {
599 return NULL;
600 }
601
602 int32_t len = info.length();
603 if (len == 0) {
604 return NULL; // no error;
605 }
606
607 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
608 if (!p) {
609 status = U_MEMORY_ALLOCATION_ERROR;
610 return NULL;
611 }
612 info.extract(p, len, status);
613 if (!U_FAILURE(status)) {
614 status = U_ZERO_ERROR; // clear warning about non-termination
615 }
616
617 LocDataParser parser(perror, status);
618 return parser.parse(p, len);
619}
620
621StringLocalizationInfo::~StringLocalizationInfo() {
622 for (UChar*** p = (UChar***)data; *p; ++p) {
623 // remaining data is simply pointer into our unicode string data.
624 if (*p) uprv_free(*p);
625 }
626 if (data) uprv_free(data);
627 if (info) uprv_free(info);
628}
629
630
631const UChar*
632StringLocalizationInfo::getRuleSetName(int32_t index) const {
633 if (index >= 0 && index < getNumberOfRuleSets()) {
634 return data[0][index];
635 }
636 return NULL;
637}
638
639const UChar*
640StringLocalizationInfo::getLocaleName(int32_t index) const {
641 if (index >= 0 && index < getNumberOfDisplayLocales()) {
642 return data[index+1][0];
643 }
644 return NULL;
645}
646
647const UChar*
648StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
649 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
650 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
651 return data[localeIndex+1][ruleIndex+1];
652 }
653 return NULL;
654}
655
656// ----------
657
658RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
659 const UnicodeString& locs,
660 const Locale& alocale, UParseError& perror, UErrorCode& status)
661 : ruleSets(NULL)
662 , ruleSetDescriptions(NULL)
663 , numRuleSets(0)
664 , defaultRuleSet(NULL)
665 , locale(alocale)
666 , collator(NULL)
667 , decimalFormatSymbols(NULL)
668 , defaultInfinityRule(NULL)
669 , defaultNaNRule(NULL)
670 , lenient(FALSE)
671 , lenientParseRules(NULL)
672 , localizations(NULL)
673 , capitalizationInfoSet(FALSE)
674 , capitalizationForUIListMenu(FALSE)
675 , capitalizationForStandAlone(FALSE)
676 , capitalizationBrkIter(NULL)
677{
678 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
679 init(description, locinfo, perror, status);
680}
681
682RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
683 const UnicodeString& locs,
684 UParseError& perror, UErrorCode& status)
685 : ruleSets(NULL)
686 , ruleSetDescriptions(NULL)
687 , numRuleSets(0)
688 , defaultRuleSet(NULL)
689 , locale(Locale::getDefault())
690 , collator(NULL)
691 , decimalFormatSymbols(NULL)
692 , defaultInfinityRule(NULL)
693 , defaultNaNRule(NULL)
694 , lenient(FALSE)
695 , lenientParseRules(NULL)
696 , localizations(NULL)
697 , capitalizationInfoSet(FALSE)
698 , capitalizationForUIListMenu(FALSE)
699 , capitalizationForStandAlone(FALSE)
700 , capitalizationBrkIter(NULL)
701{
702 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
703 init(description, locinfo, perror, status);
704}
705
706RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
707 LocalizationInfo* info,
708 const Locale& alocale, UParseError& perror, UErrorCode& status)
709 : ruleSets(NULL)
710 , ruleSetDescriptions(NULL)
711 , numRuleSets(0)
712 , defaultRuleSet(NULL)
713 , locale(alocale)
714 , collator(NULL)
715 , decimalFormatSymbols(NULL)
716 , defaultInfinityRule(NULL)
717 , defaultNaNRule(NULL)
718 , lenient(FALSE)
719 , lenientParseRules(NULL)
720 , localizations(NULL)
721 , capitalizationInfoSet(FALSE)
722 , capitalizationForUIListMenu(FALSE)
723 , capitalizationForStandAlone(FALSE)
724 , capitalizationBrkIter(NULL)
725{
726 init(description, info, perror, status);
727}
728
729RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
730 UParseError& perror,
731 UErrorCode& status)
732 : ruleSets(NULL)
733 , ruleSetDescriptions(NULL)
734 , numRuleSets(0)
735 , defaultRuleSet(NULL)
736 , locale(Locale::getDefault())
737 , collator(NULL)
738 , decimalFormatSymbols(NULL)
739 , defaultInfinityRule(NULL)
740 , defaultNaNRule(NULL)
741 , lenient(FALSE)
742 , lenientParseRules(NULL)
743 , localizations(NULL)
744 , capitalizationInfoSet(FALSE)
745 , capitalizationForUIListMenu(FALSE)
746 , capitalizationForStandAlone(FALSE)
747 , capitalizationBrkIter(NULL)
748{
749 init(description, NULL, perror, status);
750}
751
752RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
753 const Locale& aLocale,
754 UParseError& perror,
755 UErrorCode& status)
756 : ruleSets(NULL)
757 , ruleSetDescriptions(NULL)
758 , numRuleSets(0)
759 , defaultRuleSet(NULL)
760 , locale(aLocale)
761 , collator(NULL)
762 , decimalFormatSymbols(NULL)
763 , defaultInfinityRule(NULL)
764 , defaultNaNRule(NULL)
765 , lenient(FALSE)
766 , lenientParseRules(NULL)
767 , localizations(NULL)
768 , capitalizationInfoSet(FALSE)
769 , capitalizationForUIListMenu(FALSE)
770 , capitalizationForStandAlone(FALSE)
771 , capitalizationBrkIter(NULL)
772{
773 init(description, NULL, perror, status);
774}
775
776RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
777 : ruleSets(NULL)
778 , ruleSetDescriptions(NULL)
779 , numRuleSets(0)
780 , defaultRuleSet(NULL)
781 , locale(alocale)
782 , collator(NULL)
783 , decimalFormatSymbols(NULL)
784 , defaultInfinityRule(NULL)
785 , defaultNaNRule(NULL)
786 , lenient(FALSE)
787 , lenientParseRules(NULL)
788 , localizations(NULL)
789 , capitalizationInfoSet(FALSE)
790 , capitalizationForUIListMenu(FALSE)
791 , capitalizationForStandAlone(FALSE)
792 , capitalizationBrkIter(NULL)
793{
794 if (U_FAILURE(status)) {
795 return;
796 }
797
798 const char* rules_tag = "RBNFRules";
799 const char* fmt_tag = "";
800 switch (tag) {
801 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
802 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
803 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
804 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
805 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
806 }
807
808 // TODO: read localization info from resource
809 LocalizationInfo* locinfo = NULL;
810
811 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
812 if (U_SUCCESS(status)) {
813 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
814 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
815
816 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
817 if (U_FAILURE(status)) {
818 ures_close(nfrb);
819 }
820 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
821 if (U_FAILURE(status)) {
822 ures_close(rbnfRules);
823 ures_close(nfrb);
824 return;
825 }
826
827 UnicodeString desc;
828 while (ures_hasNext(ruleSets)) {
829 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
830 }
831 UParseError perror;
832
833 init(desc, locinfo, perror, status);
834
835 ures_close(ruleSets);
836 ures_close(rbnfRules);
837 }
838 ures_close(nfrb);
839}
840
841RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
842 : NumberFormat(rhs)
843 , ruleSets(NULL)
844 , ruleSetDescriptions(NULL)
845 , numRuleSets(0)
846 , defaultRuleSet(NULL)
847 , locale(rhs.locale)
848 , collator(NULL)
849 , decimalFormatSymbols(NULL)
850 , defaultInfinityRule(NULL)
851 , defaultNaNRule(NULL)
852 , lenient(FALSE)
853 , lenientParseRules(NULL)
854 , localizations(NULL)
855 , capitalizationInfoSet(FALSE)
856 , capitalizationForUIListMenu(FALSE)
857 , capitalizationForStandAlone(FALSE)
858 , capitalizationBrkIter(NULL)
859{
860 this->operator=(rhs);
861}
862
863// --------
864
865RuleBasedNumberFormat&
866RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
867{
868 if (this == &rhs) {
869 return *this;
870 }
871 NumberFormat::operator=(rhs);
872 UErrorCode status = U_ZERO_ERROR;
873 dispose();
874 locale = rhs.locale;
875 lenient = rhs.lenient;
876
877 UParseError perror;
878 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
879 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
880 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
881
882 capitalizationInfoSet = rhs.capitalizationInfoSet;
883 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
884 capitalizationForStandAlone = rhs.capitalizationForStandAlone;
885#if !UCONFIG_NO_BREAK_ITERATION
886 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
887#endif
888
889 return *this;
890}
891
892RuleBasedNumberFormat::~RuleBasedNumberFormat()
893{
894 dispose();
895}
896
897Format*
898RuleBasedNumberFormat::clone(void) const
899{
900 return new RuleBasedNumberFormat(*this);
901}
902
903UBool
904RuleBasedNumberFormat::operator==(const Format& other) const
905{
906 if (this == &other) {
907 return TRUE;
908 }
909
910 if (typeid(*this) == typeid(other)) {
911 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
912 // test for capitalization info equality is adequately handled
913 // by the NumberFormat test for fCapitalizationContext equality;
914 // the info here is just derived from that.
915 if (locale == rhs.locale &&
916 lenient == rhs.lenient &&
917 (localizations == NULL
918 ? rhs.localizations == NULL
919 : (rhs.localizations == NULL
920 ? FALSE
921 : *localizations == rhs.localizations))) {
922
923 NFRuleSet** p = ruleSets;
924 NFRuleSet** q = rhs.ruleSets;
925 if (p == NULL) {
926 return q == NULL;
927 } else if (q == NULL) {
928 return FALSE;
929 }
930 while (*p && *q && (**p == **q)) {
931 ++p;
932 ++q;
933 }
934 return *q == NULL && *p == NULL;
935 }
936 }
937
938 return FALSE;
939}
940
941UnicodeString
942RuleBasedNumberFormat::getRules() const
943{
944 UnicodeString result;
945 if (ruleSets != NULL) {
946 for (NFRuleSet** p = ruleSets; *p; ++p) {
947 (*p)->appendRules(result);
948 }
949 }
950 return result;
951}
952
953UnicodeString
954RuleBasedNumberFormat::getRuleSetName(int32_t index) const
955{
956 if (localizations) {
957 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
958 return string;
959 }
960 else if (ruleSets) {
961 UnicodeString result;
962 for (NFRuleSet** p = ruleSets; *p; ++p) {
963 NFRuleSet* rs = *p;
964 if (rs->isPublic()) {
965 if (--index == -1) {
966 rs->getName(result);
967 return result;
968 }
969 }
970 }
971 }
972 UnicodeString empty;
973 return empty;
974}
975
976int32_t
977RuleBasedNumberFormat::getNumberOfRuleSetNames() const
978{
979 int32_t result = 0;
980 if (localizations) {
981 result = localizations->getNumberOfRuleSets();
982 }
983 else if (ruleSets) {
984 for (NFRuleSet** p = ruleSets; *p; ++p) {
985 if ((**p).isPublic()) {
986 ++result;
987 }
988 }
989 }
990 return result;
991}
992
993int32_t
994RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
995 if (localizations) {
996 return localizations->getNumberOfDisplayLocales();
997 }
998 return 0;
999}
1000
1001Locale
1002RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1003 if (U_FAILURE(status)) {
1004 return Locale("");
1005 }
1006 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1007 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1008 char buffer[64];
1009 int32_t cap = name.length() + 1;
1010 char* bp = buffer;
1011 if (cap > 64) {
1012 bp = (char *)uprv_malloc(cap);
1013 if (bp == NULL) {
1014 status = U_MEMORY_ALLOCATION_ERROR;
1015 return Locale("");
1016 }
1017 }
1018 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1019 Locale retLocale(bp);
1020 if (bp != buffer) {
1021 uprv_free(bp);
1022 }
1023 return retLocale;
1024 }
1025 status = U_ILLEGAL_ARGUMENT_ERROR;
1026 Locale retLocale;
1027 return retLocale;
1028}
1029
1030UnicodeString
1031RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1032 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1033 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1034 int32_t len = localeName.length();
1035 UChar* localeStr = localeName.getBuffer(len + 1);
1036 while (len >= 0) {
1037 localeStr[len] = 0;
1038 int32_t ix = localizations->indexForLocale(localeStr);
1039 if (ix >= 0) {
1040 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1041 return name;
1042 }
1043
1044 // trim trailing portion, skipping over ommitted sections
1045 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1046 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1047 }
1048 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1049 return name;
1050 }
1051 UnicodeString bogus;
1052 bogus.setToBogus();
1053 return bogus;
1054}
1055
1056UnicodeString
1057RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1058 if (localizations) {
1059 UnicodeString rsn(ruleSetName);
1060 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1061 return getRuleSetDisplayName(ix, localeParam);
1062 }
1063 UnicodeString bogus;
1064 bogus.setToBogus();
1065 return bogus;
1066}
1067
1068NFRuleSet*
1069RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1070{
1071 if (U_SUCCESS(status) && ruleSets) {
1072 for (NFRuleSet** p = ruleSets; *p; ++p) {
1073 NFRuleSet* rs = *p;
1074 if (rs->isNamed(name)) {
1075 return rs;
1076 }
1077 }
1078 status = U_ILLEGAL_ARGUMENT_ERROR;
1079 }
1080 return NULL;
1081}
1082
1083UnicodeString&
1084RuleBasedNumberFormat::format(const DigitList &number,
1085 UnicodeString &appendTo,
1086 FieldPositionIterator *posIter,
1087 UErrorCode &status) const {
1088 if (U_FAILURE(status)) {
1089 return appendTo;
1090 }
1091 DigitList copy(number);
1092 if (copy.fitsIntoInt64(false)) {
1093 format(((DigitList &)number).getInt64(), appendTo, posIter, status);
1094 }
1095 else {
1096 copy.roundAtExponent(0);
1097 if (copy.fitsIntoInt64(false)) {
1098 format(number.getDouble(), appendTo, posIter, status);
1099 }
1100 else {
1101 // We're outside of our normal range that this framework can handle.
1102 // The DecimalFormat will provide more accurate results.
1103
1104 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1105 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1106 Formattable f;
1107 f.adoptDigitList(new DigitList(number));
1108 decimalFormat->format(f, appendTo, posIter, status);
1109 delete decimalFormat;
1110 }
1111 }
1112 return appendTo;
1113}
1114
1115
1116UnicodeString&
1117RuleBasedNumberFormat::format(const DigitList &number,
1118 UnicodeString& appendTo,
1119 FieldPosition& pos,
1120 UErrorCode &status) const {
1121 if (U_FAILURE(status)) {
1122 return appendTo;
1123 }
1124 DigitList copy(number);
1125 if (copy.fitsIntoInt64(false)) {
1126 format(((DigitList &)number).getInt64(), appendTo, pos, status);
1127 }
1128 else {
1129 copy.roundAtExponent(0);
1130 if (copy.fitsIntoInt64(false)) {
1131 format(number.getDouble(), appendTo, pos, status);
1132 }
1133 else {
1134 // We're outside of our normal range that this framework can handle.
1135 // The DecimalFormat will provide more accurate results.
1136
1137 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1138 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1139 Formattable f;
1140 f.adoptDigitList(new DigitList(number));
1141 decimalFormat->format(f, appendTo, pos, status);
1142 delete decimalFormat;
1143 }
1144 }
1145 return appendTo;
1146}
1147
1148UnicodeString&
1149RuleBasedNumberFormat::format(int32_t number,
1150 UnicodeString& toAppendTo,
1151 FieldPosition& pos) const
1152{
1153 return format((int64_t)number, toAppendTo, pos);
1154}
1155
1156
1157UnicodeString&
1158RuleBasedNumberFormat::format(int64_t number,
1159 UnicodeString& toAppendTo,
1160 FieldPosition& /* pos */) const
1161{
1162 if (defaultRuleSet) {
1163 UErrorCode status = U_ZERO_ERROR;
1164 format(number, defaultRuleSet, toAppendTo, status);
1165 }
1166 return toAppendTo;
1167}
1168
1169
1170UnicodeString&
1171RuleBasedNumberFormat::format(double number,
1172 UnicodeString& toAppendTo,
1173 FieldPosition& /* pos */) const
1174{
1175 int32_t startPos = toAppendTo.length();
1176 UErrorCode status = U_ZERO_ERROR;
1177 if (defaultRuleSet) {
1178 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1179 }
1180 return adjustForCapitalizationContext(startPos, toAppendTo, status);
1181}
1182
1183
1184UnicodeString&
1185RuleBasedNumberFormat::format(int32_t number,
1186 const UnicodeString& ruleSetName,
1187 UnicodeString& toAppendTo,
1188 FieldPosition& pos,
1189 UErrorCode& status) const
1190{
1191 return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1192}
1193
1194
1195UnicodeString&
1196RuleBasedNumberFormat::format(int64_t number,
1197 const UnicodeString& ruleSetName,
1198 UnicodeString& toAppendTo,
1199 FieldPosition& /* pos */,
1200 UErrorCode& status) const
1201{
1202 if (U_SUCCESS(status)) {
1203 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1204 // throw new IllegalArgumentException("Can't use internal rule set");
1205 status = U_ILLEGAL_ARGUMENT_ERROR;
1206 } else {
1207 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1208 if (rs) {
1209 format(number, rs, toAppendTo, status);
1210 }
1211 }
1212 }
1213 return toAppendTo;
1214}
1215
1216
1217UnicodeString&
1218RuleBasedNumberFormat::format(double number,
1219 const UnicodeString& ruleSetName,
1220 UnicodeString& toAppendTo,
1221 FieldPosition& /* pos */,
1222 UErrorCode& status) const
1223{
1224 if (U_SUCCESS(status)) {
1225 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1226 // throw new IllegalArgumentException("Can't use internal rule set");
1227 status = U_ILLEGAL_ARGUMENT_ERROR;
1228 } else {
1229 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1230 if (rs) {
1231 int32_t startPos = toAppendTo.length();
1232 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1233 adjustForCapitalizationContext(startPos, toAppendTo, status);
1234 }
1235 }
1236 }
1237 return toAppendTo;
1238}
1239
1240/**
1241 * Bottleneck through which all the public format() methods
1242 * that take a long pass. By the time we get here, we know
1243 * which rule set we're using to do the formatting.
1244 * @param number The number to format
1245 * @param ruleSet The rule set to use to format the number
1246 * @return The text that resulted from formatting the number
1247 */
1248UnicodeString&
1249RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& toAppendTo, UErrorCode& status) const
1250{
1251 // all API format() routines that take a double vector through
1252 // here. We have these two identical functions-- one taking a
1253 // double and one taking a long-- the couple digits of precision
1254 // that long has but double doesn't (both types are 8 bytes long,
1255 // but double has to borrow some of the mantissa bits to hold
1256 // the exponent).
1257 // Create an empty string buffer where the result will
1258 // be built, and pass it to the rule set (along with an insertion
1259 // position of 0 and the number being formatted) to the rule set
1260 // for formatting
1261
1262 if (U_SUCCESS(status)) {
1263 if (number == U_INT64_MIN) {
1264 // We can't handle this value right now. Provide an accurate default value.
1265
1266 // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J.
1267 NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status);
1268 Formattable f;
1269 FieldPosition pos(FieldPosition::DONT_CARE);
1270 DigitList *digitList = new DigitList();
1271 digitList->set(number);
1272 f.adoptDigitList(digitList);
1273 decimalFormat->format(f, toAppendTo, pos, status);
1274 delete decimalFormat;
1275 }
1276 else {
1277 int32_t startPos = toAppendTo.length();
1278 ruleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1279 adjustForCapitalizationContext(startPos, toAppendTo, status);
1280 }
1281 }
1282 return toAppendTo;
1283}
1284
1285UnicodeString&
1286RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1287 UnicodeString& currentResult,
1288 UErrorCode& status) const
1289{
1290#if !UCONFIG_NO_BREAK_ITERATION
1291 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1292 if (capitalizationContext != UDISPCTX_CAPITALIZATION_NONE && startPos == 0 && currentResult.length() > 0) {
1293 // capitalize currentResult according to context
1294 UChar32 ch = currentResult.char32At(0);
1295 if (u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter != NULL &&
1296 ( capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1297 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1298 (capitalizationContext == UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1299 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1300 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1301 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1302 }
1303 }
1304#endif
1305 return currentResult;
1306}
1307
1308
1309void
1310RuleBasedNumberFormat::parse(const UnicodeString& text,
1311 Formattable& result,
1312 ParsePosition& parsePosition) const
1313{
1314 if (!ruleSets) {
1315 parsePosition.setErrorIndex(0);
1316 return;
1317 }
1318
1319 UnicodeString workingText(text, parsePosition.getIndex());
1320 ParsePosition workingPos(0);
1321
1322 ParsePosition high_pp(0);
1323 Formattable high_result;
1324
1325 for (NFRuleSet** p = ruleSets; *p; ++p) {
1326 NFRuleSet *rp = *p;
1327 if (rp->isPublic() && rp->isParseable()) {
1328 ParsePosition working_pp(0);
1329 Formattable working_result;
1330
1331 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
1332 if (working_pp.getIndex() > high_pp.getIndex()) {
1333 high_pp = working_pp;
1334 high_result = working_result;
1335
1336 if (high_pp.getIndex() == workingText.length()) {
1337 break;
1338 }
1339 }
1340 }
1341 }
1342
1343 int32_t startIndex = parsePosition.getIndex();
1344 parsePosition.setIndex(startIndex + high_pp.getIndex());
1345 if (high_pp.getIndex() > 0) {
1346 parsePosition.setErrorIndex(-1);
1347 } else {
1348 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1349 parsePosition.setErrorIndex(startIndex + errorIndex);
1350 }
1351 result = high_result;
1352 if (result.getType() == Formattable::kDouble) {
1353 double d = result.getDouble();
1354 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1355 // Note: casting a double to an int when the double is too large or small
1356 // to fit the destination is undefined behavior. The explicit range checks,
1357 // above, are required. Just casting and checking the result value is undefined.
1358 result.setLong(static_cast<int32_t>(d));
1359 }
1360 }
1361}
1362
1363#if !UCONFIG_NO_COLLATION
1364
1365void
1366RuleBasedNumberFormat::setLenient(UBool enabled)
1367{
1368 lenient = enabled;
1369 if (!enabled && collator) {
1370 delete collator;
1371 collator = NULL;
1372 }
1373}
1374
1375#endif
1376
1377void
1378RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1379 if (U_SUCCESS(status)) {
1380 if (ruleSetName.isEmpty()) {
1381 if (localizations) {
1382 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1383 defaultRuleSet = findRuleSet(name, status);
1384 } else {
1385 initDefaultRuleSet();
1386 }
1387 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1388 status = U_ILLEGAL_ARGUMENT_ERROR;
1389 } else {
1390 NFRuleSet* result = findRuleSet(ruleSetName, status);
1391 if (result != NULL) {
1392 defaultRuleSet = result;
1393 }
1394 }
1395 }
1396}
1397
1398UnicodeString
1399RuleBasedNumberFormat::getDefaultRuleSetName() const {
1400 UnicodeString result;
1401 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1402 defaultRuleSet->getName(result);
1403 } else {
1404 result.setToBogus();
1405 }
1406 return result;
1407}
1408
1409void
1410RuleBasedNumberFormat::initDefaultRuleSet()
1411{
1412 defaultRuleSet = NULL;
1413 if (!ruleSets) {
1414 return;
1415 }
1416
1417 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1418 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1419 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1420
1421 NFRuleSet**p = &ruleSets[0];
1422 while (*p) {
1423 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1424 defaultRuleSet = *p;
1425 return;
1426 } else {
1427 ++p;
1428 }
1429 }
1430
1431 defaultRuleSet = *--p;
1432 if (!defaultRuleSet->isPublic()) {
1433 while (p != ruleSets) {
1434 if ((*--p)->isPublic()) {
1435 defaultRuleSet = *p;
1436 break;
1437 }
1438 }
1439 }
1440}
1441
1442
1443void
1444RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1445 UParseError& pErr, UErrorCode& status)
1446{
1447 // TODO: implement UParseError
1448 uprv_memset(&pErr, 0, sizeof(UParseError));
1449 // Note: this can leave ruleSets == NULL, so remaining code should check
1450 if (U_FAILURE(status)) {
1451 return;
1452 }
1453
1454 initializeDecimalFormatSymbols(status);
1455 initializeDefaultInfinityRule(status);
1456 initializeDefaultNaNRule(status);
1457 if (U_FAILURE(status)) {
1458 return;
1459 }
1460
1461 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1462
1463 UnicodeString description(rules);
1464 if (!description.length()) {
1465 status = U_MEMORY_ALLOCATION_ERROR;
1466 return;
1467 }
1468
1469 // start by stripping the trailing whitespace from all the rules
1470 // (this is all the whitespace follwing each semicolon in the
1471 // description). This allows us to look for rule-set boundaries
1472 // by searching for ";%" without having to worry about whitespace
1473 // between the ; and the %
1474 stripWhitespace(description);
1475
1476 // check to see if there's a set of lenient-parse rules. If there
1477 // is, pull them out into our temporary holding place for them,
1478 // and delete them from the description before the real desciption-
1479 // parsing code sees them
1480 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1481 if (lp != -1) {
1482 // we've got to make sure we're not in the middle of a rule
1483 // (where "%%lenient-parse" would actually get treated as
1484 // rule text)
1485 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1486 // locate the beginning and end of the actual collation
1487 // rules (there may be whitespace between the name and
1488 // the first token in the description)
1489 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1490
1491 if (lpEnd == -1) {
1492 lpEnd = description.length() - 1;
1493 }
1494 int lpStart = lp + u_strlen(gLenientParse);
1495 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1496 ++lpStart;
1497 }
1498
1499 // copy out the lenient-parse rules and delete them
1500 // from the description
1501 lenientParseRules = new UnicodeString();
1502 /* test for NULL */
1503 if (lenientParseRules == 0) {
1504 status = U_MEMORY_ALLOCATION_ERROR;
1505 return;
1506 }
1507 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1508
1509 description.remove(lp, lpEnd + 1 - lp);
1510 }
1511 }
1512
1513 // pre-flight parsing the description and count the number of
1514 // rule sets (";%" marks the end of one rule set and the beginning
1515 // of the next)
1516 numRuleSets = 0;
1517 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1518 ++numRuleSets;
1519 ++p;
1520 }
1521 ++numRuleSets;
1522
1523 // our rule list is an array of the appropriate size
1524 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1525 /* test for NULL */
1526 if (ruleSets == 0) {
1527 status = U_MEMORY_ALLOCATION_ERROR;
1528 return;
1529 }
1530
1531 for (int i = 0; i <= numRuleSets; ++i) {
1532 ruleSets[i] = NULL;
1533 }
1534
1535 // divide up the descriptions into individual rule-set descriptions
1536 // and store them in a temporary array. At each step, we also
1537 // new up a rule set, but all this does is initialize its name
1538 // and remove it from its description. We can't actually parse
1539 // the rest of the descriptions and finish initializing everything
1540 // because we have to know the names and locations of all the rule
1541 // sets before we can actually set everything up
1542 if(!numRuleSets) {
1543 status = U_ILLEGAL_ARGUMENT_ERROR;
1544 return;
1545 }
1546
1547 ruleSetDescriptions = new UnicodeString[numRuleSets];
1548 if (ruleSetDescriptions == 0) {
1549 status = U_MEMORY_ALLOCATION_ERROR;
1550 return;
1551 }
1552
1553 {
1554 int curRuleSet = 0;
1555 int32_t start = 0;
1556 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1557 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1558 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1559 if (ruleSets[curRuleSet] == 0) {
1560 status = U_MEMORY_ALLOCATION_ERROR;
1561 return;
1562 }
1563 ++curRuleSet;
1564 start = p + 1;
1565 }
1566 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1567 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1568 if (ruleSets[curRuleSet] == 0) {
1569 status = U_MEMORY_ALLOCATION_ERROR;
1570 return;
1571 }
1572 }
1573
1574 // now we can take note of the formatter's default rule set, which
1575 // is the last public rule set in the description (it's the last
1576 // rather than the first so that a user can create a new formatter
1577 // from an existing formatter and change its default behavior just
1578 // by appending more rule sets to the end)
1579
1580 // {dlf} Initialization of a fraction rule set requires the default rule
1581 // set to be known. For purposes of initialization, this is always the
1582 // last public rule set, no matter what the localization data says.
1583 initDefaultRuleSet();
1584
1585 // finally, we can go back through the temporary descriptions
1586 // list and finish seting up the substructure (and we throw
1587 // away the temporary descriptions as we go)
1588 {
1589 for (int i = 0; i < numRuleSets; i++) {
1590 ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
1591 }
1592 }
1593
1594 // Now that the rules are initialized, the 'real' default rule
1595 // set can be adjusted by the localization data.
1596
1597 // The C code keeps the localization array as is, rather than building
1598 // a separate array of the public rule set names, so we have less work
1599 // to do here-- but we still need to check the names.
1600
1601 if (localizationInfos) {
1602 // confirm the names, if any aren't in the rules, that's an error
1603 // it is ok if the rules contain public rule sets that are not in this list
1604 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1605 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1606 NFRuleSet* rs = findRuleSet(name, status);
1607 if (rs == NULL) {
1608 break; // error
1609 }
1610 if (i == 0) {
1611 defaultRuleSet = rs;
1612 }
1613 }
1614 } else {
1615 defaultRuleSet = getDefaultRuleSet();
1616 }
1617 originalDescription = rules;
1618}
1619
1620// override the NumberFormat implementation in order to
1621// lazily initialize relevant items
1622void
1623RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1624{
1625 NumberFormat::setContext(value, status);
1626 if (U_SUCCESS(status)) {
1627 if (!capitalizationInfoSet &&
1628 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1629 initCapitalizationContextInfo(locale);
1630 capitalizationInfoSet = TRUE;
1631 }
1632#if !UCONFIG_NO_BREAK_ITERATION
1633 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1634 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1635 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1636 UErrorCode status = U_ZERO_ERROR;
1637 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1638 if (U_FAILURE(status)) {
1639 delete capitalizationBrkIter;
1640 capitalizationBrkIter = NULL;
1641 }
1642 }
1643#endif
1644 }
1645}
1646
1647void
1648RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1649{
1650#if !UCONFIG_NO_BREAK_ITERATION
1651 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1652 UErrorCode status = U_ZERO_ERROR;
1653 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1654 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1655 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1656 if (U_SUCCESS(status) && rb != NULL) {
1657 int32_t len = 0;
1658 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1659 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1660 capitalizationForUIListMenu = intVector[0];
1661 capitalizationForStandAlone = intVector[1];
1662 }
1663 }
1664 ures_close(rb);
1665#endif
1666}
1667
1668void
1669RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1670{
1671 // iterate through the characters...
1672 UnicodeString result;
1673
1674 int start = 0;
1675 while (start != -1 && start < description.length()) {
1676 // seek to the first non-whitespace character...
1677 while (start < description.length()
1678 && PatternProps::isWhiteSpace(description.charAt(start))) {
1679 ++start;
1680 }
1681
1682 // locate the next semicolon in the text and copy the text from
1683 // our current position up to that semicolon into the result
1684 int32_t p = description.indexOf(gSemiColon, start);
1685 if (p == -1) {
1686 // or if we don't find a semicolon, just copy the rest of
1687 // the string into the result
1688 result.append(description, start, description.length() - start);
1689 start = -1;
1690 }
1691 else if (p < description.length()) {
1692 result.append(description, start, p + 1 - start);
1693 start = p + 1;
1694 }
1695
1696 // when we get here, we've seeked off the end of the sring, and
1697 // we terminate the loop (we continue until *start* is -1 rather
1698 // than until *p* is -1, because otherwise we'd miss the last
1699 // rule in the description)
1700 else {
1701 start = -1;
1702 }
1703 }
1704
1705 description.setTo(result);
1706}
1707
1708
1709void
1710RuleBasedNumberFormat::dispose()
1711{
1712 if (ruleSets) {
1713 for (NFRuleSet** p = ruleSets; *p; ++p) {
1714 delete *p;
1715 }
1716 uprv_free(ruleSets);
1717 ruleSets = NULL;
1718 }
1719
1720 if (ruleSetDescriptions) {
1721 delete [] ruleSetDescriptions;
1722 ruleSetDescriptions = NULL;
1723 }
1724
1725#if !UCONFIG_NO_COLLATION
1726 delete collator;
1727#endif
1728 collator = NULL;
1729
1730 delete decimalFormatSymbols;
1731 decimalFormatSymbols = NULL;
1732
1733 delete defaultInfinityRule;
1734 defaultInfinityRule = NULL;
1735
1736 delete defaultNaNRule;
1737 defaultNaNRule = NULL;
1738
1739 delete lenientParseRules;
1740 lenientParseRules = NULL;
1741
1742#if !UCONFIG_NO_BREAK_ITERATION
1743 delete capitalizationBrkIter;
1744 capitalizationBrkIter = NULL;
1745#endif
1746
1747 if (localizations) {
1748 localizations = localizations->unref();
1749 }
1750}
1751
1752
1753//-----------------------------------------------------------------------
1754// package-internal API
1755//-----------------------------------------------------------------------
1756
1757/**
1758 * Returns the collator to use for lenient parsing. The collator is lazily created:
1759 * this function creates it the first time it's called.
1760 * @return The collator to use for lenient parsing, or null if lenient parsing
1761 * is turned off.
1762*/
1763const RuleBasedCollator*
1764RuleBasedNumberFormat::getCollator() const
1765{
1766#if !UCONFIG_NO_COLLATION
1767 if (!ruleSets) {
1768 return NULL;
1769 }
1770
1771 // lazy-evaluate the collator
1772 if (collator == NULL && lenient) {
1773 // create a default collator based on the formatter's locale,
1774 // then pull out that collator's rules, append any additional
1775 // rules specified in the description, and create a _new_
1776 // collator based on the combinaiton of those rules
1777
1778 UErrorCode status = U_ZERO_ERROR;
1779
1780 Collator* temp = Collator::createInstance(locale, status);
1781 RuleBasedCollator* newCollator;
1782 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1783 if (lenientParseRules) {
1784 UnicodeString rules(newCollator->getRules());
1785 rules.append(*lenientParseRules);
1786
1787 newCollator = new RuleBasedCollator(rules, status);
1788 // Exit if newCollator could not be created.
1789 if (newCollator == NULL) {
1790 return NULL;
1791 }
1792 } else {
1793 temp = NULL;
1794 }
1795 if (U_SUCCESS(status)) {
1796 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1797 // cast away const
1798 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1799 } else {
1800 delete newCollator;
1801 }
1802 }
1803 delete temp;
1804 }
1805#endif
1806
1807 // if lenient-parse mode is off, this will be null
1808 // (see setLenientParseMode())
1809 return collator;
1810}
1811
1812
1813DecimalFormatSymbols*
1814RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1815{
1816 // lazy-evaluate the DecimalFormatSymbols object. This object
1817 // is shared by all DecimalFormat instances belonging to this
1818 // formatter
1819 if (decimalFormatSymbols == NULL) {
1820 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1821 if (U_SUCCESS(status)) {
1822 decimalFormatSymbols = temp;
1823 }
1824 else {
1825 delete temp;
1826 }
1827 }
1828 return decimalFormatSymbols;
1829}
1830
1831/**
1832 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1833 * instances owned by this formatter.
1834*/
1835const DecimalFormatSymbols*
1836RuleBasedNumberFormat::getDecimalFormatSymbols() const
1837{
1838 return decimalFormatSymbols;
1839}
1840
1841NFRule*
1842RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1843{
1844 if (U_FAILURE(status)) {
1845 return NULL;
1846 }
1847 if (defaultInfinityRule == NULL) {
1848 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1849 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1850 NFRule* temp = new NFRule(this, rule, status);
1851 if (U_SUCCESS(status)) {
1852 defaultInfinityRule = temp;
1853 }
1854 else {
1855 delete temp;
1856 }
1857 }
1858 return defaultInfinityRule;
1859}
1860
1861const NFRule*
1862RuleBasedNumberFormat::getDefaultInfinityRule() const
1863{
1864 return defaultInfinityRule;
1865}
1866
1867NFRule*
1868RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1869{
1870 if (U_FAILURE(status)) {
1871 return NULL;
1872 }
1873 if (defaultNaNRule == NULL) {
1874 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1875 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1876 NFRule* temp = new NFRule(this, rule, status);
1877 if (U_SUCCESS(status)) {
1878 defaultNaNRule = temp;
1879 }
1880 else {
1881 delete temp;
1882 }
1883 }
1884 return defaultNaNRule;
1885}
1886
1887const NFRule*
1888RuleBasedNumberFormat::getDefaultNaNRule() const
1889{
1890 return defaultNaNRule;
1891}
1892
1893// De-owning the current localized symbols and adopt the new symbols.
1894void
1895RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1896{
1897 if (symbolsToAdopt == NULL) {
1898 return; // do not allow caller to set decimalFormatSymbols to NULL
1899 }
1900
1901 if (decimalFormatSymbols != NULL) {
1902 delete decimalFormatSymbols;
1903 }
1904
1905 decimalFormatSymbols = symbolsToAdopt;
1906
1907 {
1908 // Apply the new decimalFormatSymbols by reparsing the rulesets
1909 UErrorCode status = U_ZERO_ERROR;
1910
1911 delete defaultInfinityRule;
1912 defaultInfinityRule = NULL;
1913 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1914
1915 delete defaultNaNRule;
1916 defaultNaNRule = NULL;
1917 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1918
1919 if (ruleSets) {
1920 for (int32_t i = 0; i < numRuleSets; i++) {
1921 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1922 }
1923 }
1924 }
1925}
1926
1927// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1928void
1929RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1930{
1931 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1932}
1933
1934PluralFormat *
1935RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1936 const UnicodeString &pattern,
1937 UErrorCode& status) const
1938{
1939 return new PluralFormat(locale, pluralType, pattern, status);
1940}
1941
1942U_NAMESPACE_END
1943
1944/* U_HAVE_RBNF */
1945#endif