]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbnf.cpp
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
2ca993e8 3* Copyright (C) 1997-2015, International Business Machines Corporation
73c04bcf 4* and others. All Rights Reserved.
b75a7d8f
A
5*******************************************************************************
6*/
7
57a6839d 8#include "unicode/utypes.h"
51004dcb 9#include "utypeinfo.h" // for 'typeid' to work
729e4ab9 10
b75a7d8f
A
11#include "unicode/rbnf.h"
12
13#if U_HAVE_RBNF
14
15#include "unicode/normlzr.h"
b331163b 16#include "unicode/plurfmt.h"
b75a7d8f
A
17#include "unicode/tblcoll.h"
18#include "unicode/uchar.h"
19#include "unicode/ucol.h"
20#include "unicode/uloc.h"
21#include "unicode/unum.h"
22#include "unicode/ures.h"
23#include "unicode/ustring.h"
24#include "unicode/utf16.h"
374ca955 25#include "unicode/udata.h"
57a6839d
A
26#include "unicode/udisplaycontext.h"
27#include "unicode/brkiter.h"
b75a7d8f
A
28#include "nfrs.h"
29
30#include "cmemory.h"
31#include "cstring.h"
4388f060 32#include "patternprops.h"
729e4ab9 33#include "uresimp.h"
374ca955
A
34
35// debugging
b331163b 36// #define RBNF_DEBUG
374ca955 37
b331163b 38#ifdef RBNF_DEBUG
2ca993e8 39#include <stdio.h>
374ca955
A
40#endif
41
42#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
b75a7d8f
A
43
// Rule-set-name prefix "%%". The format() overloads that take a rule set name
// reject names that begin with it (U_ILLEGAL_ARGUMENT_ERROR).
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
static const UChar gSemiColon = 0x003B; /* ';' */
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble is 2^22 expressed as a double; kMaxDouble is its square (2^44).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
64
374ca955
A
65U_NAMESPACE_BEGIN
66
67UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68
73c04bcf
A
69/*
70This is a utility class. It does not use ICU's RTTI.
71If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72Please make sure that intltest passes on Windows in Release mode,
73since the string pooling per compilation unit will mess up how RTTI works.
74The RTTI code was also removed due to lack of code coverage.
75*/
76class LocalizationInfo : public UMemory {
374ca955 77protected:
4388f060 78 virtual ~LocalizationInfo();
374ca955
A
79 uint32_t refcount;
80
81public:
82 LocalizationInfo() : refcount(0) {}
83
84 LocalizationInfo* ref(void) {
85 ++refcount;
86 return this;
87 }
88
89 LocalizationInfo* unref(void) {
90 if (refcount && --refcount == 0) {
91 delete this;
92 }
93 return NULL;
94 }
95
96 virtual UBool operator==(const LocalizationInfo* rhs) const;
97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98
99 virtual int32_t getNumberOfRuleSets(void) const = 0;
100 virtual const UChar* getRuleSetName(int32_t index) const = 0;
101 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102 virtual const UChar* getLocaleName(int32_t index) const = 0;
103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104
105 virtual int32_t indexForLocale(const UChar* locale) const;
106 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107
73c04bcf
A
108// virtual UClassID getDynamicClassID() const = 0;
109// static UClassID getStaticClassID(void);
374ca955
A
110};
111
4388f060
A
// Out-of-line definition of the (empty) virtual destructor.
LocalizationInfo::~LocalizationInfo() {}
113
73c04bcf 114//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
374ca955
A
115
116// if both strings are NULL, this returns TRUE
117static UBool
118streq(const UChar* lhs, const UChar* rhs) {
119 if (rhs == lhs) {
120 return TRUE;
121 }
122 if (lhs && rhs) {
123 return u_strcmp(lhs, rhs) == 0;
124 }
125 return FALSE;
126}
127
128UBool
129LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
130 if (rhs) {
131 if (this == rhs) {
132 return TRUE;
133 }
134
135 int32_t rsc = getNumberOfRuleSets();
136 if (rsc == rhs->getNumberOfRuleSets()) {
137 for (int i = 0; i < rsc; ++i) {
138 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
139 return FALSE;
140 }
141 }
142 int32_t dlc = getNumberOfDisplayLocales();
143 if (dlc == rhs->getNumberOfDisplayLocales()) {
144 for (int i = 0; i < dlc; ++i) {
145 const UChar* locale = getLocaleName(i);
146 int32_t ix = rhs->indexForLocale(locale);
147 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
148 if (!streq(locale, rhs->getLocaleName(ix))) {
149 return FALSE;
150 }
151 for (int j = 0; j < rsc; ++j) {
152 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
153 return FALSE;
154 }
155 }
156 }
157 return TRUE;
158 }
159 }
160 }
161 return FALSE;
162}
163
164int32_t
165LocalizationInfo::indexForLocale(const UChar* locale) const {
166 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
167 if (streq(locale, getLocaleName(i))) {
168 return i;
169 }
170 }
171 return -1;
172}
173
174int32_t
175LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
176 if (ruleset) {
177 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
178 if (streq(ruleset, getRuleSetName(i))) {
179 return i;
180 }
181 }
182 }
183 return -1;
184}
185
186
187typedef void (*Fn_Deleter)(void*);
188
189class VArray {
190 void** buf;
191 int32_t cap;
192 int32_t size;
193 Fn_Deleter deleter;
194public:
195 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
196
197 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
198
199 ~VArray() {
200 if (deleter) {
201 for (int i = 0; i < size; ++i) {
202 (*deleter)(buf[i]);
203 }
204 }
205 uprv_free(buf);
206 }
207
208 int32_t length() {
209 return size;
210 }
211
212 void add(void* elem, UErrorCode& status) {
213 if (U_SUCCESS(status)) {
214 if (size == cap) {
215 if (cap == 0) {
216 cap = 1;
217 } else if (cap < 256) {
218 cap *= 2;
219 } else {
220 cap += 256;
221 }
222 if (buf == NULL) {
223 buf = (void**)uprv_malloc(cap * sizeof(void*));
224 } else {
225 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
226 }
227 if (buf == NULL) {
228 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
229 status = U_MEMORY_ALLOCATION_ERROR;
230 return;
231 }
232 void* start = &buf[size];
233 size_t count = (cap - size) * sizeof(void*);
234 uprv_memset(start, 0, count); // fill with nulls, just because
235 }
236 buf[size++] = elem;
237 }
238 }
239
240 void** release(void) {
241 void** result = buf;
242 buf = NULL;
243 cap = 0;
244 size = 0;
245 return result;
246 }
247};
248
249class LocDataParser;
250
// LocalizationInfo backed by a parsed localization string. Instances are
// built by LocDataParser (a friend) via the static create() factory.
class StringLocalizationInfo : public LocalizationInfo {
    UChar* info;          // character buffer; freed with uprv_free in the destructor
    UChar*** data;        // NULL-terminated array of rows; each row is freed in the destructor
    int32_t numRuleSets;  // value returned by getNumberOfRuleSets()
    int32_t numLocales;   // value returned by getNumberOfDisplayLocales()

friend class LocDataParser;

    // Private: only LocDataParser constructs instances. Stores the arguments as-is.
    StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
        : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
    {
    }

public:
    // Parses `info`; returns NULL (without setting an error) for an empty string.
    static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);

    virtual ~StringLocalizationInfo();
    virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
    virtual const UChar* getRuleSetName(int32_t index) const;
    virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
    virtual const UChar* getLocaleName(int32_t index) const;
    virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;

//    virtual UClassID getDynamicClassID() const;
//    static UClassID getStaticClassID(void);

private:
    // NOTE(review): declared here, but no definition or caller is visible in this part of the file.
    void init(UErrorCode& status) const;
};
280
281
// Punctuation characters recognized by LocDataParser.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
290
291/**
292 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
293 */
294class LocDataParser {
295 UChar* data;
296 const UChar* e;
297 UChar* p;
298 UChar ch;
299 UParseError& pe;
300 UErrorCode& ec;
301
302public:
303 LocDataParser(UParseError& parseError, UErrorCode& status)
304 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
305 ~LocDataParser() {}
306
307 /*
308 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
309 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
310 */
311 StringLocalizationInfo* parse(UChar* data, int32_t len);
312
313private:
314
315 void inc(void) { ++p; ch = 0xffff; }
316 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
317 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
4388f060 318 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
374ca955 319 UBool inList(UChar c, const UChar* list) const {
4388f060 320 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
374ca955
A
321 while (*list && *list != c) ++list; return *list == c;
322 }
323 void parseError(const char* msg);
324
325 StringLocalizationInfo* doParse(void);
326
327 UChar** nextArray(int32_t& requiredLength);
328 UChar* nextString(void);
329};
330
// ERROR(msg) reports a parse error and returns NULL from the enclosing
// function. It expands to TWO statements, so it must only be used inside a
// braced block (never as the sole body of an unbraced if/else).
// EXPLANATION_ARG names parseError's parameter only in debug builds, where
// the message is actually used.
#ifdef RBNF_DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) parseError(NULL); return NULL;
#define EXPLANATION_ARG
#endif
338
339
// Terminator list for a string opened with a double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Terminator list for a string opened with a single quote.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Terminator list for an unquoted string. The leading SPACE makes
// LocDataParser::inList() treat any pattern white space as a terminator.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
351
// Fn_Deleter that releases an element with uprv_free; passed to the VArray
// of rows built in LocDataParser::doParse().
static void
DeleteFn(void* p) {
  uprv_free(p);
}
356
357StringLocalizationInfo*
358LocDataParser::parse(UChar* _data, int32_t len) {
359 if (U_FAILURE(ec)) {
360 if (_data) uprv_free(_data);
361 return NULL;
362 }
363
364 pe.line = 0;
365 pe.offset = -1;
366 pe.postContext[0] = 0;
367 pe.preContext[0] = 0;
368
369 if (_data == NULL) {
370 ec = U_ILLEGAL_ARGUMENT_ERROR;
371 return NULL;
372 }
373
374 if (len <= 0) {
375 ec = U_ILLEGAL_ARGUMENT_ERROR;
376 uprv_free(_data);
377 return NULL;
378 }
379
380 data = _data;
381 e = data + len;
382 p = _data;
383 ch = 0xffff;
384
385 return doParse();
386}
387
388
389StringLocalizationInfo*
390LocDataParser::doParse(void) {
391 skipWhitespace();
392 if (!checkInc(OPEN_ANGLE)) {
393 ERROR("Missing open angle");
394 } else {
395 VArray array(DeleteFn);
396 UBool mightHaveNext = TRUE;
397 int32_t requiredLength = -1;
398 while (mightHaveNext) {
399 mightHaveNext = FALSE;
400 UChar** elem = nextArray(requiredLength);
401 skipWhitespace();
402 UBool haveComma = check(COMMA);
403 if (elem) {
404 array.add(elem, ec);
405 if (haveComma) {
406 inc();
407 mightHaveNext = TRUE;
408 }
409 } else if (haveComma) {
410 ERROR("Unexpected character");
411 }
412 }
413
414 skipWhitespace();
415 if (!checkInc(CLOSE_ANGLE)) {
416 if (check(OPEN_ANGLE)) {
417 ERROR("Missing comma in outer array");
418 } else {
419 ERROR("Missing close angle bracket in outer array");
420 }
421 }
422
423 skipWhitespace();
424 if (p != e) {
425 ERROR("Extra text after close of localization data");
426 }
427
428 array.add(NULL, ec);
429 if (U_SUCCESS(ec)) {
430 int32_t numLocs = array.length() - 2; // subtract first, NULL
431 UChar*** result = (UChar***)array.release();
432
433 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
434 }
435 }
436
437 ERROR("Unknown error");
438}
439
440UChar**
441LocDataParser::nextArray(int32_t& requiredLength) {
442 if (U_FAILURE(ec)) {
443 return NULL;
444 }
445
446 skipWhitespace();
447 if (!checkInc(OPEN_ANGLE)) {
448 ERROR("Missing open angle");
449 }
450
451 VArray array;
452 UBool mightHaveNext = TRUE;
453 while (mightHaveNext) {
454 mightHaveNext = FALSE;
455 UChar* elem = nextString();
456 skipWhitespace();
457 UBool haveComma = check(COMMA);
458 if (elem) {
459 array.add(elem, ec);
460 if (haveComma) {
461 inc();
462 mightHaveNext = TRUE;
463 }
464 } else if (haveComma) {
465 ERROR("Unexpected comma");
466 }
467 }
468 skipWhitespace();
469 if (!checkInc(CLOSE_ANGLE)) {
470 if (check(OPEN_ANGLE)) {
471 ERROR("Missing close angle bracket in inner array");
472 } else {
473 ERROR("Missing comma in inner array");
474 }
475 }
476
477 array.add(NULL, ec);
478 if (U_SUCCESS(ec)) {
479 if (requiredLength == -1) {
480 requiredLength = array.length() + 1;
481 } else if (array.length() != requiredLength) {
482 ec = U_ILLEGAL_ARGUMENT_ERROR;
483 ERROR("Array not of required length");
484 }
485
486 return (UChar**)array.release();
487 }
488 ERROR("Unknown Error");
489}
490
// Scans the next string at the current position. A string is either quoted
// (with ' or ") and runs to the matching quote, or unquoted and runs to the
// first character in NOQUOTE_STOPLIST. The string is terminated in place by
// overwriting the character after it with 0; the overwritten character is
// remembered in `ch` so that check()/checkInc() can still see it.
// Returns a pointer into the buffer, or NULL when there is no string here
// or when an error was reported through ERROR.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            // skip the opening quote; only the same quote character ends the string
            inc();
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        // x is the terminating character, saved before it is overwritten below
        UChar x = *p;
        if (p > start) {
            ch = x;
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            // step past the closing quote (this also clears ch)
            inc();
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
533
57a6839d
A
534void LocDataParser::parseError(const char* EXPLANATION_ARG)
535{
374ca955
A
536 if (!data) {
537 return;
538 }
b75a7d8f 539
374ca955 540 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
73c04bcf 541 if (start < data) {
374ca955 542 start = data;
73c04bcf
A
543 }
544 for (UChar* x = p; --x >= start;) {
374ca955
A
545 if (!*x) {
546 start = x+1;
547 break;
548 }
73c04bcf 549 }
374ca955 550 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
73c04bcf 551 if (limit > e) {
374ca955 552 limit = e;
73c04bcf
A
553 }
554 u_strncpy(pe.preContext, start, (int32_t)(p-start));
374ca955 555 pe.preContext[p-start] = 0;
73c04bcf 556 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
374ca955 557 pe.postContext[limit-p] = 0;
73c04bcf 558 pe.offset = (int32_t)(p - data);
374ca955 559
b331163b 560#ifdef RBNF_DEBUG
57a6839d 561 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
374ca955
A
562
563 UnicodeString msg;
564 msg.append(start, p - start);
565 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
566 msg.append(p, limit-p);
57a6839d 567 msg.append(UNICODE_STRING_SIMPLE("'"));
374ca955
A
568
569 char buf[128];
570 int32_t len = msg.extract(0, msg.length(), buf, 128);
571 if (len >= 128) {
572 buf[127] = 0;
573 } else {
574 buf[len] = 0;
575 }
576 fprintf(stderr, "%s\n", buf);
577 fflush(stderr);
578#endif
579
580 uprv_free(data);
581 data = NULL;
582 p = NULL;
583 e = NULL;
584
585 if (U_SUCCESS(ec)) {
586 ec = U_PARSE_ERROR;
587 }
588}
589
73c04bcf 590//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
374ca955
A
591
592StringLocalizationInfo*
593StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
594 if (U_FAILURE(status)) {
595 return NULL;
596 }
597
598 int32_t len = info.length();
599 if (len == 0) {
600 return NULL; // no error;
601 }
602
603 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
604 if (!p) {
605 status = U_MEMORY_ALLOCATION_ERROR;
606 return NULL;
607 }
608 info.extract(p, len, status);
609 if (!U_FAILURE(status)) {
610 status = U_ZERO_ERROR; // clear warning about non-termination
611 }
612
613 LocDataParser parser(perror, status);
614 return parser.parse(p, len);
615}
616
617StringLocalizationInfo::~StringLocalizationInfo() {
618 for (UChar*** p = (UChar***)data; *p; ++p) {
619 // remaining data is simply pointer into our unicode string data.
620 if (*p) uprv_free(*p);
621 }
622 if (data) uprv_free(data);
623 if (info) uprv_free(info);
624}
625
626
627const UChar*
628StringLocalizationInfo::getRuleSetName(int32_t index) const {
629 if (index >= 0 && index < getNumberOfRuleSets()) {
630 return data[0][index];
631 }
632 return NULL;
633}
634
635const UChar*
636StringLocalizationInfo::getLocaleName(int32_t index) const {
637 if (index >= 0 && index < getNumberOfDisplayLocales()) {
638 return data[index+1][0];
639 }
640 return NULL;
641}
642
643const UChar*
644StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
645 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
646 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
647 return data[localeIndex+1][ruleIndex+1];
648 }
649 return NULL;
650}
651
652// ----------
653
// Constructs a formatter from a rule description and a localization string,
// for the given locale. `locs` is parsed with StringLocalizationInfo::create
// (which yields NULL for an empty string) and the result is passed to init()
// together with the description. Errors are reported through perror/status.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
677
// Same as the (description, locs, locale, ...) constructor, but uses
// Locale::getDefault() as the formatter's locale.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             const UnicodeString& locs,
                                             UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
  init(description, locinfo, perror, status);
}
701
// Constructs a formatter from a rule description and an already-built
// LocalizationInfo, for the given locale. `info` is forwarded unchanged to
// init(). NOTE(review): ownership of `info` is decided inside init(), which
// is not visible here - confirm before relying on it.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             LocalizationInfo* info,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
  init(description, info, perror, status);
}
724
// Constructs a formatter from a rule description only: no localization
// info (NULL is passed to init()) and Locale::getDefault() as the locale.
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}
747
// Constructs a formatter from a rule description for the given locale,
// with no localization info (NULL is passed to init()).
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         const Locale& aLocale,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(aLocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    init(description, NULL, perror, status);
}
771
772RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
773 : ruleSets(NULL)
4388f060
A
774 , ruleSetDescriptions(NULL)
775 , numRuleSets(0)
b75a7d8f
A
776 , defaultRuleSet(NULL)
777 , locale(alocale)
778 , collator(NULL)
779 , decimalFormatSymbols(NULL)
2ca993e8
A
780 , defaultInfinityRule(NULL)
781 , defaultNaNRule(NULL)
b75a7d8f
A
782 , lenient(FALSE)
783 , lenientParseRules(NULL)
374ca955 784 , localizations(NULL)
57a6839d
A
785 , capitalizationInfoSet(FALSE)
786 , capitalizationForUIListMenu(FALSE)
787 , capitalizationForStandAlone(FALSE)
788 , capitalizationBrkIter(NULL)
b75a7d8f
A
789{
790 if (U_FAILURE(status)) {
791 return;
792 }
793
729e4ab9 794 const char* rules_tag = "RBNFRules";
b75a7d8f
A
795 const char* fmt_tag = "";
796 switch (tag) {
797 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
798 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
799 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
729e4ab9 800 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
b75a7d8f
A
801 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
802 }
803
374ca955
A
804 // TODO: read localization info from resource
805 LocalizationInfo* locinfo = NULL;
806
374ca955 807 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
b75a7d8f 808 if (U_SUCCESS(status)) {
374ca955
A
809 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
810 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
729e4ab9
A
811
812 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
813 if (U_FAILURE(status)) {
814 ures_close(nfrb);
815 }
816 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
817 if (U_FAILURE(status)) {
818 ures_close(rbnfRules);
819 ures_close(nfrb);
820 return;
821 }
4388f060 822
729e4ab9
A
823 UnicodeString desc;
824 while (ures_hasNext(ruleSets)) {
4388f060 825 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
729e4ab9 826 }
b75a7d8f 827 UParseError perror;
729e4ab9 828
2ca993e8 829 init(desc, locinfo, perror, status);
729e4ab9 830
729e4ab9
A
831 ures_close(ruleSets);
832 ures_close(rbnfRules);
b75a7d8f 833 }
b75a7d8f
A
834 ures_close(nfrb);
835}
836
// Copy constructor: initializes every member to its empty default (so that
// the dispose() call inside operator= sees a well-defined object) and then
// delegates the actual copying to operator=.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs)
  , ruleSets(NULL)
  , ruleSetDescriptions(NULL)
  , numRuleSets(0)
  , defaultRuleSet(NULL)
  , locale(rhs.locale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , defaultInfinityRule(NULL)
  , defaultNaNRule(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , capitalizationInfoSet(FALSE)
  , capitalizationForUIListMenu(FALSE)
  , capitalizationForStandAlone(FALSE)
  , capitalizationBrkIter(NULL)
{
    this->operator=(rhs);
}
858
374ca955
A
859// --------
860
b75a7d8f
A
// Assignment: discards this object's current state and rebuilds it from
// rhs by re-running init() on rhs's original rule description. Errors from
// the rebuild go into a local status and are not reported to the caller.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    UErrorCode status = U_ZERO_ERROR;
    dispose();
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    // NOTE(review): dereferences rhs.getDecimalFormatSymbols() unchecked;
    // presumably never NULL for a constructed rhs - confirm.
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    // The localization info is shared: ref() adds a reference for this object.
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    // The break iterator is cloned rather than shared.
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
887
// All cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
892
// Returns a new heap-allocated copy of this formatter, made with the copy
// constructor.
Format*
RuleBasedNumberFormat::clone(void) const
{
    return new RuleBasedNumberFormat(*this);
}
898
899UBool
900RuleBasedNumberFormat::operator==(const Format& other) const
901{
902 if (this == &other) {
903 return TRUE;
904 }
905
729e4ab9 906 if (typeid(*this) == typeid(other)) {
b75a7d8f 907 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
57a6839d
A
908 // test for capitalization info equality is adequately handled
909 // by the NumberFormat test for fCapitalizationContext equality;
910 // the info here is just derived from that.
b75a7d8f 911 if (locale == rhs.locale &&
374ca955
A
912 lenient == rhs.lenient &&
913 (localizations == NULL
914 ? rhs.localizations == NULL
915 : (rhs.localizations == NULL
916 ? FALSE
917 : *localizations == rhs.localizations))) {
918
b75a7d8f
A
919 NFRuleSet** p = ruleSets;
920 NFRuleSet** q = rhs.ruleSets;
b75a7d8f 921 if (p == NULL) {
374ca955
A
922 return q == NULL;
923 } else if (q == NULL) {
b75a7d8f
A
924 return FALSE;
925 }
926 while (*p && *q && (**p == **q)) {
927 ++p;
928 ++q;
929 }
930 return *q == NULL && *p == NULL;
931 }
932 }
933
934 return FALSE;
935}
936
937UnicodeString
938RuleBasedNumberFormat::getRules() const
939{
940 UnicodeString result;
941 if (ruleSets != NULL) {
942 for (NFRuleSet** p = ruleSets; *p; ++p) {
943 (*p)->appendRules(result);
944 }
945 }
946 return result;
947}
948
949UnicodeString
950RuleBasedNumberFormat::getRuleSetName(int32_t index) const
951{
374ca955 952 if (localizations) {
2ca993e8
A
953 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
954 return string;
955 }
956 else if (ruleSets) {
374ca955 957 UnicodeString result;
b75a7d8f
A
958 for (NFRuleSet** p = ruleSets; *p; ++p) {
959 NFRuleSet* rs = *p;
960 if (rs->isPublic()) {
961 if (--index == -1) {
962 rs->getName(result);
963 return result;
964 }
965 }
966 }
967 }
374ca955
A
968 UnicodeString empty;
969 return empty;
b75a7d8f
A
970}
971
972int32_t
973RuleBasedNumberFormat::getNumberOfRuleSetNames() const
974{
975 int32_t result = 0;
374ca955 976 if (localizations) {
2ca993e8
A
977 result = localizations->getNumberOfRuleSets();
978 }
979 else if (ruleSets) {
b75a7d8f
A
980 for (NFRuleSet** p = ruleSets; *p; ++p) {
981 if ((**p).isPublic()) {
982 ++result;
983 }
984 }
985 }
986 return result;
987}
988
374ca955
A
989int32_t
990RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
991 if (localizations) {
992 return localizations->getNumberOfDisplayLocales();
993 }
994 return 0;
995}
996
997Locale
998RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
999 if (U_FAILURE(status)) {
73c04bcf 1000 return Locale("");
374ca955
A
1001 }
1002 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1003 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1004 char buffer[64];
1005 int32_t cap = name.length() + 1;
1006 char* bp = buffer;
1007 if (cap > 64) {
1008 bp = (char *)uprv_malloc(cap);
1009 if (bp == NULL) {
1010 status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 1011 return Locale("");
374ca955
A
1012 }
1013 }
1014 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1015 Locale retLocale(bp);
1016 if (bp != buffer) {
1017 uprv_free(bp);
1018 }
1019 return retLocale;
1020 }
1021 status = U_ILLEGAL_ARGUMENT_ERROR;
1022 Locale retLocale;
1023 return retLocale;
1024}
1025
1026UnicodeString
1027RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1028 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1029 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1030 int32_t len = localeName.length();
1031 UChar* localeStr = localeName.getBuffer(len + 1);
1032 while (len >= 0) {
1033 localeStr[len] = 0;
1034 int32_t ix = localizations->indexForLocale(localeStr);
1035 if (ix >= 0) {
1036 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1037 return name;
1038 }
1039
1040 // trim trailing portion, skipping over ommitted sections
1041 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1042 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1043 }
1044 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1045 return name;
1046 }
1047 UnicodeString bogus;
1048 bogus.setToBogus();
1049 return bogus;
1050}
1051
1052UnicodeString
1053RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1054 if (localizations) {
1055 UnicodeString rsn(ruleSetName);
1056 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1057 return getRuleSetDisplayName(ix, localeParam);
1058 }
1059 UnicodeString bogus;
1060 bogus.setToBogus();
1061 return bogus;
1062}
1063
b75a7d8f
A
1064NFRuleSet*
1065RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1066{
1067 if (U_SUCCESS(status) && ruleSets) {
1068 for (NFRuleSet** p = ruleSets; *p; ++p) {
1069 NFRuleSet* rs = *p;
1070 if (rs->isNamed(name)) {
1071 return rs;
1072 }
1073 }
1074 status = U_ILLEGAL_ARGUMENT_ERROR;
1075 }
1076 return NULL;
1077}
1078
1079UnicodeString&
1080RuleBasedNumberFormat::format(int32_t number,
1081 UnicodeString& toAppendTo,
374ca955 1082 FieldPosition& /* pos */) const
b75a7d8f 1083{
57a6839d 1084 if (defaultRuleSet) {
b331163b 1085 UErrorCode status = U_ZERO_ERROR;
57a6839d 1086 int32_t startPos = toAppendTo.length();
2ca993e8 1087 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
57a6839d
A
1088 adjustForCapitalizationContext(startPos, toAppendTo);
1089 }
b75a7d8f
A
1090 return toAppendTo;
1091}
1092
1093
1094UnicodeString&
1095RuleBasedNumberFormat::format(int64_t number,
1096 UnicodeString& toAppendTo,
374ca955 1097 FieldPosition& /* pos */) const
b75a7d8f 1098{
57a6839d 1099 if (defaultRuleSet) {
b331163b 1100 UErrorCode status = U_ZERO_ERROR;
57a6839d 1101 int32_t startPos = toAppendTo.length();
2ca993e8 1102 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
57a6839d
A
1103 adjustForCapitalizationContext(startPos, toAppendTo);
1104 }
b75a7d8f
A
1105 return toAppendTo;
1106}
1107
1108
1109UnicodeString&
1110RuleBasedNumberFormat::format(double number,
1111 UnicodeString& toAppendTo,
374ca955 1112 FieldPosition& /* pos */) const
b75a7d8f 1113{
57a6839d 1114 int32_t startPos = toAppendTo.length();
2ca993e8 1115 if (defaultRuleSet) {
b331163b 1116 UErrorCode status = U_ZERO_ERROR;
2ca993e8 1117 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
729e4ab9 1118 }
57a6839d 1119 return adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1120}
1121
1122
1123UnicodeString&
1124RuleBasedNumberFormat::format(int32_t number,
1125 const UnicodeString& ruleSetName,
1126 UnicodeString& toAppendTo,
374ca955 1127 FieldPosition& /* pos */,
b75a7d8f
A
1128 UErrorCode& status) const
1129{
1130 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1131 if (U_SUCCESS(status)) {
4388f060 1132 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1133 // throw new IllegalArgumentException("Can't use internal rule set");
1134 status = U_ILLEGAL_ARGUMENT_ERROR;
1135 } else {
1136 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1137 if (rs) {
57a6839d 1138 int32_t startPos = toAppendTo.length();
2ca993e8 1139 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
57a6839d 1140 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1141 }
1142 }
1143 }
1144 return toAppendTo;
1145}
1146
1147
1148UnicodeString&
1149RuleBasedNumberFormat::format(int64_t number,
1150 const UnicodeString& ruleSetName,
1151 UnicodeString& toAppendTo,
374ca955 1152 FieldPosition& /* pos */,
b75a7d8f
A
1153 UErrorCode& status) const
1154{
1155 if (U_SUCCESS(status)) {
4388f060 1156 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1157 // throw new IllegalArgumentException("Can't use internal rule set");
1158 status = U_ILLEGAL_ARGUMENT_ERROR;
1159 } else {
1160 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1161 if (rs) {
57a6839d 1162 int32_t startPos = toAppendTo.length();
2ca993e8 1163 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
57a6839d 1164 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1165 }
1166 }
1167 }
1168 return toAppendTo;
1169}
1170
1171
b75a7d8f
A
1172UnicodeString&
1173RuleBasedNumberFormat::format(double number,
1174 const UnicodeString& ruleSetName,
1175 UnicodeString& toAppendTo,
374ca955 1176 FieldPosition& /* pos */,
b75a7d8f
A
1177 UErrorCode& status) const
1178{
1179 if (U_SUCCESS(status)) {
4388f060 1180 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1181 // throw new IllegalArgumentException("Can't use internal rule set");
1182 status = U_ILLEGAL_ARGUMENT_ERROR;
1183 } else {
1184 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1185 if (rs) {
57a6839d 1186 int32_t startPos = toAppendTo.length();
2ca993e8 1187 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
57a6839d 1188 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1189 }
1190 }
1191 }
1192 return toAppendTo;
1193}
1194
57a6839d
A
1195UnicodeString&
1196RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1197 UnicodeString& currentResult) const
1198{
1199#if !UCONFIG_NO_BREAK_ITERATION
1200 if (startPos==0 && currentResult.length() > 0) {
1201 // capitalize currentResult according to context
1202 UChar32 ch = currentResult.char32At(0);
1203 UErrorCode status = U_ZERO_ERROR;
1204 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1205 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1206 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1207 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1208 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1209 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1210 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1211 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1212 }
1213 }
1214#endif
1215 return currentResult;
1216}
1217
1218
b75a7d8f
A
1219void
1220RuleBasedNumberFormat::parse(const UnicodeString& text,
1221 Formattable& result,
1222 ParsePosition& parsePosition) const
1223{
1224 if (!ruleSets) {
1225 parsePosition.setErrorIndex(0);
1226 return;
1227 }
1228
374ca955
A
1229 UnicodeString workingText(text, parsePosition.getIndex());
1230 ParsePosition workingPos(0);
1231
1232 ParsePosition high_pp(0);
b75a7d8f
A
1233 Formattable high_result;
1234
1235 for (NFRuleSet** p = ruleSets; *p; ++p) {
1236 NFRuleSet *rp = *p;
729e4ab9 1237 if (rp->isPublic() && rp->isParseable()) {
374ca955 1238 ParsePosition working_pp(0);
b75a7d8f
A
1239 Formattable working_result;
1240
729e4ab9 1241 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
b75a7d8f
A
1242 if (working_pp.getIndex() > high_pp.getIndex()) {
1243 high_pp = working_pp;
1244 high_result = working_result;
1245
374ca955 1246 if (high_pp.getIndex() == workingText.length()) {
b75a7d8f
A
1247 break;
1248 }
1249 }
1250 }
1251 }
1252
46f4442e
A
1253 int32_t startIndex = parsePosition.getIndex();
1254 parsePosition.setIndex(startIndex + high_pp.getIndex());
374ca955
A
1255 if (high_pp.getIndex() > 0) {
1256 parsePosition.setErrorIndex(-1);
46f4442e
A
1257 } else {
1258 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1259 parsePosition.setErrorIndex(startIndex + errorIndex);
b75a7d8f 1260 }
b75a7d8f
A
1261 result = high_result;
1262 if (result.getType() == Formattable::kDouble) {
2ca993e8
A
1263 double d = result.getDouble();
1264 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1265 // Note: casting a double to an int when the double is too large or small
1266 // to fit the destination is undefined behavior. The explicit range checks,
1267 // above, are required. Just casting and checking the result value is undefined.
1268 result.setLong(static_cast<int32_t>(d));
b75a7d8f
A
1269 }
1270 }
1271}
1272
1273#if !UCONFIG_NO_COLLATION
1274
1275void
1276RuleBasedNumberFormat::setLenient(UBool enabled)
1277{
1278 lenient = enabled;
1279 if (!enabled && collator) {
1280 delete collator;
1281 collator = NULL;
1282 }
1283}
1284
1285#endif
1286
1287void
1288RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1289 if (U_SUCCESS(status)) {
1290 if (ruleSetName.isEmpty()) {
374ca955
A
1291 if (localizations) {
1292 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1293 defaultRuleSet = findRuleSet(name, status);
1294 } else {
b75a7d8f 1295 initDefaultRuleSet();
374ca955
A
1296 }
1297 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
b75a7d8f
A
1298 status = U_ILLEGAL_ARGUMENT_ERROR;
1299 } else {
1300 NFRuleSet* result = findRuleSet(ruleSetName, status);
1301 if (result != NULL) {
1302 defaultRuleSet = result;
1303 }
1304 }
1305 }
1306}
1307
374ca955
A
1308UnicodeString
1309RuleBasedNumberFormat::getDefaultRuleSetName() const {
2ca993e8
A
1310 UnicodeString result;
1311 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1312 defaultRuleSet->getName(result);
1313 } else {
1314 result.setToBogus();
1315 }
1316 return result;
374ca955
A
1317}
1318
b75a7d8f
A
1319void
1320RuleBasedNumberFormat::initDefaultRuleSet()
1321{
374ca955 1322 defaultRuleSet = NULL;
b75a7d8f 1323 if (!ruleSets) {
2ca993e8 1324 return;
b75a7d8f 1325 }
374ca955 1326
2ca993e8
A
1327 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1328 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1329 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
729e4ab9 1330
374ca955 1331 NFRuleSet**p = &ruleSets[0];
b75a7d8f 1332 while (*p) {
729e4ab9
A
1333 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1334 defaultRuleSet = *p;
1335 return;
1336 } else {
1337 ++p;
1338 }
b75a7d8f
A
1339 }
1340
1341 defaultRuleSet = *--p;
1342 if (!defaultRuleSet->isPublic()) {
1343 while (p != ruleSets) {
1344 if ((*--p)->isPublic()) {
1345 defaultRuleSet = *p;
1346 break;
1347 }
1348 }
1349 }
1350}
1351
1352
1353void
374ca955 1354RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
73c04bcf 1355 UParseError& pErr, UErrorCode& status)
b75a7d8f
A
1356{
1357 // TODO: implement UParseError
73c04bcf 1358 uprv_memset(&pErr, 0, sizeof(UParseError));
b75a7d8f
A
1359 // Note: this can leave ruleSets == NULL, so remaining code should check
1360 if (U_FAILURE(status)) {
1361 return;
1362 }
1363
2ca993e8
A
1364 initializeDecimalFormatSymbols(status);
1365 initializeDefaultInfinityRule(status);
1366 initializeDefaultNaNRule(status);
1367 if (U_FAILURE(status)) {
1368 return;
1369 }
1370
374ca955
A
1371 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1372
b75a7d8f
A
1373 UnicodeString description(rules);
1374 if (!description.length()) {
1375 status = U_MEMORY_ALLOCATION_ERROR;
1376 return;
1377 }
1378
1379 // start by stripping the trailing whitespace from all the rules
1380 // (this is all the whitespace follwing each semicolon in the
1381 // description). This allows us to look for rule-set boundaries
1382 // by searching for ";%" without having to worry about whitespace
1383 // between the ; and the %
1384 stripWhitespace(description);
1385
1386 // check to see if there's a set of lenient-parse rules. If there
1387 // is, pull them out into our temporary holding place for them,
1388 // and delete them from the description before the real desciption-
1389 // parsing code sees them
4388f060 1390 int32_t lp = description.indexOf(gLenientParse, -1, 0);
b75a7d8f
A
1391 if (lp != -1) {
1392 // we've got to make sure we're not in the middle of a rule
1393 // (where "%%lenient-parse" would actually get treated as
1394 // rule text)
1395 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1396 // locate the beginning and end of the actual collation
1397 // rules (there may be whitespace between the name and
1398 // the first token in the description)
4388f060 1399 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
b75a7d8f
A
1400
1401 if (lpEnd == -1) {
1402 lpEnd = description.length() - 1;
1403 }
1404 int lpStart = lp + u_strlen(gLenientParse);
4388f060 1405 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
b75a7d8f
A
1406 ++lpStart;
1407 }
1408
1409 // copy out the lenient-parse rules and delete them
1410 // from the description
1411 lenientParseRules = new UnicodeString();
1412 /* test for NULL */
1413 if (lenientParseRules == 0) {
1414 status = U_MEMORY_ALLOCATION_ERROR;
1415 return;
1416 }
1417 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1418
1419 description.remove(lp, lpEnd + 1 - lp);
1420 }
1421 }
1422
1423 // pre-flight parsing the description and count the number of
1424 // rule sets (";%" marks the end of one rule set and the beginning
1425 // of the next)
4388f060
A
1426 numRuleSets = 0;
1427 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
b75a7d8f
A
1428 ++numRuleSets;
1429 ++p;
1430 }
1431 ++numRuleSets;
1432
1433 // our rule list is an array of the appropriate size
1434 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1435 /* test for NULL */
1436 if (ruleSets == 0) {
1437 status = U_MEMORY_ALLOCATION_ERROR;
1438 return;
1439 }
1440
1441 for (int i = 0; i <= numRuleSets; ++i) {
1442 ruleSets[i] = NULL;
1443 }
1444
1445 // divide up the descriptions into individual rule-set descriptions
1446 // and store them in a temporary array. At each step, we also
1447 // new up a rule set, but all this does is initialize its name
1448 // and remove it from its description. We can't actually parse
1449 // the rest of the descriptions and finish initializing everything
1450 // because we have to know the names and locations of all the rule
1451 // sets before we can actually set everything up
1452 if(!numRuleSets) {
46f4442e
A
1453 status = U_ILLEGAL_ARGUMENT_ERROR;
1454 return;
b75a7d8f 1455 }
4388f060
A
1456
1457 ruleSetDescriptions = new UnicodeString[numRuleSets];
b75a7d8f
A
1458 if (ruleSetDescriptions == 0) {
1459 status = U_MEMORY_ALLOCATION_ERROR;
1460 return;
1461 }
1462
1463 {
1464 int curRuleSet = 0;
1465 int32_t start = 0;
4388f060 1466 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
b75a7d8f 1467 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
2ca993e8 1468 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1469 if (ruleSets[curRuleSet] == 0) {
1470 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1471 return;
b75a7d8f
A
1472 }
1473 ++curRuleSet;
1474 start = p + 1;
1475 }
1476 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
2ca993e8 1477 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1478 if (ruleSets[curRuleSet] == 0) {
1479 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1480 return;
b75a7d8f
A
1481 }
1482 }
1483
1484 // now we can take note of the formatter's default rule set, which
1485 // is the last public rule set in the description (it's the last
1486 // rather than the first so that a user can create a new formatter
1487 // from an existing formatter and change its default behavior just
1488 // by appending more rule sets to the end)
374ca955
A
1489
1490 // {dlf} Initialization of a fraction rule set requires the default rule
1491 // set to be known. For purposes of initialization, this is always the
1492 // last public rule set, no matter what the localization data says.
1493 initDefaultRuleSet();
b75a7d8f
A
1494
1495 // finally, we can go back through the temporary descriptions
1496 // list and finish seting up the substructure (and we throw
1497 // away the temporary descriptions as we go)
1498 {
1499 for (int i = 0; i < numRuleSets; i++) {
2ca993e8 1500 ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
b75a7d8f
A
1501 }
1502 }
1503
374ca955
A
1504 // Now that the rules are initialized, the 'real' default rule
1505 // set can be adjusted by the localization data.
1506
1507 // The C code keeps the localization array as is, rather than building
1508 // a separate array of the public rule set names, so we have less work
1509 // to do here-- but we still need to check the names.
1510
1511 if (localizationInfos) {
1512 // confirm the names, if any aren't in the rules, that's an error
1513 // it is ok if the rules contain public rule sets that are not in this list
1514 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1515 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1516 NFRuleSet* rs = findRuleSet(name, status);
1517 if (rs == NULL) {
1518 break; // error
1519 }
1520 if (i == 0) {
1521 defaultRuleSet = rs;
1522 }
1523 }
1524 } else {
1525 defaultRuleSet = getDefaultRuleSet();
1526 }
57a6839d
A
1527 originalDescription = rules;
1528}
1529
1530// override the NumberFormat implementation in order to
1531// lazily initialize relevant items
1532void
1533RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1534{
1535 NumberFormat::setContext(value, status);
1536 if (U_SUCCESS(status)) {
1537 if (!capitalizationInfoSet &&
1538 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1539 initCapitalizationContextInfo(locale);
1540 capitalizationInfoSet = TRUE;
1541 }
1542#if !UCONFIG_NO_BREAK_ITERATION
1543 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1544 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1545 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1546 UErrorCode status = U_ZERO_ERROR;
1547 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1548 if (U_FAILURE(status)) {
1549 delete capitalizationBrkIter;
1550 capitalizationBrkIter = NULL;
1551 }
1552 }
1553#endif
1554 }
1555}
1556
1557void
1558RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1559{
1560#if !UCONFIG_NO_BREAK_ITERATION
1561 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1562 UErrorCode status = U_ZERO_ERROR;
1563 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1564 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1565 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1566 if (U_SUCCESS(status) && rb != NULL) {
1567 int32_t len = 0;
1568 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1569 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1570 capitalizationForUIListMenu = intVector[0];
1571 capitalizationForStandAlone = intVector[1];
1572 }
1573 }
1574 ures_close(rb);
1575#endif
b75a7d8f
A
1576}
1577
1578void
1579RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1580{
1581 // iterate through the characters...
1582 UnicodeString result;
1583
1584 int start = 0;
1585 while (start != -1 && start < description.length()) {
1586 // seek to the first non-whitespace character...
1587 while (start < description.length()
4388f060 1588 && PatternProps::isWhiteSpace(description.charAt(start))) {
b75a7d8f
A
1589 ++start;
1590 }
1591
1592 // locate the next semicolon in the text and copy the text from
1593 // our current position up to that semicolon into the result
1594 int32_t p = description.indexOf(gSemiColon, start);
1595 if (p == -1) {
1596 // or if we don't find a semicolon, just copy the rest of
1597 // the string into the result
1598 result.append(description, start, description.length() - start);
1599 start = -1;
1600 }
1601 else if (p < description.length()) {
1602 result.append(description, start, p + 1 - start);
1603 start = p + 1;
1604 }
1605
1606 // when we get here, we've seeked off the end of the sring, and
1607 // we terminate the loop (we continue until *start* is -1 rather
1608 // than until *p* is -1, because otherwise we'd miss the last
1609 // rule in the description)
1610 else {
1611 start = -1;
1612 }
1613 }
1614
1615 description.setTo(result);
1616}
1617
1618
1619void
1620RuleBasedNumberFormat::dispose()
1621{
1622 if (ruleSets) {
1623 for (NFRuleSet** p = ruleSets; *p; ++p) {
1624 delete *p;
1625 }
1626 uprv_free(ruleSets);
1627 ruleSets = NULL;
1628 }
1629
4388f060
A
1630 if (ruleSetDescriptions) {
1631 delete [] ruleSetDescriptions;
2ca993e8 1632 ruleSetDescriptions = NULL;
4388f060
A
1633 }
1634
b75a7d8f
A
1635#if !UCONFIG_NO_COLLATION
1636 delete collator;
1637#endif
1638 collator = NULL;
1639
1640 delete decimalFormatSymbols;
1641 decimalFormatSymbols = NULL;
1642
2ca993e8
A
1643 delete defaultInfinityRule;
1644 defaultInfinityRule = NULL;
1645
1646 delete defaultNaNRule;
1647 defaultNaNRule = NULL;
1648
b75a7d8f
A
1649 delete lenientParseRules;
1650 lenientParseRules = NULL;
374ca955 1651
57a6839d 1652#if !UCONFIG_NO_BREAK_ITERATION
2ca993e8
A
1653 delete capitalizationBrkIter;
1654 capitalizationBrkIter = NULL;
57a6839d
A
1655#endif
1656
2ca993e8
A
1657 if (localizations) {
1658 localizations = localizations->unref();
1659 }
b75a7d8f
A
1660}
1661
1662
1663//-----------------------------------------------------------------------
1664// package-internal API
1665//-----------------------------------------------------------------------
1666
1667/**
1668 * Returns the collator to use for lenient parsing. The collator is lazily created:
1669 * this function creates it the first time it's called.
1670 * @return The collator to use for lenient parsing, or null if lenient parsing
1671 * is turned off.
1672*/
57a6839d 1673const RuleBasedCollator*
b75a7d8f
A
1674RuleBasedNumberFormat::getCollator() const
1675{
1676#if !UCONFIG_NO_COLLATION
1677 if (!ruleSets) {
1678 return NULL;
1679 }
1680
57a6839d 1681 // lazy-evaluate the collator
b75a7d8f
A
1682 if (collator == NULL && lenient) {
1683 // create a default collator based on the formatter's locale,
1684 // then pull out that collator's rules, append any additional
1685 // rules specified in the description, and create a _new_
1686 // collator based on the combinaiton of those rules
1687
1688 UErrorCode status = U_ZERO_ERROR;
1689
1690 Collator* temp = Collator::createInstance(locale, status);
729e4ab9
A
1691 RuleBasedCollator* newCollator;
1692 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
b75a7d8f
A
1693 if (lenientParseRules) {
1694 UnicodeString rules(newCollator->getRules());
1695 rules.append(*lenientParseRules);
1696
1697 newCollator = new RuleBasedCollator(rules, status);
46f4442e
A
1698 // Exit if newCollator could not be created.
1699 if (newCollator == NULL) {
57a6839d 1700 return NULL;
46f4442e 1701 }
b75a7d8f
A
1702 } else {
1703 temp = NULL;
1704 }
1705 if (U_SUCCESS(status)) {
1706 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1707 // cast away const
1708 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1709 } else {
1710 delete newCollator;
1711 }
1712 }
1713 delete temp;
1714 }
1715#endif
1716
1717 // if lenient-parse mode is off, this will be null
1718 // (see setLenientParseMode())
1719 return collator;
1720}
1721
1722
b75a7d8f 1723DecimalFormatSymbols*
2ca993e8 1724RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
b75a7d8f
A
1725{
1726 // lazy-evaluate the DecimalFormatSymbols object. This object
1727 // is shared by all DecimalFormat instances belonging to this
1728 // formatter
1729 if (decimalFormatSymbols == NULL) {
b75a7d8f
A
1730 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1731 if (U_SUCCESS(status)) {
2ca993e8
A
1732 decimalFormatSymbols = temp;
1733 }
1734 else {
b75a7d8f
A
1735 delete temp;
1736 }
1737 }
1738 return decimalFormatSymbols;
1739}
1740
2ca993e8
A
1741/**
1742 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1743 * instances owned by this formatter.
1744*/
1745const DecimalFormatSymbols*
1746RuleBasedNumberFormat::getDecimalFormatSymbols() const
1747{
1748 return decimalFormatSymbols;
1749}
1750
1751NFRule*
1752RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1753{
1754 if (U_FAILURE(status)) {
1755 return NULL;
1756 }
1757 if (defaultInfinityRule == NULL) {
1758 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1759 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1760 NFRule* temp = new NFRule(this, rule, status);
1761 if (U_SUCCESS(status)) {
1762 defaultInfinityRule = temp;
1763 }
1764 else {
1765 delete temp;
1766 }
1767 }
1768 return defaultInfinityRule;
1769}
1770
1771const NFRule*
1772RuleBasedNumberFormat::getDefaultInfinityRule() const
1773{
1774 return defaultInfinityRule;
1775}
1776
1777NFRule*
1778RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1779{
1780 if (U_FAILURE(status)) {
1781 return NULL;
1782 }
1783 if (defaultNaNRule == NULL) {
1784 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1785 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1786 NFRule* temp = new NFRule(this, rule, status);
1787 if (U_SUCCESS(status)) {
1788 defaultNaNRule = temp;
1789 }
1790 else {
1791 delete temp;
1792 }
1793 }
1794 return defaultNaNRule;
1795}
1796
1797const NFRule*
1798RuleBasedNumberFormat::getDefaultNaNRule() const
1799{
1800 return defaultNaNRule;
1801}
1802
4388f060
A
1803// De-owning the current localized symbols and adopt the new symbols.
1804void
1805RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1806{
1807 if (symbolsToAdopt == NULL) {
1808 return; // do not allow caller to set decimalFormatSymbols to NULL
1809 }
1810
1811 if (decimalFormatSymbols != NULL) {
1812 delete decimalFormatSymbols;
1813 }
1814
1815 decimalFormatSymbols = symbolsToAdopt;
1816
1817 {
1818 // Apply the new decimalFormatSymbols by reparsing the rulesets
1819 UErrorCode status = U_ZERO_ERROR;
1820
2ca993e8
A
1821 delete defaultInfinityRule;
1822 defaultInfinityRule = NULL;
1823 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1824
1825 delete defaultNaNRule;
1826 defaultNaNRule = NULL;
1827 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1828
1829 if (ruleSets) {
1830 for (int32_t i = 0; i < numRuleSets; i++) {
1831 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1832 }
4388f060
A
1833 }
1834 }
1835}
1836
1837// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1838void
1839RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1840{
1841 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1842}
1843
b331163b
A
1844PluralFormat *
1845RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1846 const UnicodeString &pattern,
1847 UErrorCode& status) const
1848{
1849 return new PluralFormat(locale, pluralType, pattern, status);
1850}
1851
374ca955
A
1852U_NAMESPACE_END
1853
b75a7d8f
A
1854/* U_HAVE_RBNF */
1855#endif