]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbnf.cpp
ICU-551.51.4.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
57a6839d 3* Copyright (C) 1997-2014, International Business Machines Corporation
73c04bcf 4* and others. All Rights Reserved.
b75a7d8f
A
5*******************************************************************************
6*/
7
57a6839d 8#include "unicode/utypes.h"
51004dcb 9#include "utypeinfo.h" // for 'typeid' to work
729e4ab9 10
b75a7d8f
A
11#include "unicode/rbnf.h"
12
13#if U_HAVE_RBNF
14
15#include "unicode/normlzr.h"
b331163b 16#include "unicode/plurfmt.h"
b75a7d8f
A
17#include "unicode/tblcoll.h"
18#include "unicode/uchar.h"
19#include "unicode/ucol.h"
20#include "unicode/uloc.h"
21#include "unicode/unum.h"
22#include "unicode/ures.h"
23#include "unicode/ustring.h"
24#include "unicode/utf16.h"
374ca955 25#include "unicode/udata.h"
57a6839d
A
26#include "unicode/udisplaycontext.h"
27#include "unicode/brkiter.h"
b75a7d8f
A
28#include "nfrs.h"
29
30#include "cmemory.h"
31#include "cstring.h"
4388f060 32#include "patternprops.h"
729e4ab9 33#include "uresimp.h"
374ca955
A
34
35// debugging
b331163b 36// #define RBNF_DEBUG
374ca955 37
b331163b 38#ifdef RBNF_DEBUG
374ca955
A
39#include "stdio.h"
40#endif
41
42#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
b75a7d8f
A
43
44static const UChar gPercentPercent[] =
45{
46 0x25, 0x25, 0
47}; /* "%%" */
48
49// All urbnf objects are created through openRules, so we init all of the
50// Unicode string constants required by rbnf, nfrs, or nfr here.
51static const UChar gLenientParse[] =
52{
53 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
54}; /* "%%lenient-parse:" */
55static const UChar gSemiColon = 0x003B;
56static const UChar gSemiPercent[] =
57{
58 0x3B, 0x25, 0
59}; /* ";%" */
60
61#define kSomeNumberOfBitsDiv2 22
62#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
63#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
64
374ca955
A
65U_NAMESPACE_BEGIN
66
67UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68
73c04bcf
A
69/*
70This is a utility class. It does not use ICU's RTTI.
71If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72Please make sure that intltest passes on Windows in Release mode,
73since the string pooling per compilation unit will mess up how RTTI works.
74The RTTI code was also removed due to lack of code coverage.
75*/
76class LocalizationInfo : public UMemory {
374ca955 77protected:
4388f060 78 virtual ~LocalizationInfo();
374ca955
A
79 uint32_t refcount;
80
81public:
82 LocalizationInfo() : refcount(0) {}
83
84 LocalizationInfo* ref(void) {
85 ++refcount;
86 return this;
87 }
88
89 LocalizationInfo* unref(void) {
90 if (refcount && --refcount == 0) {
91 delete this;
92 }
93 return NULL;
94 }
95
96 virtual UBool operator==(const LocalizationInfo* rhs) const;
97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98
99 virtual int32_t getNumberOfRuleSets(void) const = 0;
100 virtual const UChar* getRuleSetName(int32_t index) const = 0;
101 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102 virtual const UChar* getLocaleName(int32_t index) const = 0;
103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104
105 virtual int32_t indexForLocale(const UChar* locale) const;
106 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107
73c04bcf
A
108// virtual UClassID getDynamicClassID() const = 0;
109// static UClassID getStaticClassID(void);
374ca955
A
110};
111
4388f060
A
112LocalizationInfo::~LocalizationInfo() {}
113
73c04bcf 114//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
374ca955
A
115
116// if both strings are NULL, this returns TRUE
117static UBool
118streq(const UChar* lhs, const UChar* rhs) {
119 if (rhs == lhs) {
120 return TRUE;
121 }
122 if (lhs && rhs) {
123 return u_strcmp(lhs, rhs) == 0;
124 }
125 return FALSE;
126}
127
128UBool
129LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
130 if (rhs) {
131 if (this == rhs) {
132 return TRUE;
133 }
134
135 int32_t rsc = getNumberOfRuleSets();
136 if (rsc == rhs->getNumberOfRuleSets()) {
137 for (int i = 0; i < rsc; ++i) {
138 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
139 return FALSE;
140 }
141 }
142 int32_t dlc = getNumberOfDisplayLocales();
143 if (dlc == rhs->getNumberOfDisplayLocales()) {
144 for (int i = 0; i < dlc; ++i) {
145 const UChar* locale = getLocaleName(i);
146 int32_t ix = rhs->indexForLocale(locale);
147 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
148 if (!streq(locale, rhs->getLocaleName(ix))) {
149 return FALSE;
150 }
151 for (int j = 0; j < rsc; ++j) {
152 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
153 return FALSE;
154 }
155 }
156 }
157 return TRUE;
158 }
159 }
160 }
161 return FALSE;
162}
163
164int32_t
165LocalizationInfo::indexForLocale(const UChar* locale) const {
166 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
167 if (streq(locale, getLocaleName(i))) {
168 return i;
169 }
170 }
171 return -1;
172}
173
174int32_t
175LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
176 if (ruleset) {
177 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
178 if (streq(ruleset, getRuleSetName(i))) {
179 return i;
180 }
181 }
182 }
183 return -1;
184}
185
186
187typedef void (*Fn_Deleter)(void*);
188
189class VArray {
190 void** buf;
191 int32_t cap;
192 int32_t size;
193 Fn_Deleter deleter;
194public:
195 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
196
197 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
198
199 ~VArray() {
200 if (deleter) {
201 for (int i = 0; i < size; ++i) {
202 (*deleter)(buf[i]);
203 }
204 }
205 uprv_free(buf);
206 }
207
208 int32_t length() {
209 return size;
210 }
211
212 void add(void* elem, UErrorCode& status) {
213 if (U_SUCCESS(status)) {
214 if (size == cap) {
215 if (cap == 0) {
216 cap = 1;
217 } else if (cap < 256) {
218 cap *= 2;
219 } else {
220 cap += 256;
221 }
222 if (buf == NULL) {
223 buf = (void**)uprv_malloc(cap * sizeof(void*));
224 } else {
225 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
226 }
227 if (buf == NULL) {
228 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
229 status = U_MEMORY_ALLOCATION_ERROR;
230 return;
231 }
232 void* start = &buf[size];
233 size_t count = (cap - size) * sizeof(void*);
234 uprv_memset(start, 0, count); // fill with nulls, just because
235 }
236 buf[size++] = elem;
237 }
238 }
239
240 void** release(void) {
241 void** result = buf;
242 buf = NULL;
243 cap = 0;
244 size = 0;
245 return result;
246 }
247};
248
249class LocDataParser;
250
251class StringLocalizationInfo : public LocalizationInfo {
252 UChar* info;
253 UChar*** data;
254 int32_t numRuleSets;
255 int32_t numLocales;
256
257friend class LocDataParser;
258
259 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
260 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
261 {
262 }
263
264public:
265 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
266
267 virtual ~StringLocalizationInfo();
268 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
269 virtual const UChar* getRuleSetName(int32_t index) const;
270 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
271 virtual const UChar* getLocaleName(int32_t index) const;
272 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
273
73c04bcf
A
274// virtual UClassID getDynamicClassID() const;
275// static UClassID getStaticClassID(void);
374ca955
A
276
277private:
278 void init(UErrorCode& status) const;
279};
280
281
// Code points with syntactic meaning in the localization data format.
enum {
    OPEN_ANGLE = 0x003c,  /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c,       /* ',' */
    TICK = 0x0027,        /* '\'' */
    QUOTE = 0x0022,       /* '"' */
    SPACE = 0x0020        /* ' ' */
};
290
291/**
292 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
293 */
294class LocDataParser {
295 UChar* data;
296 const UChar* e;
297 UChar* p;
298 UChar ch;
299 UParseError& pe;
300 UErrorCode& ec;
301
302public:
303 LocDataParser(UParseError& parseError, UErrorCode& status)
304 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
305 ~LocDataParser() {}
306
307 /*
308 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
309 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
310 */
311 StringLocalizationInfo* parse(UChar* data, int32_t len);
312
313private:
314
315 void inc(void) { ++p; ch = 0xffff; }
316 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
317 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
4388f060 318 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
374ca955 319 UBool inList(UChar c, const UChar* list) const {
4388f060 320 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
374ca955
A
321 while (*list && *list != c) ++list; return *list == c;
322 }
323 void parseError(const char* msg);
324
325 StringLocalizationInfo* doParse(void);
326
327 UChar** nextArray(int32_t& requiredLength);
328 UChar* nextString(void);
329};
330
b331163b 331#ifdef RBNF_DEBUG
374ca955 332#define ERROR(msg) parseError(msg); return NULL;
57a6839d 333#define EXPLANATION_ARG explanationArg
374ca955
A
334#else
335#define ERROR(msg) parseError(NULL); return NULL;
57a6839d 336#define EXPLANATION_ARG
374ca955
A
337#endif
338
339
340static const UChar DQUOTE_STOPLIST[] = {
341 QUOTE, 0
342};
343
344static const UChar SQUOTE_STOPLIST[] = {
345 TICK, 0
346};
347
348static const UChar NOQUOTE_STOPLIST[] = {
349 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
350};
351
352static void
353DeleteFn(void* p) {
354 uprv_free(p);
355}
356
357StringLocalizationInfo*
358LocDataParser::parse(UChar* _data, int32_t len) {
359 if (U_FAILURE(ec)) {
360 if (_data) uprv_free(_data);
361 return NULL;
362 }
363
364 pe.line = 0;
365 pe.offset = -1;
366 pe.postContext[0] = 0;
367 pe.preContext[0] = 0;
368
369 if (_data == NULL) {
370 ec = U_ILLEGAL_ARGUMENT_ERROR;
371 return NULL;
372 }
373
374 if (len <= 0) {
375 ec = U_ILLEGAL_ARGUMENT_ERROR;
376 uprv_free(_data);
377 return NULL;
378 }
379
380 data = _data;
381 e = data + len;
382 p = _data;
383 ch = 0xffff;
384
385 return doParse();
386}
387
388
389StringLocalizationInfo*
390LocDataParser::doParse(void) {
391 skipWhitespace();
392 if (!checkInc(OPEN_ANGLE)) {
393 ERROR("Missing open angle");
394 } else {
395 VArray array(DeleteFn);
396 UBool mightHaveNext = TRUE;
397 int32_t requiredLength = -1;
398 while (mightHaveNext) {
399 mightHaveNext = FALSE;
400 UChar** elem = nextArray(requiredLength);
401 skipWhitespace();
402 UBool haveComma = check(COMMA);
403 if (elem) {
404 array.add(elem, ec);
405 if (haveComma) {
406 inc();
407 mightHaveNext = TRUE;
408 }
409 } else if (haveComma) {
410 ERROR("Unexpected character");
411 }
412 }
413
414 skipWhitespace();
415 if (!checkInc(CLOSE_ANGLE)) {
416 if (check(OPEN_ANGLE)) {
417 ERROR("Missing comma in outer array");
418 } else {
419 ERROR("Missing close angle bracket in outer array");
420 }
421 }
422
423 skipWhitespace();
424 if (p != e) {
425 ERROR("Extra text after close of localization data");
426 }
427
428 array.add(NULL, ec);
429 if (U_SUCCESS(ec)) {
430 int32_t numLocs = array.length() - 2; // subtract first, NULL
431 UChar*** result = (UChar***)array.release();
432
433 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
434 }
435 }
436
437 ERROR("Unknown error");
438}
439
440UChar**
441LocDataParser::nextArray(int32_t& requiredLength) {
442 if (U_FAILURE(ec)) {
443 return NULL;
444 }
445
446 skipWhitespace();
447 if (!checkInc(OPEN_ANGLE)) {
448 ERROR("Missing open angle");
449 }
450
451 VArray array;
452 UBool mightHaveNext = TRUE;
453 while (mightHaveNext) {
454 mightHaveNext = FALSE;
455 UChar* elem = nextString();
456 skipWhitespace();
457 UBool haveComma = check(COMMA);
458 if (elem) {
459 array.add(elem, ec);
460 if (haveComma) {
461 inc();
462 mightHaveNext = TRUE;
463 }
464 } else if (haveComma) {
465 ERROR("Unexpected comma");
466 }
467 }
468 skipWhitespace();
469 if (!checkInc(CLOSE_ANGLE)) {
470 if (check(OPEN_ANGLE)) {
471 ERROR("Missing close angle bracket in inner array");
472 } else {
473 ERROR("Missing comma in inner array");
474 }
475 }
476
477 array.add(NULL, ec);
478 if (U_SUCCESS(ec)) {
479 if (requiredLength == -1) {
480 requiredLength = array.length() + 1;
481 } else if (array.length() != requiredLength) {
482 ec = U_ILLEGAL_ARGUMENT_ERROR;
483 ERROR("Array not of required length");
484 }
485
486 return (UChar**)array.release();
487 }
488 ERROR("Unknown Error");
489}
490
491UChar*
492LocDataParser::nextString() {
493 UChar* result = NULL;
494
495 skipWhitespace();
496 if (p < e) {
497 const UChar* terminators;
498 UChar c = *p;
499 UBool haveQuote = c == QUOTE || c == TICK;
500 if (haveQuote) {
501 inc();
502 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
503 } else {
504 terminators = NOQUOTE_STOPLIST;
505 }
506 UChar* start = p;
507 while (p < e && !inList(*p, terminators)) ++p;
508 if (p == e) {
509 ERROR("Unexpected end of data");
510 }
511
512 UChar x = *p;
513 if (p > start) {
514 ch = x;
515 *p = 0x0; // terminate by writing to data
516 result = start; // just point into data
517 }
518 if (haveQuote) {
519 if (x != c) {
520 ERROR("Missing matching quote");
521 } else if (p == start) {
522 ERROR("Empty string");
523 }
524 inc();
525 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
526 ERROR("Unexpected character in string");
527 }
528 }
529
530 // ok for there to be no next string
531 return result;
532}
533
57a6839d
A
534void LocDataParser::parseError(const char* EXPLANATION_ARG)
535{
374ca955
A
536 if (!data) {
537 return;
538 }
b75a7d8f 539
374ca955 540 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
73c04bcf 541 if (start < data) {
374ca955 542 start = data;
73c04bcf
A
543 }
544 for (UChar* x = p; --x >= start;) {
374ca955
A
545 if (!*x) {
546 start = x+1;
547 break;
548 }
73c04bcf 549 }
374ca955 550 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
73c04bcf 551 if (limit > e) {
374ca955 552 limit = e;
73c04bcf
A
553 }
554 u_strncpy(pe.preContext, start, (int32_t)(p-start));
374ca955 555 pe.preContext[p-start] = 0;
73c04bcf 556 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
374ca955 557 pe.postContext[limit-p] = 0;
73c04bcf 558 pe.offset = (int32_t)(p - data);
374ca955 559
b331163b 560#ifdef RBNF_DEBUG
57a6839d 561 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
374ca955
A
562
563 UnicodeString msg;
564 msg.append(start, p - start);
565 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
566 msg.append(p, limit-p);
57a6839d 567 msg.append(UNICODE_STRING_SIMPLE("'"));
374ca955
A
568
569 char buf[128];
570 int32_t len = msg.extract(0, msg.length(), buf, 128);
571 if (len >= 128) {
572 buf[127] = 0;
573 } else {
574 buf[len] = 0;
575 }
576 fprintf(stderr, "%s\n", buf);
577 fflush(stderr);
578#endif
579
580 uprv_free(data);
581 data = NULL;
582 p = NULL;
583 e = NULL;
584
585 if (U_SUCCESS(ec)) {
586 ec = U_PARSE_ERROR;
587 }
588}
589
73c04bcf 590//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
374ca955
A
591
592StringLocalizationInfo*
593StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
594 if (U_FAILURE(status)) {
595 return NULL;
596 }
597
598 int32_t len = info.length();
599 if (len == 0) {
600 return NULL; // no error;
601 }
602
603 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
604 if (!p) {
605 status = U_MEMORY_ALLOCATION_ERROR;
606 return NULL;
607 }
608 info.extract(p, len, status);
609 if (!U_FAILURE(status)) {
610 status = U_ZERO_ERROR; // clear warning about non-termination
611 }
612
613 LocDataParser parser(perror, status);
614 return parser.parse(p, len);
615}
616
617StringLocalizationInfo::~StringLocalizationInfo() {
618 for (UChar*** p = (UChar***)data; *p; ++p) {
619 // remaining data is simply pointer into our unicode string data.
620 if (*p) uprv_free(*p);
621 }
622 if (data) uprv_free(data);
623 if (info) uprv_free(info);
624}
625
626
627const UChar*
628StringLocalizationInfo::getRuleSetName(int32_t index) const {
629 if (index >= 0 && index < getNumberOfRuleSets()) {
630 return data[0][index];
631 }
632 return NULL;
633}
634
635const UChar*
636StringLocalizationInfo::getLocaleName(int32_t index) const {
637 if (index >= 0 && index < getNumberOfDisplayLocales()) {
638 return data[index+1][0];
639 }
640 return NULL;
641}
642
643const UChar*
644StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
645 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
646 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
647 return data[localeIndex+1][ruleIndex+1];
648 }
649 return NULL;
650}
651
652// ----------
653
654RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
655 const UnicodeString& locs,
656 const Locale& alocale, UParseError& perror, UErrorCode& status)
b75a7d8f 657 : ruleSets(NULL)
4388f060
A
658 , ruleSetDescriptions(NULL)
659 , numRuleSets(0)
b75a7d8f
A
660 , defaultRuleSet(NULL)
661 , locale(alocale)
662 , collator(NULL)
663 , decimalFormatSymbols(NULL)
664 , lenient(FALSE)
665 , lenientParseRules(NULL)
374ca955 666 , localizations(NULL)
57a6839d
A
667 , capitalizationInfoSet(FALSE)
668 , capitalizationForUIListMenu(FALSE)
669 , capitalizationForStandAlone(FALSE)
670 , capitalizationBrkIter(NULL)
374ca955
A
671{
672 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
673 init(description, locinfo, perror, status);
674}
675
676RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
677 const UnicodeString& locs,
678 UParseError& perror, UErrorCode& status)
679 : ruleSets(NULL)
4388f060
A
680 , ruleSetDescriptions(NULL)
681 , numRuleSets(0)
374ca955
A
682 , defaultRuleSet(NULL)
683 , locale(Locale::getDefault())
684 , collator(NULL)
685 , decimalFormatSymbols(NULL)
686 , lenient(FALSE)
687 , lenientParseRules(NULL)
688 , localizations(NULL)
57a6839d
A
689 , capitalizationInfoSet(FALSE)
690 , capitalizationForUIListMenu(FALSE)
691 , capitalizationForStandAlone(FALSE)
692 , capitalizationBrkIter(NULL)
374ca955
A
693{
694 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
695 init(description, locinfo, perror, status);
696}
697
698RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
699 LocalizationInfo* info,
700 const Locale& alocale, UParseError& perror, UErrorCode& status)
701 : ruleSets(NULL)
4388f060
A
702 , ruleSetDescriptions(NULL)
703 , numRuleSets(0)
374ca955
A
704 , defaultRuleSet(NULL)
705 , locale(alocale)
706 , collator(NULL)
707 , decimalFormatSymbols(NULL)
708 , lenient(FALSE)
709 , lenientParseRules(NULL)
710 , localizations(NULL)
57a6839d
A
711 , capitalizationInfoSet(FALSE)
712 , capitalizationForUIListMenu(FALSE)
713 , capitalizationForStandAlone(FALSE)
714 , capitalizationBrkIter(NULL)
374ca955
A
715{
716 init(description, info, perror, status);
717}
718
719RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
720 UParseError& perror,
721 UErrorCode& status)
722 : ruleSets(NULL)
4388f060
A
723 , ruleSetDescriptions(NULL)
724 , numRuleSets(0)
374ca955
A
725 , defaultRuleSet(NULL)
726 , locale(Locale::getDefault())
727 , collator(NULL)
728 , decimalFormatSymbols(NULL)
729 , lenient(FALSE)
730 , lenientParseRules(NULL)
731 , localizations(NULL)
57a6839d
A
732 , capitalizationInfoSet(FALSE)
733 , capitalizationForUIListMenu(FALSE)
734 , capitalizationForStandAlone(FALSE)
735 , capitalizationBrkIter(NULL)
374ca955
A
736{
737 init(description, NULL, perror, status);
738}
739
740RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
741 const Locale& aLocale,
742 UParseError& perror,
743 UErrorCode& status)
744 : ruleSets(NULL)
4388f060
A
745 , ruleSetDescriptions(NULL)
746 , numRuleSets(0)
374ca955
A
747 , defaultRuleSet(NULL)
748 , locale(aLocale)
749 , collator(NULL)
750 , decimalFormatSymbols(NULL)
751 , lenient(FALSE)
752 , lenientParseRules(NULL)
753 , localizations(NULL)
57a6839d
A
754 , capitalizationInfoSet(FALSE)
755 , capitalizationForUIListMenu(FALSE)
756 , capitalizationForStandAlone(FALSE)
757 , capitalizationBrkIter(NULL)
b75a7d8f 758{
374ca955 759 init(description, NULL, perror, status);
b75a7d8f
A
760}
761
762RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
763 : ruleSets(NULL)
4388f060
A
764 , ruleSetDescriptions(NULL)
765 , numRuleSets(0)
b75a7d8f
A
766 , defaultRuleSet(NULL)
767 , locale(alocale)
768 , collator(NULL)
769 , decimalFormatSymbols(NULL)
770 , lenient(FALSE)
771 , lenientParseRules(NULL)
374ca955 772 , localizations(NULL)
57a6839d
A
773 , capitalizationInfoSet(FALSE)
774 , capitalizationForUIListMenu(FALSE)
775 , capitalizationForStandAlone(FALSE)
776 , capitalizationBrkIter(NULL)
b75a7d8f
A
777{
778 if (U_FAILURE(status)) {
779 return;
780 }
781
729e4ab9 782 const char* rules_tag = "RBNFRules";
b75a7d8f
A
783 const char* fmt_tag = "";
784 switch (tag) {
785 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
786 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
787 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
729e4ab9 788 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
b75a7d8f
A
789 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
790 }
791
374ca955
A
792 // TODO: read localization info from resource
793 LocalizationInfo* locinfo = NULL;
794
374ca955 795 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
b75a7d8f 796 if (U_SUCCESS(status)) {
374ca955
A
797 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
798 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
729e4ab9
A
799
800 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
801 if (U_FAILURE(status)) {
802 ures_close(nfrb);
803 }
804 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
805 if (U_FAILURE(status)) {
806 ures_close(rbnfRules);
807 ures_close(nfrb);
808 return;
809 }
4388f060 810
729e4ab9
A
811 UnicodeString desc;
812 while (ures_hasNext(ruleSets)) {
4388f060 813 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
729e4ab9 814 }
b75a7d8f 815 UParseError perror;
729e4ab9 816
374ca955 817 init (desc, locinfo, perror, status);
729e4ab9 818
729e4ab9
A
819 ures_close(ruleSets);
820 ures_close(rbnfRules);
b75a7d8f 821 }
b75a7d8f
A
822 ures_close(nfrb);
823}
824
825RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
826 : NumberFormat(rhs)
827 , ruleSets(NULL)
4388f060
A
828 , ruleSetDescriptions(NULL)
829 , numRuleSets(0)
b75a7d8f
A
830 , defaultRuleSet(NULL)
831 , locale(rhs.locale)
832 , collator(NULL)
833 , decimalFormatSymbols(NULL)
834 , lenient(FALSE)
835 , lenientParseRules(NULL)
374ca955 836 , localizations(NULL)
57a6839d
A
837 , capitalizationInfoSet(FALSE)
838 , capitalizationForUIListMenu(FALSE)
839 , capitalizationForStandAlone(FALSE)
840 , capitalizationBrkIter(NULL)
b75a7d8f
A
841{
842 this->operator=(rhs);
843}
844
374ca955
A
845// --------
846
b75a7d8f
A
847RuleBasedNumberFormat&
848RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
849{
57a6839d
A
850 if (this == &rhs) {
851 return *this;
852 }
853 NumberFormat::operator=(rhs);
b75a7d8f
A
854 UErrorCode status = U_ZERO_ERROR;
855 dispose();
856 locale = rhs.locale;
374ca955
A
857 lenient = rhs.lenient;
858
b75a7d8f 859 UParseError perror;
57a6839d
A
860 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
861 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
862 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
863
864 capitalizationInfoSet = rhs.capitalizationInfoSet;
865 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
866 capitalizationForStandAlone = rhs.capitalizationForStandAlone;
867#if !UCONFIG_NO_BREAK_ITERATION
868 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
869#endif
374ca955 870
b75a7d8f
A
871 return *this;
872}
873
874RuleBasedNumberFormat::~RuleBasedNumberFormat()
875{
876 dispose();
877}
878
879Format*
880RuleBasedNumberFormat::clone(void) const
881{
57a6839d 882 return new RuleBasedNumberFormat(*this);
b75a7d8f
A
883}
884
885UBool
886RuleBasedNumberFormat::operator==(const Format& other) const
887{
888 if (this == &other) {
889 return TRUE;
890 }
891
729e4ab9 892 if (typeid(*this) == typeid(other)) {
b75a7d8f 893 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
57a6839d
A
894 // test for capitalization info equality is adequately handled
895 // by the NumberFormat test for fCapitalizationContext equality;
896 // the info here is just derived from that.
b75a7d8f 897 if (locale == rhs.locale &&
374ca955
A
898 lenient == rhs.lenient &&
899 (localizations == NULL
900 ? rhs.localizations == NULL
901 : (rhs.localizations == NULL
902 ? FALSE
903 : *localizations == rhs.localizations))) {
904
b75a7d8f
A
905 NFRuleSet** p = ruleSets;
906 NFRuleSet** q = rhs.ruleSets;
b75a7d8f 907 if (p == NULL) {
374ca955
A
908 return q == NULL;
909 } else if (q == NULL) {
b75a7d8f
A
910 return FALSE;
911 }
912 while (*p && *q && (**p == **q)) {
913 ++p;
914 ++q;
915 }
916 return *q == NULL && *p == NULL;
917 }
918 }
919
920 return FALSE;
921}
922
923UnicodeString
924RuleBasedNumberFormat::getRules() const
925{
926 UnicodeString result;
927 if (ruleSets != NULL) {
928 for (NFRuleSet** p = ruleSets; *p; ++p) {
929 (*p)->appendRules(result);
930 }
931 }
932 return result;
933}
934
935UnicodeString
936RuleBasedNumberFormat::getRuleSetName(int32_t index) const
937{
374ca955
A
938 if (localizations) {
939 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
940 return string;
941 } else if (ruleSets) {
942 UnicodeString result;
b75a7d8f
A
943 for (NFRuleSet** p = ruleSets; *p; ++p) {
944 NFRuleSet* rs = *p;
945 if (rs->isPublic()) {
946 if (--index == -1) {
947 rs->getName(result);
948 return result;
949 }
950 }
951 }
952 }
374ca955
A
953 UnicodeString empty;
954 return empty;
b75a7d8f
A
955}
956
957int32_t
958RuleBasedNumberFormat::getNumberOfRuleSetNames() const
959{
960 int32_t result = 0;
374ca955
A
961 if (localizations) {
962 result = localizations->getNumberOfRuleSets();
963 } else if (ruleSets) {
b75a7d8f
A
964 for (NFRuleSet** p = ruleSets; *p; ++p) {
965 if ((**p).isPublic()) {
966 ++result;
967 }
968 }
969 }
970 return result;
971}
972
374ca955
A
973int32_t
974RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
975 if (localizations) {
976 return localizations->getNumberOfDisplayLocales();
977 }
978 return 0;
979}
980
981Locale
982RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
983 if (U_FAILURE(status)) {
73c04bcf 984 return Locale("");
374ca955
A
985 }
986 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
987 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
988 char buffer[64];
989 int32_t cap = name.length() + 1;
990 char* bp = buffer;
991 if (cap > 64) {
992 bp = (char *)uprv_malloc(cap);
993 if (bp == NULL) {
994 status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 995 return Locale("");
374ca955
A
996 }
997 }
998 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
999 Locale retLocale(bp);
1000 if (bp != buffer) {
1001 uprv_free(bp);
1002 }
1003 return retLocale;
1004 }
1005 status = U_ILLEGAL_ARGUMENT_ERROR;
1006 Locale retLocale;
1007 return retLocale;
1008}
1009
1010UnicodeString
1011RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1012 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1013 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1014 int32_t len = localeName.length();
1015 UChar* localeStr = localeName.getBuffer(len + 1);
1016 while (len >= 0) {
1017 localeStr[len] = 0;
1018 int32_t ix = localizations->indexForLocale(localeStr);
1019 if (ix >= 0) {
1020 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1021 return name;
1022 }
1023
1024 // trim trailing portion, skipping over ommitted sections
1025 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1026 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1027 }
1028 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1029 return name;
1030 }
1031 UnicodeString bogus;
1032 bogus.setToBogus();
1033 return bogus;
1034}
1035
1036UnicodeString
1037RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1038 if (localizations) {
1039 UnicodeString rsn(ruleSetName);
1040 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1041 return getRuleSetDisplayName(ix, localeParam);
1042 }
1043 UnicodeString bogus;
1044 bogus.setToBogus();
1045 return bogus;
1046}
1047
b75a7d8f
A
1048NFRuleSet*
1049RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1050{
1051 if (U_SUCCESS(status) && ruleSets) {
1052 for (NFRuleSet** p = ruleSets; *p; ++p) {
1053 NFRuleSet* rs = *p;
1054 if (rs->isNamed(name)) {
1055 return rs;
1056 }
1057 }
1058 status = U_ILLEGAL_ARGUMENT_ERROR;
1059 }
1060 return NULL;
1061}
1062
1063UnicodeString&
1064RuleBasedNumberFormat::format(int32_t number,
1065 UnicodeString& toAppendTo,
374ca955 1066 FieldPosition& /* pos */) const
b75a7d8f 1067{
57a6839d 1068 if (defaultRuleSet) {
b331163b 1069 UErrorCode status = U_ZERO_ERROR;
57a6839d 1070 int32_t startPos = toAppendTo.length();
b331163b 1071 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
57a6839d
A
1072 adjustForCapitalizationContext(startPos, toAppendTo);
1073 }
b75a7d8f
A
1074 return toAppendTo;
1075}
1076
1077
1078UnicodeString&
1079RuleBasedNumberFormat::format(int64_t number,
1080 UnicodeString& toAppendTo,
374ca955 1081 FieldPosition& /* pos */) const
b75a7d8f 1082{
57a6839d 1083 if (defaultRuleSet) {
b331163b 1084 UErrorCode status = U_ZERO_ERROR;
57a6839d 1085 int32_t startPos = toAppendTo.length();
b331163b 1086 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
57a6839d
A
1087 adjustForCapitalizationContext(startPos, toAppendTo);
1088 }
b75a7d8f
A
1089 return toAppendTo;
1090}
1091
1092
1093UnicodeString&
1094RuleBasedNumberFormat::format(double number,
1095 UnicodeString& toAppendTo,
374ca955 1096 FieldPosition& /* pos */) const
b75a7d8f 1097{
57a6839d 1098 int32_t startPos = toAppendTo.length();
729e4ab9
A
1099 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1100 if (uprv_isNaN(number)) {
1101 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1102 if (decFmtSyms) {
1103 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1104 }
1105 } else if (defaultRuleSet) {
b331163b
A
1106 UErrorCode status = U_ZERO_ERROR;
1107 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), status);
729e4ab9 1108 }
57a6839d 1109 return adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1110}
1111
1112
1113UnicodeString&
1114RuleBasedNumberFormat::format(int32_t number,
1115 const UnicodeString& ruleSetName,
1116 UnicodeString& toAppendTo,
374ca955 1117 FieldPosition& /* pos */,
b75a7d8f
A
1118 UErrorCode& status) const
1119{
1120 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1121 if (U_SUCCESS(status)) {
4388f060 1122 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1123 // throw new IllegalArgumentException("Can't use internal rule set");
1124 status = U_ILLEGAL_ARGUMENT_ERROR;
1125 } else {
1126 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1127 if (rs) {
57a6839d 1128 int32_t startPos = toAppendTo.length();
b331163b 1129 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), status);
57a6839d 1130 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1131 }
1132 }
1133 }
1134 return toAppendTo;
1135}
1136
1137
1138UnicodeString&
1139RuleBasedNumberFormat::format(int64_t number,
1140 const UnicodeString& ruleSetName,
1141 UnicodeString& toAppendTo,
374ca955 1142 FieldPosition& /* pos */,
b75a7d8f
A
1143 UErrorCode& status) const
1144{
1145 if (U_SUCCESS(status)) {
4388f060 1146 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1147 // throw new IllegalArgumentException("Can't use internal rule set");
1148 status = U_ILLEGAL_ARGUMENT_ERROR;
1149 } else {
1150 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151 if (rs) {
57a6839d 1152 int32_t startPos = toAppendTo.length();
b331163b 1153 rs->format(number, toAppendTo, toAppendTo.length(), status);
57a6839d 1154 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1155 }
1156 }
1157 }
1158 return toAppendTo;
1159}
1160
1161
b75a7d8f
A
1162UnicodeString&
1163RuleBasedNumberFormat::format(double number,
1164 const UnicodeString& ruleSetName,
1165 UnicodeString& toAppendTo,
374ca955 1166 FieldPosition& /* pos */,
b75a7d8f
A
1167 UErrorCode& status) const
1168{
1169 if (U_SUCCESS(status)) {
4388f060 1170 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1171 // throw new IllegalArgumentException("Can't use internal rule set");
1172 status = U_ILLEGAL_ARGUMENT_ERROR;
1173 } else {
1174 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1175 if (rs) {
57a6839d 1176 int32_t startPos = toAppendTo.length();
b331163b 1177 rs->format(number, toAppendTo, toAppendTo.length(), status);
57a6839d 1178 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1179 }
1180 }
1181 }
1182 return toAppendTo;
1183}
1184
57a6839d
A
1185UnicodeString&
1186RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1187 UnicodeString& currentResult) const
1188{
1189#if !UCONFIG_NO_BREAK_ITERATION
1190 if (startPos==0 && currentResult.length() > 0) {
1191 // capitalize currentResult according to context
1192 UChar32 ch = currentResult.char32At(0);
1193 UErrorCode status = U_ZERO_ERROR;
1194 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1195 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1196 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1197 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1198 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1199 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1200 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1201 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1202 }
1203 }
1204#endif
1205 return currentResult;
1206}
1207
1208
b75a7d8f
A
1209void
1210RuleBasedNumberFormat::parse(const UnicodeString& text,
1211 Formattable& result,
1212 ParsePosition& parsePosition) const
1213{
1214 if (!ruleSets) {
1215 parsePosition.setErrorIndex(0);
1216 return;
1217 }
1218
374ca955
A
1219 UnicodeString workingText(text, parsePosition.getIndex());
1220 ParsePosition workingPos(0);
1221
1222 ParsePosition high_pp(0);
b75a7d8f
A
1223 Formattable high_result;
1224
1225 for (NFRuleSet** p = ruleSets; *p; ++p) {
1226 NFRuleSet *rp = *p;
729e4ab9 1227 if (rp->isPublic() && rp->isParseable()) {
374ca955 1228 ParsePosition working_pp(0);
b75a7d8f
A
1229 Formattable working_result;
1230
729e4ab9 1231 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
b75a7d8f
A
1232 if (working_pp.getIndex() > high_pp.getIndex()) {
1233 high_pp = working_pp;
1234 high_result = working_result;
1235
374ca955 1236 if (high_pp.getIndex() == workingText.length()) {
b75a7d8f
A
1237 break;
1238 }
1239 }
1240 }
1241 }
1242
46f4442e
A
1243 int32_t startIndex = parsePosition.getIndex();
1244 parsePosition.setIndex(startIndex + high_pp.getIndex());
374ca955
A
1245 if (high_pp.getIndex() > 0) {
1246 parsePosition.setErrorIndex(-1);
46f4442e
A
1247 } else {
1248 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1249 parsePosition.setErrorIndex(startIndex + errorIndex);
b75a7d8f 1250 }
b75a7d8f
A
1251 result = high_result;
1252 if (result.getType() == Formattable::kDouble) {
1253 int32_t r = (int32_t)result.getDouble();
1254 if ((double)r == result.getDouble()) {
1255 result.setLong(r);
1256 }
1257 }
1258}
1259
1260#if !UCONFIG_NO_COLLATION
1261
1262void
1263RuleBasedNumberFormat::setLenient(UBool enabled)
1264{
1265 lenient = enabled;
1266 if (!enabled && collator) {
1267 delete collator;
1268 collator = NULL;
1269 }
1270}
1271
1272#endif
1273
1274void
1275RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1276 if (U_SUCCESS(status)) {
1277 if (ruleSetName.isEmpty()) {
374ca955
A
1278 if (localizations) {
1279 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1280 defaultRuleSet = findRuleSet(name, status);
1281 } else {
b75a7d8f 1282 initDefaultRuleSet();
374ca955
A
1283 }
1284 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
b75a7d8f
A
1285 status = U_ILLEGAL_ARGUMENT_ERROR;
1286 } else {
1287 NFRuleSet* result = findRuleSet(ruleSetName, status);
1288 if (result != NULL) {
1289 defaultRuleSet = result;
1290 }
1291 }
1292 }
1293}
1294
374ca955
A
1295UnicodeString
1296RuleBasedNumberFormat::getDefaultRuleSetName() const {
1297 UnicodeString result;
1298 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1299 defaultRuleSet->getName(result);
1300 } else {
1301 result.setToBogus();
1302 }
1303 return result;
1304}
1305
b75a7d8f
A
1306void
1307RuleBasedNumberFormat::initDefaultRuleSet()
1308{
374ca955 1309 defaultRuleSet = NULL;
b75a7d8f 1310 if (!ruleSets) {
374ca955 1311 return;
b75a7d8f 1312 }
374ca955 1313
729e4ab9
A
1314 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1315 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1316 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1317
374ca955 1318 NFRuleSet**p = &ruleSets[0];
b75a7d8f 1319 while (*p) {
729e4ab9
A
1320 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1321 defaultRuleSet = *p;
1322 return;
1323 } else {
1324 ++p;
1325 }
b75a7d8f
A
1326 }
1327
1328 defaultRuleSet = *--p;
1329 if (!defaultRuleSet->isPublic()) {
1330 while (p != ruleSets) {
1331 if ((*--p)->isPublic()) {
1332 defaultRuleSet = *p;
1333 break;
1334 }
1335 }
1336 }
1337}
1338
1339
1340void
374ca955 1341RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
73c04bcf 1342 UParseError& pErr, UErrorCode& status)
b75a7d8f
A
1343{
1344 // TODO: implement UParseError
73c04bcf 1345 uprv_memset(&pErr, 0, sizeof(UParseError));
b75a7d8f
A
1346 // Note: this can leave ruleSets == NULL, so remaining code should check
1347 if (U_FAILURE(status)) {
1348 return;
1349 }
1350
374ca955
A
1351 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1352
b75a7d8f
A
1353 UnicodeString description(rules);
1354 if (!description.length()) {
1355 status = U_MEMORY_ALLOCATION_ERROR;
1356 return;
1357 }
1358
1359 // start by stripping the trailing whitespace from all the rules
1360 // (this is all the whitespace follwing each semicolon in the
1361 // description). This allows us to look for rule-set boundaries
1362 // by searching for ";%" without having to worry about whitespace
1363 // between the ; and the %
1364 stripWhitespace(description);
1365
1366 // check to see if there's a set of lenient-parse rules. If there
1367 // is, pull them out into our temporary holding place for them,
1368 // and delete them from the description before the real desciption-
1369 // parsing code sees them
4388f060 1370 int32_t lp = description.indexOf(gLenientParse, -1, 0);
b75a7d8f
A
1371 if (lp != -1) {
1372 // we've got to make sure we're not in the middle of a rule
1373 // (where "%%lenient-parse" would actually get treated as
1374 // rule text)
1375 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1376 // locate the beginning and end of the actual collation
1377 // rules (there may be whitespace between the name and
1378 // the first token in the description)
4388f060 1379 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
b75a7d8f
A
1380
1381 if (lpEnd == -1) {
1382 lpEnd = description.length() - 1;
1383 }
1384 int lpStart = lp + u_strlen(gLenientParse);
4388f060 1385 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
b75a7d8f
A
1386 ++lpStart;
1387 }
1388
1389 // copy out the lenient-parse rules and delete them
1390 // from the description
1391 lenientParseRules = new UnicodeString();
1392 /* test for NULL */
1393 if (lenientParseRules == 0) {
1394 status = U_MEMORY_ALLOCATION_ERROR;
1395 return;
1396 }
1397 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1398
1399 description.remove(lp, lpEnd + 1 - lp);
1400 }
1401 }
1402
1403 // pre-flight parsing the description and count the number of
1404 // rule sets (";%" marks the end of one rule set and the beginning
1405 // of the next)
4388f060
A
1406 numRuleSets = 0;
1407 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
b75a7d8f
A
1408 ++numRuleSets;
1409 ++p;
1410 }
1411 ++numRuleSets;
1412
1413 // our rule list is an array of the appropriate size
1414 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1415 /* test for NULL */
1416 if (ruleSets == 0) {
1417 status = U_MEMORY_ALLOCATION_ERROR;
1418 return;
1419 }
1420
1421 for (int i = 0; i <= numRuleSets; ++i) {
1422 ruleSets[i] = NULL;
1423 }
1424
1425 // divide up the descriptions into individual rule-set descriptions
1426 // and store them in a temporary array. At each step, we also
1427 // new up a rule set, but all this does is initialize its name
1428 // and remove it from its description. We can't actually parse
1429 // the rest of the descriptions and finish initializing everything
1430 // because we have to know the names and locations of all the rule
1431 // sets before we can actually set everything up
1432 if(!numRuleSets) {
46f4442e
A
1433 status = U_ILLEGAL_ARGUMENT_ERROR;
1434 return;
b75a7d8f 1435 }
4388f060
A
1436
1437 ruleSetDescriptions = new UnicodeString[numRuleSets];
b75a7d8f
A
1438 if (ruleSetDescriptions == 0) {
1439 status = U_MEMORY_ALLOCATION_ERROR;
1440 return;
1441 }
1442
1443 {
1444 int curRuleSet = 0;
1445 int32_t start = 0;
4388f060 1446 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
b75a7d8f
A
1447 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1448 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1449 if (ruleSets[curRuleSet] == 0) {
1450 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1451 return;
b75a7d8f
A
1452 }
1453 ++curRuleSet;
1454 start = p + 1;
1455 }
1456 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1457 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1458 if (ruleSets[curRuleSet] == 0) {
1459 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1460 return;
b75a7d8f
A
1461 }
1462 }
1463
1464 // now we can take note of the formatter's default rule set, which
1465 // is the last public rule set in the description (it's the last
1466 // rather than the first so that a user can create a new formatter
1467 // from an existing formatter and change its default behavior just
1468 // by appending more rule sets to the end)
374ca955
A
1469
1470 // {dlf} Initialization of a fraction rule set requires the default rule
1471 // set to be known. For purposes of initialization, this is always the
1472 // last public rule set, no matter what the localization data says.
1473 initDefaultRuleSet();
b75a7d8f
A
1474
1475 // finally, we can go back through the temporary descriptions
1476 // list and finish seting up the substructure (and we throw
1477 // away the temporary descriptions as we go)
1478 {
1479 for (int i = 0; i < numRuleSets; i++) {
1480 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1481 }
1482 }
1483
374ca955
A
1484 // Now that the rules are initialized, the 'real' default rule
1485 // set can be adjusted by the localization data.
1486
1487 // The C code keeps the localization array as is, rather than building
1488 // a separate array of the public rule set names, so we have less work
1489 // to do here-- but we still need to check the names.
1490
1491 if (localizationInfos) {
1492 // confirm the names, if any aren't in the rules, that's an error
1493 // it is ok if the rules contain public rule sets that are not in this list
1494 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1495 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1496 NFRuleSet* rs = findRuleSet(name, status);
1497 if (rs == NULL) {
1498 break; // error
1499 }
1500 if (i == 0) {
1501 defaultRuleSet = rs;
1502 }
1503 }
1504 } else {
1505 defaultRuleSet = getDefaultRuleSet();
1506 }
57a6839d
A
1507 originalDescription = rules;
1508}
1509
1510// override the NumberFormat implementation in order to
1511// lazily initialize relevant items
1512void
1513RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1514{
1515 NumberFormat::setContext(value, status);
1516 if (U_SUCCESS(status)) {
1517 if (!capitalizationInfoSet &&
1518 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1519 initCapitalizationContextInfo(locale);
1520 capitalizationInfoSet = TRUE;
1521 }
1522#if !UCONFIG_NO_BREAK_ITERATION
1523 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1524 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1525 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1526 UErrorCode status = U_ZERO_ERROR;
1527 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1528 if (U_FAILURE(status)) {
1529 delete capitalizationBrkIter;
1530 capitalizationBrkIter = NULL;
1531 }
1532 }
1533#endif
1534 }
1535}
1536
1537void
1538RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1539{
1540#if !UCONFIG_NO_BREAK_ITERATION
1541 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1542 UErrorCode status = U_ZERO_ERROR;
1543 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1544 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1545 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1546 if (U_SUCCESS(status) && rb != NULL) {
1547 int32_t len = 0;
1548 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1549 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1550 capitalizationForUIListMenu = intVector[0];
1551 capitalizationForStandAlone = intVector[1];
1552 }
1553 }
1554 ures_close(rb);
1555#endif
b75a7d8f
A
1556}
1557
1558void
1559RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1560{
1561 // iterate through the characters...
1562 UnicodeString result;
1563
1564 int start = 0;
1565 while (start != -1 && start < description.length()) {
1566 // seek to the first non-whitespace character...
1567 while (start < description.length()
4388f060 1568 && PatternProps::isWhiteSpace(description.charAt(start))) {
b75a7d8f
A
1569 ++start;
1570 }
1571
1572 // locate the next semicolon in the text and copy the text from
1573 // our current position up to that semicolon into the result
1574 int32_t p = description.indexOf(gSemiColon, start);
1575 if (p == -1) {
1576 // or if we don't find a semicolon, just copy the rest of
1577 // the string into the result
1578 result.append(description, start, description.length() - start);
1579 start = -1;
1580 }
1581 else if (p < description.length()) {
1582 result.append(description, start, p + 1 - start);
1583 start = p + 1;
1584 }
1585
1586 // when we get here, we've seeked off the end of the sring, and
1587 // we terminate the loop (we continue until *start* is -1 rather
1588 // than until *p* is -1, because otherwise we'd miss the last
1589 // rule in the description)
1590 else {
1591 start = -1;
1592 }
1593 }
1594
1595 description.setTo(result);
1596}
1597
1598
1599void
1600RuleBasedNumberFormat::dispose()
1601{
1602 if (ruleSets) {
1603 for (NFRuleSet** p = ruleSets; *p; ++p) {
1604 delete *p;
1605 }
1606 uprv_free(ruleSets);
1607 ruleSets = NULL;
1608 }
1609
4388f060
A
1610 if (ruleSetDescriptions) {
1611 delete [] ruleSetDescriptions;
1612 }
1613
b75a7d8f
A
1614#if !UCONFIG_NO_COLLATION
1615 delete collator;
1616#endif
1617 collator = NULL;
1618
1619 delete decimalFormatSymbols;
1620 decimalFormatSymbols = NULL;
1621
1622 delete lenientParseRules;
1623 lenientParseRules = NULL;
374ca955 1624
57a6839d
A
1625#if !UCONFIG_NO_BREAK_ITERATION
1626 delete capitalizationBrkIter;
1627 capitalizationBrkIter = NULL;
1628#endif
1629
374ca955 1630 if (localizations) localizations = localizations->unref();
b75a7d8f
A
1631}
1632
1633
1634//-----------------------------------------------------------------------
1635// package-internal API
1636//-----------------------------------------------------------------------
1637
1638/**
1639 * Returns the collator to use for lenient parsing. The collator is lazily created:
1640 * this function creates it the first time it's called.
1641 * @return The collator to use for lenient parsing, or null if lenient parsing
1642 * is turned off.
1643*/
57a6839d 1644const RuleBasedCollator*
b75a7d8f
A
1645RuleBasedNumberFormat::getCollator() const
1646{
1647#if !UCONFIG_NO_COLLATION
1648 if (!ruleSets) {
1649 return NULL;
1650 }
1651
57a6839d 1652 // lazy-evaluate the collator
b75a7d8f
A
1653 if (collator == NULL && lenient) {
1654 // create a default collator based on the formatter's locale,
1655 // then pull out that collator's rules, append any additional
1656 // rules specified in the description, and create a _new_
1657 // collator based on the combinaiton of those rules
1658
1659 UErrorCode status = U_ZERO_ERROR;
1660
1661 Collator* temp = Collator::createInstance(locale, status);
729e4ab9
A
1662 RuleBasedCollator* newCollator;
1663 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
b75a7d8f
A
1664 if (lenientParseRules) {
1665 UnicodeString rules(newCollator->getRules());
1666 rules.append(*lenientParseRules);
1667
1668 newCollator = new RuleBasedCollator(rules, status);
46f4442e
A
1669 // Exit if newCollator could not be created.
1670 if (newCollator == NULL) {
57a6839d 1671 return NULL;
46f4442e 1672 }
b75a7d8f
A
1673 } else {
1674 temp = NULL;
1675 }
1676 if (U_SUCCESS(status)) {
1677 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1678 // cast away const
1679 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1680 } else {
1681 delete newCollator;
1682 }
1683 }
1684 delete temp;
1685 }
1686#endif
1687
1688 // if lenient-parse mode is off, this will be null
1689 // (see setLenientParseMode())
1690 return collator;
1691}
1692
1693
1694/**
1695 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1696 * instances owned by this formatter. This object is lazily created: this function
1697 * creates it the first time it's called.
1698 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1699 * instances owned by this formatter.
1700*/
1701DecimalFormatSymbols*
1702RuleBasedNumberFormat::getDecimalFormatSymbols() const
1703{
1704 // lazy-evaluate the DecimalFormatSymbols object. This object
1705 // is shared by all DecimalFormat instances belonging to this
1706 // formatter
1707 if (decimalFormatSymbols == NULL) {
1708 UErrorCode status = U_ZERO_ERROR;
1709 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1710 if (U_SUCCESS(status)) {
1711 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1712 } else {
1713 delete temp;
1714 }
1715 }
1716 return decimalFormatSymbols;
1717}
1718
4388f060
A
1719// De-owning the current localized symbols and adopt the new symbols.
1720void
1721RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1722{
1723 if (symbolsToAdopt == NULL) {
1724 return; // do not allow caller to set decimalFormatSymbols to NULL
1725 }
1726
1727 if (decimalFormatSymbols != NULL) {
1728 delete decimalFormatSymbols;
1729 }
1730
1731 decimalFormatSymbols = symbolsToAdopt;
1732
1733 {
1734 // Apply the new decimalFormatSymbols by reparsing the rulesets
1735 UErrorCode status = U_ZERO_ERROR;
1736
1737 for (int32_t i = 0; i < numRuleSets; i++) {
1738 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1739 }
1740 }
1741}
1742
1743// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1744void
1745RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1746{
1747 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1748}
1749
b331163b
A
1750PluralFormat *
1751RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1752 const UnicodeString &pattern,
1753 UErrorCode& status) const
1754{
1755 return new PluralFormat(locale, pluralType, pattern, status);
1756}
1757
374ca955
A
1758U_NAMESPACE_END
1759
b75a7d8f
A
1760/* U_HAVE_RBNF */
1761#endif