]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbnf.cpp
ICU-531.30.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
57a6839d 3* Copyright (C) 1997-2014, International Business Machines Corporation
73c04bcf 4* and others. All Rights Reserved.
b75a7d8f
A
5*******************************************************************************
6*/
7
57a6839d 8#include "unicode/utypes.h"
51004dcb 9#include "utypeinfo.h" // for 'typeid' to work
729e4ab9 10
b75a7d8f
A
11#include "unicode/rbnf.h"
12
13#if U_HAVE_RBNF
14
15#include "unicode/normlzr.h"
16#include "unicode/tblcoll.h"
17#include "unicode/uchar.h"
18#include "unicode/ucol.h"
19#include "unicode/uloc.h"
20#include "unicode/unum.h"
21#include "unicode/ures.h"
22#include "unicode/ustring.h"
23#include "unicode/utf16.h"
374ca955 24#include "unicode/udata.h"
57a6839d
A
25#include "unicode/udisplaycontext.h"
26#include "unicode/brkiter.h"
b75a7d8f
A
27#include "nfrs.h"
28
29#include "cmemory.h"
30#include "cstring.h"
4388f060 31#include "patternprops.h"
729e4ab9 32#include "uresimp.h"
374ca955
A
33
34// debugging
35// #define DEBUG
36
37#ifdef DEBUG
38#include "stdio.h"
39#endif
40
41#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
b75a7d8f
A
42
43static const UChar gPercentPercent[] =
44{
45 0x25, 0x25, 0
46}; /* "%%" */
47
48// All urbnf objects are created through openRules, so we init all of the
49// Unicode string constants required by rbnf, nfrs, or nfr here.
50static const UChar gLenientParse[] =
51{
52 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
53}; /* "%%lenient-parse:" */
54static const UChar gSemiColon = 0x003B;
55static const UChar gSemiPercent[] =
56{
57 0x3B, 0x25, 0
58}; /* ";%" */
59
60#define kSomeNumberOfBitsDiv2 22
61#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
62#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
63
374ca955
A
64U_NAMESPACE_BEGIN
65
66UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
67
73c04bcf
A
68/*
69This is a utility class. It does not use ICU's RTTI.
70If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
71Please make sure that intltest passes on Windows in Release mode,
72since the string pooling per compilation unit will mess up how RTTI works.
73The RTTI code was also removed due to lack of code coverage.
74*/
75class LocalizationInfo : public UMemory {
374ca955 76protected:
4388f060 77 virtual ~LocalizationInfo();
374ca955
A
78 uint32_t refcount;
79
80public:
81 LocalizationInfo() : refcount(0) {}
82
83 LocalizationInfo* ref(void) {
84 ++refcount;
85 return this;
86 }
87
88 LocalizationInfo* unref(void) {
89 if (refcount && --refcount == 0) {
90 delete this;
91 }
92 return NULL;
93 }
94
95 virtual UBool operator==(const LocalizationInfo* rhs) const;
96 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
97
98 virtual int32_t getNumberOfRuleSets(void) const = 0;
99 virtual const UChar* getRuleSetName(int32_t index) const = 0;
100 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
101 virtual const UChar* getLocaleName(int32_t index) const = 0;
102 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
103
104 virtual int32_t indexForLocale(const UChar* locale) const;
105 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
106
73c04bcf
A
107// virtual UClassID getDynamicClassID() const = 0;
108// static UClassID getStaticClassID(void);
374ca955
A
109};
110
4388f060
A
111LocalizationInfo::~LocalizationInfo() {}
112
73c04bcf 113//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
374ca955
A
114
115// if both strings are NULL, this returns TRUE
116static UBool
117streq(const UChar* lhs, const UChar* rhs) {
118 if (rhs == lhs) {
119 return TRUE;
120 }
121 if (lhs && rhs) {
122 return u_strcmp(lhs, rhs) == 0;
123 }
124 return FALSE;
125}
126
127UBool
128LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
129 if (rhs) {
130 if (this == rhs) {
131 return TRUE;
132 }
133
134 int32_t rsc = getNumberOfRuleSets();
135 if (rsc == rhs->getNumberOfRuleSets()) {
136 for (int i = 0; i < rsc; ++i) {
137 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
138 return FALSE;
139 }
140 }
141 int32_t dlc = getNumberOfDisplayLocales();
142 if (dlc == rhs->getNumberOfDisplayLocales()) {
143 for (int i = 0; i < dlc; ++i) {
144 const UChar* locale = getLocaleName(i);
145 int32_t ix = rhs->indexForLocale(locale);
146 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
147 if (!streq(locale, rhs->getLocaleName(ix))) {
148 return FALSE;
149 }
150 for (int j = 0; j < rsc; ++j) {
151 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
152 return FALSE;
153 }
154 }
155 }
156 return TRUE;
157 }
158 }
159 }
160 return FALSE;
161}
162
163int32_t
164LocalizationInfo::indexForLocale(const UChar* locale) const {
165 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
166 if (streq(locale, getLocaleName(i))) {
167 return i;
168 }
169 }
170 return -1;
171}
172
173int32_t
174LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
175 if (ruleset) {
176 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
177 if (streq(ruleset, getRuleSetName(i))) {
178 return i;
179 }
180 }
181 }
182 return -1;
183}
184
185
186typedef void (*Fn_Deleter)(void*);
187
188class VArray {
189 void** buf;
190 int32_t cap;
191 int32_t size;
192 Fn_Deleter deleter;
193public:
194 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
195
196 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
197
198 ~VArray() {
199 if (deleter) {
200 for (int i = 0; i < size; ++i) {
201 (*deleter)(buf[i]);
202 }
203 }
204 uprv_free(buf);
205 }
206
207 int32_t length() {
208 return size;
209 }
210
211 void add(void* elem, UErrorCode& status) {
212 if (U_SUCCESS(status)) {
213 if (size == cap) {
214 if (cap == 0) {
215 cap = 1;
216 } else if (cap < 256) {
217 cap *= 2;
218 } else {
219 cap += 256;
220 }
221 if (buf == NULL) {
222 buf = (void**)uprv_malloc(cap * sizeof(void*));
223 } else {
224 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
225 }
226 if (buf == NULL) {
227 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
228 status = U_MEMORY_ALLOCATION_ERROR;
229 return;
230 }
231 void* start = &buf[size];
232 size_t count = (cap - size) * sizeof(void*);
233 uprv_memset(start, 0, count); // fill with nulls, just because
234 }
235 buf[size++] = elem;
236 }
237 }
238
239 void** release(void) {
240 void** result = buf;
241 buf = NULL;
242 cap = 0;
243 size = 0;
244 return result;
245 }
246};
247
248class LocDataParser;
249
250class StringLocalizationInfo : public LocalizationInfo {
251 UChar* info;
252 UChar*** data;
253 int32_t numRuleSets;
254 int32_t numLocales;
255
256friend class LocDataParser;
257
258 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
259 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
260 {
261 }
262
263public:
264 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
265
266 virtual ~StringLocalizationInfo();
267 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
268 virtual const UChar* getRuleSetName(int32_t index) const;
269 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
270 virtual const UChar* getLocaleName(int32_t index) const;
271 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
272
73c04bcf
A
273// virtual UClassID getDynamicClassID() const;
274// static UClassID getStaticClassID(void);
374ca955
A
275
276private:
277 void init(UErrorCode& status) const;
278};
279
280
// Punctuation recognized by the localization data parser.
enum {
    OPEN_ANGLE  = 0x003c, /* '<'  */
    CLOSE_ANGLE = 0x003e, /* '>'  */
    COMMA       = 0x002c, /* ','  */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"'  */
    SPACE       = 0x0020  /* ' '  */
};
289
290/**
291 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
292 */
293class LocDataParser {
294 UChar* data;
295 const UChar* e;
296 UChar* p;
297 UChar ch;
298 UParseError& pe;
299 UErrorCode& ec;
300
301public:
302 LocDataParser(UParseError& parseError, UErrorCode& status)
303 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
304 ~LocDataParser() {}
305
306 /*
307 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
308 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
309 */
310 StringLocalizationInfo* parse(UChar* data, int32_t len);
311
312private:
313
314 void inc(void) { ++p; ch = 0xffff; }
315 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
316 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
4388f060 317 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
374ca955 318 UBool inList(UChar c, const UChar* list) const {
4388f060 319 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
374ca955
A
320 while (*list && *list != c) ++list; return *list == c;
321 }
322 void parseError(const char* msg);
323
324 StringLocalizationInfo* doParse(void);
325
326 UChar** nextArray(int32_t& requiredLength);
327 UChar* nextString(void);
328};
329
// ERROR(msg) records a parse error and returns NULL from the enclosing
// function. It expands to two statements, so it must only be used inside
// braces. The message text is passed through only in DEBUG builds;
// EXPLANATION_ARG names parseError()'s parameter only in DEBUG builds.
#ifdef DEBUG
#  define ERROR(msg) parseError(msg); return NULL;
#  define EXPLANATION_ARG explanationArg
#else
#  define ERROR(msg) parseError(NULL); return NULL;
#  define EXPLANATION_ARG
#endif
337
338
339static const UChar DQUOTE_STOPLIST[] = {
340 QUOTE, 0
341};
342
343static const UChar SQUOTE_STOPLIST[] = {
344 TICK, 0
345};
346
347static const UChar NOQUOTE_STOPLIST[] = {
348 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
349};
350
351static void
352DeleteFn(void* p) {
353 uprv_free(p);
354}
355
356StringLocalizationInfo*
357LocDataParser::parse(UChar* _data, int32_t len) {
358 if (U_FAILURE(ec)) {
359 if (_data) uprv_free(_data);
360 return NULL;
361 }
362
363 pe.line = 0;
364 pe.offset = -1;
365 pe.postContext[0] = 0;
366 pe.preContext[0] = 0;
367
368 if (_data == NULL) {
369 ec = U_ILLEGAL_ARGUMENT_ERROR;
370 return NULL;
371 }
372
373 if (len <= 0) {
374 ec = U_ILLEGAL_ARGUMENT_ERROR;
375 uprv_free(_data);
376 return NULL;
377 }
378
379 data = _data;
380 e = data + len;
381 p = _data;
382 ch = 0xffff;
383
384 return doParse();
385}
386
387
388StringLocalizationInfo*
389LocDataParser::doParse(void) {
390 skipWhitespace();
391 if (!checkInc(OPEN_ANGLE)) {
392 ERROR("Missing open angle");
393 } else {
394 VArray array(DeleteFn);
395 UBool mightHaveNext = TRUE;
396 int32_t requiredLength = -1;
397 while (mightHaveNext) {
398 mightHaveNext = FALSE;
399 UChar** elem = nextArray(requiredLength);
400 skipWhitespace();
401 UBool haveComma = check(COMMA);
402 if (elem) {
403 array.add(elem, ec);
404 if (haveComma) {
405 inc();
406 mightHaveNext = TRUE;
407 }
408 } else if (haveComma) {
409 ERROR("Unexpected character");
410 }
411 }
412
413 skipWhitespace();
414 if (!checkInc(CLOSE_ANGLE)) {
415 if (check(OPEN_ANGLE)) {
416 ERROR("Missing comma in outer array");
417 } else {
418 ERROR("Missing close angle bracket in outer array");
419 }
420 }
421
422 skipWhitespace();
423 if (p != e) {
424 ERROR("Extra text after close of localization data");
425 }
426
427 array.add(NULL, ec);
428 if (U_SUCCESS(ec)) {
429 int32_t numLocs = array.length() - 2; // subtract first, NULL
430 UChar*** result = (UChar***)array.release();
431
432 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
433 }
434 }
435
436 ERROR("Unknown error");
437}
438
439UChar**
440LocDataParser::nextArray(int32_t& requiredLength) {
441 if (U_FAILURE(ec)) {
442 return NULL;
443 }
444
445 skipWhitespace();
446 if (!checkInc(OPEN_ANGLE)) {
447 ERROR("Missing open angle");
448 }
449
450 VArray array;
451 UBool mightHaveNext = TRUE;
452 while (mightHaveNext) {
453 mightHaveNext = FALSE;
454 UChar* elem = nextString();
455 skipWhitespace();
456 UBool haveComma = check(COMMA);
457 if (elem) {
458 array.add(elem, ec);
459 if (haveComma) {
460 inc();
461 mightHaveNext = TRUE;
462 }
463 } else if (haveComma) {
464 ERROR("Unexpected comma");
465 }
466 }
467 skipWhitespace();
468 if (!checkInc(CLOSE_ANGLE)) {
469 if (check(OPEN_ANGLE)) {
470 ERROR("Missing close angle bracket in inner array");
471 } else {
472 ERROR("Missing comma in inner array");
473 }
474 }
475
476 array.add(NULL, ec);
477 if (U_SUCCESS(ec)) {
478 if (requiredLength == -1) {
479 requiredLength = array.length() + 1;
480 } else if (array.length() != requiredLength) {
481 ec = U_ILLEGAL_ARGUMENT_ERROR;
482 ERROR("Array not of required length");
483 }
484
485 return (UChar**)array.release();
486 }
487 ERROR("Unknown Error");
488}
489
490UChar*
491LocDataParser::nextString() {
492 UChar* result = NULL;
493
494 skipWhitespace();
495 if (p < e) {
496 const UChar* terminators;
497 UChar c = *p;
498 UBool haveQuote = c == QUOTE || c == TICK;
499 if (haveQuote) {
500 inc();
501 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
502 } else {
503 terminators = NOQUOTE_STOPLIST;
504 }
505 UChar* start = p;
506 while (p < e && !inList(*p, terminators)) ++p;
507 if (p == e) {
508 ERROR("Unexpected end of data");
509 }
510
511 UChar x = *p;
512 if (p > start) {
513 ch = x;
514 *p = 0x0; // terminate by writing to data
515 result = start; // just point into data
516 }
517 if (haveQuote) {
518 if (x != c) {
519 ERROR("Missing matching quote");
520 } else if (p == start) {
521 ERROR("Empty string");
522 }
523 inc();
524 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
525 ERROR("Unexpected character in string");
526 }
527 }
528
529 // ok for there to be no next string
530 return result;
531}
532
57a6839d
A
533void LocDataParser::parseError(const char* EXPLANATION_ARG)
534{
374ca955
A
535 if (!data) {
536 return;
537 }
b75a7d8f 538
374ca955 539 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
73c04bcf 540 if (start < data) {
374ca955 541 start = data;
73c04bcf
A
542 }
543 for (UChar* x = p; --x >= start;) {
374ca955
A
544 if (!*x) {
545 start = x+1;
546 break;
547 }
73c04bcf 548 }
374ca955 549 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
73c04bcf 550 if (limit > e) {
374ca955 551 limit = e;
73c04bcf
A
552 }
553 u_strncpy(pe.preContext, start, (int32_t)(p-start));
374ca955 554 pe.preContext[p-start] = 0;
73c04bcf 555 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
374ca955 556 pe.postContext[limit-p] = 0;
73c04bcf 557 pe.offset = (int32_t)(p - data);
374ca955
A
558
559#ifdef DEBUG
57a6839d 560 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
374ca955
A
561
562 UnicodeString msg;
563 msg.append(start, p - start);
564 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
565 msg.append(p, limit-p);
57a6839d 566 msg.append(UNICODE_STRING_SIMPLE("'"));
374ca955
A
567
568 char buf[128];
569 int32_t len = msg.extract(0, msg.length(), buf, 128);
570 if (len >= 128) {
571 buf[127] = 0;
572 } else {
573 buf[len] = 0;
574 }
575 fprintf(stderr, "%s\n", buf);
576 fflush(stderr);
577#endif
578
579 uprv_free(data);
580 data = NULL;
581 p = NULL;
582 e = NULL;
583
584 if (U_SUCCESS(ec)) {
585 ec = U_PARSE_ERROR;
586 }
587}
588
73c04bcf 589//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
374ca955
A
590
591StringLocalizationInfo*
592StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
593 if (U_FAILURE(status)) {
594 return NULL;
595 }
596
597 int32_t len = info.length();
598 if (len == 0) {
599 return NULL; // no error;
600 }
601
602 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
603 if (!p) {
604 status = U_MEMORY_ALLOCATION_ERROR;
605 return NULL;
606 }
607 info.extract(p, len, status);
608 if (!U_FAILURE(status)) {
609 status = U_ZERO_ERROR; // clear warning about non-termination
610 }
611
612 LocDataParser parser(perror, status);
613 return parser.parse(p, len);
614}
615
616StringLocalizationInfo::~StringLocalizationInfo() {
617 for (UChar*** p = (UChar***)data; *p; ++p) {
618 // remaining data is simply pointer into our unicode string data.
619 if (*p) uprv_free(*p);
620 }
621 if (data) uprv_free(data);
622 if (info) uprv_free(info);
623}
624
625
626const UChar*
627StringLocalizationInfo::getRuleSetName(int32_t index) const {
628 if (index >= 0 && index < getNumberOfRuleSets()) {
629 return data[0][index];
630 }
631 return NULL;
632}
633
634const UChar*
635StringLocalizationInfo::getLocaleName(int32_t index) const {
636 if (index >= 0 && index < getNumberOfDisplayLocales()) {
637 return data[index+1][0];
638 }
639 return NULL;
640}
641
642const UChar*
643StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
644 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
645 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
646 return data[localeIndex+1][ruleIndex+1];
647 }
648 return NULL;
649}
650
651// ----------
652
653RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
654 const UnicodeString& locs,
655 const Locale& alocale, UParseError& perror, UErrorCode& status)
b75a7d8f 656 : ruleSets(NULL)
4388f060
A
657 , ruleSetDescriptions(NULL)
658 , numRuleSets(0)
b75a7d8f
A
659 , defaultRuleSet(NULL)
660 , locale(alocale)
661 , collator(NULL)
662 , decimalFormatSymbols(NULL)
663 , lenient(FALSE)
664 , lenientParseRules(NULL)
374ca955 665 , localizations(NULL)
57a6839d
A
666 , capitalizationInfoSet(FALSE)
667 , capitalizationForUIListMenu(FALSE)
668 , capitalizationForStandAlone(FALSE)
669 , capitalizationBrkIter(NULL)
374ca955
A
670{
671 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
672 init(description, locinfo, perror, status);
673}
674
675RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
676 const UnicodeString& locs,
677 UParseError& perror, UErrorCode& status)
678 : ruleSets(NULL)
4388f060
A
679 , ruleSetDescriptions(NULL)
680 , numRuleSets(0)
374ca955
A
681 , defaultRuleSet(NULL)
682 , locale(Locale::getDefault())
683 , collator(NULL)
684 , decimalFormatSymbols(NULL)
685 , lenient(FALSE)
686 , lenientParseRules(NULL)
687 , localizations(NULL)
57a6839d
A
688 , capitalizationInfoSet(FALSE)
689 , capitalizationForUIListMenu(FALSE)
690 , capitalizationForStandAlone(FALSE)
691 , capitalizationBrkIter(NULL)
374ca955
A
692{
693 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
694 init(description, locinfo, perror, status);
695}
696
697RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
698 LocalizationInfo* info,
699 const Locale& alocale, UParseError& perror, UErrorCode& status)
700 : ruleSets(NULL)
4388f060
A
701 , ruleSetDescriptions(NULL)
702 , numRuleSets(0)
374ca955
A
703 , defaultRuleSet(NULL)
704 , locale(alocale)
705 , collator(NULL)
706 , decimalFormatSymbols(NULL)
707 , lenient(FALSE)
708 , lenientParseRules(NULL)
709 , localizations(NULL)
57a6839d
A
710 , capitalizationInfoSet(FALSE)
711 , capitalizationForUIListMenu(FALSE)
712 , capitalizationForStandAlone(FALSE)
713 , capitalizationBrkIter(NULL)
374ca955
A
714{
715 init(description, info, perror, status);
716}
717
718RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
719 UParseError& perror,
720 UErrorCode& status)
721 : ruleSets(NULL)
4388f060
A
722 , ruleSetDescriptions(NULL)
723 , numRuleSets(0)
374ca955
A
724 , defaultRuleSet(NULL)
725 , locale(Locale::getDefault())
726 , collator(NULL)
727 , decimalFormatSymbols(NULL)
728 , lenient(FALSE)
729 , lenientParseRules(NULL)
730 , localizations(NULL)
57a6839d
A
731 , capitalizationInfoSet(FALSE)
732 , capitalizationForUIListMenu(FALSE)
733 , capitalizationForStandAlone(FALSE)
734 , capitalizationBrkIter(NULL)
374ca955
A
735{
736 init(description, NULL, perror, status);
737}
738
739RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
740 const Locale& aLocale,
741 UParseError& perror,
742 UErrorCode& status)
743 : ruleSets(NULL)
4388f060
A
744 , ruleSetDescriptions(NULL)
745 , numRuleSets(0)
374ca955
A
746 , defaultRuleSet(NULL)
747 , locale(aLocale)
748 , collator(NULL)
749 , decimalFormatSymbols(NULL)
750 , lenient(FALSE)
751 , lenientParseRules(NULL)
752 , localizations(NULL)
57a6839d
A
753 , capitalizationInfoSet(FALSE)
754 , capitalizationForUIListMenu(FALSE)
755 , capitalizationForStandAlone(FALSE)
756 , capitalizationBrkIter(NULL)
b75a7d8f 757{
374ca955 758 init(description, NULL, perror, status);
b75a7d8f
A
759}
760
761RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
762 : ruleSets(NULL)
4388f060
A
763 , ruleSetDescriptions(NULL)
764 , numRuleSets(0)
b75a7d8f
A
765 , defaultRuleSet(NULL)
766 , locale(alocale)
767 , collator(NULL)
768 , decimalFormatSymbols(NULL)
769 , lenient(FALSE)
770 , lenientParseRules(NULL)
374ca955 771 , localizations(NULL)
57a6839d
A
772 , capitalizationInfoSet(FALSE)
773 , capitalizationForUIListMenu(FALSE)
774 , capitalizationForStandAlone(FALSE)
775 , capitalizationBrkIter(NULL)
b75a7d8f
A
776{
777 if (U_FAILURE(status)) {
778 return;
779 }
780
729e4ab9 781 const char* rules_tag = "RBNFRules";
b75a7d8f
A
782 const char* fmt_tag = "";
783 switch (tag) {
784 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
785 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
786 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
729e4ab9 787 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
b75a7d8f
A
788 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
789 }
790
374ca955
A
791 // TODO: read localization info from resource
792 LocalizationInfo* locinfo = NULL;
793
374ca955 794 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
b75a7d8f 795 if (U_SUCCESS(status)) {
374ca955
A
796 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
797 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
729e4ab9
A
798
799 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
800 if (U_FAILURE(status)) {
801 ures_close(nfrb);
802 }
803 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
804 if (U_FAILURE(status)) {
805 ures_close(rbnfRules);
806 ures_close(nfrb);
807 return;
808 }
4388f060 809
729e4ab9
A
810 UnicodeString desc;
811 while (ures_hasNext(ruleSets)) {
4388f060 812 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
729e4ab9 813 }
b75a7d8f 814 UParseError perror;
729e4ab9 815
374ca955 816 init (desc, locinfo, perror, status);
729e4ab9 817
729e4ab9
A
818 ures_close(ruleSets);
819 ures_close(rbnfRules);
b75a7d8f 820 }
b75a7d8f
A
821 ures_close(nfrb);
822}
823
824RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
825 : NumberFormat(rhs)
826 , ruleSets(NULL)
4388f060
A
827 , ruleSetDescriptions(NULL)
828 , numRuleSets(0)
b75a7d8f
A
829 , defaultRuleSet(NULL)
830 , locale(rhs.locale)
831 , collator(NULL)
832 , decimalFormatSymbols(NULL)
833 , lenient(FALSE)
834 , lenientParseRules(NULL)
374ca955 835 , localizations(NULL)
57a6839d
A
836 , capitalizationInfoSet(FALSE)
837 , capitalizationForUIListMenu(FALSE)
838 , capitalizationForStandAlone(FALSE)
839 , capitalizationBrkIter(NULL)
b75a7d8f
A
840{
841 this->operator=(rhs);
842}
843
374ca955
A
844// --------
845
b75a7d8f
A
846RuleBasedNumberFormat&
847RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
848{
57a6839d
A
849 if (this == &rhs) {
850 return *this;
851 }
852 NumberFormat::operator=(rhs);
b75a7d8f
A
853 UErrorCode status = U_ZERO_ERROR;
854 dispose();
855 locale = rhs.locale;
374ca955
A
856 lenient = rhs.lenient;
857
b75a7d8f 858 UParseError perror;
57a6839d
A
859 init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
860 setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
861 setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);
862
863 capitalizationInfoSet = rhs.capitalizationInfoSet;
864 capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
865 capitalizationForStandAlone = rhs.capitalizationForStandAlone;
866#if !UCONFIG_NO_BREAK_ITERATION
867 capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
868#endif
374ca955 869
b75a7d8f
A
870 return *this;
871}
872
873RuleBasedNumberFormat::~RuleBasedNumberFormat()
874{
875 dispose();
876}
877
878Format*
879RuleBasedNumberFormat::clone(void) const
880{
57a6839d 881 return new RuleBasedNumberFormat(*this);
b75a7d8f
A
882}
883
884UBool
885RuleBasedNumberFormat::operator==(const Format& other) const
886{
887 if (this == &other) {
888 return TRUE;
889 }
890
729e4ab9 891 if (typeid(*this) == typeid(other)) {
b75a7d8f 892 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
57a6839d
A
893 // test for capitalization info equality is adequately handled
894 // by the NumberFormat test for fCapitalizationContext equality;
895 // the info here is just derived from that.
b75a7d8f 896 if (locale == rhs.locale &&
374ca955
A
897 lenient == rhs.lenient &&
898 (localizations == NULL
899 ? rhs.localizations == NULL
900 : (rhs.localizations == NULL
901 ? FALSE
902 : *localizations == rhs.localizations))) {
903
b75a7d8f
A
904 NFRuleSet** p = ruleSets;
905 NFRuleSet** q = rhs.ruleSets;
b75a7d8f 906 if (p == NULL) {
374ca955
A
907 return q == NULL;
908 } else if (q == NULL) {
b75a7d8f
A
909 return FALSE;
910 }
911 while (*p && *q && (**p == **q)) {
912 ++p;
913 ++q;
914 }
915 return *q == NULL && *p == NULL;
916 }
917 }
918
919 return FALSE;
920}
921
922UnicodeString
923RuleBasedNumberFormat::getRules() const
924{
925 UnicodeString result;
926 if (ruleSets != NULL) {
927 for (NFRuleSet** p = ruleSets; *p; ++p) {
928 (*p)->appendRules(result);
929 }
930 }
931 return result;
932}
933
934UnicodeString
935RuleBasedNumberFormat::getRuleSetName(int32_t index) const
936{
374ca955
A
937 if (localizations) {
938 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
939 return string;
940 } else if (ruleSets) {
941 UnicodeString result;
b75a7d8f
A
942 for (NFRuleSet** p = ruleSets; *p; ++p) {
943 NFRuleSet* rs = *p;
944 if (rs->isPublic()) {
945 if (--index == -1) {
946 rs->getName(result);
947 return result;
948 }
949 }
950 }
951 }
374ca955
A
952 UnicodeString empty;
953 return empty;
b75a7d8f
A
954}
955
956int32_t
957RuleBasedNumberFormat::getNumberOfRuleSetNames() const
958{
959 int32_t result = 0;
374ca955
A
960 if (localizations) {
961 result = localizations->getNumberOfRuleSets();
962 } else if (ruleSets) {
b75a7d8f
A
963 for (NFRuleSet** p = ruleSets; *p; ++p) {
964 if ((**p).isPublic()) {
965 ++result;
966 }
967 }
968 }
969 return result;
970}
971
374ca955
A
972int32_t
973RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
974 if (localizations) {
975 return localizations->getNumberOfDisplayLocales();
976 }
977 return 0;
978}
979
980Locale
981RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
982 if (U_FAILURE(status)) {
73c04bcf 983 return Locale("");
374ca955
A
984 }
985 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
986 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
987 char buffer[64];
988 int32_t cap = name.length() + 1;
989 char* bp = buffer;
990 if (cap > 64) {
991 bp = (char *)uprv_malloc(cap);
992 if (bp == NULL) {
993 status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 994 return Locale("");
374ca955
A
995 }
996 }
997 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
998 Locale retLocale(bp);
999 if (bp != buffer) {
1000 uprv_free(bp);
1001 }
1002 return retLocale;
1003 }
1004 status = U_ILLEGAL_ARGUMENT_ERROR;
1005 Locale retLocale;
1006 return retLocale;
1007}
1008
1009UnicodeString
1010RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1011 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1012 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1013 int32_t len = localeName.length();
1014 UChar* localeStr = localeName.getBuffer(len + 1);
1015 while (len >= 0) {
1016 localeStr[len] = 0;
1017 int32_t ix = localizations->indexForLocale(localeStr);
1018 if (ix >= 0) {
1019 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1020 return name;
1021 }
1022
1023 // trim trailing portion, skipping over ommitted sections
1024 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1025 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1026 }
1027 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1028 return name;
1029 }
1030 UnicodeString bogus;
1031 bogus.setToBogus();
1032 return bogus;
1033}
1034
1035UnicodeString
1036RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1037 if (localizations) {
1038 UnicodeString rsn(ruleSetName);
1039 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1040 return getRuleSetDisplayName(ix, localeParam);
1041 }
1042 UnicodeString bogus;
1043 bogus.setToBogus();
1044 return bogus;
1045}
1046
b75a7d8f
A
1047NFRuleSet*
1048RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1049{
1050 if (U_SUCCESS(status) && ruleSets) {
1051 for (NFRuleSet** p = ruleSets; *p; ++p) {
1052 NFRuleSet* rs = *p;
1053 if (rs->isNamed(name)) {
1054 return rs;
1055 }
1056 }
1057 status = U_ILLEGAL_ARGUMENT_ERROR;
1058 }
1059 return NULL;
1060}
1061
1062UnicodeString&
1063RuleBasedNumberFormat::format(int32_t number,
1064 UnicodeString& toAppendTo,
374ca955 1065 FieldPosition& /* pos */) const
b75a7d8f 1066{
57a6839d
A
1067 if (defaultRuleSet) {
1068 int32_t startPos = toAppendTo.length();
1069 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1070 adjustForCapitalizationContext(startPos, toAppendTo);
1071 }
b75a7d8f
A
1072 return toAppendTo;
1073}
1074
1075
1076UnicodeString&
1077RuleBasedNumberFormat::format(int64_t number,
1078 UnicodeString& toAppendTo,
374ca955 1079 FieldPosition& /* pos */) const
b75a7d8f 1080{
57a6839d
A
1081 if (defaultRuleSet) {
1082 int32_t startPos = toAppendTo.length();
1083 defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1084 adjustForCapitalizationContext(startPos, toAppendTo);
1085 }
b75a7d8f
A
1086 return toAppendTo;
1087}
1088
1089
1090UnicodeString&
1091RuleBasedNumberFormat::format(double number,
1092 UnicodeString& toAppendTo,
374ca955 1093 FieldPosition& /* pos */) const
b75a7d8f 1094{
57a6839d 1095 int32_t startPos = toAppendTo.length();
729e4ab9
A
1096 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1097 if (uprv_isNaN(number)) {
1098 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1099 if (decFmtSyms) {
1100 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1101 }
1102 } else if (defaultRuleSet) {
1103 defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1104 }
57a6839d 1105 return adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1106}
1107
1108
1109UnicodeString&
1110RuleBasedNumberFormat::format(int32_t number,
1111 const UnicodeString& ruleSetName,
1112 UnicodeString& toAppendTo,
374ca955 1113 FieldPosition& /* pos */,
b75a7d8f
A
1114 UErrorCode& status) const
1115{
1116 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1117 if (U_SUCCESS(status)) {
4388f060 1118 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1119 // throw new IllegalArgumentException("Can't use internal rule set");
1120 status = U_ILLEGAL_ARGUMENT_ERROR;
1121 } else {
1122 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1123 if (rs) {
57a6839d 1124 int32_t startPos = toAppendTo.length();
b75a7d8f 1125 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
57a6839d 1126 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1127 }
1128 }
1129 }
1130 return toAppendTo;
1131}
1132
1133
1134UnicodeString&
1135RuleBasedNumberFormat::format(int64_t number,
1136 const UnicodeString& ruleSetName,
1137 UnicodeString& toAppendTo,
374ca955 1138 FieldPosition& /* pos */,
b75a7d8f
A
1139 UErrorCode& status) const
1140{
1141 if (U_SUCCESS(status)) {
4388f060 1142 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1143 // throw new IllegalArgumentException("Can't use internal rule set");
1144 status = U_ILLEGAL_ARGUMENT_ERROR;
1145 } else {
1146 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1147 if (rs) {
57a6839d 1148 int32_t startPos = toAppendTo.length();
b75a7d8f 1149 rs->format(number, toAppendTo, toAppendTo.length());
57a6839d 1150 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1151 }
1152 }
1153 }
1154 return toAppendTo;
1155}
1156
1157
b75a7d8f
A
1158UnicodeString&
1159RuleBasedNumberFormat::format(double number,
1160 const UnicodeString& ruleSetName,
1161 UnicodeString& toAppendTo,
374ca955 1162 FieldPosition& /* pos */,
b75a7d8f
A
1163 UErrorCode& status) const
1164{
1165 if (U_SUCCESS(status)) {
4388f060 1166 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1167 // throw new IllegalArgumentException("Can't use internal rule set");
1168 status = U_ILLEGAL_ARGUMENT_ERROR;
1169 } else {
1170 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1171 if (rs) {
57a6839d 1172 int32_t startPos = toAppendTo.length();
b75a7d8f 1173 rs->format(number, toAppendTo, toAppendTo.length());
57a6839d 1174 adjustForCapitalizationContext(startPos, toAppendTo);
b75a7d8f
A
1175 }
1176 }
1177 }
1178 return toAppendTo;
1179}
1180
57a6839d
A
1181UnicodeString&
1182RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1183 UnicodeString& currentResult) const
1184{
1185#if !UCONFIG_NO_BREAK_ITERATION
1186 if (startPos==0 && currentResult.length() > 0) {
1187 // capitalize currentResult according to context
1188 UChar32 ch = currentResult.char32At(0);
1189 UErrorCode status = U_ZERO_ERROR;
1190 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1191 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1192 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1193 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1194 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1195 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1196 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1197 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1198 }
1199 }
1200#endif
1201 return currentResult;
1202}
1203
1204
b75a7d8f
A
1205void
1206RuleBasedNumberFormat::parse(const UnicodeString& text,
1207 Formattable& result,
1208 ParsePosition& parsePosition) const
1209{
1210 if (!ruleSets) {
1211 parsePosition.setErrorIndex(0);
1212 return;
1213 }
1214
374ca955
A
1215 UnicodeString workingText(text, parsePosition.getIndex());
1216 ParsePosition workingPos(0);
1217
1218 ParsePosition high_pp(0);
b75a7d8f
A
1219 Formattable high_result;
1220
1221 for (NFRuleSet** p = ruleSets; *p; ++p) {
1222 NFRuleSet *rp = *p;
729e4ab9 1223 if (rp->isPublic() && rp->isParseable()) {
374ca955 1224 ParsePosition working_pp(0);
b75a7d8f
A
1225 Formattable working_result;
1226
729e4ab9 1227 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
b75a7d8f
A
1228 if (working_pp.getIndex() > high_pp.getIndex()) {
1229 high_pp = working_pp;
1230 high_result = working_result;
1231
374ca955 1232 if (high_pp.getIndex() == workingText.length()) {
b75a7d8f
A
1233 break;
1234 }
1235 }
1236 }
1237 }
1238
46f4442e
A
1239 int32_t startIndex = parsePosition.getIndex();
1240 parsePosition.setIndex(startIndex + high_pp.getIndex());
374ca955
A
1241 if (high_pp.getIndex() > 0) {
1242 parsePosition.setErrorIndex(-1);
46f4442e
A
1243 } else {
1244 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1245 parsePosition.setErrorIndex(startIndex + errorIndex);
b75a7d8f 1246 }
b75a7d8f
A
1247 result = high_result;
1248 if (result.getType() == Formattable::kDouble) {
1249 int32_t r = (int32_t)result.getDouble();
1250 if ((double)r == result.getDouble()) {
1251 result.setLong(r);
1252 }
1253 }
1254}
1255
1256#if !UCONFIG_NO_COLLATION
1257
1258void
1259RuleBasedNumberFormat::setLenient(UBool enabled)
1260{
1261 lenient = enabled;
1262 if (!enabled && collator) {
1263 delete collator;
1264 collator = NULL;
1265 }
1266}
1267
1268#endif
1269
1270void
1271RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1272 if (U_SUCCESS(status)) {
1273 if (ruleSetName.isEmpty()) {
374ca955
A
1274 if (localizations) {
1275 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1276 defaultRuleSet = findRuleSet(name, status);
1277 } else {
b75a7d8f 1278 initDefaultRuleSet();
374ca955
A
1279 }
1280 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
b75a7d8f
A
1281 status = U_ILLEGAL_ARGUMENT_ERROR;
1282 } else {
1283 NFRuleSet* result = findRuleSet(ruleSetName, status);
1284 if (result != NULL) {
1285 defaultRuleSet = result;
1286 }
1287 }
1288 }
1289}
1290
374ca955
A
1291UnicodeString
1292RuleBasedNumberFormat::getDefaultRuleSetName() const {
1293 UnicodeString result;
1294 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1295 defaultRuleSet->getName(result);
1296 } else {
1297 result.setToBogus();
1298 }
1299 return result;
1300}
1301
b75a7d8f
A
1302void
1303RuleBasedNumberFormat::initDefaultRuleSet()
1304{
374ca955 1305 defaultRuleSet = NULL;
b75a7d8f 1306 if (!ruleSets) {
374ca955 1307 return;
b75a7d8f 1308 }
374ca955 1309
729e4ab9
A
1310 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1311 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1312 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1313
374ca955 1314 NFRuleSet**p = &ruleSets[0];
b75a7d8f 1315 while (*p) {
729e4ab9
A
1316 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1317 defaultRuleSet = *p;
1318 return;
1319 } else {
1320 ++p;
1321 }
b75a7d8f
A
1322 }
1323
1324 defaultRuleSet = *--p;
1325 if (!defaultRuleSet->isPublic()) {
1326 while (p != ruleSets) {
1327 if ((*--p)->isPublic()) {
1328 defaultRuleSet = *p;
1329 break;
1330 }
1331 }
1332 }
1333}
1334
1335
1336void
374ca955 1337RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
73c04bcf 1338 UParseError& pErr, UErrorCode& status)
b75a7d8f
A
1339{
1340 // TODO: implement UParseError
73c04bcf 1341 uprv_memset(&pErr, 0, sizeof(UParseError));
b75a7d8f
A
1342 // Note: this can leave ruleSets == NULL, so remaining code should check
1343 if (U_FAILURE(status)) {
1344 return;
1345 }
1346
374ca955
A
1347 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1348
b75a7d8f
A
1349 UnicodeString description(rules);
1350 if (!description.length()) {
1351 status = U_MEMORY_ALLOCATION_ERROR;
1352 return;
1353 }
1354
1355 // start by stripping the trailing whitespace from all the rules
1356 // (this is all the whitespace follwing each semicolon in the
1357 // description). This allows us to look for rule-set boundaries
1358 // by searching for ";%" without having to worry about whitespace
1359 // between the ; and the %
1360 stripWhitespace(description);
1361
1362 // check to see if there's a set of lenient-parse rules. If there
1363 // is, pull them out into our temporary holding place for them,
1364 // and delete them from the description before the real desciption-
1365 // parsing code sees them
4388f060 1366 int32_t lp = description.indexOf(gLenientParse, -1, 0);
b75a7d8f
A
1367 if (lp != -1) {
1368 // we've got to make sure we're not in the middle of a rule
1369 // (where "%%lenient-parse" would actually get treated as
1370 // rule text)
1371 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1372 // locate the beginning and end of the actual collation
1373 // rules (there may be whitespace between the name and
1374 // the first token in the description)
4388f060 1375 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
b75a7d8f
A
1376
1377 if (lpEnd == -1) {
1378 lpEnd = description.length() - 1;
1379 }
1380 int lpStart = lp + u_strlen(gLenientParse);
4388f060 1381 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
b75a7d8f
A
1382 ++lpStart;
1383 }
1384
1385 // copy out the lenient-parse rules and delete them
1386 // from the description
1387 lenientParseRules = new UnicodeString();
1388 /* test for NULL */
1389 if (lenientParseRules == 0) {
1390 status = U_MEMORY_ALLOCATION_ERROR;
1391 return;
1392 }
1393 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1394
1395 description.remove(lp, lpEnd + 1 - lp);
1396 }
1397 }
1398
1399 // pre-flight parsing the description and count the number of
1400 // rule sets (";%" marks the end of one rule set and the beginning
1401 // of the next)
4388f060
A
1402 numRuleSets = 0;
1403 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
b75a7d8f
A
1404 ++numRuleSets;
1405 ++p;
1406 }
1407 ++numRuleSets;
1408
1409 // our rule list is an array of the appropriate size
1410 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1411 /* test for NULL */
1412 if (ruleSets == 0) {
1413 status = U_MEMORY_ALLOCATION_ERROR;
1414 return;
1415 }
1416
1417 for (int i = 0; i <= numRuleSets; ++i) {
1418 ruleSets[i] = NULL;
1419 }
1420
1421 // divide up the descriptions into individual rule-set descriptions
1422 // and store them in a temporary array. At each step, we also
1423 // new up a rule set, but all this does is initialize its name
1424 // and remove it from its description. We can't actually parse
1425 // the rest of the descriptions and finish initializing everything
1426 // because we have to know the names and locations of all the rule
1427 // sets before we can actually set everything up
1428 if(!numRuleSets) {
46f4442e
A
1429 status = U_ILLEGAL_ARGUMENT_ERROR;
1430 return;
b75a7d8f 1431 }
4388f060
A
1432
1433 ruleSetDescriptions = new UnicodeString[numRuleSets];
b75a7d8f
A
1434 if (ruleSetDescriptions == 0) {
1435 status = U_MEMORY_ALLOCATION_ERROR;
1436 return;
1437 }
1438
1439 {
1440 int curRuleSet = 0;
1441 int32_t start = 0;
4388f060 1442 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
b75a7d8f
A
1443 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1444 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1445 if (ruleSets[curRuleSet] == 0) {
1446 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1447 return;
b75a7d8f
A
1448 }
1449 ++curRuleSet;
1450 start = p + 1;
1451 }
1452 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1453 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1454 if (ruleSets[curRuleSet] == 0) {
1455 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1456 return;
b75a7d8f
A
1457 }
1458 }
1459
1460 // now we can take note of the formatter's default rule set, which
1461 // is the last public rule set in the description (it's the last
1462 // rather than the first so that a user can create a new formatter
1463 // from an existing formatter and change its default behavior just
1464 // by appending more rule sets to the end)
374ca955
A
1465
1466 // {dlf} Initialization of a fraction rule set requires the default rule
1467 // set to be known. For purposes of initialization, this is always the
1468 // last public rule set, no matter what the localization data says.
1469 initDefaultRuleSet();
b75a7d8f
A
1470
1471 // finally, we can go back through the temporary descriptions
1472 // list and finish seting up the substructure (and we throw
1473 // away the temporary descriptions as we go)
1474 {
1475 for (int i = 0; i < numRuleSets; i++) {
1476 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1477 }
1478 }
1479
374ca955
A
1480 // Now that the rules are initialized, the 'real' default rule
1481 // set can be adjusted by the localization data.
1482
1483 // The C code keeps the localization array as is, rather than building
1484 // a separate array of the public rule set names, so we have less work
1485 // to do here-- but we still need to check the names.
1486
1487 if (localizationInfos) {
1488 // confirm the names, if any aren't in the rules, that's an error
1489 // it is ok if the rules contain public rule sets that are not in this list
1490 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1491 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1492 NFRuleSet* rs = findRuleSet(name, status);
1493 if (rs == NULL) {
1494 break; // error
1495 }
1496 if (i == 0) {
1497 defaultRuleSet = rs;
1498 }
1499 }
1500 } else {
1501 defaultRuleSet = getDefaultRuleSet();
1502 }
57a6839d
A
1503 originalDescription = rules;
1504}
1505
1506// override the NumberFormat implementation in order to
1507// lazily initialize relevant items
1508void
1509RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1510{
1511 NumberFormat::setContext(value, status);
1512 if (U_SUCCESS(status)) {
1513 if (!capitalizationInfoSet &&
1514 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1515 initCapitalizationContextInfo(locale);
1516 capitalizationInfoSet = TRUE;
1517 }
1518#if !UCONFIG_NO_BREAK_ITERATION
1519 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1520 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1521 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1522 UErrorCode status = U_ZERO_ERROR;
1523 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1524 if (U_FAILURE(status)) {
1525 delete capitalizationBrkIter;
1526 capitalizationBrkIter = NULL;
1527 }
1528 }
1529#endif
1530 }
1531}
1532
1533void
1534RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1535{
1536#if !UCONFIG_NO_BREAK_ITERATION
1537 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1538 UErrorCode status = U_ZERO_ERROR;
1539 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1540 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1541 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1542 if (U_SUCCESS(status) && rb != NULL) {
1543 int32_t len = 0;
1544 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1545 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1546 capitalizationForUIListMenu = intVector[0];
1547 capitalizationForStandAlone = intVector[1];
1548 }
1549 }
1550 ures_close(rb);
1551#endif
b75a7d8f
A
1552}
1553
1554void
1555RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1556{
1557 // iterate through the characters...
1558 UnicodeString result;
1559
1560 int start = 0;
1561 while (start != -1 && start < description.length()) {
1562 // seek to the first non-whitespace character...
1563 while (start < description.length()
4388f060 1564 && PatternProps::isWhiteSpace(description.charAt(start))) {
b75a7d8f
A
1565 ++start;
1566 }
1567
1568 // locate the next semicolon in the text and copy the text from
1569 // our current position up to that semicolon into the result
1570 int32_t p = description.indexOf(gSemiColon, start);
1571 if (p == -1) {
1572 // or if we don't find a semicolon, just copy the rest of
1573 // the string into the result
1574 result.append(description, start, description.length() - start);
1575 start = -1;
1576 }
1577 else if (p < description.length()) {
1578 result.append(description, start, p + 1 - start);
1579 start = p + 1;
1580 }
1581
1582 // when we get here, we've seeked off the end of the sring, and
1583 // we terminate the loop (we continue until *start* is -1 rather
1584 // than until *p* is -1, because otherwise we'd miss the last
1585 // rule in the description)
1586 else {
1587 start = -1;
1588 }
1589 }
1590
1591 description.setTo(result);
1592}
1593
1594
1595void
1596RuleBasedNumberFormat::dispose()
1597{
1598 if (ruleSets) {
1599 for (NFRuleSet** p = ruleSets; *p; ++p) {
1600 delete *p;
1601 }
1602 uprv_free(ruleSets);
1603 ruleSets = NULL;
1604 }
1605
4388f060
A
1606 if (ruleSetDescriptions) {
1607 delete [] ruleSetDescriptions;
1608 }
1609
b75a7d8f
A
1610#if !UCONFIG_NO_COLLATION
1611 delete collator;
1612#endif
1613 collator = NULL;
1614
1615 delete decimalFormatSymbols;
1616 decimalFormatSymbols = NULL;
1617
1618 delete lenientParseRules;
1619 lenientParseRules = NULL;
374ca955 1620
57a6839d
A
1621#if !UCONFIG_NO_BREAK_ITERATION
1622 delete capitalizationBrkIter;
1623 capitalizationBrkIter = NULL;
1624#endif
1625
374ca955 1626 if (localizations) localizations = localizations->unref();
b75a7d8f
A
1627}
1628
1629
1630//-----------------------------------------------------------------------
1631// package-internal API
1632//-----------------------------------------------------------------------
1633
1634/**
1635 * Returns the collator to use for lenient parsing. The collator is lazily created:
1636 * this function creates it the first time it's called.
1637 * @return The collator to use for lenient parsing, or null if lenient parsing
1638 * is turned off.
1639*/
57a6839d 1640const RuleBasedCollator*
b75a7d8f
A
1641RuleBasedNumberFormat::getCollator() const
1642{
1643#if !UCONFIG_NO_COLLATION
1644 if (!ruleSets) {
1645 return NULL;
1646 }
1647
57a6839d 1648 // lazy-evaluate the collator
b75a7d8f
A
1649 if (collator == NULL && lenient) {
1650 // create a default collator based on the formatter's locale,
1651 // then pull out that collator's rules, append any additional
1652 // rules specified in the description, and create a _new_
1653 // collator based on the combinaiton of those rules
1654
1655 UErrorCode status = U_ZERO_ERROR;
1656
1657 Collator* temp = Collator::createInstance(locale, status);
729e4ab9
A
1658 RuleBasedCollator* newCollator;
1659 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
b75a7d8f
A
1660 if (lenientParseRules) {
1661 UnicodeString rules(newCollator->getRules());
1662 rules.append(*lenientParseRules);
1663
1664 newCollator = new RuleBasedCollator(rules, status);
46f4442e
A
1665 // Exit if newCollator could not be created.
1666 if (newCollator == NULL) {
57a6839d 1667 return NULL;
46f4442e 1668 }
b75a7d8f
A
1669 } else {
1670 temp = NULL;
1671 }
1672 if (U_SUCCESS(status)) {
1673 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1674 // cast away const
1675 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1676 } else {
1677 delete newCollator;
1678 }
1679 }
1680 delete temp;
1681 }
1682#endif
1683
1684 // if lenient-parse mode is off, this will be null
1685 // (see setLenientParseMode())
1686 return collator;
1687}
1688
1689
1690/**
1691 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1692 * instances owned by this formatter. This object is lazily created: this function
1693 * creates it the first time it's called.
1694 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1695 * instances owned by this formatter.
1696*/
1697DecimalFormatSymbols*
1698RuleBasedNumberFormat::getDecimalFormatSymbols() const
1699{
1700 // lazy-evaluate the DecimalFormatSymbols object. This object
1701 // is shared by all DecimalFormat instances belonging to this
1702 // formatter
1703 if (decimalFormatSymbols == NULL) {
1704 UErrorCode status = U_ZERO_ERROR;
1705 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1706 if (U_SUCCESS(status)) {
1707 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1708 } else {
1709 delete temp;
1710 }
1711 }
1712 return decimalFormatSymbols;
1713}
1714
4388f060
A
1715// De-owning the current localized symbols and adopt the new symbols.
1716void
1717RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1718{
1719 if (symbolsToAdopt == NULL) {
1720 return; // do not allow caller to set decimalFormatSymbols to NULL
1721 }
1722
1723 if (decimalFormatSymbols != NULL) {
1724 delete decimalFormatSymbols;
1725 }
1726
1727 decimalFormatSymbols = symbolsToAdopt;
1728
1729 {
1730 // Apply the new decimalFormatSymbols by reparsing the rulesets
1731 UErrorCode status = U_ZERO_ERROR;
1732
1733 for (int32_t i = 0; i < numRuleSets; i++) {
1734 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1735 }
1736 }
1737}
1738
1739// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1740void
1741RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1742{
1743 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1744}
1745
374ca955
A
1746U_NAMESPACE_END
1747
b75a7d8f
A
1748/* U_HAVE_RBNF */
1749#endif