]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbnf.cpp
ICU-511.32.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
4388f060 3* Copyright (C) 1997-2012, International Business Machines Corporation
73c04bcf 4* and others. All Rights Reserved.
b75a7d8f
A
5*******************************************************************************
6*/
7
51004dcb 8#include "utypeinfo.h" // for 'typeid' to work
729e4ab9 9
b75a7d8f
A
10#include "unicode/rbnf.h"
11
12#if U_HAVE_RBNF
13
14#include "unicode/normlzr.h"
15#include "unicode/tblcoll.h"
16#include "unicode/uchar.h"
17#include "unicode/ucol.h"
18#include "unicode/uloc.h"
19#include "unicode/unum.h"
20#include "unicode/ures.h"
21#include "unicode/ustring.h"
22#include "unicode/utf16.h"
374ca955 23#include "unicode/udata.h"
b75a7d8f
A
24#include "nfrs.h"
25
26#include "cmemory.h"
27#include "cstring.h"
4388f060 28#include "patternprops.h"
729e4ab9 29#include "uresimp.h"
374ca955
A
30
31// debugging
32// #define DEBUG
33
34#ifdef DEBUG
35#include "stdio.h"
36#endif
37
38#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
b75a7d8f
A
39
40static const UChar gPercentPercent[] =
41{
42 0x25, 0x25, 0
43}; /* "%%" */
44
45// All urbnf objects are created through openRules, so we init all of the
46// Unicode string constants required by rbnf, nfrs, or nfr here.
47static const UChar gLenientParse[] =
48{
49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50}; /* "%%lenient-parse:" */
51static const UChar gSemiColon = 0x003B;
52static const UChar gSemiPercent[] =
53{
54 0x3B, 0x25, 0
55}; /* ";%" */
56
57#define kSomeNumberOfBitsDiv2 22
58#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
59#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60
374ca955
A
61U_NAMESPACE_BEGIN
62
63UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
64
73c04bcf
A
65/*
66This is a utility class. It does not use ICU's RTTI.
67If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
68Please make sure that intltest passes on Windows in Release mode,
69since the string pooling per compilation unit will mess up how RTTI works.
70The RTTI code was also removed due to lack of code coverage.
71*/
72class LocalizationInfo : public UMemory {
374ca955 73protected:
4388f060 74 virtual ~LocalizationInfo();
374ca955
A
75 uint32_t refcount;
76
77public:
78 LocalizationInfo() : refcount(0) {}
79
80 LocalizationInfo* ref(void) {
81 ++refcount;
82 return this;
83 }
84
85 LocalizationInfo* unref(void) {
86 if (refcount && --refcount == 0) {
87 delete this;
88 }
89 return NULL;
90 }
91
92 virtual UBool operator==(const LocalizationInfo* rhs) const;
93 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
94
95 virtual int32_t getNumberOfRuleSets(void) const = 0;
96 virtual const UChar* getRuleSetName(int32_t index) const = 0;
97 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
98 virtual const UChar* getLocaleName(int32_t index) const = 0;
99 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
100
101 virtual int32_t indexForLocale(const UChar* locale) const;
102 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
103
73c04bcf
A
104// virtual UClassID getDynamicClassID() const = 0;
105// static UClassID getStaticClassID(void);
374ca955
A
106};
107
4388f060
A
108LocalizationInfo::~LocalizationInfo() {}
109
73c04bcf 110//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
374ca955
A
111
112// if both strings are NULL, this returns TRUE
113static UBool
114streq(const UChar* lhs, const UChar* rhs) {
115 if (rhs == lhs) {
116 return TRUE;
117 }
118 if (lhs && rhs) {
119 return u_strcmp(lhs, rhs) == 0;
120 }
121 return FALSE;
122}
123
124UBool
125LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
126 if (rhs) {
127 if (this == rhs) {
128 return TRUE;
129 }
130
131 int32_t rsc = getNumberOfRuleSets();
132 if (rsc == rhs->getNumberOfRuleSets()) {
133 for (int i = 0; i < rsc; ++i) {
134 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
135 return FALSE;
136 }
137 }
138 int32_t dlc = getNumberOfDisplayLocales();
139 if (dlc == rhs->getNumberOfDisplayLocales()) {
140 for (int i = 0; i < dlc; ++i) {
141 const UChar* locale = getLocaleName(i);
142 int32_t ix = rhs->indexForLocale(locale);
143 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
144 if (!streq(locale, rhs->getLocaleName(ix))) {
145 return FALSE;
146 }
147 for (int j = 0; j < rsc; ++j) {
148 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
149 return FALSE;
150 }
151 }
152 }
153 return TRUE;
154 }
155 }
156 }
157 return FALSE;
158}
159
160int32_t
161LocalizationInfo::indexForLocale(const UChar* locale) const {
162 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
163 if (streq(locale, getLocaleName(i))) {
164 return i;
165 }
166 }
167 return -1;
168}
169
170int32_t
171LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
172 if (ruleset) {
173 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
174 if (streq(ruleset, getRuleSetName(i))) {
175 return i;
176 }
177 }
178 }
179 return -1;
180}
181
182
183typedef void (*Fn_Deleter)(void*);
184
185class VArray {
186 void** buf;
187 int32_t cap;
188 int32_t size;
189 Fn_Deleter deleter;
190public:
191 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
192
193 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
194
195 ~VArray() {
196 if (deleter) {
197 for (int i = 0; i < size; ++i) {
198 (*deleter)(buf[i]);
199 }
200 }
201 uprv_free(buf);
202 }
203
204 int32_t length() {
205 return size;
206 }
207
208 void add(void* elem, UErrorCode& status) {
209 if (U_SUCCESS(status)) {
210 if (size == cap) {
211 if (cap == 0) {
212 cap = 1;
213 } else if (cap < 256) {
214 cap *= 2;
215 } else {
216 cap += 256;
217 }
218 if (buf == NULL) {
219 buf = (void**)uprv_malloc(cap * sizeof(void*));
220 } else {
221 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
222 }
223 if (buf == NULL) {
224 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
225 status = U_MEMORY_ALLOCATION_ERROR;
226 return;
227 }
228 void* start = &buf[size];
229 size_t count = (cap - size) * sizeof(void*);
230 uprv_memset(start, 0, count); // fill with nulls, just because
231 }
232 buf[size++] = elem;
233 }
234 }
235
236 void** release(void) {
237 void** result = buf;
238 buf = NULL;
239 cap = 0;
240 size = 0;
241 return result;
242 }
243};
244
245class LocDataParser;
246
247class StringLocalizationInfo : public LocalizationInfo {
248 UChar* info;
249 UChar*** data;
250 int32_t numRuleSets;
251 int32_t numLocales;
252
253friend class LocDataParser;
254
255 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
256 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
257 {
258 }
259
260public:
261 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
262
263 virtual ~StringLocalizationInfo();
264 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
265 virtual const UChar* getRuleSetName(int32_t index) const;
266 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
267 virtual const UChar* getLocaleName(int32_t index) const;
268 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
269
73c04bcf
A
270// virtual UClassID getDynamicClassID() const;
271// static UClassID getStaticClassID(void);
374ca955
A
272
273private:
274 void init(UErrorCode& status) const;
275};
276
277
// Punctuation recognized by the localization-data parser.
enum {
    OPEN_ANGLE  = 0x3C, /* '<' */
    CLOSE_ANGLE = 0x3E, /* '>' */
    COMMA       = 0x2C, /* ',' */
    TICK        = 0x27, /* '\'' */
    QUOTE       = 0x22, /* '"' */
    SPACE       = 0x20  /* ' ' */
};
286
287/**
288 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
289 */
290class LocDataParser {
291 UChar* data;
292 const UChar* e;
293 UChar* p;
294 UChar ch;
295 UParseError& pe;
296 UErrorCode& ec;
297
298public:
299 LocDataParser(UParseError& parseError, UErrorCode& status)
300 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
301 ~LocDataParser() {}
302
303 /*
304 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
305 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
306 */
307 StringLocalizationInfo* parse(UChar* data, int32_t len);
308
309private:
310
311 void inc(void) { ++p; ch = 0xffff; }
312 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
313 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
4388f060 314 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
374ca955 315 UBool inList(UChar c, const UChar* list) const {
4388f060 316 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
374ca955
A
317 while (*list && *list != c) ++list; return *list == c;
318 }
319 void parseError(const char* msg);
320
321 StringLocalizationInfo* doParse(void);
322
323 UChar** nextArray(int32_t& requiredLength);
324 UChar* nextString(void);
325};
326
327#ifdef DEBUG
328#define ERROR(msg) parseError(msg); return NULL;
329#else
330#define ERROR(msg) parseError(NULL); return NULL;
331#endif
332
333
334static const UChar DQUOTE_STOPLIST[] = {
335 QUOTE, 0
336};
337
338static const UChar SQUOTE_STOPLIST[] = {
339 TICK, 0
340};
341
342static const UChar NOQUOTE_STOPLIST[] = {
343 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
344};
345
346static void
347DeleteFn(void* p) {
348 uprv_free(p);
349}
350
351StringLocalizationInfo*
352LocDataParser::parse(UChar* _data, int32_t len) {
353 if (U_FAILURE(ec)) {
354 if (_data) uprv_free(_data);
355 return NULL;
356 }
357
358 pe.line = 0;
359 pe.offset = -1;
360 pe.postContext[0] = 0;
361 pe.preContext[0] = 0;
362
363 if (_data == NULL) {
364 ec = U_ILLEGAL_ARGUMENT_ERROR;
365 return NULL;
366 }
367
368 if (len <= 0) {
369 ec = U_ILLEGAL_ARGUMENT_ERROR;
370 uprv_free(_data);
371 return NULL;
372 }
373
374 data = _data;
375 e = data + len;
376 p = _data;
377 ch = 0xffff;
378
379 return doParse();
380}
381
382
383StringLocalizationInfo*
384LocDataParser::doParse(void) {
385 skipWhitespace();
386 if (!checkInc(OPEN_ANGLE)) {
387 ERROR("Missing open angle");
388 } else {
389 VArray array(DeleteFn);
390 UBool mightHaveNext = TRUE;
391 int32_t requiredLength = -1;
392 while (mightHaveNext) {
393 mightHaveNext = FALSE;
394 UChar** elem = nextArray(requiredLength);
395 skipWhitespace();
396 UBool haveComma = check(COMMA);
397 if (elem) {
398 array.add(elem, ec);
399 if (haveComma) {
400 inc();
401 mightHaveNext = TRUE;
402 }
403 } else if (haveComma) {
404 ERROR("Unexpected character");
405 }
406 }
407
408 skipWhitespace();
409 if (!checkInc(CLOSE_ANGLE)) {
410 if (check(OPEN_ANGLE)) {
411 ERROR("Missing comma in outer array");
412 } else {
413 ERROR("Missing close angle bracket in outer array");
414 }
415 }
416
417 skipWhitespace();
418 if (p != e) {
419 ERROR("Extra text after close of localization data");
420 }
421
422 array.add(NULL, ec);
423 if (U_SUCCESS(ec)) {
424 int32_t numLocs = array.length() - 2; // subtract first, NULL
425 UChar*** result = (UChar***)array.release();
426
427 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
428 }
429 }
430
431 ERROR("Unknown error");
432}
433
434UChar**
435LocDataParser::nextArray(int32_t& requiredLength) {
436 if (U_FAILURE(ec)) {
437 return NULL;
438 }
439
440 skipWhitespace();
441 if (!checkInc(OPEN_ANGLE)) {
442 ERROR("Missing open angle");
443 }
444
445 VArray array;
446 UBool mightHaveNext = TRUE;
447 while (mightHaveNext) {
448 mightHaveNext = FALSE;
449 UChar* elem = nextString();
450 skipWhitespace();
451 UBool haveComma = check(COMMA);
452 if (elem) {
453 array.add(elem, ec);
454 if (haveComma) {
455 inc();
456 mightHaveNext = TRUE;
457 }
458 } else if (haveComma) {
459 ERROR("Unexpected comma");
460 }
461 }
462 skipWhitespace();
463 if (!checkInc(CLOSE_ANGLE)) {
464 if (check(OPEN_ANGLE)) {
465 ERROR("Missing close angle bracket in inner array");
466 } else {
467 ERROR("Missing comma in inner array");
468 }
469 }
470
471 array.add(NULL, ec);
472 if (U_SUCCESS(ec)) {
473 if (requiredLength == -1) {
474 requiredLength = array.length() + 1;
475 } else if (array.length() != requiredLength) {
476 ec = U_ILLEGAL_ARGUMENT_ERROR;
477 ERROR("Array not of required length");
478 }
479
480 return (UChar**)array.release();
481 }
482 ERROR("Unknown Error");
483}
484
485UChar*
486LocDataParser::nextString() {
487 UChar* result = NULL;
488
489 skipWhitespace();
490 if (p < e) {
491 const UChar* terminators;
492 UChar c = *p;
493 UBool haveQuote = c == QUOTE || c == TICK;
494 if (haveQuote) {
495 inc();
496 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
497 } else {
498 terminators = NOQUOTE_STOPLIST;
499 }
500 UChar* start = p;
501 while (p < e && !inList(*p, terminators)) ++p;
502 if (p == e) {
503 ERROR("Unexpected end of data");
504 }
505
506 UChar x = *p;
507 if (p > start) {
508 ch = x;
509 *p = 0x0; // terminate by writing to data
510 result = start; // just point into data
511 }
512 if (haveQuote) {
513 if (x != c) {
514 ERROR("Missing matching quote");
515 } else if (p == start) {
516 ERROR("Empty string");
517 }
518 inc();
519 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
520 ERROR("Unexpected character in string");
521 }
522 }
523
524 // ok for there to be no next string
525 return result;
526}
527
528void
529LocDataParser::parseError(const char* /*str*/) {
530 if (!data) {
531 return;
532 }
b75a7d8f 533
374ca955 534 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
73c04bcf 535 if (start < data) {
374ca955 536 start = data;
73c04bcf
A
537 }
538 for (UChar* x = p; --x >= start;) {
374ca955
A
539 if (!*x) {
540 start = x+1;
541 break;
542 }
73c04bcf 543 }
374ca955 544 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
73c04bcf 545 if (limit > e) {
374ca955 546 limit = e;
73c04bcf
A
547 }
548 u_strncpy(pe.preContext, start, (int32_t)(p-start));
374ca955 549 pe.preContext[p-start] = 0;
73c04bcf 550 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
374ca955 551 pe.postContext[limit-p] = 0;
73c04bcf 552 pe.offset = (int32_t)(p - data);
374ca955
A
553
554#ifdef DEBUG
555 fprintf(stderr, "%s at or near character %d: ", str, p-data);
556
557 UnicodeString msg;
558 msg.append(start, p - start);
559 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
560 msg.append(p, limit-p);
561 msg.append("'");
562
563 char buf[128];
564 int32_t len = msg.extract(0, msg.length(), buf, 128);
565 if (len >= 128) {
566 buf[127] = 0;
567 } else {
568 buf[len] = 0;
569 }
570 fprintf(stderr, "%s\n", buf);
571 fflush(stderr);
572#endif
573
574 uprv_free(data);
575 data = NULL;
576 p = NULL;
577 e = NULL;
578
579 if (U_SUCCESS(ec)) {
580 ec = U_PARSE_ERROR;
581 }
582}
583
73c04bcf 584//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
374ca955
A
585
586StringLocalizationInfo*
587StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
588 if (U_FAILURE(status)) {
589 return NULL;
590 }
591
592 int32_t len = info.length();
593 if (len == 0) {
594 return NULL; // no error;
595 }
596
597 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
598 if (!p) {
599 status = U_MEMORY_ALLOCATION_ERROR;
600 return NULL;
601 }
602 info.extract(p, len, status);
603 if (!U_FAILURE(status)) {
604 status = U_ZERO_ERROR; // clear warning about non-termination
605 }
606
607 LocDataParser parser(perror, status);
608 return parser.parse(p, len);
609}
610
611StringLocalizationInfo::~StringLocalizationInfo() {
612 for (UChar*** p = (UChar***)data; *p; ++p) {
613 // remaining data is simply pointer into our unicode string data.
614 if (*p) uprv_free(*p);
615 }
616 if (data) uprv_free(data);
617 if (info) uprv_free(info);
618}
619
620
621const UChar*
622StringLocalizationInfo::getRuleSetName(int32_t index) const {
623 if (index >= 0 && index < getNumberOfRuleSets()) {
624 return data[0][index];
625 }
626 return NULL;
627}
628
629const UChar*
630StringLocalizationInfo::getLocaleName(int32_t index) const {
631 if (index >= 0 && index < getNumberOfDisplayLocales()) {
632 return data[index+1][0];
633 }
634 return NULL;
635}
636
637const UChar*
638StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
639 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
640 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
641 return data[localeIndex+1][ruleIndex+1];
642 }
643 return NULL;
644}
645
646// ----------
647
648RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
649 const UnicodeString& locs,
650 const Locale& alocale, UParseError& perror, UErrorCode& status)
b75a7d8f 651 : ruleSets(NULL)
4388f060
A
652 , ruleSetDescriptions(NULL)
653 , numRuleSets(0)
b75a7d8f
A
654 , defaultRuleSet(NULL)
655 , locale(alocale)
656 , collator(NULL)
657 , decimalFormatSymbols(NULL)
658 , lenient(FALSE)
659 , lenientParseRules(NULL)
374ca955
A
660 , localizations(NULL)
661{
662 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
663 init(description, locinfo, perror, status);
664}
665
666RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
667 const UnicodeString& locs,
668 UParseError& perror, UErrorCode& status)
669 : ruleSets(NULL)
4388f060
A
670 , ruleSetDescriptions(NULL)
671 , numRuleSets(0)
374ca955
A
672 , defaultRuleSet(NULL)
673 , locale(Locale::getDefault())
674 , collator(NULL)
675 , decimalFormatSymbols(NULL)
676 , lenient(FALSE)
677 , lenientParseRules(NULL)
678 , localizations(NULL)
679{
680 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681 init(description, locinfo, perror, status);
682}
683
684RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685 LocalizationInfo* info,
686 const Locale& alocale, UParseError& perror, UErrorCode& status)
687 : ruleSets(NULL)
4388f060
A
688 , ruleSetDescriptions(NULL)
689 , numRuleSets(0)
374ca955
A
690 , defaultRuleSet(NULL)
691 , locale(alocale)
692 , collator(NULL)
693 , decimalFormatSymbols(NULL)
694 , lenient(FALSE)
695 , lenientParseRules(NULL)
696 , localizations(NULL)
697{
698 init(description, info, perror, status);
699}
700
701RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
702 UParseError& perror,
703 UErrorCode& status)
704 : ruleSets(NULL)
4388f060
A
705 , ruleSetDescriptions(NULL)
706 , numRuleSets(0)
374ca955
A
707 , defaultRuleSet(NULL)
708 , locale(Locale::getDefault())
709 , collator(NULL)
710 , decimalFormatSymbols(NULL)
711 , lenient(FALSE)
712 , lenientParseRules(NULL)
713 , localizations(NULL)
714{
715 init(description, NULL, perror, status);
716}
717
718RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
719 const Locale& aLocale,
720 UParseError& perror,
721 UErrorCode& status)
722 : ruleSets(NULL)
4388f060
A
723 , ruleSetDescriptions(NULL)
724 , numRuleSets(0)
374ca955
A
725 , defaultRuleSet(NULL)
726 , locale(aLocale)
727 , collator(NULL)
728 , decimalFormatSymbols(NULL)
729 , lenient(FALSE)
730 , lenientParseRules(NULL)
731 , localizations(NULL)
b75a7d8f 732{
374ca955 733 init(description, NULL, perror, status);
b75a7d8f
A
734}
735
736RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
737 : ruleSets(NULL)
4388f060
A
738 , ruleSetDescriptions(NULL)
739 , numRuleSets(0)
b75a7d8f
A
740 , defaultRuleSet(NULL)
741 , locale(alocale)
742 , collator(NULL)
743 , decimalFormatSymbols(NULL)
744 , lenient(FALSE)
745 , lenientParseRules(NULL)
374ca955 746 , localizations(NULL)
b75a7d8f
A
747{
748 if (U_FAILURE(status)) {
749 return;
750 }
751
729e4ab9 752 const char* rules_tag = "RBNFRules";
b75a7d8f
A
753 const char* fmt_tag = "";
754 switch (tag) {
755 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
756 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
757 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
729e4ab9 758 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
b75a7d8f
A
759 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
760 }
761
374ca955
A
762 // TODO: read localization info from resource
763 LocalizationInfo* locinfo = NULL;
764
374ca955 765 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
b75a7d8f 766 if (U_SUCCESS(status)) {
374ca955
A
767 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
768 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
729e4ab9
A
769
770 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
771 if (U_FAILURE(status)) {
772 ures_close(nfrb);
773 }
774 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
775 if (U_FAILURE(status)) {
776 ures_close(rbnfRules);
777 ures_close(nfrb);
778 return;
779 }
4388f060 780
729e4ab9
A
781 UnicodeString desc;
782 while (ures_hasNext(ruleSets)) {
4388f060 783 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
729e4ab9 784 }
b75a7d8f 785 UParseError perror;
729e4ab9 786
374ca955 787 init (desc, locinfo, perror, status);
729e4ab9 788
729e4ab9
A
789 ures_close(ruleSets);
790 ures_close(rbnfRules);
b75a7d8f 791 }
b75a7d8f
A
792 ures_close(nfrb);
793}
794
795RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
796 : NumberFormat(rhs)
797 , ruleSets(NULL)
4388f060
A
798 , ruleSetDescriptions(NULL)
799 , numRuleSets(0)
b75a7d8f
A
800 , defaultRuleSet(NULL)
801 , locale(rhs.locale)
802 , collator(NULL)
803 , decimalFormatSymbols(NULL)
804 , lenient(FALSE)
805 , lenientParseRules(NULL)
374ca955 806 , localizations(NULL)
b75a7d8f
A
807{
808 this->operator=(rhs);
809}
810
374ca955
A
811// --------
812
b75a7d8f
A
813RuleBasedNumberFormat&
814RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
815{
816 UErrorCode status = U_ZERO_ERROR;
817 dispose();
818 locale = rhs.locale;
374ca955
A
819 lenient = rhs.lenient;
820
b75a7d8f
A
821 UnicodeString rules = rhs.getRules();
822 UParseError perror;
374ca955
A
823 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
824
b75a7d8f
A
825 return *this;
826}
827
828RuleBasedNumberFormat::~RuleBasedNumberFormat()
829{
830 dispose();
831}
832
833Format*
834RuleBasedNumberFormat::clone(void) const
835{
836 RuleBasedNumberFormat * result = NULL;
837 UnicodeString rules = getRules();
838 UErrorCode status = U_ZERO_ERROR;
839 UParseError perror;
374ca955 840 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
b75a7d8f
A
841 /* test for NULL */
842 if (result == 0) {
843 status = U_MEMORY_ALLOCATION_ERROR;
844 return 0;
845 }
846 if (U_FAILURE(status)) {
847 delete result;
848 result = 0;
849 } else {
850 result->lenient = lenient;
851 }
852 return result;
853}
854
855UBool
856RuleBasedNumberFormat::operator==(const Format& other) const
857{
858 if (this == &other) {
859 return TRUE;
860 }
861
729e4ab9 862 if (typeid(*this) == typeid(other)) {
b75a7d8f
A
863 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
864 if (locale == rhs.locale &&
374ca955
A
865 lenient == rhs.lenient &&
866 (localizations == NULL
867 ? rhs.localizations == NULL
868 : (rhs.localizations == NULL
869 ? FALSE
870 : *localizations == rhs.localizations))) {
871
b75a7d8f
A
872 NFRuleSet** p = ruleSets;
873 NFRuleSet** q = rhs.ruleSets;
b75a7d8f 874 if (p == NULL) {
374ca955
A
875 return q == NULL;
876 } else if (q == NULL) {
b75a7d8f
A
877 return FALSE;
878 }
879 while (*p && *q && (**p == **q)) {
880 ++p;
881 ++q;
882 }
883 return *q == NULL && *p == NULL;
884 }
885 }
886
887 return FALSE;
888}
889
890UnicodeString
891RuleBasedNumberFormat::getRules() const
892{
893 UnicodeString result;
894 if (ruleSets != NULL) {
895 for (NFRuleSet** p = ruleSets; *p; ++p) {
896 (*p)->appendRules(result);
897 }
898 }
899 return result;
900}
901
902UnicodeString
903RuleBasedNumberFormat::getRuleSetName(int32_t index) const
904{
374ca955
A
905 if (localizations) {
906 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
907 return string;
908 } else if (ruleSets) {
909 UnicodeString result;
b75a7d8f
A
910 for (NFRuleSet** p = ruleSets; *p; ++p) {
911 NFRuleSet* rs = *p;
912 if (rs->isPublic()) {
913 if (--index == -1) {
914 rs->getName(result);
915 return result;
916 }
917 }
918 }
919 }
374ca955
A
920 UnicodeString empty;
921 return empty;
b75a7d8f
A
922}
923
924int32_t
925RuleBasedNumberFormat::getNumberOfRuleSetNames() const
926{
927 int32_t result = 0;
374ca955
A
928 if (localizations) {
929 result = localizations->getNumberOfRuleSets();
930 } else if (ruleSets) {
b75a7d8f
A
931 for (NFRuleSet** p = ruleSets; *p; ++p) {
932 if ((**p).isPublic()) {
933 ++result;
934 }
935 }
936 }
937 return result;
938}
939
374ca955
A
940int32_t
941RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
942 if (localizations) {
943 return localizations->getNumberOfDisplayLocales();
944 }
945 return 0;
946}
947
948Locale
949RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
950 if (U_FAILURE(status)) {
73c04bcf 951 return Locale("");
374ca955
A
952 }
953 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
954 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
955 char buffer[64];
956 int32_t cap = name.length() + 1;
957 char* bp = buffer;
958 if (cap > 64) {
959 bp = (char *)uprv_malloc(cap);
960 if (bp == NULL) {
961 status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 962 return Locale("");
374ca955
A
963 }
964 }
965 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
966 Locale retLocale(bp);
967 if (bp != buffer) {
968 uprv_free(bp);
969 }
970 return retLocale;
971 }
972 status = U_ILLEGAL_ARGUMENT_ERROR;
973 Locale retLocale;
974 return retLocale;
975}
976
977UnicodeString
978RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
979 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
980 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
981 int32_t len = localeName.length();
982 UChar* localeStr = localeName.getBuffer(len + 1);
983 while (len >= 0) {
984 localeStr[len] = 0;
985 int32_t ix = localizations->indexForLocale(localeStr);
986 if (ix >= 0) {
987 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
988 return name;
989 }
990
991 // trim trailing portion, skipping over ommitted sections
992 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
993 while (len > 0 && localeStr[len-1] == 0x005F) --len;
994 }
995 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
996 return name;
997 }
998 UnicodeString bogus;
999 bogus.setToBogus();
1000 return bogus;
1001}
1002
1003UnicodeString
1004RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1005 if (localizations) {
1006 UnicodeString rsn(ruleSetName);
1007 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1008 return getRuleSetDisplayName(ix, localeParam);
1009 }
1010 UnicodeString bogus;
1011 bogus.setToBogus();
1012 return bogus;
1013}
1014
b75a7d8f
A
1015NFRuleSet*
1016RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1017{
1018 if (U_SUCCESS(status) && ruleSets) {
1019 for (NFRuleSet** p = ruleSets; *p; ++p) {
1020 NFRuleSet* rs = *p;
1021 if (rs->isNamed(name)) {
1022 return rs;
1023 }
1024 }
1025 status = U_ILLEGAL_ARGUMENT_ERROR;
1026 }
1027 return NULL;
1028}
1029
1030UnicodeString&
1031RuleBasedNumberFormat::format(int32_t number,
1032 UnicodeString& toAppendTo,
374ca955 1033 FieldPosition& /* pos */) const
b75a7d8f
A
1034{
1035 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1036 return toAppendTo;
1037}
1038
1039
1040UnicodeString&
1041RuleBasedNumberFormat::format(int64_t number,
1042 UnicodeString& toAppendTo,
374ca955 1043 FieldPosition& /* pos */) const
b75a7d8f
A
1044{
1045 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1046 return toAppendTo;
1047}
1048
1049
1050UnicodeString&
1051RuleBasedNumberFormat::format(double number,
1052 UnicodeString& toAppendTo,
374ca955 1053 FieldPosition& /* pos */) const
b75a7d8f 1054{
729e4ab9
A
1055 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1056 if (uprv_isNaN(number)) {
1057 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1058 if (decFmtSyms) {
1059 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1060 }
1061 } else if (defaultRuleSet) {
1062 defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1063 }
b75a7d8f
A
1064 return toAppendTo;
1065}
1066
1067
1068UnicodeString&
1069RuleBasedNumberFormat::format(int32_t number,
1070 const UnicodeString& ruleSetName,
1071 UnicodeString& toAppendTo,
374ca955 1072 FieldPosition& /* pos */,
b75a7d8f
A
1073 UErrorCode& status) const
1074{
1075 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1076 if (U_SUCCESS(status)) {
4388f060 1077 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1078 // throw new IllegalArgumentException("Can't use internal rule set");
1079 status = U_ILLEGAL_ARGUMENT_ERROR;
1080 } else {
1081 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1082 if (rs) {
1083 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1084 }
1085 }
1086 }
1087 return toAppendTo;
1088}
1089
1090
1091UnicodeString&
1092RuleBasedNumberFormat::format(int64_t number,
1093 const UnicodeString& ruleSetName,
1094 UnicodeString& toAppendTo,
374ca955 1095 FieldPosition& /* pos */,
b75a7d8f
A
1096 UErrorCode& status) const
1097{
1098 if (U_SUCCESS(status)) {
4388f060 1099 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1100 // throw new IllegalArgumentException("Can't use internal rule set");
1101 status = U_ILLEGAL_ARGUMENT_ERROR;
1102 } else {
1103 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1104 if (rs) {
1105 rs->format(number, toAppendTo, toAppendTo.length());
1106 }
1107 }
1108 }
1109 return toAppendTo;
1110}
1111
1112
1113// make linker happy
1114UnicodeString&
1115RuleBasedNumberFormat::format(const Formattable& obj,
1116 UnicodeString& toAppendTo,
1117 FieldPosition& pos,
1118 UErrorCode& status) const
1119{
1120 return NumberFormat::format(obj, toAppendTo, pos, status);
1121}
1122
1123UnicodeString&
1124RuleBasedNumberFormat::format(double number,
1125 const UnicodeString& ruleSetName,
1126 UnicodeString& toAppendTo,
374ca955 1127 FieldPosition& /* pos */,
b75a7d8f
A
1128 UErrorCode& status) const
1129{
1130 if (U_SUCCESS(status)) {
4388f060 1131 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
b75a7d8f
A
1132 // throw new IllegalArgumentException("Can't use internal rule set");
1133 status = U_ILLEGAL_ARGUMENT_ERROR;
1134 } else {
1135 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1136 if (rs) {
1137 rs->format(number, toAppendTo, toAppendTo.length());
1138 }
1139 }
1140 }
1141 return toAppendTo;
1142}
1143
1144void
1145RuleBasedNumberFormat::parse(const UnicodeString& text,
1146 Formattable& result,
1147 ParsePosition& parsePosition) const
1148{
1149 if (!ruleSets) {
1150 parsePosition.setErrorIndex(0);
1151 return;
1152 }
1153
374ca955
A
1154 UnicodeString workingText(text, parsePosition.getIndex());
1155 ParsePosition workingPos(0);
1156
1157 ParsePosition high_pp(0);
b75a7d8f
A
1158 Formattable high_result;
1159
1160 for (NFRuleSet** p = ruleSets; *p; ++p) {
1161 NFRuleSet *rp = *p;
729e4ab9 1162 if (rp->isPublic() && rp->isParseable()) {
374ca955 1163 ParsePosition working_pp(0);
b75a7d8f
A
1164 Formattable working_result;
1165
729e4ab9 1166 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
b75a7d8f
A
1167 if (working_pp.getIndex() > high_pp.getIndex()) {
1168 high_pp = working_pp;
1169 high_result = working_result;
1170
374ca955 1171 if (high_pp.getIndex() == workingText.length()) {
b75a7d8f
A
1172 break;
1173 }
1174 }
1175 }
1176 }
1177
46f4442e
A
1178 int32_t startIndex = parsePosition.getIndex();
1179 parsePosition.setIndex(startIndex + high_pp.getIndex());
374ca955
A
1180 if (high_pp.getIndex() > 0) {
1181 parsePosition.setErrorIndex(-1);
46f4442e
A
1182 } else {
1183 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1184 parsePosition.setErrorIndex(startIndex + errorIndex);
b75a7d8f 1185 }
b75a7d8f
A
1186 result = high_result;
1187 if (result.getType() == Formattable::kDouble) {
1188 int32_t r = (int32_t)result.getDouble();
1189 if ((double)r == result.getDouble()) {
1190 result.setLong(r);
1191 }
1192 }
1193}
1194
1195#if !UCONFIG_NO_COLLATION
1196
1197void
1198RuleBasedNumberFormat::setLenient(UBool enabled)
1199{
1200 lenient = enabled;
1201 if (!enabled && collator) {
1202 delete collator;
1203 collator = NULL;
1204 }
1205}
1206
1207#endif
1208
1209void
1210RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1211 if (U_SUCCESS(status)) {
1212 if (ruleSetName.isEmpty()) {
374ca955
A
1213 if (localizations) {
1214 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1215 defaultRuleSet = findRuleSet(name, status);
1216 } else {
b75a7d8f 1217 initDefaultRuleSet();
374ca955
A
1218 }
1219 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
b75a7d8f
A
1220 status = U_ILLEGAL_ARGUMENT_ERROR;
1221 } else {
1222 NFRuleSet* result = findRuleSet(ruleSetName, status);
1223 if (result != NULL) {
1224 defaultRuleSet = result;
1225 }
1226 }
1227 }
1228}
1229
374ca955
A
1230UnicodeString
1231RuleBasedNumberFormat::getDefaultRuleSetName() const {
1232 UnicodeString result;
1233 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1234 defaultRuleSet->getName(result);
1235 } else {
1236 result.setToBogus();
1237 }
1238 return result;
1239}
1240
b75a7d8f
A
1241void
1242RuleBasedNumberFormat::initDefaultRuleSet()
1243{
374ca955 1244 defaultRuleSet = NULL;
b75a7d8f 1245 if (!ruleSets) {
374ca955 1246 return;
b75a7d8f 1247 }
374ca955 1248
729e4ab9
A
1249 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1250 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1251 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1252
374ca955 1253 NFRuleSet**p = &ruleSets[0];
b75a7d8f 1254 while (*p) {
729e4ab9
A
1255 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1256 defaultRuleSet = *p;
1257 return;
1258 } else {
1259 ++p;
1260 }
b75a7d8f
A
1261 }
1262
1263 defaultRuleSet = *--p;
1264 if (!defaultRuleSet->isPublic()) {
1265 while (p != ruleSets) {
1266 if ((*--p)->isPublic()) {
1267 defaultRuleSet = *p;
1268 break;
1269 }
1270 }
1271 }
1272}
1273
1274
1275void
374ca955 1276RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
73c04bcf 1277 UParseError& pErr, UErrorCode& status)
b75a7d8f
A
1278{
1279 // TODO: implement UParseError
73c04bcf 1280 uprv_memset(&pErr, 0, sizeof(UParseError));
b75a7d8f
A
1281 // Note: this can leave ruleSets == NULL, so remaining code should check
1282 if (U_FAILURE(status)) {
1283 return;
1284 }
1285
374ca955
A
1286 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1287
b75a7d8f
A
1288 UnicodeString description(rules);
1289 if (!description.length()) {
1290 status = U_MEMORY_ALLOCATION_ERROR;
1291 return;
1292 }
1293
1294 // start by stripping the trailing whitespace from all the rules
1295 // (this is all the whitespace follwing each semicolon in the
1296 // description). This allows us to look for rule-set boundaries
1297 // by searching for ";%" without having to worry about whitespace
1298 // between the ; and the %
1299 stripWhitespace(description);
1300
1301 // check to see if there's a set of lenient-parse rules. If there
1302 // is, pull them out into our temporary holding place for them,
1303 // and delete them from the description before the real desciption-
1304 // parsing code sees them
4388f060 1305 int32_t lp = description.indexOf(gLenientParse, -1, 0);
b75a7d8f
A
1306 if (lp != -1) {
1307 // we've got to make sure we're not in the middle of a rule
1308 // (where "%%lenient-parse" would actually get treated as
1309 // rule text)
1310 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1311 // locate the beginning and end of the actual collation
1312 // rules (there may be whitespace between the name and
1313 // the first token in the description)
4388f060 1314 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
b75a7d8f
A
1315
1316 if (lpEnd == -1) {
1317 lpEnd = description.length() - 1;
1318 }
1319 int lpStart = lp + u_strlen(gLenientParse);
4388f060 1320 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
b75a7d8f
A
1321 ++lpStart;
1322 }
1323
1324 // copy out the lenient-parse rules and delete them
1325 // from the description
1326 lenientParseRules = new UnicodeString();
1327 /* test for NULL */
1328 if (lenientParseRules == 0) {
1329 status = U_MEMORY_ALLOCATION_ERROR;
1330 return;
1331 }
1332 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1333
1334 description.remove(lp, lpEnd + 1 - lp);
1335 }
1336 }
1337
1338 // pre-flight parsing the description and count the number of
1339 // rule sets (";%" marks the end of one rule set and the beginning
1340 // of the next)
4388f060
A
1341 numRuleSets = 0;
1342 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
b75a7d8f
A
1343 ++numRuleSets;
1344 ++p;
1345 }
1346 ++numRuleSets;
1347
1348 // our rule list is an array of the appropriate size
1349 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1350 /* test for NULL */
1351 if (ruleSets == 0) {
1352 status = U_MEMORY_ALLOCATION_ERROR;
1353 return;
1354 }
1355
1356 for (int i = 0; i <= numRuleSets; ++i) {
1357 ruleSets[i] = NULL;
1358 }
1359
1360 // divide up the descriptions into individual rule-set descriptions
1361 // and store them in a temporary array. At each step, we also
1362 // new up a rule set, but all this does is initialize its name
1363 // and remove it from its description. We can't actually parse
1364 // the rest of the descriptions and finish initializing everything
1365 // because we have to know the names and locations of all the rule
1366 // sets before we can actually set everything up
1367 if(!numRuleSets) {
46f4442e
A
1368 status = U_ILLEGAL_ARGUMENT_ERROR;
1369 return;
b75a7d8f 1370 }
4388f060
A
1371
1372 ruleSetDescriptions = new UnicodeString[numRuleSets];
b75a7d8f
A
1373 if (ruleSetDescriptions == 0) {
1374 status = U_MEMORY_ALLOCATION_ERROR;
1375 return;
1376 }
1377
1378 {
1379 int curRuleSet = 0;
1380 int32_t start = 0;
4388f060 1381 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
b75a7d8f
A
1382 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1383 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1384 if (ruleSets[curRuleSet] == 0) {
1385 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1386 return;
b75a7d8f
A
1387 }
1388 ++curRuleSet;
1389 start = p + 1;
1390 }
1391 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1392 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1393 if (ruleSets[curRuleSet] == 0) {
1394 status = U_MEMORY_ALLOCATION_ERROR;
4388f060 1395 return;
b75a7d8f
A
1396 }
1397 }
1398
1399 // now we can take note of the formatter's default rule set, which
1400 // is the last public rule set in the description (it's the last
1401 // rather than the first so that a user can create a new formatter
1402 // from an existing formatter and change its default behavior just
1403 // by appending more rule sets to the end)
374ca955
A
1404
1405 // {dlf} Initialization of a fraction rule set requires the default rule
1406 // set to be known. For purposes of initialization, this is always the
1407 // last public rule set, no matter what the localization data says.
1408 initDefaultRuleSet();
b75a7d8f
A
1409
1410 // finally, we can go back through the temporary descriptions
1411 // list and finish seting up the substructure (and we throw
1412 // away the temporary descriptions as we go)
1413 {
1414 for (int i = 0; i < numRuleSets; i++) {
1415 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1416 }
1417 }
1418
374ca955
A
1419 // Now that the rules are initialized, the 'real' default rule
1420 // set can be adjusted by the localization data.
1421
1422 // The C code keeps the localization array as is, rather than building
1423 // a separate array of the public rule set names, so we have less work
1424 // to do here-- but we still need to check the names.
1425
1426 if (localizationInfos) {
1427 // confirm the names, if any aren't in the rules, that's an error
1428 // it is ok if the rules contain public rule sets that are not in this list
1429 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1430 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1431 NFRuleSet* rs = findRuleSet(name, status);
1432 if (rs == NULL) {
1433 break; // error
1434 }
1435 if (i == 0) {
1436 defaultRuleSet = rs;
1437 }
1438 }
1439 } else {
1440 defaultRuleSet = getDefaultRuleSet();
1441 }
b75a7d8f
A
1442}
1443
1444void
1445RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1446{
1447 // iterate through the characters...
1448 UnicodeString result;
1449
1450 int start = 0;
1451 while (start != -1 && start < description.length()) {
1452 // seek to the first non-whitespace character...
1453 while (start < description.length()
4388f060 1454 && PatternProps::isWhiteSpace(description.charAt(start))) {
b75a7d8f
A
1455 ++start;
1456 }
1457
1458 // locate the next semicolon in the text and copy the text from
1459 // our current position up to that semicolon into the result
1460 int32_t p = description.indexOf(gSemiColon, start);
1461 if (p == -1) {
1462 // or if we don't find a semicolon, just copy the rest of
1463 // the string into the result
1464 result.append(description, start, description.length() - start);
1465 start = -1;
1466 }
1467 else if (p < description.length()) {
1468 result.append(description, start, p + 1 - start);
1469 start = p + 1;
1470 }
1471
1472 // when we get here, we've seeked off the end of the sring, and
1473 // we terminate the loop (we continue until *start* is -1 rather
1474 // than until *p* is -1, because otherwise we'd miss the last
1475 // rule in the description)
1476 else {
1477 start = -1;
1478 }
1479 }
1480
1481 description.setTo(result);
1482}
1483
1484
1485void
1486RuleBasedNumberFormat::dispose()
1487{
1488 if (ruleSets) {
1489 for (NFRuleSet** p = ruleSets; *p; ++p) {
1490 delete *p;
1491 }
1492 uprv_free(ruleSets);
1493 ruleSets = NULL;
1494 }
1495
4388f060
A
1496 if (ruleSetDescriptions) {
1497 delete [] ruleSetDescriptions;
1498 }
1499
b75a7d8f
A
1500#if !UCONFIG_NO_COLLATION
1501 delete collator;
1502#endif
1503 collator = NULL;
1504
1505 delete decimalFormatSymbols;
1506 decimalFormatSymbols = NULL;
1507
1508 delete lenientParseRules;
1509 lenientParseRules = NULL;
374ca955
A
1510
1511 if (localizations) localizations = localizations->unref();
b75a7d8f
A
1512}
1513
1514
1515//-----------------------------------------------------------------------
1516// package-internal API
1517//-----------------------------------------------------------------------
1518
1519/**
1520 * Returns the collator to use for lenient parsing. The collator is lazily created:
1521 * this function creates it the first time it's called.
1522 * @return The collator to use for lenient parsing, or null if lenient parsing
1523 * is turned off.
1524*/
1525Collator*
1526RuleBasedNumberFormat::getCollator() const
1527{
1528#if !UCONFIG_NO_COLLATION
1529 if (!ruleSets) {
1530 return NULL;
1531 }
1532
1533 // lazy-evaulate the collator
1534 if (collator == NULL && lenient) {
1535 // create a default collator based on the formatter's locale,
1536 // then pull out that collator's rules, append any additional
1537 // rules specified in the description, and create a _new_
1538 // collator based on the combinaiton of those rules
1539
1540 UErrorCode status = U_ZERO_ERROR;
1541
1542 Collator* temp = Collator::createInstance(locale, status);
729e4ab9
A
1543 RuleBasedCollator* newCollator;
1544 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
b75a7d8f
A
1545 if (lenientParseRules) {
1546 UnicodeString rules(newCollator->getRules());
1547 rules.append(*lenientParseRules);
1548
1549 newCollator = new RuleBasedCollator(rules, status);
46f4442e
A
1550 // Exit if newCollator could not be created.
1551 if (newCollator == NULL) {
1552 return NULL;
1553 }
b75a7d8f
A
1554 } else {
1555 temp = NULL;
1556 }
1557 if (U_SUCCESS(status)) {
1558 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1559 // cast away const
1560 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1561 } else {
1562 delete newCollator;
1563 }
1564 }
1565 delete temp;
1566 }
1567#endif
1568
1569 // if lenient-parse mode is off, this will be null
1570 // (see setLenientParseMode())
1571 return collator;
1572}
1573
1574
1575/**
1576 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1577 * instances owned by this formatter. This object is lazily created: this function
1578 * creates it the first time it's called.
1579 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1580 * instances owned by this formatter.
1581*/
1582DecimalFormatSymbols*
1583RuleBasedNumberFormat::getDecimalFormatSymbols() const
1584{
1585 // lazy-evaluate the DecimalFormatSymbols object. This object
1586 // is shared by all DecimalFormat instances belonging to this
1587 // formatter
1588 if (decimalFormatSymbols == NULL) {
1589 UErrorCode status = U_ZERO_ERROR;
1590 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1591 if (U_SUCCESS(status)) {
1592 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1593 } else {
1594 delete temp;
1595 }
1596 }
1597 return decimalFormatSymbols;
1598}
1599
4388f060
A
1600// De-owning the current localized symbols and adopt the new symbols.
1601void
1602RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1603{
1604 if (symbolsToAdopt == NULL) {
1605 return; // do not allow caller to set decimalFormatSymbols to NULL
1606 }
1607
1608 if (decimalFormatSymbols != NULL) {
1609 delete decimalFormatSymbols;
1610 }
1611
1612 decimalFormatSymbols = symbolsToAdopt;
1613
1614 {
1615 // Apply the new decimalFormatSymbols by reparsing the rulesets
1616 UErrorCode status = U_ZERO_ERROR;
1617
1618 for (int32_t i = 0; i < numRuleSets; i++) {
1619 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1620 }
1621 }
1622}
1623
1624// Setting the symbols is equlivalent to adopting a newly created localized symbols.
1625void
1626RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1627{
1628 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1629}
1630
374ca955
A
1631U_NAMESPACE_END
1632
b75a7d8f
A
1633/* U_HAVE_RBNF */
1634#endif