]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/rbnf.cpp
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / i18n / rbnf.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
729e4ab9 3* Copyright (C) 1997-2010, International Business Machines Corporation
73c04bcf 4* and others. All Rights Reserved.
b75a7d8f
A
5*******************************************************************************
6*/
7
729e4ab9
A
8#include <typeinfo> // for 'typeid' to work
9
b75a7d8f
A
10#include "unicode/rbnf.h"
11
12#if U_HAVE_RBNF
13
14#include "unicode/normlzr.h"
15#include "unicode/tblcoll.h"
16#include "unicode/uchar.h"
17#include "unicode/ucol.h"
18#include "unicode/uloc.h"
19#include "unicode/unum.h"
20#include "unicode/ures.h"
21#include "unicode/ustring.h"
22#include "unicode/utf16.h"
374ca955 23#include "unicode/udata.h"
b75a7d8f
A
24#include "nfrs.h"
25
26#include "cmemory.h"
27#include "cstring.h"
374ca955 28#include "util.h"
729e4ab9 29#include "uresimp.h"
374ca955
A
30
31// debugging
32// #define DEBUG
33
34#ifdef DEBUG
35#include "stdio.h"
36#endif
37
38#define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
b75a7d8f
A
39
40static const UChar gPercentPercent[] =
41{
42 0x25, 0x25, 0
43}; /* "%%" */
44
45// All urbnf objects are created through openRules, so we init all of the
46// Unicode string constants required by rbnf, nfrs, or nfr here.
47static const UChar gLenientParse[] =
48{
49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50}; /* "%%lenient-parse:" */
51static const UChar gSemiColon = 0x003B;
52static const UChar gSemiPercent[] =
53{
54 0x3B, 0x25, 0
55}; /* ";%" */
56
// kMaxDouble is 2^(2 * kSomeNumberOfBitsDiv2). It is built as the square of
// kHalfMaxDouble so the integer shift stays well inside the range of int.
// Each macro body is fully parenthesized so it expands as a single operand.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60
729e4ab9
A
61// Temporary workaround - when noParse is true, do nothing in parse.
62// TODO: We need a real fix - see #6895/#6896
63static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
64
374ca955
A
65U_NAMESPACE_BEGIN
66
67UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68
73c04bcf
A
69/*
70This is a utility class. It does not use ICU's RTTI.
71If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72Please make sure that intltest passes on Windows in Release mode,
73since the string pooling per compilation unit will mess up how RTTI works.
74The RTTI code was also removed due to lack of code coverage.
75*/
76class LocalizationInfo : public UMemory {
374ca955
A
77protected:
78 virtual ~LocalizationInfo() {};
79 uint32_t refcount;
80
81public:
82 LocalizationInfo() : refcount(0) {}
83
84 LocalizationInfo* ref(void) {
85 ++refcount;
86 return this;
87 }
88
89 LocalizationInfo* unref(void) {
90 if (refcount && --refcount == 0) {
91 delete this;
92 }
93 return NULL;
94 }
95
96 virtual UBool operator==(const LocalizationInfo* rhs) const;
97 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98
99 virtual int32_t getNumberOfRuleSets(void) const = 0;
100 virtual const UChar* getRuleSetName(int32_t index) const = 0;
101 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102 virtual const UChar* getLocaleName(int32_t index) const = 0;
103 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104
105 virtual int32_t indexForLocale(const UChar* locale) const;
106 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107
73c04bcf
A
108// virtual UClassID getDynamicClassID() const = 0;
109// static UClassID getStaticClassID(void);
374ca955
A
110};
111
73c04bcf 112//UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
374ca955
A
113
114// if both strings are NULL, this returns TRUE
115static UBool
116streq(const UChar* lhs, const UChar* rhs) {
117 if (rhs == lhs) {
118 return TRUE;
119 }
120 if (lhs && rhs) {
121 return u_strcmp(lhs, rhs) == 0;
122 }
123 return FALSE;
124}
125
126UBool
127LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
128 if (rhs) {
129 if (this == rhs) {
130 return TRUE;
131 }
132
133 int32_t rsc = getNumberOfRuleSets();
134 if (rsc == rhs->getNumberOfRuleSets()) {
135 for (int i = 0; i < rsc; ++i) {
136 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
137 return FALSE;
138 }
139 }
140 int32_t dlc = getNumberOfDisplayLocales();
141 if (dlc == rhs->getNumberOfDisplayLocales()) {
142 for (int i = 0; i < dlc; ++i) {
143 const UChar* locale = getLocaleName(i);
144 int32_t ix = rhs->indexForLocale(locale);
145 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
146 if (!streq(locale, rhs->getLocaleName(ix))) {
147 return FALSE;
148 }
149 for (int j = 0; j < rsc; ++j) {
150 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
151 return FALSE;
152 }
153 }
154 }
155 return TRUE;
156 }
157 }
158 }
159 return FALSE;
160}
161
162int32_t
163LocalizationInfo::indexForLocale(const UChar* locale) const {
164 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
165 if (streq(locale, getLocaleName(i))) {
166 return i;
167 }
168 }
169 return -1;
170}
171
172int32_t
173LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
174 if (ruleset) {
175 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
176 if (streq(ruleset, getRuleSetName(i))) {
177 return i;
178 }
179 }
180 }
181 return -1;
182}
183
184
185typedef void (*Fn_Deleter)(void*);
186
187class VArray {
188 void** buf;
189 int32_t cap;
190 int32_t size;
191 Fn_Deleter deleter;
192public:
193 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
194
195 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
196
197 ~VArray() {
198 if (deleter) {
199 for (int i = 0; i < size; ++i) {
200 (*deleter)(buf[i]);
201 }
202 }
203 uprv_free(buf);
204 }
205
206 int32_t length() {
207 return size;
208 }
209
210 void add(void* elem, UErrorCode& status) {
211 if (U_SUCCESS(status)) {
212 if (size == cap) {
213 if (cap == 0) {
214 cap = 1;
215 } else if (cap < 256) {
216 cap *= 2;
217 } else {
218 cap += 256;
219 }
220 if (buf == NULL) {
221 buf = (void**)uprv_malloc(cap * sizeof(void*));
222 } else {
223 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
224 }
225 if (buf == NULL) {
226 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
227 status = U_MEMORY_ALLOCATION_ERROR;
228 return;
229 }
230 void* start = &buf[size];
231 size_t count = (cap - size) * sizeof(void*);
232 uprv_memset(start, 0, count); // fill with nulls, just because
233 }
234 buf[size++] = elem;
235 }
236 }
237
238 void** release(void) {
239 void** result = buf;
240 buf = NULL;
241 cap = 0;
242 size = 0;
243 return result;
244 }
245};
246
247class LocDataParser;
248
249class StringLocalizationInfo : public LocalizationInfo {
250 UChar* info;
251 UChar*** data;
252 int32_t numRuleSets;
253 int32_t numLocales;
254
255friend class LocDataParser;
256
257 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
258 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
259 {
260 }
261
262public:
263 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
264
265 virtual ~StringLocalizationInfo();
266 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
267 virtual const UChar* getRuleSetName(int32_t index) const;
268 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
269 virtual const UChar* getLocaleName(int32_t index) const;
270 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
271
73c04bcf
A
272// virtual UClassID getDynamicClassID() const;
273// static UClassID getStaticClassID(void);
374ca955
A
274
275private:
276 void init(UErrorCode& status) const;
277};
278
279
// Characters with special meaning in the localization data syntax.
enum {
    OPEN_ANGLE  = 0x3C, /* '<' */
    CLOSE_ANGLE = 0x3E, /* '>' */
    COMMA       = 0x2C, /* ',' */
    TICK        = 0x27, /* '\'' */
    QUOTE       = 0x22, /* '"' */
    SPACE       = 0x20  /* ' ' */
};
288
289/**
290 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
291 */
292class LocDataParser {
293 UChar* data;
294 const UChar* e;
295 UChar* p;
296 UChar ch;
297 UParseError& pe;
298 UErrorCode& ec;
299
300public:
301 LocDataParser(UParseError& parseError, UErrorCode& status)
302 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
303 ~LocDataParser() {}
304
305 /*
306 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
307 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
308 */
309 StringLocalizationInfo* parse(UChar* data, int32_t len);
310
311private:
312
313 void inc(void) { ++p; ch = 0xffff; }
314 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
315 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
316 void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
317 UBool inList(UChar c, const UChar* list) const {
318 if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
319 while (*list && *list != c) ++list; return *list == c;
320 }
321 void parseError(const char* msg);
322
323 StringLocalizationInfo* doParse(void);
324
325 UChar** nextArray(int32_t& requiredLength);
326 UChar* nextString(void);
327};
328
// ERROR(msg): report a parse error through parseError() and leave the current
// parser method with NULL. The message is only forwarded in DEBUG builds.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement; otherwise `if (cond) ERROR("x");` would return unconditionally.
// Always follow the invocation with a semicolon.
#ifdef DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#endif
334
335
336static const UChar DQUOTE_STOPLIST[] = {
337 QUOTE, 0
338};
339
340static const UChar SQUOTE_STOPLIST[] = {
341 TICK, 0
342};
343
344static const UChar NOQUOTE_STOPLIST[] = {
345 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
346};
347
348static void
349DeleteFn(void* p) {
350 uprv_free(p);
351}
352
353StringLocalizationInfo*
354LocDataParser::parse(UChar* _data, int32_t len) {
355 if (U_FAILURE(ec)) {
356 if (_data) uprv_free(_data);
357 return NULL;
358 }
359
360 pe.line = 0;
361 pe.offset = -1;
362 pe.postContext[0] = 0;
363 pe.preContext[0] = 0;
364
365 if (_data == NULL) {
366 ec = U_ILLEGAL_ARGUMENT_ERROR;
367 return NULL;
368 }
369
370 if (len <= 0) {
371 ec = U_ILLEGAL_ARGUMENT_ERROR;
372 uprv_free(_data);
373 return NULL;
374 }
375
376 data = _data;
377 e = data + len;
378 p = _data;
379 ch = 0xffff;
380
381 return doParse();
382}
383
384
385StringLocalizationInfo*
386LocDataParser::doParse(void) {
387 skipWhitespace();
388 if (!checkInc(OPEN_ANGLE)) {
389 ERROR("Missing open angle");
390 } else {
391 VArray array(DeleteFn);
392 UBool mightHaveNext = TRUE;
393 int32_t requiredLength = -1;
394 while (mightHaveNext) {
395 mightHaveNext = FALSE;
396 UChar** elem = nextArray(requiredLength);
397 skipWhitespace();
398 UBool haveComma = check(COMMA);
399 if (elem) {
400 array.add(elem, ec);
401 if (haveComma) {
402 inc();
403 mightHaveNext = TRUE;
404 }
405 } else if (haveComma) {
406 ERROR("Unexpected character");
407 }
408 }
409
410 skipWhitespace();
411 if (!checkInc(CLOSE_ANGLE)) {
412 if (check(OPEN_ANGLE)) {
413 ERROR("Missing comma in outer array");
414 } else {
415 ERROR("Missing close angle bracket in outer array");
416 }
417 }
418
419 skipWhitespace();
420 if (p != e) {
421 ERROR("Extra text after close of localization data");
422 }
423
424 array.add(NULL, ec);
425 if (U_SUCCESS(ec)) {
426 int32_t numLocs = array.length() - 2; // subtract first, NULL
427 UChar*** result = (UChar***)array.release();
428
429 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
430 }
431 }
432
433 ERROR("Unknown error");
434}
435
436UChar**
437LocDataParser::nextArray(int32_t& requiredLength) {
438 if (U_FAILURE(ec)) {
439 return NULL;
440 }
441
442 skipWhitespace();
443 if (!checkInc(OPEN_ANGLE)) {
444 ERROR("Missing open angle");
445 }
446
447 VArray array;
448 UBool mightHaveNext = TRUE;
449 while (mightHaveNext) {
450 mightHaveNext = FALSE;
451 UChar* elem = nextString();
452 skipWhitespace();
453 UBool haveComma = check(COMMA);
454 if (elem) {
455 array.add(elem, ec);
456 if (haveComma) {
457 inc();
458 mightHaveNext = TRUE;
459 }
460 } else if (haveComma) {
461 ERROR("Unexpected comma");
462 }
463 }
464 skipWhitespace();
465 if (!checkInc(CLOSE_ANGLE)) {
466 if (check(OPEN_ANGLE)) {
467 ERROR("Missing close angle bracket in inner array");
468 } else {
469 ERROR("Missing comma in inner array");
470 }
471 }
472
473 array.add(NULL, ec);
474 if (U_SUCCESS(ec)) {
475 if (requiredLength == -1) {
476 requiredLength = array.length() + 1;
477 } else if (array.length() != requiredLength) {
478 ec = U_ILLEGAL_ARGUMENT_ERROR;
479 ERROR("Array not of required length");
480 }
481
482 return (UChar**)array.release();
483 }
484 ERROR("Unknown Error");
485}
486
487UChar*
488LocDataParser::nextString() {
489 UChar* result = NULL;
490
491 skipWhitespace();
492 if (p < e) {
493 const UChar* terminators;
494 UChar c = *p;
495 UBool haveQuote = c == QUOTE || c == TICK;
496 if (haveQuote) {
497 inc();
498 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
499 } else {
500 terminators = NOQUOTE_STOPLIST;
501 }
502 UChar* start = p;
503 while (p < e && !inList(*p, terminators)) ++p;
504 if (p == e) {
505 ERROR("Unexpected end of data");
506 }
507
508 UChar x = *p;
509 if (p > start) {
510 ch = x;
511 *p = 0x0; // terminate by writing to data
512 result = start; // just point into data
513 }
514 if (haveQuote) {
515 if (x != c) {
516 ERROR("Missing matching quote");
517 } else if (p == start) {
518 ERROR("Empty string");
519 }
520 inc();
521 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
522 ERROR("Unexpected character in string");
523 }
524 }
525
526 // ok for there to be no next string
527 return result;
528}
529
530void
531LocDataParser::parseError(const char* /*str*/) {
532 if (!data) {
533 return;
534 }
b75a7d8f 535
374ca955 536 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
73c04bcf 537 if (start < data) {
374ca955 538 start = data;
73c04bcf
A
539 }
540 for (UChar* x = p; --x >= start;) {
374ca955
A
541 if (!*x) {
542 start = x+1;
543 break;
544 }
73c04bcf 545 }
374ca955 546 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
73c04bcf 547 if (limit > e) {
374ca955 548 limit = e;
73c04bcf
A
549 }
550 u_strncpy(pe.preContext, start, (int32_t)(p-start));
374ca955 551 pe.preContext[p-start] = 0;
73c04bcf 552 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
374ca955 553 pe.postContext[limit-p] = 0;
73c04bcf 554 pe.offset = (int32_t)(p - data);
374ca955
A
555
556#ifdef DEBUG
557 fprintf(stderr, "%s at or near character %d: ", str, p-data);
558
559 UnicodeString msg;
560 msg.append(start, p - start);
561 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
562 msg.append(p, limit-p);
563 msg.append("'");
564
565 char buf[128];
566 int32_t len = msg.extract(0, msg.length(), buf, 128);
567 if (len >= 128) {
568 buf[127] = 0;
569 } else {
570 buf[len] = 0;
571 }
572 fprintf(stderr, "%s\n", buf);
573 fflush(stderr);
574#endif
575
576 uprv_free(data);
577 data = NULL;
578 p = NULL;
579 e = NULL;
580
581 if (U_SUCCESS(ec)) {
582 ec = U_PARSE_ERROR;
583 }
584}
585
73c04bcf 586//UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
374ca955
A
587
588StringLocalizationInfo*
589StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
590 if (U_FAILURE(status)) {
591 return NULL;
592 }
593
594 int32_t len = info.length();
595 if (len == 0) {
596 return NULL; // no error;
597 }
598
599 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
600 if (!p) {
601 status = U_MEMORY_ALLOCATION_ERROR;
602 return NULL;
603 }
604 info.extract(p, len, status);
605 if (!U_FAILURE(status)) {
606 status = U_ZERO_ERROR; // clear warning about non-termination
607 }
608
609 LocDataParser parser(perror, status);
610 return parser.parse(p, len);
611}
612
613StringLocalizationInfo::~StringLocalizationInfo() {
614 for (UChar*** p = (UChar***)data; *p; ++p) {
615 // remaining data is simply pointer into our unicode string data.
616 if (*p) uprv_free(*p);
617 }
618 if (data) uprv_free(data);
619 if (info) uprv_free(info);
620}
621
622
623const UChar*
624StringLocalizationInfo::getRuleSetName(int32_t index) const {
625 if (index >= 0 && index < getNumberOfRuleSets()) {
626 return data[0][index];
627 }
628 return NULL;
629}
630
631const UChar*
632StringLocalizationInfo::getLocaleName(int32_t index) const {
633 if (index >= 0 && index < getNumberOfDisplayLocales()) {
634 return data[index+1][0];
635 }
636 return NULL;
637}
638
639const UChar*
640StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
641 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
642 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
643 return data[localeIndex+1][ruleIndex+1];
644 }
645 return NULL;
646}
647
648// ----------
649
650RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
651 const UnicodeString& locs,
652 const Locale& alocale, UParseError& perror, UErrorCode& status)
b75a7d8f
A
653 : ruleSets(NULL)
654 , defaultRuleSet(NULL)
655 , locale(alocale)
656 , collator(NULL)
657 , decimalFormatSymbols(NULL)
658 , lenient(FALSE)
659 , lenientParseRules(NULL)
374ca955 660 , localizations(NULL)
729e4ab9 661 , noParse(FALSE) //TODO: to be removed after #6895
374ca955
A
662{
663 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
664 init(description, locinfo, perror, status);
665}
666
667RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
668 const UnicodeString& locs,
669 UParseError& perror, UErrorCode& status)
670 : ruleSets(NULL)
671 , defaultRuleSet(NULL)
672 , locale(Locale::getDefault())
673 , collator(NULL)
674 , decimalFormatSymbols(NULL)
675 , lenient(FALSE)
676 , lenientParseRules(NULL)
677 , localizations(NULL)
729e4ab9 678 , noParse(FALSE) //TODO: to be removed after #6895
374ca955
A
679{
680 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681 init(description, locinfo, perror, status);
682}
683
684RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685 LocalizationInfo* info,
686 const Locale& alocale, UParseError& perror, UErrorCode& status)
687 : ruleSets(NULL)
688 , defaultRuleSet(NULL)
689 , locale(alocale)
690 , collator(NULL)
691 , decimalFormatSymbols(NULL)
692 , lenient(FALSE)
693 , lenientParseRules(NULL)
694 , localizations(NULL)
729e4ab9 695 , noParse(FALSE) //TODO: to be removed after #6895
374ca955
A
696{
697 init(description, info, perror, status);
698}
699
700RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
701 UParseError& perror,
702 UErrorCode& status)
703 : ruleSets(NULL)
704 , defaultRuleSet(NULL)
705 , locale(Locale::getDefault())
706 , collator(NULL)
707 , decimalFormatSymbols(NULL)
708 , lenient(FALSE)
709 , lenientParseRules(NULL)
710 , localizations(NULL)
729e4ab9 711 , noParse(FALSE) //TODO: to be removed after #6895
374ca955
A
712{
713 init(description, NULL, perror, status);
714}
715
716RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
717 const Locale& aLocale,
718 UParseError& perror,
719 UErrorCode& status)
720 : ruleSets(NULL)
721 , defaultRuleSet(NULL)
722 , locale(aLocale)
723 , collator(NULL)
724 , decimalFormatSymbols(NULL)
725 , lenient(FALSE)
726 , lenientParseRules(NULL)
727 , localizations(NULL)
729e4ab9 728 , noParse(FALSE) //TODO: to be removed after #6895
b75a7d8f 729{
374ca955 730 init(description, NULL, perror, status);
b75a7d8f
A
731}
732
733RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
734 : ruleSets(NULL)
735 , defaultRuleSet(NULL)
736 , locale(alocale)
737 , collator(NULL)
738 , decimalFormatSymbols(NULL)
739 , lenient(FALSE)
740 , lenientParseRules(NULL)
374ca955 741 , localizations(NULL)
b75a7d8f
A
742{
743 if (U_FAILURE(status)) {
744 return;
745 }
746
729e4ab9 747 const char* rules_tag = "RBNFRules";
b75a7d8f
A
748 const char* fmt_tag = "";
749 switch (tag) {
750 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
751 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
752 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
729e4ab9 753 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
b75a7d8f
A
754 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
755 }
756
374ca955
A
757 // TODO: read localization info from resource
758 LocalizationInfo* locinfo = NULL;
759
b75a7d8f 760 int32_t len = 0;
374ca955 761 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
b75a7d8f 762 if (U_SUCCESS(status)) {
374ca955
A
763 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
764 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
729e4ab9
A
765
766 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
767 if (U_FAILURE(status)) {
768 ures_close(nfrb);
769 }
770 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
771 if (U_FAILURE(status)) {
772 ures_close(rbnfRules);
773 ures_close(nfrb);
774 return;
775 }
776
777 UnicodeString desc;
778 while (ures_hasNext(ruleSets)) {
779 const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
780 desc.append(currentString);
781 }
b75a7d8f 782 UParseError perror;
729e4ab9
A
783
784
374ca955 785 init (desc, locinfo, perror, status);
729e4ab9
A
786
787 //TODO: we need a real fix - see #6895 / #6896
788 noParse = FALSE;
789 if (tag == URBNF_SPELLOUT) {
790 const char *lang = alocale.getLanguage();
791 for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
792 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
793 noParse = TRUE;
794 break;
795 }
796 }
797 }
798 //TODO: end
799
800 ures_close(ruleSets);
801 ures_close(rbnfRules);
b75a7d8f 802 }
b75a7d8f
A
803 ures_close(nfrb);
804}
805
806RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
807 : NumberFormat(rhs)
808 , ruleSets(NULL)
809 , defaultRuleSet(NULL)
810 , locale(rhs.locale)
811 , collator(NULL)
812 , decimalFormatSymbols(NULL)
813 , lenient(FALSE)
814 , lenientParseRules(NULL)
374ca955 815 , localizations(NULL)
b75a7d8f
A
816{
817 this->operator=(rhs);
818}
819
374ca955
A
820// --------
821
b75a7d8f
A
822RuleBasedNumberFormat&
823RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
824{
825 UErrorCode status = U_ZERO_ERROR;
826 dispose();
827 locale = rhs.locale;
374ca955
A
828 lenient = rhs.lenient;
829
b75a7d8f
A
830 UnicodeString rules = rhs.getRules();
831 UParseError perror;
374ca955
A
832 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
833
729e4ab9
A
834 //TODO: remove below when we fix the parse bug - See #6895 / #6896
835 noParse = rhs.noParse;
836
b75a7d8f
A
837 return *this;
838}
839
840RuleBasedNumberFormat::~RuleBasedNumberFormat()
841{
842 dispose();
843}
844
845Format*
846RuleBasedNumberFormat::clone(void) const
847{
848 RuleBasedNumberFormat * result = NULL;
849 UnicodeString rules = getRules();
850 UErrorCode status = U_ZERO_ERROR;
851 UParseError perror;
374ca955 852 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
b75a7d8f
A
853 /* test for NULL */
854 if (result == 0) {
855 status = U_MEMORY_ALLOCATION_ERROR;
856 return 0;
857 }
858 if (U_FAILURE(status)) {
859 delete result;
860 result = 0;
861 } else {
862 result->lenient = lenient;
729e4ab9
A
863
864 //TODO: remove below when we fix the parse bug - See #6895 / #6896
865 result->noParse = noParse;
b75a7d8f
A
866 }
867 return result;
868}
869
870UBool
871RuleBasedNumberFormat::operator==(const Format& other) const
872{
873 if (this == &other) {
874 return TRUE;
875 }
876
729e4ab9 877 if (typeid(*this) == typeid(other)) {
b75a7d8f
A
878 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
879 if (locale == rhs.locale &&
374ca955
A
880 lenient == rhs.lenient &&
881 (localizations == NULL
882 ? rhs.localizations == NULL
883 : (rhs.localizations == NULL
884 ? FALSE
885 : *localizations == rhs.localizations))) {
886
b75a7d8f
A
887 NFRuleSet** p = ruleSets;
888 NFRuleSet** q = rhs.ruleSets;
b75a7d8f 889 if (p == NULL) {
374ca955
A
890 return q == NULL;
891 } else if (q == NULL) {
b75a7d8f
A
892 return FALSE;
893 }
894 while (*p && *q && (**p == **q)) {
895 ++p;
896 ++q;
897 }
898 return *q == NULL && *p == NULL;
899 }
900 }
901
902 return FALSE;
903}
904
905UnicodeString
906RuleBasedNumberFormat::getRules() const
907{
908 UnicodeString result;
909 if (ruleSets != NULL) {
910 for (NFRuleSet** p = ruleSets; *p; ++p) {
911 (*p)->appendRules(result);
912 }
913 }
914 return result;
915}
916
917UnicodeString
918RuleBasedNumberFormat::getRuleSetName(int32_t index) const
919{
374ca955
A
920 if (localizations) {
921 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
922 return string;
923 } else if (ruleSets) {
924 UnicodeString result;
b75a7d8f
A
925 for (NFRuleSet** p = ruleSets; *p; ++p) {
926 NFRuleSet* rs = *p;
927 if (rs->isPublic()) {
928 if (--index == -1) {
929 rs->getName(result);
930 return result;
931 }
932 }
933 }
934 }
374ca955
A
935 UnicodeString empty;
936 return empty;
b75a7d8f
A
937}
938
939int32_t
940RuleBasedNumberFormat::getNumberOfRuleSetNames() const
941{
942 int32_t result = 0;
374ca955
A
943 if (localizations) {
944 result = localizations->getNumberOfRuleSets();
945 } else if (ruleSets) {
b75a7d8f
A
946 for (NFRuleSet** p = ruleSets; *p; ++p) {
947 if ((**p).isPublic()) {
948 ++result;
949 }
950 }
951 }
952 return result;
953}
954
374ca955
A
955int32_t
956RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
957 if (localizations) {
958 return localizations->getNumberOfDisplayLocales();
959 }
960 return 0;
961}
962
963Locale
964RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
965 if (U_FAILURE(status)) {
73c04bcf 966 return Locale("");
374ca955
A
967 }
968 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
969 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
970 char buffer[64];
971 int32_t cap = name.length() + 1;
972 char* bp = buffer;
973 if (cap > 64) {
974 bp = (char *)uprv_malloc(cap);
975 if (bp == NULL) {
976 status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 977 return Locale("");
374ca955
A
978 }
979 }
980 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
981 Locale retLocale(bp);
982 if (bp != buffer) {
983 uprv_free(bp);
984 }
985 return retLocale;
986 }
987 status = U_ILLEGAL_ARGUMENT_ERROR;
988 Locale retLocale;
989 return retLocale;
990}
991
992UnicodeString
993RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
994 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
995 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
996 int32_t len = localeName.length();
997 UChar* localeStr = localeName.getBuffer(len + 1);
998 while (len >= 0) {
999 localeStr[len] = 0;
1000 int32_t ix = localizations->indexForLocale(localeStr);
1001 if (ix >= 0) {
1002 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1003 return name;
1004 }
1005
1006 // trim trailing portion, skipping over ommitted sections
1007 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1008 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1009 }
1010 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1011 return name;
1012 }
1013 UnicodeString bogus;
1014 bogus.setToBogus();
1015 return bogus;
1016}
1017
1018UnicodeString
1019RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1020 if (localizations) {
1021 UnicodeString rsn(ruleSetName);
1022 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1023 return getRuleSetDisplayName(ix, localeParam);
1024 }
1025 UnicodeString bogus;
1026 bogus.setToBogus();
1027 return bogus;
1028}
1029
b75a7d8f
A
1030NFRuleSet*
1031RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1032{
1033 if (U_SUCCESS(status) && ruleSets) {
1034 for (NFRuleSet** p = ruleSets; *p; ++p) {
1035 NFRuleSet* rs = *p;
1036 if (rs->isNamed(name)) {
1037 return rs;
1038 }
1039 }
1040 status = U_ILLEGAL_ARGUMENT_ERROR;
1041 }
1042 return NULL;
1043}
1044
1045UnicodeString&
1046RuleBasedNumberFormat::format(int32_t number,
1047 UnicodeString& toAppendTo,
374ca955 1048 FieldPosition& /* pos */) const
b75a7d8f
A
1049{
1050 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1051 return toAppendTo;
1052}
1053
1054
1055UnicodeString&
1056RuleBasedNumberFormat::format(int64_t number,
1057 UnicodeString& toAppendTo,
374ca955 1058 FieldPosition& /* pos */) const
b75a7d8f
A
1059{
1060 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1061 return toAppendTo;
1062}
1063
1064
1065UnicodeString&
1066RuleBasedNumberFormat::format(double number,
1067 UnicodeString& toAppendTo,
374ca955 1068 FieldPosition& /* pos */) const
b75a7d8f 1069{
729e4ab9
A
1070 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1071 if (uprv_isNaN(number)) {
1072 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1073 if (decFmtSyms) {
1074 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1075 }
1076 } else if (defaultRuleSet) {
1077 defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1078 }
b75a7d8f
A
1079 return toAppendTo;
1080}
1081
1082
1083UnicodeString&
1084RuleBasedNumberFormat::format(int32_t number,
1085 const UnicodeString& ruleSetName,
1086 UnicodeString& toAppendTo,
374ca955 1087 FieldPosition& /* pos */,
b75a7d8f
A
1088 UErrorCode& status) const
1089{
1090 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1091 if (U_SUCCESS(status)) {
1092 if (ruleSetName.indexOf(gPercentPercent) == 0) {
1093 // throw new IllegalArgumentException("Can't use internal rule set");
1094 status = U_ILLEGAL_ARGUMENT_ERROR;
1095 } else {
1096 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1097 if (rs) {
1098 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1099 }
1100 }
1101 }
1102 return toAppendTo;
1103}
1104
1105
1106UnicodeString&
1107RuleBasedNumberFormat::format(int64_t number,
1108 const UnicodeString& ruleSetName,
1109 UnicodeString& toAppendTo,
374ca955 1110 FieldPosition& /* pos */,
b75a7d8f
A
1111 UErrorCode& status) const
1112{
1113 if (U_SUCCESS(status)) {
1114 if (ruleSetName.indexOf(gPercentPercent) == 0) {
1115 // throw new IllegalArgumentException("Can't use internal rule set");
1116 status = U_ILLEGAL_ARGUMENT_ERROR;
1117 } else {
1118 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1119 if (rs) {
1120 rs->format(number, toAppendTo, toAppendTo.length());
1121 }
1122 }
1123 }
1124 return toAppendTo;
1125}
1126
1127
1128// make linker happy
1129UnicodeString&
1130RuleBasedNumberFormat::format(const Formattable& obj,
1131 UnicodeString& toAppendTo,
1132 FieldPosition& pos,
1133 UErrorCode& status) const
1134{
1135 return NumberFormat::format(obj, toAppendTo, pos, status);
1136}
1137
1138UnicodeString&
1139RuleBasedNumberFormat::format(double number,
1140 const UnicodeString& ruleSetName,
1141 UnicodeString& toAppendTo,
374ca955 1142 FieldPosition& /* pos */,
b75a7d8f
A
1143 UErrorCode& status) const
1144{
1145 if (U_SUCCESS(status)) {
1146 if (ruleSetName.indexOf(gPercentPercent) == 0) {
1147 // throw new IllegalArgumentException("Can't use internal rule set");
1148 status = U_ILLEGAL_ARGUMENT_ERROR;
1149 } else {
1150 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151 if (rs) {
1152 rs->format(number, toAppendTo, toAppendTo.length());
1153 }
1154 }
1155 }
1156 return toAppendTo;
1157}
1158
1159void
1160RuleBasedNumberFormat::parse(const UnicodeString& text,
1161 Formattable& result,
1162 ParsePosition& parsePosition) const
1163{
729e4ab9
A
1164 //TODO: We need a real fix. See #6895 / #6896
1165 if (noParse) {
1166 // skip parsing
1167 parsePosition.setErrorIndex(0);
1168 return;
1169 }
1170
b75a7d8f
A
1171 if (!ruleSets) {
1172 parsePosition.setErrorIndex(0);
1173 return;
1174 }
1175
374ca955
A
1176 UnicodeString workingText(text, parsePosition.getIndex());
1177 ParsePosition workingPos(0);
1178
1179 ParsePosition high_pp(0);
b75a7d8f
A
1180 Formattable high_result;
1181
1182 for (NFRuleSet** p = ruleSets; *p; ++p) {
1183 NFRuleSet *rp = *p;
729e4ab9 1184 if (rp->isPublic() && rp->isParseable()) {
374ca955 1185 ParsePosition working_pp(0);
b75a7d8f
A
1186 Formattable working_result;
1187
729e4ab9 1188 rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
b75a7d8f
A
1189 if (working_pp.getIndex() > high_pp.getIndex()) {
1190 high_pp = working_pp;
1191 high_result = working_result;
1192
374ca955 1193 if (high_pp.getIndex() == workingText.length()) {
b75a7d8f
A
1194 break;
1195 }
1196 }
1197 }
1198 }
1199
46f4442e
A
1200 int32_t startIndex = parsePosition.getIndex();
1201 parsePosition.setIndex(startIndex + high_pp.getIndex());
374ca955
A
1202 if (high_pp.getIndex() > 0) {
1203 parsePosition.setErrorIndex(-1);
46f4442e
A
1204 } else {
1205 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1206 parsePosition.setErrorIndex(startIndex + errorIndex);
b75a7d8f 1207 }
b75a7d8f
A
1208 result = high_result;
1209 if (result.getType() == Formattable::kDouble) {
1210 int32_t r = (int32_t)result.getDouble();
1211 if ((double)r == result.getDouble()) {
1212 result.setLong(r);
1213 }
1214 }
1215}
1216
1217#if !UCONFIG_NO_COLLATION
1218
1219void
1220RuleBasedNumberFormat::setLenient(UBool enabled)
1221{
1222 lenient = enabled;
1223 if (!enabled && collator) {
1224 delete collator;
1225 collator = NULL;
1226 }
1227}
1228
1229#endif
1230
1231void
1232RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1233 if (U_SUCCESS(status)) {
1234 if (ruleSetName.isEmpty()) {
374ca955
A
1235 if (localizations) {
1236 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1237 defaultRuleSet = findRuleSet(name, status);
1238 } else {
b75a7d8f 1239 initDefaultRuleSet();
374ca955
A
1240 }
1241 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
b75a7d8f
A
1242 status = U_ILLEGAL_ARGUMENT_ERROR;
1243 } else {
1244 NFRuleSet* result = findRuleSet(ruleSetName, status);
1245 if (result != NULL) {
1246 defaultRuleSet = result;
1247 }
1248 }
1249 }
1250}
1251
374ca955
A
1252UnicodeString
1253RuleBasedNumberFormat::getDefaultRuleSetName() const {
1254 UnicodeString result;
1255 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1256 defaultRuleSet->getName(result);
1257 } else {
1258 result.setToBogus();
1259 }
1260 return result;
1261}
1262
b75a7d8f
A
1263void
1264RuleBasedNumberFormat::initDefaultRuleSet()
1265{
374ca955 1266 defaultRuleSet = NULL;
b75a7d8f 1267 if (!ruleSets) {
374ca955 1268 return;
b75a7d8f 1269 }
374ca955 1270
729e4ab9
A
1271 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1272 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1273 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1274
374ca955 1275 NFRuleSet**p = &ruleSets[0];
b75a7d8f 1276 while (*p) {
729e4ab9
A
1277 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1278 defaultRuleSet = *p;
1279 return;
1280 } else {
1281 ++p;
1282 }
b75a7d8f
A
1283 }
1284
1285 defaultRuleSet = *--p;
1286 if (!defaultRuleSet->isPublic()) {
1287 while (p != ruleSets) {
1288 if ((*--p)->isPublic()) {
1289 defaultRuleSet = *p;
1290 break;
1291 }
1292 }
1293 }
1294}
1295
1296
1297void
374ca955 1298RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
73c04bcf 1299 UParseError& pErr, UErrorCode& status)
b75a7d8f
A
1300{
1301 // TODO: implement UParseError
73c04bcf 1302 uprv_memset(&pErr, 0, sizeof(UParseError));
b75a7d8f
A
1303 // Note: this can leave ruleSets == NULL, so remaining code should check
1304 if (U_FAILURE(status)) {
1305 return;
1306 }
1307
374ca955
A
1308 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1309
b75a7d8f
A
1310 UnicodeString description(rules);
1311 if (!description.length()) {
1312 status = U_MEMORY_ALLOCATION_ERROR;
1313 return;
1314 }
1315
1316 // start by stripping the trailing whitespace from all the rules
1317 // (this is all the whitespace follwing each semicolon in the
1318 // description). This allows us to look for rule-set boundaries
1319 // by searching for ";%" without having to worry about whitespace
1320 // between the ; and the %
1321 stripWhitespace(description);
1322
1323 // check to see if there's a set of lenient-parse rules. If there
1324 // is, pull them out into our temporary holding place for them,
1325 // and delete them from the description before the real desciption-
1326 // parsing code sees them
1327 int32_t lp = description.indexOf(gLenientParse);
1328 if (lp != -1) {
1329 // we've got to make sure we're not in the middle of a rule
1330 // (where "%%lenient-parse" would actually get treated as
1331 // rule text)
1332 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1333 // locate the beginning and end of the actual collation
1334 // rules (there may be whitespace between the name and
1335 // the first token in the description)
1336 int lpEnd = description.indexOf(gSemiPercent, lp);
1337
1338 if (lpEnd == -1) {
1339 lpEnd = description.length() - 1;
1340 }
1341 int lpStart = lp + u_strlen(gLenientParse);
1342 while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1343 ++lpStart;
1344 }
1345
1346 // copy out the lenient-parse rules and delete them
1347 // from the description
1348 lenientParseRules = new UnicodeString();
1349 /* test for NULL */
1350 if (lenientParseRules == 0) {
1351 status = U_MEMORY_ALLOCATION_ERROR;
1352 return;
1353 }
1354 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1355
1356 description.remove(lp, lpEnd + 1 - lp);
1357 }
1358 }
1359
1360 // pre-flight parsing the description and count the number of
1361 // rule sets (";%" marks the end of one rule set and the beginning
1362 // of the next)
1363 int numRuleSets = 0;
1364 for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1365 ++numRuleSets;
1366 ++p;
1367 }
1368 ++numRuleSets;
1369
1370 // our rule list is an array of the appropriate size
1371 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1372 /* test for NULL */
1373 if (ruleSets == 0) {
1374 status = U_MEMORY_ALLOCATION_ERROR;
1375 return;
1376 }
1377
1378 for (int i = 0; i <= numRuleSets; ++i) {
1379 ruleSets[i] = NULL;
1380 }
1381
1382 // divide up the descriptions into individual rule-set descriptions
1383 // and store them in a temporary array. At each step, we also
1384 // new up a rule set, but all this does is initialize its name
1385 // and remove it from its description. We can't actually parse
1386 // the rest of the descriptions and finish initializing everything
1387 // because we have to know the names and locations of all the rule
1388 // sets before we can actually set everything up
1389 if(!numRuleSets) {
46f4442e
A
1390 status = U_ILLEGAL_ARGUMENT_ERROR;
1391 return;
b75a7d8f
A
1392 }
1393 UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
b75a7d8f
A
1394 if (ruleSetDescriptions == 0) {
1395 status = U_MEMORY_ALLOCATION_ERROR;
1396 return;
1397 }
1398
1399 {
1400 int curRuleSet = 0;
1401 int32_t start = 0;
1402 for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1403 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1404 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1405 if (ruleSets[curRuleSet] == 0) {
1406 status = U_MEMORY_ALLOCATION_ERROR;
46f4442e 1407 goto cleanup;
b75a7d8f
A
1408 }
1409 ++curRuleSet;
1410 start = p + 1;
1411 }
1412 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1413 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
b75a7d8f
A
1414 if (ruleSets[curRuleSet] == 0) {
1415 status = U_MEMORY_ALLOCATION_ERROR;
46f4442e 1416 goto cleanup;
b75a7d8f
A
1417 }
1418 }
1419
1420 // now we can take note of the formatter's default rule set, which
1421 // is the last public rule set in the description (it's the last
1422 // rather than the first so that a user can create a new formatter
1423 // from an existing formatter and change its default behavior just
1424 // by appending more rule sets to the end)
374ca955
A
1425
1426 // {dlf} Initialization of a fraction rule set requires the default rule
1427 // set to be known. For purposes of initialization, this is always the
1428 // last public rule set, no matter what the localization data says.
1429 initDefaultRuleSet();
b75a7d8f
A
1430
1431 // finally, we can go back through the temporary descriptions
1432 // list and finish seting up the substructure (and we throw
1433 // away the temporary descriptions as we go)
1434 {
1435 for (int i = 0; i < numRuleSets; i++) {
1436 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1437 }
1438 }
1439
374ca955
A
1440 // Now that the rules are initialized, the 'real' default rule
1441 // set can be adjusted by the localization data.
1442
1443 // The C code keeps the localization array as is, rather than building
1444 // a separate array of the public rule set names, so we have less work
1445 // to do here-- but we still need to check the names.
1446
1447 if (localizationInfos) {
1448 // confirm the names, if any aren't in the rules, that's an error
1449 // it is ok if the rules contain public rule sets that are not in this list
1450 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1451 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1452 NFRuleSet* rs = findRuleSet(name, status);
1453 if (rs == NULL) {
1454 break; // error
1455 }
1456 if (i == 0) {
1457 defaultRuleSet = rs;
1458 }
1459 }
1460 } else {
1461 defaultRuleSet = getDefaultRuleSet();
1462 }
46f4442e
A
1463
1464cleanup:
1465 delete[] ruleSetDescriptions;
b75a7d8f
A
1466}
1467
1468void
1469RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1470{
1471 // iterate through the characters...
1472 UnicodeString result;
1473
1474 int start = 0;
1475 while (start != -1 && start < description.length()) {
1476 // seek to the first non-whitespace character...
1477 while (start < description.length()
1478 && uprv_isRuleWhiteSpace(description.charAt(start))) {
1479 ++start;
1480 }
1481
1482 // locate the next semicolon in the text and copy the text from
1483 // our current position up to that semicolon into the result
1484 int32_t p = description.indexOf(gSemiColon, start);
1485 if (p == -1) {
1486 // or if we don't find a semicolon, just copy the rest of
1487 // the string into the result
1488 result.append(description, start, description.length() - start);
1489 start = -1;
1490 }
1491 else if (p < description.length()) {
1492 result.append(description, start, p + 1 - start);
1493 start = p + 1;
1494 }
1495
1496 // when we get here, we've seeked off the end of the sring, and
1497 // we terminate the loop (we continue until *start* is -1 rather
1498 // than until *p* is -1, because otherwise we'd miss the last
1499 // rule in the description)
1500 else {
1501 start = -1;
1502 }
1503 }
1504
1505 description.setTo(result);
1506}
1507
1508
1509void
1510RuleBasedNumberFormat::dispose()
1511{
1512 if (ruleSets) {
1513 for (NFRuleSet** p = ruleSets; *p; ++p) {
1514 delete *p;
1515 }
1516 uprv_free(ruleSets);
1517 ruleSets = NULL;
1518 }
1519
1520#if !UCONFIG_NO_COLLATION
1521 delete collator;
1522#endif
1523 collator = NULL;
1524
1525 delete decimalFormatSymbols;
1526 decimalFormatSymbols = NULL;
1527
1528 delete lenientParseRules;
1529 lenientParseRules = NULL;
374ca955
A
1530
1531 if (localizations) localizations = localizations->unref();
b75a7d8f
A
1532}
1533
1534
1535//-----------------------------------------------------------------------
1536// package-internal API
1537//-----------------------------------------------------------------------
1538
1539/**
1540 * Returns the collator to use for lenient parsing. The collator is lazily created:
1541 * this function creates it the first time it's called.
1542 * @return The collator to use for lenient parsing, or null if lenient parsing
1543 * is turned off.
1544*/
1545Collator*
1546RuleBasedNumberFormat::getCollator() const
1547{
1548#if !UCONFIG_NO_COLLATION
1549 if (!ruleSets) {
1550 return NULL;
1551 }
1552
1553 // lazy-evaulate the collator
1554 if (collator == NULL && lenient) {
1555 // create a default collator based on the formatter's locale,
1556 // then pull out that collator's rules, append any additional
1557 // rules specified in the description, and create a _new_
1558 // collator based on the combinaiton of those rules
1559
1560 UErrorCode status = U_ZERO_ERROR;
1561
1562 Collator* temp = Collator::createInstance(locale, status);
729e4ab9
A
1563 RuleBasedCollator* newCollator;
1564 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
b75a7d8f
A
1565 if (lenientParseRules) {
1566 UnicodeString rules(newCollator->getRules());
1567 rules.append(*lenientParseRules);
1568
1569 newCollator = new RuleBasedCollator(rules, status);
46f4442e
A
1570 // Exit if newCollator could not be created.
1571 if (newCollator == NULL) {
1572 return NULL;
1573 }
b75a7d8f
A
1574 } else {
1575 temp = NULL;
1576 }
1577 if (U_SUCCESS(status)) {
1578 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1579 // cast away const
1580 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1581 } else {
1582 delete newCollator;
1583 }
1584 }
1585 delete temp;
1586 }
1587#endif
1588
1589 // if lenient-parse mode is off, this will be null
1590 // (see setLenientParseMode())
1591 return collator;
1592}
1593
1594
1595/**
1596 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1597 * instances owned by this formatter. This object is lazily created: this function
1598 * creates it the first time it's called.
1599 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1600 * instances owned by this formatter.
1601*/
1602DecimalFormatSymbols*
1603RuleBasedNumberFormat::getDecimalFormatSymbols() const
1604{
1605 // lazy-evaluate the DecimalFormatSymbols object. This object
1606 // is shared by all DecimalFormat instances belonging to this
1607 // formatter
1608 if (decimalFormatSymbols == NULL) {
1609 UErrorCode status = U_ZERO_ERROR;
1610 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1611 if (U_SUCCESS(status)) {
1612 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1613 } else {
1614 delete temp;
1615 }
1616 }
1617 return decimalFormatSymbols;
1618}
1619
374ca955
A
1620U_NAMESPACE_END
1621
b75a7d8f
A
1622/* U_HAVE_RBNF */
1623#endif