icuSources/i18n/rbnf.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2004, International Business Machines Corporation and others. All Rights Reserved.
   4 *******************************************************************************
   5 */
   6
   7 #include "unicode/rbnf.h"
   8
   9 #if U_HAVE_RBNF
  10
  11 #include "unicode/normlzr.h"
  12 #include "unicode/tblcoll.h"
  13 #include "unicode/uchar.h"
  14 #include "unicode/ucol.h"
  15 #include "unicode/uloc.h"
  16 #include "unicode/unum.h"
  17 #include "unicode/ures.h"
  18 #include "unicode/ustring.h"
  19 #include "unicode/utf16.h"
  20 #include "unicode/udata.h"
  21 #include "nfrs.h"
  22
  23 #include "cmemory.h"
  24 #include "cstring.h"
  25 #include "util.h"
  26
  27 // debugging
  28 // #define DEBUG
  29
  30 #ifdef DEBUG
  31 #include "stdio.h"
  32 #endif
  33
  34 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
  35
  36 static const UChar gPercentPercent[] =
  37 {
  38     0x25, 0x25, 0
  39 }; /* "%%" */
  40
  41 // All urbnf objects are created through openRules, so we init all of the
  42 // Unicode string constants required by rbnf, nfrs, or nfr here.
  43 static const UChar gLenientParse[] =
  44 {
  45     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
  46 }; /* "%%lenient-parse:" */
  47 static const UChar gSemiColon = 0x003B;
  48 static const UChar gSemiPercent[] =
  49 {
  50     0x3B, 0x25, 0
  51 }; /* ";%" */
  52
  53 #define kSomeNumberOfBitsDiv2 22
  54 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
  55 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
  56
  57 U_NAMESPACE_BEGIN
  58
  59 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
  60
  61 class LocalizationInfo : public UObject {
  62 protected:
  63     virtual ~LocalizationInfo() {};
  64     uint32_t refcount;
  65
  66 public:
  67     LocalizationInfo() : refcount(0) {}
  68
  69     LocalizationInfo* ref(void) {
  70         ++refcount;
  71         return this;
  72     }
  73
  74     LocalizationInfo* unref(void) {
  75         if (refcount && --refcount == 0) {
  76             delete this;
  77         }
  78         return NULL;
  79     }
  80
  81     virtual UBool operator==(const LocalizationInfo* rhs) const;
  82     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
  83
  84     virtual int32_t getNumberOfRuleSets(void) const = 0;
  85     virtual const UChar* getRuleSetName(int32_t index) const = 0;
  86     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
  87     virtual const UChar* getLocaleName(int32_t index) const = 0;
  88     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
  89
  90     virtual int32_t indexForLocale(const UChar* locale) const;
  91     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
  92
  93     virtual UClassID getDynamicClassID() const = 0;
  94     static UClassID getStaticClassID(void);
  95 };
  96
  97 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
  98
  99 // if both strings are NULL, this returns TRUE
 100 static UBool
 101 streq(const UChar* lhs, const UChar* rhs) {
 102     if (rhs == lhs) {
 103         return TRUE;
 104     }
 105     if (lhs && rhs) {
 106         return u_strcmp(lhs, rhs) == 0;
 107     }
 108     return FALSE;
 109 }
 110
 111 UBool
 112 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
 113     if (rhs) {
 114         if (this == rhs) {
 115             return TRUE;
 116         }
 117
 118         int32_t rsc = getNumberOfRuleSets();
 119         if (rsc == rhs->getNumberOfRuleSets()) {
 120             for (int i = 0; i < rsc; ++i) {
 121                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
 122                     return FALSE;
 123                 }
 124             }
 125             int32_t dlc = getNumberOfDisplayLocales();
 126             if (dlc == rhs->getNumberOfDisplayLocales()) {
 127                 for (int i = 0; i < dlc; ++i) {
 128                     const UChar* locale = getLocaleName(i);
 129                     int32_t ix = rhs->indexForLocale(locale);
 130                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
 131                     if (!streq(locale, rhs->getLocaleName(ix))) {
 132                         return FALSE;
 133                     }
 134                     for (int j = 0; j < rsc; ++j) {
 135                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
 136                             return FALSE;
 137                         }
 138                     }
 139                 }
 140                 return TRUE;
 141             }
 142         }
 143     }
 144     return FALSE;
 145 }
 146
 147 int32_t
 148 LocalizationInfo::indexForLocale(const UChar* locale) const {
 149     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
 150         if (streq(locale, getLocaleName(i))) {
 151             return i;
 152         }
 153     }
 154     return -1;
 155 }
 156
 157 int32_t
 158 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
 159     if (ruleset) {
 160         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
 161             if (streq(ruleset, getRuleSetName(i))) {
 162                 return i;
 163             }
 164         }
 165     }
 166     return -1;
 167 }
 168
 169
 170 typedef void (*Fn_Deleter)(void*);
 171
 172 class VArray {
 173     void** buf;
 174     int32_t cap;
 175     int32_t size;
 176     Fn_Deleter deleter;
 177 public:
 178     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
 179
 180     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
 181
 182     ~VArray() {
 183         if (deleter) {
 184             for (int i = 0; i < size; ++i) {
 185                 (*deleter)(buf[i]);
 186             }
 187         }
 188         uprv_free(buf);
 189     }
 190
 191     int32_t length() {
 192         return size;
 193     }
 194
 195     void add(void* elem, UErrorCode& status) {
 196         if (U_SUCCESS(status)) {
 197             if (size == cap) {
 198                 if (cap == 0) {
 199                     cap = 1;
 200                 } else if (cap < 256) {
 201                     cap *= 2;
 202                 } else {
 203                     cap += 256;
 204                 }
 205                 if (buf == NULL) {
 206                     buf = (void**)uprv_malloc(cap * sizeof(void*));
 207                 } else {
 208                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
 209                 }
 210                 if (buf == NULL) {
 211                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
 212                     status = U_MEMORY_ALLOCATION_ERROR;
 213                     return;
 214                 }
 215                 void* start = &buf[size];
 216                 size_t count = (cap - size) * sizeof(void*);
 217                 uprv_memset(start, 0, count); // fill with nulls, just because
 218             }
 219             buf[size++] = elem;
 220         }
 221     }
 222
 223     void** release(void) {
 224         void** result = buf;
 225         buf = NULL;
 226         cap = 0;
 227         size = 0;
 228         return result;
 229     }
 230 };
 231
 232 class LocDataParser;
 233
 234 class StringLocalizationInfo : public LocalizationInfo {
 235     UChar* info;
 236     UChar*** data;
 237     int32_t numRuleSets;
 238     int32_t numLocales;
 239
 240 friend class LocDataParser;
 241
 242     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
 243         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
 244     {
 245     }
 246
 247 public:
 248     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
 249
 250     virtual ~StringLocalizationInfo();
 251     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
 252     virtual const UChar* getRuleSetName(int32_t index) const;
 253     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
 254     virtual const UChar* getLocaleName(int32_t index) const;
 255     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
 256
 257     virtual UClassID getDynamicClassID() const;
 258     static UClassID getStaticClassID(void);
 259
 260 private:
 261     void init(UErrorCode& status) const;
 262 };
 263
 264
 265 enum {
 266     OPEN_ANGLE = 0x003c, /* '<' */
 267     CLOSE_ANGLE = 0x003e, /* '>' */
 268     COMMA = 0x002c,
 269     TICK = 0x0027,
 270     QUOTE = 0x0022,
 271     SPACE = 0x0020
 272 };
 273
 274 /**
 275  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
 276  */
 277 class LocDataParser {
 278     UChar* data;
 279     const UChar* e;
 280     UChar* p;
 281     UChar ch;
 282     UParseError& pe;
 283     UErrorCode& ec;
 284
 285 public:
 286     LocDataParser(UParseError& parseError, UErrorCode& status)
 287         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
 288     ~LocDataParser() {}
 289
 290     /*
 291     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
 292     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
 293     */
 294     StringLocalizationInfo* parse(UChar* data, int32_t len);
 295
 296 private:
 297
 298     void inc(void) { ++p; ch = 0xffff; }
 299     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
 300     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
 301     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
 302     UBool inList(UChar c, const UChar* list) const {
 303         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
 304         while (*list && *list != c) ++list; return *list == c;
 305     }
 306     void parseError(const char* msg);
 307
 308     StringLocalizationInfo* doParse(void);
 309
 310     UChar** nextArray(int32_t& requiredLength);
 311     UChar*  nextString(void);
 312 };
 313
 314 #ifdef DEBUG
 315 #define ERROR(msg) parseError(msg); return NULL;
 316 #else
 317 #define ERROR(msg) parseError(NULL); return NULL;
 318 #endif
 319
 320
 321 static const UChar DQUOTE_STOPLIST[] = {
 322     QUOTE, 0
 323 };
 324
 325 static const UChar SQUOTE_STOPLIST[] = {
 326     TICK, 0
 327 };
 328
 329 static const UChar NOQUOTE_STOPLIST[] = {
 330     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
 331 };
 332
 333 static void
 334 DeleteFn(void* p) {
 335   uprv_free(p);
 336 }
 337
 338 StringLocalizationInfo*
 339 LocDataParser::parse(UChar* _data, int32_t len) {
 340     if (U_FAILURE(ec)) {
 341         if (_data) uprv_free(_data);
 342         return NULL;
 343     }
 344
 345     pe.line = 0;
 346     pe.offset = -1;
 347     pe.postContext[0] = 0;
 348     pe.preContext[0] = 0;
 349
 350     if (_data == NULL) {
 351         ec = U_ILLEGAL_ARGUMENT_ERROR;
 352         return NULL;
 353     }
 354
 355     if (len <= 0) {
 356         ec = U_ILLEGAL_ARGUMENT_ERROR;
 357         uprv_free(_data);
 358         return NULL;
 359     }
 360
 361     data = _data;
 362     e = data + len;
 363     p = _data;
 364     ch = 0xffff;
 365
 366     return doParse();
 367 }
 368
 369
 370 StringLocalizationInfo*
 371 LocDataParser::doParse(void) {
 372     skipWhitespace();
 373     if (!checkInc(OPEN_ANGLE)) {
 374         ERROR("Missing open angle");
 375     } else {
 376         VArray array(DeleteFn);
 377         UBool mightHaveNext = TRUE;
 378         int32_t requiredLength = -1;
 379         while (mightHaveNext) {
 380             mightHaveNext = FALSE;
 381             UChar** elem = nextArray(requiredLength);
 382             skipWhitespace();
 383             UBool haveComma = check(COMMA);
 384             if (elem) {
 385                 array.add(elem, ec);
 386                 if (haveComma) {
 387                     inc();
 388                     mightHaveNext = TRUE;
 389                 }
 390             } else if (haveComma) {
 391                 ERROR("Unexpected character");
 392             }
 393         }
 394
 395         skipWhitespace();
 396         if (!checkInc(CLOSE_ANGLE)) {
 397             if (check(OPEN_ANGLE)) {
 398                 ERROR("Missing comma in outer array");
 399             } else {
 400                 ERROR("Missing close angle bracket in outer array");
 401             }
 402         }
 403
 404         skipWhitespace();
 405         if (p != e) {
 406             ERROR("Extra text after close of localization data");
 407         }
 408
 409         array.add(NULL, ec);
 410         if (U_SUCCESS(ec)) {
 411             int32_t numLocs = array.length() - 2; // subtract first, NULL
 412             UChar*** result = (UChar***)array.release();
 413
 414             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
 415         }
 416     }
 417
 418     ERROR("Unknown error");
 419 }
 420
 421 UChar**
 422 LocDataParser::nextArray(int32_t& requiredLength) {
 423     if (U_FAILURE(ec)) {
 424         return NULL;
 425     }
 426
 427     skipWhitespace();
 428     if (!checkInc(OPEN_ANGLE)) {
 429         ERROR("Missing open angle");
 430     }
 431
 432     VArray array;
 433     UBool mightHaveNext = TRUE;
 434     while (mightHaveNext) {
 435         mightHaveNext = FALSE;
 436         UChar* elem = nextString();
 437         skipWhitespace();
 438         UBool haveComma = check(COMMA);
 439         if (elem) {
 440             array.add(elem, ec);
 441             if (haveComma) {
 442                 inc();
 443                 mightHaveNext = TRUE;
 444             }
 445         } else if (haveComma) {
 446             ERROR("Unexpected comma");
 447         }
 448     }
 449     skipWhitespace();
 450     if (!checkInc(CLOSE_ANGLE)) {
 451         if (check(OPEN_ANGLE)) {
 452             ERROR("Missing close angle bracket in inner array");
 453         } else {
 454             ERROR("Missing comma in inner array");
 455         }
 456     }
 457
 458     array.add(NULL, ec);
 459     if (U_SUCCESS(ec)) {
 460         if (requiredLength == -1) {
 461             requiredLength = array.length() + 1;
 462         } else if (array.length() != requiredLength) {
 463             ec = U_ILLEGAL_ARGUMENT_ERROR;
 464             ERROR("Array not of required length");
 465         }
 466
 467         return (UChar**)array.release();
 468     }
 469     ERROR("Unknown Error");
 470 }
 471
 472 UChar*
 473 LocDataParser::nextString() {
 474     UChar* result = NULL;
 475
 476     skipWhitespace();
 477     if (p < e) {
 478         const UChar* terminators;
 479         UChar c = *p;
 480         UBool haveQuote = c == QUOTE || c == TICK;
 481         if (haveQuote) {
 482             inc();
 483             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
 484         } else {
 485             terminators = NOQUOTE_STOPLIST;
 486         }
 487         UChar* start = p;
 488         while (p < e && !inList(*p, terminators)) ++p;
 489         if (p == e) {
 490             ERROR("Unexpected end of data");
 491         }
 492
 493         UChar x = *p;
 494         if (p > start) {
 495             ch = x;
 496             *p = 0x0; // terminate by writing to data
 497             result = start; // just point into data
 498         }
 499         if (haveQuote) {
 500             if (x != c) {
 501                 ERROR("Missing matching quote");
 502             } else if (p == start) {
 503                 ERROR("Empty string");
 504             }
 505             inc();
 506         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
 507             ERROR("Unexpected character in string");
 508         }
 509     }
 510
 511     // ok for there to be no next string
 512     return result;
 513 }
 514
 515 void
 516 LocDataParser::parseError(const char* /*str*/) {
 517     if (!data) {
 518         return;
 519     }
 520
 521     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
 522     if (start < data)
 523         start = data;
 524     for (UChar* x = p; --x >= start;)
 525         if (!*x) {
 526             start = x+1;
 527             break;
 528         }
 529     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
 530     if (limit > e)
 531         limit = e;
 532     u_strncpy(pe.preContext, start, p-start);
 533     pe.preContext[p-start] = 0;
 534     u_strncpy(pe.postContext, p, limit-p);
 535     pe.postContext[limit-p] = 0;
 536     pe.offset = p - data;
 537
 538 #ifdef DEBUG
 539     fprintf(stderr, "%s at or near character %d: ", str, p-data);
 540
 541     UnicodeString msg;
 542     msg.append(start, p - start);
 543     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
 544     msg.append(p, limit-p);
 545     msg.append("'");
 546
 547     char buf[128];
 548     int32_t len = msg.extract(0, msg.length(), buf, 128);
 549     if (len >= 128) {
 550         buf[127] = 0;
 551     } else {
 552         buf[len] = 0;
 553     }
 554     fprintf(stderr, "%s\n", buf);
 555     fflush(stderr);
 556 #endif
 557
 558     uprv_free(data);
 559     data = NULL;
 560     p = NULL;
 561     e = NULL;
 562
 563     if (U_SUCCESS(ec)) {
 564         ec = U_PARSE_ERROR;
 565     }
 566 }
 567
 568 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
 569
 570 StringLocalizationInfo*
 571 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
 572     if (U_FAILURE(status)) {
 573         return NULL;
 574     }
 575
 576     int32_t len = info.length();
 577     if (len == 0) {
 578         return NULL; // no error;
 579     }
 580
 581     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
 582     if (!p) {
 583         status = U_MEMORY_ALLOCATION_ERROR;
 584         return NULL;
 585     }
 586     info.extract(p, len, status);
 587     if (!U_FAILURE(status)) {
 588         status = U_ZERO_ERROR; // clear warning about non-termination
 589     }
 590
 591     LocDataParser parser(perror, status);
 592     return parser.parse(p, len);
 593 }
 594
 595 StringLocalizationInfo::~StringLocalizationInfo() {
 596     for (UChar*** p = (UChar***)data; *p; ++p) {
 597         // remaining data is simply pointer into our unicode string data.
 598         if (*p) uprv_free(*p);
 599     }
 600     if (data) uprv_free(data);
 601     if (info) uprv_free(info);
 602 }
 603
 604
 605 const UChar*
 606 StringLocalizationInfo::getRuleSetName(int32_t index) const {
 607     if (index >= 0 && index < getNumberOfRuleSets()) {
 608         return data[0][index];
 609     }
 610     return NULL;
 611 }
 612
 613 const UChar*
 614 StringLocalizationInfo::getLocaleName(int32_t index) const {
 615     if (index >= 0 && index < getNumberOfDisplayLocales()) {
 616         return data[index+1][0];
 617     }
 618     return NULL;
 619 }
 620
 621 const UChar*
 622 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
 623     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
 624         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
 625         return data[localeIndex+1][ruleIndex+1];
 626     }
 627     return NULL;
 628 }
 629
 630 // ----------
 631
 632 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 633                                              const UnicodeString& locs,
 634                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 635   : ruleSets(NULL)
 636   , defaultRuleSet(NULL)
 637   , locale(alocale)
 638   , collator(NULL)
 639   , decimalFormatSymbols(NULL)
 640   , lenient(FALSE)
 641   , lenientParseRules(NULL)
 642   , localizations(NULL)
 643 {
 644   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 645   init(description, locinfo, perror, status);
 646 }
 647
 648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 649                                              const UnicodeString& locs,
 650                                              UParseError& perror, UErrorCode& status)
 651   : ruleSets(NULL)
 652   , defaultRuleSet(NULL)
 653   , locale(Locale::getDefault())
 654   , collator(NULL)
 655   , decimalFormatSymbols(NULL)
 656   , lenient(FALSE)
 657   , lenientParseRules(NULL)
 658   , localizations(NULL)
 659 {
 660   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 661   init(description, locinfo, perror, status);
 662 }
 663
 664 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 665                                              LocalizationInfo* info,
 666                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 667   : ruleSets(NULL)
 668   , defaultRuleSet(NULL)
 669   , locale(alocale)
 670   , collator(NULL)
 671   , decimalFormatSymbols(NULL)
 672   , lenient(FALSE)
 673   , lenientParseRules(NULL)
 674   , localizations(NULL)
 675 {
 676   init(description, info, perror, status);
 677 }
 678
 679 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 680                          UParseError& perror,
 681                          UErrorCode& status)
 682   : ruleSets(NULL)
 683   , defaultRuleSet(NULL)
 684   , locale(Locale::getDefault())
 685   , collator(NULL)
 686   , decimalFormatSymbols(NULL)
 687   , lenient(FALSE)
 688   , lenientParseRules(NULL)
 689   , localizations(NULL)
 690 {
 691     init(description, NULL, perror, status);
 692 }
 693
 694 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 695                          const Locale& aLocale,
 696                          UParseError& perror,
 697                          UErrorCode& status)
 698   : ruleSets(NULL)
 699   , defaultRuleSet(NULL)
 700   , locale(aLocale)
 701   , collator(NULL)
 702   , decimalFormatSymbols(NULL)
 703   , lenient(FALSE)
 704   , lenientParseRules(NULL)
 705   , localizations(NULL)
 706 {
 707     init(description, NULL, perror, status);
 708 }
 709
 710 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
 711   : ruleSets(NULL)
 712   , defaultRuleSet(NULL)
 713   , locale(alocale)
 714   , collator(NULL)
 715   , decimalFormatSymbols(NULL)
 716   , lenient(FALSE)
 717   , lenientParseRules(NULL)
 718   , localizations(NULL)
 719 {
 720     if (U_FAILURE(status)) {
 721         return;
 722     }
 723
 724     const char* fmt_tag = "";
 725     switch (tag) {
 726     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
 727     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
 728     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
 729     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
 730     }
 731
 732     // TODO: read localization info from resource
 733     LocalizationInfo* locinfo = NULL;
 734
 735     int32_t len = 0;
 736     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
 737     if (U_SUCCESS(status)) {
 738         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
 739                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
 740         const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status);
 741         UnicodeString desc(description, len);
 742         UParseError perror;
 743         init (desc, locinfo, perror, status);
 744     }
 745     ures_close(nfrb);
 746 }
 747
 748 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
 749   : NumberFormat(rhs)
 750   , ruleSets(NULL)
 751   , defaultRuleSet(NULL)
 752   , locale(rhs.locale)
 753   , collator(NULL)
 754   , decimalFormatSymbols(NULL)
 755   , lenient(FALSE)
 756   , lenientParseRules(NULL)
 757   , localizations(NULL)
 758 {
 759     this->operator=(rhs);
 760 }
 761
 762 // --------
 763
 764 RuleBasedNumberFormat&
 765 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
 766 {
 767     UErrorCode status = U_ZERO_ERROR;
 768     dispose();
 769     locale = rhs.locale;
 770     lenient = rhs.lenient;
 771
 772     UnicodeString rules = rhs.getRules();
 773     UParseError perror;
 774     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
 775
 776     return *this;
 777 }
 778
 779 RuleBasedNumberFormat::~RuleBasedNumberFormat()
 780 {
 781     dispose();
 782 }
 783
 784 Format*
 785 RuleBasedNumberFormat::clone(void) const
 786 {
 787     RuleBasedNumberFormat * result = NULL;
 788     UnicodeString rules = getRules();
 789     UErrorCode status = U_ZERO_ERROR;
 790     UParseError perror;
 791     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
 792     /* test for NULL */
 793     if (result == 0) {
 794         status = U_MEMORY_ALLOCATION_ERROR;
 795         return 0;
 796     }
 797     if (U_FAILURE(status)) {
 798         delete result;
 799         result = 0;
 800     } else {
 801         result->lenient = lenient;
 802     }
 803     return result;
 804 }
 805
 806 UBool
 807 RuleBasedNumberFormat::operator==(const Format& other) const
 808 {
 809     if (this == &other) {
 810         return TRUE;
 811     }
 812
 813     if (other.getDynamicClassID() == getStaticClassID()) {
 814         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
 815         if (locale == rhs.locale &&
 816             lenient == rhs.lenient &&
 817             (localizations == NULL
 818                 ? rhs.localizations == NULL
 819                 : (rhs.localizations == NULL
 820                     ? FALSE
 821                     : *localizations == rhs.localizations))) {
 822
 823             NFRuleSet** p = ruleSets;
 824             NFRuleSet** q = rhs.ruleSets;
 825             if (p == NULL) {
 826                 return q == NULL;
 827             } else if (q == NULL) {
 828                 return FALSE;
 829             }
 830             while (*p && *q && (**p == **q)) {
 831                 ++p;
 832                 ++q;
 833             }
 834             return *q == NULL && *p == NULL;
 835         }
 836     }
 837
 838     return FALSE;
 839 }
 840
 841 UnicodeString
 842 RuleBasedNumberFormat::getRules() const
 843 {
 844     UnicodeString result;
 845     if (ruleSets != NULL) {
 846         for (NFRuleSet** p = ruleSets; *p; ++p) {
 847             (*p)->appendRules(result);
 848         }
 849     }
 850     return result;
 851 }
 852
 853 UnicodeString
 854 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
 855 {
 856     if (localizations) {
 857       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
 858       return string;
 859     } else if (ruleSets) {
 860         UnicodeString result;
 861         for (NFRuleSet** p = ruleSets; *p; ++p) {
 862             NFRuleSet* rs = *p;
 863             if (rs->isPublic()) {
 864                 if (--index == -1) {
 865                     rs->getName(result);
 866                     return result;
 867                 }
 868             }
 869         }
 870     }
 871     UnicodeString empty;
 872     return empty;
 873 }
 874
 875 int32_t
 876 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
 877 {
 878     int32_t result = 0;
 879     if (localizations) {
 880       result = localizations->getNumberOfRuleSets();
 881     } else if (ruleSets) {
 882         for (NFRuleSet** p = ruleSets; *p; ++p) {
 883             if ((**p).isPublic()) {
 884                 ++result;
 885             }
 886         }
 887     }
 888     return result;
 889 }
 890
 891 int32_t
 892 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
 893     if (localizations) {
 894         return localizations->getNumberOfDisplayLocales();
 895     }
 896     return 0;
 897 }
 898
 899 Locale
 900 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
 901     if (U_FAILURE(status)) {
 902         return Locale();
 903     }
 904     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
 905         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
 906         char buffer[64];
 907         int32_t cap = name.length() + 1;
 908         char* bp = buffer;
 909         if (cap > 64) {
 910             bp = (char *)uprv_malloc(cap);
 911             if (bp == NULL) {
 912                 status = U_MEMORY_ALLOCATION_ERROR;
 913                 return Locale();
 914             }
 915         }
 916         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
 917         Locale retLocale(bp);
 918         if (bp != buffer) {
 919             uprv_free(bp);
 920         }
 921         return retLocale;
 922     }
 923     status = U_ILLEGAL_ARGUMENT_ERROR;
 924     Locale retLocale;
 925     return retLocale;
 926 }
 927
 928 UnicodeString
 929 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
 930     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
 931         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
 932         int32_t len = localeName.length();
 933         UChar* localeStr = localeName.getBuffer(len + 1);
 934         while (len >= 0) {
 935             localeStr[len] = 0;
 936             int32_t ix = localizations->indexForLocale(localeStr);
 937             if (ix >= 0) {
 938                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
 939                 return name;
 940             }
 941
 942             // trim trailing portion, skipping over ommitted sections
 943             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
 944             while (len > 0 && localeStr[len-1] == 0x005F) --len;
 945         }
 946         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
 947         return name;
 948     }
 949     UnicodeString bogus;
 950     bogus.setToBogus();
 951     return bogus;
 952 }
 953
 954 UnicodeString
 955 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
 956     if (localizations) {
 957         UnicodeString rsn(ruleSetName);
 958         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
 959         return getRuleSetDisplayName(ix, localeParam);
 960     }
 961     UnicodeString bogus;
 962     bogus.setToBogus();
 963     return bogus;
 964 }
 965
 966 NFRuleSet*
 967 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
 968 {
 969     if (U_SUCCESS(status) && ruleSets) {
 970         for (NFRuleSet** p = ruleSets; *p; ++p) {
 971             NFRuleSet* rs = *p;
 972             if (rs->isNamed(name)) {
 973                 return rs;
 974             }
 975         }
 976         status = U_ILLEGAL_ARGUMENT_ERROR;
 977     }
 978     return NULL;
 979 }
 980
 981 UnicodeString&
 982 RuleBasedNumberFormat::format(int32_t number,
 983                               UnicodeString& toAppendTo,
 984                               FieldPosition& /* pos */) const
 985 {
 986     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
 987     return toAppendTo;
 988 }
 989
 990
 991 UnicodeString&
 992 RuleBasedNumberFormat::format(int64_t number,
 993                               UnicodeString& toAppendTo,
 994                               FieldPosition& /* pos */) const
 995 {
 996     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
 997     return toAppendTo;
 998 }
 999
1000
1001 UnicodeString&
1002 RuleBasedNumberFormat::format(double number,
1003                               UnicodeString& toAppendTo,
1004                               FieldPosition& /* pos */) const
1005 {
1006     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1007     return toAppendTo;
1008 }
1009
1010
1011 UnicodeString&
1012 RuleBasedNumberFormat::format(int32_t number,
1013                               const UnicodeString& ruleSetName,
1014                               UnicodeString& toAppendTo,
1015                               FieldPosition& /* pos */,
1016                               UErrorCode& status) const
1017 {
1018     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1019     if (U_SUCCESS(status)) {
1020         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1021             // throw new IllegalArgumentException("Can't use internal rule set");
1022             status = U_ILLEGAL_ARGUMENT_ERROR;
1023         } else {
1024             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1025             if (rs) {
1026                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1027             }
1028         }
1029     }
1030     return toAppendTo;
1031 }
1032
1033
1034 UnicodeString&
1035 RuleBasedNumberFormat::format(int64_t number,
1036                               const UnicodeString& ruleSetName,
1037                               UnicodeString& toAppendTo,
1038                               FieldPosition& /* pos */,
1039                               UErrorCode& status) const
1040 {
1041     if (U_SUCCESS(status)) {
1042         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1043             // throw new IllegalArgumentException("Can't use internal rule set");
1044             status = U_ILLEGAL_ARGUMENT_ERROR;
1045         } else {
1046             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1047             if (rs) {
1048                 rs->format(number, toAppendTo, toAppendTo.length());
1049             }
1050         }
1051     }
1052     return toAppendTo;
1053 }
1054
1055
1056 // make linker happy
1057 UnicodeString&
1058 RuleBasedNumberFormat::format(const Formattable& obj,
1059                               UnicodeString& toAppendTo,
1060                               FieldPosition& pos,
1061                               UErrorCode& status) const
1062 {
1063     return NumberFormat::format(obj, toAppendTo, pos, status);
1064 }
1065
1066 UnicodeString&
1067 RuleBasedNumberFormat::format(double number,
1068                               const UnicodeString& ruleSetName,
1069                               UnicodeString& toAppendTo,
1070                               FieldPosition& /* pos */,
1071                               UErrorCode& status) const
1072 {
1073     if (U_SUCCESS(status)) {
1074         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1075             // throw new IllegalArgumentException("Can't use internal rule set");
1076             status = U_ILLEGAL_ARGUMENT_ERROR;
1077         } else {
1078             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1079             if (rs) {
1080                 rs->format(number, toAppendTo, toAppendTo.length());
1081             }
1082         }
1083     }
1084     return toAppendTo;
1085 }
1086
1087 void
1088 RuleBasedNumberFormat::parse(const UnicodeString& text,
1089                              Formattable& result,
1090                              ParsePosition& parsePosition) const
1091 {
1092     if (!ruleSets) {
1093         parsePosition.setErrorIndex(0);
1094         return;
1095     }
1096
1097     UnicodeString workingText(text, parsePosition.getIndex());
1098     ParsePosition workingPos(0);
1099
1100     ParsePosition high_pp(0);
1101     Formattable high_result;
1102
1103     for (NFRuleSet** p = ruleSets; *p; ++p) {
1104         NFRuleSet *rp = *p;
1105         if (rp->isPublic()) {
1106             ParsePosition working_pp(0);
1107             Formattable working_result;
1108
1109             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1110             if (working_pp.getIndex() > high_pp.getIndex()) {
1111                 high_pp = working_pp;
1112                 high_result = working_result;
1113
1114                 if (high_pp.getIndex() == workingText.length()) {
1115                     break;
1116                 }
1117             }
1118         }
1119     }
1120
1121     parsePosition.setIndex(parsePosition.getIndex() + high_pp.getIndex());
1122     if (high_pp.getIndex() > 0) {
1123         parsePosition.setErrorIndex(-1);
1124     }
1125     result = high_result;
1126     if (result.getType() == Formattable::kDouble) {
1127         int32_t r = (int32_t)result.getDouble();
1128         if ((double)r == result.getDouble()) {
1129             result.setLong(r);
1130         }
1131     }
1132 }
1133
1134 #if !UCONFIG_NO_COLLATION
1135
1136 void
1137 RuleBasedNumberFormat::setLenient(UBool enabled)
1138 {
1139     lenient = enabled;
1140     if (!enabled && collator) {
1141         delete collator;
1142         collator = NULL;
1143     }
1144 }
1145
1146 #endif
1147
1148 void
1149 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1150     if (U_SUCCESS(status)) {
1151         if (ruleSetName.isEmpty()) {
1152           if (localizations) {
1153               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1154               defaultRuleSet = findRuleSet(name, status);
1155           } else {
1156             initDefaultRuleSet();
1157           }
1158         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1159             status = U_ILLEGAL_ARGUMENT_ERROR;
1160         } else {
1161             NFRuleSet* result = findRuleSet(ruleSetName, status);
1162             if (result != NULL) {
1163                 defaultRuleSet = result;
1164             }
1165         }
1166     }
1167 }
1168
1169 UnicodeString
1170 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1171   UnicodeString result;
1172   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1173     defaultRuleSet->getName(result);
1174   } else {
1175     result.setToBogus();
1176   }
1177   return result;
1178 }
1179
1180 void
1181 RuleBasedNumberFormat::initDefaultRuleSet()
1182 {
1183     defaultRuleSet = NULL;
1184     if (!ruleSets) {
1185       return;
1186     }
1187
1188     NFRuleSet**p = &ruleSets[0];
1189     while (*p) {
1190         ++p;
1191     }
1192
1193     defaultRuleSet = *--p;
1194     if (!defaultRuleSet->isPublic()) {
1195         while (p != ruleSets) {
1196             if ((*--p)->isPublic()) {
1197                 defaultRuleSet = *p;
1198                 break;
1199             }
1200         }
1201     }
1202 }
1203
1204
1205 void
1206 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1207                             UParseError& /* pErr */, UErrorCode& status)
1208 {
1209     // TODO: implement UParseError
1210     // Note: this can leave ruleSets == NULL, so remaining code should check
1211     if (U_FAILURE(status)) {
1212         return;
1213     }
1214
1215     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1216
1217     UnicodeString description(rules);
1218     if (!description.length()) {
1219         status = U_MEMORY_ALLOCATION_ERROR;
1220         return;
1221     }
1222
1223     // start by stripping the trailing whitespace from all the rules
1224     // (this is all the whitespace follwing each semicolon in the
1225     // description).  This allows us to look for rule-set boundaries
1226     // by searching for ";%" without having to worry about whitespace
1227     // between the ; and the %
1228     stripWhitespace(description);
1229
1230     // check to see if there's a set of lenient-parse rules.  If there
1231     // is, pull them out into our temporary holding place for them,
1232     // and delete them from the description before the real desciption-
1233     // parsing code sees them
1234     int32_t lp = description.indexOf(gLenientParse);
1235     if (lp != -1) {
1236         // we've got to make sure we're not in the middle of a rule
1237         // (where "%%lenient-parse" would actually get treated as
1238         // rule text)
1239         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1240             // locate the beginning and end of the actual collation
1241             // rules (there may be whitespace between the name and
1242             // the first token in the description)
1243             int lpEnd = description.indexOf(gSemiPercent, lp);
1244
1245             if (lpEnd == -1) {
1246                 lpEnd = description.length() - 1;
1247             }
1248             int lpStart = lp + u_strlen(gLenientParse);
1249             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1250                 ++lpStart;
1251             }
1252
1253             // copy out the lenient-parse rules and delete them
1254             // from the description
1255             lenientParseRules = new UnicodeString();
1256             /* test for NULL */
1257             if (lenientParseRules == 0) {
1258                 status = U_MEMORY_ALLOCATION_ERROR;
1259                 return;
1260             }
1261             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1262
1263             description.remove(lp, lpEnd + 1 - lp);
1264         }
1265     }
1266
1267     // pre-flight parsing the description and count the number of
1268     // rule sets (";%" marks the end of one rule set and the beginning
1269     // of the next)
1270     int numRuleSets = 0;
1271     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1272         ++numRuleSets;
1273         ++p;
1274     }
1275     ++numRuleSets;
1276
1277     // our rule list is an array of the appropriate size
1278     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1279     /* test for NULL */
1280     if (ruleSets == 0) {
1281         status = U_MEMORY_ALLOCATION_ERROR;
1282         return;
1283     }
1284
1285     for (int i = 0; i <= numRuleSets; ++i) {
1286         ruleSets[i] = NULL;
1287     }
1288
1289     // divide up the descriptions into individual rule-set descriptions
1290     // and store them in a temporary array.  At each step, we also
1291     // new up a rule set, but all this does is initialize its name
1292     // and remove it from its description.  We can't actually parse
1293     // the rest of the descriptions and finish initializing everything
1294     // because we have to know the names and locations of all the rule
1295     // sets before we can actually set everything up
1296     if(!numRuleSets) {
1297       status = U_ILLEGAL_ARGUMENT_ERROR;
1298       return;
1299     }
1300     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1301     /* test for NULL */
1302     if (ruleSetDescriptions == 0) {
1303         status = U_MEMORY_ALLOCATION_ERROR;
1304         return;
1305     }
1306
1307     {
1308         int curRuleSet = 0;
1309         int32_t start = 0;
1310         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1311             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1312             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1313             /* test for NULL */
1314             if (ruleSets[curRuleSet] == 0) {
1315                 status = U_MEMORY_ALLOCATION_ERROR;
1316                 return;
1317             }
1318             ++curRuleSet;
1319             start = p + 1;
1320         }
1321         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1322         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1323         /* test for NULL */
1324         if (ruleSets[curRuleSet] == 0) {
1325             status = U_MEMORY_ALLOCATION_ERROR;
1326             return;
1327         }
1328     }
1329
1330     // now we can take note of the formatter's default rule set, which
1331     // is the last public rule set in the description (it's the last
1332     // rather than the first so that a user can create a new formatter
1333     // from an existing formatter and change its default behavior just
1334     // by appending more rule sets to the end)
1335
1336     // {dlf} Initialization of a fraction rule set requires the default rule
1337     // set to be known.  For purposes of initialization, this is always the
1338     // last public rule set, no matter what the localization data says.
1339     initDefaultRuleSet();
1340
1341     // finally, we can go back through the temporary descriptions
1342     // list and finish seting up the substructure (and we throw
1343     // away the temporary descriptions as we go)
1344     {
1345         for (int i = 0; i < numRuleSets; i++) {
1346             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1347         }
1348     }
1349
1350     delete[] ruleSetDescriptions;
1351
1352     // Now that the rules are initialized, the 'real' default rule
1353     // set can be adjusted by the localization data.
1354
1355     // The C code keeps the localization array as is, rather than building
1356     // a separate array of the public rule set names, so we have less work
1357     // to do here-- but we still need to check the names.
1358
1359     if (localizationInfos) {
1360         // confirm the names, if any aren't in the rules, that's an error
1361         // it is ok if the rules contain public rule sets that are not in this list
1362         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1363             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1364             NFRuleSet* rs = findRuleSet(name, status);
1365             if (rs == NULL) {
1366                 break; // error
1367             }
1368             if (i == 0) {
1369                 defaultRuleSet = rs;
1370             }
1371         }
1372     } else {
1373         defaultRuleSet = getDefaultRuleSet();
1374     }
1375 }
1376
1377 void
1378 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1379 {
1380     // iterate through the characters...
1381     UnicodeString result;
1382
1383     int start = 0;
1384     while (start != -1 && start < description.length()) {
1385         // seek to the first non-whitespace character...
1386         while (start < description.length()
1387             && uprv_isRuleWhiteSpace(description.charAt(start))) {
1388             ++start;
1389         }
1390
1391         // locate the next semicolon in the text and copy the text from
1392         // our current position up to that semicolon into the result
1393         int32_t p = description.indexOf(gSemiColon, start);
1394         if (p == -1) {
1395             // or if we don't find a semicolon, just copy the rest of
1396             // the string into the result
1397             result.append(description, start, description.length() - start);
1398             start = -1;
1399         }
1400         else if (p < description.length()) {
1401             result.append(description, start, p + 1 - start);
1402             start = p + 1;
1403         }
1404
1405         // when we get here, we've seeked off the end of the sring, and
1406         // we terminate the loop (we continue until *start* is -1 rather
1407         // than until *p* is -1, because otherwise we'd miss the last
1408         // rule in the description)
1409         else {
1410             start = -1;
1411         }
1412     }
1413
1414     description.setTo(result);
1415 }
1416
1417
1418 void
1419 RuleBasedNumberFormat::dispose()
1420 {
1421     if (ruleSets) {
1422         for (NFRuleSet** p = ruleSets; *p; ++p) {
1423             delete *p;
1424         }
1425         uprv_free(ruleSets);
1426         ruleSets = NULL;
1427     }
1428
1429 #if !UCONFIG_NO_COLLATION
1430     delete collator;
1431 #endif
1432     collator = NULL;
1433
1434     delete decimalFormatSymbols;
1435     decimalFormatSymbols = NULL;
1436
1437     delete lenientParseRules;
1438     lenientParseRules = NULL;
1439
1440     if (localizations) localizations = localizations->unref();
1441 }
1442
1443
1444 //-----------------------------------------------------------------------
1445 // package-internal API
1446 //-----------------------------------------------------------------------
1447
1448 /**
1449  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1450  * this function creates it the first time it's called.
1451  * @return The collator to use for lenient parsing, or null if lenient parsing
1452  * is turned off.
1453 */
1454 Collator*
1455 RuleBasedNumberFormat::getCollator() const
1456 {
1457 #if !UCONFIG_NO_COLLATION
1458     if (!ruleSets) {
1459         return NULL;
1460     }
1461
1462     // lazy-evaulate the collator
1463     if (collator == NULL && lenient) {
1464         // create a default collator based on the formatter's locale,
1465         // then pull out that collator's rules, append any additional
1466         // rules specified in the description, and create a _new_
1467         // collator based on the combinaiton of those rules
1468
1469         UErrorCode status = U_ZERO_ERROR;
1470
1471         Collator* temp = Collator::createInstance(locale, status);
1472         if (U_SUCCESS(status) &&
1473             temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1474
1475             RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
1476             if (lenientParseRules) {
1477                 UnicodeString rules(newCollator->getRules());
1478                 rules.append(*lenientParseRules);
1479
1480                 newCollator = new RuleBasedCollator(rules, status);
1481             } else {
1482                 temp = NULL;
1483             }
1484             if (U_SUCCESS(status)) {
1485                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1486                 // cast away const
1487                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1488             } else {
1489                 delete newCollator;
1490             }
1491         }
1492         delete temp;
1493     }
1494 #endif
1495
1496     // if lenient-parse mode is off, this will be null
1497     // (see setLenientParseMode())
1498     return collator;
1499 }
1500
1501
1502 /**
1503  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1504  * instances owned by this formatter.  This object is lazily created: this function
1505  * creates it the first time it's called.
1506  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1507  * instances owned by this formatter.
1508 */
1509 DecimalFormatSymbols*
1510 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1511 {
1512     // lazy-evaluate the DecimalFormatSymbols object.  This object
1513     // is shared by all DecimalFormat instances belonging to this
1514     // formatter
1515     if (decimalFormatSymbols == NULL) {
1516         UErrorCode status = U_ZERO_ERROR;
1517         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1518         if (U_SUCCESS(status)) {
1519             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1520         } else {
1521             delete temp;
1522         }
1523     }
1524     return decimalFormatSymbols;
1525 }
1526
1527 U_NAMESPACE_END
1528
1529 /* U_HAVE_RBNF */
1530 #endif