icuSources/i18n/rbnf.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2006, International Business Machines Corporation
   4 * and others. All Rights Reserved.
   5 *******************************************************************************
   6 */
   7
   8 #include "unicode/rbnf.h"
   9
  10 #if U_HAVE_RBNF
  11
  12 #include "unicode/normlzr.h"
  13 #include "unicode/tblcoll.h"
  14 #include "unicode/uchar.h"
  15 #include "unicode/ucol.h"
  16 #include "unicode/uloc.h"
  17 #include "unicode/unum.h"
  18 #include "unicode/ures.h"
  19 #include "unicode/ustring.h"
  20 #include "unicode/utf16.h"
  21 #include "unicode/udata.h"
  22 #include "nfrs.h"
  23
  24 #include "cmemory.h"
  25 #include "cstring.h"
  26 #include "util.h"
  27
  28 // debugging
  29 // #define DEBUG
  30
  31 #ifdef DEBUG
  32 #include "stdio.h"
  33 #endif
  34
  35 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
  36
// "%%" as a NUL-terminated UChar string. The ruleSetName-taking format()
// overload in this file rejects names that begin with this prefix.
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
// A single ';' code unit (not a string).
static const UChar gSemiColon = 0x003B;
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */

// kHalfMaxDouble is 2^22 expressed as a double; kMaxDouble is its square, 2^44.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
  57
  58 U_NAMESPACE_BEGIN
  59
  60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
  61
  62 /*
  63 This is a utility class. It does not use ICU's RTTI.
  64 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
  65 Please make sure that intltest passes on Windows in Release mode,
  66 since the string pooling per compilation unit will mess up how RTTI works.
  67 The RTTI code was also removed due to lack of code coverage.
  68 */
  69 class LocalizationInfo : public UMemory {
  70 protected:
  71     virtual ~LocalizationInfo() {};
  72     uint32_t refcount;
  73
  74 public:
  75     LocalizationInfo() : refcount(0) {}
  76
  77     LocalizationInfo* ref(void) {
  78         ++refcount;
  79         return this;
  80     }
  81
  82     LocalizationInfo* unref(void) {
  83         if (refcount && --refcount == 0) {
  84             delete this;
  85         }
  86         return NULL;
  87     }
  88
  89     virtual UBool operator==(const LocalizationInfo* rhs) const;
  90     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
  91
  92     virtual int32_t getNumberOfRuleSets(void) const = 0;
  93     virtual const UChar* getRuleSetName(int32_t index) const = 0;
  94     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
  95     virtual const UChar* getLocaleName(int32_t index) const = 0;
  96     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
  97
  98     virtual int32_t indexForLocale(const UChar* locale) const;
  99     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
 100
 101 //    virtual UClassID getDynamicClassID() const = 0;
 102 //    static UClassID getStaticClassID(void);
 103 };
 104
 105 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
 106
 107 // if both strings are NULL, this returns TRUE
 108 static UBool
 109 streq(const UChar* lhs, const UChar* rhs) {
 110     if (rhs == lhs) {
 111         return TRUE;
 112     }
 113     if (lhs && rhs) {
 114         return u_strcmp(lhs, rhs) == 0;
 115     }
 116     return FALSE;
 117 }
 118
 119 UBool
 120 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
 121     if (rhs) {
 122         if (this == rhs) {
 123             return TRUE;
 124         }
 125
 126         int32_t rsc = getNumberOfRuleSets();
 127         if (rsc == rhs->getNumberOfRuleSets()) {
 128             for (int i = 0; i < rsc; ++i) {
 129                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
 130                     return FALSE;
 131                 }
 132             }
 133             int32_t dlc = getNumberOfDisplayLocales();
 134             if (dlc == rhs->getNumberOfDisplayLocales()) {
 135                 for (int i = 0; i < dlc; ++i) {
 136                     const UChar* locale = getLocaleName(i);
 137                     int32_t ix = rhs->indexForLocale(locale);
 138                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
 139                     if (!streq(locale, rhs->getLocaleName(ix))) {
 140                         return FALSE;
 141                     }
 142                     for (int j = 0; j < rsc; ++j) {
 143                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
 144                             return FALSE;
 145                         }
 146                     }
 147                 }
 148                 return TRUE;
 149             }
 150         }
 151     }
 152     return FALSE;
 153 }
 154
 155 int32_t
 156 LocalizationInfo::indexForLocale(const UChar* locale) const {
 157     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
 158         if (streq(locale, getLocaleName(i))) {
 159             return i;
 160         }
 161     }
 162     return -1;
 163 }
 164
 165 int32_t
 166 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
 167     if (ruleset) {
 168         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
 169             if (streq(ruleset, getRuleSetName(i))) {
 170                 return i;
 171             }
 172         }
 173     }
 174     return -1;
 175 }
 176
 177
 178 typedef void (*Fn_Deleter)(void*);
 179
 180 class VArray {
 181     void** buf;
 182     int32_t cap;
 183     int32_t size;
 184     Fn_Deleter deleter;
 185 public:
 186     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
 187
 188     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
 189
 190     ~VArray() {
 191         if (deleter) {
 192             for (int i = 0; i < size; ++i) {
 193                 (*deleter)(buf[i]);
 194             }
 195         }
 196         uprv_free(buf);
 197     }
 198
 199     int32_t length() {
 200         return size;
 201     }
 202
 203     void add(void* elem, UErrorCode& status) {
 204         if (U_SUCCESS(status)) {
 205             if (size == cap) {
 206                 if (cap == 0) {
 207                     cap = 1;
 208                 } else if (cap < 256) {
 209                     cap *= 2;
 210                 } else {
 211                     cap += 256;
 212                 }
 213                 if (buf == NULL) {
 214                     buf = (void**)uprv_malloc(cap * sizeof(void*));
 215                 } else {
 216                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
 217                 }
 218                 if (buf == NULL) {
 219                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
 220                     status = U_MEMORY_ALLOCATION_ERROR;
 221                     return;
 222                 }
 223                 void* start = &buf[size];
 224                 size_t count = (cap - size) * sizeof(void*);
 225                 uprv_memset(start, 0, count); // fill with nulls, just because
 226             }
 227             buf[size++] = elem;
 228         }
 229     }
 230
 231     void** release(void) {
 232         void** result = buf;
 233         buf = NULL;
 234         cap = 0;
 235         size = 0;
 236         return result;
 237     }
 238 };
 239
 240 class LocDataParser;
 241
 242 class StringLocalizationInfo : public LocalizationInfo {
 243     UChar* info;
 244     UChar*** data;
 245     int32_t numRuleSets;
 246     int32_t numLocales;
 247
 248 friend class LocDataParser;
 249
 250     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
 251         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
 252     {
 253     }
 254
 255 public:
 256     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
 257
 258     virtual ~StringLocalizationInfo();
 259     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
 260     virtual const UChar* getRuleSetName(int32_t index) const;
 261     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
 262     virtual const UChar* getLocaleName(int32_t index) const;
 263     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
 264
 265 //    virtual UClassID getDynamicClassID() const;
 266 //    static UClassID getStaticClassID(void);
 267
 268 private:
 269     void init(UErrorCode& status) const;
 270 };
 271
 272
 273 enum {
 274     OPEN_ANGLE = 0x003c, /* '<' */
 275     CLOSE_ANGLE = 0x003e, /* '>' */
 276     COMMA = 0x002c,
 277     TICK = 0x0027,
 278     QUOTE = 0x0022,
 279     SPACE = 0x0020
 280 };
 281
 282 /**
 283  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
 284  */
 285 class LocDataParser {
 286     UChar* data;
 287     const UChar* e;
 288     UChar* p;
 289     UChar ch;
 290     UParseError& pe;
 291     UErrorCode& ec;
 292
 293 public:
 294     LocDataParser(UParseError& parseError, UErrorCode& status)
 295         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
 296     ~LocDataParser() {}
 297
 298     /*
 299     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
 300     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
 301     */
 302     StringLocalizationInfo* parse(UChar* data, int32_t len);
 303
 304 private:
 305
 306     void inc(void) { ++p; ch = 0xffff; }
 307     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
 308     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
 309     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
 310     UBool inList(UChar c, const UChar* list) const {
 311         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
 312         while (*list && *list != c) ++list; return *list == c;
 313     }
 314     void parseError(const char* msg);
 315
 316     StringLocalizationInfo* doParse(void);
 317
 318     UChar** nextArray(int32_t& requiredLength);
 319     UChar*  nextString(void);
 320 };
 321
// ERROR(msg) records a parse failure through parseError() and then returns
// NULL from the enclosing function. The message text is forwarded only when
// DEBUG is defined. The macro expands to two statements, so it must only be
// used inside a braced block.
#ifdef DEBUG
#define ERROR(msg) parseError(msg); return NULL;
#else
#define ERROR(msg) parseError(NULL); return NULL;
#endif
 327
 328
// Stop list for a string opened with a double quote: only the closing
// double quote ends it.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Stop list for a string opened with a single quote (tick).
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Stop list for an unquoted string. The leading SPACE entry makes
// LocDataParser::inList treat every rule white space character as a stop.
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
 340
 341 static void
 342 DeleteFn(void* p) {
 343   uprv_free(p);
 344 }
 345
 346 StringLocalizationInfo*
 347 LocDataParser::parse(UChar* _data, int32_t len) {
 348     if (U_FAILURE(ec)) {
 349         if (_data) uprv_free(_data);
 350         return NULL;
 351     }
 352
 353     pe.line = 0;
 354     pe.offset = -1;
 355     pe.postContext[0] = 0;
 356     pe.preContext[0] = 0;
 357
 358     if (_data == NULL) {
 359         ec = U_ILLEGAL_ARGUMENT_ERROR;
 360         return NULL;
 361     }
 362
 363     if (len <= 0) {
 364         ec = U_ILLEGAL_ARGUMENT_ERROR;
 365         uprv_free(_data);
 366         return NULL;
 367     }
 368
 369     data = _data;
 370     e = data + len;
 371     p = _data;
 372     ch = 0xffff;
 373
 374     return doParse();
 375 }
 376
 377
 378 StringLocalizationInfo*
 379 LocDataParser::doParse(void) {
 380     skipWhitespace();
 381     if (!checkInc(OPEN_ANGLE)) {
 382         ERROR("Missing open angle");
 383     } else {
 384         VArray array(DeleteFn);
 385         UBool mightHaveNext = TRUE;
 386         int32_t requiredLength = -1;
 387         while (mightHaveNext) {
 388             mightHaveNext = FALSE;
 389             UChar** elem = nextArray(requiredLength);
 390             skipWhitespace();
 391             UBool haveComma = check(COMMA);
 392             if (elem) {
 393                 array.add(elem, ec);
 394                 if (haveComma) {
 395                     inc();
 396                     mightHaveNext = TRUE;
 397                 }
 398             } else if (haveComma) {
 399                 ERROR("Unexpected character");
 400             }
 401         }
 402
 403         skipWhitespace();
 404         if (!checkInc(CLOSE_ANGLE)) {
 405             if (check(OPEN_ANGLE)) {
 406                 ERROR("Missing comma in outer array");
 407             } else {
 408                 ERROR("Missing close angle bracket in outer array");
 409             }
 410         }
 411
 412         skipWhitespace();
 413         if (p != e) {
 414             ERROR("Extra text after close of localization data");
 415         }
 416
 417         array.add(NULL, ec);
 418         if (U_SUCCESS(ec)) {
 419             int32_t numLocs = array.length() - 2; // subtract first, NULL
 420             UChar*** result = (UChar***)array.release();
 421
 422             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
 423         }
 424     }
 425
 426     ERROR("Unknown error");
 427 }
 428
 429 UChar**
 430 LocDataParser::nextArray(int32_t& requiredLength) {
 431     if (U_FAILURE(ec)) {
 432         return NULL;
 433     }
 434
 435     skipWhitespace();
 436     if (!checkInc(OPEN_ANGLE)) {
 437         ERROR("Missing open angle");
 438     }
 439
 440     VArray array;
 441     UBool mightHaveNext = TRUE;
 442     while (mightHaveNext) {
 443         mightHaveNext = FALSE;
 444         UChar* elem = nextString();
 445         skipWhitespace();
 446         UBool haveComma = check(COMMA);
 447         if (elem) {
 448             array.add(elem, ec);
 449             if (haveComma) {
 450                 inc();
 451                 mightHaveNext = TRUE;
 452             }
 453         } else if (haveComma) {
 454             ERROR("Unexpected comma");
 455         }
 456     }
 457     skipWhitespace();
 458     if (!checkInc(CLOSE_ANGLE)) {
 459         if (check(OPEN_ANGLE)) {
 460             ERROR("Missing close angle bracket in inner array");
 461         } else {
 462             ERROR("Missing comma in inner array");
 463         }
 464     }
 465
 466     array.add(NULL, ec);
 467     if (U_SUCCESS(ec)) {
 468         if (requiredLength == -1) {
 469             requiredLength = array.length() + 1;
 470         } else if (array.length() != requiredLength) {
 471             ec = U_ILLEGAL_ARGUMENT_ERROR;
 472             ERROR("Array not of required length");
 473         }
 474
 475         return (UChar**)array.release();
 476     }
 477     ERROR("Unknown Error");
 478 }
 479
 480 UChar*
 481 LocDataParser::nextString() {
 482     UChar* result = NULL;
 483
 484     skipWhitespace();
 485     if (p < e) {
 486         const UChar* terminators;
 487         UChar c = *p;
 488         UBool haveQuote = c == QUOTE || c == TICK;
 489         if (haveQuote) {
 490             inc();
 491             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
 492         } else {
 493             terminators = NOQUOTE_STOPLIST;
 494         }
 495         UChar* start = p;
 496         while (p < e && !inList(*p, terminators)) ++p;
 497         if (p == e) {
 498             ERROR("Unexpected end of data");
 499         }
 500
 501         UChar x = *p;
 502         if (p > start) {
 503             ch = x;
 504             *p = 0x0; // terminate by writing to data
 505             result = start; // just point into data
 506         }
 507         if (haveQuote) {
 508             if (x != c) {
 509                 ERROR("Missing matching quote");
 510             } else if (p == start) {
 511                 ERROR("Empty string");
 512             }
 513             inc();
 514         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
 515             ERROR("Unexpected character in string");
 516         }
 517     }
 518
 519     // ok for there to be no next string
 520     return result;
 521 }
 522
 523 void
 524 LocDataParser::parseError(const char* /*str*/) {
 525     if (!data) {
 526         return;
 527     }
 528
 529     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
 530     if (start < data) {
 531         start = data;
 532     }
 533     for (UChar* x = p; --x >= start;) {
 534         if (!*x) {
 535             start = x+1;
 536             break;
 537         }
 538     }
 539     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
 540     if (limit > e) {
 541         limit = e;
 542     }
 543     u_strncpy(pe.preContext, start, (int32_t)(p-start));
 544     pe.preContext[p-start] = 0;
 545     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
 546     pe.postContext[limit-p] = 0;
 547     pe.offset = (int32_t)(p - data);
 548
 549 #ifdef DEBUG
 550     fprintf(stderr, "%s at or near character %d: ", str, p-data);
 551
 552     UnicodeString msg;
 553     msg.append(start, p - start);
 554     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
 555     msg.append(p, limit-p);
 556     msg.append("'");
 557
 558     char buf[128];
 559     int32_t len = msg.extract(0, msg.length(), buf, 128);
 560     if (len >= 128) {
 561         buf[127] = 0;
 562     } else {
 563         buf[len] = 0;
 564     }
 565     fprintf(stderr, "%s\n", buf);
 566     fflush(stderr);
 567 #endif
 568
 569     uprv_free(data);
 570     data = NULL;
 571     p = NULL;
 572     e = NULL;
 573
 574     if (U_SUCCESS(ec)) {
 575         ec = U_PARSE_ERROR;
 576     }
 577 }
 578
 579 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
 580
 581 StringLocalizationInfo*
 582 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
 583     if (U_FAILURE(status)) {
 584         return NULL;
 585     }
 586
 587     int32_t len = info.length();
 588     if (len == 0) {
 589         return NULL; // no error;
 590     }
 591
 592     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
 593     if (!p) {
 594         status = U_MEMORY_ALLOCATION_ERROR;
 595         return NULL;
 596     }
 597     info.extract(p, len, status);
 598     if (!U_FAILURE(status)) {
 599         status = U_ZERO_ERROR; // clear warning about non-termination
 600     }
 601
 602     LocDataParser parser(perror, status);
 603     return parser.parse(p, len);
 604 }
 605
 606 StringLocalizationInfo::~StringLocalizationInfo() {
 607     for (UChar*** p = (UChar***)data; *p; ++p) {
 608         // remaining data is simply pointer into our unicode string data.
 609         if (*p) uprv_free(*p);
 610     }
 611     if (data) uprv_free(data);
 612     if (info) uprv_free(info);
 613 }
 614
 615
 616 const UChar*
 617 StringLocalizationInfo::getRuleSetName(int32_t index) const {
 618     if (index >= 0 && index < getNumberOfRuleSets()) {
 619         return data[0][index];
 620     }
 621     return NULL;
 622 }
 623
 624 const UChar*
 625 StringLocalizationInfo::getLocaleName(int32_t index) const {
 626     if (index >= 0 && index < getNumberOfDisplayLocales()) {
 627         return data[index+1][0];
 628     }
 629     return NULL;
 630 }
 631
 632 const UChar*
 633 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
 634     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
 635         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
 636         return data[localeIndex+1][ruleIndex+1];
 637     }
 638     return NULL;
 639 }
 640
 641 // ----------
 642
 643 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 644                                              const UnicodeString& locs,
 645                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 646   : ruleSets(NULL)
 647   , defaultRuleSet(NULL)
 648   , locale(alocale)
 649   , collator(NULL)
 650   , decimalFormatSymbols(NULL)
 651   , lenient(FALSE)
 652   , lenientParseRules(NULL)
 653   , localizations(NULL)
 654 {
 655   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 656   init(description, locinfo, perror, status);
 657 }
 658
 659 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 660                                              const UnicodeString& locs,
 661                                              UParseError& perror, UErrorCode& status)
 662   : ruleSets(NULL)
 663   , defaultRuleSet(NULL)
 664   , locale(Locale::getDefault())
 665   , collator(NULL)
 666   , decimalFormatSymbols(NULL)
 667   , lenient(FALSE)
 668   , lenientParseRules(NULL)
 669   , localizations(NULL)
 670 {
 671   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 672   init(description, locinfo, perror, status);
 673 }
 674
 675 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 676                                              LocalizationInfo* info,
 677                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 678   : ruleSets(NULL)
 679   , defaultRuleSet(NULL)
 680   , locale(alocale)
 681   , collator(NULL)
 682   , decimalFormatSymbols(NULL)
 683   , lenient(FALSE)
 684   , lenientParseRules(NULL)
 685   , localizations(NULL)
 686 {
 687   init(description, info, perror, status);
 688 }
 689
 690 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 691                          UParseError& perror,
 692                          UErrorCode& status)
 693   : ruleSets(NULL)
 694   , defaultRuleSet(NULL)
 695   , locale(Locale::getDefault())
 696   , collator(NULL)
 697   , decimalFormatSymbols(NULL)
 698   , lenient(FALSE)
 699   , lenientParseRules(NULL)
 700   , localizations(NULL)
 701 {
 702     init(description, NULL, perror, status);
 703 }
 704
 705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 706                          const Locale& aLocale,
 707                          UParseError& perror,
 708                          UErrorCode& status)
 709   : ruleSets(NULL)
 710   , defaultRuleSet(NULL)
 711   , locale(aLocale)
 712   , collator(NULL)
 713   , decimalFormatSymbols(NULL)
 714   , lenient(FALSE)
 715   , lenientParseRules(NULL)
 716   , localizations(NULL)
 717 {
 718     init(description, NULL, perror, status);
 719 }
 720
 721 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
 722   : ruleSets(NULL)
 723   , defaultRuleSet(NULL)
 724   , locale(alocale)
 725   , collator(NULL)
 726   , decimalFormatSymbols(NULL)
 727   , lenient(FALSE)
 728   , lenientParseRules(NULL)
 729   , localizations(NULL)
 730 {
 731     if (U_FAILURE(status)) {
 732         return;
 733     }
 734
 735     const char* fmt_tag = "";
 736     switch (tag) {
 737     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
 738     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
 739     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
 740     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
 741     }
 742
 743     // TODO: read localization info from resource
 744     LocalizationInfo* locinfo = NULL;
 745
 746     int32_t len = 0;
 747     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
 748     if (U_SUCCESS(status)) {
 749         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
 750                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
 751         const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status);
 752         UnicodeString desc(description, len);
 753         UParseError perror;
 754         init (desc, locinfo, perror, status);
 755     }
 756     ures_close(nfrb);
 757 }
 758
 759 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
 760   : NumberFormat(rhs)
 761   , ruleSets(NULL)
 762   , defaultRuleSet(NULL)
 763   , locale(rhs.locale)
 764   , collator(NULL)
 765   , decimalFormatSymbols(NULL)
 766   , lenient(FALSE)
 767   , lenientParseRules(NULL)
 768   , localizations(NULL)
 769 {
 770     this->operator=(rhs);
 771 }
 772
 773 // --------
 774
 775 RuleBasedNumberFormat&
 776 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
 777 {
 778     UErrorCode status = U_ZERO_ERROR;
 779     dispose();
 780     locale = rhs.locale;
 781     lenient = rhs.lenient;
 782
 783     UnicodeString rules = rhs.getRules();
 784     UParseError perror;
 785     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
 786
 787     return *this;
 788 }
 789
// Destructor: all cleanup is delegated to dispose().
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
 794
 795 Format*
 796 RuleBasedNumberFormat::clone(void) const
 797 {
 798     RuleBasedNumberFormat * result = NULL;
 799     UnicodeString rules = getRules();
 800     UErrorCode status = U_ZERO_ERROR;
 801     UParseError perror;
 802     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
 803     /* test for NULL */
 804     if (result == 0) {
 805         status = U_MEMORY_ALLOCATION_ERROR;
 806         return 0;
 807     }
 808     if (U_FAILURE(status)) {
 809         delete result;
 810         result = 0;
 811     } else {
 812         result->lenient = lenient;
 813     }
 814     return result;
 815 }
 816
 817 UBool
 818 RuleBasedNumberFormat::operator==(const Format& other) const
 819 {
 820     if (this == &other) {
 821         return TRUE;
 822     }
 823
 824     if (other.getDynamicClassID() == getStaticClassID()) {
 825         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
 826         if (locale == rhs.locale &&
 827             lenient == rhs.lenient &&
 828             (localizations == NULL
 829                 ? rhs.localizations == NULL
 830                 : (rhs.localizations == NULL
 831                     ? FALSE
 832                     : *localizations == rhs.localizations))) {
 833
 834             NFRuleSet** p = ruleSets;
 835             NFRuleSet** q = rhs.ruleSets;
 836             if (p == NULL) {
 837                 return q == NULL;
 838             } else if (q == NULL) {
 839                 return FALSE;
 840             }
 841             while (*p && *q && (**p == **q)) {
 842                 ++p;
 843                 ++q;
 844             }
 845             return *q == NULL && *p == NULL;
 846         }
 847     }
 848
 849     return FALSE;
 850 }
 851
 852 UnicodeString
 853 RuleBasedNumberFormat::getRules() const
 854 {
 855     UnicodeString result;
 856     if (ruleSets != NULL) {
 857         for (NFRuleSet** p = ruleSets; *p; ++p) {
 858             (*p)->appendRules(result);
 859         }
 860     }
 861     return result;
 862 }
 863
 864 UnicodeString
 865 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
 866 {
 867     if (localizations) {
 868       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
 869       return string;
 870     } else if (ruleSets) {
 871         UnicodeString result;
 872         for (NFRuleSet** p = ruleSets; *p; ++p) {
 873             NFRuleSet* rs = *p;
 874             if (rs->isPublic()) {
 875                 if (--index == -1) {
 876                     rs->getName(result);
 877                     return result;
 878                 }
 879             }
 880         }
 881     }
 882     UnicodeString empty;
 883     return empty;
 884 }
 885
 886 int32_t
 887 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
 888 {
 889     int32_t result = 0;
 890     if (localizations) {
 891       result = localizations->getNumberOfRuleSets();
 892     } else if (ruleSets) {
 893         for (NFRuleSet** p = ruleSets; *p; ++p) {
 894             if ((**p).isPublic()) {
 895                 ++result;
 896             }
 897         }
 898     }
 899     return result;
 900 }
 901
 902 int32_t
 903 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
 904     if (localizations) {
 905         return localizations->getNumberOfDisplayLocales();
 906     }
 907     return 0;
 908 }
 909
 910 Locale
 911 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
 912     if (U_FAILURE(status)) {
 913         return Locale("");
 914     }
 915     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
 916         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
 917         char buffer[64];
 918         int32_t cap = name.length() + 1;
 919         char* bp = buffer;
 920         if (cap > 64) {
 921             bp = (char *)uprv_malloc(cap);
 922             if (bp == NULL) {
 923                 status = U_MEMORY_ALLOCATION_ERROR;
 924                 return Locale("");
 925             }
 926         }
 927         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
 928         Locale retLocale(bp);
 929         if (bp != buffer) {
 930             uprv_free(bp);
 931         }
 932         return retLocale;
 933     }
 934     status = U_ILLEGAL_ARGUMENT_ERROR;
 935     Locale retLocale;
 936     return retLocale;
 937 }
 938
 939 UnicodeString
 940 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
 941     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
 942         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
 943         int32_t len = localeName.length();
 944         UChar* localeStr = localeName.getBuffer(len + 1);
 945         while (len >= 0) {
 946             localeStr[len] = 0;
 947             int32_t ix = localizations->indexForLocale(localeStr);
 948             if (ix >= 0) {
 949                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
 950                 return name;
 951             }
 952
 953             // trim trailing portion, skipping over ommitted sections
 954             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
 955             while (len > 0 && localeStr[len-1] == 0x005F) --len;
 956         }
 957         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
 958         return name;
 959     }
 960     UnicodeString bogus;
 961     bogus.setToBogus();
 962     return bogus;
 963 }
 964
 965 UnicodeString
 966 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
 967     if (localizations) {
 968         UnicodeString rsn(ruleSetName);
 969         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
 970         return getRuleSetDisplayName(ix, localeParam);
 971     }
 972     UnicodeString bogus;
 973     bogus.setToBogus();
 974     return bogus;
 975 }
 976
 977 NFRuleSet*
 978 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
 979 {
 980     if (U_SUCCESS(status) && ruleSets) {
 981         for (NFRuleSet** p = ruleSets; *p; ++p) {
 982             NFRuleSet* rs = *p;
 983             if (rs->isNamed(name)) {
 984                 return rs;
 985             }
 986         }
 987         status = U_ILLEGAL_ARGUMENT_ERROR;
 988     }
 989     return NULL;
 990 }
 991
 992 UnicodeString&
 993 RuleBasedNumberFormat::format(int32_t number,
 994                               UnicodeString& toAppendTo,
 995                               FieldPosition& /* pos */) const
 996 {
 997     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
 998     return toAppendTo;
 999 }
1000
1001
1002 UnicodeString&
1003 RuleBasedNumberFormat::format(int64_t number,
1004                               UnicodeString& toAppendTo,
1005                               FieldPosition& /* pos */) const
1006 {
1007     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1008     return toAppendTo;
1009 }
1010
1011
1012 UnicodeString&
1013 RuleBasedNumberFormat::format(double number,
1014                               UnicodeString& toAppendTo,
1015                               FieldPosition& /* pos */) const
1016 {
1017     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1018     return toAppendTo;
1019 }
1020
1021
1022 UnicodeString&
1023 RuleBasedNumberFormat::format(int32_t number,
1024                               const UnicodeString& ruleSetName,
1025                               UnicodeString& toAppendTo,
1026                               FieldPosition& /* pos */,
1027                               UErrorCode& status) const
1028 {
1029     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1030     if (U_SUCCESS(status)) {
1031         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1032             // throw new IllegalArgumentException("Can't use internal rule set");
1033             status = U_ILLEGAL_ARGUMENT_ERROR;
1034         } else {
1035             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1036             if (rs) {
1037                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1038             }
1039         }
1040     }
1041     return toAppendTo;
1042 }
1043
1044
1045 UnicodeString&
1046 RuleBasedNumberFormat::format(int64_t number,
1047                               const UnicodeString& ruleSetName,
1048                               UnicodeString& toAppendTo,
1049                               FieldPosition& /* pos */,
1050                               UErrorCode& status) const
1051 {
1052     if (U_SUCCESS(status)) {
1053         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1054             // throw new IllegalArgumentException("Can't use internal rule set");
1055             status = U_ILLEGAL_ARGUMENT_ERROR;
1056         } else {
1057             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1058             if (rs) {
1059                 rs->format(number, toAppendTo, toAppendTo.length());
1060             }
1061         }
1062     }
1063     return toAppendTo;
1064 }
1065
1066
1067 // make linker happy
1068 UnicodeString&
1069 RuleBasedNumberFormat::format(const Formattable& obj,
1070                               UnicodeString& toAppendTo,
1071                               FieldPosition& pos,
1072                               UErrorCode& status) const
1073 {
1074     return NumberFormat::format(obj, toAppendTo, pos, status);
1075 }
1076
1077 UnicodeString&
1078 RuleBasedNumberFormat::format(double number,
1079                               const UnicodeString& ruleSetName,
1080                               UnicodeString& toAppendTo,
1081                               FieldPosition& /* pos */,
1082                               UErrorCode& status) const
1083 {
1084     if (U_SUCCESS(status)) {
1085         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1086             // throw new IllegalArgumentException("Can't use internal rule set");
1087             status = U_ILLEGAL_ARGUMENT_ERROR;
1088         } else {
1089             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1090             if (rs) {
1091                 rs->format(number, toAppendTo, toAppendTo.length());
1092             }
1093         }
1094     }
1095     return toAppendTo;
1096 }
1097
1098 void
1099 RuleBasedNumberFormat::parse(const UnicodeString& text,
1100                              Formattable& result,
1101                              ParsePosition& parsePosition) const
1102 {
1103     if (!ruleSets) {
1104         parsePosition.setErrorIndex(0);
1105         return;
1106     }
1107
1108     UnicodeString workingText(text, parsePosition.getIndex());
1109     ParsePosition workingPos(0);
1110
1111     ParsePosition high_pp(0);
1112     Formattable high_result;
1113
1114     for (NFRuleSet** p = ruleSets; *p; ++p) {
1115         NFRuleSet *rp = *p;
1116         if (rp->isPublic()) {
1117             ParsePosition working_pp(0);
1118             Formattable working_result;
1119
1120             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1121             if (working_pp.getIndex() > high_pp.getIndex()) {
1122                 high_pp = working_pp;
1123                 high_result = working_result;
1124
1125                 if (high_pp.getIndex() == workingText.length()) {
1126                     break;
1127                 }
1128             }
1129         }
1130     }
1131
1132     parsePosition.setIndex(parsePosition.getIndex() + high_pp.getIndex());
1133     if (high_pp.getIndex() > 0) {
1134         parsePosition.setErrorIndex(-1);
1135     }
1136     result = high_result;
1137     if (result.getType() == Formattable::kDouble) {
1138         int32_t r = (int32_t)result.getDouble();
1139         if ((double)r == result.getDouble()) {
1140             result.setLong(r);
1141         }
1142     }
1143 }
1144
1145 #if !UCONFIG_NO_COLLATION
1146
1147 void
1148 RuleBasedNumberFormat::setLenient(UBool enabled)
1149 {
1150     lenient = enabled;
1151     if (!enabled && collator) {
1152         delete collator;
1153         collator = NULL;
1154     }
1155 }
1156
1157 #endif
1158
1159 void
1160 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1161     if (U_SUCCESS(status)) {
1162         if (ruleSetName.isEmpty()) {
1163           if (localizations) {
1164               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1165               defaultRuleSet = findRuleSet(name, status);
1166           } else {
1167             initDefaultRuleSet();
1168           }
1169         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1170             status = U_ILLEGAL_ARGUMENT_ERROR;
1171         } else {
1172             NFRuleSet* result = findRuleSet(ruleSetName, status);
1173             if (result != NULL) {
1174                 defaultRuleSet = result;
1175             }
1176         }
1177     }
1178 }
1179
1180 UnicodeString
1181 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1182   UnicodeString result;
1183   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1184     defaultRuleSet->getName(result);
1185   } else {
1186     result.setToBogus();
1187   }
1188   return result;
1189 }
1190
1191 void
1192 RuleBasedNumberFormat::initDefaultRuleSet()
1193 {
1194     defaultRuleSet = NULL;
1195     if (!ruleSets) {
1196       return;
1197     }
1198
1199     NFRuleSet**p = &ruleSets[0];
1200     while (*p) {
1201         ++p;
1202     }
1203
1204     defaultRuleSet = *--p;
1205     if (!defaultRuleSet->isPublic()) {
1206         while (p != ruleSets) {
1207             if ((*--p)->isPublic()) {
1208                 defaultRuleSet = *p;
1209                 break;
1210             }
1211         }
1212     }
1213 }
1214
1215
1216 void
1217 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1218                             UParseError& pErr, UErrorCode& status)
1219 {
1220     // TODO: implement UParseError
1221     uprv_memset(&pErr, 0, sizeof(UParseError));
1222     // Note: this can leave ruleSets == NULL, so remaining code should check
1223     if (U_FAILURE(status)) {
1224         return;
1225     }
1226
1227     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1228
1229     UnicodeString description(rules);
1230     if (!description.length()) {
1231         status = U_MEMORY_ALLOCATION_ERROR;
1232         return;
1233     }
1234
1235     // start by stripping the trailing whitespace from all the rules
1236     // (this is all the whitespace follwing each semicolon in the
1237     // description).  This allows us to look for rule-set boundaries
1238     // by searching for ";%" without having to worry about whitespace
1239     // between the ; and the %
1240     stripWhitespace(description);
1241
1242     // check to see if there's a set of lenient-parse rules.  If there
1243     // is, pull them out into our temporary holding place for them,
1244     // and delete them from the description before the real desciption-
1245     // parsing code sees them
1246     int32_t lp = description.indexOf(gLenientParse);
1247     if (lp != -1) {
1248         // we've got to make sure we're not in the middle of a rule
1249         // (where "%%lenient-parse" would actually get treated as
1250         // rule text)
1251         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1252             // locate the beginning and end of the actual collation
1253             // rules (there may be whitespace between the name and
1254             // the first token in the description)
1255             int lpEnd = description.indexOf(gSemiPercent, lp);
1256
1257             if (lpEnd == -1) {
1258                 lpEnd = description.length() - 1;
1259             }
1260             int lpStart = lp + u_strlen(gLenientParse);
1261             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1262                 ++lpStart;
1263             }
1264
1265             // copy out the lenient-parse rules and delete them
1266             // from the description
1267             lenientParseRules = new UnicodeString();
1268             /* test for NULL */
1269             if (lenientParseRules == 0) {
1270                 status = U_MEMORY_ALLOCATION_ERROR;
1271                 return;
1272             }
1273             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1274
1275             description.remove(lp, lpEnd + 1 - lp);
1276         }
1277     }
1278
1279     // pre-flight parsing the description and count the number of
1280     // rule sets (";%" marks the end of one rule set and the beginning
1281     // of the next)
1282     int numRuleSets = 0;
1283     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1284         ++numRuleSets;
1285         ++p;
1286     }
1287     ++numRuleSets;
1288
1289     // our rule list is an array of the appropriate size
1290     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1291     /* test for NULL */
1292     if (ruleSets == 0) {
1293         status = U_MEMORY_ALLOCATION_ERROR;
1294         return;
1295     }
1296
1297     for (int i = 0; i <= numRuleSets; ++i) {
1298         ruleSets[i] = NULL;
1299     }
1300
1301     // divide up the descriptions into individual rule-set descriptions
1302     // and store them in a temporary array.  At each step, we also
1303     // new up a rule set, but all this does is initialize its name
1304     // and remove it from its description.  We can't actually parse
1305     // the rest of the descriptions and finish initializing everything
1306     // because we have to know the names and locations of all the rule
1307     // sets before we can actually set everything up
1308     if(!numRuleSets) {
1309       status = U_ILLEGAL_ARGUMENT_ERROR;
1310       return;
1311     }
1312     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1313     /* test for NULL */
1314     if (ruleSetDescriptions == 0) {
1315         status = U_MEMORY_ALLOCATION_ERROR;
1316         return;
1317     }
1318
1319     {
1320         int curRuleSet = 0;
1321         int32_t start = 0;
1322         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1323             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1324             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1325             /* test for NULL */
1326             if (ruleSets[curRuleSet] == 0) {
1327                 status = U_MEMORY_ALLOCATION_ERROR;
1328                 return;
1329             }
1330             ++curRuleSet;
1331             start = p + 1;
1332         }
1333         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1334         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1335         /* test for NULL */
1336         if (ruleSets[curRuleSet] == 0) {
1337             status = U_MEMORY_ALLOCATION_ERROR;
1338             return;
1339         }
1340     }
1341
1342     // now we can take note of the formatter's default rule set, which
1343     // is the last public rule set in the description (it's the last
1344     // rather than the first so that a user can create a new formatter
1345     // from an existing formatter and change its default behavior just
1346     // by appending more rule sets to the end)
1347
1348     // {dlf} Initialization of a fraction rule set requires the default rule
1349     // set to be known.  For purposes of initialization, this is always the
1350     // last public rule set, no matter what the localization data says.
1351     initDefaultRuleSet();
1352
1353     // finally, we can go back through the temporary descriptions
1354     // list and finish seting up the substructure (and we throw
1355     // away the temporary descriptions as we go)
1356     {
1357         for (int i = 0; i < numRuleSets; i++) {
1358             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1359         }
1360     }
1361
1362     delete[] ruleSetDescriptions;
1363
1364     // Now that the rules are initialized, the 'real' default rule
1365     // set can be adjusted by the localization data.
1366
1367     // The C code keeps the localization array as is, rather than building
1368     // a separate array of the public rule set names, so we have less work
1369     // to do here-- but we still need to check the names.
1370
1371     if (localizationInfos) {
1372         // confirm the names, if any aren't in the rules, that's an error
1373         // it is ok if the rules contain public rule sets that are not in this list
1374         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1375             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1376             NFRuleSet* rs = findRuleSet(name, status);
1377             if (rs == NULL) {
1378                 break; // error
1379             }
1380             if (i == 0) {
1381                 defaultRuleSet = rs;
1382             }
1383         }
1384     } else {
1385         defaultRuleSet = getDefaultRuleSet();
1386     }
1387 }
1388
1389 void
1390 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1391 {
1392     // iterate through the characters...
1393     UnicodeString result;
1394
1395     int start = 0;
1396     while (start != -1 && start < description.length()) {
1397         // seek to the first non-whitespace character...
1398         while (start < description.length()
1399             && uprv_isRuleWhiteSpace(description.charAt(start))) {
1400             ++start;
1401         }
1402
1403         // locate the next semicolon in the text and copy the text from
1404         // our current position up to that semicolon into the result
1405         int32_t p = description.indexOf(gSemiColon, start);
1406         if (p == -1) {
1407             // or if we don't find a semicolon, just copy the rest of
1408             // the string into the result
1409             result.append(description, start, description.length() - start);
1410             start = -1;
1411         }
1412         else if (p < description.length()) {
1413             result.append(description, start, p + 1 - start);
1414             start = p + 1;
1415         }
1416
1417         // when we get here, we've seeked off the end of the sring, and
1418         // we terminate the loop (we continue until *start* is -1 rather
1419         // than until *p* is -1, because otherwise we'd miss the last
1420         // rule in the description)
1421         else {
1422             start = -1;
1423         }
1424     }
1425
1426     description.setTo(result);
1427 }
1428
1429
1430 void
1431 RuleBasedNumberFormat::dispose()
1432 {
1433     if (ruleSets) {
1434         for (NFRuleSet** p = ruleSets; *p; ++p) {
1435             delete *p;
1436         }
1437         uprv_free(ruleSets);
1438         ruleSets = NULL;
1439     }
1440
1441 #if !UCONFIG_NO_COLLATION
1442     delete collator;
1443 #endif
1444     collator = NULL;
1445
1446     delete decimalFormatSymbols;
1447     decimalFormatSymbols = NULL;
1448
1449     delete lenientParseRules;
1450     lenientParseRules = NULL;
1451
1452     if (localizations) localizations = localizations->unref();
1453 }
1454
1455
1456 //-----------------------------------------------------------------------
1457 // package-internal API
1458 //-----------------------------------------------------------------------
1459
1460 /**
1461  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1462  * this function creates it the first time it's called.
1463  * @return The collator to use for lenient parsing, or null if lenient parsing
1464  * is turned off.
1465 */
1466 Collator*
1467 RuleBasedNumberFormat::getCollator() const
1468 {
1469 #if !UCONFIG_NO_COLLATION
1470     if (!ruleSets) {
1471         return NULL;
1472     }
1473
1474     // lazy-evaulate the collator
1475     if (collator == NULL && lenient) {
1476         // create a default collator based on the formatter's locale,
1477         // then pull out that collator's rules, append any additional
1478         // rules specified in the description, and create a _new_
1479         // collator based on the combinaiton of those rules
1480
1481         UErrorCode status = U_ZERO_ERROR;
1482
1483         Collator* temp = Collator::createInstance(locale, status);
1484         if (U_SUCCESS(status) &&
1485             temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1486
1487             RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
1488             if (lenientParseRules) {
1489                 UnicodeString rules(newCollator->getRules());
1490                 rules.append(*lenientParseRules);
1491
1492                 newCollator = new RuleBasedCollator(rules, status);
1493             } else {
1494                 temp = NULL;
1495             }
1496             if (U_SUCCESS(status)) {
1497                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1498                 // cast away const
1499                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1500             } else {
1501                 delete newCollator;
1502             }
1503         }
1504         delete temp;
1505     }
1506 #endif
1507
1508     // if lenient-parse mode is off, this will be null
1509     // (see setLenientParseMode())
1510     return collator;
1511 }
1512
1513
1514 /**
1515  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1516  * instances owned by this formatter.  This object is lazily created: this function
1517  * creates it the first time it's called.
1518  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1519  * instances owned by this formatter.
1520 */
1521 DecimalFormatSymbols*
1522 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1523 {
1524     // lazy-evaluate the DecimalFormatSymbols object.  This object
1525     // is shared by all DecimalFormat instances belonging to this
1526     // formatter
1527     if (decimalFormatSymbols == NULL) {
1528         UErrorCode status = U_ZERO_ERROR;
1529         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1530         if (U_SUCCESS(status)) {
1531             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1532         } else {
1533             delete temp;
1534         }
1535     }
1536     return decimalFormatSymbols;
1537 }
1538
1539 U_NAMESPACE_END
1540
1541 /* U_HAVE_RBNF */
1542 #endif