icuSources/i18n/rbnf.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2009, International Business Machines Corporation
   4 * and others. All Rights Reserved.
   5 *******************************************************************************
   6 */
   7
   8 #include "unicode/rbnf.h"
   9
  10 #if U_HAVE_RBNF
  11
  12 #include "unicode/normlzr.h"
  13 #include "unicode/tblcoll.h"
  14 #include "unicode/uchar.h"
  15 #include "unicode/ucol.h"
  16 #include "unicode/uloc.h"
  17 #include "unicode/unum.h"
  18 #include "unicode/ures.h"
  19 #include "unicode/ustring.h"
  20 #include "unicode/utf16.h"
  21 #include "unicode/udata.h"
  22 #include "nfrs.h"
  23
  24 #include "cmemory.h"
  25 #include "cstring.h"
  26 #include "util.h"
  27
  28 // debugging
  29 // #define DEBUG
  30
  31 #ifdef DEBUG
  32 #include "stdio.h"
  33 #endif
  34
  35 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
  36
  37 static const UChar gPercentPercent[] =
  38 {
  39     0x25, 0x25, 0
  40 }; /* "%%" */
  41
  42 // All urbnf objects are created through openRules, so we init all of the
  43 // Unicode string constants required by rbnf, nfrs, or nfr here.
  44 static const UChar gLenientParse[] =
  45 {
  46     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
  47 }; /* "%%lenient-parse:" */
  48 static const UChar gSemiColon = 0x003B;
  49 static const UChar gSemiPercent[] =
  50 {
  51     0x3B, 0x25, 0
  52 }; /* ";%" */
  53
  54 #define kSomeNumberOfBitsDiv2 22
  55 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
  56 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
  57
  58 U_NAMESPACE_BEGIN
  59
  60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
  61
  62 /*
  63 This is a utility class. It does not use ICU's RTTI.
  64 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
  65 Please make sure that intltest passes on Windows in Release mode,
  66 since the string pooling per compilation unit will mess up how RTTI works.
  67 The RTTI code was also removed due to lack of code coverage.
  68 */
  69 class LocalizationInfo : public UMemory {
  70 protected:
  71     virtual ~LocalizationInfo() {};
  72     uint32_t refcount;
  73
  74 public:
  75     LocalizationInfo() : refcount(0) {}
  76
  77     LocalizationInfo* ref(void) {
  78         ++refcount;
  79         return this;
  80     }
  81
  82     LocalizationInfo* unref(void) {
  83         if (refcount && --refcount == 0) {
  84             delete this;
  85         }
  86         return NULL;
  87     }
  88
  89     virtual UBool operator==(const LocalizationInfo* rhs) const;
  90     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
  91
  92     virtual int32_t getNumberOfRuleSets(void) const = 0;
  93     virtual const UChar* getRuleSetName(int32_t index) const = 0;
  94     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
  95     virtual const UChar* getLocaleName(int32_t index) const = 0;
  96     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
  97
  98     virtual int32_t indexForLocale(const UChar* locale) const;
  99     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
 100
 101 //    virtual UClassID getDynamicClassID() const = 0;
 102 //    static UClassID getStaticClassID(void);
 103 };
 104
 105 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
 106
 107 // if both strings are NULL, this returns TRUE
 108 static UBool
 109 streq(const UChar* lhs, const UChar* rhs) {
 110     if (rhs == lhs) {
 111         return TRUE;
 112     }
 113     if (lhs && rhs) {
 114         return u_strcmp(lhs, rhs) == 0;
 115     }
 116     return FALSE;
 117 }
 118
 119 UBool
 120 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
 121     if (rhs) {
 122         if (this == rhs) {
 123             return TRUE;
 124         }
 125
 126         int32_t rsc = getNumberOfRuleSets();
 127         if (rsc == rhs->getNumberOfRuleSets()) {
 128             for (int i = 0; i < rsc; ++i) {
 129                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
 130                     return FALSE;
 131                 }
 132             }
 133             int32_t dlc = getNumberOfDisplayLocales();
 134             if (dlc == rhs->getNumberOfDisplayLocales()) {
 135                 for (int i = 0; i < dlc; ++i) {
 136                     const UChar* locale = getLocaleName(i);
 137                     int32_t ix = rhs->indexForLocale(locale);
 138                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
 139                     if (!streq(locale, rhs->getLocaleName(ix))) {
 140                         return FALSE;
 141                     }
 142                     for (int j = 0; j < rsc; ++j) {
 143                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
 144                             return FALSE;
 145                         }
 146                     }
 147                 }
 148                 return TRUE;
 149             }
 150         }
 151     }
 152     return FALSE;
 153 }
 154
 155 int32_t
 156 LocalizationInfo::indexForLocale(const UChar* locale) const {
 157     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
 158         if (streq(locale, getLocaleName(i))) {
 159             return i;
 160         }
 161     }
 162     return -1;
 163 }
 164
 165 int32_t
 166 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
 167     if (ruleset) {
 168         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
 169             if (streq(ruleset, getRuleSetName(i))) {
 170                 return i;
 171             }
 172         }
 173     }
 174     return -1;
 175 }
 176
 177
 178 typedef void (*Fn_Deleter)(void*);
 179
 180 class VArray {
 181     void** buf;
 182     int32_t cap;
 183     int32_t size;
 184     Fn_Deleter deleter;
 185 public:
 186     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
 187
 188     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
 189
 190     ~VArray() {
 191         if (deleter) {
 192             for (int i = 0; i < size; ++i) {
 193                 (*deleter)(buf[i]);
 194             }
 195         }
 196         uprv_free(buf);
 197     }
 198
 199     int32_t length() {
 200         return size;
 201     }
 202
 203     void add(void* elem, UErrorCode& status) {
 204         if (U_SUCCESS(status)) {
 205             if (size == cap) {
 206                 if (cap == 0) {
 207                     cap = 1;
 208                 } else if (cap < 256) {
 209                     cap *= 2;
 210                 } else {
 211                     cap += 256;
 212                 }
 213                 if (buf == NULL) {
 214                     buf = (void**)uprv_malloc(cap * sizeof(void*));
 215                 } else {
 216                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
 217                 }
 218                 if (buf == NULL) {
 219                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
 220                     status = U_MEMORY_ALLOCATION_ERROR;
 221                     return;
 222                 }
 223                 void* start = &buf[size];
 224                 size_t count = (cap - size) * sizeof(void*);
 225                 uprv_memset(start, 0, count); // fill with nulls, just because
 226             }
 227             buf[size++] = elem;
 228         }
 229     }
 230
 231     void** release(void) {
 232         void** result = buf;
 233         buf = NULL;
 234         cap = 0;
 235         size = 0;
 236         return result;
 237     }
 238 };
 239
 240 class LocDataParser;
 241
 242 class StringLocalizationInfo : public LocalizationInfo {
 243     UChar* info;
 244     UChar*** data;
 245     int32_t numRuleSets;
 246     int32_t numLocales;
 247
 248 friend class LocDataParser;
 249
 250     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
 251         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
 252     {
 253     }
 254
 255 public:
 256     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
 257
 258     virtual ~StringLocalizationInfo();
 259     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
 260     virtual const UChar* getRuleSetName(int32_t index) const;
 261     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
 262     virtual const UChar* getLocaleName(int32_t index) const;
 263     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
 264
 265 //    virtual UClassID getDynamicClassID() const;
 266 //    static UClassID getStaticClassID(void);
 267
 268 private:
 269     void init(UErrorCode& status) const;
 270 };
 271
 272
 273 enum {
 274     OPEN_ANGLE = 0x003c, /* '<' */
 275     CLOSE_ANGLE = 0x003e, /* '>' */
 276     COMMA = 0x002c,
 277     TICK = 0x0027,
 278     QUOTE = 0x0022,
 279     SPACE = 0x0020
 280 };
 281
 282 /**
 283  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
 284  */
 285 class LocDataParser {
 286     UChar* data;
 287     const UChar* e;
 288     UChar* p;
 289     UChar ch;
 290     UParseError& pe;
 291     UErrorCode& ec;
 292
 293 public:
 294     LocDataParser(UParseError& parseError, UErrorCode& status)
 295         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
 296     ~LocDataParser() {}
 297
 298     /*
 299     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
 300     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
 301     */
 302     StringLocalizationInfo* parse(UChar* data, int32_t len);
 303
 304 private:
 305
 306     void inc(void) { ++p; ch = 0xffff; }
 307     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
 308     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
 309     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
 310     UBool inList(UChar c, const UChar* list) const {
 311         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
 312         while (*list && *list != c) ++list; return *list == c;
 313     }
 314     void parseError(const char* msg);
 315
 316     StringLocalizationInfo* doParse(void);
 317
 318     UChar** nextArray(int32_t& requiredLength);
 319     UChar*  nextString(void);
 320 };
 321
 322 #ifdef DEBUG
 323 #define ERROR(msg) parseError(msg); return NULL;
 324 #else
 325 #define ERROR(msg) parseError(NULL); return NULL;
 326 #endif
 327
 328
 329 static const UChar DQUOTE_STOPLIST[] = {
 330     QUOTE, 0
 331 };
 332
 333 static const UChar SQUOTE_STOPLIST[] = {
 334     TICK, 0
 335 };
 336
 337 static const UChar NOQUOTE_STOPLIST[] = {
 338     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
 339 };
 340
 341 static void
 342 DeleteFn(void* p) {
 343   uprv_free(p);
 344 }
 345
 346 StringLocalizationInfo*
 347 LocDataParser::parse(UChar* _data, int32_t len) {
 348     if (U_FAILURE(ec)) {
 349         if (_data) uprv_free(_data);
 350         return NULL;
 351     }
 352
 353     pe.line = 0;
 354     pe.offset = -1;
 355     pe.postContext[0] = 0;
 356     pe.preContext[0] = 0;
 357
 358     if (_data == NULL) {
 359         ec = U_ILLEGAL_ARGUMENT_ERROR;
 360         return NULL;
 361     }
 362
 363     if (len <= 0) {
 364         ec = U_ILLEGAL_ARGUMENT_ERROR;
 365         uprv_free(_data);
 366         return NULL;
 367     }
 368
 369     data = _data;
 370     e = data + len;
 371     p = _data;
 372     ch = 0xffff;
 373
 374     return doParse();
 375 }
 376
 377
 378 StringLocalizationInfo*
 379 LocDataParser::doParse(void) {
 380     skipWhitespace();
 381     if (!checkInc(OPEN_ANGLE)) {
 382         ERROR("Missing open angle");
 383     } else {
 384         VArray array(DeleteFn);
 385         UBool mightHaveNext = TRUE;
 386         int32_t requiredLength = -1;
 387         while (mightHaveNext) {
 388             mightHaveNext = FALSE;
 389             UChar** elem = nextArray(requiredLength);
 390             skipWhitespace();
 391             UBool haveComma = check(COMMA);
 392             if (elem) {
 393                 array.add(elem, ec);
 394                 if (haveComma) {
 395                     inc();
 396                     mightHaveNext = TRUE;
 397                 }
 398             } else if (haveComma) {
 399                 ERROR("Unexpected character");
 400             }
 401         }
 402
 403         skipWhitespace();
 404         if (!checkInc(CLOSE_ANGLE)) {
 405             if (check(OPEN_ANGLE)) {
 406                 ERROR("Missing comma in outer array");
 407             } else {
 408                 ERROR("Missing close angle bracket in outer array");
 409             }
 410         }
 411
 412         skipWhitespace();
 413         if (p != e) {
 414             ERROR("Extra text after close of localization data");
 415         }
 416
 417         array.add(NULL, ec);
 418         if (U_SUCCESS(ec)) {
 419             int32_t numLocs = array.length() - 2; // subtract first, NULL
 420             UChar*** result = (UChar***)array.release();
 421
 422             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
 423         }
 424     }
 425
 426     ERROR("Unknown error");
 427 }
 428
 429 UChar**
 430 LocDataParser::nextArray(int32_t& requiredLength) {
 431     if (U_FAILURE(ec)) {
 432         return NULL;
 433     }
 434
 435     skipWhitespace();
 436     if (!checkInc(OPEN_ANGLE)) {
 437         ERROR("Missing open angle");
 438     }
 439
 440     VArray array;
 441     UBool mightHaveNext = TRUE;
 442     while (mightHaveNext) {
 443         mightHaveNext = FALSE;
 444         UChar* elem = nextString();
 445         skipWhitespace();
 446         UBool haveComma = check(COMMA);
 447         if (elem) {
 448             array.add(elem, ec);
 449             if (haveComma) {
 450                 inc();
 451                 mightHaveNext = TRUE;
 452             }
 453         } else if (haveComma) {
 454             ERROR("Unexpected comma");
 455         }
 456     }
 457     skipWhitespace();
 458     if (!checkInc(CLOSE_ANGLE)) {
 459         if (check(OPEN_ANGLE)) {
 460             ERROR("Missing close angle bracket in inner array");
 461         } else {
 462             ERROR("Missing comma in inner array");
 463         }
 464     }
 465
 466     array.add(NULL, ec);
 467     if (U_SUCCESS(ec)) {
 468         if (requiredLength == -1) {
 469             requiredLength = array.length() + 1;
 470         } else if (array.length() != requiredLength) {
 471             ec = U_ILLEGAL_ARGUMENT_ERROR;
 472             ERROR("Array not of required length");
 473         }
 474
 475         return (UChar**)array.release();
 476     }
 477     ERROR("Unknown Error");
 478 }
 479
 480 UChar*
 481 LocDataParser::nextString() {
 482     UChar* result = NULL;
 483
 484     skipWhitespace();
 485     if (p < e) {
 486         const UChar* terminators;
 487         UChar c = *p;
 488         UBool haveQuote = c == QUOTE || c == TICK;
 489         if (haveQuote) {
 490             inc();
 491             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
 492         } else {
 493             terminators = NOQUOTE_STOPLIST;
 494         }
 495         UChar* start = p;
 496         while (p < e && !inList(*p, terminators)) ++p;
 497         if (p == e) {
 498             ERROR("Unexpected end of data");
 499         }
 500
 501         UChar x = *p;
 502         if (p > start) {
 503             ch = x;
 504             *p = 0x0; // terminate by writing to data
 505             result = start; // just point into data
 506         }
 507         if (haveQuote) {
 508             if (x != c) {
 509                 ERROR("Missing matching quote");
 510             } else if (p == start) {
 511                 ERROR("Empty string");
 512             }
 513             inc();
 514         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
 515             ERROR("Unexpected character in string");
 516         }
 517     }
 518
 519     // ok for there to be no next string
 520     return result;
 521 }
 522
 523 void
 524 LocDataParser::parseError(const char* /*str*/) {
 525     if (!data) {
 526         return;
 527     }
 528
 529     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
 530     if (start < data) {
 531         start = data;
 532     }
 533     for (UChar* x = p; --x >= start;) {
 534         if (!*x) {
 535             start = x+1;
 536             break;
 537         }
 538     }
 539     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
 540     if (limit > e) {
 541         limit = e;
 542     }
 543     u_strncpy(pe.preContext, start, (int32_t)(p-start));
 544     pe.preContext[p-start] = 0;
 545     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
 546     pe.postContext[limit-p] = 0;
 547     pe.offset = (int32_t)(p - data);
 548
 549 #ifdef DEBUG
 550     fprintf(stderr, "%s at or near character %d: ", str, p-data);
 551
 552     UnicodeString msg;
 553     msg.append(start, p - start);
 554     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
 555     msg.append(p, limit-p);
 556     msg.append("'");
 557
 558     char buf[128];
 559     int32_t len = msg.extract(0, msg.length(), buf, 128);
 560     if (len >= 128) {
 561         buf[127] = 0;
 562     } else {
 563         buf[len] = 0;
 564     }
 565     fprintf(stderr, "%s\n", buf);
 566     fflush(stderr);
 567 #endif
 568
 569     uprv_free(data);
 570     data = NULL;
 571     p = NULL;
 572     e = NULL;
 573
 574     if (U_SUCCESS(ec)) {
 575         ec = U_PARSE_ERROR;
 576     }
 577 }
 578
 579 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
 580
 581 StringLocalizationInfo*
 582 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
 583     if (U_FAILURE(status)) {
 584         return NULL;
 585     }
 586
 587     int32_t len = info.length();
 588     if (len == 0) {
 589         return NULL; // no error;
 590     }
 591
 592     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
 593     if (!p) {
 594         status = U_MEMORY_ALLOCATION_ERROR;
 595         return NULL;
 596     }
 597     info.extract(p, len, status);
 598     if (!U_FAILURE(status)) {
 599         status = U_ZERO_ERROR; // clear warning about non-termination
 600     }
 601
 602     LocDataParser parser(perror, status);
 603     return parser.parse(p, len);
 604 }
 605
 606 StringLocalizationInfo::~StringLocalizationInfo() {
 607     for (UChar*** p = (UChar***)data; *p; ++p) {
 608         // remaining data is simply pointer into our unicode string data.
 609         if (*p) uprv_free(*p);
 610     }
 611     if (data) uprv_free(data);
 612     if (info) uprv_free(info);
 613 }
 614
 615
 616 const UChar*
 617 StringLocalizationInfo::getRuleSetName(int32_t index) const {
 618     if (index >= 0 && index < getNumberOfRuleSets()) {
 619         return data[0][index];
 620     }
 621     return NULL;
 622 }
 623
 624 const UChar*
 625 StringLocalizationInfo::getLocaleName(int32_t index) const {
 626     if (index >= 0 && index < getNumberOfDisplayLocales()) {
 627         return data[index+1][0];
 628     }
 629     return NULL;
 630 }
 631
 632 const UChar*
 633 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
 634     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
 635         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
 636         return data[localeIndex+1][ruleIndex+1];
 637     }
 638     return NULL;
 639 }
 640
 641 // ----------
 642
 643 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 644                                              const UnicodeString& locs,
 645                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 646   : ruleSets(NULL)
 647   , defaultRuleSet(NULL)
 648   , locale(alocale)
 649   , collator(NULL)
 650   , decimalFormatSymbols(NULL)
 651   , lenient(FALSE)
 652   , lenientParseRules(NULL)
 653   , localizations(NULL)
 654 {
 655   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 656   init(description, locinfo, perror, status);
 657 }
 658
 659 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 660                                              const UnicodeString& locs,
 661                                              UParseError& perror, UErrorCode& status)
 662   : ruleSets(NULL)
 663   , defaultRuleSet(NULL)
 664   , locale(Locale::getDefault())
 665   , collator(NULL)
 666   , decimalFormatSymbols(NULL)
 667   , lenient(FALSE)
 668   , lenientParseRules(NULL)
 669   , localizations(NULL)
 670 {
 671   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 672   init(description, locinfo, perror, status);
 673 }
 674
 675 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 676                                              LocalizationInfo* info,
 677                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 678   : ruleSets(NULL)
 679   , defaultRuleSet(NULL)
 680   , locale(alocale)
 681   , collator(NULL)
 682   , decimalFormatSymbols(NULL)
 683   , lenient(FALSE)
 684   , lenientParseRules(NULL)
 685   , localizations(NULL)
 686 {
 687   init(description, info, perror, status);
 688 }
 689
 690 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 691                          UParseError& perror,
 692                          UErrorCode& status)
 693   : ruleSets(NULL)
 694   , defaultRuleSet(NULL)
 695   , locale(Locale::getDefault())
 696   , collator(NULL)
 697   , decimalFormatSymbols(NULL)
 698   , lenient(FALSE)
 699   , lenientParseRules(NULL)
 700   , localizations(NULL)
 701 {
 702     init(description, NULL, perror, status);
 703 }
 704
 705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 706                          const Locale& aLocale,
 707                          UParseError& perror,
 708                          UErrorCode& status)
 709   : ruleSets(NULL)
 710   , defaultRuleSet(NULL)
 711   , locale(aLocale)
 712   , collator(NULL)
 713   , decimalFormatSymbols(NULL)
 714   , lenient(FALSE)
 715   , lenientParseRules(NULL)
 716   , localizations(NULL)
 717 {
 718     init(description, NULL, perror, status);
 719 }
 720
 721 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
 722   : ruleSets(NULL)
 723   , defaultRuleSet(NULL)
 724   , locale(alocale)
 725   , collator(NULL)
 726   , decimalFormatSymbols(NULL)
 727   , lenient(FALSE)
 728   , lenientParseRules(NULL)
 729   , localizations(NULL)
 730 {
 731     if (U_FAILURE(status)) {
 732         return;
 733     }
 734
 735     const char* fmt_tag = "";
 736     switch (tag) {
 737     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
 738     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
 739     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
 740     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
 741     }
 742
 743     // TODO: read localization info from resource
 744     LocalizationInfo* locinfo = NULL;
 745
 746     int32_t len = 0;
 747     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
 748     if (U_SUCCESS(status)) {
 749         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
 750                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
 751         const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status);
 752         UnicodeString desc(description, len);
 753         UParseError perror;
 754         init (desc, locinfo, perror, status);
 755     }
 756     ures_close(nfrb);
 757 }
 758
 759 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
 760   : NumberFormat(rhs)
 761   , ruleSets(NULL)
 762   , defaultRuleSet(NULL)
 763   , locale(rhs.locale)
 764   , collator(NULL)
 765   , decimalFormatSymbols(NULL)
 766   , lenient(FALSE)
 767   , lenientParseRules(NULL)
 768   , localizations(NULL)
 769 {
 770     this->operator=(rhs);
 771 }
 772
 773 // --------
 774
 775 RuleBasedNumberFormat&
 776 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
 777 {
 778     UErrorCode status = U_ZERO_ERROR;
 779     dispose();
 780     locale = rhs.locale;
 781     lenient = rhs.lenient;
 782
 783     UnicodeString rules = rhs.getRules();
 784     UParseError perror;
 785     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
 786
 787     return *this;
 788 }
 789
 790 RuleBasedNumberFormat::~RuleBasedNumberFormat()
 791 {
 792     dispose();
 793 }
 794
 795 Format*
 796 RuleBasedNumberFormat::clone(void) const
 797 {
 798     RuleBasedNumberFormat * result = NULL;
 799     UnicodeString rules = getRules();
 800     UErrorCode status = U_ZERO_ERROR;
 801     UParseError perror;
 802     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
 803     /* test for NULL */
 804     if (result == 0) {
 805         status = U_MEMORY_ALLOCATION_ERROR;
 806         return 0;
 807     }
 808     if (U_FAILURE(status)) {
 809         delete result;
 810         result = 0;
 811     } else {
 812         result->lenient = lenient;
 813     }
 814     return result;
 815 }
 816
 817 UBool
 818 RuleBasedNumberFormat::operator==(const Format& other) const
 819 {
 820     if (this == &other) {
 821         return TRUE;
 822     }
 823
 824     if (other.getDynamicClassID() == getStaticClassID()) {
 825         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
 826         if (locale == rhs.locale &&
 827             lenient == rhs.lenient &&
 828             (localizations == NULL
 829                 ? rhs.localizations == NULL
 830                 : (rhs.localizations == NULL
 831                     ? FALSE
 832                     : *localizations == rhs.localizations))) {
 833
 834             NFRuleSet** p = ruleSets;
 835             NFRuleSet** q = rhs.ruleSets;
 836             if (p == NULL) {
 837                 return q == NULL;
 838             } else if (q == NULL) {
 839                 return FALSE;
 840             }
 841             while (*p && *q && (**p == **q)) {
 842                 ++p;
 843                 ++q;
 844             }
 845             return *q == NULL && *p == NULL;
 846         }
 847     }
 848
 849     return FALSE;
 850 }
 851
 852 UnicodeString
 853 RuleBasedNumberFormat::getRules() const
 854 {
 855     UnicodeString result;
 856     if (ruleSets != NULL) {
 857         for (NFRuleSet** p = ruleSets; *p; ++p) {
 858             (*p)->appendRules(result);
 859         }
 860     }
 861     return result;
 862 }
 863
 864 UnicodeString
 865 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
 866 {
 867     if (localizations) {
 868       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
 869       return string;
 870     } else if (ruleSets) {
 871         UnicodeString result;
 872         for (NFRuleSet** p = ruleSets; *p; ++p) {
 873             NFRuleSet* rs = *p;
 874             if (rs->isPublic()) {
 875                 if (--index == -1) {
 876                     rs->getName(result);
 877                     return result;
 878                 }
 879             }
 880         }
 881     }
 882     UnicodeString empty;
 883     return empty;
 884 }
 885
 886 int32_t
 887 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
 888 {
 889     int32_t result = 0;
 890     if (localizations) {
 891       result = localizations->getNumberOfRuleSets();
 892     } else if (ruleSets) {
 893         for (NFRuleSet** p = ruleSets; *p; ++p) {
 894             if ((**p).isPublic()) {
 895                 ++result;
 896             }
 897         }
 898     }
 899     return result;
 900 }
 901
 902 int32_t
 903 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
 904     if (localizations) {
 905         return localizations->getNumberOfDisplayLocales();
 906     }
 907     return 0;
 908 }
 909
 910 Locale
 911 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
 912     if (U_FAILURE(status)) {
 913         return Locale("");
 914     }
 915     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
 916         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
 917         char buffer[64];
 918         int32_t cap = name.length() + 1;
 919         char* bp = buffer;
 920         if (cap > 64) {
 921             bp = (char *)uprv_malloc(cap);
 922             if (bp == NULL) {
 923                 status = U_MEMORY_ALLOCATION_ERROR;
 924                 return Locale("");
 925             }
 926         }
 927         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
 928         Locale retLocale(bp);
 929         if (bp != buffer) {
 930             uprv_free(bp);
 931         }
 932         return retLocale;
 933     }
 934     status = U_ILLEGAL_ARGUMENT_ERROR;
 935     Locale retLocale;
 936     return retLocale;
 937 }
 938
 939 UnicodeString
 940 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
 941     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
 942         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
 943         int32_t len = localeName.length();
 944         UChar* localeStr = localeName.getBuffer(len + 1);
 945         while (len >= 0) {
 946             localeStr[len] = 0;
 947             int32_t ix = localizations->indexForLocale(localeStr);
 948             if (ix >= 0) {
 949                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
 950                 return name;
 951             }
 952
 953             // trim trailing portion, skipping over ommitted sections
 954             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
 955             while (len > 0 && localeStr[len-1] == 0x005F) --len;
 956         }
 957         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
 958         return name;
 959     }
 960     UnicodeString bogus;
 961     bogus.setToBogus();
 962     return bogus;
 963 }
 964
 965 UnicodeString
 966 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
 967     if (localizations) {
 968         UnicodeString rsn(ruleSetName);
 969         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
 970         return getRuleSetDisplayName(ix, localeParam);
 971     }
 972     UnicodeString bogus;
 973     bogus.setToBogus();
 974     return bogus;
 975 }
 976
 977 NFRuleSet*
 978 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
 979 {
 980     if (U_SUCCESS(status) && ruleSets) {
 981         for (NFRuleSet** p = ruleSets; *p; ++p) {
 982             NFRuleSet* rs = *p;
 983             if (rs->isNamed(name)) {
 984                 return rs;
 985             }
 986         }
 987         status = U_ILLEGAL_ARGUMENT_ERROR;
 988     }
 989     return NULL;
 990 }
 991
 992 UnicodeString&
 993 RuleBasedNumberFormat::format(int32_t number,
 994                               UnicodeString& toAppendTo,
 995                               FieldPosition& /* pos */) const
 996 {
 997     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
 998     return toAppendTo;
 999 }
1000
1001
1002 UnicodeString&
1003 RuleBasedNumberFormat::format(int64_t number,
1004                               UnicodeString& toAppendTo,
1005                               FieldPosition& /* pos */) const
1006 {
1007     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1008     return toAppendTo;
1009 }
1010
1011
1012 UnicodeString&
1013 RuleBasedNumberFormat::format(double number,
1014                               UnicodeString& toAppendTo,
1015                               FieldPosition& /* pos */) const
1016 {
1017     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1018     return toAppendTo;
1019 }
1020
1021
1022 UnicodeString&
1023 RuleBasedNumberFormat::format(int32_t number,
1024                               const UnicodeString& ruleSetName,
1025                               UnicodeString& toAppendTo,
1026                               FieldPosition& /* pos */,
1027                               UErrorCode& status) const
1028 {
1029     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1030     if (U_SUCCESS(status)) {
1031         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1032             // throw new IllegalArgumentException("Can't use internal rule set");
1033             status = U_ILLEGAL_ARGUMENT_ERROR;
1034         } else {
1035             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1036             if (rs) {
1037                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1038             }
1039         }
1040     }
1041     return toAppendTo;
1042 }
1043
1044
1045 UnicodeString&
1046 RuleBasedNumberFormat::format(int64_t number,
1047                               const UnicodeString& ruleSetName,
1048                               UnicodeString& toAppendTo,
1049                               FieldPosition& /* pos */,
1050                               UErrorCode& status) const
1051 {
1052     if (U_SUCCESS(status)) {
1053         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1054             // throw new IllegalArgumentException("Can't use internal rule set");
1055             status = U_ILLEGAL_ARGUMENT_ERROR;
1056         } else {
1057             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1058             if (rs) {
1059                 rs->format(number, toAppendTo, toAppendTo.length());
1060             }
1061         }
1062     }
1063     return toAppendTo;
1064 }
1065
1066
1067 // make linker happy
1068 UnicodeString&
1069 RuleBasedNumberFormat::format(const Formattable& obj,
1070                               UnicodeString& toAppendTo,
1071                               FieldPosition& pos,
1072                               UErrorCode& status) const
1073 {
1074     return NumberFormat::format(obj, toAppendTo, pos, status);
1075 }
1076
1077 UnicodeString&
1078 RuleBasedNumberFormat::format(double number,
1079                               const UnicodeString& ruleSetName,
1080                               UnicodeString& toAppendTo,
1081                               FieldPosition& /* pos */,
1082                               UErrorCode& status) const
1083 {
1084     if (U_SUCCESS(status)) {
1085         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1086             // throw new IllegalArgumentException("Can't use internal rule set");
1087             status = U_ILLEGAL_ARGUMENT_ERROR;
1088         } else {
1089             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1090             if (rs) {
1091                 rs->format(number, toAppendTo, toAppendTo.length());
1092             }
1093         }
1094     }
1095     return toAppendTo;
1096 }
1097
1098 void
1099 RuleBasedNumberFormat::parse(const UnicodeString& text,
1100                              Formattable& result,
1101                              ParsePosition& parsePosition) const
1102 {
1103     if (!ruleSets) {
1104         parsePosition.setErrorIndex(0);
1105         return;
1106     }
1107
1108     UnicodeString workingText(text, parsePosition.getIndex());
1109     ParsePosition workingPos(0);
1110
1111     ParsePosition high_pp(0);
1112     Formattable high_result;
1113
1114     for (NFRuleSet** p = ruleSets; *p; ++p) {
1115         NFRuleSet *rp = *p;
1116         if (rp->isPublic()) {
1117             ParsePosition working_pp(0);
1118             Formattable working_result;
1119
1120             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1121             if (working_pp.getIndex() > high_pp.getIndex()) {
1122                 high_pp = working_pp;
1123                 high_result = working_result;
1124
1125                 if (high_pp.getIndex() == workingText.length()) {
1126                     break;
1127                 }
1128             }
1129         }
1130     }
1131
1132     int32_t startIndex = parsePosition.getIndex();
1133     parsePosition.setIndex(startIndex + high_pp.getIndex());
1134     if (high_pp.getIndex() > 0) {
1135         parsePosition.setErrorIndex(-1);
1136     } else {
1137         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1138         parsePosition.setErrorIndex(startIndex + errorIndex);
1139     }
1140     result = high_result;
1141     if (result.getType() == Formattable::kDouble) {
1142         int32_t r = (int32_t)result.getDouble();
1143         if ((double)r == result.getDouble()) {
1144             result.setLong(r);
1145         }
1146     }
1147 }
1148
1149 #if !UCONFIG_NO_COLLATION
1150
1151 void
1152 RuleBasedNumberFormat::setLenient(UBool enabled)
1153 {
1154     lenient = enabled;
1155     if (!enabled && collator) {
1156         delete collator;
1157         collator = NULL;
1158     }
1159 }
1160
1161 #endif
1162
1163 void
1164 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1165     if (U_SUCCESS(status)) {
1166         if (ruleSetName.isEmpty()) {
1167           if (localizations) {
1168               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1169               defaultRuleSet = findRuleSet(name, status);
1170           } else {
1171             initDefaultRuleSet();
1172           }
1173         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1174             status = U_ILLEGAL_ARGUMENT_ERROR;
1175         } else {
1176             NFRuleSet* result = findRuleSet(ruleSetName, status);
1177             if (result != NULL) {
1178                 defaultRuleSet = result;
1179             }
1180         }
1181     }
1182 }
1183
1184 UnicodeString
1185 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1186   UnicodeString result;
1187   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1188     defaultRuleSet->getName(result);
1189   } else {
1190     result.setToBogus();
1191   }
1192   return result;
1193 }
1194
1195 void
1196 RuleBasedNumberFormat::initDefaultRuleSet()
1197 {
1198     defaultRuleSet = NULL;
1199     if (!ruleSets) {
1200       return;
1201     }
1202
1203     NFRuleSet**p = &ruleSets[0];
1204     while (*p) {
1205         ++p;
1206     }
1207
1208     defaultRuleSet = *--p;
1209     if (!defaultRuleSet->isPublic()) {
1210         while (p != ruleSets) {
1211             if ((*--p)->isPublic()) {
1212                 defaultRuleSet = *p;
1213                 break;
1214             }
1215         }
1216     }
1217 }
1218
1219
1220 void
1221 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1222                             UParseError& pErr, UErrorCode& status)
1223 {
1224     // TODO: implement UParseError
1225     uprv_memset(&pErr, 0, sizeof(UParseError));
1226     // Note: this can leave ruleSets == NULL, so remaining code should check
1227     if (U_FAILURE(status)) {
1228         return;
1229     }
1230
1231     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1232
1233     UnicodeString description(rules);
1234     if (!description.length()) {
1235         status = U_MEMORY_ALLOCATION_ERROR;
1236         return;
1237     }
1238
1239     // start by stripping the trailing whitespace from all the rules
1240     // (this is all the whitespace follwing each semicolon in the
1241     // description).  This allows us to look for rule-set boundaries
1242     // by searching for ";%" without having to worry about whitespace
1243     // between the ; and the %
1244     stripWhitespace(description);
1245
1246     // check to see if there's a set of lenient-parse rules.  If there
1247     // is, pull them out into our temporary holding place for them,
1248     // and delete them from the description before the real desciption-
1249     // parsing code sees them
1250     int32_t lp = description.indexOf(gLenientParse);
1251     if (lp != -1) {
1252         // we've got to make sure we're not in the middle of a rule
1253         // (where "%%lenient-parse" would actually get treated as
1254         // rule text)
1255         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1256             // locate the beginning and end of the actual collation
1257             // rules (there may be whitespace between the name and
1258             // the first token in the description)
1259             int lpEnd = description.indexOf(gSemiPercent, lp);
1260
1261             if (lpEnd == -1) {
1262                 lpEnd = description.length() - 1;
1263             }
1264             int lpStart = lp + u_strlen(gLenientParse);
1265             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1266                 ++lpStart;
1267             }
1268
1269             // copy out the lenient-parse rules and delete them
1270             // from the description
1271             lenientParseRules = new UnicodeString();
1272             /* test for NULL */
1273             if (lenientParseRules == 0) {
1274                 status = U_MEMORY_ALLOCATION_ERROR;
1275                 return;
1276             }
1277             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1278
1279             description.remove(lp, lpEnd + 1 - lp);
1280         }
1281     }
1282
1283     // pre-flight parsing the description and count the number of
1284     // rule sets (";%" marks the end of one rule set and the beginning
1285     // of the next)
1286     int numRuleSets = 0;
1287     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1288         ++numRuleSets;
1289         ++p;
1290     }
1291     ++numRuleSets;
1292
1293     // our rule list is an array of the appropriate size
1294     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1295     /* test for NULL */
1296     if (ruleSets == 0) {
1297         status = U_MEMORY_ALLOCATION_ERROR;
1298         return;
1299     }
1300
1301     for (int i = 0; i <= numRuleSets; ++i) {
1302         ruleSets[i] = NULL;
1303     }
1304
1305     // divide up the descriptions into individual rule-set descriptions
1306     // and store them in a temporary array.  At each step, we also
1307     // new up a rule set, but all this does is initialize its name
1308     // and remove it from its description.  We can't actually parse
1309     // the rest of the descriptions and finish initializing everything
1310     // because we have to know the names and locations of all the rule
1311     // sets before we can actually set everything up
1312     if(!numRuleSets) {
1313         status = U_ILLEGAL_ARGUMENT_ERROR;
1314         return;
1315     }
1316     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1317     if (ruleSetDescriptions == 0) {
1318         status = U_MEMORY_ALLOCATION_ERROR;
1319         return;
1320     }
1321
1322     {
1323         int curRuleSet = 0;
1324         int32_t start = 0;
1325         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1326             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1327             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1328             if (ruleSets[curRuleSet] == 0) {
1329                 status = U_MEMORY_ALLOCATION_ERROR;
1330                 goto cleanup;
1331             }
1332             ++curRuleSet;
1333             start = p + 1;
1334         }
1335         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1336         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1337         if (ruleSets[curRuleSet] == 0) {
1338             status = U_MEMORY_ALLOCATION_ERROR;
1339             goto cleanup;
1340         }
1341     }
1342
1343     // now we can take note of the formatter's default rule set, which
1344     // is the last public rule set in the description (it's the last
1345     // rather than the first so that a user can create a new formatter
1346     // from an existing formatter and change its default behavior just
1347     // by appending more rule sets to the end)
1348
1349     // {dlf} Initialization of a fraction rule set requires the default rule
1350     // set to be known.  For purposes of initialization, this is always the
1351     // last public rule set, no matter what the localization data says.
1352     initDefaultRuleSet();
1353
1354     // finally, we can go back through the temporary descriptions
1355     // list and finish seting up the substructure (and we throw
1356     // away the temporary descriptions as we go)
1357     {
1358         for (int i = 0; i < numRuleSets; i++) {
1359             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1360         }
1361     }
1362
1363     // Now that the rules are initialized, the 'real' default rule
1364     // set can be adjusted by the localization data.
1365
1366     // The C code keeps the localization array as is, rather than building
1367     // a separate array of the public rule set names, so we have less work
1368     // to do here-- but we still need to check the names.
1369
1370     if (localizationInfos) {
1371         // confirm the names, if any aren't in the rules, that's an error
1372         // it is ok if the rules contain public rule sets that are not in this list
1373         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1374             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1375             NFRuleSet* rs = findRuleSet(name, status);
1376             if (rs == NULL) {
1377                 break; // error
1378             }
1379             if (i == 0) {
1380                 defaultRuleSet = rs;
1381             }
1382         }
1383     } else {
1384         defaultRuleSet = getDefaultRuleSet();
1385     }
1386
1387 cleanup:
1388     delete[] ruleSetDescriptions;
1389 }
1390
1391 void
1392 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1393 {
1394     // iterate through the characters...
1395     UnicodeString result;
1396
1397     int start = 0;
1398     while (start != -1 && start < description.length()) {
1399         // seek to the first non-whitespace character...
1400         while (start < description.length()
1401             && uprv_isRuleWhiteSpace(description.charAt(start))) {
1402             ++start;
1403         }
1404
1405         // locate the next semicolon in the text and copy the text from
1406         // our current position up to that semicolon into the result
1407         int32_t p = description.indexOf(gSemiColon, start);
1408         if (p == -1) {
1409             // or if we don't find a semicolon, just copy the rest of
1410             // the string into the result
1411             result.append(description, start, description.length() - start);
1412             start = -1;
1413         }
1414         else if (p < description.length()) {
1415             result.append(description, start, p + 1 - start);
1416             start = p + 1;
1417         }
1418
1419         // when we get here, we've seeked off the end of the sring, and
1420         // we terminate the loop (we continue until *start* is -1 rather
1421         // than until *p* is -1, because otherwise we'd miss the last
1422         // rule in the description)
1423         else {
1424             start = -1;
1425         }
1426     }
1427
1428     description.setTo(result);
1429 }
1430
1431
1432 void
1433 RuleBasedNumberFormat::dispose()
1434 {
1435     if (ruleSets) {
1436         for (NFRuleSet** p = ruleSets; *p; ++p) {
1437             delete *p;
1438         }
1439         uprv_free(ruleSets);
1440         ruleSets = NULL;
1441     }
1442
1443 #if !UCONFIG_NO_COLLATION
1444     delete collator;
1445 #endif
1446     collator = NULL;
1447
1448     delete decimalFormatSymbols;
1449     decimalFormatSymbols = NULL;
1450
1451     delete lenientParseRules;
1452     lenientParseRules = NULL;
1453
1454     if (localizations) localizations = localizations->unref();
1455 }
1456
1457
1458 //-----------------------------------------------------------------------
1459 // package-internal API
1460 //-----------------------------------------------------------------------
1461
1462 /**
1463  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1464  * this function creates it the first time it's called.
1465  * @return The collator to use for lenient parsing, or null if lenient parsing
1466  * is turned off.
1467 */
1468 Collator*
1469 RuleBasedNumberFormat::getCollator() const
1470 {
1471 #if !UCONFIG_NO_COLLATION
1472     if (!ruleSets) {
1473         return NULL;
1474     }
1475
1476     // lazy-evaulate the collator
1477     if (collator == NULL && lenient) {
1478         // create a default collator based on the formatter's locale,
1479         // then pull out that collator's rules, append any additional
1480         // rules specified in the description, and create a _new_
1481         // collator based on the combinaiton of those rules
1482
1483         UErrorCode status = U_ZERO_ERROR;
1484
1485         Collator* temp = Collator::createInstance(locale, status);
1486         if (U_SUCCESS(status) &&
1487             temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1488
1489             RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
1490             if (lenientParseRules) {
1491                 UnicodeString rules(newCollator->getRules());
1492                 rules.append(*lenientParseRules);
1493
1494                 newCollator = new RuleBasedCollator(rules, status);
1495                 // Exit if newCollator could not be created.
1496                 if (newCollator == NULL) {
1497                         return NULL;
1498                 }
1499             } else {
1500                 temp = NULL;
1501             }
1502             if (U_SUCCESS(status)) {
1503                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1504                 // cast away const
1505                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1506             } else {
1507                 delete newCollator;
1508             }
1509         }
1510         delete temp;
1511     }
1512 #endif
1513
1514     // if lenient-parse mode is off, this will be null
1515     // (see setLenientParseMode())
1516     return collator;
1517 }
1518
1519
1520 /**
1521  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1522  * instances owned by this formatter.  This object is lazily created: this function
1523  * creates it the first time it's called.
1524  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1525  * instances owned by this formatter.
1526 */
1527 DecimalFormatSymbols*
1528 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1529 {
1530     // lazy-evaluate the DecimalFormatSymbols object.  This object
1531     // is shared by all DecimalFormat instances belonging to this
1532     // formatter
1533     if (decimalFormatSymbols == NULL) {
1534         UErrorCode status = U_ZERO_ERROR;
1535         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1536         if (U_SUCCESS(status)) {
1537             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1538         } else {
1539             delete temp;
1540         }
1541     }
1542     return decimalFormatSymbols;
1543 }
1544
1545 U_NAMESPACE_END
1546
1547 /* U_HAVE_RBNF */
1548 #endif