icuSources/i18n/rbnf.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2010, International Business Machines Corporation
   4 * and others. All Rights Reserved.
   5 *******************************************************************************
   6 */
   7
   8 #include <typeinfo>  // for 'typeid' to work
   9
  10 #include "unicode/rbnf.h"
  11
  12 #if U_HAVE_RBNF
  13
  14 #include "unicode/normlzr.h"
  15 #include "unicode/tblcoll.h"
  16 #include "unicode/uchar.h"
  17 #include "unicode/ucol.h"
  18 #include "unicode/uloc.h"
  19 #include "unicode/unum.h"
  20 #include "unicode/ures.h"
  21 #include "unicode/ustring.h"
  22 #include "unicode/utf16.h"
  23 #include "unicode/udata.h"
  24 #include "nfrs.h"
  25
  26 #include "cmemory.h"
  27 #include "cstring.h"
  28 #include "util.h"
  29 #include "uresimp.h"
  30
  31 // debugging
  32 // #define DEBUG
  33
  34 #ifdef DEBUG
  35 #include "stdio.h"
  36 #endif
  37
  38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
  39
// NUL-terminated UTF-16 spelling of "%%".
static const UChar gPercentPercent[] =
{
    0x25, 0x25, 0
}; /* "%%" */

// All urbnf objects are created through openRules, so we init all of the
// Unicode string constants required by rbnf, nfrs, or nfr here.
// NUL-terminated UTF-16 spelling of "%%lenient-parse:".
static const UChar gLenientParse[] =
{
    0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
}; /* "%%lenient-parse:" */
// U+003B SEMICOLON as a single code unit.
static const UChar gSemiColon = 0x003B;
// NUL-terminated UTF-16 spelling of ";%".
static const UChar gSemiPercent[] =
{
    0x3B, 0x25, 0
}; /* ";%" */
  56
// kHalfMaxDouble is (1 << 22) as a double; kMaxDouble is its square.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)

// Temporary workaround - when noParse is true, do nothing in parse.
// TODO: We need a real fix - see #6895/#6896
// NULL-terminated list of language codes; the tag-based constructor sets
// noParse for URBNF_SPELLOUT formatters whose locale language is listed here.
static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
  64
  65 U_NAMESPACE_BEGIN
  66
  67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
  68
  69 /*
  70 This is a utility class. It does not use ICU's RTTI.
  71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
  72 Please make sure that intltest passes on Windows in Release mode,
  73 since the string pooling per compilation unit will mess up how RTTI works.
  74 The RTTI code was also removed due to lack of code coverage.
  75 */
  76 class LocalizationInfo : public UMemory {
  77 protected:
  78     virtual ~LocalizationInfo() {};
  79     uint32_t refcount;
  80
  81 public:
  82     LocalizationInfo() : refcount(0) {}
  83
  84     LocalizationInfo* ref(void) {
  85         ++refcount;
  86         return this;
  87     }
  88
  89     LocalizationInfo* unref(void) {
  90         if (refcount && --refcount == 0) {
  91             delete this;
  92         }
  93         return NULL;
  94     }
  95
  96     virtual UBool operator==(const LocalizationInfo* rhs) const;
  97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
  98
  99     virtual int32_t getNumberOfRuleSets(void) const = 0;
 100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
 101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
 102     virtual const UChar* getLocaleName(int32_t index) const = 0;
 103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
 104
 105     virtual int32_t indexForLocale(const UChar* locale) const;
 106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
 107
 108 //    virtual UClassID getDynamicClassID() const = 0;
 109 //    static UClassID getStaticClassID(void);
 110 };
 111
 112 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
 113
 114 // if both strings are NULL, this returns TRUE
 115 static UBool
 116 streq(const UChar* lhs, const UChar* rhs) {
 117     if (rhs == lhs) {
 118         return TRUE;
 119     }
 120     if (lhs && rhs) {
 121         return u_strcmp(lhs, rhs) == 0;
 122     }
 123     return FALSE;
 124 }
 125
 126 UBool
 127 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
 128     if (rhs) {
 129         if (this == rhs) {
 130             return TRUE;
 131         }
 132
 133         int32_t rsc = getNumberOfRuleSets();
 134         if (rsc == rhs->getNumberOfRuleSets()) {
 135             for (int i = 0; i < rsc; ++i) {
 136                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
 137                     return FALSE;
 138                 }
 139             }
 140             int32_t dlc = getNumberOfDisplayLocales();
 141             if (dlc == rhs->getNumberOfDisplayLocales()) {
 142                 for (int i = 0; i < dlc; ++i) {
 143                     const UChar* locale = getLocaleName(i);
 144                     int32_t ix = rhs->indexForLocale(locale);
 145                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
 146                     if (!streq(locale, rhs->getLocaleName(ix))) {
 147                         return FALSE;
 148                     }
 149                     for (int j = 0; j < rsc; ++j) {
 150                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
 151                             return FALSE;
 152                         }
 153                     }
 154                 }
 155                 return TRUE;
 156             }
 157         }
 158     }
 159     return FALSE;
 160 }
 161
 162 int32_t
 163 LocalizationInfo::indexForLocale(const UChar* locale) const {
 164     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
 165         if (streq(locale, getLocaleName(i))) {
 166             return i;
 167         }
 168     }
 169     return -1;
 170 }
 171
 172 int32_t
 173 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
 174     if (ruleset) {
 175         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
 176             if (streq(ruleset, getRuleSetName(i))) {
 177                 return i;
 178             }
 179         }
 180     }
 181     return -1;
 182 }
 183
 184
 185 typedef void (*Fn_Deleter)(void*);
 186
 187 class VArray {
 188     void** buf;
 189     int32_t cap;
 190     int32_t size;
 191     Fn_Deleter deleter;
 192 public:
 193     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
 194
 195     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
 196
 197     ~VArray() {
 198         if (deleter) {
 199             for (int i = 0; i < size; ++i) {
 200                 (*deleter)(buf[i]);
 201             }
 202         }
 203         uprv_free(buf);
 204     }
 205
 206     int32_t length() {
 207         return size;
 208     }
 209
 210     void add(void* elem, UErrorCode& status) {
 211         if (U_SUCCESS(status)) {
 212             if (size == cap) {
 213                 if (cap == 0) {
 214                     cap = 1;
 215                 } else if (cap < 256) {
 216                     cap *= 2;
 217                 } else {
 218                     cap += 256;
 219                 }
 220                 if (buf == NULL) {
 221                     buf = (void**)uprv_malloc(cap * sizeof(void*));
 222                 } else {
 223                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
 224                 }
 225                 if (buf == NULL) {
 226                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
 227                     status = U_MEMORY_ALLOCATION_ERROR;
 228                     return;
 229                 }
 230                 void* start = &buf[size];
 231                 size_t count = (cap - size) * sizeof(void*);
 232                 uprv_memset(start, 0, count); // fill with nulls, just because
 233             }
 234             buf[size++] = elem;
 235         }
 236     }
 237
 238     void** release(void) {
 239         void** result = buf;
 240         buf = NULL;
 241         cap = 0;
 242         size = 0;
 243         return result;
 244     }
 245 };
 246
 247 class LocDataParser;
 248
 249 class StringLocalizationInfo : public LocalizationInfo {
 250     UChar* info;
 251     UChar*** data;
 252     int32_t numRuleSets;
 253     int32_t numLocales;
 254
 255 friend class LocDataParser;
 256
 257     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
 258         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
 259     {
 260     }
 261
 262 public:
 263     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
 264
 265     virtual ~StringLocalizationInfo();
 266     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
 267     virtual const UChar* getRuleSetName(int32_t index) const;
 268     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
 269     virtual const UChar* getLocaleName(int32_t index) const;
 270     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
 271
 272 //    virtual UClassID getDynamicClassID() const;
 273 //    static UClassID getStaticClassID(void);
 274
 275 private:
 276     void init(UErrorCode& status) const;
 277 };
 278
 279
// Code units with syntactic meaning in a localization string.
enum {
    OPEN_ANGLE = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA = 0x002c, /* ',' */
    TICK = 0x0027, /* '\'' */
    QUOTE = 0x0022, /* '"' */
    SPACE = 0x0020 /* ' ' */
};
 288
 289 /**
 290  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
 291  */
 292 class LocDataParser {
 293     UChar* data;
 294     const UChar* e;
 295     UChar* p;
 296     UChar ch;
 297     UParseError& pe;
 298     UErrorCode& ec;
 299
 300 public:
 301     LocDataParser(UParseError& parseError, UErrorCode& status)
 302         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
 303     ~LocDataParser() {}
 304
 305     /*
 306     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
 307     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
 308     */
 309     StringLocalizationInfo* parse(UChar* data, int32_t len);
 310
 311 private:
 312
 313     void inc(void) { ++p; ch = 0xffff; }
 314     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
 315     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
 316     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
 317     UBool inList(UChar c, const UChar* list) const {
 318         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
 319         while (*list && *list != c) ++list; return *list == c;
 320     }
 321     void parseError(const char* msg);
 322
 323     StringLocalizationInfo* doParse(void);
 324
 325     UChar** nextArray(int32_t& requiredLength);
 326     UChar*  nextString(void);
 327 };
 328
 329 #ifdef DEBUG
 330 #define ERROR(msg) parseError(msg); return NULL;
 331 #else
 332 #define ERROR(msg) parseError(NULL); return NULL;
 333 #endif
 334
 335
// Terminator list for a string opened with a double quote.
static const UChar DQUOTE_STOPLIST[] = {
    QUOTE, 0
};

// Terminator list for a string opened with a single quote.
static const UChar SQUOTE_STOPLIST[] = {
    TICK, 0
};

// Terminator list for an unquoted string.  SPACE must stay first:
// LocDataParser::inList treats a leading SPACE as "any rule white space".
static const UChar NOQUOTE_STOPLIST[] = {
    SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
};
 347
 348 static void
 349 DeleteFn(void* p) {
 350   uprv_free(p);
 351 }
 352
 353 StringLocalizationInfo*
 354 LocDataParser::parse(UChar* _data, int32_t len) {
 355     if (U_FAILURE(ec)) {
 356         if (_data) uprv_free(_data);
 357         return NULL;
 358     }
 359
 360     pe.line = 0;
 361     pe.offset = -1;
 362     pe.postContext[0] = 0;
 363     pe.preContext[0] = 0;
 364
 365     if (_data == NULL) {
 366         ec = U_ILLEGAL_ARGUMENT_ERROR;
 367         return NULL;
 368     }
 369
 370     if (len <= 0) {
 371         ec = U_ILLEGAL_ARGUMENT_ERROR;
 372         uprv_free(_data);
 373         return NULL;
 374     }
 375
 376     data = _data;
 377     e = data + len;
 378     p = _data;
 379     ch = 0xffff;
 380
 381     return doParse();
 382 }
 383
 384
 385 StringLocalizationInfo*
 386 LocDataParser::doParse(void) {
 387     skipWhitespace();
 388     if (!checkInc(OPEN_ANGLE)) {
 389         ERROR("Missing open angle");
 390     } else {
 391         VArray array(DeleteFn);
 392         UBool mightHaveNext = TRUE;
 393         int32_t requiredLength = -1;
 394         while (mightHaveNext) {
 395             mightHaveNext = FALSE;
 396             UChar** elem = nextArray(requiredLength);
 397             skipWhitespace();
 398             UBool haveComma = check(COMMA);
 399             if (elem) {
 400                 array.add(elem, ec);
 401                 if (haveComma) {
 402                     inc();
 403                     mightHaveNext = TRUE;
 404                 }
 405             } else if (haveComma) {
 406                 ERROR("Unexpected character");
 407             }
 408         }
 409
 410         skipWhitespace();
 411         if (!checkInc(CLOSE_ANGLE)) {
 412             if (check(OPEN_ANGLE)) {
 413                 ERROR("Missing comma in outer array");
 414             } else {
 415                 ERROR("Missing close angle bracket in outer array");
 416             }
 417         }
 418
 419         skipWhitespace();
 420         if (p != e) {
 421             ERROR("Extra text after close of localization data");
 422         }
 423
 424         array.add(NULL, ec);
 425         if (U_SUCCESS(ec)) {
 426             int32_t numLocs = array.length() - 2; // subtract first, NULL
 427             UChar*** result = (UChar***)array.release();
 428
 429             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
 430         }
 431     }
 432
 433     ERROR("Unknown error");
 434 }
 435
 436 UChar**
 437 LocDataParser::nextArray(int32_t& requiredLength) {
 438     if (U_FAILURE(ec)) {
 439         return NULL;
 440     }
 441
 442     skipWhitespace();
 443     if (!checkInc(OPEN_ANGLE)) {
 444         ERROR("Missing open angle");
 445     }
 446
 447     VArray array;
 448     UBool mightHaveNext = TRUE;
 449     while (mightHaveNext) {
 450         mightHaveNext = FALSE;
 451         UChar* elem = nextString();
 452         skipWhitespace();
 453         UBool haveComma = check(COMMA);
 454         if (elem) {
 455             array.add(elem, ec);
 456             if (haveComma) {
 457                 inc();
 458                 mightHaveNext = TRUE;
 459             }
 460         } else if (haveComma) {
 461             ERROR("Unexpected comma");
 462         }
 463     }
 464     skipWhitespace();
 465     if (!checkInc(CLOSE_ANGLE)) {
 466         if (check(OPEN_ANGLE)) {
 467             ERROR("Missing close angle bracket in inner array");
 468         } else {
 469             ERROR("Missing comma in inner array");
 470         }
 471     }
 472
 473     array.add(NULL, ec);
 474     if (U_SUCCESS(ec)) {
 475         if (requiredLength == -1) {
 476             requiredLength = array.length() + 1;
 477         } else if (array.length() != requiredLength) {
 478             ec = U_ILLEGAL_ARGUMENT_ERROR;
 479             ERROR("Array not of required length");
 480         }
 481
 482         return (UChar**)array.release();
 483     }
 484     ERROR("Unknown Error");
 485 }
 486
// Parses the next string at the current position and returns a pointer to
// it inside the text buffer, or NULL when there is no string (which is not
// by itself an error) or after ERROR() has recorded a failure.
// The string is terminated in place: the code unit that ended it is
// overwritten with NUL and remembered in `ch` so the caller can still test it.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        // a leading quote selects a stop list containing only that quote
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            inc();
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // scan to the first terminator; ch is deliberately not touched here
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminating code unit
        if (p > start) {
            ch = x; // remember what the NUL below replaces
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            inc(); // step past the closing quote (also clears ch)
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
 529
 530 void
 531 LocDataParser::parseError(const char* /*str*/) {
 532     if (!data) {
 533         return;
 534     }
 535
 536     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
 537     if (start < data) {
 538         start = data;
 539     }
 540     for (UChar* x = p; --x >= start;) {
 541         if (!*x) {
 542             start = x+1;
 543             break;
 544         }
 545     }
 546     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
 547     if (limit > e) {
 548         limit = e;
 549     }
 550     u_strncpy(pe.preContext, start, (int32_t)(p-start));
 551     pe.preContext[p-start] = 0;
 552     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
 553     pe.postContext[limit-p] = 0;
 554     pe.offset = (int32_t)(p - data);
 555
 556 #ifdef DEBUG
 557     fprintf(stderr, "%s at or near character %d: ", str, p-data);
 558
 559     UnicodeString msg;
 560     msg.append(start, p - start);
 561     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
 562     msg.append(p, limit-p);
 563     msg.append("'");
 564
 565     char buf[128];
 566     int32_t len = msg.extract(0, msg.length(), buf, 128);
 567     if (len >= 128) {
 568         buf[127] = 0;
 569     } else {
 570         buf[len] = 0;
 571     }
 572     fprintf(stderr, "%s\n", buf);
 573     fflush(stderr);
 574 #endif
 575
 576     uprv_free(data);
 577     data = NULL;
 578     p = NULL;
 579     e = NULL;
 580
 581     if (U_SUCCESS(ec)) {
 582         ec = U_PARSE_ERROR;
 583     }
 584 }
 585
 586 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
 587
 588 StringLocalizationInfo*
 589 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
 590     if (U_FAILURE(status)) {
 591         return NULL;
 592     }
 593
 594     int32_t len = info.length();
 595     if (len == 0) {
 596         return NULL; // no error;
 597     }
 598
 599     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
 600     if (!p) {
 601         status = U_MEMORY_ALLOCATION_ERROR;
 602         return NULL;
 603     }
 604     info.extract(p, len, status);
 605     if (!U_FAILURE(status)) {
 606         status = U_ZERO_ERROR; // clear warning about non-termination
 607     }
 608
 609     LocDataParser parser(perror, status);
 610     return parser.parse(p, len);
 611 }
 612
 613 StringLocalizationInfo::~StringLocalizationInfo() {
 614     for (UChar*** p = (UChar***)data; *p; ++p) {
 615         // remaining data is simply pointer into our unicode string data.
 616         if (*p) uprv_free(*p);
 617     }
 618     if (data) uprv_free(data);
 619     if (info) uprv_free(info);
 620 }
 621
 622
 623 const UChar*
 624 StringLocalizationInfo::getRuleSetName(int32_t index) const {
 625     if (index >= 0 && index < getNumberOfRuleSets()) {
 626         return data[0][index];
 627     }
 628     return NULL;
 629 }
 630
 631 const UChar*
 632 StringLocalizationInfo::getLocaleName(int32_t index) const {
 633     if (index >= 0 && index < getNumberOfDisplayLocales()) {
 634         return data[index+1][0];
 635     }
 636     return NULL;
 637 }
 638
 639 const UChar*
 640 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
 641     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
 642         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
 643         return data[localeIndex+1][ruleIndex+1];
 644     }
 645     return NULL;
 646 }
 647
 648 // ----------
 649
 650 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 651                                              const UnicodeString& locs,
 652                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 653   : ruleSets(NULL)
 654   , defaultRuleSet(NULL)
 655   , locale(alocale)
 656   , collator(NULL)
 657   , decimalFormatSymbols(NULL)
 658   , lenient(FALSE)
 659   , lenientParseRules(NULL)
 660   , localizations(NULL)
 661   , noParse(FALSE) //TODO: to be removed after #6895
 662 {
 663   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 664   init(description, locinfo, perror, status);
 665 }
 666
 667 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 668                                              const UnicodeString& locs,
 669                                              UParseError& perror, UErrorCode& status)
 670   : ruleSets(NULL)
 671   , defaultRuleSet(NULL)
 672   , locale(Locale::getDefault())
 673   , collator(NULL)
 674   , decimalFormatSymbols(NULL)
 675   , lenient(FALSE)
 676   , lenientParseRules(NULL)
 677   , localizations(NULL)
 678   , noParse(FALSE) //TODO: to be removed after #6895
 679 {
 680   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 681   init(description, locinfo, perror, status);
 682 }
 683
// Builds a formatter for `alocale` from a rule description and an already
// constructed LocalizationInfo (may be NULL), which is handed to init().
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                                             LocalizationInfo* info,
                                             const Locale& alocale, UParseError& perror, UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(alocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
  init(description, info, perror, status);
}
 699
// Builds a formatter for Locale::getDefault() from a rule description,
// with no localization info (NULL is passed to init()).
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(Locale::getDefault())
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
    init(description, NULL, perror, status);
}
 715
// Builds a formatter for `aLocale` from a rule description, with no
// localization info (NULL is passed to init()).
RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
                         const Locale& aLocale,
                         UParseError& perror,
                         UErrorCode& status)
  : ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(aLocale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) //TODO: to be removed after #6895
{
    init(description, NULL, perror, status);
}
 732
 733 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
 734   : ruleSets(NULL)
 735   , defaultRuleSet(NULL)
 736   , locale(alocale)
 737   , collator(NULL)
 738   , decimalFormatSymbols(NULL)
 739   , lenient(FALSE)
 740   , lenientParseRules(NULL)
 741   , localizations(NULL)
 742 {
 743     if (U_FAILURE(status)) {
 744         return;
 745     }
 746
 747     const char* rules_tag = "RBNFRules";
 748     const char* fmt_tag = "";
 749     switch (tag) {
 750     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
 751     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
 752     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
 753     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
 754     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
 755     }
 756
 757     // TODO: read localization info from resource
 758     LocalizationInfo* locinfo = NULL;
 759
 760     int32_t len = 0;
 761     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
 762     if (U_SUCCESS(status)) {
 763         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
 764                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
 765
 766         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
 767         if (U_FAILURE(status)) {
 768             ures_close(nfrb);
 769         }
 770         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
 771         if (U_FAILURE(status)) {
 772             ures_close(rbnfRules);
 773             ures_close(nfrb);
 774             return;
 775         }
 776
 777         UnicodeString desc;
 778         while (ures_hasNext(ruleSets)) {
 779            const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
 780            desc.append(currentString);
 781         }
 782         UParseError perror;
 783
 784
 785         init (desc, locinfo, perror, status);
 786
 787         //TODO: we need a real fix - see #6895 / #6896
 788         noParse = FALSE;
 789         if (tag == URBNF_SPELLOUT) {
 790             const char *lang = alocale.getLanguage();
 791             for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
 792                 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
 793                     noParse = TRUE;
 794                     break;
 795                 }
 796             }
 797         }
 798         //TODO: end
 799
 800         ures_close(ruleSets);
 801         ures_close(rbnfRules);
 802     }
 803     ures_close(nfrb);
 804 }
 805
 806 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
 807   : NumberFormat(rhs)
 808   , ruleSets(NULL)
 809   , defaultRuleSet(NULL)
 810   , locale(rhs.locale)
 811   , collator(NULL)
 812   , decimalFormatSymbols(NULL)
 813   , lenient(FALSE)
 814   , lenientParseRules(NULL)
 815   , localizations(NULL)
 816 {
 817     this->operator=(rhs);
 818 }
 819
 820 // --------
 821
 822 RuleBasedNumberFormat&
 823 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
 824 {
 825     UErrorCode status = U_ZERO_ERROR;
 826     dispose();
 827     locale = rhs.locale;
 828     lenient = rhs.lenient;
 829
 830     UnicodeString rules = rhs.getRules();
 831     UParseError perror;
 832     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
 833
 834     //TODO: remove below when we fix the parse bug - See #6895 / #6896
 835     noParse = rhs.noParse;
 836
 837     return *this;
 838 }
 839
// Destructor: all cleanup is delegated to dispose(), which is defined
// outside the portion of the file shown here.
RuleBasedNumberFormat::~RuleBasedNumberFormat()
{
    dispose();
}
 844
 845 Format*
 846 RuleBasedNumberFormat::clone(void) const
 847 {
 848     RuleBasedNumberFormat * result = NULL;
 849     UnicodeString rules = getRules();
 850     UErrorCode status = U_ZERO_ERROR;
 851     UParseError perror;
 852     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
 853     /* test for NULL */
 854     if (result == 0) {
 855         status = U_MEMORY_ALLOCATION_ERROR;
 856         return 0;
 857     }
 858     if (U_FAILURE(status)) {
 859         delete result;
 860         result = 0;
 861     } else {
 862         result->lenient = lenient;
 863
 864         //TODO: remove below when we fix the parse bug - See #6895 / #6896
 865         result->noParse = noParse;
 866     }
 867     return result;
 868 }
 869
 870 UBool
 871 RuleBasedNumberFormat::operator==(const Format& other) const
 872 {
 873     if (this == &other) {
 874         return TRUE;
 875     }
 876
 877     if (typeid(*this) == typeid(other)) {
 878         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
 879         if (locale == rhs.locale &&
 880             lenient == rhs.lenient &&
 881             (localizations == NULL
 882                 ? rhs.localizations == NULL
 883                 : (rhs.localizations == NULL
 884                     ? FALSE
 885                     : *localizations == rhs.localizations))) {
 886
 887             NFRuleSet** p = ruleSets;
 888             NFRuleSet** q = rhs.ruleSets;
 889             if (p == NULL) {
 890                 return q == NULL;
 891             } else if (q == NULL) {
 892                 return FALSE;
 893             }
 894             while (*p && *q && (**p == **q)) {
 895                 ++p;
 896                 ++q;
 897             }
 898             return *q == NULL && *p == NULL;
 899         }
 900     }
 901
 902     return FALSE;
 903 }
 904
 905 UnicodeString
 906 RuleBasedNumberFormat::getRules() const
 907 {
 908     UnicodeString result;
 909     if (ruleSets != NULL) {
 910         for (NFRuleSet** p = ruleSets; *p; ++p) {
 911             (*p)->appendRules(result);
 912         }
 913     }
 914     return result;
 915 }
 916
 917 UnicodeString
 918 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
 919 {
 920     if (localizations) {
 921       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
 922       return string;
 923     } else if (ruleSets) {
 924         UnicodeString result;
 925         for (NFRuleSet** p = ruleSets; *p; ++p) {
 926             NFRuleSet* rs = *p;
 927             if (rs->isPublic()) {
 928                 if (--index == -1) {
 929                     rs->getName(result);
 930                     return result;
 931                 }
 932             }
 933         }
 934     }
 935     UnicodeString empty;
 936     return empty;
 937 }
 938
 939 int32_t
 940 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
 941 {
 942     int32_t result = 0;
 943     if (localizations) {
 944       result = localizations->getNumberOfRuleSets();
 945     } else if (ruleSets) {
 946         for (NFRuleSet** p = ruleSets; *p; ++p) {
 947             if ((**p).isPublic()) {
 948                 ++result;
 949             }
 950         }
 951     }
 952     return result;
 953 }
 954
 955 int32_t
 956 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
 957     if (localizations) {
 958         return localizations->getNumberOfDisplayLocales();
 959     }
 960     return 0;
 961 }
 962
 963 Locale
 964 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
 965     if (U_FAILURE(status)) {
 966         return Locale("");
 967     }
 968     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
 969         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
 970         char buffer[64];
 971         int32_t cap = name.length() + 1;
 972         char* bp = buffer;
 973         if (cap > 64) {
 974             bp = (char *)uprv_malloc(cap);
 975             if (bp == NULL) {
 976                 status = U_MEMORY_ALLOCATION_ERROR;
 977                 return Locale("");
 978             }
 979         }
 980         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
 981         Locale retLocale(bp);
 982         if (bp != buffer) {
 983             uprv_free(bp);
 984         }
 985         return retLocale;
 986     }
 987     status = U_ILLEGAL_ARGUMENT_ERROR;
 988     Locale retLocale;
 989     return retLocale;
 990 }
 991
 992 UnicodeString
 993 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
 994     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
 995         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
 996         int32_t len = localeName.length();
 997         UChar* localeStr = localeName.getBuffer(len + 1);
 998         while (len >= 0) {
 999             localeStr[len] = 0;
1000             int32_t ix = localizations->indexForLocale(localeStr);
1001             if (ix >= 0) {
1002                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1003                 return name;
1004             }
1005
1006             // trim trailing portion, skipping over ommitted sections
1007             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1008             while (len > 0 && localeStr[len-1] == 0x005F) --len;
1009         }
1010         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1011         return name;
1012     }
1013     UnicodeString bogus;
1014     bogus.setToBogus();
1015     return bogus;
1016 }
1017
1018 UnicodeString
1019 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1020     if (localizations) {
1021         UnicodeString rsn(ruleSetName);
1022         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1023         return getRuleSetDisplayName(ix, localeParam);
1024     }
1025     UnicodeString bogus;
1026     bogus.setToBogus();
1027     return bogus;
1028 }
1029
1030 NFRuleSet*
1031 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1032 {
1033     if (U_SUCCESS(status) && ruleSets) {
1034         for (NFRuleSet** p = ruleSets; *p; ++p) {
1035             NFRuleSet* rs = *p;
1036             if (rs->isNamed(name)) {
1037                 return rs;
1038             }
1039         }
1040         status = U_ILLEGAL_ARGUMENT_ERROR;
1041     }
1042     return NULL;
1043 }
1044
1045 UnicodeString&
1046 RuleBasedNumberFormat::format(int32_t number,
1047                               UnicodeString& toAppendTo,
1048                               FieldPosition& /* pos */) const
1049 {
1050     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1051     return toAppendTo;
1052 }
1053
1054
1055 UnicodeString&
1056 RuleBasedNumberFormat::format(int64_t number,
1057                               UnicodeString& toAppendTo,
1058                               FieldPosition& /* pos */) const
1059 {
1060     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1061     return toAppendTo;
1062 }
1063
1064
1065 UnicodeString&
1066 RuleBasedNumberFormat::format(double number,
1067                               UnicodeString& toAppendTo,
1068                               FieldPosition& /* pos */) const
1069 {
1070     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1071     if (uprv_isNaN(number)) {
1072         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1073         if (decFmtSyms) {
1074             toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1075         }
1076     } else if (defaultRuleSet) {
1077         defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1078     }
1079     return toAppendTo;
1080 }
1081
1082
1083 UnicodeString&
1084 RuleBasedNumberFormat::format(int32_t number,
1085                               const UnicodeString& ruleSetName,
1086                               UnicodeString& toAppendTo,
1087                               FieldPosition& /* pos */,
1088                               UErrorCode& status) const
1089 {
1090     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1091     if (U_SUCCESS(status)) {
1092         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1093             // throw new IllegalArgumentException("Can't use internal rule set");
1094             status = U_ILLEGAL_ARGUMENT_ERROR;
1095         } else {
1096             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1097             if (rs) {
1098                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1099             }
1100         }
1101     }
1102     return toAppendTo;
1103 }
1104
1105
1106 UnicodeString&
1107 RuleBasedNumberFormat::format(int64_t number,
1108                               const UnicodeString& ruleSetName,
1109                               UnicodeString& toAppendTo,
1110                               FieldPosition& /* pos */,
1111                               UErrorCode& status) const
1112 {
1113     if (U_SUCCESS(status)) {
1114         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1115             // throw new IllegalArgumentException("Can't use internal rule set");
1116             status = U_ILLEGAL_ARGUMENT_ERROR;
1117         } else {
1118             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1119             if (rs) {
1120                 rs->format(number, toAppendTo, toAppendTo.length());
1121             }
1122         }
1123     }
1124     return toAppendTo;
1125 }
1126
1127
1128 // make linker happy
1129 UnicodeString&
1130 RuleBasedNumberFormat::format(const Formattable& obj,
1131                               UnicodeString& toAppendTo,
1132                               FieldPosition& pos,
1133                               UErrorCode& status) const
1134 {
1135     return NumberFormat::format(obj, toAppendTo, pos, status);
1136 }
1137
1138 UnicodeString&
1139 RuleBasedNumberFormat::format(double number,
1140                               const UnicodeString& ruleSetName,
1141                               UnicodeString& toAppendTo,
1142                               FieldPosition& /* pos */,
1143                               UErrorCode& status) const
1144 {
1145     if (U_SUCCESS(status)) {
1146         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1147             // throw new IllegalArgumentException("Can't use internal rule set");
1148             status = U_ILLEGAL_ARGUMENT_ERROR;
1149         } else {
1150             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151             if (rs) {
1152                 rs->format(number, toAppendTo, toAppendTo.length());
1153             }
1154         }
1155     }
1156     return toAppendTo;
1157 }
1158
1159 void
1160 RuleBasedNumberFormat::parse(const UnicodeString& text,
1161                              Formattable& result,
1162                              ParsePosition& parsePosition) const
1163 {
1164     //TODO: We need a real fix.  See #6895 / #6896
1165     if (noParse) {
1166         // skip parsing
1167         parsePosition.setErrorIndex(0);
1168         return;
1169     }
1170
1171     if (!ruleSets) {
1172         parsePosition.setErrorIndex(0);
1173         return;
1174     }
1175
1176     UnicodeString workingText(text, parsePosition.getIndex());
1177     ParsePosition workingPos(0);
1178
1179     ParsePosition high_pp(0);
1180     Formattable high_result;
1181
1182     for (NFRuleSet** p = ruleSets; *p; ++p) {
1183         NFRuleSet *rp = *p;
1184         if (rp->isPublic() && rp->isParseable()) {
1185             ParsePosition working_pp(0);
1186             Formattable working_result;
1187
1188             rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
1189             if (working_pp.getIndex() > high_pp.getIndex()) {
1190                 high_pp = working_pp;
1191                 high_result = working_result;
1192
1193                 if (high_pp.getIndex() == workingText.length()) {
1194                     break;
1195                 }
1196             }
1197         }
1198     }
1199
1200     int32_t startIndex = parsePosition.getIndex();
1201     parsePosition.setIndex(startIndex + high_pp.getIndex());
1202     if (high_pp.getIndex() > 0) {
1203         parsePosition.setErrorIndex(-1);
1204     } else {
1205         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1206         parsePosition.setErrorIndex(startIndex + errorIndex);
1207     }
1208     result = high_result;
1209     if (result.getType() == Formattable::kDouble) {
1210         int32_t r = (int32_t)result.getDouble();
1211         if ((double)r == result.getDouble()) {
1212             result.setLong(r);
1213         }
1214     }
1215 }
1216
1217 #if !UCONFIG_NO_COLLATION
1218
1219 void
1220 RuleBasedNumberFormat::setLenient(UBool enabled)
1221 {
1222     lenient = enabled;
1223     if (!enabled && collator) {
1224         delete collator;
1225         collator = NULL;
1226     }
1227 }
1228
1229 #endif
1230
1231 void
1232 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1233     if (U_SUCCESS(status)) {
1234         if (ruleSetName.isEmpty()) {
1235           if (localizations) {
1236               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1237               defaultRuleSet = findRuleSet(name, status);
1238           } else {
1239             initDefaultRuleSet();
1240           }
1241         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1242             status = U_ILLEGAL_ARGUMENT_ERROR;
1243         } else {
1244             NFRuleSet* result = findRuleSet(ruleSetName, status);
1245             if (result != NULL) {
1246                 defaultRuleSet = result;
1247             }
1248         }
1249     }
1250 }
1251
1252 UnicodeString
1253 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1254   UnicodeString result;
1255   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1256     defaultRuleSet->getName(result);
1257   } else {
1258     result.setToBogus();
1259   }
1260   return result;
1261 }
1262
1263 void
1264 RuleBasedNumberFormat::initDefaultRuleSet()
1265 {
1266     defaultRuleSet = NULL;
1267     if (!ruleSets) {
1268       return;
1269     }
1270
1271     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1272     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1273     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1274
1275     NFRuleSet**p = &ruleSets[0];
1276     while (*p) {
1277         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1278             defaultRuleSet = *p;
1279             return;
1280         } else {
1281             ++p;
1282         }
1283     }
1284
1285     defaultRuleSet = *--p;
1286     if (!defaultRuleSet->isPublic()) {
1287         while (p != ruleSets) {
1288             if ((*--p)->isPublic()) {
1289                 defaultRuleSet = *p;
1290                 break;
1291             }
1292         }
1293     }
1294 }
1295
1296
1297 void
1298 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1299                             UParseError& pErr, UErrorCode& status)
1300 {
1301     // TODO: implement UParseError
1302     uprv_memset(&pErr, 0, sizeof(UParseError));
1303     // Note: this can leave ruleSets == NULL, so remaining code should check
1304     if (U_FAILURE(status)) {
1305         return;
1306     }
1307
1308     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1309
1310     UnicodeString description(rules);
1311     if (!description.length()) {
1312         status = U_MEMORY_ALLOCATION_ERROR;
1313         return;
1314     }
1315
1316     // start by stripping the trailing whitespace from all the rules
1317     // (this is all the whitespace follwing each semicolon in the
1318     // description).  This allows us to look for rule-set boundaries
1319     // by searching for ";%" without having to worry about whitespace
1320     // between the ; and the %
1321     stripWhitespace(description);
1322
1323     // check to see if there's a set of lenient-parse rules.  If there
1324     // is, pull them out into our temporary holding place for them,
1325     // and delete them from the description before the real desciption-
1326     // parsing code sees them
1327     int32_t lp = description.indexOf(gLenientParse);
1328     if (lp != -1) {
1329         // we've got to make sure we're not in the middle of a rule
1330         // (where "%%lenient-parse" would actually get treated as
1331         // rule text)
1332         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1333             // locate the beginning and end of the actual collation
1334             // rules (there may be whitespace between the name and
1335             // the first token in the description)
1336             int lpEnd = description.indexOf(gSemiPercent, lp);
1337
1338             if (lpEnd == -1) {
1339                 lpEnd = description.length() - 1;
1340             }
1341             int lpStart = lp + u_strlen(gLenientParse);
1342             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1343                 ++lpStart;
1344             }
1345
1346             // copy out the lenient-parse rules and delete them
1347             // from the description
1348             lenientParseRules = new UnicodeString();
1349             /* test for NULL */
1350             if (lenientParseRules == 0) {
1351                 status = U_MEMORY_ALLOCATION_ERROR;
1352                 return;
1353             }
1354             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1355
1356             description.remove(lp, lpEnd + 1 - lp);
1357         }
1358     }
1359
1360     // pre-flight parsing the description and count the number of
1361     // rule sets (";%" marks the end of one rule set and the beginning
1362     // of the next)
1363     int numRuleSets = 0;
1364     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1365         ++numRuleSets;
1366         ++p;
1367     }
1368     ++numRuleSets;
1369
1370     // our rule list is an array of the appropriate size
1371     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1372     /* test for NULL */
1373     if (ruleSets == 0) {
1374         status = U_MEMORY_ALLOCATION_ERROR;
1375         return;
1376     }
1377
1378     for (int i = 0; i <= numRuleSets; ++i) {
1379         ruleSets[i] = NULL;
1380     }
1381
1382     // divide up the descriptions into individual rule-set descriptions
1383     // and store them in a temporary array.  At each step, we also
1384     // new up a rule set, but all this does is initialize its name
1385     // and remove it from its description.  We can't actually parse
1386     // the rest of the descriptions and finish initializing everything
1387     // because we have to know the names and locations of all the rule
1388     // sets before we can actually set everything up
1389     if(!numRuleSets) {
1390         status = U_ILLEGAL_ARGUMENT_ERROR;
1391         return;
1392     }
1393     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1394     if (ruleSetDescriptions == 0) {
1395         status = U_MEMORY_ALLOCATION_ERROR;
1396         return;
1397     }
1398
1399     {
1400         int curRuleSet = 0;
1401         int32_t start = 0;
1402         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1403             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1404             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1405             if (ruleSets[curRuleSet] == 0) {
1406                 status = U_MEMORY_ALLOCATION_ERROR;
1407                 goto cleanup;
1408             }
1409             ++curRuleSet;
1410             start = p + 1;
1411         }
1412         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1413         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1414         if (ruleSets[curRuleSet] == 0) {
1415             status = U_MEMORY_ALLOCATION_ERROR;
1416             goto cleanup;
1417         }
1418     }
1419
1420     // now we can take note of the formatter's default rule set, which
1421     // is the last public rule set in the description (it's the last
1422     // rather than the first so that a user can create a new formatter
1423     // from an existing formatter and change its default behavior just
1424     // by appending more rule sets to the end)
1425
1426     // {dlf} Initialization of a fraction rule set requires the default rule
1427     // set to be known.  For purposes of initialization, this is always the
1428     // last public rule set, no matter what the localization data says.
1429     initDefaultRuleSet();
1430
1431     // finally, we can go back through the temporary descriptions
1432     // list and finish seting up the substructure (and we throw
1433     // away the temporary descriptions as we go)
1434     {
1435         for (int i = 0; i < numRuleSets; i++) {
1436             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1437         }
1438     }
1439
1440     // Now that the rules are initialized, the 'real' default rule
1441     // set can be adjusted by the localization data.
1442
1443     // The C code keeps the localization array as is, rather than building
1444     // a separate array of the public rule set names, so we have less work
1445     // to do here-- but we still need to check the names.
1446
1447     if (localizationInfos) {
1448         // confirm the names, if any aren't in the rules, that's an error
1449         // it is ok if the rules contain public rule sets that are not in this list
1450         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1451             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1452             NFRuleSet* rs = findRuleSet(name, status);
1453             if (rs == NULL) {
1454                 break; // error
1455             }
1456             if (i == 0) {
1457                 defaultRuleSet = rs;
1458             }
1459         }
1460     } else {
1461         defaultRuleSet = getDefaultRuleSet();
1462     }
1463
1464 cleanup:
1465     delete[] ruleSetDescriptions;
1466 }
1467
1468 void
1469 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1470 {
1471     // iterate through the characters...
1472     UnicodeString result;
1473
1474     int start = 0;
1475     while (start != -1 && start < description.length()) {
1476         // seek to the first non-whitespace character...
1477         while (start < description.length()
1478             && uprv_isRuleWhiteSpace(description.charAt(start))) {
1479             ++start;
1480         }
1481
1482         // locate the next semicolon in the text and copy the text from
1483         // our current position up to that semicolon into the result
1484         int32_t p = description.indexOf(gSemiColon, start);
1485         if (p == -1) {
1486             // or if we don't find a semicolon, just copy the rest of
1487             // the string into the result
1488             result.append(description, start, description.length() - start);
1489             start = -1;
1490         }
1491         else if (p < description.length()) {
1492             result.append(description, start, p + 1 - start);
1493             start = p + 1;
1494         }
1495
1496         // when we get here, we've seeked off the end of the sring, and
1497         // we terminate the loop (we continue until *start* is -1 rather
1498         // than until *p* is -1, because otherwise we'd miss the last
1499         // rule in the description)
1500         else {
1501             start = -1;
1502         }
1503     }
1504
1505     description.setTo(result);
1506 }
1507
1508
1509 void
1510 RuleBasedNumberFormat::dispose()
1511 {
1512     if (ruleSets) {
1513         for (NFRuleSet** p = ruleSets; *p; ++p) {
1514             delete *p;
1515         }
1516         uprv_free(ruleSets);
1517         ruleSets = NULL;
1518     }
1519
1520 #if !UCONFIG_NO_COLLATION
1521     delete collator;
1522 #endif
1523     collator = NULL;
1524
1525     delete decimalFormatSymbols;
1526     decimalFormatSymbols = NULL;
1527
1528     delete lenientParseRules;
1529     lenientParseRules = NULL;
1530
1531     if (localizations) localizations = localizations->unref();
1532 }
1533
1534
1535 //-----------------------------------------------------------------------
1536 // package-internal API
1537 //-----------------------------------------------------------------------
1538
1539 /**
1540  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1541  * this function creates it the first time it's called.
1542  * @return The collator to use for lenient parsing, or null if lenient parsing
1543  * is turned off.
1544 */
1545 Collator*
1546 RuleBasedNumberFormat::getCollator() const
1547 {
1548 #if !UCONFIG_NO_COLLATION
1549     if (!ruleSets) {
1550         return NULL;
1551     }
1552
1553     // lazy-evaulate the collator
1554     if (collator == NULL && lenient) {
1555         // create a default collator based on the formatter's locale,
1556         // then pull out that collator's rules, append any additional
1557         // rules specified in the description, and create a _new_
1558         // collator based on the combinaiton of those rules
1559
1560         UErrorCode status = U_ZERO_ERROR;
1561
1562         Collator* temp = Collator::createInstance(locale, status);
1563         RuleBasedCollator* newCollator;
1564         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1565             if (lenientParseRules) {
1566                 UnicodeString rules(newCollator->getRules());
1567                 rules.append(*lenientParseRules);
1568
1569                 newCollator = new RuleBasedCollator(rules, status);
1570                 // Exit if newCollator could not be created.
1571                 if (newCollator == NULL) {
1572                         return NULL;
1573                 }
1574             } else {
1575                 temp = NULL;
1576             }
1577             if (U_SUCCESS(status)) {
1578                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1579                 // cast away const
1580                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1581             } else {
1582                 delete newCollator;
1583             }
1584         }
1585         delete temp;
1586     }
1587 #endif
1588
1589     // if lenient-parse mode is off, this will be null
1590     // (see setLenientParseMode())
1591     return collator;
1592 }
1593
1594
1595 /**
1596  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1597  * instances owned by this formatter.  This object is lazily created: this function
1598  * creates it the first time it's called.
1599  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1600  * instances owned by this formatter.
1601 */
1602 DecimalFormatSymbols*
1603 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1604 {
1605     // lazy-evaluate the DecimalFormatSymbols object.  This object
1606     // is shared by all DecimalFormat instances belonging to this
1607     // formatter
1608     if (decimalFormatSymbols == NULL) {
1609         UErrorCode status = U_ZERO_ERROR;
1610         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1611         if (U_SUCCESS(status)) {
1612             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1613         } else {
1614             delete temp;
1615         }
1616     }
1617     return decimalFormatSymbols;
1618 }
1619
1620 U_NAMESPACE_END
1621
1622 /* U_HAVE_RBNF */
1623 #endif