icuSources/i18n/rbnf.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2012, International Business Machines Corporation
   4 * and others. All Rights Reserved.
   5 *******************************************************************************
   6 */
   7
   8 #include "utypeinfo.h"  // for 'typeid' to work
   9
  10 #include "unicode/rbnf.h"
  11
  12 #if U_HAVE_RBNF
  13
  14 #include "unicode/normlzr.h"
  15 #include "unicode/tblcoll.h"
  16 #include "unicode/uchar.h"
  17 #include "unicode/ucol.h"
  18 #include "unicode/uloc.h"
  19 #include "unicode/unum.h"
  20 #include "unicode/ures.h"
  21 #include "unicode/ustring.h"
  22 #include "unicode/utf16.h"
  23 #include "unicode/udata.h"
  24 #include "nfrs.h"
  25
  26 #include "cmemory.h"
  27 #include "cstring.h"
  28 #include "patternprops.h"
  29 #include "uresimp.h"
  30
  31 // debugging
  32 // #define DEBUG
  33
  34 #ifdef DEBUG
  35 #include "stdio.h"
  36 #endif
  37
  38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
  39
  40 static const UChar gPercentPercent[] =
  41 {
  42     0x25, 0x25, 0
  43 }; /* "%%" */
  44
  45 // All urbnf objects are created through openRules, so we init all of the
  46 // Unicode string constants required by rbnf, nfrs, or nfr here.
  47 static const UChar gLenientParse[] =
  48 {
  49     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
  50 }; /* "%%lenient-parse:" */
  51 static const UChar gSemiColon = 0x003B;
  52 static const UChar gSemiPercent[] =
  53 {
  54     0x3B, 0x25, 0
  55 }; /* ";%" */
  56
  57 #define kSomeNumberOfBitsDiv2 22
  58 #define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
  59 #define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
  60
  61 U_NAMESPACE_BEGIN
  62
  63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
  64
  65 /*
  66 This is a utility class. It does not use ICU's RTTI.
  67 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
  68 Please make sure that intltest passes on Windows in Release mode,
  69 since the string pooling per compilation unit will mess up how RTTI works.
  70 The RTTI code was also removed due to lack of code coverage.
  71 */
  72 class LocalizationInfo : public UMemory {
  73 protected:
  74     virtual ~LocalizationInfo();
  75     uint32_t refcount;
  76
  77 public:
  78     LocalizationInfo() : refcount(0) {}
  79
  80     LocalizationInfo* ref(void) {
  81         ++refcount;
  82         return this;
  83     }
  84
  85     LocalizationInfo* unref(void) {
  86         if (refcount && --refcount == 0) {
  87             delete this;
  88         }
  89         return NULL;
  90     }
  91
  92     virtual UBool operator==(const LocalizationInfo* rhs) const;
  93     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
  94
  95     virtual int32_t getNumberOfRuleSets(void) const = 0;
  96     virtual const UChar* getRuleSetName(int32_t index) const = 0;
  97     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
  98     virtual const UChar* getLocaleName(int32_t index) const = 0;
  99     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
 100
 101     virtual int32_t indexForLocale(const UChar* locale) const;
 102     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
 103
 104 //    virtual UClassID getDynamicClassID() const = 0;
 105 //    static UClassID getStaticClassID(void);
 106 };
 107
 108 LocalizationInfo::~LocalizationInfo() {}
 109
 110 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
 111
 112 // if both strings are NULL, this returns TRUE
 113 static UBool
 114 streq(const UChar* lhs, const UChar* rhs) {
 115     if (rhs == lhs) {
 116         return TRUE;
 117     }
 118     if (lhs && rhs) {
 119         return u_strcmp(lhs, rhs) == 0;
 120     }
 121     return FALSE;
 122 }
 123
 124 UBool
 125 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
 126     if (rhs) {
 127         if (this == rhs) {
 128             return TRUE;
 129         }
 130
 131         int32_t rsc = getNumberOfRuleSets();
 132         if (rsc == rhs->getNumberOfRuleSets()) {
 133             for (int i = 0; i < rsc; ++i) {
 134                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
 135                     return FALSE;
 136                 }
 137             }
 138             int32_t dlc = getNumberOfDisplayLocales();
 139             if (dlc == rhs->getNumberOfDisplayLocales()) {
 140                 for (int i = 0; i < dlc; ++i) {
 141                     const UChar* locale = getLocaleName(i);
 142                     int32_t ix = rhs->indexForLocale(locale);
 143                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
 144                     if (!streq(locale, rhs->getLocaleName(ix))) {
 145                         return FALSE;
 146                     }
 147                     for (int j = 0; j < rsc; ++j) {
 148                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
 149                             return FALSE;
 150                         }
 151                     }
 152                 }
 153                 return TRUE;
 154             }
 155         }
 156     }
 157     return FALSE;
 158 }
 159
 160 int32_t
 161 LocalizationInfo::indexForLocale(const UChar* locale) const {
 162     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
 163         if (streq(locale, getLocaleName(i))) {
 164             return i;
 165         }
 166     }
 167     return -1;
 168 }
 169
 170 int32_t
 171 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
 172     if (ruleset) {
 173         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
 174             if (streq(ruleset, getRuleSetName(i))) {
 175                 return i;
 176             }
 177         }
 178     }
 179     return -1;
 180 }
 181
 182
 183 typedef void (*Fn_Deleter)(void*);
 184
 185 class VArray {
 186     void** buf;
 187     int32_t cap;
 188     int32_t size;
 189     Fn_Deleter deleter;
 190 public:
 191     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
 192
 193     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
 194
 195     ~VArray() {
 196         if (deleter) {
 197             for (int i = 0; i < size; ++i) {
 198                 (*deleter)(buf[i]);
 199             }
 200         }
 201         uprv_free(buf);
 202     }
 203
 204     int32_t length() {
 205         return size;
 206     }
 207
 208     void add(void* elem, UErrorCode& status) {
 209         if (U_SUCCESS(status)) {
 210             if (size == cap) {
 211                 if (cap == 0) {
 212                     cap = 1;
 213                 } else if (cap < 256) {
 214                     cap *= 2;
 215                 } else {
 216                     cap += 256;
 217                 }
 218                 if (buf == NULL) {
 219                     buf = (void**)uprv_malloc(cap * sizeof(void*));
 220                 } else {
 221                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
 222                 }
 223                 if (buf == NULL) {
 224                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
 225                     status = U_MEMORY_ALLOCATION_ERROR;
 226                     return;
 227                 }
 228                 void* start = &buf[size];
 229                 size_t count = (cap - size) * sizeof(void*);
 230                 uprv_memset(start, 0, count); // fill with nulls, just because
 231             }
 232             buf[size++] = elem;
 233         }
 234     }
 235
 236     void** release(void) {
 237         void** result = buf;
 238         buf = NULL;
 239         cap = 0;
 240         size = 0;
 241         return result;
 242     }
 243 };
 244
 245 class LocDataParser;
 246
 247 class StringLocalizationInfo : public LocalizationInfo {
 248     UChar* info;
 249     UChar*** data;
 250     int32_t numRuleSets;
 251     int32_t numLocales;
 252
 253 friend class LocDataParser;
 254
 255     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
 256         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
 257     {
 258     }
 259
 260 public:
 261     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
 262
 263     virtual ~StringLocalizationInfo();
 264     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
 265     virtual const UChar* getRuleSetName(int32_t index) const;
 266     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
 267     virtual const UChar* getLocaleName(int32_t index) const;
 268     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
 269
 270 //    virtual UClassID getDynamicClassID() const;
 271 //    static UClassID getStaticClassID(void);
 272
 273 private:
 274     void init(UErrorCode& status) const;
 275 };
 276
 277
 278 enum {
 279     OPEN_ANGLE = 0x003c, /* '<' */
 280     CLOSE_ANGLE = 0x003e, /* '>' */
 281     COMMA = 0x002c,
 282     TICK = 0x0027,
 283     QUOTE = 0x0022,
 284     SPACE = 0x0020
 285 };
 286
 287 /**
 288  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
 289  */
 290 class LocDataParser {
 291     UChar* data;
 292     const UChar* e;
 293     UChar* p;
 294     UChar ch;
 295     UParseError& pe;
 296     UErrorCode& ec;
 297
 298 public:
 299     LocDataParser(UParseError& parseError, UErrorCode& status)
 300         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
 301     ~LocDataParser() {}
 302
 303     /*
 304     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
 305     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
 306     */
 307     StringLocalizationInfo* parse(UChar* data, int32_t len);
 308
 309 private:
 310
 311     void inc(void) { ++p; ch = 0xffff; }
 312     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
 313     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
 314     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
 315     UBool inList(UChar c, const UChar* list) const {
 316         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
 317         while (*list && *list != c) ++list; return *list == c;
 318     }
 319     void parseError(const char* msg);
 320
 321     StringLocalizationInfo* doParse(void);
 322
 323     UChar** nextArray(int32_t& requiredLength);
 324     UChar*  nextString(void);
 325 };
 326
 327 #ifdef DEBUG
 328 #define ERROR(msg) parseError(msg); return NULL;
 329 #else
 330 #define ERROR(msg) parseError(NULL); return NULL;
 331 #endif
 332
 333
 334 static const UChar DQUOTE_STOPLIST[] = {
 335     QUOTE, 0
 336 };
 337
 338 static const UChar SQUOTE_STOPLIST[] = {
 339     TICK, 0
 340 };
 341
 342 static const UChar NOQUOTE_STOPLIST[] = {
 343     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
 344 };
 345
 346 static void
 347 DeleteFn(void* p) {
 348   uprv_free(p);
 349 }
 350
 351 StringLocalizationInfo*
 352 LocDataParser::parse(UChar* _data, int32_t len) {
 353     if (U_FAILURE(ec)) {
 354         if (_data) uprv_free(_data);
 355         return NULL;
 356     }
 357
 358     pe.line = 0;
 359     pe.offset = -1;
 360     pe.postContext[0] = 0;
 361     pe.preContext[0] = 0;
 362
 363     if (_data == NULL) {
 364         ec = U_ILLEGAL_ARGUMENT_ERROR;
 365         return NULL;
 366     }
 367
 368     if (len <= 0) {
 369         ec = U_ILLEGAL_ARGUMENT_ERROR;
 370         uprv_free(_data);
 371         return NULL;
 372     }
 373
 374     data = _data;
 375     e = data + len;
 376     p = _data;
 377     ch = 0xffff;
 378
 379     return doParse();
 380 }
 381
 382
 383 StringLocalizationInfo*
 384 LocDataParser::doParse(void) {
 385     skipWhitespace();
 386     if (!checkInc(OPEN_ANGLE)) {
 387         ERROR("Missing open angle");
 388     } else {
 389         VArray array(DeleteFn);
 390         UBool mightHaveNext = TRUE;
 391         int32_t requiredLength = -1;
 392         while (mightHaveNext) {
 393             mightHaveNext = FALSE;
 394             UChar** elem = nextArray(requiredLength);
 395             skipWhitespace();
 396             UBool haveComma = check(COMMA);
 397             if (elem) {
 398                 array.add(elem, ec);
 399                 if (haveComma) {
 400                     inc();
 401                     mightHaveNext = TRUE;
 402                 }
 403             } else if (haveComma) {
 404                 ERROR("Unexpected character");
 405             }
 406         }
 407
 408         skipWhitespace();
 409         if (!checkInc(CLOSE_ANGLE)) {
 410             if (check(OPEN_ANGLE)) {
 411                 ERROR("Missing comma in outer array");
 412             } else {
 413                 ERROR("Missing close angle bracket in outer array");
 414             }
 415         }
 416
 417         skipWhitespace();
 418         if (p != e) {
 419             ERROR("Extra text after close of localization data");
 420         }
 421
 422         array.add(NULL, ec);
 423         if (U_SUCCESS(ec)) {
 424             int32_t numLocs = array.length() - 2; // subtract first, NULL
 425             UChar*** result = (UChar***)array.release();
 426
 427             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
 428         }
 429     }
 430
 431     ERROR("Unknown error");
 432 }
 433
 434 UChar**
 435 LocDataParser::nextArray(int32_t& requiredLength) {
 436     if (U_FAILURE(ec)) {
 437         return NULL;
 438     }
 439
 440     skipWhitespace();
 441     if (!checkInc(OPEN_ANGLE)) {
 442         ERROR("Missing open angle");
 443     }
 444
 445     VArray array;
 446     UBool mightHaveNext = TRUE;
 447     while (mightHaveNext) {
 448         mightHaveNext = FALSE;
 449         UChar* elem = nextString();
 450         skipWhitespace();
 451         UBool haveComma = check(COMMA);
 452         if (elem) {
 453             array.add(elem, ec);
 454             if (haveComma) {
 455                 inc();
 456                 mightHaveNext = TRUE;
 457             }
 458         } else if (haveComma) {
 459             ERROR("Unexpected comma");
 460         }
 461     }
 462     skipWhitespace();
 463     if (!checkInc(CLOSE_ANGLE)) {
 464         if (check(OPEN_ANGLE)) {
 465             ERROR("Missing close angle bracket in inner array");
 466         } else {
 467             ERROR("Missing comma in inner array");
 468         }
 469     }
 470
 471     array.add(NULL, ec);
 472     if (U_SUCCESS(ec)) {
 473         if (requiredLength == -1) {
 474             requiredLength = array.length() + 1;
 475         } else if (array.length() != requiredLength) {
 476             ec = U_ILLEGAL_ARGUMENT_ERROR;
 477             ERROR("Array not of required length");
 478         }
 479
 480         return (UChar**)array.release();
 481     }
 482     ERROR("Unknown Error");
 483 }
 484
 485 UChar*
 486 LocDataParser::nextString() {
 487     UChar* result = NULL;
 488
 489     skipWhitespace();
 490     if (p < e) {
 491         const UChar* terminators;
 492         UChar c = *p;
 493         UBool haveQuote = c == QUOTE || c == TICK;
 494         if (haveQuote) {
 495             inc();
 496             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
 497         } else {
 498             terminators = NOQUOTE_STOPLIST;
 499         }
 500         UChar* start = p;
 501         while (p < e && !inList(*p, terminators)) ++p;
 502         if (p == e) {
 503             ERROR("Unexpected end of data");
 504         }
 505
 506         UChar x = *p;
 507         if (p > start) {
 508             ch = x;
 509             *p = 0x0; // terminate by writing to data
 510             result = start; // just point into data
 511         }
 512         if (haveQuote) {
 513             if (x != c) {
 514                 ERROR("Missing matching quote");
 515             } else if (p == start) {
 516                 ERROR("Empty string");
 517             }
 518             inc();
 519         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
 520             ERROR("Unexpected character in string");
 521         }
 522     }
 523
 524     // ok for there to be no next string
 525     return result;
 526 }
 527
 528 void
 529 LocDataParser::parseError(const char* /*str*/) {
 530     if (!data) {
 531         return;
 532     }
 533
 534     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
 535     if (start < data) {
 536         start = data;
 537     }
 538     for (UChar* x = p; --x >= start;) {
 539         if (!*x) {
 540             start = x+1;
 541             break;
 542         }
 543     }
 544     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
 545     if (limit > e) {
 546         limit = e;
 547     }
 548     u_strncpy(pe.preContext, start, (int32_t)(p-start));
 549     pe.preContext[p-start] = 0;
 550     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
 551     pe.postContext[limit-p] = 0;
 552     pe.offset = (int32_t)(p - data);
 553
 554 #ifdef DEBUG
 555     fprintf(stderr, "%s at or near character %d: ", str, p-data);
 556
 557     UnicodeString msg;
 558     msg.append(start, p - start);
 559     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
 560     msg.append(p, limit-p);
 561     msg.append("'");
 562
 563     char buf[128];
 564     int32_t len = msg.extract(0, msg.length(), buf, 128);
 565     if (len >= 128) {
 566         buf[127] = 0;
 567     } else {
 568         buf[len] = 0;
 569     }
 570     fprintf(stderr, "%s\n", buf);
 571     fflush(stderr);
 572 #endif
 573
 574     uprv_free(data);
 575     data = NULL;
 576     p = NULL;
 577     e = NULL;
 578
 579     if (U_SUCCESS(ec)) {
 580         ec = U_PARSE_ERROR;
 581     }
 582 }
 583
 584 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
 585
 586 StringLocalizationInfo*
 587 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
 588     if (U_FAILURE(status)) {
 589         return NULL;
 590     }
 591
 592     int32_t len = info.length();
 593     if (len == 0) {
 594         return NULL; // no error;
 595     }
 596
 597     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
 598     if (!p) {
 599         status = U_MEMORY_ALLOCATION_ERROR;
 600         return NULL;
 601     }
 602     info.extract(p, len, status);
 603     if (!U_FAILURE(status)) {
 604         status = U_ZERO_ERROR; // clear warning about non-termination
 605     }
 606
 607     LocDataParser parser(perror, status);
 608     return parser.parse(p, len);
 609 }
 610
 611 StringLocalizationInfo::~StringLocalizationInfo() {
 612     for (UChar*** p = (UChar***)data; *p; ++p) {
 613         // remaining data is simply pointer into our unicode string data.
 614         if (*p) uprv_free(*p);
 615     }
 616     if (data) uprv_free(data);
 617     if (info) uprv_free(info);
 618 }
 619
 620
 621 const UChar*
 622 StringLocalizationInfo::getRuleSetName(int32_t index) const {
 623     if (index >= 0 && index < getNumberOfRuleSets()) {
 624         return data[0][index];
 625     }
 626     return NULL;
 627 }
 628
 629 const UChar*
 630 StringLocalizationInfo::getLocaleName(int32_t index) const {
 631     if (index >= 0 && index < getNumberOfDisplayLocales()) {
 632         return data[index+1][0];
 633     }
 634     return NULL;
 635 }
 636
 637 const UChar*
 638 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
 639     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
 640         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
 641         return data[localeIndex+1][ruleIndex+1];
 642     }
 643     return NULL;
 644 }
 645
 646 // ----------
 647
 648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 649                                              const UnicodeString& locs,
 650                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 651   : ruleSets(NULL)
 652   , ruleSetDescriptions(NULL)
 653   , numRuleSets(0)
 654   , defaultRuleSet(NULL)
 655   , locale(alocale)
 656   , collator(NULL)
 657   , decimalFormatSymbols(NULL)
 658   , lenient(FALSE)
 659   , lenientParseRules(NULL)
 660   , localizations(NULL)
 661 {
 662   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 663   init(description, locinfo, perror, status);
 664 }
 665
 666 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 667                                              const UnicodeString& locs,
 668                                              UParseError& perror, UErrorCode& status)
 669   : ruleSets(NULL)
 670   , ruleSetDescriptions(NULL)
 671   , numRuleSets(0)
 672   , defaultRuleSet(NULL)
 673   , locale(Locale::getDefault())
 674   , collator(NULL)
 675   , decimalFormatSymbols(NULL)
 676   , lenient(FALSE)
 677   , lenientParseRules(NULL)
 678   , localizations(NULL)
 679 {
 680   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
 681   init(description, locinfo, perror, status);
 682 }
 683
 684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 685                                              LocalizationInfo* info,
 686                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
 687   : ruleSets(NULL)
 688   , ruleSetDescriptions(NULL)
 689   , numRuleSets(0)
 690   , defaultRuleSet(NULL)
 691   , locale(alocale)
 692   , collator(NULL)
 693   , decimalFormatSymbols(NULL)
 694   , lenient(FALSE)
 695   , lenientParseRules(NULL)
 696   , localizations(NULL)
 697 {
 698   init(description, info, perror, status);
 699 }
 700
 701 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 702                          UParseError& perror,
 703                          UErrorCode& status)
 704   : ruleSets(NULL)
 705   , ruleSetDescriptions(NULL)
 706   , numRuleSets(0)
 707   , defaultRuleSet(NULL)
 708   , locale(Locale::getDefault())
 709   , collator(NULL)
 710   , decimalFormatSymbols(NULL)
 711   , lenient(FALSE)
 712   , lenientParseRules(NULL)
 713   , localizations(NULL)
 714 {
 715     init(description, NULL, perror, status);
 716 }
 717
 718 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
 719                          const Locale& aLocale,
 720                          UParseError& perror,
 721                          UErrorCode& status)
 722   : ruleSets(NULL)
 723   , ruleSetDescriptions(NULL)
 724   , numRuleSets(0)
 725   , defaultRuleSet(NULL)
 726   , locale(aLocale)
 727   , collator(NULL)
 728   , decimalFormatSymbols(NULL)
 729   , lenient(FALSE)
 730   , lenientParseRules(NULL)
 731   , localizations(NULL)
 732 {
 733     init(description, NULL, perror, status);
 734 }
 735
 736 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
 737   : ruleSets(NULL)
 738   , ruleSetDescriptions(NULL)
 739   , numRuleSets(0)
 740   , defaultRuleSet(NULL)
 741   , locale(alocale)
 742   , collator(NULL)
 743   , decimalFormatSymbols(NULL)
 744   , lenient(FALSE)
 745   , lenientParseRules(NULL)
 746   , localizations(NULL)
 747 {
 748     if (U_FAILURE(status)) {
 749         return;
 750     }
 751
 752     const char* rules_tag = "RBNFRules";
 753     const char* fmt_tag = "";
 754     switch (tag) {
 755     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
 756     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
 757     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
 758     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
 759     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
 760     }
 761
 762     // TODO: read localization info from resource
 763     LocalizationInfo* locinfo = NULL;
 764
 765     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
 766     if (U_SUCCESS(status)) {
 767         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
 768                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
 769
 770         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
 771         if (U_FAILURE(status)) {
 772             ures_close(nfrb);
 773         }
 774         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
 775         if (U_FAILURE(status)) {
 776             ures_close(rbnfRules);
 777             ures_close(nfrb);
 778             return;
 779         }
 780
 781         UnicodeString desc;
 782         while (ures_hasNext(ruleSets)) {
 783            desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
 784         }
 785         UParseError perror;
 786
 787         init (desc, locinfo, perror, status);
 788
 789         ures_close(ruleSets);
 790         ures_close(rbnfRules);
 791     }
 792     ures_close(nfrb);
 793 }
 794
 795 RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
 796   : NumberFormat(rhs)
 797   , ruleSets(NULL)
 798   , ruleSetDescriptions(NULL)
 799   , numRuleSets(0)
 800   , defaultRuleSet(NULL)
 801   , locale(rhs.locale)
 802   , collator(NULL)
 803   , decimalFormatSymbols(NULL)
 804   , lenient(FALSE)
 805   , lenientParseRules(NULL)
 806   , localizations(NULL)
 807 {
 808     this->operator=(rhs);
 809 }
 810
 811 // --------
 812
 813 RuleBasedNumberFormat&
 814 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
 815 {
 816     UErrorCode status = U_ZERO_ERROR;
 817     dispose();
 818     locale = rhs.locale;
 819     lenient = rhs.lenient;
 820
 821     UnicodeString rules = rhs.getRules();
 822     UParseError perror;
 823     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
 824
 825     return *this;
 826 }
 827
 828 RuleBasedNumberFormat::~RuleBasedNumberFormat()
 829 {
 830     dispose();
 831 }
 832
 833 Format*
 834 RuleBasedNumberFormat::clone(void) const
 835 {
 836     RuleBasedNumberFormat * result = NULL;
 837     UnicodeString rules = getRules();
 838     UErrorCode status = U_ZERO_ERROR;
 839     UParseError perror;
 840     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
 841     /* test for NULL */
 842     if (result == 0) {
 843         status = U_MEMORY_ALLOCATION_ERROR;
 844         return 0;
 845     }
 846     if (U_FAILURE(status)) {
 847         delete result;
 848         result = 0;
 849     } else {
 850         result->lenient = lenient;
 851     }
 852     return result;
 853 }
 854
 855 UBool
 856 RuleBasedNumberFormat::operator==(const Format& other) const
 857 {
 858     if (this == &other) {
 859         return TRUE;
 860     }
 861
 862     if (typeid(*this) == typeid(other)) {
 863         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
 864         if (locale == rhs.locale &&
 865             lenient == rhs.lenient &&
 866             (localizations == NULL
 867                 ? rhs.localizations == NULL
 868                 : (rhs.localizations == NULL
 869                     ? FALSE
 870                     : *localizations == rhs.localizations))) {
 871
 872             NFRuleSet** p = ruleSets;
 873             NFRuleSet** q = rhs.ruleSets;
 874             if (p == NULL) {
 875                 return q == NULL;
 876             } else if (q == NULL) {
 877                 return FALSE;
 878             }
 879             while (*p && *q && (**p == **q)) {
 880                 ++p;
 881                 ++q;
 882             }
 883             return *q == NULL && *p == NULL;
 884         }
 885     }
 886
 887     return FALSE;
 888 }
 889
 890 UnicodeString
 891 RuleBasedNumberFormat::getRules() const
 892 {
 893     UnicodeString result;
 894     if (ruleSets != NULL) {
 895         for (NFRuleSet** p = ruleSets; *p; ++p) {
 896             (*p)->appendRules(result);
 897         }
 898     }
 899     return result;
 900 }
 901
 902 UnicodeString
 903 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
 904 {
 905     if (localizations) {
 906       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
 907       return string;
 908     } else if (ruleSets) {
 909         UnicodeString result;
 910         for (NFRuleSet** p = ruleSets; *p; ++p) {
 911             NFRuleSet* rs = *p;
 912             if (rs->isPublic()) {
 913                 if (--index == -1) {
 914                     rs->getName(result);
 915                     return result;
 916                 }
 917             }
 918         }
 919     }
 920     UnicodeString empty;
 921     return empty;
 922 }
 923
 924 int32_t
 925 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
 926 {
 927     int32_t result = 0;
 928     if (localizations) {
 929       result = localizations->getNumberOfRuleSets();
 930     } else if (ruleSets) {
 931         for (NFRuleSet** p = ruleSets; *p; ++p) {
 932             if ((**p).isPublic()) {
 933                 ++result;
 934             }
 935         }
 936     }
 937     return result;
 938 }
 939
 940 int32_t
 941 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
 942     if (localizations) {
 943         return localizations->getNumberOfDisplayLocales();
 944     }
 945     return 0;
 946 }
 947
 948 Locale
 949 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
 950     if (U_FAILURE(status)) {
 951         return Locale("");
 952     }
 953     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
 954         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
 955         char buffer[64];
 956         int32_t cap = name.length() + 1;
 957         char* bp = buffer;
 958         if (cap > 64) {
 959             bp = (char *)uprv_malloc(cap);
 960             if (bp == NULL) {
 961                 status = U_MEMORY_ALLOCATION_ERROR;
 962                 return Locale("");
 963             }
 964         }
 965         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
 966         Locale retLocale(bp);
 967         if (bp != buffer) {
 968             uprv_free(bp);
 969         }
 970         return retLocale;
 971     }
 972     status = U_ILLEGAL_ARGUMENT_ERROR;
 973     Locale retLocale;
 974     return retLocale;
 975 }
 976
 977 UnicodeString
 978 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
 979     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
 980         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
 981         int32_t len = localeName.length();
 982         UChar* localeStr = localeName.getBuffer(len + 1);
 983         while (len >= 0) {
 984             localeStr[len] = 0;
 985             int32_t ix = localizations->indexForLocale(localeStr);
 986             if (ix >= 0) {
 987                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
 988                 return name;
 989             }
 990
 991             // trim trailing portion, skipping over ommitted sections
 992             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
 993             while (len > 0 && localeStr[len-1] == 0x005F) --len;
 994         }
 995         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
 996         return name;
 997     }
 998     UnicodeString bogus;
 999     bogus.setToBogus();
1000     return bogus;
1001 }
1002
1003 UnicodeString
1004 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1005     if (localizations) {
1006         UnicodeString rsn(ruleSetName);
1007         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1008         return getRuleSetDisplayName(ix, localeParam);
1009     }
1010     UnicodeString bogus;
1011     bogus.setToBogus();
1012     return bogus;
1013 }
1014
1015 NFRuleSet*
1016 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1017 {
1018     if (U_SUCCESS(status) && ruleSets) {
1019         for (NFRuleSet** p = ruleSets; *p; ++p) {
1020             NFRuleSet* rs = *p;
1021             if (rs->isNamed(name)) {
1022                 return rs;
1023             }
1024         }
1025         status = U_ILLEGAL_ARGUMENT_ERROR;
1026     }
1027     return NULL;
1028 }
1029
1030 UnicodeString&
1031 RuleBasedNumberFormat::format(int32_t number,
1032                               UnicodeString& toAppendTo,
1033                               FieldPosition& /* pos */) const
1034 {
1035     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1036     return toAppendTo;
1037 }
1038
1039
1040 UnicodeString&
1041 RuleBasedNumberFormat::format(int64_t number,
1042                               UnicodeString& toAppendTo,
1043                               FieldPosition& /* pos */) const
1044 {
1045     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1046     return toAppendTo;
1047 }
1048
1049
1050 UnicodeString&
1051 RuleBasedNumberFormat::format(double number,
1052                               UnicodeString& toAppendTo,
1053                               FieldPosition& /* pos */) const
1054 {
1055     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1056     if (uprv_isNaN(number)) {
1057         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1058         if (decFmtSyms) {
1059             toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1060         }
1061     } else if (defaultRuleSet) {
1062         defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1063     }
1064     return toAppendTo;
1065 }
1066
1067
1068 UnicodeString&
1069 RuleBasedNumberFormat::format(int32_t number,
1070                               const UnicodeString& ruleSetName,
1071                               UnicodeString& toAppendTo,
1072                               FieldPosition& /* pos */,
1073                               UErrorCode& status) const
1074 {
1075     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1076     if (U_SUCCESS(status)) {
1077         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1078             // throw new IllegalArgumentException("Can't use internal rule set");
1079             status = U_ILLEGAL_ARGUMENT_ERROR;
1080         } else {
1081             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1082             if (rs) {
1083                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1084             }
1085         }
1086     }
1087     return toAppendTo;
1088 }
1089
1090
1091 UnicodeString&
1092 RuleBasedNumberFormat::format(int64_t number,
1093                               const UnicodeString& ruleSetName,
1094                               UnicodeString& toAppendTo,
1095                               FieldPosition& /* pos */,
1096                               UErrorCode& status) const
1097 {
1098     if (U_SUCCESS(status)) {
1099         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1100             // throw new IllegalArgumentException("Can't use internal rule set");
1101             status = U_ILLEGAL_ARGUMENT_ERROR;
1102         } else {
1103             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1104             if (rs) {
1105                 rs->format(number, toAppendTo, toAppendTo.length());
1106             }
1107         }
1108     }
1109     return toAppendTo;
1110 }
1111
1112
1113 // make linker happy
1114 UnicodeString&
1115 RuleBasedNumberFormat::format(const Formattable& obj,
1116                               UnicodeString& toAppendTo,
1117                               FieldPosition& pos,
1118                               UErrorCode& status) const
1119 {
1120     return NumberFormat::format(obj, toAppendTo, pos, status);
1121 }
1122
1123 UnicodeString&
1124 RuleBasedNumberFormat::format(double number,
1125                               const UnicodeString& ruleSetName,
1126                               UnicodeString& toAppendTo,
1127                               FieldPosition& /* pos */,
1128                               UErrorCode& status) const
1129 {
1130     if (U_SUCCESS(status)) {
1131         if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1132             // throw new IllegalArgumentException("Can't use internal rule set");
1133             status = U_ILLEGAL_ARGUMENT_ERROR;
1134         } else {
1135             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1136             if (rs) {
1137                 rs->format(number, toAppendTo, toAppendTo.length());
1138             }
1139         }
1140     }
1141     return toAppendTo;
1142 }
1143
1144 void
1145 RuleBasedNumberFormat::parse(const UnicodeString& text,
1146                              Formattable& result,
1147                              ParsePosition& parsePosition) const
1148 {
1149     if (!ruleSets) {
1150         parsePosition.setErrorIndex(0);
1151         return;
1152     }
1153
1154     UnicodeString workingText(text, parsePosition.getIndex());
1155     ParsePosition workingPos(0);
1156
1157     ParsePosition high_pp(0);
1158     Formattable high_result;
1159
1160     for (NFRuleSet** p = ruleSets; *p; ++p) {
1161         NFRuleSet *rp = *p;
1162         if (rp->isPublic() && rp->isParseable()) {
1163             ParsePosition working_pp(0);
1164             Formattable working_result;
1165
1166             rp->parse(workingText, working_pp, kMaxDouble, working_result, lenient);
1167             if (working_pp.getIndex() > high_pp.getIndex()) {
1168                 high_pp = working_pp;
1169                 high_result = working_result;
1170
1171                 if (high_pp.getIndex() == workingText.length()) {
1172                     break;
1173                 }
1174             }
1175         }
1176     }
1177
1178     int32_t startIndex = parsePosition.getIndex();
1179     parsePosition.setIndex(startIndex + high_pp.getIndex());
1180     if (high_pp.getIndex() > 0) {
1181         parsePosition.setErrorIndex(-1);
1182     } else {
1183         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1184         parsePosition.setErrorIndex(startIndex + errorIndex);
1185     }
1186     result = high_result;
1187     if (result.getType() == Formattable::kDouble) {
1188         int32_t r = (int32_t)result.getDouble();
1189         if ((double)r == result.getDouble()) {
1190             result.setLong(r);
1191         }
1192     }
1193 }
1194
1195 #if !UCONFIG_NO_COLLATION
1196
1197 void
1198 RuleBasedNumberFormat::setLenient(UBool enabled)
1199 {
1200     lenient = enabled;
1201     if (!enabled && collator) {
1202         delete collator;
1203         collator = NULL;
1204     }
1205 }
1206
1207 #endif
1208
1209 void
1210 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1211     if (U_SUCCESS(status)) {
1212         if (ruleSetName.isEmpty()) {
1213           if (localizations) {
1214               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1215               defaultRuleSet = findRuleSet(name, status);
1216           } else {
1217             initDefaultRuleSet();
1218           }
1219         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1220             status = U_ILLEGAL_ARGUMENT_ERROR;
1221         } else {
1222             NFRuleSet* result = findRuleSet(ruleSetName, status);
1223             if (result != NULL) {
1224                 defaultRuleSet = result;
1225             }
1226         }
1227     }
1228 }
1229
1230 UnicodeString
1231 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1232   UnicodeString result;
1233   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1234     defaultRuleSet->getName(result);
1235   } else {
1236     result.setToBogus();
1237   }
1238   return result;
1239 }
1240
1241 void
1242 RuleBasedNumberFormat::initDefaultRuleSet()
1243 {
1244     defaultRuleSet = NULL;
1245     if (!ruleSets) {
1246       return;
1247     }
1248
1249     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1250     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1251     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1252
1253     NFRuleSet**p = &ruleSets[0];
1254     while (*p) {
1255         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1256             defaultRuleSet = *p;
1257             return;
1258         } else {
1259             ++p;
1260         }
1261     }
1262
1263     defaultRuleSet = *--p;
1264     if (!defaultRuleSet->isPublic()) {
1265         while (p != ruleSets) {
1266             if ((*--p)->isPublic()) {
1267                 defaultRuleSet = *p;
1268                 break;
1269             }
1270         }
1271     }
1272 }
1273
1274
/**
 * Builds the formatter's rule sets from a rule description.
 *
 * Steps, in order: take a reference on the localization info, strip the
 * whitespace following each semicolon, pull out an optional
 * "%%lenient-parse:" section, count and create one NFRuleSet per ";%"
 * separated section, pick a provisional default rule set, parse every rule
 * set, and finally let the localization data (if any) choose the default
 * rule set.
 *
 * @param rules             The rule description text.
 * @param localizationInfos Optional localization data; may be NULL.
 * @param pErr              Zeroed on entry; not otherwise filled in here.
 * @param status            In/out error code.  The function returns early on
 *                          failure and may leave ruleSets NULL.
 */
void
RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
                            UParseError& pErr, UErrorCode& status)
{
    // TODO: implement UParseError
    uprv_memset(&pErr, 0, sizeof(UParseError));
    // Note: this can leave ruleSets == NULL, so remaining code should check
    if (U_FAILURE(status)) {
        return;
    }

    this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();

    // an empty description (which is also what a failed copy would look
    // like) is reported as an allocation error
    UnicodeString description(rules);
    if (!description.length()) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    // start by stripping the trailing whitespace from all the rules
    // (this is all the whitespace following each semicolon in the
    // description).  This allows us to look for rule-set boundaries
    // by searching for ";%" without having to worry about whitespace
    // between the ; and the %
    stripWhitespace(description);

    // check to see if there's a set of lenient-parse rules.  If there
    // is, pull them out into our temporary holding place for them,
    // and delete them from the description before the real description-
    // parsing code sees them
    int32_t lp = description.indexOf(gLenientParse, -1, 0);
    if (lp != -1) {
        // we've got to make sure we're not in the middle of a rule
        // (where "%%lenient-parse" would actually get treated as
        // rule text)
        if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
            // locate the beginning and end of the actual collation
            // rules (there may be whitespace between the name and
            // the first token in the description)
            int lpEnd = description.indexOf(gSemiPercent, 2, lp);

            // no following rule set: the section runs to the last character
            if (lpEnd == -1) {
                lpEnd = description.length() - 1;
            }
            int lpStart = lp + u_strlen(gLenientParse);
            while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
                ++lpStart;
            }

            // copy out the lenient-parse rules and delete them
            // from the description
            lenientParseRules = new UnicodeString();
            /* test for NULL */
            if (lenientParseRules == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);

            description.remove(lp, lpEnd + 1 - lp);
        }
    }

    // pre-flight parsing the description and count the number of
    // rule sets (";%" marks the end of one rule set and the beginning
    // of the next)
    numRuleSets = 0;
    for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
        ++numRuleSets;
        ++p;
    }
    // one more rule set than there are separators
    ++numRuleSets;

    // our rule list is an array of the appropriate size
    // (one extra slot for the terminating NULL)
    ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
    /* test for NULL */
    if (ruleSets == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    for (int i = 0; i <= numRuleSets; ++i) {
        ruleSets[i] = NULL;
    }

    // divide up the descriptions into individual rule-set descriptions
    // and store them in a temporary array.  At each step, we also
    // new up a rule set, but all this does is initialize its name
    // and remove it from its description.  We can't actually parse
    // the rest of the descriptions and finish initializing everything
    // because we have to know the names and locations of all the rule
    // sets before we can actually set everything up
    // NOTE(review): numRuleSets was incremented just above, so this check
    // looks unreachable -- confirm before relying on it.
    if(!numRuleSets) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    ruleSetDescriptions = new UnicodeString[numRuleSets];
    if (ruleSetDescriptions == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    {
        int curRuleSet = 0;
        int32_t start = 0;
        for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
            // each section keeps its terminating semicolon
            ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
            ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
            if (ruleSets[curRuleSet] == 0) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            ++curRuleSet;
            start = p + 1;
        }
        // the last rule set is whatever follows the final separator
        ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
        ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
        if (ruleSets[curRuleSet] == 0) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
    }

    // now we can take note of the formatter's default rule set, which
    // is the last public rule set in the description (it's the last
    // rather than the first so that a user can create a new formatter
    // from an existing formatter and change its default behavior just
    // by appending more rule sets to the end)

    // {dlf} Initialization of a fraction rule set requires the default rule
    // set to be known.  For purposes of initialization, this is always the
    // last public rule set, no matter what the localization data says.
    initDefaultRuleSet();

    // finally, we can go back through the temporary descriptions
    // list and finish setting up the substructure (and we throw
    // away the temporary descriptions as we go)
    {
        for (int i = 0; i < numRuleSets; i++) {
            ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
        }
    }

    // Now that the rules are initialized, the 'real' default rule
    // set can be adjusted by the localization data.

    // The C code keeps the localization array as is, rather than building
    // a separate array of the public rule set names, so we have less work
    // to do here-- but we still need to check the names.

    if (localizationInfos) {
        // confirm the names, if any aren't in the rules, that's an error
        // it is ok if the rules contain public rule sets that are not in this list
        for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
            UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
            NFRuleSet* rs = findRuleSet(name, status);
            if (rs == NULL) {
                break; // error
            }
            // the first localized rule set becomes the default
            if (i == 0) {
                defaultRuleSet = rs;
            }
        }
    } else {
        // NOTE(review): getDefaultRuleSet() is defined outside this chunk;
        // presumably it just returns defaultRuleSet -- confirm.
        defaultRuleSet = getDefaultRuleSet();
    }
}
1443
1444 void
1445 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1446 {
1447     // iterate through the characters...
1448     UnicodeString result;
1449
1450     int start = 0;
1451     while (start != -1 && start < description.length()) {
1452         // seek to the first non-whitespace character...
1453         while (start < description.length()
1454             && PatternProps::isWhiteSpace(description.charAt(start))) {
1455             ++start;
1456         }
1457
1458         // locate the next semicolon in the text and copy the text from
1459         // our current position up to that semicolon into the result
1460         int32_t p = description.indexOf(gSemiColon, start);
1461         if (p == -1) {
1462             // or if we don't find a semicolon, just copy the rest of
1463             // the string into the result
1464             result.append(description, start, description.length() - start);
1465             start = -1;
1466         }
1467         else if (p < description.length()) {
1468             result.append(description, start, p + 1 - start);
1469             start = p + 1;
1470         }
1471
1472         // when we get here, we've seeked off the end of the sring, and
1473         // we terminate the loop (we continue until *start* is -1 rather
1474         // than until *p* is -1, because otherwise we'd miss the last
1475         // rule in the description)
1476         else {
1477             start = -1;
1478         }
1479     }
1480
1481     description.setTo(result);
1482 }
1483
1484
1485 void
1486 RuleBasedNumberFormat::dispose()
1487 {
1488     if (ruleSets) {
1489         for (NFRuleSet** p = ruleSets; *p; ++p) {
1490             delete *p;
1491         }
1492         uprv_free(ruleSets);
1493         ruleSets = NULL;
1494     }
1495
1496     if (ruleSetDescriptions) {
1497         delete [] ruleSetDescriptions;
1498     }
1499
1500 #if !UCONFIG_NO_COLLATION
1501     delete collator;
1502 #endif
1503     collator = NULL;
1504
1505     delete decimalFormatSymbols;
1506     decimalFormatSymbols = NULL;
1507
1508     delete lenientParseRules;
1509     lenientParseRules = NULL;
1510
1511     if (localizations) localizations = localizations->unref();
1512 }
1513
1514
1515 //-----------------------------------------------------------------------
1516 // package-internal API
1517 //-----------------------------------------------------------------------
1518
1519 /**
1520  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1521  * this function creates it the first time it's called.
1522  * @return The collator to use for lenient parsing, or null if lenient parsing
1523  * is turned off.
1524 */
1525 Collator*
1526 RuleBasedNumberFormat::getCollator() const
1527 {
1528 #if !UCONFIG_NO_COLLATION
1529     if (!ruleSets) {
1530         return NULL;
1531     }
1532
1533     // lazy-evaulate the collator
1534     if (collator == NULL && lenient) {
1535         // create a default collator based on the formatter's locale,
1536         // then pull out that collator's rules, append any additional
1537         // rules specified in the description, and create a _new_
1538         // collator based on the combinaiton of those rules
1539
1540         UErrorCode status = U_ZERO_ERROR;
1541
1542         Collator* temp = Collator::createInstance(locale, status);
1543         RuleBasedCollator* newCollator;
1544         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1545             if (lenientParseRules) {
1546                 UnicodeString rules(newCollator->getRules());
1547                 rules.append(*lenientParseRules);
1548
1549                 newCollator = new RuleBasedCollator(rules, status);
1550                 // Exit if newCollator could not be created.
1551                 if (newCollator == NULL) {
1552                         return NULL;
1553                 }
1554             } else {
1555                 temp = NULL;
1556             }
1557             if (U_SUCCESS(status)) {
1558                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1559                 // cast away const
1560                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1561             } else {
1562                 delete newCollator;
1563             }
1564         }
1565         delete temp;
1566     }
1567 #endif
1568
1569     // if lenient-parse mode is off, this will be null
1570     // (see setLenientParseMode())
1571     return collator;
1572 }
1573
1574
1575 /**
1576  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1577  * instances owned by this formatter.  This object is lazily created: this function
1578  * creates it the first time it's called.
1579  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1580  * instances owned by this formatter.
1581 */
1582 DecimalFormatSymbols*
1583 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1584 {
1585     // lazy-evaluate the DecimalFormatSymbols object.  This object
1586     // is shared by all DecimalFormat instances belonging to this
1587     // formatter
1588     if (decimalFormatSymbols == NULL) {
1589         UErrorCode status = U_ZERO_ERROR;
1590         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1591         if (U_SUCCESS(status)) {
1592             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1593         } else {
1594             delete temp;
1595         }
1596     }
1597     return decimalFormatSymbols;
1598 }
1599
1600 // De-owning the current localized symbols and adopt the new symbols.
1601 void
1602 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1603 {
1604     if (symbolsToAdopt == NULL) {
1605         return; // do not allow caller to set decimalFormatSymbols to NULL
1606     }
1607
1608     if (decimalFormatSymbols != NULL) {
1609         delete decimalFormatSymbols;
1610     }
1611
1612     decimalFormatSymbols = symbolsToAdopt;
1613
1614     {
1615         // Apply the new decimalFormatSymbols by reparsing the rulesets
1616         UErrorCode status = U_ZERO_ERROR;
1617
1618         for (int32_t i = 0; i < numRuleSets; i++) {
1619             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1620         }
1621     }
1622 }
1623
1624 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1625 void
1626 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1627 {
1628     adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1629 }
1630
1631 U_NAMESPACE_END
1632
1633 /* U_HAVE_RBNF */
1634 #endif