icuSources/i18n/msgfmt.cpp

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2010, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************
   6  *
   7  * File MSGFMT.CPP
   8  *
   9  * Modification History:
  10  *
  11  *   Date        Name        Description
  12  *   02/19/97    aliu        Converted from java.
  13  *   03/20/97    helena      Finished first cut of implementation.
  14  *   04/10/97    aliu        Made to work on AIX.  Added stoi to replace wtoi.
  15  *   06/11/97    helena      Fixed addPattern to take the pattern correctly.
  16  *   06/17/97    helena      Fixed the getPattern to return the correct pattern.
  17  *   07/09/97    helena      Made ParsePosition into a class.
  18  *   02/22/99    stephen     Removed character literals for EBCDIC safety
  19  *   11/01/09    kirtig      Added SelectFormat
  20  ********************************************************************/
  21
  22 #include "unicode/utypes.h"
  23
  24 #if !UCONFIG_NO_FORMATTING
  25
  26 #include "unicode/msgfmt.h"
  27 #include "unicode/decimfmt.h"
  28 #include "unicode/datefmt.h"
  29 #include "unicode/smpdtfmt.h"
  30 #include "unicode/choicfmt.h"
  31 #include "unicode/plurfmt.h"
  32 #include "unicode/selfmt.h"
  33 #include "unicode/ustring.h"
  34 #include "unicode/ucnv_err.h"
  35 #include "unicode/uchar.h"
  36 #include "unicode/umsg.h"
  37 #include "unicode/rbnf.h"
  38 #include "cmemory.h"
  39 #include "msgfmt_impl.h"
  40 #include "util.h"
  41 #include "uassert.h"
  42 #include "ustrfmt.h"
  43 #include "uvector.h"
  44
  45 // *****************************************************************************
  46 // class MessageFormat
  47 // *****************************************************************************
  48
  49 #define COMMA             ((UChar)0x002C)
  50 #define SINGLE_QUOTE      ((UChar)0x0027)
  51 #define LEFT_CURLY_BRACE  ((UChar)0x007B)
  52 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
  53
  54 //---------------------------------------
  55 // static data
  56
  57 static const UChar ID_EMPTY[]     = {
  58     0 /* empty string, used for default so that null can mark end of list */
  59 };
  60
  61 static const UChar ID_NUMBER[]    = {
  62     0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0  /* "number" */
  63 };
  64 static const UChar ID_DATE[]      = {
  65     0x64, 0x61, 0x74, 0x65, 0              /* "date" */
  66 };
  67 static const UChar ID_TIME[]      = {
  68     0x74, 0x69, 0x6D, 0x65, 0              /* "time" */
  69 };
  70 static const UChar ID_CHOICE[]    = {
  71     0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0  /* "choice" */
  72 };
  73 static const UChar ID_SPELLOUT[]  = {
  74     0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
  75 };
  76 static const UChar ID_ORDINAL[]   = {
  77     0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
  78 };
  79 static const UChar ID_DURATION[]  = {
  80     0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
  81 };
  82 static const UChar ID_PLURAL[]  = {
  83     0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0  /* "plural" */
  84 };
  85 static const UChar ID_SELECT[]  = {
  86     0x73, 0x65, 0x6C, 0x65, 0x63, 0x74, 0  /* "select" */
  87 };
  88
  89 // MessageFormat Type List  Number, Date, Time or Choice
  90 static const UChar * const TYPE_IDS[] = {
  91     ID_EMPTY,
  92     ID_NUMBER,
  93     ID_DATE,
  94     ID_TIME,
  95     ID_CHOICE,
  96     ID_SPELLOUT,
  97     ID_ORDINAL,
  98     ID_DURATION,
  99     ID_PLURAL,
 100     ID_SELECT,
 101     NULL,
 102 };
 103
 104 static const UChar ID_CURRENCY[]  = {
 105     0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0  /* "currency" */
 106 };
 107 static const UChar ID_PERCENT[]   = {
 108     0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0        /* "percent" */
 109 };
 110 static const UChar ID_INTEGER[]   = {
 111     0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0        /* "integer" */
 112 };
 113
 114 // NumberFormat modifier list, default, currency, percent or integer
 115 static const UChar * const NUMBER_STYLE_IDS[] = {
 116     ID_EMPTY,
 117     ID_CURRENCY,
 118     ID_PERCENT,
 119     ID_INTEGER,
 120     NULL,
 121 };
 122
 123 static const UChar ID_SHORT[]     = {
 124     0x73, 0x68, 0x6F, 0x72, 0x74, 0        /* "short" */
 125 };
 126 static const UChar ID_MEDIUM[]    = {
 127     0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0  /* "medium" */
 128 };
 129 static const UChar ID_LONG[]      = {
 130     0x6C, 0x6F, 0x6E, 0x67, 0              /* "long" */
 131 };
 132 static const UChar ID_FULL[]      = {
 133     0x66, 0x75, 0x6C, 0x6C, 0              /* "full" */
 134 };
 135
 136 // DateFormat modifier list, default, short, medium, long or full
 137 static const UChar * const DATE_STYLE_IDS[] = {
 138     ID_EMPTY,
 139     ID_SHORT,
 140     ID_MEDIUM,
 141     ID_LONG,
 142     ID_FULL,
 143     NULL,
 144 };
 145
 146 static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = {
 147     U_NAMESPACE_QUALIFIER DateFormat::kDefault,
 148     U_NAMESPACE_QUALIFIER DateFormat::kShort,
 149     U_NAMESPACE_QUALIFIER DateFormat::kMedium,
 150     U_NAMESPACE_QUALIFIER DateFormat::kLong,
 151     U_NAMESPACE_QUALIFIER DateFormat::kFull,
 152 };
 153
 154 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
 155
 156 U_NAMESPACE_BEGIN
 157
 158 // -------------------------------------
  // Invoke the RTTI implementation macro once for each of the two classes
  // named below.
  // NOTE(review): the macro is defined outside this file; presumably it emits
  // the class-ID accessors for each class -- confirm against its definition.
  159 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
  160 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
 161
 162 //--------------------------------------------------------------------
 163
 164 /**
 165  * Convert a string to an unsigned decimal, ignoring rule whitespace.
 166  * @return a non-negative number if successful, or a negative number
 167  *         upon failure.
 168  */
 169 static int32_t stou(const UnicodeString& string) {
 170     int32_t n = 0;
 171     int32_t count = 0;
 172     UChar32 c;
 173     for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
 174         c = string.char32At(i);
 175         if (uprv_isRuleWhiteSpace(c)) {
 176             continue;
 177         }
 178         int32_t d = u_digit(c, 10);
 179         if (d < 0 || ++count > 10) {
 180             return -1;
 181         }
 182         n = 10*n + d;
 183     }
 184     return n;
 185 }
 186
 187 /**
 188  * Convert an integer value to a string and append the result to
 189  * the given UnicodeString.
 190  */
 191 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
 192     UChar temp[16];
 193     uprv_itou(temp,16,i,10,0); // 10 == radix
 194     appendTo.append(temp);
 195     return appendTo;
 196 }
 197
 198 /*
 199  * A structure representing one subformat of this MessageFormat.
 200  * Each subformat has a Format object, an offset into the plain
 201  * pattern text fPattern, and an argument number.  The argument
 202  * number corresponds to the array of arguments to be formatted.
 203  * @internal
 204  */
 205 class MessageFormat::Subformat : public UMemory {
 206 public:
 207     /**
 208      * @internal
 209      */
 210     Format* format; // formatter
 211     /**
 212      * @internal
 213      */
 214     int32_t offset; // offset into fPattern
 215     /**
 216      * @internal
 217      */
 218     // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number
 219     int32_t argNum;    // 0-based argument number
 220     /**
 221      * @internal
 222      */
 223     UnicodeString* argName; // argument name or number
 224
 225     /**
 226      * Clone that.format and assign it to this.format
 227      * Do NOT delete this.format
 228      * @internal
 229      */
 230     Subformat& operator=(const Subformat& that) {
 231         if (this != &that) {
 232             format = that.format ? that.format->clone() : NULL;
 233             offset = that.offset;
 234             argNum = that.argNum;
 235             argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL;
 236         }
 237         return *this;
 238     }
 239
 240     /**
 241      * @internal
 242      */
 243     UBool operator==(const Subformat& that) const {
 244         // Do cheap comparisons first
 245         return offset == that.offset &&
 246                argNum == that.argNum &&
 247                ((argName == that.argName) ||
 248                 (*argName == *that.argName)) &&
 249                ((format == that.format) || // handles NULL
 250                 (*format == *that.format));
 251     }
 252
 253     /**
 254      * @internal
 255      */
 256     UBool operator!=(const Subformat& that) const {
 257         return !operator==(that);
 258     }
 259 };
 260
 261 // -------------------------------------
 262 // Creates a MessageFormat instance based on the pattern.
 263
 264 MessageFormat::MessageFormat(const UnicodeString& pattern,
 265                              UErrorCode& success)
 266 : fLocale(Locale::getDefault()),  // Uses the default locale
 267   formatAliases(NULL),
 268   formatAliasesCapacity(0),
 269   idStart(UCHAR_ID_START),
 270   idContinue(UCHAR_ID_CONTINUE),
 271   subformats(NULL),
 272   subformatCount(0),
 273   subformatCapacity(0),
 274   argTypes(NULL),
 275   argTypeCount(0),
 276   argTypeCapacity(0),
 277   isArgNumeric(TRUE),
 278   defaultNumberFormat(NULL),
 279   defaultDateFormat(NULL)
 280 {
 281     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
 282         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
 283         success = U_MEMORY_ALLOCATION_ERROR;
 284         return;
 285     }
 286     applyPattern(pattern, success);
 287     setLocaleIDs(fLocale.getName(), fLocale.getName());
 288 }
 289
 290 MessageFormat::MessageFormat(const UnicodeString& pattern,
 291                              const Locale& newLocale,
 292                              UErrorCode& success)
 293 : fLocale(newLocale),
 294   formatAliases(NULL),
 295   formatAliasesCapacity(0),
 296   idStart(UCHAR_ID_START),
 297   idContinue(UCHAR_ID_CONTINUE),
 298   subformats(NULL),
 299   subformatCount(0),
 300   subformatCapacity(0),
 301   argTypes(NULL),
 302   argTypeCount(0),
 303   argTypeCapacity(0),
 304   isArgNumeric(TRUE),
 305   defaultNumberFormat(NULL),
 306   defaultDateFormat(NULL)
 307 {
 308     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
 309         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
 310         success = U_MEMORY_ALLOCATION_ERROR;
 311         return;
 312     }
 313     applyPattern(pattern, success);
 314     setLocaleIDs(fLocale.getName(), fLocale.getName());
 315 }
 316
 317 MessageFormat::MessageFormat(const UnicodeString& pattern,
 318                              const Locale& newLocale,
 319                              UParseError& parseError,
 320                              UErrorCode& success)
 321 : fLocale(newLocale),
 322   formatAliases(NULL),
 323   formatAliasesCapacity(0),
 324   idStart(UCHAR_ID_START),
 325   idContinue(UCHAR_ID_CONTINUE),
 326   subformats(NULL),
 327   subformatCount(0),
 328   subformatCapacity(0),
 329   argTypes(NULL),
 330   argTypeCount(0),
 331   argTypeCapacity(0),
 332   isArgNumeric(TRUE),
 333   defaultNumberFormat(NULL),
 334   defaultDateFormat(NULL)
 335 {
 336     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
 337         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
 338         success = U_MEMORY_ALLOCATION_ERROR;
 339         return;
 340     }
 341     applyPattern(pattern, parseError, success);
 342     setLocaleIDs(fLocale.getName(), fLocale.getName());
 343 }
 344
 345 MessageFormat::MessageFormat(const MessageFormat& that)
 346 : Format(that),
 347   formatAliases(NULL),
 348   formatAliasesCapacity(0),
 349   idStart(UCHAR_ID_START),
 350   idContinue(UCHAR_ID_CONTINUE),
 351   subformats(NULL),
 352   subformatCount(0),
 353   subformatCapacity(0),
 354   argTypes(NULL),
 355   argTypeCount(0),
 356   argTypeCapacity(0),
 357   isArgNumeric(TRUE),
 358   defaultNumberFormat(NULL),
 359   defaultDateFormat(NULL)
 360 {
 361     *this = that;
 362 }
 363
 364 MessageFormat::~MessageFormat()
 365 {
 366     int32_t idx;
 367     for (idx = 0; idx < subformatCount; idx++) {
 368         delete subformats[idx].format;
 369         delete subformats[idx].argName;
 370     }
 371     uprv_free(subformats);
 372     subformats = NULL;
 373     subformatCount = subformatCapacity = 0;
 374
 375     uprv_free(argTypes);
 376     argTypes = NULL;
 377     argTypeCount = argTypeCapacity = 0;
 378
 379     uprv_free(formatAliases);
 380
 381     delete defaultNumberFormat;
 382     delete defaultDateFormat;
 383 }
 384
 385 //--------------------------------------------------------------------
 386 // Variable-size array management
 387
 388 /**
 389  * Allocate subformats[] to at least the given capacity and return
 390  * TRUE if successful.  If not, leave subformats[] unchanged.
 391  *
 392  * If subformats is NULL, allocate it.  If it is not NULL, enlarge it
 393  * if necessary to be at least as large as specified.
 394  */
 395 UBool MessageFormat::allocateSubformats(int32_t capacity) {
 396     if (subformats == NULL) {
 397         subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
 398         subformatCapacity = capacity;
 399         subformatCount = 0;
 400         if (subformats == NULL) {
 401             subformatCapacity = 0;
 402             return FALSE;
 403         }
 404     } else if (subformatCapacity < capacity) {
 405         if (capacity < 2*subformatCapacity) {
 406             capacity = 2*subformatCapacity;
 407         }
 408         Subformat* a = (Subformat*)
 409             uprv_realloc(subformats, sizeof(*subformats) * capacity);
 410         if (a == NULL) {
 411             return FALSE; // request failed
 412         }
 413         subformats = a;
 414         subformatCapacity = capacity;
 415     }
 416     return TRUE;
 417 }
 418
 419 /**
 420  * Allocate argTypes[] to at least the given capacity and return
 421  * TRUE if successful.  If not, leave argTypes[] unchanged.
 422  *
 423  * If argTypes is NULL, allocate it.  If it is not NULL, enlarge it
 424  * if necessary to be at least as large as specified.
 425  */
 426 UBool MessageFormat::allocateArgTypes(int32_t capacity) {
 427     if (argTypes == NULL) {
 428         argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
 429         argTypeCount = 0;
 430         argTypeCapacity = capacity;
 431         if (argTypes == NULL) {
 432             argTypeCapacity = 0;
 433             return FALSE;
 434         }
 435         for (int32_t i=0; i<capacity; ++i) {
 436             argTypes[i] = Formattable::kString;
 437         }
 438     } else if (argTypeCapacity < capacity) {
 439         if (capacity < 2*argTypeCapacity) {
 440             capacity = 2*argTypeCapacity;
 441         }
 442         Formattable::Type* a = (Formattable::Type*)
 443             uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
 444         if (a == NULL) {
 445             return FALSE; // request failed
 446         }
 447         for (int32_t i=argTypeCapacity; i<capacity; ++i) {
 448             a[i] = Formattable::kString;
 449         }
 450         argTypes = a;
 451         argTypeCapacity = capacity;
 452     }
 453     return TRUE;
 454 }
 455
 456 // -------------------------------------
 457 // assignment operator
 458
 459 const MessageFormat&
 460 MessageFormat::operator=(const MessageFormat& that)
 461 {
 462     // Reallocate the arrays BEFORE changing this object
 463     if (this != &that &&
 464         allocateSubformats(that.subformatCount) &&
 465         allocateArgTypes(that.argTypeCount)) {
 466
 467         // Calls the super class for assignment first.
 468         Format::operator=(that);
 469
 470         fPattern = that.fPattern;
 471         setLocale(that.fLocale);
 472         isArgNumeric = that.isArgNumeric;
 473         int32_t j;
 474         for (j=0; j<subformatCount; ++j) {
 475             delete subformats[j].format;
 476         }
 477         subformatCount = 0;
 478
 479         for (j=0; j<that.subformatCount; ++j) {
 480             // Subformat::operator= does NOT delete this.format
 481             subformats[j] = that.subformats[j];
 482         }
 483         subformatCount = that.subformatCount;
 484
 485         for (j=0; j<that.argTypeCount; ++j) {
 486             argTypes[j] = that.argTypes[j];
 487         }
 488         argTypeCount = that.argTypeCount;
 489     }
 490     return *this;
 491 }
 492
 493 UBool
 494 MessageFormat::operator==(const Format& rhs) const
 495 {
 496     if (this == &rhs) return TRUE;
 497
 498     MessageFormat& that = (MessageFormat&)rhs;
 499
 500     // Check class ID before checking MessageFormat members
 501     if (!Format::operator==(rhs) ||
 502         fPattern != that.fPattern ||
 503         fLocale != that.fLocale ||
 504         isArgNumeric != that.isArgNumeric) {
 505         return FALSE;
 506     }
 507
 508     int32_t j;
 509     for (j=0; j<subformatCount; ++j) {
 510         if (subformats[j] != that.subformats[j]) {
 511             return FALSE;
 512         }
 513     }
 514
 515     return TRUE;
 516 }
 517
 518 // -------------------------------------
 519 // Creates a copy of this MessageFormat, the caller owns the copy.
 520
 521 Format*
 522 MessageFormat::clone() const
 523 {
 524     return new MessageFormat(*this);
 525 }
 526
 527 // -------------------------------------
 528 // Sets the locale of this MessageFormat object to theLocale.
 529
 530 void
 531 MessageFormat::setLocale(const Locale& theLocale)
 532 {
 533     if (fLocale != theLocale) {
 534         delete defaultNumberFormat;
 535         defaultNumberFormat = NULL;
 536         delete defaultDateFormat;
 537         defaultDateFormat = NULL;
 538     }
 539     fLocale = theLocale;
 540     setLocaleIDs(fLocale.getName(), fLocale.getName());
 541 }
 542
 543 // -------------------------------------
 544 // Gets the locale of this MessageFormat object.
 545
 546 const Locale&
 547 MessageFormat::getLocale() const
 548 {
 549     return fLocale;
 550 }
 551
 552
 553
 554
 555 void
 556 MessageFormat::applyPattern(const UnicodeString& newPattern,
 557                             UErrorCode& status)
 558 {
 559     UParseError parseError;
 560     applyPattern(newPattern,parseError,status);
 561 }
 562
 563
 564 // -------------------------------------
 565 // Applies the new pattern and returns an error if the pattern
 566 // is not correct.
 567 void
 568 MessageFormat::applyPattern(const UnicodeString& pattern,
 569                             UParseError& parseError,
 570                             UErrorCode& ec)
 571 {
 572     if(U_FAILURE(ec)) {
 573         return;
 574     }
 575     // The pattern is broken up into segments.  Each time a subformat
 576     // is encountered, 4 segments are recorded.  For example, consider
 577     // the pattern:
 578     //  "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
 579     // The first set of segments is:
 580     //  segments[0] = "There "
 581     //  segments[1] = "0"
 582     //  segments[2] = "choice"
 583     //  segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
 584
 585     // During parsing, the plain text is accumulated into segments[0].
 586     // Segments 1..3 are used to parse each subpattern.  Each time a
 587     // subpattern is parsed, it creates a format object that is stored
 588     // in the subformats array, together with an offset and argument
 589     // number.  The offset into the plain text stored in
 590     // segments[0].
 591
 592     // Quotes in segment 0 are handled normally.  They are removed.
 593     // Quotes may not occur in segments 1 or 2.
 594     // Quotes in segment 3 are parsed and _copied_.  This makes
 595     //  subformat patterns work, e.g., {1,number,'#'.##} passes
 596     //  the pattern "'#'.##" to DecimalFormat.
 597
 598     UnicodeString segments[4];
 599     int32_t part = 0; // segment we are in, 0..3
 600     // Record the highest argument number in the pattern.  (In the
 601     // subpattern {3,number} the argument number is 3.)
 602     int32_t formatNumber = 0;
 603     UBool inQuote = FALSE;
 604     int32_t braceStack = 0;
 605     // Clear error struct
 606     parseError.offset = -1;
 607     parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
 608     int32_t patLen = pattern.length();
 609     int32_t i;
 610
 611     for (i=0; i<subformatCount; ++i) {
 612         delete subformats[i].format;
 613     }
 614     subformatCount = 0;
 615     argTypeCount = 0;
 616
 617     for (i=0; i<patLen; ++i) {
 618         UChar ch = pattern[i];
 619         if (part == 0) {
 620             // In segment 0, recognize and remove quotes
 621             if (ch == SINGLE_QUOTE) {
 622                 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
 623                     segments[0] += ch;
 624                     ++i;
 625                 } else {
 626                     inQuote = !inQuote;
 627                 }
 628             } else if (ch == LEFT_CURLY_BRACE && !inQuote) {
 629                 // The only way we get from segment 0 to 1 is via an
 630                 // unquoted '{'.
 631                 part = 1;
 632             } else {
 633                 segments[0] += ch;
 634             }
 635         } else if (inQuote) {
 636             // In segments 1..3, recognize quoted matter, and copy it
 637             // into the segment, together with the quotes.  This takes
 638             // care of '' as well.
 639             segments[part] += ch;
 640             if (ch == SINGLE_QUOTE) {
 641                 inQuote = FALSE;
 642             }
 643         } else {
 644             // We have an unquoted character in segment 1..3
 645             switch (ch) {
 646             case COMMA:
 647                 // Commas bump us to the next segment, except for segment 3,
 648                 // which can contain commas.  See example above.
 649                 if (part < 3)
 650                     part += 1;
 651                 else
 652                     segments[3] += ch;
 653                 break;
 654             case LEFT_CURLY_BRACE:
 655                 // Handle '{' within segment 3.  The initial '{'
 656                 // before segment 1 is handled above.
 657                 if (part != 3) {
 658                     ec = U_PATTERN_SYNTAX_ERROR;
 659                     goto SYNTAX_ERROR;
 660                 }
 661                 ++braceStack;
 662                 segments[part] += ch;
 663                 break;
 664             case RIGHT_CURLY_BRACE:
 665                 if (braceStack == 0) {
 666                     makeFormat(formatNumber, segments, parseError,ec);
 667                     if (U_FAILURE(ec)){
 668                         goto SYNTAX_ERROR;
 669                     }
 670                     formatNumber++;
 671
 672                     segments[1].remove();
 673                     segments[2].remove();
 674                     segments[3].remove();
 675                     part = 0;
 676                 } else {
 677                     --braceStack;
 678                     segments[part] += ch;
 679                 }
 680                 break;
 681             case SINGLE_QUOTE:
 682                 inQuote = TRUE;
 683                 // fall through (copy quote chars in segments 1..3)
 684             default:
 685                 segments[part] += ch;
 686                 break;
 687             }
 688         }
 689     }
 690     if (braceStack != 0 || part != 0) {
 691         // Unmatched braces in the pattern
 692         ec = U_UNMATCHED_BRACES;
 693         goto SYNTAX_ERROR;
 694     }
 695     fPattern = segments[0];
 696     return;
 697
 698  SYNTAX_ERROR:
 699     syntaxError(pattern, i, parseError);
 700     for (i=0; i<subformatCount; ++i) {
 701         delete subformats[i].format;
 702     }
 703     argTypeCount = subformatCount = 0;
 704 }
 705 // -------------------------------------
 706 // Converts this MessageFormat instance to a pattern.
 707
 708 UnicodeString&
 709 MessageFormat::toPattern(UnicodeString& appendTo) const {
 710     // later, make this more extensible
 711     int32_t lastOffset = 0;
 712     int32_t i;
 713     for (i=0; i<subformatCount; ++i) {
 714         copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
 715         lastOffset = subformats[i].offset;
 716         appendTo += LEFT_CURLY_BRACE;
 717         if (isArgNumeric) {
 718             itos(subformats[i].argNum, appendTo);
 719         }
 720         else {
 721             appendTo += *subformats[i].argName;
 722         }
 723         Format* fmt = subformats[i].format;
 724         DecimalFormat* decfmt;
 725         SimpleDateFormat* sdtfmt;
 726         ChoiceFormat* chcfmt;
 727         PluralFormat* plfmt;
 728         SelectFormat* selfmt;
 729         if (fmt == NULL) {
 730             // do nothing, string format
 731         }
 732         else if ((decfmt = dynamic_cast<DecimalFormat*>(fmt)) != NULL) {
 733             UErrorCode ec = U_ZERO_ERROR;
 734             NumberFormat& formatAlias = *decfmt;
 735             NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
 736             NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
 737             NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
 738             NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
 739
 740             appendTo += COMMA;
 741             appendTo += ID_NUMBER;
 742             if (formatAlias != *defaultTemplate) {
 743                 appendTo += COMMA;
 744                 if (formatAlias == *currencyTemplate) {
 745                     appendTo += ID_CURRENCY;
 746                 }
 747                 else if (formatAlias == *percentTemplate) {
 748                     appendTo += ID_PERCENT;
 749                 }
 750                 else if (formatAlias == *integerTemplate) {
 751                     appendTo += ID_INTEGER;
 752                 }
 753                 else {
 754                     UnicodeString buffer;
 755                     appendTo += decfmt->toPattern(buffer);
 756                 }
 757             }
 758
 759             delete defaultTemplate;
 760             delete currencyTemplate;
 761             delete percentTemplate;
 762             delete integerTemplate;
 763         }
 764         else if ((sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt)) != NULL) {
 765             DateFormat& formatAlias = *sdtfmt;
 766             DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
 767             DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
 768             DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
 769             DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
 770             DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
 771             DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
 772             DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
 773             DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
 774
 775
 776             appendTo += COMMA;
 777             if (formatAlias == *defaultDateTemplate) {
 778                 appendTo += ID_DATE;
 779             }
 780             else if (formatAlias == *shortDateTemplate) {
 781                 appendTo += ID_DATE;
 782                 appendTo += COMMA;
 783                 appendTo += ID_SHORT;
 784             }
 785             else if (formatAlias == *defaultDateTemplate) {
 786                 appendTo += ID_DATE;
 787                 appendTo += COMMA;
 788                 appendTo += ID_MEDIUM;
 789             }
 790             else if (formatAlias == *longDateTemplate) {
 791                 appendTo += ID_DATE;
 792                 appendTo += COMMA;
 793                 appendTo += ID_LONG;
 794             }
 795             else if (formatAlias == *fullDateTemplate) {
 796                 appendTo += ID_DATE;
 797                 appendTo += COMMA;
 798                 appendTo += ID_FULL;
 799             }
 800             else if (formatAlias == *defaultTimeTemplate) {
 801                 appendTo += ID_TIME;
 802             }
 803             else if (formatAlias == *shortTimeTemplate) {
 804                 appendTo += ID_TIME;
 805                 appendTo += COMMA;
 806                 appendTo += ID_SHORT;
 807             }
 808             else if (formatAlias == *defaultTimeTemplate) {
 809                 appendTo += ID_TIME;
 810                 appendTo += COMMA;
 811                 appendTo += ID_MEDIUM;
 812             }
 813             else if (formatAlias == *longTimeTemplate) {
 814                 appendTo += ID_TIME;
 815                 appendTo += COMMA;
 816                 appendTo += ID_LONG;
 817             }
 818             else if (formatAlias == *fullTimeTemplate) {
 819                 appendTo += ID_TIME;
 820                 appendTo += COMMA;
 821                 appendTo += ID_FULL;
 822             }
 823             else {
 824                 UnicodeString buffer;
 825                 appendTo += ID_DATE;
 826                 appendTo += COMMA;
 827                 appendTo += sdtfmt->toPattern(buffer);
 828             }
 829
 830             delete defaultDateTemplate;
 831             delete shortDateTemplate;
 832             delete longDateTemplate;
 833             delete fullDateTemplate;
 834             delete defaultTimeTemplate;
 835             delete shortTimeTemplate;
 836             delete longTimeTemplate;
 837             delete fullTimeTemplate;
 838             // {sfb} there should be a more efficient way to do this!
 839         }
 840         else if ((chcfmt = dynamic_cast<ChoiceFormat*>(fmt)) != NULL) {
 841             UnicodeString buffer;
 842             appendTo += COMMA;
 843             appendTo += ID_CHOICE;
 844             appendTo += COMMA;
 845             appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
 846         }
 847         else if ((plfmt = dynamic_cast<PluralFormat*>(fmt)) != NULL) {
 848             UnicodeString buffer;
 849             appendTo += plfmt->toPattern(buffer);
 850         }
 851         else if ((selfmt = dynamic_cast<SelectFormat*>(fmt)) != NULL) {
 852             UnicodeString buffer;
 853             appendTo += ((SelectFormat*)fmt)->toPattern(buffer);
 854         }
 855         else {
 856             //appendTo += ", unknown";
 857         }
 858         appendTo += RIGHT_CURLY_BRACE;
 859     }
 860     copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
 861     return appendTo;
 862 }
 863
 864 // -------------------------------------
 865 // Adopts the new formats array and updates the array count.
 866 // This MessageFormat instance owns the new formats.
 867
 868 void
 869 MessageFormat::adoptFormats(Format** newFormats,
 870                             int32_t count) {
 871     if (newFormats == NULL || count < 0) {
 872         return;
 873     }
 874
 875     int32_t i;
 876     if (allocateSubformats(count)) {
 877         for (i=0; i<subformatCount; ++i) {
 878             delete subformats[i].format;
 879         }
 880         for (i=0; i<count; ++i) {
 881             subformats[i].format = newFormats[i];
 882         }
 883         subformatCount = count;
 884     } else {
 885         // An adopt method must always take ownership.  Delete
 886         // the incoming format objects and return unchanged.
 887         for (i=0; i<count; ++i) {
 888             delete newFormats[i];
 889         }
 890     }
 891
 892     // TODO: What about the .offset and .argNum fields?
 893 }
 894
 895 // -------------------------------------
 896 // Sets the new formats array and updates the array count.
// This MessageFormat instance makes a copy of the new formats.
 898
 899 void
 900 MessageFormat::setFormats(const Format** newFormats,
 901                           int32_t count) {
 902     if (newFormats == NULL || count < 0) {
 903         return;
 904     }
 905
 906     if (allocateSubformats(count)) {
 907         int32_t i;
 908         for (i=0; i<subformatCount; ++i) {
 909             delete subformats[i].format;
 910         }
 911         subformatCount = 0;
 912
 913         for (i=0; i<count; ++i) {
 914             subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
 915         }
 916         subformatCount = count;
 917     }
 918
 919     // TODO: What about the .offset and .arg fields?
 920 }
 921
 922 // -------------------------------------
 923 // Adopt a single format by format number.
 924 // Do nothing if the format number is not less than the array count.
 925
 926 void
 927 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
 928     if (n < 0 || n >= subformatCount) {
 929         delete newFormat;
 930     } else {
 931         delete subformats[n].format;
 932         subformats[n].format = newFormat;
 933     }
 934 }
 935
 936 // -------------------------------------
 937 // Adopt a single format by format name.
 938 // Do nothing if there is no match of formatName.
 939 void
 940 MessageFormat::adoptFormat(const UnicodeString& formatName,
 941                            Format* formatToAdopt,
 942                            UErrorCode& status) {
 943     if (isArgNumeric ) {
 944         int32_t argumentNumber = stou(formatName);
 945         if (argumentNumber<0) {
 946             status = U_ARGUMENT_TYPE_MISMATCH;
 947             return;
 948         }
 949         adoptFormat(argumentNumber, formatToAdopt);
 950         return;
 951     }
 952     for (int32_t i=0; i<subformatCount; ++i) {
 953         if (formatName==*subformats[i].argName) {
 954             delete subformats[i].format;
 955             if ( formatToAdopt== NULL) {
 956                 // This should never happen -- but we'll be nice if it does
 957                 subformats[i].format = NULL;
 958             } else {
 959                 subformats[i].format = formatToAdopt;
 960             }
 961         }
 962     }
 963 }
 964
 965 // -------------------------------------
 966 // Set a single format.
 967 // Do nothing if the variable is not less than the array count.
 968
 969 void
 970 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
 971     if (n >= 0 && n < subformatCount) {
 972         delete subformats[n].format;
 973         if (&newFormat == NULL) {
 974             // This should never happen -- but we'll be nice if it does
 975             subformats[n].format = NULL;
 976         } else {
 977             subformats[n].format = newFormat.clone();
 978         }
 979     }
 980 }
 981
 982 // -------------------------------------
 983 // Get a single format by format name.
// Returns NULL if no subformat matches formatName.
 985 Format *
 986 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
 987
 988     if (U_FAILURE(status)) return NULL;
 989
 990     if (isArgNumeric ) {
 991         int32_t argumentNumber = stou(formatName);
 992         if (argumentNumber<0) {
 993             status = U_ARGUMENT_TYPE_MISMATCH;
 994             return NULL;
 995         }
 996         if (argumentNumber < 0 || argumentNumber >= subformatCount) {
 997             return subformats[argumentNumber].format;
 998         }
 999         else {
1000             return NULL;
1001         }
1002     }
1003
1004     for (int32_t i=0; i<subformatCount; ++i) {
1005         if (formatName==*subformats[i].argName)
1006         {
1007             return subformats[i].format;
1008         }
1009     }
1010     return NULL;
1011 }
1012
1013 // -------------------------------------
1014 // Set a single format by format name
// Fails with U_ARGUMENT_TYPE_MISMATCH for numeric-argument patterns;
// does nothing if no subformat has the given name.
1016 void
1017 MessageFormat::setFormat(const UnicodeString& formatName,
1018                          const Format& newFormat,
1019                          UErrorCode& status) {
1020     if (isArgNumeric) {
1021         status = U_ARGUMENT_TYPE_MISMATCH;
1022         return;
1023     }
1024     for (int32_t i=0; i<subformatCount; ++i) {
1025         if (formatName==*subformats[i].argName)
1026         {
1027             delete subformats[i].format;
1028             if (&newFormat == NULL) {
1029                 // This should never happen -- but we'll be nice if it does
1030                 subformats[i].format = NULL;
1031             } else {
1032                 subformats[i].format = newFormat.clone();
1033             }
1034             break;
1035         }
1036     }
1037 }
1038
1039 // -------------------------------------
1040 // Gets the format array.
1041
1042 const Format**
1043 MessageFormat::getFormats(int32_t& cnt) const
1044 {
1045     // This old API returns an array (which we hold) of Format*
1046     // pointers.  The array is valid up to the next call to any
1047     // method on this object.  We construct and resize an array
1048     // on demand that contains aliases to the subformats[i].format
1049     // pointers.
1050     MessageFormat* t = (MessageFormat*) this;
1051     cnt = 0;
1052     if (formatAliases == NULL) {
1053         t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
1054         Format** a = (Format**)
1055             uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
1056         if (a == NULL) {
1057             return NULL;
1058         }
1059         t->formatAliases = a;
1060     } else if (subformatCount > formatAliasesCapacity) {
1061         Format** a = (Format**)
1062             uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
1063         if (a == NULL) {
1064             return NULL;
1065         }
1066         t->formatAliases = a;
1067         t->formatAliasesCapacity = subformatCount;
1068     }
1069     for (int32_t i=0; i<subformatCount; ++i) {
1070         t->formatAliases[i] = subformats[i].format;
1071     }
1072     cnt = subformatCount;
1073     return (const Format**)formatAliases;
1074 }
1075
1076
1077 StringEnumeration*
1078 MessageFormat::getFormatNames(UErrorCode& status) {
1079     if (U_FAILURE(status))  return NULL;
1080
1081     if (isArgNumeric) {
1082         status = U_ARGUMENT_TYPE_MISMATCH;
1083         return NULL;
1084     }
1085     UVector *fFormatNames = new UVector(status);
1086     if (U_FAILURE(status)) {
1087         status = U_MEMORY_ALLOCATION_ERROR;
1088         return NULL;
1089     }
1090     for (int32_t i=0; i<subformatCount; ++i) {
1091         fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status);
1092     }
1093
1094     StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
1095     return nameEnumerator;
1096 }
1097
1098 // -------------------------------------
1099 // Formats the source Formattable array and copy into the result buffer.
1100 // Ignore the FieldPosition result for error checking.
1101
1102 UnicodeString&
1103 MessageFormat::format(const Formattable* source,
1104                       int32_t cnt,
1105                       UnicodeString& appendTo,
1106                       FieldPosition& ignore,
1107                       UErrorCode& success) const
1108 {
1109     if (U_FAILURE(success))
1110         return appendTo;
1111
1112     return format(source, cnt, appendTo, ignore, 0, success);
1113 }
1114
1115 // -------------------------------------
1116 // Internally creates a MessageFormat instance based on the
1117 // pattern and formats the arguments Formattable array and
1118 // copy into the appendTo buffer.
1119
1120 UnicodeString&
1121 MessageFormat::format(  const UnicodeString& pattern,
1122                         const Formattable* arguments,
1123                         int32_t cnt,
1124                         UnicodeString& appendTo,
1125                         UErrorCode& success)
1126 {
1127     MessageFormat temp(pattern, success);
1128     FieldPosition ignore(0);
1129     temp.format(arguments, cnt, appendTo, ignore, success);
1130     return appendTo;
1131 }
1132
1133 // -------------------------------------
1134 // Formats the source Formattable object and copy into the
1135 // appendTo buffer.  The Formattable object must be an array
1136 // of Formattable instances, returns error otherwise.
1137
1138 UnicodeString&
1139 MessageFormat::format(const Formattable& source,
1140                       UnicodeString& appendTo,
1141                       FieldPosition& ignore,
1142                       UErrorCode& success) const
1143 {
1144     int32_t cnt;
1145
1146     if (U_FAILURE(success))
1147         return appendTo;
1148     if (source.getType() != Formattable::kArray) {
1149         success = U_ILLEGAL_ARGUMENT_ERROR;
1150         return appendTo;
1151     }
1152     const Formattable* tmpPtr = source.getArray(cnt);
1153
1154     return format(tmpPtr, cnt, appendTo, ignore, 0, success);
1155 }
1156
1157
1158 UnicodeString&
1159 MessageFormat::format(const UnicodeString* argumentNames,
1160                       const Formattable* arguments,
1161                       int32_t count,
1162                       UnicodeString& appendTo,
1163                       UErrorCode& success) const {
1164     FieldPosition ignore(0);
1165     return format(arguments, argumentNames, count, appendTo, ignore, 0, success);
1166 }
1167
1168 UnicodeString&
1169 MessageFormat::format(const Formattable* arguments,
1170                       int32_t cnt,
1171                       UnicodeString& appendTo,
1172                       FieldPosition& status,
1173                       int32_t recursionProtection,
1174                       UErrorCode& success) const
1175 {
1176     return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1177 }
1178
1179 // -------------------------------------
1180 // Formats the arguments Formattable array and copy into the appendTo buffer.
1181 // Ignore the FieldPosition result for error checking.
1182
UnicodeString&
MessageFormat::format(const Formattable* arguments,
                      const UnicodeString *argumentNames,
                      int32_t cnt,
                      UnicodeString& appendTo,
                      FieldPosition& status,
                      int32_t recursionProtection,
                      UErrorCode& success) const
{
    // General formatting routine behind all the other format() overloads.
    //
    // Walks the subformats in order, copying the literal pattern text
    // that precedes each one and then appending the formatted argument.
    //
    // arguments           array of cnt argument values
    // argumentNames       names parallel to arguments; required when the
    //                     pattern uses named arguments, otherwise unused
    // cnt                 number of entries in arguments/argumentNames
    // appendTo            receives the output; also the return value
    // status              only handed on to recursive format() calls
    // recursionProtection only handed on to recursive format() calls
    //                     (see the TODO below)
    // success             set to U_ILLEGAL_ARGUMENT_ERROR for bad input or
    //                     an argument type that cannot be formatted
    //
    // NOTE(review): unlike some of the wrappers, this function does not
    // test U_FAILURE(success) on entry.
    int32_t lastOffset = 0;
    int32_t argumentNumber=0;
    if (cnt < 0 || (cnt && arguments == NULL)) {
        success = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }

    // A named-argument pattern cannot be formatted without the names.
    if ( !isArgNumeric && argumentNames== NULL ) {
        success = U_ILLEGAL_ARGUMENT_ERROR;
        return appendTo;
    }

    const Formattable *obj=NULL;
    for (int32_t i=0; i<subformatCount; ++i) {
        // Append the prefix of current format element.
        appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
        lastOffset = subformats[i].offset;
        obj = NULL;
        if (isArgNumeric) {
            argumentNumber = subformats[i].argNum;

            // Checks the scope of the argument number.
            // An argument that was not supplied is rendered as "{N}".
            // Only the upper bound is tested here.
            if (argumentNumber >= cnt) {
                appendTo += LEFT_CURLY_BRACE;
                itos(argumentNumber, appendTo);
                appendTo += RIGHT_CURLY_BRACE;
                continue;
            }
            obj = arguments+argumentNumber;
        }
        else {
            // Named argument: use the first supplied name that matches.
            for (int32_t j=0; j<cnt; ++j) {
                if (argumentNames[j]== *subformats[i].argName ) {
                    obj = arguments+j;
                    break;
                }
            }
            // An argument that was not supplied is rendered as "{name}".
            if (obj == NULL ) {
                appendTo += LEFT_CURLY_BRACE;
                appendTo += *subformats[i].argName;
                appendTo += RIGHT_CURLY_BRACE;
                continue;

            }
        }
        Formattable::Type type = obj->getType();

        // Recursively calling the format process only if the current
        // format argument refers to either of the following:
        // a ChoiceFormat object, a PluralFormat object, a SelectFormat object.
        Format* fmt = subformats[i].format;
        if (fmt != NULL) {
            // Format into a scratch string first so the result can be
            // inspected before it is appended.
            UnicodeString argNum;
            fmt->format(*obj, argNum, success);

            // Needs to reprocess the ChoiceFormat and PluralFormat and SelectFormat option by using the
            // MessageFormat pattern application.
            if ((dynamic_cast<ChoiceFormat*>(fmt) != NULL ||
                 dynamic_cast<PluralFormat*>(fmt) != NULL ||
                 dynamic_cast<SelectFormat*>(fmt) != NULL) &&
                argNum.indexOf(LEFT_CURLY_BRACE) >= 0
            ) {
                // The selected text itself contains '{', so treat it as
                // a nested message pattern over the same arguments.
                MessageFormat temp(argNum, fLocale, success);
                // TODO: Implement recursion protection
                if ( isArgNumeric ) {
                    temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
                }
                else {
                    temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success);
                }
                if (U_FAILURE(success)) {
                    return appendTo;
                }
            }
            else {
                appendTo += argNum;
            }
        }
        // If the obj data type is a number, use a NumberFormat instance.
        else if ((type == Formattable::kDouble) ||
                 (type == Formattable::kLong) ||
                 (type == Formattable::kInt64)) {

            const NumberFormat* nf = getDefaultNumberFormat(success);
            if (nf == NULL) {
                return appendTo;
            }
            if (type == Formattable::kDouble) {
                nf->format(obj->getDouble(), appendTo);
            } else if (type == Formattable::kLong) {
                nf->format(obj->getLong(), appendTo);
            } else {
                nf->format(obj->getInt64(), appendTo);
            }
        }
        // If the obj data type is a Date instance, use a DateFormat instance.
        else if (type == Formattable::kDate) {
            const DateFormat* df = getDefaultDateFormat(success);
            if (df == NULL) {
                return appendTo;
            }
            df->format(obj->getDate(), appendTo);
        }
        else if (type == Formattable::kString) {
            appendTo += obj->getString();
        }
        else {
            // No subformat and not a number, date or string.
            success = U_ILLEGAL_ARGUMENT_ERROR;
            return appendTo;
        }
    }
    // Appends the rest of the pattern characters after the real last offset.
    appendTo.append(fPattern, lastOffset, 0x7fffffff);
    return appendTo;
}
1307
1308
1309 // -------------------------------------
1310 // Parses the source pattern and returns the Formattable objects array,
1311 // the array count and the ending parse position.  The caller of this method
1312 // owns the array.
1313
1314 Formattable*
1315 MessageFormat::parse(const UnicodeString& source,
1316                      ParsePosition& pos,
1317                      int32_t& count) const
1318 {
1319     // Allocate at least one element.  Allocating an array of length
1320     // zero causes problems on some platforms (e.g. Win32).
1321     Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
1322     int32_t patternOffset = 0;
1323     int32_t sourceOffset = pos.getIndex();
1324     ParsePosition tempPos(0);
1325     count = 0; // {sfb} reset to zero
1326     int32_t len;
1327     // If resultArray could not be created, exit out.
1328     // Avoid crossing initialization of variables above.
1329     if (resultArray == NULL) {
1330         goto PARSE_ERROR;
1331     }
1332     for (int32_t i = 0; i < subformatCount; ++i) {
1333         // match up to format
1334         len = subformats[i].offset - patternOffset;
1335         if (len == 0 ||
1336             fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1337             sourceOffset += len;
1338             patternOffset += len;
1339         }
1340         else {
1341             goto PARSE_ERROR;
1342         }
1343
1344         // now use format
1345         Format* fmt = subformats[i].format;
1346         int32_t argNum = subformats[i].argNum;
1347         if (fmt == NULL) {   // string format
1348             // if at end, use longest possible match
1349             // otherwise uses first match to intervening string
1350             // does NOT recursively try all possibilities
1351             int32_t tempLength = (i+1<subformatCount) ?
1352                 subformats[i+1].offset : fPattern.length();
1353
1354             int32_t next;
1355             if (patternOffset >= tempLength) {
1356                 next = source.length();
1357             }
1358             else {
1359                 UnicodeString buffer;
1360                 fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
1361                 next = source.indexOf(buffer, sourceOffset);
1362             }
1363
1364             if (next < 0) {
1365                 goto PARSE_ERROR;
1366             }
1367             else {
1368                 UnicodeString buffer;
1369                 source.extract(sourceOffset,next - sourceOffset, buffer);
1370                 UnicodeString strValue = buffer;
1371                 UnicodeString temp(LEFT_CURLY_BRACE);
1372                 // {sfb} check this later
1373                 if (isArgNumeric) {
1374                     itos(argNum, temp);
1375                 }
1376                 else {
1377                     temp+=(*subformats[i].argName);
1378                 }
1379                 temp += RIGHT_CURLY_BRACE;
1380                 if (strValue != temp) {
1381                     source.extract(sourceOffset,next - sourceOffset, buffer);
1382                     resultArray[argNum].setString(buffer);
1383                     // {sfb} not sure about this
1384                     if ((argNum + 1) > count) {
1385                         count = argNum + 1;
1386                     }
1387                 }
1388                 sourceOffset = next;
1389             }
1390         }
1391         else {
1392             tempPos.setIndex(sourceOffset);
1393             fmt->parseObject(source, resultArray[argNum], tempPos);
1394             if (tempPos.getIndex() == sourceOffset) {
1395                 goto PARSE_ERROR;
1396             }
1397
1398             if ((argNum + 1) > count) {
1399                 count = argNum + 1;
1400             }
1401             sourceOffset = tempPos.getIndex(); // update
1402         }
1403     }
1404     len = fPattern.length() - patternOffset;
1405     if (len == 0 ||
1406         fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1407         pos.setIndex(sourceOffset + len);
1408         return resultArray;
1409     }
1410     // else fall through...
1411
1412  PARSE_ERROR:
1413     pos.setErrorIndex(sourceOffset);
1414     delete [] resultArray;
1415     count = 0;
1416     return NULL; // leave index as is to signal error
1417 }
1418
1419 // -------------------------------------
1420 // Parses the source string and returns the array of
1421 // Formattable objects and the array count.  The caller
1422 // owns the returned array.
1423
1424 Formattable*
1425 MessageFormat::parse(const UnicodeString& source,
1426                      int32_t& cnt,
1427                      UErrorCode& success) const
1428 {
1429     if (!isArgNumeric ) {
1430         success = U_ARGUMENT_TYPE_MISMATCH;
1431         return NULL;
1432     }
1433     ParsePosition status(0);
1434     // Calls the actual implementation method and starts
1435     // from zero offset of the source text.
1436     Formattable* result = parse(source, status, cnt);
1437     if (status.getIndex() == 0) {
1438         success = U_MESSAGE_PARSE_ERROR;
1439         delete[] result;
1440         return NULL;
1441     }
1442     return result;
1443 }
1444
1445 // -------------------------------------
1446 // Parses the source text and copy into the result buffer.
1447
1448 void
1449 MessageFormat::parseObject( const UnicodeString& source,
1450                             Formattable& result,
1451                             ParsePosition& status) const
1452 {
1453     int32_t cnt = 0;
1454     Formattable* tmpResult = parse(source, status, cnt);
1455     if (tmpResult != NULL)
1456         result.adoptArray(tmpResult, cnt);
1457 }
1458
1459 UnicodeString
1460 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
1461   UnicodeString result;
1462   if (U_SUCCESS(status)) {
1463     int32_t plen = pattern.length();
1464     const UChar* pat = pattern.getBuffer();
1465     int32_t blen = plen * 2 + 1; // space for null termination, convenience
1466     UChar* buf = result.getBuffer(blen);
1467     if (buf == NULL) {
1468       status = U_MEMORY_ALLOCATION_ERROR;
1469     } else {
1470       int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
1471       result.releaseBuffer(U_SUCCESS(status) ? len : 0);
1472     }
1473   }
1474   if (U_FAILURE(status)) {
1475     result.setToBogus();
1476   }
1477   return result;
1478 }
1479
1480 // -------------------------------------
1481
1482 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
1483     RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
1484     if (fmt == NULL) {
1485         ec = U_MEMORY_ALLOCATION_ERROR;
1486     } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
1487         UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
1488         fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
1489     }
1490     return fmt;
1491 }
1492
/**
 * Reads the segments[] array (see applyPattern()) and parses the
 * segments[1..3] into a Format* object.  Stores the format object in
 * the subformats[] array.  Updates the argTypes[] array type
 * information for the corresponding argument.
 *
 * As used below: segments[0] supplies the offset of this format
 * (its length), segments[1] is the argument number or name,
 * segments[2] the format type keyword and segments[3] the style
 * keyword or pattern.
 *
 * Does nothing if ec already holds a failure code.  On failure no
 * entry is recorded and any format object created here is deleted.
 *
 * @param formatNumber index into subformats[] for this format
 * @param segments array of strings with the parsed pattern segments
 * @param parseError parse error data (output param)
 * @param ec error code
 */
void
MessageFormat::makeFormat(int32_t formatNumber,
                          UnicodeString* segments,
                          UParseError& parseError,
                          UErrorCode& ec) {
    if (U_FAILURE(ec)) {
        return;
    }

    // Parse the argument number
    int32_t argumentNumber = stou(segments[1]); // always unlocalized!
    UnicodeString argumentName;
    if (argumentNumber < 0) {
        // Not a number, so this is a named argument.  That is only
        // accepted if no earlier format in this pattern was numeric.
        if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) {
            ec = U_INVALID_FORMAT_ERROR;
            return;
        }
        isArgNumeric = FALSE;
        // Named arguments use their format index as slot in argTypes[].
        argumentNumber=formatNumber;
    }
    if (!isArgNumeric) {
        if ( !isLegalArgName(segments[1]) ) {
            ec = U_INVALID_FORMAT_ERROR;
            return;
        }
        argumentName = segments[1];
    }

    // Parse the format, recording the argument type and creating a
    // new Format object (except for string arguments).
    Formattable::Type argType;
    Format *fmt = NULL;
    int32_t typeID, styleID;
    DateFormat::EStyle style;
    UnicodeString unquotedPattern, quotedPattern;
    // Toggled while unquoting plural/select patterns below; its value
    // is not read anywhere else in this function.
    UBool inQuote = FALSE;

    switch (typeID = findKeyword(segments[2], TYPE_IDS)) {

    case 0: // string
        argType = Formattable::kString;
        break;

    case 1: // number
        argType = Formattable::kDouble;

        switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) {
        case 0: // default
            fmt = NumberFormat::createInstance(fLocale, ec);
            break;
        case 1: // currency
            fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
            break;
        case 2: // percent
            fmt = NumberFormat::createPercentInstance(fLocale, ec);
            break;
        case 3: // integer
            argType = Formattable::kLong;
            fmt = createIntegerFormat(fLocale, ec);
            break;
        default: // pattern
            // Not a known style keyword: treat segments[3] as a
            // DecimalFormat pattern, if the instance is a DecimalFormat.
            fmt = NumberFormat::createInstance(fLocale, ec);
            if (fmt) {
                DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt);
                if (decfmt != NULL) {
                    decfmt->applyPattern(segments[3],parseError,ec);
                }
            }
            break;
        }
        break;

    case 2: // date
    case 3: // time
        argType = Formattable::kDate;
        styleID = findKeyword(segments[3], DATE_STYLE_IDS);
        style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;

        if (typeID == 2) {
            fmt = DateFormat::createDateInstance(style, fLocale);
        } else {
            fmt = DateFormat::createTimeInstance(style, fLocale);
        }

        // Not a known style keyword: treat segments[3] as a
        // SimpleDateFormat pattern, if the instance is one.
        if (styleID < 0 && fmt != NULL) {
            SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt);
            if (sdtfmt != NULL) {
                sdtfmt->applyPattern(segments[3]);
            }
        }
        break;

    case 4: // choice
        argType = Formattable::kDouble;

        fmt = new ChoiceFormat(segments[3], parseError, ec);
        break;

    case 5: // spellout
        argType = Formattable::kDouble;
        fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec);
        break;
    case 6: // ordinal
        argType = Formattable::kDouble;
        fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec);
        break;
    case 7: // duration
        argType = Formattable::kDouble;
        fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec);
        break;
    case 8: // plural
    case 9: // Select
        if(typeID == 8)
            argType = Formattable::kDouble;
        else
            argType = Formattable::kString;
        // Strip one level of apostrophe quoting before handing the
        // pattern on: a doubled apostrophe becomes a single one, a
        // lone apostrophe is dropped.
        quotedPattern = segments[3];
        for (int32_t i = 0; i < quotedPattern.length(); ++i) {
            UChar ch = quotedPattern.charAt(i);
            if (ch == SINGLE_QUOTE) {
                if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) {
                    unquotedPattern+=ch;
                    ++i;
                }
                else {
                    inQuote = !inQuote;
                }
            }
            else {
                unquotedPattern += ch;
            }
        }
        if(typeID == 8)
            fmt = new PluralFormat(fLocale, unquotedPattern, ec);
        else
            fmt = new SelectFormat(unquotedPattern, ec);
        break;
    default:
        // Unknown format type keyword.
        argType = Formattable::kString;
        ec = U_ILLEGAL_ARGUMENT_ERROR;
        break;
    }

    // A missing format object with no error reported is treated as an
    // allocation failure, except for kString arguments, which need none.
    if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }

    if (!allocateSubformats(formatNumber+1) ||
        !allocateArgTypes(argumentNumber+1)) {
        ec = U_MEMORY_ALLOCATION_ERROR;
    }

    if (U_FAILURE(ec)) {
        delete fmt;
        return;
    }

    // Parse succeeded; record results in our arrays
    subformats[formatNumber].format = fmt;
    subformats[formatNumber].offset = segments[0].length();
    if (isArgNumeric) {
        subformats[formatNumber].argName = NULL;
        subformats[formatNumber].argNum = argumentNumber;
    }
    else {
        // Named arguments have no argument number; -1 marks that.
        subformats[formatNumber].argName = new UnicodeString(argumentName);
        subformats[formatNumber].argNum = -1;
    }
    subformatCount = formatNumber+1;

    // Careful here: argumentNumber may in general arrive out of
    // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
    argTypes[argumentNumber] = argType;
    if (argumentNumber+1 > argTypeCount) {
        argTypeCount = argumentNumber+1;
    }
}
1681
1682 // -------------------------------------
1683 // Finds the string, s, in the string array, list.
1684 int32_t MessageFormat::findKeyword(const UnicodeString& s,
1685                                    const UChar * const *list)
1686 {
1687     if (s.length() == 0)
1688         return 0; // default
1689
1690     UnicodeString buffer = s;
1691     // Trims the space characters and turns all characters
1692     // in s to lower case.
1693     buffer.trim().toLower("");
1694     for (int32_t i = 0; list[i]; ++i) {
1695         if (!buffer.compare(list[i], u_strlen(list[i]))) {
1696             return i;
1697         }
1698     }
1699     return -1;
1700 }
1701
1702 // -------------------------------------
1703 // Checks the range of the source text to quote the special
1704 // characters, { and ' and copy to target buffer.
1705
1706 void
1707 MessageFormat::copyAndFixQuotes(const UnicodeString& source,
1708                                 int32_t start,
1709                                 int32_t end,
1710                                 UnicodeString& appendTo)
1711 {
1712     UBool gotLB = FALSE;
1713
1714     for (int32_t i = start; i < end; ++i) {
1715         UChar ch = source[i];
1716         if (ch == LEFT_CURLY_BRACE) {
1717             appendTo += SINGLE_QUOTE;
1718             appendTo += LEFT_CURLY_BRACE;
1719             appendTo += SINGLE_QUOTE;
1720             gotLB = TRUE;
1721         }
1722         else if (ch == RIGHT_CURLY_BRACE) {
1723             if(gotLB) {
1724                 appendTo += RIGHT_CURLY_BRACE;
1725                 gotLB = FALSE;
1726             }
1727             else {
1728                 // orig code.
1729                 appendTo += SINGLE_QUOTE;
1730                 appendTo += RIGHT_CURLY_BRACE;
1731                 appendTo += SINGLE_QUOTE;
1732             }
1733         }
1734         else if (ch == SINGLE_QUOTE) {
1735             appendTo += SINGLE_QUOTE;
1736             appendTo += SINGLE_QUOTE;
1737         }
1738         else {
1739             appendTo += ch;
1740         }
1741     }
1742 }
1743
1744 /**
1745  * Convenience method that ought to be in NumberFormat
1746  */
1747 NumberFormat*
1748 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
1749     NumberFormat *temp = NumberFormat::createInstance(locale, status);
1750     DecimalFormat *temp2;
1751     if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) {
1752         temp2->setMaximumFractionDigits(0);
1753         temp2->setDecimalSeparatorAlwaysShown(FALSE);
1754         temp2->setParseIntegerOnly(TRUE);
1755     }
1756
1757     return temp;
1758 }
1759
1760 /**
1761  * Return the default number format.  Used to format a numeric
1762  * argument when subformats[i].format is NULL.  Returns NULL
1763  * on failure.
1764  *
1765  * Semantically const but may modify *this.
1766  */
1767 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
1768     if (defaultNumberFormat == NULL) {
1769         MessageFormat* t = (MessageFormat*) this;
1770         t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
1771         if (U_FAILURE(ec)) {
1772             delete t->defaultNumberFormat;
1773             t->defaultNumberFormat = NULL;
1774         } else if (t->defaultNumberFormat == NULL) {
1775             ec = U_MEMORY_ALLOCATION_ERROR;
1776         }
1777     }
1778     return defaultNumberFormat;
1779 }
1780
1781 /**
1782  * Return the default date format.  Used to format a date
1783  * argument when subformats[i].format is NULL.  Returns NULL
1784  * on failure.
1785  *
1786  * Semantically const but may modify *this.
1787  */
1788 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
1789     if (defaultDateFormat == NULL) {
1790         MessageFormat* t = (MessageFormat*) this;
1791         t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
1792         if (t->defaultDateFormat == NULL) {
1793             ec = U_MEMORY_ALLOCATION_ERROR;
1794         }
1795     }
1796     return defaultDateFormat;
1797 }
1798
1799 UBool
1800 MessageFormat::usesNamedArguments() const {
1801     return !isArgNumeric;
1802 }
1803
1804 UBool
1805 MessageFormat::isLegalArgName(const UnicodeString& argName) const {
1806     if(!u_hasBinaryProperty(argName.charAt(0), idStart)) {
1807         return FALSE;
1808     }
1809     for (int32_t i=1; i<argName.length(); ++i) {
1810         if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) {
1811             return FALSE;
1812         }
1813     }
1814     return TRUE;
1815 }
1816
1817 int32_t
1818 MessageFormat::getArgTypeCount() const {
1819         return argTypeCount;
1820 }
1821
1822 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
1823     pos=0;
1824     fFormatNames = fNameList;
1825 }
1826
1827 const UnicodeString*
1828 FormatNameEnumeration::snext(UErrorCode& status) {
1829     if (U_SUCCESS(status) && pos < fFormatNames->size()) {
1830         return (const UnicodeString*)fFormatNames->elementAt(pos++);
1831     }
1832     return NULL;
1833 }
1834
1835 void
1836 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
1837     pos=0;
1838 }
1839
1840 int32_t
1841 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
1842        return (fFormatNames==NULL) ? 0 : fFormatNames->size();
1843 }
1844
1845 FormatNameEnumeration::~FormatNameEnumeration() {
1846     UnicodeString *s;
1847     for (int32_t i=0; i<fFormatNames->size(); ++i) {
1848         if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) {
1849             delete s;
1850         }
1851     }
1852     delete fFormatNames;
1853 }
1854 U_NAMESPACE_END
1855
1856 #endif /* #if !UCONFIG_NO_FORMATTING */
1857
1858 //eof