icuSources/i18n/msgfmt.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 2007-2008, International Business Machines Corporation and         *
   4 * others. All Rights Reserved.                                                *
   5 *******************************************************************************
   6 *
   7 * File MSGFMT.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   02/19/97    aliu        Converted from java.
  13 *   03/20/97    helena      Finished first cut of implementation.
  14 *   04/10/97    aliu        Made to work on AIX.  Added stoi to replace wtoi.
  15 *   06/11/97    helena      Fixed addPattern to take the pattern correctly.
  16 *   06/17/97    helena      Fixed the getPattern to return the correct pattern.
  17 *   07/09/97    helena      Made ParsePosition into a class.
  18 *   02/22/99    stephen     Removed character literals for EBCDIC safety
  19 ********************************************************************************
  20 */
  21
  22 #include "unicode/utypes.h"
  23
  24 #if !UCONFIG_NO_FORMATTING
  25
  26 #include "unicode/msgfmt.h"
  27 #include "unicode/decimfmt.h"
  28 #include "unicode/datefmt.h"
  29 #include "unicode/smpdtfmt.h"
  30 #include "unicode/choicfmt.h"
  31 #include "unicode/plurfmt.h"
  32 #include "unicode/ustring.h"
  33 #include "unicode/ucnv_err.h"
  34 #include "unicode/uchar.h"
  35 #include "unicode/umsg.h"
  36 #include "unicode/rbnf.h"
  37 #include "cmemory.h"
  38 #include "msgfmt_impl.h"
  39 #include "util.h"
  40 #include "uassert.h"
  41 #include "ustrfmt.h"
  42 #include "uvector.h"
  43
  44 // *****************************************************************************
  45 // class MessageFormat
  46 // *****************************************************************************
  47
// Syntax characters of a message pattern, spelled as UTF-16 code unit
// values instead of character literals (the file header notes that character
// literals were removed for EBCDIC safety).
// ',' : separates the parts of a subformat, e.g. {0,number,integer}
#define COMMA             ((UChar)0x002C)
// '\'' : starts/ends quoted text
#define SINGLE_QUOTE      ((UChar)0x0027)
// '{' : opens a subformat
#define LEFT_CURLY_BRACE  ((UChar)0x007B)
// '}' : closes a subformat
#define RIGHT_CURLY_BRACE ((UChar)0x007D)
  52
  53 //---------------------------------------
  54 // static data
  55
// NUL-terminated UTF-16 keyword strings, written as numeric code units.
// ID_EMPTY is shared by every keyword list below as its first ("default")
// entry, which leaves NULL free to terminate each list.
static const UChar ID_EMPTY[]     = {
    0 /* empty string, used for default so that null can mark end of list */
};

// Keywords naming the type of a subformat.
static const UChar ID_NUMBER[]    = {
    0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0  /* "number" */
};
static const UChar ID_DATE[]      = {
    0x64, 0x61, 0x74, 0x65, 0              /* "date" */
};
static const UChar ID_TIME[]      = {
    0x74, 0x69, 0x6D, 0x65, 0              /* "time" */
};
static const UChar ID_CHOICE[]    = {
    0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0  /* "choice" */
};
static const UChar ID_SPELLOUT[]  = {
    0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
};
static const UChar ID_ORDINAL[]   = {
    0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
};
static const UChar ID_DURATION[]  = {
    0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
};
static const UChar ID_PLURAL[]  = {
    0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0  /* "plural" */
};

// MessageFormat type list: default (empty), number, date, time, choice,
// spellout, ordinal, duration or plural.  NULL-terminated.
static const UChar * const TYPE_IDS[] = {
    ID_EMPTY,
    ID_NUMBER,
    ID_DATE,
    ID_TIME,
    ID_CHOICE,
    ID_SPELLOUT,
    ID_ORDINAL,
    ID_DURATION,
    ID_PLURAL,
    NULL,
};
  98
// Keywords naming the style of a "number" subformat, as NUL-terminated
// UTF-16 strings written as numeric code units.
static const UChar ID_CURRENCY[]  = {
    0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0  /* "currency" */
};
static const UChar ID_PERCENT[]   = {
    0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0        /* "percent" */
};
static const UChar ID_INTEGER[]   = {
    0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0        /* "integer" */
};

// NumberFormat modifier list: default (empty), currency, percent or integer.
// NULL-terminated.
static const UChar * const NUMBER_STYLE_IDS[] = {
    ID_EMPTY,
    ID_CURRENCY,
    ID_PERCENT,
    ID_INTEGER,
    NULL,
};
 117
// Keywords naming the style of a "date" or "time" subformat, as
// NUL-terminated UTF-16 strings written as numeric code units.
static const UChar ID_SHORT[]     = {
    0x73, 0x68, 0x6F, 0x72, 0x74, 0        /* "short" */
};
static const UChar ID_MEDIUM[]    = {
    0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0  /* "medium" */
};
static const UChar ID_LONG[]      = {
    0x6C, 0x6F, 0x6E, 0x67, 0              /* "long" */
};
static const UChar ID_FULL[]      = {
    0x66, 0x75, 0x6C, 0x6C, 0              /* "full" */
};

// DateFormat modifier list: default (empty), short, medium, long or full.
// NULL-terminated.
static const UChar * const DATE_STYLE_IDS[] = {
    ID_EMPTY,
    ID_SHORT,
    ID_MEDIUM,
    ID_LONG,
    ID_FULL,
    NULL,
};

// DateFormat style constants listed in the same order as the keywords in
// DATE_STYLE_IDS (without the NULL terminator).
static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = {
    U_NAMESPACE_QUALIFIER DateFormat::kDefault,
    U_NAMESPACE_QUALIFIER DateFormat::kShort,
    U_NAMESPACE_QUALIFIER DateFormat::kMedium,
    U_NAMESPACE_QUALIFIER DateFormat::kLong,
    U_NAMESPACE_QUALIFIER DateFormat::kFull,
};

// Capacity the constructors request for the subformat and argument-type
// arrays before a pattern is applied.
static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
 150
 151 U_NAMESPACE_BEGIN
 152
 153 // -------------------------------------
// Expand the RTTI boilerplate for the two classes named below.
// NOTE(review): presumably this supplies the getStaticClassID() /
// getDynamicClassID() pair that class-ID comparisons rely on -- confirm
// against the macro definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
 156
 157 //--------------------------------------------------------------------
 158
 159 /**
 160  * Convert a string to an unsigned decimal, ignoring rule whitespace.
 161  * @return a non-negative number if successful, or a negative number
 162  *         upon failure.
 163  */
 164 static int32_t stou(const UnicodeString& string) {
 165     int32_t n = 0;
 166     int32_t count = 0;
 167     UChar32 c;
 168     for (int32_t i=0; i<string.length(); i+=U16_LENGTH(c)) {
 169         c = string.char32At(i);
 170         if (uprv_isRuleWhiteSpace(c)) {
 171             continue;
 172         }
 173         int32_t d = u_digit(c, 10);
 174         if (d < 0 || ++count > 10) {
 175             return -1;
 176         }
 177         n = 10*n + d;
 178     }
 179     return n;
 180 }
 181
 182 /**
 183  * Convert an integer value to a string and append the result to
 184  * the given UnicodeString.
 185  */
 186 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
 187     UChar temp[16];
 188     uprv_itou(temp,16,i,10,0); // 10 == radix
 189     appendTo.append(temp);
 190     return appendTo;
 191 }
 192
 193 /*
 194  * A structure representing one subformat of this MessageFormat.
 195  * Each subformat has a Format object, an offset into the plain
 196  * pattern text fPattern, and an argument number.  The argument
 197  * number corresponds to the array of arguments to be formatted.
 198  * @internal
 199  */
 200 class MessageFormat::Subformat : public UMemory {
 201 public:
 202     /**
 203      * @internal
 204      */
 205     Format* format; // formatter
 206     /**
 207      * @internal
 208      */
 209     int32_t offset; // offset into fPattern
 210     /**
 211      * @internal
 212      */
 213     // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number
 214     int32_t argNum;    // 0-based argument number
 215     /**
 216      * @internal
 217      */
 218     UnicodeString* argName; // argument name or number
 219
 220     /**
 221      * Clone that.format and assign it to this.format
 222      * Do NOT delete this.format
 223      * @internal
 224      */
 225     Subformat& operator=(const Subformat& that) {
 226         if (this != &that) {
 227             format = that.format ? that.format->clone() : NULL;
 228             offset = that.offset;
 229             argNum = that.argNum;
 230             argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL;
 231         }
 232         return *this;
 233     }
 234
 235     /**
 236      * @internal
 237      */
 238     UBool operator==(const Subformat& that) const {
 239         // Do cheap comparisons first
 240         return offset == that.offset &&
 241                argNum == that.argNum &&
 242                ((argName == that.argName) ||
 243                 (*argName == *that.argName)) &&
 244                ((format == that.format) || // handles NULL
 245                 (*format == *that.format));
 246     }
 247
 248     /**
 249      * @internal
 250      */
 251     UBool operator!=(const Subformat& that) const {
 252         return !operator==(that);
 253     }
 254 };
 255
 256 // -------------------------------------
 257 // Creates a MessageFormat instance based on the pattern.
 258
 259 MessageFormat::MessageFormat(const UnicodeString& pattern,
 260                              UErrorCode& success)
 261 : fLocale(Locale::getDefault()),  // Uses the default locale
 262   formatAliases(NULL),
 263   formatAliasesCapacity(0),
 264   idStart(UCHAR_ID_START),
 265   idContinue(UCHAR_ID_CONTINUE),
 266   subformats(NULL),
 267   subformatCount(0),
 268   subformatCapacity(0),
 269   argTypes(NULL),
 270   argTypeCount(0),
 271   argTypeCapacity(0),
 272   isArgNumeric(TRUE),
 273   defaultNumberFormat(NULL),
 274   defaultDateFormat(NULL)
 275 {
 276     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
 277         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
 278         success = U_MEMORY_ALLOCATION_ERROR;
 279         return;
 280     }
 281     applyPattern(pattern, success);
 282     setLocaleIDs(fLocale.getName(), fLocale.getName());
 283 }
 284
 285 MessageFormat::MessageFormat(const UnicodeString& pattern,
 286                              const Locale& newLocale,
 287                              UErrorCode& success)
 288 : fLocale(newLocale),
 289   formatAliases(NULL),
 290   formatAliasesCapacity(0),
 291   idStart(UCHAR_ID_START),
 292   idContinue(UCHAR_ID_CONTINUE),
 293   subformats(NULL),
 294   subformatCount(0),
 295   subformatCapacity(0),
 296   argTypes(NULL),
 297   argTypeCount(0),
 298   argTypeCapacity(0),
 299   isArgNumeric(TRUE),
 300   defaultNumberFormat(NULL),
 301   defaultDateFormat(NULL)
 302 {
 303     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
 304         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
 305         success = U_MEMORY_ALLOCATION_ERROR;
 306         return;
 307     }
 308     applyPattern(pattern, success);
 309     setLocaleIDs(fLocale.getName(), fLocale.getName());
 310 }
 311
 312 MessageFormat::MessageFormat(const UnicodeString& pattern,
 313                              const Locale& newLocale,
 314                              UParseError& parseError,
 315                              UErrorCode& success)
 316 : fLocale(newLocale),
 317   formatAliases(NULL),
 318   formatAliasesCapacity(0),
 319   idStart(UCHAR_ID_START),
 320   idContinue(UCHAR_ID_CONTINUE),
 321   subformats(NULL),
 322   subformatCount(0),
 323   subformatCapacity(0),
 324   argTypes(NULL),
 325   argTypeCount(0),
 326   argTypeCapacity(0),
 327   isArgNumeric(TRUE),
 328   defaultNumberFormat(NULL),
 329   defaultDateFormat(NULL)
 330 {
 331     if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) ||
 332         !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) {
 333         success = U_MEMORY_ALLOCATION_ERROR;
 334         return;
 335     }
 336     applyPattern(pattern, parseError, success);
 337     setLocaleIDs(fLocale.getName(), fLocale.getName());
 338 }
 339
 340 MessageFormat::MessageFormat(const MessageFormat& that)
 341 : Format(that),
 342   formatAliases(NULL),
 343   formatAliasesCapacity(0),
 344   idStart(UCHAR_ID_START),
 345   idContinue(UCHAR_ID_CONTINUE),
 346   subformats(NULL),
 347   subformatCount(0),
 348   subformatCapacity(0),
 349   argTypes(NULL),
 350   argTypeCount(0),
 351   argTypeCapacity(0),
 352   isArgNumeric(TRUE),
 353   defaultNumberFormat(NULL),
 354   defaultDateFormat(NULL)
 355 {
 356     *this = that;
 357 }
 358
 359 MessageFormat::~MessageFormat()
 360 {
 361     int32_t idx;
 362     for (idx = 0; idx < subformatCount; idx++) {
 363         delete subformats[idx].format;
 364         delete subformats[idx].argName;
 365     }
 366     uprv_free(subformats);
 367     subformats = NULL;
 368     subformatCount = subformatCapacity = 0;
 369
 370     uprv_free(argTypes);
 371     argTypes = NULL;
 372     argTypeCount = argTypeCapacity = 0;
 373
 374     uprv_free(formatAliases);
 375
 376     delete defaultNumberFormat;
 377     delete defaultDateFormat;
 378 }
 379
 380 //--------------------------------------------------------------------
 381 // Variable-size array management
 382
 383 /**
 384  * Allocate subformats[] to at least the given capacity and return
 385  * TRUE if successful.  If not, leave subformats[] unchanged.
 386  *
 387  * If subformats is NULL, allocate it.  If it is not NULL, enlarge it
 388  * if necessary to be at least as large as specified.
 389  */
 390 UBool MessageFormat::allocateSubformats(int32_t capacity) {
 391     if (subformats == NULL) {
 392         subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity);
 393         subformatCapacity = capacity;
 394         subformatCount = 0;
 395         if (subformats == NULL) {
 396             subformatCapacity = 0;
 397             return FALSE;
 398         }
 399     } else if (subformatCapacity < capacity) {
 400         if (capacity < 2*subformatCapacity) {
 401             capacity = 2*subformatCapacity;
 402         }
 403         Subformat* a = (Subformat*)
 404             uprv_realloc(subformats, sizeof(*subformats) * capacity);
 405         if (a == NULL) {
 406             return FALSE; // request failed
 407         }
 408         subformats = a;
 409         subformatCapacity = capacity;
 410     }
 411     return TRUE;
 412 }
 413
 414 /**
 415  * Allocate argTypes[] to at least the given capacity and return
 416  * TRUE if successful.  If not, leave argTypes[] unchanged.
 417  *
 418  * If argTypes is NULL, allocate it.  If it is not NULL, enlarge it
 419  * if necessary to be at least as large as specified.
 420  */
 421 UBool MessageFormat::allocateArgTypes(int32_t capacity) {
 422     if (argTypes == NULL) {
 423         argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity);
 424         argTypeCount = 0;
 425         argTypeCapacity = capacity;
 426         if (argTypes == NULL) {
 427             argTypeCapacity = 0;
 428             return FALSE;
 429         }
 430         for (int32_t i=0; i<capacity; ++i) {
 431             argTypes[i] = Formattable::kString;
 432         }
 433     } else if (argTypeCapacity < capacity) {
 434         if (capacity < 2*argTypeCapacity) {
 435             capacity = 2*argTypeCapacity;
 436         }
 437         Formattable::Type* a = (Formattable::Type*)
 438             uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
 439         if (a == NULL) {
 440             return FALSE; // request failed
 441         }
 442         for (int32_t i=argTypeCapacity; i<capacity; ++i) {
 443             a[i] = Formattable::kString;
 444         }
 445         argTypes = a;
 446         argTypeCapacity = capacity;
 447     }
 448     return TRUE;
 449 }
 450
 451 // -------------------------------------
 452 // assignment operator
 453
 454 const MessageFormat&
 455 MessageFormat::operator=(const MessageFormat& that)
 456 {
 457     // Reallocate the arrays BEFORE changing this object
 458     if (this != &that &&
 459         allocateSubformats(that.subformatCount) &&
 460         allocateArgTypes(that.argTypeCount)) {
 461
 462         // Calls the super class for assignment first.
 463         Format::operator=(that);
 464
 465         fPattern = that.fPattern;
 466         setLocale(that.fLocale);
 467         isArgNumeric = that.isArgNumeric;
 468         int32_t j;
 469         for (j=0; j<subformatCount; ++j) {
 470             delete subformats[j].format;
 471         }
 472         subformatCount = 0;
 473
 474         for (j=0; j<that.subformatCount; ++j) {
 475             // Subformat::operator= does NOT delete this.format
 476             subformats[j] = that.subformats[j];
 477         }
 478         subformatCount = that.subformatCount;
 479
 480         for (j=0; j<that.argTypeCount; ++j) {
 481             argTypes[j] = that.argTypes[j];
 482         }
 483         argTypeCount = that.argTypeCount;
 484     }
 485     return *this;
 486 }
 487
 488 UBool
 489 MessageFormat::operator==(const Format& rhs) const
 490 {
 491     if (this == &rhs) return TRUE;
 492
 493     MessageFormat& that = (MessageFormat&)rhs;
 494
 495     // Check class ID before checking MessageFormat members
 496     if (!Format::operator==(rhs) ||
 497         fPattern != that.fPattern ||
 498         fLocale != that.fLocale ||
 499         isArgNumeric != that.isArgNumeric) {
 500         return FALSE;
 501     }
 502
 503     int32_t j;
 504     for (j=0; j<subformatCount; ++j) {
 505         if (subformats[j] != that.subformats[j]) {
 506             return FALSE;
 507         }
 508     }
 509
 510     return TRUE;
 511 }
 512
 513 // -------------------------------------
 514 // Creates a copy of this MessageFormat, the caller owns the copy.
 515
 516 Format*
 517 MessageFormat::clone() const
 518 {
 519     return new MessageFormat(*this);
 520 }
 521
 522 // -------------------------------------
 523 // Sets the locale of this MessageFormat object to theLocale.
 524
 525 void
 526 MessageFormat::setLocale(const Locale& theLocale)
 527 {
 528     if (fLocale != theLocale) {
 529         delete defaultNumberFormat;
 530         defaultNumberFormat = NULL;
 531         delete defaultDateFormat;
 532         defaultDateFormat = NULL;
 533     }
 534     fLocale = theLocale;
 535     setLocaleIDs(fLocale.getName(), fLocale.getName());
 536 }
 537
 538 // -------------------------------------
 539 // Gets the locale of this MessageFormat object.
 540
 541 const Locale&
 542 MessageFormat::getLocale() const
 543 {
 544     return fLocale;
 545 }
 546
 547
 548
 549
 550 void
 551 MessageFormat::applyPattern(const UnicodeString& newPattern,
 552                             UErrorCode& status)
 553 {
 554     UParseError parseError;
 555     applyPattern(newPattern,parseError,status);
 556 }
 557
 558
 559 // -------------------------------------
 560 // Applies the new pattern and returns an error if the pattern
 561 // is not correct.
 562 void
 563 MessageFormat::applyPattern(const UnicodeString& pattern,
 564                             UParseError& parseError,
 565                             UErrorCode& ec)
 566 {
 567     if(U_FAILURE(ec)) {
 568         return;
 569     }
 570     // The pattern is broken up into segments.  Each time a subformat
 571     // is encountered, 4 segments are recorded.  For example, consider
 572     // the pattern:
 573     //  "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
 574     // The first set of segments is:
 575     //  segments[0] = "There "
 576     //  segments[1] = "0"
 577     //  segments[2] = "choice"
 578     //  segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
 579
 580     // During parsing, the plain text is accumulated into segments[0].
 581     // Segments 1..3 are used to parse each subpattern.  Each time a
 582     // subpattern is parsed, it creates a format object that is stored
 583     // in the subformats array, together with an offset and argument
 584     // number.  The offset into the plain text stored in
 585     // segments[0].
 586
 587     // Quotes in segment 0 are handled normally.  They are removed.
 588     // Quotes may not occur in segments 1 or 2.
 589     // Quotes in segment 3 are parsed and _copied_.  This makes
 590     //  subformat patterns work, e.g., {1,number,'#'.##} passes
 591     //  the pattern "'#'.##" to DecimalFormat.
 592
 593     UnicodeString segments[4];
 594     int32_t part = 0; // segment we are in, 0..3
 595     // Record the highest argument number in the pattern.  (In the
 596     // subpattern {3,number} the argument number is 3.)
 597     int32_t formatNumber = 0;
 598     UBool inQuote = FALSE;
 599     int32_t braceStack = 0;
 600     // Clear error struct
 601     parseError.offset = -1;
 602     parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
 603     int32_t patLen = pattern.length();
 604     int32_t i;
 605
 606     for (i=0; i<subformatCount; ++i) {
 607         delete subformats[i].format;
 608     }
 609     subformatCount = 0;
 610     argTypeCount = 0;
 611
 612     for (i=0; i<patLen; ++i) {
 613         UChar ch = pattern[i];
 614         if (part == 0) {
 615             // In segment 0, recognize and remove quotes
 616             if (ch == SINGLE_QUOTE) {
 617                 if (i+1 < patLen && pattern[i+1] == SINGLE_QUOTE) {
 618                     segments[0] += ch;
 619                     ++i;
 620                 } else {
 621                     inQuote = !inQuote;
 622                 }
 623             } else if (ch == LEFT_CURLY_BRACE && !inQuote) {
 624                 // The only way we get from segment 0 to 1 is via an
 625                 // unquoted '{'.
 626                 part = 1;
 627             } else {
 628                 segments[0] += ch;
 629             }
 630         } else if (inQuote) {
 631             // In segments 1..3, recognize quoted matter, and copy it
 632             // into the segment, together with the quotes.  This takes
 633             // care of '' as well.
 634             segments[part] += ch;
 635             if (ch == SINGLE_QUOTE) {
 636                 inQuote = FALSE;
 637             }
 638         } else {
 639             // We have an unquoted character in segment 1..3
 640             switch (ch) {
 641             case COMMA:
 642                 // Commas bump us to the next segment, except for segment 3,
 643                 // which can contain commas.  See example above.
 644                 if (part < 3)
 645                     part += 1;
 646                 else
 647                     segments[3] += ch;
 648                 break;
 649             case LEFT_CURLY_BRACE:
 650                 // Handle '{' within segment 3.  The initial '{'
 651                 // before segment 1 is handled above.
 652                 if (part != 3) {
 653                     ec = U_PATTERN_SYNTAX_ERROR;
 654                     goto SYNTAX_ERROR;
 655                 }
 656                 ++braceStack;
 657                 segments[part] += ch;
 658                 break;
 659             case RIGHT_CURLY_BRACE:
 660                 if (braceStack == 0) {
 661                     makeFormat(formatNumber, segments, parseError,ec);
 662                     if (U_FAILURE(ec)){
 663                         goto SYNTAX_ERROR;
 664                     }
 665                     formatNumber++;
 666                     segments[1].remove();
 667                     segments[2].remove();
 668                     segments[3].remove();
 669                     part = 0;
 670                 } else {
 671                     --braceStack;
 672                     segments[part] += ch;
 673                 }
 674                 break;
 675             case SINGLE_QUOTE:
 676                 inQuote = TRUE;
 677                 // fall through (copy quote chars in segments 1..3)
 678             default:
 679                 segments[part] += ch;
 680                 break;
 681             }
 682         }
 683     }
 684     if (braceStack != 0 || part != 0) {
 685         // Unmatched braces in the pattern
 686         ec = U_UNMATCHED_BRACES;
 687         goto SYNTAX_ERROR;
 688     }
 689     fPattern = segments[0];
 690     return;
 691
 692  SYNTAX_ERROR:
 693     syntaxError(pattern, i, parseError);
 694     for (i=0; i<subformatCount; ++i) {
 695         delete subformats[i].format;
 696     }
 697     argTypeCount = subformatCount = 0;
 698 }
 699 // -------------------------------------
 700 // Converts this MessageFormat instance to a pattern.
 701
 702 UnicodeString&
 703 MessageFormat::toPattern(UnicodeString& appendTo) const {
 704     // later, make this more extensible
 705     int32_t lastOffset = 0;
 706     int32_t i;
 707     for (i=0; i<subformatCount; ++i) {
 708         copyAndFixQuotes(fPattern, lastOffset, subformats[i].offset, appendTo);
 709         lastOffset = subformats[i].offset;
 710         appendTo += LEFT_CURLY_BRACE;
 711         if (isArgNumeric) {
 712             itos(subformats[i].argNum, appendTo);
 713         }
 714         else {
 715             appendTo += *subformats[i].argName;
 716         }
 717         Format* fmt = subformats[i].format;
 718         if (fmt == NULL) {
 719             // do nothing, string format
 720         }
 721         else if (fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
 722
 723             UErrorCode ec = U_ZERO_ERROR;
 724             NumberFormat& formatAlias = *(NumberFormat*)fmt;
 725             NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec);
 726             NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec);
 727             NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec);
 728             NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec);
 729
 730             appendTo += COMMA;
 731             appendTo += ID_NUMBER;
 732             if (formatAlias != *defaultTemplate) {
 733                 appendTo += COMMA;
 734                 if (formatAlias == *currencyTemplate) {
 735                     appendTo += ID_CURRENCY;
 736                 }
 737                 else if (formatAlias == *percentTemplate) {
 738                     appendTo += ID_PERCENT;
 739                 }
 740                 else if (formatAlias == *integerTemplate) {
 741                     appendTo += ID_INTEGER;
 742                 }
 743                 else {
 744                     UnicodeString buffer;
 745                     appendTo += ((DecimalFormat*)fmt)->toPattern(buffer);
 746                 }
 747             }
 748
 749             delete defaultTemplate;
 750             delete currencyTemplate;
 751             delete percentTemplate;
 752             delete integerTemplate;
 753         }
 754         else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
 755             DateFormat& formatAlias = *(DateFormat*)fmt;
 756             DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale);
 757             DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale);
 758             DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale);
 759             DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale);
 760             DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale);
 761             DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale);
 762             DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale);
 763             DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale);
 764
 765
 766             appendTo += COMMA;
 767             if (formatAlias == *defaultDateTemplate) {
 768                 appendTo += ID_DATE;
 769             }
 770             else if (formatAlias == *shortDateTemplate) {
 771                 appendTo += ID_DATE;
 772                 appendTo += COMMA;
 773                 appendTo += ID_SHORT;
 774             }
 775             else if (formatAlias == *defaultDateTemplate) {
 776                 appendTo += ID_DATE;
 777                 appendTo += COMMA;
 778                 appendTo += ID_MEDIUM;
 779             }
 780             else if (formatAlias == *longDateTemplate) {
 781                 appendTo += ID_DATE;
 782                 appendTo += COMMA;
 783                 appendTo += ID_LONG;
 784             }
 785             else if (formatAlias == *fullDateTemplate) {
 786                 appendTo += ID_DATE;
 787                 appendTo += COMMA;
 788                 appendTo += ID_FULL;
 789             }
 790             else if (formatAlias == *defaultTimeTemplate) {
 791                 appendTo += ID_TIME;
 792             }
 793             else if (formatAlias == *shortTimeTemplate) {
 794                 appendTo += ID_TIME;
 795                 appendTo += COMMA;
 796                 appendTo += ID_SHORT;
 797             }
 798             else if (formatAlias == *defaultTimeTemplate) {
 799                 appendTo += ID_TIME;
 800                 appendTo += COMMA;
 801                 appendTo += ID_MEDIUM;
 802             }
 803             else if (formatAlias == *longTimeTemplate) {
 804                 appendTo += ID_TIME;
 805                 appendTo += COMMA;
 806                 appendTo += ID_LONG;
 807             }
 808             else if (formatAlias == *fullTimeTemplate) {
 809                 appendTo += ID_TIME;
 810                 appendTo += COMMA;
 811                 appendTo += ID_FULL;
 812             }
 813             else {
 814                 UnicodeString buffer;
 815                 appendTo += ID_DATE;
 816                 appendTo += COMMA;
 817                 appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer);
 818             }
 819
 820             delete defaultDateTemplate;
 821             delete shortDateTemplate;
 822             delete longDateTemplate;
 823             delete fullDateTemplate;
 824             delete defaultTimeTemplate;
 825             delete shortTimeTemplate;
 826             delete longTimeTemplate;
 827             delete fullTimeTemplate;
 828             // {sfb} there should be a more efficient way to do this!
 829         }
 830         else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) {
 831             UnicodeString buffer;
 832             appendTo += COMMA;
 833             appendTo += ID_CHOICE;
 834             appendTo += COMMA;
 835             appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer);
 836         }
 837         else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) {
 838             UnicodeString buffer;
 839             appendTo += ((PluralFormat*)fmt)->toPattern(buffer);
 840         }
 841         else {
 842             //appendTo += ", unknown";
 843         }
 844         appendTo += RIGHT_CURLY_BRACE;
 845     }
 846     copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo);
 847     return appendTo;
 848 }
 849
 850 // -------------------------------------
 851 // Adopts the new formats array and updates the array count.
 852 // This MessageFormat instance owns the new formats.
 853
 854 void
 855 MessageFormat::adoptFormats(Format** newFormats,
 856                             int32_t count) {
 857     if (newFormats == NULL || count < 0) {
 858         return;
 859     }
 860
 861     int32_t i;
 862     if (allocateSubformats(count)) {
 863         for (i=0; i<subformatCount; ++i) {
 864             delete subformats[i].format;
 865         }
 866         for (i=0; i<count; ++i) {
 867             subformats[i].format = newFormats[i];
 868         }
 869         subformatCount = count;
 870     } else {
 871         // An adopt method must always take ownership.  Delete
 872         // the incoming format objects and return unchanged.
 873         for (i=0; i<count; ++i) {
 874             delete newFormats[i];
 875         }
 876     }
 877
 878     // TODO: What about the .offset and .argNum fields?
 879 }
 880
 881 // -------------------------------------
 882 // Sets the new formats array and updates the array count.
 883 // This MessageFormat instance maks a copy of the new formats.
 884
 885 void
 886 MessageFormat::setFormats(const Format** newFormats,
 887                           int32_t count) {
 888     if (newFormats == NULL || count < 0) {
 889         return;
 890     }
 891
 892     if (allocateSubformats(count)) {
 893         int32_t i;
 894         for (i=0; i<subformatCount; ++i) {
 895             delete subformats[i].format;
 896         }
 897         subformatCount = 0;
 898
 899         for (i=0; i<count; ++i) {
 900             subformats[i].format = newFormats[i] ? newFormats[i]->clone() : NULL;
 901         }
 902         subformatCount = count;
 903     }
 904
 905     // TODO: What about the .offset and .arg fields?
 906 }
 907
 908 // -------------------------------------
 909 // Adopt a single format by format number.
 910 // Do nothing if the format number is not less than the array count.
 911
 912 void
 913 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
 914     if (n < 0 || n >= subformatCount) {
 915         delete newFormat;
 916     } else {
 917         delete subformats[n].format;
 918         subformats[n].format = newFormat;
 919     }
 920 }
 921
 922 // -------------------------------------
 923 // Adopt a single format by format name.
 924 // Do nothing if there is no match of formatName.
 925 void
 926 MessageFormat::adoptFormat(const UnicodeString& formatName,
 927                            Format* formatToAdopt,
 928                            UErrorCode& status) {
 929     if (isArgNumeric ) {
 930         int32_t argumentNumber = stou(formatName);
 931         if (argumentNumber<0) {
 932             status = U_ARGUMENT_TYPE_MISMATCH;
 933             return;
 934         }
 935         adoptFormat(argumentNumber, formatToAdopt);
 936         return;
 937     }
 938     for (int32_t i=0; i<subformatCount; ++i) {
 939         if (formatName==*subformats[i].argName) {
 940             delete subformats[i].format;
 941             if ( formatToAdopt== NULL) {
 942                 // This should never happen -- but we'll be nice if it does
 943                 subformats[i].format = NULL;
 944             } else {
 945                 subformats[i].format = formatToAdopt;
 946             }
 947         }
 948     }
 949 }
 950
 951 // -------------------------------------
 952 // Set a single format.
 953 // Do nothing if the variable is not less than the array count.
 954
 955 void
 956 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
 957     if (n >= 0 && n < subformatCount) {
 958         delete subformats[n].format;
 959         if (&newFormat == NULL) {
 960             // This should never happen -- but we'll be nice if it does
 961             subformats[n].format = NULL;
 962         } else {
 963             subformats[n].format = newFormat.clone();
 964         }
 965     }
 966 }
 967
 968 // -------------------------------------
 969 // Get a single format by format name.
 970 // Do nothing if the variable is not less than the array count.
 971 Format *
 972 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
 973
 974     if (U_FAILURE(status)) return NULL;
 975
 976     if (isArgNumeric ) {
 977         int32_t argumentNumber = stou(formatName);
 978         if (argumentNumber<0) {
 979             status = U_ARGUMENT_TYPE_MISMATCH;
 980             return NULL;
 981         }
 982         if (argumentNumber < 0 || argumentNumber >= subformatCount) {
 983             return subformats[argumentNumber].format;
 984         }
 985         else {
 986             return NULL;
 987         }
 988     }
 989
 990     for (int32_t i=0; i<subformatCount; ++i) {
 991         if (formatName==*subformats[i].argName)
 992         {
 993             return subformats[i].format;
 994         }
 995     }
 996     return NULL;
 997 }
 998
 999 // -------------------------------------
1000 // Set a single format by format name
1001 // Do nothing if the variable is not less than the array count.
1002 void
1003 MessageFormat::setFormat(const UnicodeString& formatName,
1004                          const Format& newFormat,
1005                          UErrorCode& status) {
1006     if (isArgNumeric) {
1007         status = U_ARGUMENT_TYPE_MISMATCH;
1008         return;
1009     }
1010     for (int32_t i=0; i<subformatCount; ++i) {
1011         if (formatName==*subformats[i].argName)
1012         {
1013             delete subformats[i].format;
1014             if (&newFormat == NULL) {
1015                 // This should never happen -- but we'll be nice if it does
1016                 subformats[i].format = NULL;
1017             } else {
1018                 subformats[i].format = newFormat.clone();
1019             }
1020             break;
1021         }
1022     }
1023 }
1024
1025 // -------------------------------------
1026 // Gets the format array.
1027
1028 const Format**
1029 MessageFormat::getFormats(int32_t& cnt) const
1030 {
1031     // This old API returns an array (which we hold) of Format*
1032     // pointers.  The array is valid up to the next call to any
1033     // method on this object.  We construct and resize an array
1034     // on demand that contains aliases to the subformats[i].format
1035     // pointers.
1036     MessageFormat* t = (MessageFormat*) this;
1037     cnt = 0;
1038     if (formatAliases == NULL) {
1039         t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount;
1040         Format** a = (Format**)
1041             uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
1042         if (a == NULL) {
1043             return NULL;
1044         }
1045         t->formatAliases = a;
1046     } else if (subformatCount > formatAliasesCapacity) {
1047         Format** a = (Format**)
1048             uprv_realloc(formatAliases, sizeof(Format*) * subformatCount);
1049         if (a == NULL) {
1050             return NULL;
1051         }
1052         t->formatAliases = a;
1053         t->formatAliasesCapacity = subformatCount;
1054     }
1055     for (int32_t i=0; i<subformatCount; ++i) {
1056         t->formatAliases[i] = subformats[i].format;
1057     }
1058     cnt = subformatCount;
1059     return (const Format**)formatAliases;
1060 }
1061
1062
1063 StringEnumeration*
1064 MessageFormat::getFormatNames(UErrorCode& status) {
1065     if (U_FAILURE(status))  return NULL;
1066
1067     if (isArgNumeric) {
1068         status = U_ARGUMENT_TYPE_MISMATCH;
1069         return NULL;
1070     }
1071     UVector *fFormatNames = new UVector(status);
1072     if (U_FAILURE(status)) {
1073         status = U_MEMORY_ALLOCATION_ERROR;
1074         return NULL;
1075     }
1076     for (int32_t i=0; i<subformatCount; ++i) {
1077         fFormatNames->addElement(new UnicodeString(*subformats[i].argName), status);
1078     }
1079
1080     StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
1081     return nameEnumerator;
1082 }
1083
1084 // -------------------------------------
1085 // Formats the source Formattable array and copy into the result buffer.
1086 // Ignore the FieldPosition result for error checking.
1087
1088 UnicodeString&
1089 MessageFormat::format(const Formattable* source,
1090                       int32_t cnt,
1091                       UnicodeString& appendTo,
1092                       FieldPosition& ignore,
1093                       UErrorCode& success) const
1094 {
1095     if (U_FAILURE(success))
1096         return appendTo;
1097
1098     return format(source, cnt, appendTo, ignore, 0, success);
1099 }
1100
1101 // -------------------------------------
1102 // Internally creates a MessageFormat instance based on the
1103 // pattern and formats the arguments Formattable array and
1104 // copy into the appendTo buffer.
1105
1106 UnicodeString&
1107 MessageFormat::format(  const UnicodeString& pattern,
1108                         const Formattable* arguments,
1109                         int32_t cnt,
1110                         UnicodeString& appendTo,
1111                         UErrorCode& success)
1112 {
1113     MessageFormat temp(pattern, success);
1114     FieldPosition ignore(0);
1115     temp.format(arguments, cnt, appendTo, ignore, success);
1116     return appendTo;
1117 }
1118
1119 // -------------------------------------
1120 // Formats the source Formattable object and copy into the
1121 // appendTo buffer.  The Formattable object must be an array
1122 // of Formattable instances, returns error otherwise.
1123
1124 UnicodeString&
1125 MessageFormat::format(const Formattable& source,
1126                       UnicodeString& appendTo,
1127                       FieldPosition& ignore,
1128                       UErrorCode& success) const
1129 {
1130     int32_t cnt;
1131
1132     if (U_FAILURE(success))
1133         return appendTo;
1134     if (source.getType() != Formattable::kArray) {
1135         success = U_ILLEGAL_ARGUMENT_ERROR;
1136         return appendTo;
1137     }
1138     const Formattable* tmpPtr = source.getArray(cnt);
1139
1140     return format(tmpPtr, cnt, appendTo, ignore, 0, success);
1141 }
1142
1143
1144 UnicodeString&
1145 MessageFormat::format(const UnicodeString* argumentNames,
1146                       const Formattable* arguments,
1147                       int32_t count,
1148                       UnicodeString& appendTo,
1149                       UErrorCode& success) const {
1150     FieldPosition ignore(0);
1151     return format(arguments, argumentNames, count, appendTo, ignore, 0, success);
1152 }
1153
1154 UnicodeString&
1155 MessageFormat::format(const Formattable* arguments,
1156                       int32_t cnt,
1157                       UnicodeString& appendTo,
1158                       FieldPosition& status,
1159                       int32_t recursionProtection,
1160                       UErrorCode& success) const
1161 {
1162     return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1163 }
1164
1165 // -------------------------------------
1166 // Formats the arguments Formattable array and copy into the appendTo buffer.
1167 // Ignore the FieldPosition result for error checking.
1168
1169 UnicodeString&
1170 MessageFormat::format(const Formattable* arguments,
1171                       const UnicodeString *argumentNames,
1172                       int32_t cnt,
1173                       UnicodeString& appendTo,
1174                       FieldPosition& status,
1175                       int32_t recursionProtection,
1176                       UErrorCode& success) const
1177 {
1178     int32_t lastOffset = 0;
1179     int32_t argumentNumber=0;
1180     if (cnt < 0 || (cnt && arguments == NULL)) {
1181         success = U_ILLEGAL_ARGUMENT_ERROR;
1182         return appendTo;
1183     }
1184
1185     if ( !isArgNumeric && argumentNames== NULL ) {
1186         success = U_ILLEGAL_ARGUMENT_ERROR;
1187         return appendTo;
1188     }
1189
1190     const Formattable *obj=NULL;
1191     for (int32_t i=0; i<subformatCount; ++i) {
1192         // Append the prefix of current format element.
1193         appendTo.append(fPattern, lastOffset, subformats[i].offset - lastOffset);
1194         lastOffset = subformats[i].offset;
1195         obj = NULL;
1196         if (isArgNumeric) {
1197             argumentNumber = subformats[i].argNum;
1198
1199             // Checks the scope of the argument number.
1200             if (argumentNumber >= cnt) {
1201                 appendTo += LEFT_CURLY_BRACE;
1202                 itos(argumentNumber, appendTo);
1203                 appendTo += RIGHT_CURLY_BRACE;
1204                 continue;
1205             }
1206             obj = arguments+argumentNumber;
1207         }
1208         else {
1209             for (int32_t j=0; j<cnt; ++j) {
1210                 if (argumentNames[j]== *subformats[i].argName ) {
1211                     obj = arguments+j;
1212                     break;
1213                 }
1214             }
1215             if (obj == NULL ) {
1216                 appendTo += LEFT_CURLY_BRACE;
1217                 appendTo += *subformats[i].argName;
1218                 appendTo += RIGHT_CURLY_BRACE;
1219                 continue;
1220
1221             }
1222         }
1223         Formattable::Type type = obj->getType();
1224
1225         // Recursively calling the format process only if the current
1226         // format argument refers to a ChoiceFormat object.
1227         Format* fmt = subformats[i].format;
1228         if (fmt != NULL) {
1229             UnicodeString argNum;
1230             fmt->format(*obj, argNum, success);
1231
1232             // Needs to reprocess the ChoiceFormat option by using the
1233             // MessageFormat pattern application.
1234             if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() ||
1235                  fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) &&
1236                 argNum.indexOf(LEFT_CURLY_BRACE) >= 0) {
1237                 MessageFormat temp(argNum, fLocale, success);
1238                 // TODO: Implement recursion protection
1239                 if ( isArgNumeric ) {
1240                     temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success);
1241                 }
1242                 else {
1243                     temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success);
1244                 }
1245                 if (U_FAILURE(success)) {
1246                     return appendTo;
1247                 }
1248             }
1249             else {
1250                 appendTo += argNum;
1251             }
1252         }
1253         // If the obj data type is a number, use a NumberFormat instance.
1254         else if ((type == Formattable::kDouble) ||
1255                  (type == Formattable::kLong) ||
1256                  (type == Formattable::kInt64)) {
1257
1258             const NumberFormat* nf = getDefaultNumberFormat(success);
1259             if (nf == NULL) {
1260                 return appendTo;
1261             }
1262             if (type == Formattable::kDouble) {
1263                 nf->format(obj->getDouble(), appendTo);
1264             } else if (type == Formattable::kLong) {
1265                 nf->format(obj->getLong(), appendTo);
1266             } else {
1267                 nf->format(obj->getInt64(), appendTo);
1268             }
1269         }
1270         // If the obj data type is a Date instance, use a DateFormat instance.
1271         else if (type == Formattable::kDate) {
1272             const DateFormat* df = getDefaultDateFormat(success);
1273             if (df == NULL) {
1274                 return appendTo;
1275             }
1276             df->format(obj->getDate(), appendTo);
1277         }
1278         else if (type == Formattable::kString) {
1279             appendTo += obj->getString();
1280         }
1281         else {
1282             success = U_ILLEGAL_ARGUMENT_ERROR;
1283             return appendTo;
1284         }
1285     }
1286     // Appends the rest of the pattern characters after the real last offset.
1287     appendTo.append(fPattern, lastOffset, 0x7fffffff);
1288     return appendTo;
1289 }
1290
1291
1292 // -------------------------------------
1293 // Parses the source pattern and returns the Formattable objects array,
1294 // the array count and the ending parse position.  The caller of this method
1295 // owns the array.
1296
1297 Formattable*
1298 MessageFormat::parse(const UnicodeString& source,
1299                      ParsePosition& pos,
1300                      int32_t& count) const
1301 {
1302     // Allocate at least one element.  Allocating an array of length
1303     // zero causes problems on some platforms (e.g. Win32).
1304     Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1];
1305     int32_t patternOffset = 0;
1306     int32_t sourceOffset = pos.getIndex();
1307     ParsePosition tempPos(0);
1308     count = 0; // {sfb} reset to zero
1309     int32_t len;
1310     // If resultArray could not be created, exit out.
1311     // Avoid crossing initialization of variables above.
1312     if (resultArray == NULL) {
1313         goto PARSE_ERROR;
1314     }
1315     for (int32_t i = 0; i < subformatCount; ++i) {
1316         // match up to format
1317         len = subformats[i].offset - patternOffset;
1318         if (len == 0 ||
1319             fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1320             sourceOffset += len;
1321             patternOffset += len;
1322         }
1323         else {
1324             goto PARSE_ERROR;
1325         }
1326
1327         // now use format
1328         Format* fmt = subformats[i].format;
1329         int32_t argNum = subformats[i].argNum;
1330         if (fmt == NULL) {   // string format
1331             // if at end, use longest possible match
1332             // otherwise uses first match to intervening string
1333             // does NOT recursively try all possibilities
1334             int32_t tempLength = (i+1<subformatCount) ?
1335                 subformats[i+1].offset : fPattern.length();
1336
1337             int32_t next;
1338             if (patternOffset >= tempLength) {
1339                 next = source.length();
1340             }
1341             else {
1342                 UnicodeString buffer;
1343                 fPattern.extract(patternOffset,tempLength - patternOffset, buffer);
1344                 next = source.indexOf(buffer, sourceOffset);
1345             }
1346
1347             if (next < 0) {
1348                 goto PARSE_ERROR;
1349             }
1350             else {
1351                 UnicodeString buffer;
1352                 source.extract(sourceOffset,next - sourceOffset, buffer);
1353                 UnicodeString strValue = buffer;
1354                 UnicodeString temp(LEFT_CURLY_BRACE);
1355                 // {sfb} check this later
1356                 if (isArgNumeric) {
1357                     itos(argNum, temp);
1358                 }
1359                 else {
1360                     temp+=(*subformats[i].argName);
1361                 }
1362                 temp += RIGHT_CURLY_BRACE;
1363                 if (strValue != temp) {
1364                     source.extract(sourceOffset,next - sourceOffset, buffer);
1365                     resultArray[argNum].setString(buffer);
1366                     // {sfb} not sure about this
1367                     if ((argNum + 1) > count) {
1368                         count = argNum + 1;
1369                     }
1370                 }
1371                 sourceOffset = next;
1372             }
1373         }
1374         else {
1375             tempPos.setIndex(sourceOffset);
1376             fmt->parseObject(source, resultArray[argNum], tempPos);
1377             if (tempPos.getIndex() == sourceOffset) {
1378                 goto PARSE_ERROR;
1379             }
1380
1381             if ((argNum + 1) > count) {
1382                 count = argNum + 1;
1383             }
1384             sourceOffset = tempPos.getIndex(); // update
1385         }
1386     }
1387     len = fPattern.length() - patternOffset;
1388     if (len == 0 ||
1389         fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) {
1390         pos.setIndex(sourceOffset + len);
1391         return resultArray;
1392     }
1393     // else fall through...
1394
1395  PARSE_ERROR:
1396     pos.setErrorIndex(sourceOffset);
1397     delete [] resultArray;
1398     count = 0;
1399     return NULL; // leave index as is to signal error
1400 }
1401
1402 // -------------------------------------
1403 // Parses the source string and returns the array of
1404 // Formattable objects and the array count.  The caller
1405 // owns the returned array.
1406
1407 Formattable*
1408 MessageFormat::parse(const UnicodeString& source,
1409                      int32_t& cnt,
1410                      UErrorCode& success) const
1411 {
1412     if (!isArgNumeric ) {
1413         success = U_ARGUMENT_TYPE_MISMATCH;
1414         return NULL;
1415     }
1416     ParsePosition status(0);
1417     // Calls the actual implementation method and starts
1418     // from zero offset of the source text.
1419     Formattable* result = parse(source, status, cnt);
1420     if (status.getIndex() == 0) {
1421         success = U_MESSAGE_PARSE_ERROR;
1422         delete[] result;
1423         return NULL;
1424     }
1425     return result;
1426 }
1427
1428 // -------------------------------------
1429 // Parses the source text and copy into the result buffer.
1430
1431 void
1432 MessageFormat::parseObject( const UnicodeString& source,
1433                             Formattable& result,
1434                             ParsePosition& status) const
1435 {
1436     int32_t cnt = 0;
1437     Formattable* tmpResult = parse(source, status, cnt);
1438     if (tmpResult != NULL)
1439         result.adoptArray(tmpResult, cnt);
1440 }
1441
1442 UnicodeString
1443 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
1444   UnicodeString result;
1445   if (U_SUCCESS(status)) {
1446     int32_t plen = pattern.length();
1447     const UChar* pat = pattern.getBuffer();
1448     int32_t blen = plen * 2 + 1; // space for null termination, convenience
1449     UChar* buf = result.getBuffer(blen);
1450     if (buf == NULL) {
1451       status = U_MEMORY_ALLOCATION_ERROR;
1452     } else {
1453       int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
1454       result.releaseBuffer(U_SUCCESS(status) ? len : 0);
1455     }
1456   }
1457   if (U_FAILURE(status)) {
1458     result.setToBogus();
1459   }
1460   return result;
1461 }
1462
1463 // -------------------------------------
1464
1465 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
1466     RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
1467     if (fmt == NULL) {
1468         ec = U_MEMORY_ALLOCATION_ERROR;
1469     } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
1470         UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
1471         fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
1472     }
1473     return fmt;
1474 }
1475
1476 /**
1477  * Reads the segments[] array (see applyPattern()) and parses the
1478  * segments[1..3] into a Format* object.  Stores the format object in
1479  * the subformats[] array.  Updates the argTypes[] array type
1480  * information for the corresponding argument.
1481  *
1482  * @param formatNumber index into subformats[] for this format
1483  * @param segments array of strings with the parsed pattern segments
1484  * @param parseError parse error data (output param)
1485  * @param ec error code
1486  */
1487 void
1488 MessageFormat::makeFormat(int32_t formatNumber,
1489                           UnicodeString* segments,
1490                           UParseError& parseError,
1491                           UErrorCode& ec) {
1492     if (U_FAILURE(ec)) {
1493         return;
1494     }
1495
1496     // Parse the argument number
1497     int32_t argumentNumber = stou(segments[1]); // always unlocalized!
1498     UnicodeString argumentName;
1499     if (argumentNumber < 0) {
1500         if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) {
1501             ec = U_INVALID_FORMAT_ERROR;
1502             return;
1503         }
1504         isArgNumeric = FALSE;
1505         argumentNumber=formatNumber;
1506     }
1507     if (!isArgNumeric) {
1508         if ( !isLegalArgName(segments[1]) ) {
1509             ec = U_INVALID_FORMAT_ERROR;
1510             return;
1511         }
1512         argumentName = segments[1];
1513     }
1514
1515     // Parse the format, recording the argument type and creating a
1516     // new Format object (except for string arguments).
1517     Formattable::Type argType;
1518     Format *fmt = NULL;
1519     int32_t typeID, styleID;
1520     DateFormat::EStyle style;
1521     UnicodeString unquotedPattern, quotedPattern;
1522     UBool inQuote = FALSE;
1523
1524     switch (typeID = findKeyword(segments[2], TYPE_IDS)) {
1525
1526     case 0: // string
1527         argType = Formattable::kString;
1528         break;
1529
1530     case 1: // number
1531         argType = Formattable::kDouble;
1532
1533         switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) {
1534         case 0: // default
1535             fmt = NumberFormat::createInstance(fLocale, ec);
1536             break;
1537         case 1: // currency
1538             fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
1539             break;
1540         case 2: // percent
1541             fmt = NumberFormat::createPercentInstance(fLocale, ec);
1542             break;
1543         case 3: // integer
1544             argType = Formattable::kLong;
1545             fmt = createIntegerFormat(fLocale, ec);
1546             break;
1547         default: // pattern
1548             fmt = NumberFormat::createInstance(fLocale, ec);
1549             if (fmt &&
1550                 fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1551                 ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec);
1552             }
1553             break;
1554         }
1555         break;
1556
1557     case 2: // date
1558     case 3: // time
1559         argType = Formattable::kDate;
1560         styleID = findKeyword(segments[3], DATE_STYLE_IDS);
1561         style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
1562
1563         if (typeID == 2) {
1564             fmt = DateFormat::createDateInstance(style, fLocale);
1565         } else {
1566             fmt = DateFormat::createTimeInstance(style, fLocale);
1567         }
1568
1569         if (styleID < 0 &&
1570             fmt != NULL &&
1571             fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
1572             ((SimpleDateFormat*)fmt)->applyPattern(segments[3]);
1573         }
1574         break;
1575
1576     case 4: // choice
1577         argType = Formattable::kDouble;
1578
1579         fmt = new ChoiceFormat(segments[3], parseError, ec);
1580         break;
1581
1582     case 5: // spellout
1583         argType = Formattable::kDouble;
1584         fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec);
1585         break;
1586     case 6: // ordinal
1587         argType = Formattable::kDouble;
1588         fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec);
1589         break;
1590     case 7: // duration
1591         argType = Formattable::kDouble;
1592         fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec);
1593         break;
1594     case 8: // plural
1595         argType = Formattable::kDouble;
1596         quotedPattern = segments[3];
1597         for (int32_t i = 0; i < quotedPattern.length(); ++i) {
1598             UChar ch = quotedPattern.charAt(i);
1599             if (ch == SINGLE_QUOTE) {
1600                 if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) {
1601                     unquotedPattern+=ch;
1602                     ++i;
1603                 }
1604                 else {
1605                     inQuote = !inQuote;
1606                 }
1607             }
1608             else {
1609                 unquotedPattern += ch;
1610             }
1611         }
1612         fmt = new PluralFormat(fLocale, unquotedPattern, ec);
1613         break;
1614     default:
1615         argType = Formattable::kString;
1616         ec = U_ILLEGAL_ARGUMENT_ERROR;
1617         break;
1618     }
1619
1620     if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) {
1621         ec = U_MEMORY_ALLOCATION_ERROR;
1622     }
1623
1624     if (!allocateSubformats(formatNumber+1) ||
1625         !allocateArgTypes(argumentNumber+1)) {
1626         ec = U_MEMORY_ALLOCATION_ERROR;
1627     }
1628
1629     if (U_FAILURE(ec)) {
1630         delete fmt;
1631         return;
1632     }
1633
1634     // Parse succeeded; record results in our arrays
1635     subformats[formatNumber].format = fmt;
1636     subformats[formatNumber].offset = segments[0].length();
1637     if (isArgNumeric) {
1638         subformats[formatNumber].argName = NULL;
1639         subformats[formatNumber].argNum = argumentNumber;
1640     }
1641     else {
1642         subformats[formatNumber].argName = new UnicodeString(argumentName);
1643         subformats[formatNumber].argNum = -1;
1644     }
1645     subformatCount = formatNumber+1;
1646
1647     // Careful here: argumentNumber may in general arrive out of
1648     // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
1649     argTypes[argumentNumber] = argType;
1650     if (argumentNumber+1 > argTypeCount) {
1651         argTypeCount = argumentNumber+1;
1652     }
1653 }
1654
1655 // -------------------------------------
1656 // Finds the string, s, in the string array, list.
1657 int32_t MessageFormat::findKeyword(const UnicodeString& s,
1658                                    const UChar * const *list)
1659 {
1660     if (s.length() == 0)
1661         return 0; // default
1662
1663     UnicodeString buffer = s;
1664     // Trims the space characters and turns all characters
1665     // in s to lower case.
1666     buffer.trim().toLower("");
1667     for (int32_t i = 0; list[i]; ++i) {
1668         if (!buffer.compare(list[i], u_strlen(list[i]))) {
1669             return i;
1670         }
1671     }
1672     return -1;
1673 }
1674
1675 // -------------------------------------
1676 // Checks the range of the source text to quote the special
1677 // characters, { and ' and copy to target buffer.
1678
1679 void
1680 MessageFormat::copyAndFixQuotes(const UnicodeString& source,
1681                                 int32_t start,
1682                                 int32_t end,
1683                                 UnicodeString& appendTo)
1684 {
1685     UBool gotLB = FALSE;
1686
1687     for (int32_t i = start; i < end; ++i) {
1688         UChar ch = source[i];
1689         if (ch == LEFT_CURLY_BRACE) {
1690             appendTo += SINGLE_QUOTE;
1691             appendTo += LEFT_CURLY_BRACE;
1692             appendTo += SINGLE_QUOTE;
1693             gotLB = TRUE;
1694         }
1695         else if (ch == RIGHT_CURLY_BRACE) {
1696             if(gotLB) {
1697                 appendTo += RIGHT_CURLY_BRACE;
1698                 gotLB = FALSE;
1699             }
1700             else {
1701                 // orig code.
1702                 appendTo += SINGLE_QUOTE;
1703                 appendTo += RIGHT_CURLY_BRACE;
1704                 appendTo += SINGLE_QUOTE;
1705             }
1706         }
1707         else if (ch == SINGLE_QUOTE) {
1708             appendTo += SINGLE_QUOTE;
1709             appendTo += SINGLE_QUOTE;
1710         }
1711         else {
1712             appendTo += ch;
1713         }
1714     }
1715 }
1716
1717 /**
1718  * Convenience method that ought to be in NumberFormat
1719  */
1720 NumberFormat*
1721 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
1722     NumberFormat *temp = NumberFormat::createInstance(locale, status);
1723     if (temp != NULL && temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1724         DecimalFormat *temp2 = (DecimalFormat*) temp;
1725         temp2->setMaximumFractionDigits(0);
1726         temp2->setDecimalSeparatorAlwaysShown(FALSE);
1727         temp2->setParseIntegerOnly(TRUE);
1728     }
1729
1730     return temp;
1731 }
1732
1733 /**
1734  * Return the default number format.  Used to format a numeric
1735  * argument when subformats[i].format is NULL.  Returns NULL
1736  * on failure.
1737  *
1738  * Semantically const but may modify *this.
1739  */
1740 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
1741     if (defaultNumberFormat == NULL) {
1742         MessageFormat* t = (MessageFormat*) this;
1743         t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
1744         if (U_FAILURE(ec)) {
1745             delete t->defaultNumberFormat;
1746             t->defaultNumberFormat = NULL;
1747         } else if (t->defaultNumberFormat == NULL) {
1748             ec = U_MEMORY_ALLOCATION_ERROR;
1749         }
1750     }
1751     return defaultNumberFormat;
1752 }
1753
1754 /**
1755  * Return the default date format.  Used to format a date
1756  * argument when subformats[i].format is NULL.  Returns NULL
1757  * on failure.
1758  *
1759  * Semantically const but may modify *this.
1760  */
1761 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
1762     if (defaultDateFormat == NULL) {
1763         MessageFormat* t = (MessageFormat*) this;
1764         t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
1765         if (t->defaultDateFormat == NULL) {
1766             ec = U_MEMORY_ALLOCATION_ERROR;
1767         }
1768     }
1769     return defaultDateFormat;
1770 }
1771
1772 UBool
1773 MessageFormat::usesNamedArguments() const {
1774     return !isArgNumeric;
1775 }
1776
1777 UBool
1778 MessageFormat::isLegalArgName(const UnicodeString& argName) const {
1779     if(!u_hasBinaryProperty(argName.charAt(0), idStart)) {
1780         return FALSE;
1781     }
1782     for (int32_t i=1; i<argName.length(); ++i) {
1783         if(!u_hasBinaryProperty(argName.charAt(i), idContinue)) {
1784             return FALSE;
1785         }
1786     }
1787     return TRUE;
1788 }
1789
1790 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
1791     pos=0;
1792     fFormatNames = fNameList;
1793 }
1794
1795 const UnicodeString*
1796 FormatNameEnumeration::snext(UErrorCode& status) {
1797     if (U_SUCCESS(status) && pos < fFormatNames->size()) {
1798         return (const UnicodeString*)fFormatNames->elementAt(pos++);
1799     }
1800     return NULL;
1801 }
1802
1803 void
1804 FormatNameEnumeration::reset(UErrorCode& /*status*/) {
1805     pos=0;
1806 }
1807
1808 int32_t
1809 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
1810        return (fFormatNames==NULL) ? 0 : fFormatNames->size();
1811 }
1812
1813 FormatNameEnumeration::~FormatNameEnumeration() {
1814     UnicodeString *s;
1815     for (int32_t i=0; i<fFormatNames->size(); ++i) {
1816         if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) {
1817             delete s;
1818         }
1819     }
1820     delete fFormatNames;
1821 }
1822 U_NAMESPACE_END
1823
1824 #endif /* #if !UCONFIG_NO_FORMATTING */
1825
1826 //eof