icuSources/i18n/smpdtfmt.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2010, International Business Machines Corporation and    *
   4 * others. All Rights Reserved.                                                *
   5 *******************************************************************************
   6 *
   7 * File SMPDTFMT.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   02/19/97    aliu        Converted from java.
  13 *   03/31/97    aliu        Modified extensively to work with 50 locales.
  14 *   04/01/97    aliu        Added support for centuries.
  15 *   07/09/97    helena      Made ParsePosition into a class.
  16 *   07/21/98    stephen     Added initializeDefaultCentury.
  17 *                             Removed getZoneIndex (added in DateFormatSymbols)
  18 *                             Removed subParseLong
  19 *                             Removed chk
  20 *  02/22/99     stephen     Removed character literals for EBCDIC safety
  21 *   10/14/99    aliu        Updated 2-digit year parsing so that only "00" thru
  22 *                           "99" are recognized. {j28 4182066}
  23 *   11/15/99    weiv        Added support for week of year/day of week format
  24 ********************************************************************************
  25 */
  26
  27 #define ZID_KEY_MAX 128
  28
  29 #include "unicode/utypes.h"
  30
  31 #if !UCONFIG_NO_FORMATTING
  32
  33 #include "unicode/smpdtfmt.h"
  34 #include "unicode/dtfmtsym.h"
  35 #include "unicode/ures.h"
  36 #include "unicode/msgfmt.h"
  37 #include "unicode/calendar.h"
  38 #include "unicode/gregocal.h"
  39 #include "unicode/timezone.h"
  40 #include "unicode/decimfmt.h"
  41 #include "unicode/dcfmtsym.h"
  42 #include "unicode/uchar.h"
  43 #include "unicode/uniset.h"
  44 #include "unicode/ustring.h"
  45 #include "unicode/basictz.h"
  46 #include "unicode/simpletz.h"
  47 #include "unicode/rbtz.h"
  48 #include "unicode/vtzone.h"
  49 #include "olsontz.h"
  50 #include "util.h"
  51 #include "fphdlimp.h"
  52 #include "gregoimp.h"
  53 #include "hebrwcal.h"
  54 #include "cstring.h"
  55 #include "uassert.h"
  56 #include "zstrfmt.h"
  57 #include "cmemory.h"
  58 #include "umutex.h"
  59 #include "smpdtfst.h"
  60 #include <float.h>
  61
  62 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
  63 #include <stdio.h>
  64 #endif
  65
  66 // *****************************************************************************
  67 // class SimpleDateFormat
  68 // *****************************************************************************
  69
  70 U_NAMESPACE_BEGIN
  71
  72 static const UChar PATTERN_CHAR_BASE = 0x40;
  73
  74 /**
  75  * Last-resort string to use for "GMT" when constructing time zone strings.
  76  */
  77 // For time zones that have no names, use strings GMT+minutes and
  78 // GMT-minutes. For instance, in France the time zone is GMT+60.
  79 // Also accepted are GMT+H:MM or GMT-H:MM.
  80 static const UChar gGmt[]      = {0x0047, 0x004D, 0x0054, 0x0000};         // "GMT"
  81 static const UChar gGmtPlus[]  = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
  82 static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
  83 static const UChar gDefGmtPat[]       = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
  84 static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
  85 static const UChar gDefGmtNegHmPat[]  = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
  86 static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
  87 static const UChar gDefGmtPosHmPat[]  = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
  88 static const UChar gUt[]       = {0x0055, 0x0054, 0x0000};  // "UT"
  89 static const UChar gUtc[]      = {0x0055, 0x0054, 0x0043, 0x0000};  // "UT"
  90
  91 typedef enum GmtPatSize {
  92     kGmtLen = 3,
  93     kGmtPatLen = 6,
  94     kNegHmsLen = 9,
  95     kNegHmLen = 6,
  96     kPosHmsLen = 9,
  97     kPosHmLen = 6,
  98     kUtLen = 2,
  99     kUtcLen = 3
 100 } GmtPatSize;
 101
 102 // Stuff needed for numbering system overrides
 103
 104 typedef enum OvrStrType {
 105     kOvrStrDate = 0,
 106     kOvrStrTime = 1,
 107     kOvrStrBoth = 2
 108 } OvrStrType;
 109
 110 static const UDateFormatField kDateFields[] = {
 111     UDAT_YEAR_FIELD,
 112     UDAT_MONTH_FIELD,
 113     UDAT_DATE_FIELD,
 114     UDAT_DAY_OF_YEAR_FIELD,
 115     UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
 116     UDAT_WEEK_OF_YEAR_FIELD,
 117     UDAT_WEEK_OF_MONTH_FIELD,
 118     UDAT_YEAR_WOY_FIELD,
 119     UDAT_EXTENDED_YEAR_FIELD,
 120     UDAT_JULIAN_DAY_FIELD,
 121     UDAT_STANDALONE_DAY_FIELD,
 122     UDAT_STANDALONE_MONTH_FIELD,
 123     UDAT_QUARTER_FIELD,
 124     UDAT_STANDALONE_QUARTER_FIELD };
 125 static const int8_t kDateFieldsCount = 13;
 126
 127 static const UDateFormatField kTimeFields[] = {
 128     UDAT_HOUR_OF_DAY1_FIELD,
 129     UDAT_HOUR_OF_DAY0_FIELD,
 130     UDAT_MINUTE_FIELD,
 131     UDAT_SECOND_FIELD,
 132     UDAT_FRACTIONAL_SECOND_FIELD,
 133     UDAT_HOUR1_FIELD,
 134     UDAT_HOUR0_FIELD,
 135     UDAT_MILLISECONDS_IN_DAY_FIELD,
 136     UDAT_TIMEZONE_RFC_FIELD };
 137 static const int8_t kTimeFieldsCount = 9;
 138
 139
 140 // This is a pattern-of-last-resort used when we can't load a usable pattern out
 141 // of a resource.
 142 static const UChar gDefaultPattern[] =
 143 {
 144     0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
 145 };  /* "yyyyMMdd hh:mm a" */
 146
 147 // This prefix is designed to NEVER MATCH real text, in order to
 148 // suppress the parsing of negative numbers.  Adjust as needed (if
 149 // this becomes valid Unicode).
 150 static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
 151
 152 /**
 153  * These are the tags we expect to see in normal resource bundle files associated
 154  * with a locale.
 155  */
 156 static const char gDateTimePatternsTag[]="DateTimePatterns";
 157
 158 static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
 159 static const UChar QUOTE = 0x27; // Single quote
 160
 161 /*
 162  * The field range check bias for each UDateFormatField.
 163  * The bias is added to the minimum and maximum values
 164  * before they are compared to the parsed number.
 165  * For example, the calendar stores zero-based month numbers
 166  * but the parsed month numbers start at 1, so the bias is 1.
 167  *
 168  * A value of -1 means that the value is not checked.
 169  */
 170 static const int32_t gFieldRangeBias[] = {
 171     -1,  // 'G' - UDAT_ERA_FIELD
 172     -1,  // 'y' - UDAT_YEAR_FIELD
 173      1,  // 'M' - UDAT_MONTH_FIELD
 174      0,  // 'd' - UDAT_DATE_FIELD
 175     -1,  // 'k' - UDAT_HOUR_OF_DAY1_FIELD
 176     -1,  // 'H' - UDAT_HOUR_OF_DAY0_FIELD
 177      0,  // 'm' - UDAT_MINUTE_FIELD
 178      0,  // 's' - UDAT_SEOND_FIELD
 179     -1,  // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
 180     -1,  // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
 181     -1,  // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
 182     -1,  // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
 183     -1,  // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
 184     -1,  // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
 185     -1,  // 'a' - UDAT_AM_PM_FIELD
 186     -1,  // 'h' - UDAT_HOUR1_FIELD
 187     -1,  // 'K' - UDAT_HOUR0_FIELD
 188     -1,  // 'z' - UDAT_TIMEZONE_FIELD
 189     -1,  // 'Y' - UDAT_YEAR_WOY_FIELD
 190     -1,  // 'e' - UDAT_DOW_LOCAL_FIELD
 191     -1,  // 'u' - UDAT_EXTENDED_YEAR_FIELD
 192     -1,  // 'g' - UDAT_JULIAN_DAY_FIELD
 193     -1,  // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
 194     -1,  // 'Z' - UDAT_TIMEZONE_RFC_FIELD
 195     -1,  // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
 196      0,  // 'c' - UDAT_STANDALONE_DAY_FIELD
 197      1,  // 'L' - UDAT_STANDALONE_MONTH_FIELD
 198     -1,  // 'Q' - UDAT_QUARTER_FIELD (1-4?)
 199     -1,  // 'q' - UDAT_STANDALONE_QUARTER_FIELD
 200     -1   // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
 201 };
 202
 203 static UMTX LOCK;
 204
 205 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
 206
 207 //----------------------------------------------------------------------
 208
 209 SimpleDateFormat::~SimpleDateFormat()
 210 {
 211     delete fSymbols;
 212     if (fGMTFormatters) {
 213         for (int32_t i = 0; i < kNumGMTFormatters; i++) {
 214             if (fGMTFormatters[i]) {
 215                 delete fGMTFormatters[i];
 216             }
 217         }
 218         uprv_free(fGMTFormatters);
 219
 220     }
 221     if (fNumberFormatters) {
 222         uprv_free(fNumberFormatters);
 223     }
 224
 225     while (fOverrideList) {
 226         NSOverride *cur = fOverrideList;
 227         fOverrideList = cur->next;
 228         delete cur->nf;
 229         uprv_free(cur);
 230     }
 231 }
 232
 233 //----------------------------------------------------------------------
 234
 235 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
 236   :   fLocale(Locale::getDefault()),
 237       fSymbols(NULL),
 238       fGMTFormatters(NULL),
 239       fNumberFormatters(NULL),
 240       fOverrideList(NULL)
 241 {
 242     construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
 243     initializeDefaultCentury();
 244 }
 245
 246 //----------------------------------------------------------------------
 247
 248 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 249                                    UErrorCode &status)
 250 :   fPattern(pattern),
 251     fLocale(Locale::getDefault()),
 252     fSymbols(NULL),
 253     fGMTFormatters(NULL),
 254     fNumberFormatters(NULL),
 255     fOverrideList(NULL)
 256 {
 257     fDateOverride.setToBogus();
 258     fTimeOverride.setToBogus();
 259     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 260     initialize(fLocale, status);
 261     initializeDefaultCentury();
 262
 263 }
 264 //----------------------------------------------------------------------
 265
 266 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 267                                    const UnicodeString& override,
 268                                    UErrorCode &status)
 269 :   fPattern(pattern),
 270     fLocale(Locale::getDefault()),
 271     fSymbols(NULL),
 272     fGMTFormatters(NULL),
 273     fNumberFormatters(NULL),
 274     fOverrideList(NULL)
 275 {
 276     fDateOverride.setTo(override);
 277     fTimeOverride.setToBogus();
 278     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 279     initialize(fLocale, status);
 280     initializeDefaultCentury();
 281
 282     processOverrideString(fLocale,override,kOvrStrBoth,status);
 283
 284 }
 285
 286 //----------------------------------------------------------------------
 287
 288 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 289                                    const Locale& locale,
 290                                    UErrorCode& status)
 291 :   fPattern(pattern),
 292     fLocale(locale),
 293     fGMTFormatters(NULL),
 294     fNumberFormatters(NULL),
 295     fOverrideList(NULL)
 296 {
 297
 298     fDateOverride.setToBogus();
 299     fTimeOverride.setToBogus();
 300
 301     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 302     initialize(fLocale, status);
 303     initializeDefaultCentury();
 304 }
 305
 306 //----------------------------------------------------------------------
 307
 308 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 309                                    const UnicodeString& override,
 310                                    const Locale& locale,
 311                                    UErrorCode& status)
 312 :   fPattern(pattern),
 313     fLocale(locale),
 314     fGMTFormatters(NULL),
 315     fNumberFormatters(NULL),
 316     fOverrideList(NULL)
 317 {
 318
 319     fDateOverride.setTo(override);
 320     fTimeOverride.setToBogus();
 321
 322     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 323     initialize(fLocale, status);
 324     initializeDefaultCentury();
 325
 326     processOverrideString(locale,override,kOvrStrBoth,status);
 327
 328 }
 329
 330 //----------------------------------------------------------------------
 331
 332 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 333                                    DateFormatSymbols* symbolsToAdopt,
 334                                    UErrorCode& status)
 335 :   fPattern(pattern),
 336     fLocale(Locale::getDefault()),
 337     fSymbols(symbolsToAdopt),
 338     fGMTFormatters(NULL),
 339     fNumberFormatters(NULL),
 340     fOverrideList(NULL)
 341 {
 342
 343     fDateOverride.setToBogus();
 344     fTimeOverride.setToBogus();
 345
 346     initializeCalendar(NULL,fLocale,status);
 347     initialize(fLocale, status);
 348     initializeDefaultCentury();
 349 }
 350
 351 //----------------------------------------------------------------------
 352
 353 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 354                                    const DateFormatSymbols& symbols,
 355                                    UErrorCode& status)
 356 :   fPattern(pattern),
 357     fLocale(Locale::getDefault()),
 358     fSymbols(new DateFormatSymbols(symbols)),
 359     fGMTFormatters(NULL),
 360     fNumberFormatters(NULL),
 361     fOverrideList(NULL)
 362 {
 363
 364     fDateOverride.setToBogus();
 365     fTimeOverride.setToBogus();
 366
 367     initializeCalendar(NULL, fLocale, status);
 368     initialize(fLocale, status);
 369     initializeDefaultCentury();
 370 }
 371
 372 //----------------------------------------------------------------------
 373
 374 // Not for public consumption; used by DateFormat
 375 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
 376                                    EStyle dateStyle,
 377                                    const Locale& locale,
 378                                    UErrorCode& status)
 379 :   fLocale(locale),
 380     fSymbols(NULL),
 381     fGMTFormatters(NULL),
 382     fNumberFormatters(NULL),
 383     fOverrideList(NULL)
 384 {
 385     construct(timeStyle, dateStyle, fLocale, status);
 386     if(U_SUCCESS(status)) {
 387       initializeDefaultCentury();
 388     }
 389 }
 390
 391 //----------------------------------------------------------------------
 392
 393 /**
 394  * Not for public consumption; used by DateFormat.  This constructor
 395  * never fails.  If the resource data is not available, it uses the
 396  * the last resort symbols.
 397  */
 398 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
 399                                    UErrorCode& status)
 400 :   fPattern(gDefaultPattern),
 401     fLocale(locale),
 402     fSymbols(NULL),
 403     fGMTFormatters(NULL),
 404     fNumberFormatters(NULL),
 405     fOverrideList(NULL)
 406 {
 407     if (U_FAILURE(status)) return;
 408     initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status);
 409     if (U_FAILURE(status))
 410     {
 411         status = U_ZERO_ERROR;
 412         delete fSymbols;
 413         // This constructor doesn't fail; it uses last resort data
 414         fSymbols = new DateFormatSymbols(status);
 415         /* test for NULL */
 416         if (fSymbols == 0) {
 417             status = U_MEMORY_ALLOCATION_ERROR;
 418             return;
 419         }
 420     }
 421
 422     fDateOverride.setToBogus();
 423     fTimeOverride.setToBogus();
 424
 425     initialize(fLocale, status);
 426     if(U_SUCCESS(status)) {
 427       initializeDefaultCentury();
 428     }
 429 }
 430
 431 //----------------------------------------------------------------------
 432
 433 SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
 434 :   DateFormat(other),
 435     fSymbols(NULL),
 436     fGMTFormatters(NULL),
 437     fNumberFormatters(NULL),
 438     fOverrideList(NULL)
 439 {
 440     *this = other;
 441 }
 442
 443 //----------------------------------------------------------------------
 444
 445 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
 446 {
 447     if (this == &other) {
 448         return *this;
 449     }
 450     DateFormat::operator=(other);
 451
 452     delete fSymbols;
 453     fSymbols = NULL;
 454
 455     if (other.fSymbols)
 456         fSymbols = new DateFormatSymbols(*other.fSymbols);
 457
 458     fDefaultCenturyStart         = other.fDefaultCenturyStart;
 459     fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
 460     fHaveDefaultCentury          = other.fHaveDefaultCentury;
 461
 462     fPattern = other.fPattern;
 463
 464     return *this;
 465 }
 466
 467 //----------------------------------------------------------------------
 468
 469 Format*
 470 SimpleDateFormat::clone() const
 471 {
 472     return new SimpleDateFormat(*this);
 473 }
 474
 475 //----------------------------------------------------------------------
 476
 477 UBool
 478 SimpleDateFormat::operator==(const Format& other) const
 479 {
 480     if (DateFormat::operator==(other)) {
 481         // DateFormat::operator== guarantees following cast is safe
 482         SimpleDateFormat* that = (SimpleDateFormat*)&other;
 483         return (fPattern             == that->fPattern &&
 484                 fSymbols             != NULL && // Check for pathological object
 485                 that->fSymbols       != NULL && // Check for pathological object
 486                 *fSymbols            == *that->fSymbols &&
 487                 fHaveDefaultCentury  == that->fHaveDefaultCentury &&
 488                 fDefaultCenturyStart == that->fDefaultCenturyStart);
 489     }
 490     return FALSE;
 491 }
 492
 493 //----------------------------------------------------------------------
 494
 495 void SimpleDateFormat::construct(EStyle timeStyle,
 496                                  EStyle dateStyle,
 497                                  const Locale& locale,
 498                                  UErrorCode& status)
 499 {
 500     // called by several constructors to load pattern data from the resources
 501     if (U_FAILURE(status)) return;
 502
 503     // We will need the calendar to know what type of symbols to load.
 504     initializeCalendar(NULL, locale, status);
 505     if (U_FAILURE(status)) return;
 506
 507     CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status);
 508     UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status);
 509     UResourceBundle *currentBundle;
 510
 511     if (U_FAILURE(status)) return;
 512
 513     if (ures_getSize(dateTimePatterns) <= kDateTime)
 514     {
 515         status = U_INVALID_FORMAT_ERROR;
 516         return;
 517     }
 518
 519     setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status),
 520                  ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status));
 521
 522     // create a symbols object from the locale
 523     initializeSymbols(locale,fCalendar, status);
 524     if (U_FAILURE(status)) return;
 525     /* test for NULL */
 526     if (fSymbols == 0) {
 527         status = U_MEMORY_ALLOCATION_ERROR;
 528         return;
 529     }
 530
 531     const UChar *resStr,*ovrStr;
 532     int32_t resStrLen,ovrStrLen = 0;
 533     fDateOverride.setToBogus();
 534     fTimeOverride.setToBogus();
 535
 536     // if the pattern should include both date and time information, use the date/time
 537     // pattern string as a guide to tell use how to glue together the appropriate date
 538     // and time pattern strings.  The actual gluing-together is handled by a convenience
 539     // method on MessageFormat.
 540     if ((timeStyle != kNone) && (dateStyle != kNone))
 541     {
 542         Formattable timeDateArray[2];
 543
 544         // use Formattable::adoptString() so that we can use fastCopyFrom()
 545         // instead of Formattable::setString()'s unaware, safe, deep string clone
 546         // see Jitterbug 2296
 547
 548         currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)timeStyle, NULL, &status);
 549         if (U_FAILURE(status)) {
 550            status = U_INVALID_FORMAT_ERROR;
 551            return;
 552         }
 553         switch (ures_getType(currentBundle)) {
 554             case URES_STRING: {
 555                resStr = ures_getString(currentBundle, &resStrLen, &status);
 556                break;
 557             }
 558             case URES_ARRAY: {
 559                resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
 560                ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
 561                fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen);
 562                break;
 563             }
 564             default: {
 565                status = U_INVALID_FORMAT_ERROR;
 566                ures_close(currentBundle);
 567                return;
 568             }
 569         }
 570         ures_close(currentBundle);
 571
 572         UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen);
 573         // NULL pointer check
 574         if (tempus1 == NULL) {
 575             status = U_MEMORY_ALLOCATION_ERROR;
 576             return;
 577         }
 578         timeDateArray[0].adoptString(tempus1);
 579
 580         currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)dateStyle, NULL, &status);
 581         if (U_FAILURE(status)) {
 582            status = U_INVALID_FORMAT_ERROR;
 583            return;
 584         }
 585         switch (ures_getType(currentBundle)) {
 586             case URES_STRING: {
 587                resStr = ures_getString(currentBundle, &resStrLen, &status);
 588                break;
 589             }
 590             case URES_ARRAY: {
 591                resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
 592                ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
 593                fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
 594                break;
 595             }
 596             default: {
 597                status = U_INVALID_FORMAT_ERROR;
 598                ures_close(currentBundle);
 599                return;
 600             }
 601         }
 602         ures_close(currentBundle);
 603
 604         UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen);
 605         // Null pointer check
 606         if (tempus2 == NULL) {
 607             status = U_MEMORY_ALLOCATION_ERROR;
 608             return;
 609         }
 610         timeDateArray[1].adoptString(tempus2);
 611
 612         int32_t glueIndex = kDateTime;
 613         int32_t patternsSize = ures_getSize(dateTimePatterns);
 614         if (patternsSize >= (kDateTimeOffset + kShort + 1)) {
 615             // Get proper date time format
 616             glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset));
 617         }
 618
 619         resStr = ures_getStringByIndex(dateTimePatterns, glueIndex, &resStrLen, &status);
 620         MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status);
 621     }
 622     // if the pattern includes just time data or just date date, load the appropriate
 623     // pattern string from the resources
 624     // setTo() - see DateFormatSymbols::assignArray comments
 625     else if (timeStyle != kNone) {
 626         currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)timeStyle, NULL, &status);
 627         if (U_FAILURE(status)) {
 628            status = U_INVALID_FORMAT_ERROR;
 629            return;
 630         }
 631         switch (ures_getType(currentBundle)) {
 632             case URES_STRING: {
 633                resStr = ures_getString(currentBundle, &resStrLen, &status);
 634                break;
 635             }
 636             case URES_ARRAY: {
 637                resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
 638                ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
 639                fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
 640                break;
 641             }
 642             default: {
 643                status = U_INVALID_FORMAT_ERROR;
 644                 ures_close(currentBundle);
 645                return;
 646             }
 647         }
 648         fPattern.setTo(TRUE, resStr, resStrLen);
 649         ures_close(currentBundle);
 650     }
 651     else if (dateStyle != kNone) {
 652         currentBundle = ures_getByIndex(dateTimePatterns, (int32_t)dateStyle, NULL, &status);
 653         if (U_FAILURE(status)) {
 654            status = U_INVALID_FORMAT_ERROR;
 655            return;
 656         }
 657         switch (ures_getType(currentBundle)) {
 658             case URES_STRING: {
 659                resStr = ures_getString(currentBundle, &resStrLen, &status);
 660                break;
 661             }
 662             case URES_ARRAY: {
 663                resStr = ures_getStringByIndex(currentBundle, 0, &resStrLen, &status);
 664                ovrStr = ures_getStringByIndex(currentBundle, 1, &ovrStrLen, &status);
 665                fDateOverride.setTo(TRUE, ovrStr, ovrStrLen);
 666                break;
 667             }
 668             default: {
 669                status = U_INVALID_FORMAT_ERROR;
 670                ures_close(currentBundle);
 671                return;
 672             }
 673         }
 674         fPattern.setTo(TRUE, resStr, resStrLen);
 675         ures_close(currentBundle);
 676     }
 677
 678     // and if it includes _neither_, that's an error
 679     else
 680         status = U_INVALID_FORMAT_ERROR;
 681
 682     // finally, finish initializing by creating a Calendar and a NumberFormat
 683     initialize(locale, status);
 684 }
 685
 686 //----------------------------------------------------------------------
 687
 688 Calendar*
 689 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
 690 {
 691     if(!U_FAILURE(status)) {
 692         fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status);
 693     }
 694     if (U_SUCCESS(status) && fCalendar == NULL) {
 695         status = U_MEMORY_ALLOCATION_ERROR;
 696     }
 697     return fCalendar;
 698 }
 699
 700 void
 701 SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status)
 702 {
 703   if(U_FAILURE(status)) {
 704     fSymbols = NULL;
 705   } else {
 706     // pass in calendar type - use NULL (default) if no calendar set (or err).
 707     fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
 708     // Null pointer check
 709     if (fSymbols == NULL) {
 710         status = U_MEMORY_ALLOCATION_ERROR;
 711         return;
 712     }
 713   }
 714 }
 715
 716 void
 717 SimpleDateFormat::initialize(const Locale& locale,
 718                              UErrorCode& status)
 719 {
 720     if (U_FAILURE(status)) return;
 721
 722     // We don't need to check that the row count is >= 1, since all 2d arrays have at
 723     // least one row
 724     fNumberFormat = NumberFormat::createInstance(locale, status);
 725     if (fNumberFormat != NULL && U_SUCCESS(status))
 726     {
 727         // no matter what the locale's default number format looked like, we want
 728         // to modify it so that it doesn't use thousands separators, doesn't always
 729         // show the decimal point, and recognizes integers only when parsing
 730
 731         fNumberFormat->setGroupingUsed(FALSE);
 732         DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
 733         if (decfmt != NULL) {
 734             decfmt->setDecimalSeparatorAlwaysShown(FALSE);
 735         }
 736         fNumberFormat->setParseIntegerOnly(TRUE);
 737         fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
 738
 739         // TODO: Really, the default should be lenient...
 740         fNumberFormat->setParseStrict(FALSE);
 741
 742         initNumberFormatters(locale,status);
 743
 744     }
 745     else if (U_SUCCESS(status))
 746     {
 747         status = U_MISSING_RESOURCE_ERROR;
 748     }
 749 }
 750
 751 /* Initialize the fields we use to disambiguate ambiguous years. Separate
 752  * so we can call it from readObject().
 753  */
 754 void SimpleDateFormat::initializeDefaultCentury()
 755 {
 756   if(fCalendar) {
 757     fHaveDefaultCentury = fCalendar->haveDefaultCentury();
 758     if(fHaveDefaultCentury) {
 759       fDefaultCenturyStart = fCalendar->defaultCenturyStart();
 760       fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
 761     } else {
 762       fDefaultCenturyStart = DBL_MIN;
 763       fDefaultCenturyStartYear = -1;
 764     }
 765   }
 766 }
 767
 768 /* Define one-century window into which to disambiguate dates using
 769  * two-digit years. Make public in JDK 1.2.
 770  */
 771 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
 772 {
 773     if(U_FAILURE(status)) {
 774         return;
 775     }
 776     if(!fCalendar) {
 777       status = U_ILLEGAL_ARGUMENT_ERROR;
 778       return;
 779     }
 780
 781     fCalendar->setTime(startDate, status);
 782     if(U_SUCCESS(status)) {
 783         fHaveDefaultCentury = TRUE;
 784         fDefaultCenturyStart = startDate;
 785         fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
 786     }
 787 }
 788
 789 //----------------------------------------------------------------------
 790
 791 UnicodeString&
 792 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
 793 {
 794   UErrorCode status = U_ZERO_ERROR;
 795   FieldPositionOnlyHandler handler(pos);
 796   return _format(cal, appendTo, handler, status);
 797 }
 798
 799 //----------------------------------------------------------------------
 800
 801 UnicodeString&
 802 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo,
 803                          FieldPositionIterator* posIter, UErrorCode& status) const
 804 {
 805   FieldPositionIteratorHandler handler(posIter, status);
 806   return _format(cal, appendTo, handler, status);
 807 }
 808
 809 //----------------------------------------------------------------------
 810
 811 UnicodeString&
 812 SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo, FieldPositionHandler& handler,
 813                           UErrorCode& status) const
 814 {
 815     Calendar *workCal = &cal;
 816     TimeZone *backupTZ = NULL;
 817     if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
 818         // Different calendar type
 819         // We use the time and time zone from the input calendar, but
 820         // do not use the input calendar for field calculation.
 821         UDate t = cal.getTime(status);
 822         fCalendar->setTime(t, status);
 823         backupTZ = fCalendar->getTimeZone().clone();
 824         fCalendar->setTimeZone(cal.getTimeZone());
 825         workCal = fCalendar;
 826     }
 827
 828     UBool inQuote = FALSE;
 829     UChar prevCh = 0;
 830     int32_t count = 0;
 831
 832     // loop through the pattern string character by character
 833     for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
 834         UChar ch = fPattern[i];
 835
 836         // Use subFormat() to format a repeated pattern character
 837         // when a different pattern or non-pattern character is seen
 838         if (ch != prevCh && count > 0) {
 839             subFormat(appendTo, prevCh, count, handler, *workCal, status);
 840             count = 0;
 841         }
 842         if (ch == QUOTE) {
 843             // Consecutive single quotes are a single quote literal,
 844             // either outside of quotes or between quotes
 845             if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
 846                 appendTo += (UChar)QUOTE;
 847                 ++i;
 848             } else {
 849                 inQuote = ! inQuote;
 850             }
 851         }
 852         else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
 853                     || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
 854             // ch is a date-time pattern character to be interpreted
 855             // by subFormat(); count the number of times it is repeated
 856             prevCh = ch;
 857             ++count;
 858         }
 859         else {
 860             // Append quoted characters and unquoted non-pattern characters
 861             appendTo += ch;
 862         }
 863     }
 864
 865     // Format the last item in the pattern, if any
 866     if (count > 0) {
 867         subFormat(appendTo, prevCh, count, handler, *workCal, status);
 868     }
 869
 870     if (backupTZ != NULL) {
 871         // Restore the original time zone
 872         fCalendar->adoptTimeZone(backupTZ);
 873     }
 874
 875     return appendTo;
 876 }
 877
 878 //----------------------------------------------------------------------
 879
 880 /* Map calendar field into calendar field level.
 881  * the larger the level, the smaller the field unit.
 882  * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
 883  * UCAL_MONTH level is 20.
 884  * NOTE: if new fields adds in, the table needs to update.
 885  */
 886 const int32_t
 887 SimpleDateFormat::fgCalendarFieldToLevel[] =
 888 {
 889     /*GyM*/ 0, 10, 20,
 890     /*wW*/ 20, 30,
 891     /*dDEF*/ 30, 20, 30, 30,
 892     /*ahHm*/ 40, 50, 50, 60,
 893     /*sS..*/ 70, 80,
 894     /*z?Y*/ 0, 0, 10,
 895     /*eug*/ 30, 10, 0,
 896     /*A*/ 40
 897 };
 898
 899
 900 /* Map calendar field LETTER into calendar field level.
 901  * the larger the level, the smaller the field unit.
 902  * NOTE: if new fields adds in, the table needs to update.
 903  */
 904 const int32_t
 905 SimpleDateFormat::fgPatternCharToLevel[] = {
 906     //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
 907         -1, 40, -1, -1, 20, 30, 30,  0, 50, -1, -1, 50, 20, 20, -1, -1,
 908     //   P   Q   R   S   T   U   V   W   X   Y   Z
 909         -1, 20, -1, 80, -1, -1,  0, 30, -1, 10,  0, -1, -1, -1, -1, -1,
 910     //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
 911         -1, 40, -1, 30, 30, 30, -1,  0, 50, -1, -1, 50, -1, 60, -1, -1,
 912     //   p   q   r   s   t   u   v   w   x   y   z
 913         -1, 20, -1, 70, -1, 10,  0, 20, -1, 10,  0, -1, -1, -1, -1, -1
 914 };
 915
 916
 917 // Map index into pattern character string to Calendar field number.
 918 const UCalendarDateFields
 919 SimpleDateFormat::fgPatternIndexToCalendarField[] =
 920 {
 921     /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
 922     /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
 923     /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
 924     /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
 925     /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
 926     /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
 927     /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
 928     /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
 929     /*v*/   UCAL_ZONE_OFFSET,
 930     /*c*/   UCAL_DOW_LOCAL,
 931     /*L*/   UCAL_MONTH,
 932     /*Q*/   UCAL_MONTH,
 933     /*q*/   UCAL_MONTH,
 934     /*V*/   UCAL_ZONE_OFFSET,
 935 };
 936
 937 // Map index into pattern character string to DateFormat field number
 938 const UDateFormatField
 939 SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
 940     /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
 941     /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
 942     /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
 943     /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
 944     /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
 945     /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
 946     /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
 947     /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
 948     /*v*/   UDAT_TIMEZONE_GENERIC_FIELD,
 949     /*c*/   UDAT_STANDALONE_DAY_FIELD,
 950     /*L*/   UDAT_STANDALONE_MONTH_FIELD,
 951     /*Q*/   UDAT_QUARTER_FIELD,
 952     /*q*/   UDAT_STANDALONE_QUARTER_FIELD,
 953     /*V*/   UDAT_TIMEZONE_SPECIAL_FIELD,
 954 };
 955
 956 //----------------------------------------------------------------------
 957
 958 /**
 959  * Append symbols[value] to dst.  Make sure the array index is not out
 960  * of bounds.
 961  */
 962 static inline void
 963 _appendSymbol(UnicodeString& dst,
 964               int32_t value,
 965               const UnicodeString* symbols,
 966               int32_t symbolsCount) {
 967     U_ASSERT(0 <= value && value < symbolsCount);
 968     if (0 <= value && value < symbolsCount) {
 969         dst += symbols[value];
 970     }
 971 }
 972
 973 //---------------------------------------------------------------------
 974 void
 975 SimpleDateFormat::appendGMT(NumberFormat *currentNumberFormat,UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const{
 976     int32_t offset = cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status);
 977     if (U_FAILURE(status)) {
 978         return;
 979     }
 980     if (isDefaultGMTFormat()) {
 981         formatGMTDefault(currentNumberFormat,appendTo, offset);
 982     } else {
 983         ((SimpleDateFormat*)this)->initGMTFormatters(status);
 984         if (U_SUCCESS(status)) {
 985             int32_t type;
 986             if (offset < 0) {
 987                 offset = -offset;
 988                 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTNegativeHM : kGMTNegativeHMS;
 989             } else {
 990                 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTPositiveHM : kGMTPositiveHMS;
 991             }
 992             Formattable param(offset, Formattable::kIsDate);
 993             FieldPosition fpos(0);
 994             fGMTFormatters[type]->format(&param, 1, appendTo, fpos, status);
 995         }
 996     }
 997 }
 998
 999 int32_t
1000 SimpleDateFormat::parseGMT(const UnicodeString &text, ParsePosition &pos) const {
1001     if (!isDefaultGMTFormat()) {
1002         int32_t start = pos.getIndex();
1003
1004         // Quick check
1005         UBool prefixMatch = FALSE;
1006         int32_t prefixLen = fSymbols->fGmtFormat.indexOf((UChar)0x007B /* '{' */);
1007         if (prefixLen > 0 && text.compare(start, prefixLen, fSymbols->fGmtFormat, 0, prefixLen) == 0) {
1008             prefixMatch = TRUE;
1009         }
1010         if (prefixMatch) {
1011             // Prefix matched
1012             UErrorCode status = U_ZERO_ERROR;
1013             ((SimpleDateFormat*)this)->initGMTFormatters(status);
1014             if (U_SUCCESS(status)) {
1015                 Formattable parsed;
1016                 int32_t parsedCount;
1017
1018                 // Try negative Hms
1019                 fGMTFormatters[kGMTNegativeHMS]->parseObject(text, parsed, pos);
1020                 if (pos.getErrorIndex() == -1 &&
1021                     (pos.getIndex() - start) >= fGMTFormatHmsMinLen[kGMTNegativeHMSMinLenIdx]) {
1022                     parsed.getArray(parsedCount);
1023                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
1024                         return (int32_t)(-1 * (int64_t)parsed[0].getDate());
1025                     }
1026                 }
1027
1028                 // Reset ParsePosition
1029                 pos.setIndex(start);
1030                 pos.setErrorIndex(-1);
1031
1032                 // Try positive Hms
1033                 fGMTFormatters[kGMTPositiveHMS]->parseObject(text, parsed, pos);
1034                 if (pos.getErrorIndex() == -1 &&
1035                     (pos.getIndex() - start) >= fGMTFormatHmsMinLen[kGMTPositiveHMSMinLenIdx]) {
1036                     parsed.getArray(parsedCount);
1037                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
1038                         return (int32_t)((int64_t)parsed[0].getDate());
1039                     }
1040                 }
1041
1042                 // Reset ParsePosition
1043                 pos.setIndex(start);
1044                 pos.setErrorIndex(-1);
1045
1046                 // Try negative Hm
1047                 fGMTFormatters[kGMTNegativeHM]->parseObject(text, parsed, pos);
1048                 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
1049                     parsed.getArray(parsedCount);
1050                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
1051                         return (int32_t)(-1 * (int64_t)parsed[0].getDate());
1052                     }
1053                 }
1054
1055                 // Reset ParsePosition
1056                 pos.setIndex(start);
1057                 pos.setErrorIndex(-1);
1058
1059                 // Try positive Hm
1060                 fGMTFormatters[kGMTPositiveHM]->parseObject(text, parsed, pos);
1061                 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
1062                     parsed.getArray(parsedCount);
1063                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
1064                         return (int32_t)((int64_t)parsed[0].getDate());
1065                     }
1066                 }
1067
1068                 // Reset ParsePosition
1069                 pos.setIndex(start);
1070                 pos.setErrorIndex(-1);
1071             }
1072             // fall through to the default GMT parsing method
1073         }
1074     }
1075     return parseGMTDefault(text, pos);
1076 }
1077
1078 void
1079 SimpleDateFormat::formatGMTDefault(NumberFormat *currentNumberFormat,UnicodeString &appendTo, int32_t offset) const {
1080     if (offset < 0) {
1081         appendTo += gGmtMinus;
1082         offset = -offset; // suppress the '-' sign for text display.
1083     }else{
1084         appendTo += gGmtPlus;
1085     }
1086
1087     offset /= U_MILLIS_PER_SECOND; // now in seconds
1088     int32_t sec = offset % 60;
1089     offset /= 60;
1090     int32_t min = offset % 60;
1091     int32_t hour = offset / 60;
1092
1093
1094     zeroPaddingNumber(currentNumberFormat,appendTo, hour, 2, 2);
1095     appendTo += (UChar)0x003A /*':'*/;
1096     zeroPaddingNumber(currentNumberFormat,appendTo, min, 2, 2);
1097     if (sec != 0) {
1098         appendTo += (UChar)0x003A /*':'*/;
1099         zeroPaddingNumber(currentNumberFormat,appendTo, sec, 2, 2);
1100     }
1101 }
1102
1103 int32_t
1104 SimpleDateFormat::parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const {
1105     int32_t start = pos.getIndex();
1106     NumberFormat *currentNumberFormat = getNumberFormatByIndex(UDAT_TIMEZONE_RFC_FIELD);
1107
1108     if (start + kUtLen + 1 >= text.length()) {
1109         pos.setErrorIndex(start);
1110         return 0;
1111     }
1112
1113     int32_t cur = start;
1114     // "GMT"
1115     if (text.compare(start, kGmtLen, gGmt) == 0) {
1116         cur += kGmtLen;
1117     } else if (text.compare(start, kUtLen, gUt) == 0) {
1118         cur += kUtLen;
1119     } else {
1120         pos.setErrorIndex(start);
1121         return 0;
1122     }
1123     // Sign
1124     UBool negative = FALSE;
1125     if (text.charAt(cur) == (UChar)0x002D /* minus */) {
1126         negative = TRUE;
1127     } else if (text.charAt(cur) != (UChar)0x002B /* plus */) {
1128         pos.setErrorIndex(cur);
1129         return 0;
1130     }
1131     cur++;
1132
1133     // Numbers
1134     int32_t numLen;
1135     pos.setIndex(cur);
1136
1137     Formattable number;
1138     parseInt(text, number, 6, pos, FALSE,currentNumberFormat);
1139     numLen = pos.getIndex() - cur;
1140
1141     if (numLen <= 0) {
1142         pos.setIndex(start);
1143         pos.setErrorIndex(cur);
1144         return 0;
1145     }
1146
1147     int32_t numVal = number.getLong();
1148
1149     int32_t hour = 0;
1150     int32_t min = 0;
1151     int32_t sec = 0;
1152
1153     if (numLen <= 2) {
1154         // H[H][:mm[:ss]]
1155         hour = numVal;
1156         cur += numLen;
1157         if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
1158             cur++;
1159             pos.setIndex(cur);
1160             parseInt(text, number, 2, pos, FALSE,currentNumberFormat);
1161             numLen = pos.getIndex() - cur;
1162             if (numLen == 2) {
1163                 // got minute field
1164                 min = number.getLong();
1165                 cur += numLen;
1166                 if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
1167                     cur++;
1168                     pos.setIndex(cur);
1169                     parseInt(text, number, 2, pos, FALSE,currentNumberFormat);
1170                     numLen = pos.getIndex() - cur;
1171                     if (numLen == 2) {
1172                         // got second field
1173                         sec = number.getLong();
1174                     } else {
1175                         // reset position
1176                         pos.setIndex(cur - 1);
1177                         pos.setErrorIndex(-1);
1178                     }
1179                 }
1180             } else {
1181                 // reset postion
1182                 pos.setIndex(cur - 1);
1183                 pos.setErrorIndex(-1);
1184             }
1185         }
1186     } else if (numLen == 3 || numLen == 4) {
1187         // Hmm or HHmm
1188         hour = numVal / 100;
1189         min = numVal % 100;
1190     } else if (numLen == 5 || numLen == 6) {
1191         // Hmmss or HHmmss
1192         hour = numVal / 10000;
1193         min = (numVal % 10000) / 100;
1194         sec = numVal % 100;
1195     } else {
1196         // HHmmss followed by bogus numbers
1197         pos.setIndex(cur + 6);
1198
1199         int32_t shift = numLen - 6;
1200         while (shift > 0) {
1201             numVal /= 10;
1202             shift--;
1203         }
1204         hour = numVal / 10000;
1205         min = (numVal % 10000) / 100;
1206         sec = numVal % 100;
1207     }
1208
1209     int32_t offset = ((hour*60 + min)*60 + sec)*1000;
1210     if (negative) {
1211         offset = -offset;
1212     }
1213     return offset;
1214 }
1215
1216 UBool
1217 SimpleDateFormat::isDefaultGMTFormat() const {
1218     // GMT pattern
1219     if (fSymbols->fGmtFormat.length() == 0) {
1220         // No GMT pattern is set
1221         return TRUE;
1222     } else if (fSymbols->fGmtFormat.compare(gDefGmtPat, kGmtPatLen) != 0) {
1223         return FALSE;
1224     }
1225     // Hour patterns
1226     if (fSymbols->fGmtHourFormats == NULL || fSymbols->fGmtHourFormatsCount != DateFormatSymbols::GMT_HOUR_COUNT) {
1227         // No Hour pattern is set
1228         return TRUE;
1229     } else if ((fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS].compare(gDefGmtNegHmsPat, kNegHmsLen) != 0)
1230         || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM].compare(gDefGmtNegHmPat, kNegHmLen) != 0)
1231         || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS].compare(gDefGmtPosHmsPat, kPosHmsLen) != 0)
1232         || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM].compare(gDefGmtPosHmPat, kPosHmLen) != 0)) {
1233         return FALSE;
1234     }
1235     return TRUE;
1236 }
1237
1238 void
1239 SimpleDateFormat::formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const {
1240     UChar sign = 0x002B /* '+' */;
1241     if (offset < 0) {
1242         offset = -offset;
1243         sign = 0x002D /* '-' */;
1244     }
1245     appendTo.append(sign);
1246
1247     int32_t offsetH = offset / U_MILLIS_PER_HOUR;
1248     offset = offset % U_MILLIS_PER_HOUR;
1249     int32_t offsetM = offset / U_MILLIS_PER_MINUTE;
1250     offset = offset % U_MILLIS_PER_MINUTE;
1251     int32_t offsetS = offset / U_MILLIS_PER_SECOND;
1252
1253     int32_t num = 0, denom = 0;
1254     if (offsetS == 0) {
1255         offset = offsetH*100 + offsetM; // HHmm
1256         num = offset % 10000;
1257         denom = 1000;
1258     } else {
1259         offset = offsetH*10000 + offsetM*100 + offsetS; // HHmmss
1260         num = offset % 1000000;
1261         denom = 100000;
1262     }
1263     while (denom >= 1) {
1264         UChar digit = (UChar)0x0030 + (num / denom);
1265         appendTo.append(digit);
1266         num = num % denom;
1267         denom /= 10;
1268     }
1269 }
1270
1271 void
1272 SimpleDateFormat::initGMTFormatters(UErrorCode &status) {
1273     if (U_FAILURE(status)) {
1274         return;
1275     }
1276     umtx_lock(&LOCK);
1277     if (fGMTFormatters == NULL) {
1278         fGMTFormatters = (MessageFormat**)uprv_malloc(kNumGMTFormatters * sizeof(MessageFormat*));
1279         if (fGMTFormatters) {
1280             for (int32_t i = 0; i < kNumGMTFormatters; i++) {
1281                 const UnicodeString *hourPattern = NULL; //initialized it to avoid warning
1282                 switch (i) {
1283                     case kGMTNegativeHMS:
1284                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS]);
1285                         break;
1286                     case kGMTNegativeHM:
1287                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM]);
1288                         break;
1289                     case kGMTPositiveHMS:
1290                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS]);
1291                         break;
1292                     case kGMTPositiveHM:
1293                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM]);
1294                         break;
1295                 }
1296                 fGMTFormatters[i] = new MessageFormat(fSymbols->fGmtFormat, status);
1297                 GregorianCalendar *gcal = new GregorianCalendar(TimeZone::createTimeZone(UnicodeString(gEtcUTC)), status);
1298                 if (U_FAILURE(status)) {
1299                     break;
1300                 }
1301                 SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone();
1302                 sdf->adoptCalendar(gcal);
1303                 sdf->applyPattern(*hourPattern);
1304
1305                 // This prevents a hours format pattern like "-HH:mm:ss" from matching
1306                 // in a string like "GMT-07:00 10:08:11 PM"
1307                 sdf->setLenient(FALSE);
1308
1309                 fGMTFormatters[i]->adoptFormat(0, sdf);
1310
1311                 // For parsing, we only allow Hms patterns to be equal or longer
1312                 // than its length with fixed minutes/seconds digits.
1313                 // See #6880
1314                 if (i == kGMTNegativeHMS || i == kGMTPositiveHMS) {
1315                     UnicodeString tmp;
1316                     Formattable tmpParam(60*60*1000, Formattable::kIsDate);
1317                     FieldPosition fpos(0);
1318                     fGMTFormatters[i]->format(&tmpParam, 1, tmp, fpos, status);
1319                     if (U_FAILURE(status)) {
1320                         break;
1321                     }
1322                     if (i == kGMTNegativeHMS) {
1323                         fGMTFormatHmsMinLen[kGMTNegativeHMSMinLenIdx] = tmp.length();
1324                     } else {
1325                         fGMTFormatHmsMinLen[kGMTPositiveHMSMinLenIdx] = tmp.length();
1326                     }
1327                 }
1328             }
1329         } else {
1330             status = U_MEMORY_ALLOCATION_ERROR;
1331         }
1332     }
1333     umtx_unlock(&LOCK);
1334 }
1335
1336 void
1337 SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) {
1338     if (U_FAILURE(status)) {
1339         return;
1340     }
1341     if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) {
1342         return;
1343     }
1344     umtx_lock(&LOCK);
1345     if (fNumberFormatters == NULL) {
1346         fNumberFormatters = (NumberFormat**)uprv_malloc(UDAT_FIELD_COUNT * sizeof(NumberFormat*));
1347         if (fNumberFormatters) {
1348             for (int32_t i = 0; i < UDAT_FIELD_COUNT; i++) {
1349                 fNumberFormatters[i] = fNumberFormat;
1350             }
1351         } else {
1352             status = U_MEMORY_ALLOCATION_ERROR;
1353         }
1354     }
1355     umtx_unlock(&LOCK);
1356
1357     processOverrideString(locale,fDateOverride,kOvrStrDate,status);
1358     processOverrideString(locale,fTimeOverride,kOvrStrTime,status);
1359
1360 }
1361
1362 void
1363 SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) {
1364     if (str.isBogus()) {
1365         return;
1366     }
1367     int32_t start = 0;
1368     int32_t len;
1369     UnicodeString nsName;
1370     UnicodeString ovrField;
1371     UBool moreToProcess = TRUE;
1372
1373     while (moreToProcess) {
1374         int32_t delimiterPosition = str.indexOf(ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start);
1375         if (delimiterPosition == -1) {
1376             moreToProcess = FALSE;
1377             len = str.length() - start;
1378         } else {
1379             len = delimiterPosition - start;
1380         }
1381         UnicodeString currentString(str,start,len);
1382         int32_t equalSignPosition = currentString.indexOf(ULOC_KEYWORD_ASSIGN_UNICODE,0);
1383         if (equalSignPosition == -1) { // Simple override string such as "hebrew"
1384             nsName.setTo(currentString);
1385             ovrField.setToBogus();
1386         } else { // Field specific override string such as "y=hebrew"
1387             nsName.setTo(currentString,equalSignPosition+1);
1388             ovrField.setTo(currentString,0,1); // We just need the first character.
1389         }
1390
1391         int32_t nsNameHash = nsName.hashCode();
1392         // See if the numbering system is in the override list, if not, then add it.
1393         NSOverride *cur = fOverrideList;
1394         NumberFormat *nf = NULL;
1395         UBool found = FALSE;
1396         while ( cur && !found ) {
1397             if ( cur->hash == nsNameHash ) {
1398                 nf = cur->nf;
1399                 found = TRUE;
1400             }
1401             cur = cur->next;
1402         }
1403
1404         if (!found) {
1405            cur = (NSOverride *)uprv_malloc(sizeof(NSOverride));
1406            if (cur) {
1407                char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1408                uprv_strcpy(kw,"numbers=");
1409                nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV);
1410
1411                Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw);
1412                nf = NumberFormat::createInstance(ovrLoc,status);
1413
1414                // no matter what the locale's default number format looked like, we want
1415                // to modify it so that it doesn't use thousands separators, doesn't always
1416                // show the decimal point, and recognizes integers only when parsing
1417
1418                if (U_SUCCESS(status)) {
1419                    nf->setGroupingUsed(FALSE);
1420                    DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(nf);
1421                    if (decfmt != NULL) {
1422                        decfmt->setDecimalSeparatorAlwaysShown(FALSE);
1423                    }
1424                    nf->setParseIntegerOnly(TRUE);
1425                    nf->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
1426
1427                    cur->nf = nf;
1428                    cur->hash = nsNameHash;
1429                    cur->next = fOverrideList;
1430                    fOverrideList = cur;
1431                }
1432                else {
1433                    // clean up before returning
1434                    if (cur != NULL) {
1435                        uprv_free(cur);
1436                    }
1437                   return;
1438                }
1439
1440            } else {
1441                status = U_MEMORY_ALLOCATION_ERROR;
1442                return;
1443            }
1444         }
1445
1446         // Now that we have an appropriate number formatter, fill in the appropriate spaces in the
1447         // number formatters table.
1448
1449         if (ovrField.isBogus()) {
1450             switch (type) {
1451                 case kOvrStrDate:
1452                 case kOvrStrBoth: {
1453                     for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) {
1454                         fNumberFormatters[kDateFields[i]] = nf;
1455                     }
1456                     if (type==kOvrStrDate) {
1457                         break;
1458                     }
1459                 }
1460                 case kOvrStrTime : {
1461                     for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) {
1462                         fNumberFormatters[kTimeFields[i]] = nf;
1463                     }
1464                     break;
1465                 }
1466             }
1467         } else {
1468            UChar ch = ovrField.charAt(0);
1469            UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
1470            UDateFormatField patternCharIndex;
1471
1472            // if the pattern character is unrecognized, signal an error and bail out
1473            if (patternCharPtr == NULL) {
1474                status = U_INVALID_FORMAT_ERROR;
1475                return;
1476            }
1477            patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
1478
1479            // Set the number formatter in the table
1480            fNumberFormatters[patternCharIndex] = nf;
1481         }
1482
1483         start = delimiterPosition + 1;
1484     }
1485 }
1486 //---------------------------------------------------------------------
1487 void
1488 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1489                             UChar ch,
1490                             int32_t count,
1491                             FieldPositionHandler& handler,
1492                             Calendar& cal,
1493                             UErrorCode& status) const
1494 {
1495     if (U_FAILURE(status)) {
1496         return;
1497     }
1498
1499     // this function gets called by format() to produce the appropriate substitution
1500     // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1501
1502     UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
1503     UDateFormatField patternCharIndex;
1504     const int32_t maxIntCount = 10;
1505     int32_t beginOffset = appendTo.length();
1506     NumberFormat *currentNumberFormat;
1507
1508     UBool isHebrewCalendar = !strcmp(cal.getType(),"hebrew");
1509
1510     // if the pattern character is unrecognized, signal an error and dump out
1511     if (patternCharPtr == NULL)
1512     {
1513         status = U_INVALID_FORMAT_ERROR;
1514         return;
1515     }
1516
1517     patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
1518     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1519     int32_t value = cal.get(field, status);
1520     if (U_FAILURE(status)) {
1521         return;
1522     }
1523
1524     currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
1525     switch (patternCharIndex) {
1526
1527     // for any "G" symbol, write out the appropriate era string
1528     // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1529     case UDAT_ERA_FIELD:
1530         if (count == 5)
1531            _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1532         else if (count == 4)
1533            _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1534         else
1535            _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1536         break;
1537
1538     // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1539     // NEW: UTS#35:
1540 //Year         y     yy     yyy     yyyy     yyyyy
1541 //AD 1         1     01     001     0001     00001
1542 //AD 12       12     12     012     0012     00012
1543 //AD 123     123     23     123     0123     00123
1544 //AD 1234   1234     34    1234     1234     01234
1545 //AD 12345 12345     45   12345    12345     12345
1546     case UDAT_YEAR_FIELD:
1547     case UDAT_YEAR_WOY_FIELD:
1548         if(count == 2)
1549             zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2);
1550         else
1551             zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount);
1552         break;
1553
1554     // for "MMMM", write out the whole month name, for "MMM", write out the month
1555     // abbreviation, for "M" or "MM", write out the month as a number with the
1556     // appropriate number of digits
1557     // for "MMMMM", use the narrow form
1558     case UDAT_MONTH_FIELD:
1559         if ( isHebrewCalendar ) {
1560            HebrewCalendar *hc = (HebrewCalendar*)&cal;
1561            if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 )
1562                value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar.
1563            if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 )
1564                value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7.
1565         }
1566         if (count == 5)
1567             _appendSymbol(appendTo, value, fSymbols->fNarrowMonths,
1568                           fSymbols->fNarrowMonthsCount);
1569         else if (count == 4)
1570             _appendSymbol(appendTo, value, fSymbols->fMonths,
1571                           fSymbols->fMonthsCount);
1572         else if (count == 3)
1573             _appendSymbol(appendTo, value, fSymbols->fShortMonths,
1574                           fSymbols->fShortMonthsCount);
1575         else
1576             zeroPaddingNumber(currentNumberFormat,appendTo, value + 1, count, maxIntCount);
1577         break;
1578
1579     // for "LLLL", write out the whole month name, for "LLL", write out the month
1580     // abbreviation, for "L" or "LL", write out the month as a number with the
1581     // appropriate number of digits
1582     // for "LLLLL", use the narrow form
1583     case UDAT_STANDALONE_MONTH_FIELD:
1584         if (count == 5)
1585             _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowMonths,
1586                           fSymbols->fStandaloneNarrowMonthsCount);
1587         else if (count == 4)
1588             _appendSymbol(appendTo, value, fSymbols->fStandaloneMonths,
1589                           fSymbols->fStandaloneMonthsCount);
1590         else if (count == 3)
1591             _appendSymbol(appendTo, value, fSymbols->fStandaloneShortMonths,
1592                           fSymbols->fStandaloneShortMonthsCount);
1593         else
1594             zeroPaddingNumber(currentNumberFormat,appendTo, value + 1, count, maxIntCount);
1595         break;
1596
1597     // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1598     case UDAT_HOUR_OF_DAY1_FIELD:
1599         if (value == 0)
1600             zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1601         else
1602             zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1603         break;
1604
1605     case UDAT_FRACTIONAL_SECOND_FIELD:
1606         // Fractional seconds left-justify
1607         {
1608             currentNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count);
1609             currentNumberFormat->setMaximumIntegerDigits(maxIntCount);
1610             if (count == 1) {
1611                 value /= 100;
1612             } else if (count == 2) {
1613                 value /= 10;
1614             }
1615             FieldPosition p(0);
1616             currentNumberFormat->format(value, appendTo, p);
1617             if (count > 3) {
1618                 currentNumberFormat->setMinimumIntegerDigits(count - 3);
1619                 currentNumberFormat->format((int32_t)0, appendTo, p);
1620             }
1621         }
1622         break;
1623
1624     // for "ee" or "e", use local numeric day-of-the-week
1625     // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1626     // for "EEEE" or "eeee", write out the wide day-of-the-week name
1627     // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1628     case UDAT_DOW_LOCAL_FIELD:
1629         if ( count < 3 ) {
1630             zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1631             break;
1632         }
1633         // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1634         // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1635         value = cal.get(UCAL_DAY_OF_WEEK, status);
1636         if (U_FAILURE(status)) {
1637             return;
1638         }
1639         // fall through, do not break here
1640     case UDAT_DAY_OF_WEEK_FIELD:
1641         if (count == 5)
1642             _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1643                           fSymbols->fNarrowWeekdaysCount);
1644         else if (count == 4)
1645             _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1646                           fSymbols->fWeekdaysCount);
1647         else
1648             _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1649                           fSymbols->fShortWeekdaysCount);
1650         break;
1651
1652     // for "ccc", write out the abbreviated day-of-the-week name
1653     // for "cccc", write out the wide day-of-the-week name
1654     // for "ccccc", use the narrow day-of-the-week name
1655     case UDAT_STANDALONE_DAY_FIELD:
1656         if ( count < 3 ) {
1657             zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount);
1658             break;
1659         }
1660         // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1661         // we want standard day-of-week, so first fix value.
1662         value = cal.get(UCAL_DAY_OF_WEEK, status);
1663         if (U_FAILURE(status)) {
1664             return;
1665         }
1666         if (count == 5)
1667             _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1668                           fSymbols->fStandaloneNarrowWeekdaysCount);
1669         else if (count == 4)
1670             _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1671                           fSymbols->fStandaloneWeekdaysCount);
1672         else // count == 3
1673             _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1674                           fSymbols->fStandaloneShortWeekdaysCount);
1675         break;
1676
1677     // for and "a" symbol, write out the whole AM/PM string
1678     case UDAT_AM_PM_FIELD:
1679         _appendSymbol(appendTo, value, fSymbols->fAmPms,
1680                       fSymbols->fAmPmsCount);
1681         break;
1682
1683     // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1684     // as "12"
1685     case UDAT_HOUR1_FIELD:
1686         if (value == 0)
1687             zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1688         else
1689             zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1690         break;
1691
1692     // for the "z" symbols, we have to check our time zone data first.  If we have a
1693     // localized name for the time zone, then "zzzz" / "zzz" indicate whether
1694     // daylight time is in effect (long/short) and "zz" / "z" do not (long/short).
1695     // If we don't have a localized time zone name,
1696     // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the
1697     // offset from GMT) regardless of how many z's were in the pattern symbol
1698     case UDAT_TIMEZONE_FIELD:
1699     case UDAT_TIMEZONE_GENERIC_FIELD:
1700     case UDAT_TIMEZONE_SPECIAL_FIELD:
1701         {
1702             UnicodeString zoneString;
1703             const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
1704             if (zsf) {
1705                 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1706                     if (count < 4) {
1707                         // "z", "zz", "zzz"
1708                         zsf->getSpecificShortString(cal, TRUE /*commonly used only*/,
1709                             zoneString, status);
1710                     } else {
1711                         // "zzzz"
1712                         zsf->getSpecificLongString(cal, zoneString, status);
1713                     }
1714                 } else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1715                     if (count == 1) {
1716                         // "v"
1717                         zsf->getGenericShortString(cal, TRUE /*commonly used only*/,
1718                             zoneString, status);
1719                     } else if (count == 4) {
1720                         // "vvvv"
1721                         zsf->getGenericLongString(cal, zoneString, status);
1722                     }
1723                 } else { // patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD
1724                     if (count == 1) {
1725                         // "V"
1726                         zsf->getSpecificShortString(cal, FALSE /*ignore commonly used*/,
1727                             zoneString, status);
1728                     } else if (count == 4) {
1729                         // "VVVV"
1730                         zsf->getGenericLocationString(cal, zoneString, status);
1731                     }
1732                 }
1733             }
1734             if (zoneString.isEmpty()) {
1735                 appendGMT(currentNumberFormat,appendTo, cal, status);
1736             } else {
1737                 appendTo += zoneString;
1738             }
1739         }
1740         break;
1741
1742     case UDAT_TIMEZONE_RFC_FIELD: // 'Z' - TIMEZONE_RFC
1743         if (count < 4) {
1744             // RFC822 format, must use ASCII digits
1745             value = (cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status));
1746             formatRFC822TZ(appendTo, value);
1747         } else {
1748             // long form, localized GMT pattern
1749             appendGMT(currentNumberFormat,appendTo, cal, status);
1750         }
1751         break;
1752
1753     case UDAT_QUARTER_FIELD:
1754         if (count >= 4)
1755             _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1756                           fSymbols->fQuartersCount);
1757         else if (count == 3)
1758             _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1759                           fSymbols->fShortQuartersCount);
1760         else
1761             zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1762         break;
1763
1764     case UDAT_STANDALONE_QUARTER_FIELD:
1765         if (count >= 4)
1766             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1767                           fSymbols->fStandaloneQuartersCount);
1768         else if (count == 3)
1769             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1770                           fSymbols->fStandaloneShortQuartersCount);
1771         else
1772             zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount);
1773         break;
1774
1775
1776     // all of the other pattern symbols can be formatted as simple numbers with
1777     // appropriate zero padding
1778     default:
1779         zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount);
1780         break;
1781     }
1782
1783     handler.addAttribute(fgPatternIndexToDateFormatField[patternCharIndex], beginOffset, appendTo.length());
1784 }
1785
1786 //----------------------------------------------------------------------
1787
1788 NumberFormat *
1789 SimpleDateFormat::getNumberFormatByIndex(UDateFormatField index) const {
1790     if (fNumberFormatters != NULL) {
1791         return fNumberFormatters[index];
1792     } else {
1793         return fNumberFormat;
1794     }
1795 }
1796
1797 //----------------------------------------------------------------------
1798 void
1799 SimpleDateFormat::zeroPaddingNumber(NumberFormat *currentNumberFormat,UnicodeString &appendTo,
1800                                     int32_t value, int32_t minDigits, int32_t maxDigits) const
1801 {
1802     if (currentNumberFormat!=NULL) {
1803         FieldPosition pos(0);
1804
1805         currentNumberFormat->setMinimumIntegerDigits(minDigits);
1806         currentNumberFormat->setMaximumIntegerDigits(maxDigits);
1807         currentNumberFormat->format(value, appendTo, pos);  // 3rd arg is there to speed up processing
1808     }
1809 }
1810
1811 //----------------------------------------------------------------------
1812
1813 /**
1814  * Format characters that indicate numeric fields.  The character
1815  * at index 0 is treated specially.
1816  */
1817 static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */
1818
1819 /**
1820  * Return true if the given format character, occuring count
1821  * times, represents a numeric field.
1822  */
1823 UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
1824     UnicodeString s(NUMERIC_FORMAT_CHARS);
1825     int32_t i = s.indexOf(formatChar);
1826     return (i > 0 || (i == 0 && count < 3));
1827 }
1828
1829 void
1830 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
1831 {
1832     UErrorCode status = U_ZERO_ERROR;
1833     int32_t pos = parsePos.getIndex();
1834     int32_t start = pos;
1835
1836     UBool ambiguousYear[] = { FALSE };
1837     int32_t saveHebrewMonth = -1;
1838     int32_t count = 0;
1839
1840     UBool lenient = isLenient();
1841
1842     // hack, reset tztype, cast away const
1843     ((SimpleDateFormat*)this)->tztype = TZTYPE_UNK;
1844
1845     // For parsing abutting numeric fields. 'abutPat' is the
1846     // offset into 'pattern' of the first of 2 or more abutting
1847     // numeric fields.  'abutStart' is the offset into 'text'
1848     // where parsing the fields begins. 'abutPass' starts off as 0
1849     // and increments each time we try to parse the fields.
1850     int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
1851     int32_t abutStart = 0;
1852     int32_t abutPass = 0;
1853     UBool inQuote = FALSE;
1854
1855     const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
1856
1857     TimeZone *backupTZ = NULL;
1858     Calendar *workCal = &cal;
1859     if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) {
1860         // Different calendar type
1861         // We use the time/zone from the input calendar, but
1862         // do not use the input calendar for field calculation.
1863         fCalendar->setTime(cal.getTime(status),status);
1864         if (U_FAILURE(status)) {
1865             goto ExitParse;
1866         }
1867         backupTZ = fCalendar->getTimeZone().clone();
1868         fCalendar->setTimeZone(cal.getTimeZone());
1869         workCal = fCalendar;
1870     }
1871
1872     for (int32_t i=0; i<fPattern.length(); ++i) {
1873         UChar ch = fPattern.charAt(i);
1874
1875         // Handle alphabetic field characters.
1876         if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z]
1877             int32_t fieldPat = i;
1878
1879             // Count the length of this field specifier
1880             count = 1;
1881             while ((i+1)<fPattern.length() &&
1882                    fPattern.charAt(i+1) == ch) {
1883                 ++count;
1884                 ++i;
1885             }
1886
1887             if (isNumeric(ch, count)) {
1888                 if (abutPat < 0) {
1889                     // Determine if there is an abutting numeric field.  For
1890                     // most fields we can just look at the next characters,
1891                     // but the 'm' field is either numeric or text,
1892                     // depending on the count, so we have to look ahead for
1893                     // that field.
1894                     if ((i+1)<fPattern.length()) {
1895                         UBool abutting;
1896                         UChar nextCh = fPattern.charAt(i+1);
1897                         int32_t k = numericFormatChars.indexOf(nextCh);
1898                         if (k == 0) {
1899                             int32_t j = i+2;
1900                             while (j<fPattern.length() &&
1901                                    fPattern.charAt(j) == nextCh) {
1902                                 ++j;
1903                             }
1904                             abutting = (j-i) < 4; // nextCount < 3
1905                         } else {
1906                             abutting = k > 0;
1907                         }
1908
1909                         // Record the start of a set of abutting numeric
1910                         // fields.
1911                         if (abutting) {
1912                             abutPat = fieldPat;
1913                             abutStart = pos;
1914                             abutPass = 0;
1915                         }
1916                     }
1917                 }
1918             } else {
1919                 abutPat = -1; // End of any abutting fields
1920             }
1921
1922             // Handle fields within a run of abutting numeric fields.  Take
1923             // the pattern "HHmmss" as an example. We will try to parse
1924             // 2/2/2 characters of the input text, then if that fails,
1925             // 1/2/2.  We only adjust the width of the leftmost field; the
1926             // others remain fixed.  This allows "123456" => 12:34:56, but
1927             // "12345" => 1:23:45.  Likewise, for the pattern "yyyyMMdd" we
1928             // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
1929             if (abutPat >= 0) {
1930                 // If we are at the start of a run of abutting fields, then
1931                 // shorten this field in each pass.  If we can't shorten
1932                 // this field any more, then the parse of this set of
1933                 // abutting numeric fields has failed.
1934                 if (fieldPat == abutPat) {
1935                     count -= abutPass++;
1936                     if (count == 0) {
1937                         status = U_PARSE_ERROR;
1938                         goto ExitParse;
1939                     }
1940                 }
1941
1942                 pos = subParse(text, pos, ch, count,
1943                                TRUE, FALSE, ambiguousYear, saveHebrewMonth, *workCal, i);
1944
1945                 // If the parse fails anywhere in the run, back up to the
1946                 // start of the run and retry.
1947                 if (pos < 0) {
1948                     i = abutPat - 1;
1949                     pos = abutStart;
1950                     continue;
1951                 }
1952             }
1953
1954             // Handle non-numeric fields and non-abutting numeric
1955             // fields.
1956             else {
1957                 int32_t s = subParse(text, pos, ch, count,
1958                                FALSE, TRUE, ambiguousYear, saveHebrewMonth, *workCal, i);
1959
1960                 if (s == -pos-1) {
1961                     // era not present, in special cases allow this to continue
1962                     s++;
1963
1964                     if (i+1 < fPattern.length()) {
1965                         // move to next pattern character
1966                         UChar ch = fPattern.charAt(i+1);
1967
1968                         // check for whitespace
1969                         if (uprv_isRuleWhiteSpace(ch)) {
1970                             i++;
1971                             // Advance over run in pattern
1972                             while ((i+1)<fPattern.length() &&
1973                                    uprv_isRuleWhiteSpace(fPattern.charAt(i+1))) {
1974                                 ++i;
1975                             }
1976                         }
1977                     }
1978                 }
1979                 else if (s <= 0) {
1980                     status = U_PARSE_ERROR;
1981                     goto ExitParse;
1982                 }
1983                 pos = s;
1984             }
1985         }
1986
1987         // Handle literal pattern characters.  These are any
1988         // quoted characters and non-alphabetic unquoted
1989         // characters.
1990         else {
1991
1992             abutPat = -1; // End of any abutting fields
1993
1994             if (! matchLiterals(fPattern, i, text, pos, lenient)) {
1995                 status = U_PARSE_ERROR;
1996                 goto ExitParse;
1997             }
1998         }
1999     }
2000
2001     // At this point the fields of Calendar have been set.  Calendar
2002     // will fill in default values for missing fields when the time
2003     // is computed.
2004
2005     parsePos.setIndex(pos);
2006
2007     // This part is a problem:  When we call parsedDate.after, we compute the time.
2008     // Take the date April 3 2004 at 2:30 am.  When this is first set up, the year
2009     // will be wrong if we're parsing a 2-digit year pattern.  It will be 1904.
2010     // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day.  2:30 am
2011     // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
2012     // on that day.  It is therefore parsed out to fields as 3:30 am.  Then we
2013     // add 100 years, and get April 3 2004 at 3:30 am.  Note that April 3 2004 is
2014     // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
2015     /*
2016         UDate parsedDate = calendar.getTime();
2017         if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
2018             calendar.add(Calendar.YEAR, 100);
2019             parsedDate = calendar.getTime();
2020         }
2021     */
2022     // Because of the above condition, save off the fields in case we need to readjust.
2023     // The procedure we use here is not particularly efficient, but there is no other
2024     // way to do this given the API restrictions present in Calendar.  We minimize
2025     // inefficiency by only performing this computation when it might apply, that is,
2026     // when the two-digit year is equal to the start year, and thus might fall at the
2027     // front or the back of the default century.  This only works because we adjust
2028     // the year correctly to start with in other cases -- see subParse().
2029     if (ambiguousYear[0] || tztype != TZTYPE_UNK) // If this is true then the two-digit year == the default start year
2030     {
2031         // We need a copy of the fields, and we need to avoid triggering a call to
2032         // complete(), which will recalculate the fields.  Since we can't access
2033         // the fields[] array in Calendar, we clone the entire object.  This will
2034         // stop working if Calendar.clone() is ever rewritten to call complete().
2035         Calendar *copy;
2036         if (ambiguousYear[0]) {
2037             copy = cal.clone();
2038             // Check for failed cloning.
2039             if (copy == NULL) {
2040                 status = U_MEMORY_ALLOCATION_ERROR;
2041                 goto ExitParse;
2042             }
2043             UDate parsedDate = copy->getTime(status);
2044             // {sfb} check internalGetDefaultCenturyStart
2045             if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
2046                 // We can't use add here because that does a complete() first.
2047                 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
2048             }
2049             delete copy;
2050         }
2051
2052         if (tztype != TZTYPE_UNK) {
2053             copy = cal.clone();
2054             // Check for failed cloning.
2055             if (copy == NULL) {
2056                 status = U_MEMORY_ALLOCATION_ERROR;
2057                 goto ExitParse;
2058             }
2059             const TimeZone & tz = cal.getTimeZone();
2060             BasicTimeZone *btz = NULL;
2061
2062             if (dynamic_cast<const OlsonTimeZone *>(&tz) != NULL
2063                 || dynamic_cast<const SimpleTimeZone *>(&tz) != NULL
2064                 || dynamic_cast<const RuleBasedTimeZone *>(&tz) != NULL
2065                 || dynamic_cast<const VTimeZone *>(&tz) != NULL) {
2066                 btz = (BasicTimeZone*)&tz;
2067             }
2068
2069             // Get local millis
2070             copy->set(UCAL_ZONE_OFFSET, 0);
2071             copy->set(UCAL_DST_OFFSET, 0);
2072             UDate localMillis = copy->getTime(status);
2073
2074             // Make sure parsed time zone type (Standard or Daylight)
2075             // matches the rule used by the parsed time zone.
2076             int32_t raw, dst;
2077             if (btz != NULL) {
2078                 if (tztype == TZTYPE_STD) {
2079                     btz->getOffsetFromLocal(localMillis,
2080                         BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status);
2081                 } else {
2082                     btz->getOffsetFromLocal(localMillis,
2083                         BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status);
2084                 }
2085             } else {
2086                 // No good way to resolve ambiguous time at transition,
2087                 // but following code work in most case.
2088                 tz.getOffset(localMillis, TRUE, raw, dst, status);
2089             }
2090
2091             // Now, compare the results with parsed type, either standard or daylight saving time
2092             int32_t resolvedSavings = dst;
2093             if (tztype == TZTYPE_STD) {
2094                 if (dst != 0) {
2095                     // Override DST_OFFSET = 0 in the result calendar
2096                     resolvedSavings = 0;
2097                 }
2098             } else { // tztype == TZTYPE_DST
2099                 if (dst == 0) {
2100                     if (btz != NULL) {
2101                         UDate time = localMillis + raw;
2102                         // We use the nearest daylight saving time rule.
2103                         TimeZoneTransition beforeTrs, afterTrs;
2104                         UDate beforeT = time, afterT = time;
2105                         int32_t beforeSav = 0, afterSav = 0;
2106                         UBool beforeTrsAvail, afterTrsAvail;
2107
2108                         // Search for DST rule before or on the time
2109                         while (TRUE) {
2110                             beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs);
2111                             if (!beforeTrsAvail) {
2112                                 break;
2113                             }
2114                             beforeT = beforeTrs.getTime() - 1;
2115                             beforeSav = beforeTrs.getFrom()->getDSTSavings();
2116                             if (beforeSav != 0) {
2117                                 break;
2118                             }
2119                         }
2120
2121                         // Search for DST rule after the time
2122                         while (TRUE) {
2123                             afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs);
2124                             if (!afterTrsAvail) {
2125                                 break;
2126                             }
2127                             afterT = afterTrs.getTime();
2128                             afterSav = afterTrs.getTo()->getDSTSavings();
2129                             if (afterSav != 0) {
2130                                 break;
2131                             }
2132                         }
2133
2134                         if (beforeTrsAvail && afterTrsAvail) {
2135                             if (time - beforeT > afterT - time) {
2136                                 resolvedSavings = afterSav;
2137                             } else {
2138                                 resolvedSavings = beforeSav;
2139                             }
2140                         } else if (beforeTrsAvail && beforeSav != 0) {
2141                             resolvedSavings = beforeSav;
2142                         } else if (afterTrsAvail && afterSav != 0) {
2143                             resolvedSavings = afterSav;
2144                         } else {
2145                             resolvedSavings = btz->getDSTSavings();
2146                         }
2147                     } else {
2148                         resolvedSavings = tz.getDSTSavings();
2149                     }
2150                     if (resolvedSavings == 0) {
2151                         // final fallback
2152                         resolvedSavings = U_MILLIS_PER_HOUR;
2153                     }
2154                 }
2155             }
2156             cal.set(UCAL_ZONE_OFFSET, raw);
2157             cal.set(UCAL_DST_OFFSET, resolvedSavings);
2158             delete copy;
2159         }
2160     }
2161 ExitParse:
2162     // Set the parsed result if local calendar is used
2163     // instead of the input calendar
2164     if (U_SUCCESS(status) && workCal != &cal) {
2165         cal.setTimeZone(workCal->getTimeZone());
2166         cal.setTime(workCal->getTime(status), status);
2167     }
2168
2169     // Restore the original time zone if required
2170     if (backupTZ != NULL) {
2171         fCalendar->adoptTimeZone(backupTZ);
2172     }
2173
2174     // If any Calendar calls failed, we pretend that we
2175     // couldn't parse the string, when in reality this isn't quite accurate--
2176     // we did parse it; the Calendar calls just failed.
2177     if (U_FAILURE(status)) {
2178         parsePos.setErrorIndex(pos);
2179         parsePos.setIndex(start);
2180     }
2181 }
2182
2183 UDate
2184 SimpleDateFormat::parse( const UnicodeString& text,
2185                          ParsePosition& pos) const {
2186     // redefined here because the other parse() function hides this function's
2187     // cunterpart on DateFormat
2188     return DateFormat::parse(text, pos);
2189 }
2190
2191 UDate
2192 SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
2193 {
2194     // redefined here because the other parse() function hides this function's
2195     // counterpart on DateFormat
2196     return DateFormat::parse(text, status);
2197 }
2198 //----------------------------------------------------------------------
2199
2200 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
2201                               int32_t start,
2202                               UCalendarDateFields field,
2203                               const UnicodeString* data,
2204                               int32_t dataCount,
2205                               Calendar& cal) const
2206 {
2207     int32_t i = 0;
2208     int32_t count = dataCount;
2209
2210     // There may be multiple strings in the data[] array which begin with
2211     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2212     // We keep track of the longest match, and return that.  Note that this
2213     // unfortunately requires us to test all array elements.
2214     int32_t bestMatchLength = 0, bestMatch = -1;
2215
2216     // {sfb} kludge to support case-insensitive comparison
2217     // {markus 2002oct11} do not just use caseCompareBetween because we do not know
2218     // the length of the match after case folding
2219     // {alan 20040607} don't case change the whole string, since the length
2220     // can change
2221     // TODO we need a case-insensitive startsWith function
2222     UnicodeString lcase, lcaseText;
2223     text.extract(start, INT32_MAX, lcaseText);
2224     lcaseText.foldCase();
2225
2226     for (; i < count; ++i)
2227     {
2228         // Always compare if we have no match yet; otherwise only compare
2229         // against potentially better matches (longer strings).
2230
2231         lcase.fastCopyFrom(data[i]).foldCase();
2232         int32_t length = lcase.length();
2233
2234         if (length > bestMatchLength &&
2235             lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
2236         {
2237             bestMatch = i;
2238             bestMatchLength = length;
2239         }
2240     }
2241     if (bestMatch >= 0)
2242     {
2243         cal.set(field, bestMatch * 3);
2244
2245         // Once we have a match, we have to determine the length of the
2246         // original source string.  This will usually be == the length of
2247         // the case folded string, but it may differ (e.g. sharp s).
2248         lcase.fastCopyFrom(data[bestMatch]).foldCase();
2249
2250         // Most of the time, the length will be the same as the length
2251         // of the string from the locale data.  Sometimes it will be
2252         // different, in which case we will have to figure it out by
2253         // adding a character at a time, until we have a match.  We do
2254         // this all in one loop, where we try 'len' first (at index
2255         // i==0).
2256         int32_t len = data[bestMatch].length(); // 99+% of the time
2257         int32_t n = text.length() - start;
2258         for (i=0; i<=n; ++i) {
2259             int32_t j=i;
2260             if (i == 0) {
2261                 j = len;
2262             } else if (i == len) {
2263                 continue; // already tried this when i was 0
2264             }
2265             text.extract(start, j, lcaseText);
2266             lcaseText.foldCase();
2267             if (lcase == lcaseText) {
2268                 return start + j;
2269             }
2270         }
2271     }
2272
2273     return -start;
2274 }
2275
2276 //----------------------------------------------------------------------
2277 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
2278                                       int32_t &patternOffset,
2279                                       const UnicodeString &text,
2280                                       int32_t &textOffset,
2281                                       UBool lenient)
2282 {
2283     UBool inQuote = FALSE;
2284     UnicodeString literal;
2285     int32_t i = patternOffset;
2286
2287     // scan pattern looking for contiguous literal characters
2288     for ( ; i < pattern.length(); i += 1) {
2289         UChar ch = pattern.charAt(i);
2290
2291         if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z]
2292             break;
2293         }
2294
2295         if (ch == QUOTE) {
2296             // Match a quote literal ('') inside OR outside of quotes
2297             if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
2298                 i += 1;
2299             } else {
2300                 inQuote = !inQuote;
2301                 continue;
2302             }
2303         }
2304
2305         literal += ch;
2306     }
2307
2308     // at this point, literal contains the literal text
2309     // and i is the index of the next non-literal pattern character.
2310     int32_t p;
2311     int32_t t = textOffset;
2312
2313     if (lenient) {
2314         // trim leading, trailing whitespace from
2315         // the literal text
2316         literal.trim();
2317
2318         // ignore any leading whitespace in the text
2319         while (t < text.length() && u_isWhitespace(text.charAt(t))) {
2320             t += 1;
2321         }
2322     }
2323
2324     for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) {
2325         UBool needWhitespace = FALSE;
2326
2327         while (p < literal.length() && uprv_isRuleWhiteSpace(literal.charAt(p))) {
2328             needWhitespace = TRUE;
2329             p += 1;
2330         }
2331
2332         if (needWhitespace) {
2333             int32_t tStart = t;
2334
2335             while (t < text.length()) {
2336                 UChar tch = text.charAt(t);
2337
2338                 if (!u_isUWhiteSpace(tch) && !uprv_isRuleWhiteSpace(tch)) {
2339                     break;
2340                 }
2341
2342                 t += 1;
2343             }
2344
2345             // TODO: should we require internal spaces
2346             // in lenient mode? (There won't be any
2347             // leading or trailing spaces)
2348             if (!lenient && t == tStart) {
2349                 // didn't find matching whitespace:
2350                 // an error in strict mode
2351                 return FALSE;
2352             }
2353
2354             // In strict mode, this run of whitespace
2355             // may have been at the end.
2356             if (p >= literal.length()) {
2357                 break;
2358             }
2359         }
2360
2361         if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
2362             // Ran out of text, or found a non-matching character:
2363             // OK in lenient mode, an error in strict mode.
2364             if (lenient) {
2365                 break;
2366             }
2367
2368             return FALSE;
2369         }
2370     }
2371
2372     // At this point if we're in strict mode we have a complete match.
2373     // If we're in lenient mode we may have a partial match, or no
2374     // match at all.
2375     if (p <= 0) {
2376         // no match. Pretend it matched a run of whitespace
2377         // and ignorables in the text.
2378         const  UnicodeSet *ignorables = NULL;
2379         UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i));
2380
2381         if (patternCharPtr != NULL) {
2382             UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars());
2383
2384             ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
2385         }
2386
2387         for (t = textOffset; t < text.length(); t += 1) {
2388             UChar ch = text.charAt(t);
2389
2390             if (ignorables == NULL || !ignorables->contains(ch)) {
2391                 break;
2392             }
2393         }
2394     }
2395
2396     // if we get here, we've got a complete match.
2397     patternOffset = i - 1;
2398     textOffset = t;
2399
2400     return TRUE;
2401 }
2402
2403 //----------------------------------------------------------------------
2404
2405 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
2406                               int32_t start,
2407                               UCalendarDateFields field,
2408                               const UnicodeString* data,
2409                               int32_t dataCount,
2410                               Calendar& cal) const
2411 {
2412     int32_t i = 0;
2413     int32_t count = dataCount;
2414
2415     if (field == UCAL_DAY_OF_WEEK) i = 1;
2416
2417     // There may be multiple strings in the data[] array which begin with
2418     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
2419     // We keep track of the longest match, and return that.  Note that this
2420     // unfortunately requires us to test all array elements.
2421     int32_t bestMatchLength = 0, bestMatch = -1;
2422
2423     // {sfb} kludge to support case-insensitive comparison
2424     // {markus 2002oct11} do not just use caseCompareBetween because we do not know
2425     // the length of the match after case folding
2426     // {alan 20040607} don't case change the whole string, since the length
2427     // can change
2428     // TODO we need a case-insensitive startsWith function
2429     UnicodeString lcase, lcaseText;
2430     text.extract(start, INT32_MAX, lcaseText);
2431     lcaseText.foldCase();
2432
2433     for (; i < count; ++i)
2434     {
2435         // Always compare if we have no match yet; otherwise only compare
2436         // against potentially better matches (longer strings).
2437
2438         lcase.fastCopyFrom(data[i]).foldCase();
2439         int32_t length = lcase.length();
2440
2441         if (length > bestMatchLength &&
2442             lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
2443         {
2444             bestMatch = i;
2445             bestMatchLength = length;
2446         }
2447     }
2448     if (bestMatch >= 0)
2449     {
2450         // Adjustment for Hebrew Calendar month Adar II
2451         if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) {
2452             cal.set(field,6);
2453         }
2454         else {
2455             cal.set(field, bestMatch);
2456         }
2457
2458         // Once we have a match, we have to determine the length of the
2459         // original source string.  This will usually be == the length of
2460         // the case folded string, but it may differ (e.g. sharp s).
2461         lcase.fastCopyFrom(data[bestMatch]).foldCase();
2462
2463         // Most of the time, the length will be the same as the length
2464         // of the string from the locale data.  Sometimes it will be
2465         // different, in which case we will have to figure it out by
2466         // adding a character at a time, until we have a match.  We do
2467         // this all in one loop, where we try 'len' first (at index
2468         // i==0).
2469         int32_t len = data[bestMatch].length(); // 99+% of the time
2470         int32_t n = text.length() - start;
2471         for (i=0; i<=n; ++i) {
2472             int32_t j=i;
2473             if (i == 0) {
2474                 j = len;
2475             } else if (i == len) {
2476                 continue; // already tried this when i was 0
2477             }
2478             text.extract(start, j, lcaseText);
2479             lcaseText.foldCase();
2480             if (lcase == lcaseText) {
2481                 return start + j;
2482             }
2483         }
2484     }
2485
2486     return -start;
2487 }
2488
2489 //----------------------------------------------------------------------
2490
2491 void
2492 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
2493 {
2494     parseAmbiguousDatesAsAfter(d, status);
2495 }
2496
2497 /**
2498  * Private member function that converts the parsed date strings into
2499  * timeFields. Returns -start (for ParsePosition) if failed.
2500  * @param text the time text to be parsed.
2501  * @param start where to start parsing.
2502  * @param ch the pattern character for the date field text to be parsed.
2503  * @param count the count of a pattern character.
2504  * @return the new start position if matching succeeded; a negative number
2505  * indicating matching failure, otherwise.
2506  */
2507 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
2508                            UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal,
2509                            int32_t patLoc) const
2510 {
2511     Formattable number;
2512     int32_t value = 0;
2513     int32_t i;
2514     int32_t ps = 0;
2515     ParsePosition pos(0);
2516     UDateFormatField patternCharIndex;
2517     NumberFormat *currentNumberFormat;
2518     UnicodeString temp;
2519     UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
2520     UBool lenient = isLenient();
2521     UBool gotNumber = FALSE;
2522
2523 #if defined (U_DEBUG_CAL)
2524     //fprintf(stderr, "%s:%d - [%c]  st=%d \n", __FILE__, __LINE__, (char) ch, start);
2525 #endif
2526
2527     if (patternCharPtr == NULL) {
2528         return -start;
2529     }
2530
2531     patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
2532     currentNumberFormat = getNumberFormatByIndex(patternCharIndex);
2533     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
2534
2535     // If there are any spaces here, skip over them.  If we hit the end
2536     // of the string, then fail.
2537     for (;;) {
2538         if (start >= text.length()) {
2539             return -start;
2540         }
2541
2542         UChar32 c = text.char32At(start);
2543         if (!u_isUWhiteSpace(c) /*||*/ && !uprv_isRuleWhiteSpace(c)) {
2544             break;
2545         }
2546
2547         start += UTF_CHAR_LENGTH(c);
2548     }
2549
2550     pos.setIndex(start);
2551
2552     // We handle a few special cases here where we need to parse
2553     // a number value.  We handle further, more generic cases below.  We need
2554     // to handle some of them here because some fields require extra processing on
2555     // the parsed value.
2556     if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
2557         patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||
2558         patternCharIndex == UDAT_HOUR1_FIELD ||
2559         patternCharIndex == UDAT_HOUR0_FIELD ||
2560         patternCharIndex == UDAT_DOW_LOCAL_FIELD ||
2561         patternCharIndex == UDAT_STANDALONE_DAY_FIELD ||
2562         patternCharIndex == UDAT_MONTH_FIELD ||
2563         patternCharIndex == UDAT_STANDALONE_MONTH_FIELD ||
2564         patternCharIndex == UDAT_QUARTER_FIELD ||
2565         patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD ||
2566         patternCharIndex == UDAT_YEAR_FIELD ||
2567         patternCharIndex == UDAT_YEAR_WOY_FIELD ||
2568         patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
2569     {
2570         int32_t parseStart = pos.getIndex();
2571         // It would be good to unify this with the obeyCount logic below,
2572         // but that's going to be difficult.
2573         const UnicodeString* src;
2574
2575         if (obeyCount) {
2576             if ((start+count) > text.length()) {
2577                 return -start;
2578             }
2579
2580             text.extractBetween(0, start + count, temp);
2581             src = &temp;
2582         } else {
2583             src = &text;
2584         }
2585
2586         parseInt(*src, number, pos, allowNegative,currentNumberFormat);
2587
2588         int32_t txtLoc = pos.getIndex();
2589
2590         if (txtLoc > parseStart) {
2591             value = number.getLong();
2592             gotNumber = TRUE;
2593
2594             // suffix processing
2595             if (value < 0 ) {
2596                 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, TRUE);
2597                 if (txtLoc != pos.getIndex()) {
2598                     value *= -1;
2599                 }
2600             }
2601             else {
2602                 txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE);
2603             }
2604
2605             // Check the range of the value
2606             int32_t bias = gFieldRangeBias[patternCharIndex];
2607
2608             if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
2609                 return -start;
2610             }
2611
2612             pos.setIndex(txtLoc);
2613         }
2614     }
2615
2616     // Make sure that we got a number if
2617     // we want one, and didn't get one
2618     // if we don't want one.
2619     switch (patternCharIndex) {
2620         case UDAT_HOUR_OF_DAY1_FIELD:
2621         case UDAT_HOUR_OF_DAY0_FIELD:
2622         case UDAT_HOUR1_FIELD:
2623         case UDAT_HOUR0_FIELD:
2624             // special range check for hours:
2625             if (value < 0 || value > 24) {
2626                 return -start;
2627             }
2628
2629             // fall through to gotNumber check
2630
2631         case UDAT_YEAR_FIELD:
2632         case UDAT_YEAR_WOY_FIELD:
2633         case UDAT_FRACTIONAL_SECOND_FIELD:
2634             // these must be a number
2635             if (! gotNumber) {
2636                 return -start;
2637             }
2638
2639             break;
2640
2641         case UDAT_DOW_LOCAL_FIELD:
2642         case UDAT_STANDALONE_DAY_FIELD:
2643         case UDAT_MONTH_FIELD:
2644         case UDAT_STANDALONE_MONTH_FIELD:
2645         case UDAT_QUARTER_FIELD:
2646         case UDAT_STANDALONE_QUARTER_FIELD:
2647             // in strict mode, these can only
2648             // be a number if count <= 2
2649             if (!lenient && gotNumber && count > 2) {
2650                 // We have a string pattern in strict mode
2651                 // but the input parsed as a number. Ignore
2652                 // the fact that the input parsed as a number
2653                 // and try to match it as a string. (Some
2654                 // locales have numbers for the month names.)
2655                 gotNumber = FALSE;
2656                 pos.setIndex(start);
2657             }
2658
2659             break;
2660
2661         default:
2662             // we check the rest of the fields below.
2663             break;
2664     }
2665
2666     switch (patternCharIndex) {
2667     case UDAT_ERA_FIELD:
2668         if (count == 5) {
2669             ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal);
2670         } else if (count == 4) {
2671             ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal);
2672         } else {
2673             ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal);
2674         }
2675
2676         // check return position, if it equals -start, then matchString error
2677         // special case the return code so we don't necessarily fail out until we
2678         // verify no year information also
2679         if (ps == -start)
2680             ps--;
2681
2682         return ps;
2683
2684     case UDAT_YEAR_FIELD:
2685         // If there are 3 or more YEAR pattern characters, this indicates
2686         // that the year value is to be treated literally, without any
2687         // two-digit year adjustments (e.g., from "01" to 2001).  Otherwise
2688         // we made adjustments to place the 2-digit year in the proper
2689         // century, for parsed strings from "00" to "99".  Any other string
2690         // is treated literally:  "2250", "-1", "1", "002".
2691         if ((pos.getIndex() - start) == 2
2692             && u_isdigit(text.charAt(start))
2693             && u_isdigit(text.charAt(start+1)))
2694         {
2695             // Assume for example that the defaultCenturyStart is 6/18/1903.
2696             // This means that two-digit years will be forced into the range
2697             // 6/18/1903 to 6/17/2003.  As a result, years 00, 01, and 02
2698             // correspond to 2000, 2001, and 2002.  Years 04, 05, etc. correspond
2699             // to 1904, 1905, etc.  If the year is 03, then it is 2003 if the
2700             // other fields specify a date before 6/18, or 1903 if they specify a
2701             // date afterwards.  As a result, 03 is an ambiguous year.  All other
2702             // two-digit years are unambiguous.
2703           if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
2704               int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2705               ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2706               value += (fDefaultCenturyStartYear/100)*100 +
2707                 (value < ambiguousTwoDigitYear ? 100 : 0);
2708             }
2709         }
2710         cal.set(UCAL_YEAR, value);
2711
2712         // Delayed checking for adjustment of Hebrew month numbers in non-leap years.
2713         if (saveHebrewMonth >= 0) {
2714             HebrewCalendar *hc = (HebrewCalendar*)&cal;
2715             if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) {
2716                cal.set(UCAL_MONTH,saveHebrewMonth);
2717             } else {
2718                cal.set(UCAL_MONTH,saveHebrewMonth-1);
2719             }
2720             saveHebrewMonth = -1;
2721         }
2722         return pos.getIndex();
2723
2724     case UDAT_YEAR_WOY_FIELD:
2725         // Comment is the same as for UDAT_Year_FIELDs - look above
2726         if ((pos.getIndex() - start) == 2
2727             && u_isdigit(text.charAt(start))
2728             && u_isdigit(text.charAt(start+1))
2729             && fHaveDefaultCentury )
2730         {
2731             int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2732             ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2733             value += (fDefaultCenturyStartYear/100)*100 +
2734                 (value < ambiguousTwoDigitYear ? 100 : 0);
2735         }
2736         cal.set(UCAL_YEAR_WOY, value);
2737         return pos.getIndex();
2738
2739     case UDAT_MONTH_FIELD:
2740         if (gotNumber) // i.e., M or MM.
2741         {
2742             // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether
2743             // or not it was a leap year.  We may or may not yet know what year it is, so might have to delay checking until
2744             // the year is parsed.
2745             if (!strcmp(cal.getType(),"hebrew")) {
2746                 HebrewCalendar *hc = (HebrewCalendar*)&cal;
2747                 if (cal.isSet(UCAL_YEAR)) {
2748                    UErrorCode status = U_ZERO_ERROR;
2749                    if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) {
2750                        cal.set(UCAL_MONTH, value);
2751                    } else {
2752                        cal.set(UCAL_MONTH, value - 1);
2753                    }
2754                 } else {
2755                     saveHebrewMonth = value;
2756                 }
2757             } else {
2758                 // Don't want to parse the month if it is a string
2759                 // while pattern uses numeric style: M or MM.
2760                 // [We computed 'value' above.]
2761                 cal.set(UCAL_MONTH, value - 1);
2762             }
2763             return pos.getIndex();
2764         } else {
2765             // count >= 3 // i.e., MMM or MMMM
2766             // Want to be able to parse both short and long forms.
2767             // Try count == 4 first:
2768             int32_t newStart = 0;
2769
2770             if ((newStart = matchString(text, start, UCAL_MONTH,
2771                                       fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
2772                 return newStart;
2773             else // count == 4 failed, now try count == 3
2774                 return matchString(text, start, UCAL_MONTH,
2775                                    fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal);
2776         }
2777
2778     case UDAT_STANDALONE_MONTH_FIELD:
2779         if (gotNumber) // i.e., L or LL.
2780         {
2781             // Don't want to parse the month if it is a string
2782             // while pattern uses numeric style: M or MM.
2783             // [We computed 'value' above.]
2784             cal.set(UCAL_MONTH, value - 1);
2785             return pos.getIndex();
2786         } else {
2787             // count >= 3 // i.e., LLL or LLLL
2788             // Want to be able to parse both short and long forms.
2789             // Try count == 4 first:
2790             int32_t newStart = 0;
2791
2792             if ((newStart = matchString(text, start, UCAL_MONTH,
2793                                       fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
2794                 return newStart;
2795             else // count == 4 failed, now try count == 3
2796                 return matchString(text, start, UCAL_MONTH,
2797                                    fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal);
2798         }
2799
2800     case UDAT_HOUR_OF_DAY1_FIELD:
2801         // [We computed 'value' above.]
2802         if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
2803             value = 0;
2804
2805         // fall through to set field
2806
2807     case UDAT_HOUR_OF_DAY0_FIELD:
2808         cal.set(UCAL_HOUR_OF_DAY, value);
2809         return pos.getIndex();
2810
2811     case UDAT_FRACTIONAL_SECOND_FIELD:
2812         // Fractional seconds left-justify
2813         i = pos.getIndex() - start;
2814         if (i < 3) {
2815             while (i < 3) {
2816                 value *= 10;
2817                 i++;
2818             }
2819         } else {
2820             int32_t a = 1;
2821             while (i > 3) {
2822                 a *= 10;
2823                 i--;
2824             }
2825             value = (value + (a>>1)) / a;
2826         }
2827         cal.set(UCAL_MILLISECOND, value);
2828         return pos.getIndex();
2829
2830     case UDAT_DOW_LOCAL_FIELD:
2831         if (gotNumber) // i.e., e or ee
2832         {
2833             // [We computed 'value' above.]
2834             cal.set(UCAL_DOW_LOCAL, value);
2835             return pos.getIndex();
2836         }
2837         // else for eee-eeeee fall through to handling of EEE-EEEEE
2838         // fall through, do not break here
2839     case UDAT_DAY_OF_WEEK_FIELD:
2840         {
2841             // Want to be able to parse both short and long forms.
2842             // Try count == 4 (EEEE) first:
2843             int32_t newStart = 0;
2844             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2845                                       fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0)
2846                 return newStart;
2847             // EEEE failed, now try EEE
2848             else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2849                                    fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
2850                 return newStart;
2851             // EEE failed, now try EEEEE
2852             else
2853                 return matchString(text, start, UCAL_DAY_OF_WEEK,
2854                                    fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal);
2855         }
2856
2857     case UDAT_STANDALONE_DAY_FIELD:
2858         {
2859             if (gotNumber) // c or cc
2860             {
2861                 // [We computed 'value' above.]
2862                 cal.set(UCAL_DOW_LOCAL, value);
2863                 return pos.getIndex();
2864             }
2865             // Want to be able to parse both short and long forms.
2866             // Try count == 4 (cccc) first:
2867             int32_t newStart = 0;
2868             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2869                                       fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
2870                 return newStart;
2871             else // cccc failed, now try ccc
2872                 return matchString(text, start, UCAL_DAY_OF_WEEK,
2873                                    fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal);
2874         }
2875
2876     case UDAT_AM_PM_FIELD:
2877         return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
2878
2879     case UDAT_HOUR1_FIELD:
2880         // [We computed 'value' above.]
2881         if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
2882             value = 0;
2883
2884         // fall through to set field
2885
2886     case UDAT_HOUR0_FIELD:
2887         cal.set(UCAL_HOUR, value);
2888         return pos.getIndex();
2889
2890     case UDAT_QUARTER_FIELD:
2891         if (gotNumber) // i.e., Q or QQ.
2892         {
2893             // Don't want to parse the month if it is a string
2894             // while pattern uses numeric style: Q or QQ.
2895             // [We computed 'value' above.]
2896             cal.set(UCAL_MONTH, (value - 1) * 3);
2897             return pos.getIndex();
2898         } else {
2899             // count >= 3 // i.e., QQQ or QQQQ
2900             // Want to be able to parse both short and long forms.
2901             // Try count == 4 first:
2902             int32_t newStart = 0;
2903
2904             if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2905                                       fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
2906                 return newStart;
2907             else // count == 4 failed, now try count == 3
2908                 return matchQuarterString(text, start, UCAL_MONTH,
2909                                    fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal);
2910         }
2911
2912     case UDAT_STANDALONE_QUARTER_FIELD:
2913         if (gotNumber) // i.e., q or qq.
2914         {
2915             // Don't want to parse the month if it is a string
2916             // while pattern uses numeric style: q or q.
2917             // [We computed 'value' above.]
2918             cal.set(UCAL_MONTH, (value - 1) * 3);
2919             return pos.getIndex();
2920         } else {
2921             // count >= 3 // i.e., qqq or qqqq
2922             // Want to be able to parse both short and long forms.
2923             // Try count == 4 first:
2924             int32_t newStart = 0;
2925
2926             if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2927                                       fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
2928                 return newStart;
2929             else // count == 4 failed, now try count == 3
2930                 return matchQuarterString(text, start, UCAL_MONTH,
2931                                    fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal);
2932         }
2933
2934     case UDAT_TIMEZONE_FIELD:
2935     case UDAT_TIMEZONE_RFC_FIELD:
2936     case UDAT_TIMEZONE_GENERIC_FIELD:
2937     case UDAT_TIMEZONE_SPECIAL_FIELD:
2938         {
2939             int32_t offset = 0;
2940             UBool parsed = FALSE;
2941
2942             // Step 1
2943             // Check if this is a long GMT offset string (either localized or default)
2944             offset = parseGMT(text, pos);
2945             if (pos.getIndex() - start > 0) {
2946                 parsed = TRUE;
2947             }
2948             if (!parsed) {
2949                 // Step 2
2950                 // Check if this is an RFC822 time zone offset.
2951                 // ICU supports the standard RFC822 format [+|-]HHmm
2952                 // and its extended form [+|-]HHmmSS.
2953                 do {
2954                     int32_t sign = 0;
2955                     UChar signChar = text.charAt(start);
2956                     if (signChar == (UChar)0x002B /* '+' */) {
2957                         sign = 1;
2958                     } else if (signChar == (UChar)0x002D /* '-' */) {
2959                         sign = -1;
2960                     } else {
2961                         // Not an RFC822 offset string
2962                         break;
2963                     }
2964
2965                     // Parse digits
2966                     int32_t orgPos = start + 1;
2967                     pos.setIndex(orgPos);
2968                     parseInt(text, number, 6, pos, FALSE,currentNumberFormat);
2969                     int32_t numLen = pos.getIndex() - orgPos;
2970                     if (numLen <= 0) {
2971                         break;
2972                     }
2973
2974                     // Followings are possible format (excluding sign char)
2975                     // HHmmSS
2976                     // HmmSS
2977                     // HHmm
2978                     // Hmm
2979                     // HH
2980                     // H
2981                     int32_t val = number.getLong();
2982                     int32_t hour = 0, min = 0, sec = 0;
2983                     switch(numLen) {
2984                     case 1: // H
2985                     case 2: // HH
2986                         hour = val;
2987                         break;
2988                     case 3: // Hmm
2989                     case 4: // HHmm
2990                         hour = val / 100;
2991                         min = val % 100;
2992                         break;
2993                     case 5: // Hmmss
2994                     case 6: // HHmmss
2995                         hour = val / 10000;
2996                         min = (val % 10000) / 100;
2997                         sec = val % 100;
2998                         break;
2999                     }
3000                     if (hour > 23 || min > 59 || sec > 59) {
3001                         // Invalid value range
3002                         break;
3003                     }
3004                     offset = (((hour * 60) + min) * 60 + sec) * 1000 * sign;
3005                     parsed = TRUE;
3006                 } while (FALSE);
3007
3008                 if (!parsed) {
3009                     // Failed to parse.  Reset the position.
3010                     pos.setIndex(start);
3011                 }
3012             }
3013
3014             if (parsed) {
3015                 // offset was successfully parsed as either a long GMT string or RFC822 zone offset
3016                 // string.  Create normalized zone ID for the offset.
3017
3018                 UnicodeString tzID(gGmt);
3019                 formatRFC822TZ(tzID, offset);
3020                 //TimeZone *customTZ = TimeZone::createTimeZone(tzID);
3021                 TimeZone *customTZ = new SimpleTimeZone(offset, tzID);    // faster than TimeZone::createTimeZone
3022                 cal.adoptTimeZone(customTZ);
3023
3024                 return pos.getIndex();
3025             }
3026
3027             // Step 3
3028             // At this point, check for named time zones by looking through
3029             // the locale data from the DateFormatZoneData strings.
3030             // Want to be able to parse both short and long forms.
3031             // optimize for calendar's current time zone
3032             const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
3033             if (zsf) {
3034                 UErrorCode status = U_ZERO_ERROR;
3035                 const ZoneStringInfo *zsinfo = NULL;
3036                 int32_t matchLen;
3037
3038                 switch (patternCharIndex) {
3039                     case UDAT_TIMEZONE_FIELD: // 'z'
3040                         if (count < 4) {
3041                             zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
3042                         } else {
3043                             zsinfo = zsf->findSpecificLong(text, start, matchLen, status);
3044                         }
3045                         break;
3046                     case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
3047                         if (count == 1) {
3048                             zsinfo = zsf->findGenericShort(text, start, matchLen, status);
3049                         } else if (count == 4) {
3050                             zsinfo = zsf->findGenericLong(text, start, matchLen, status);
3051                         }
3052                         break;
3053                     case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
3054                         if (count == 1) {
3055                             zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
3056                         } else if (count == 4) {
3057                             zsinfo = zsf->findGenericLocation(text, start, matchLen, status);
3058                         }
3059                         break;
3060                     default:
3061                         break;
3062                 }
3063
3064                 if (U_SUCCESS(status) && zsinfo != NULL) {
3065                     if (zsinfo->isStandard()) {
3066                         ((SimpleDateFormat*)this)->tztype = TZTYPE_STD;
3067                     } else if (zsinfo->isDaylight()) {
3068                         ((SimpleDateFormat*)this)->tztype = TZTYPE_DST;
3069                     }
3070                     UnicodeString tzid;
3071                     zsinfo->getID(tzid);
3072
3073                     UnicodeString current;
3074                     cal.getTimeZone().getID(current);
3075                     if (tzid != current) {
3076                         TimeZone *tz = TimeZone::createTimeZone(tzid);
3077                         cal.adoptTimeZone(tz);
3078                     }
3079                     return start + matchLen;
3080                 }
3081             }
3082             // Step 4
3083             // Final attempt - is this standalone GMT/UT/UTC?
3084             int32_t gmtLen = 0;
3085             if (text.compare(start, kGmtLen, gGmt) == 0) {
3086                 gmtLen = kGmtLen;
3087             } else if (text.compare(start, kUtcLen, gUtc) == 0) {
3088                 gmtLen = kUtcLen;
3089             } else if (text.compare(start, kUtLen, gUt) == 0) {
3090                 gmtLen = kUtLen;
3091             }
3092             if (gmtLen > 0) {
3093                 TimeZone *tz = TimeZone::createTimeZone(UnicodeString("Etc/GMT"));
3094                 cal.adoptTimeZone(tz);
3095                 return start + gmtLen;
3096             }
3097
3098             // complete failure
3099             return -start;
3100         }
3101
3102     default:
3103         // Handle "generic" fields
3104         int32_t parseStart = pos.getIndex();
3105         const UnicodeString* src;
3106         if (obeyCount) {
3107             if ((start+count) > text.length()) {
3108                 return -start;
3109             }
3110             text.extractBetween(0, start + count, temp);
3111             src = &temp;
3112         } else {
3113             src = &text;
3114         }
3115         parseInt(*src, number, pos, allowNegative,currentNumberFormat);
3116         if (pos.getIndex() != parseStart) {
3117             int32_t value = number.getLong();
3118
3119             // Check the range of the value
3120             int32_t bias = gFieldRangeBias[patternCharIndex];
3121
3122             if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) {
3123                 cal.set(field, value);
3124                 return pos.getIndex();
3125             }
3126         }
3127
3128         return -start;
3129     }
3130 }
3131
3132 /**
3133  * Parse an integer using fNumberFormat.  This method is semantically
3134  * const, but actually may modify fNumberFormat.
3135  */
3136 void SimpleDateFormat::parseInt(const UnicodeString& text,
3137                                 Formattable& number,
3138                                 ParsePosition& pos,
3139                                 UBool allowNegative,
3140                                 NumberFormat *fmt) const {
3141     parseInt(text, number, -1, pos, allowNegative,fmt);
3142 }
3143
3144 /**
3145  * Parse an integer using fNumberFormat up to maxDigits.
3146  */
3147 void SimpleDateFormat::parseInt(const UnicodeString& text,
3148                                 Formattable& number,
3149                                 int32_t maxDigits,
3150                                 ParsePosition& pos,
3151                                 UBool allowNegative,
3152                                 NumberFormat *fmt) const {
3153     UnicodeString oldPrefix;
3154     DecimalFormat* df = NULL;
3155     if (!allowNegative && (df = dynamic_cast<DecimalFormat*>(fmt)) != NULL) {
3156         df->getNegativePrefix(oldPrefix);
3157         df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX);
3158     }
3159     int32_t oldPos = pos.getIndex();
3160     fmt->parse(text, number, pos);
3161     if (df != NULL) {
3162         df->setNegativePrefix(oldPrefix);
3163     }
3164
3165     if (maxDigits > 0) {
3166         // adjust the result to fit into
3167         // the maxDigits and move the position back
3168         int32_t nDigits = pos.getIndex() - oldPos;
3169         if (nDigits > maxDigits) {
3170             int32_t val = number.getLong();
3171             nDigits -= maxDigits;
3172             while (nDigits > 0) {
3173                 val /= 10;
3174                 nDigits--;
3175             }
3176             pos.setIndex(oldPos + maxDigits);
3177             number.setLong(val);
3178         }
3179     }
3180 }
3181
3182 //----------------------------------------------------------------------
3183
3184 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
3185                                         UnicodeString& translatedPattern,
3186                                         const UnicodeString& from,
3187                                         const UnicodeString& to,
3188                                         UErrorCode& status)
3189 {
3190   // run through the pattern and convert any pattern symbols from the version
3191   // in "from" to the corresponding character ion "to".  This code takes
3192   // quoted strings into account (it doesn't try to translate them), and it signals
3193   // an error if a particular "pattern character" doesn't appear in "from".
3194   // Depending on the values of "from" and "to" this can convert from generic
3195   // to localized patterns or localized to generic.
3196   if (U_FAILURE(status))
3197     return;
3198
3199   translatedPattern.remove();
3200   UBool inQuote = FALSE;
3201   for (int32_t i = 0; i < originalPattern.length(); ++i) {
3202     UChar c = originalPattern[i];
3203     if (inQuote) {
3204       if (c == QUOTE)
3205     inQuote = FALSE;
3206     }
3207     else {
3208       if (c == QUOTE)
3209     inQuote = TRUE;
3210       else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/
3211            || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) {
3212     int32_t ci = from.indexOf(c);
3213     if (ci == -1) {
3214       status = U_INVALID_FORMAT_ERROR;
3215       return;
3216     }
3217     c = to[ci];
3218       }
3219     }
3220     translatedPattern += c;
3221   }
3222   if (inQuote) {
3223     status = U_INVALID_FORMAT_ERROR;
3224     return;
3225   }
3226 }
3227
3228 //----------------------------------------------------------------------
3229
3230 UnicodeString&
3231 SimpleDateFormat::toPattern(UnicodeString& result) const
3232 {
3233     result = fPattern;
3234     return result;
3235 }
3236
3237 //----------------------------------------------------------------------
3238
3239 UnicodeString&
3240 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
3241                                      UErrorCode& status) const
3242 {
3243     translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status);
3244     return result;
3245 }
3246
3247 //----------------------------------------------------------------------
3248
/**
 * Replaces this formatter's pattern string (fPattern) with the given
 * pattern.  The pattern is stored as given; nothing else is done here.
 *
 * @param pattern the new pattern string
 */
void
SimpleDateFormat::applyPattern(const UnicodeString& pattern)
{
    fPattern = pattern;
}
3254
3255 //----------------------------------------------------------------------
3256
3257 void
3258 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
3259                                         UErrorCode &status)
3260 {
3261     translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status);
3262 }
3263
3264 //----------------------------------------------------------------------
3265
/**
 * Returns the fSymbols pointer held by this formatter.  No copy is made;
 * other methods of this class delete and replace the pointed-to object
 * (adoptDateFormatSymbols, setDateFormatSymbols, adoptCalendar).
 *
 * @return the current value of fSymbols
 */
const DateFormatSymbols*
SimpleDateFormat::getDateFormatSymbols() const
{
    return fSymbols;
}
3271
3272 //----------------------------------------------------------------------
3273
3274 void
3275 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
3276 {
3277     delete fSymbols;
3278     fSymbols = newFormatSymbols;
3279 }
3280
3281 //----------------------------------------------------------------------
3282 void
3283 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
3284 {
3285     delete fSymbols;
3286     fSymbols = new DateFormatSymbols(newFormatSymbols);
3287 }
3288
3289
3290 //----------------------------------------------------------------------
3291
3292
3293 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
3294 {
3295   UErrorCode status = U_ZERO_ERROR;
3296   DateFormat::adoptCalendar(calendarToAdopt);
3297   delete fSymbols;
3298   fSymbols=NULL;
3299   initializeSymbols(fLocale, fCalendar, status);  // we need new symbols
3300   initializeDefaultCentury();  // we need a new century (possibly)
3301 }
3302
3303
3304 //----------------------------------------------------------------------
3305
3306
/**
 * Applies the static isFieldUnitIgnored(pattern, field) check to this
 * formatter's own pattern (fPattern).
 *
 * @param field the calendar field to test
 * @return the result of the static overload for fPattern and field
 */
UBool
SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
    return isFieldUnitIgnored(fPattern, field);
}
3311
3312
3313 UBool
3314 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
3315                                      UCalendarDateFields field) {
3316     int32_t fieldLevel = fgCalendarFieldToLevel[field];
3317     int32_t level;
3318     UChar ch;
3319     UBool inQuote = FALSE;
3320     UChar prevCh = 0;
3321     int32_t count = 0;
3322
3323     for (int32_t i = 0; i < pattern.length(); ++i) {
3324         ch = pattern[i];
3325         if (ch != prevCh && count > 0) {
3326             level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
3327             // the larger the level, the smaller the field unit.
3328             if ( fieldLevel <= level ) {
3329                 return FALSE;
3330             }
3331             count = 0;
3332         }
3333         if (ch == QUOTE) {
3334             if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
3335                 ++i;
3336             } else {
3337                 inQuote = ! inQuote;
3338             }
3339         }
3340         else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
3341                     || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
3342             prevCh = ch;
3343             ++count;
3344         }
3345     }
3346     if ( count > 0 ) {
3347         // last item
3348         level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
3349             if ( fieldLevel <= level ) {
3350                 return FALSE;
3351             }
3352     }
3353     return TRUE;
3354 }
3355
3356 //----------------------------------------------------------------------
3357
/**
 * Returns a reference to this formatter's fLocale member.
 *
 * @return fLocale, by const reference
 */
const Locale&
SimpleDateFormat::getSmpFmtLocale(void) const {
    return fLocale;
}
3362
3363 //----------------------------------------------------------------------
3364
3365 int32_t
3366 SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start,
3367                                  int32_t patLoc, UBool isNegative) const {
3368     // local variables
3369     UnicodeString suf;
3370     int32_t patternMatch;
3371     int32_t textPreMatch;
3372     int32_t textPostMatch;
3373
3374     // check that we are still in range
3375     if ( (start > text.length()) ||
3376          (start < 0) ||
3377          (patLoc < 0) ||
3378          (patLoc > fPattern.length())) {
3379         // out of range, don't advance location in text
3380         return start;
3381     }
3382
3383     // get the suffix
3384     DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat);
3385     if (decfmt != NULL) {
3386         if (isNegative) {
3387             suf = decfmt->getNegativeSuffix(suf);
3388         }
3389         else {
3390             suf = decfmt->getPositiveSuffix(suf);
3391         }
3392     }
3393
3394     // check for suffix
3395     if (suf.length() <= 0) {
3396         return start;
3397     }
3398
3399     // check suffix will be encountered in the pattern
3400     patternMatch = compareSimpleAffix(suf,fPattern,patLoc);
3401
3402     // check if a suffix will be encountered in the text
3403     textPreMatch = compareSimpleAffix(suf,text,start);
3404
3405     // check if a suffix was encountered in the text
3406     textPostMatch = compareSimpleAffix(suf,text,start-suf.length());
3407
3408     // check for suffix match
3409     if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) {
3410         return start;
3411     }
3412     else if ((textPostMatch >= 0) && (patternMatch >= 0) && (textPostMatch == patternMatch)) {
3413         return  start - suf.length();
3414     }
3415
3416     // should not get here
3417     return start;
3418 }
3419
3420 //----------------------------------------------------------------------
3421
3422 int32_t
3423 SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix,
3424                    const UnicodeString& input,
3425                    int32_t pos) const {
3426     int32_t start = pos;
3427     for (int32_t i=0; i<affix.length(); ) {
3428         UChar32 c = affix.char32At(i);
3429         int32_t len = U16_LENGTH(c);
3430         if (uprv_isRuleWhiteSpace(c)) {
3431             // We may have a pattern like: \u200F \u0020
3432             //        and input text like: \u200F \u0020
3433             // Note that U+200F and U+0020 are RuleWhiteSpace but only
3434             // U+0020 is UWhiteSpace.  So we have to first do a direct
3435             // match of the run of RULE whitespace in the pattern,
3436             // then match any extra characters.
3437             UBool literalMatch = FALSE;
3438             while (pos < input.length() &&
3439                    input.char32At(pos) == c) {
3440                 literalMatch = TRUE;
3441                 i += len;
3442                 pos += len;
3443                 if (i == affix.length()) {
3444                     break;
3445                 }
3446                 c = affix.char32At(i);
3447                 len = U16_LENGTH(c);
3448                 if (!uprv_isRuleWhiteSpace(c)) {
3449                     break;
3450                 }
3451             }
3452
3453             // Advance over run in pattern
3454             i = skipRuleWhiteSpace(affix, i);
3455
3456             // Advance over run in input text
3457             // Must see at least one white space char in input,
3458             // unless we've already matched some characters literally.
3459             int32_t s = pos;
3460             pos = skipUWhiteSpace(input, pos);
3461             if (pos == s && !literalMatch) {
3462                 return -1;
3463             }
3464
3465             // If we skip UWhiteSpace in the input text, we need to skip it in the pattern.
3466             // Otherwise, the previous lines may have skipped over text (such as U+00A0) that
3467             // is also in the affix.
3468             i = skipUWhiteSpace(affix, i);
3469         } else {
3470             if (pos < input.length() &&
3471                 input.char32At(pos) == c) {
3472                 i += len;
3473                 pos += len;
3474             } else {
3475                 return -1;
3476             }
3477         }
3478     }
3479     return pos - start;
3480 }
3481
3482 //----------------------------------------------------------------------
3483
3484 int32_t
3485 SimpleDateFormat::skipRuleWhiteSpace(const UnicodeString& text, int32_t pos) const {
3486     while (pos < text.length()) {
3487         UChar32 c = text.char32At(pos);
3488         if (!uprv_isRuleWhiteSpace(c)) {
3489             break;
3490         }
3491         pos += U16_LENGTH(c);
3492     }
3493     return pos;
3494 }
3495
3496 //----------------------------------------------------------------------
3497
3498 int32_t
3499 SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const {
3500     while (pos < text.length()) {
3501         UChar32 c = text.char32At(pos);
3502         if (!u_isUWhiteSpace(c)) {
3503             break;
3504         }
3505         pos += U16_LENGTH(c);
3506     }
3507     return pos;
3508 }
3509
3510 U_NAMESPACE_END
3511
3512 #endif /* #if !UCONFIG_NO_FORMATTING */
3513
3514 //eof