icuSources/i18n/smpdtfmt.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2009, International Business Machines Corporation and    *
   4 * others. All Rights Reserved.                                                *
   5 *******************************************************************************
   6 *
   7 * File SMPDTFMT.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   02/19/97    aliu        Converted from java.
  13 *   03/31/97    aliu        Modified extensively to work with 50 locales.
  14 *   04/01/97    aliu        Added support for centuries.
  15 *   07/09/97    helena      Made ParsePosition into a class.
  16 *   07/21/98    stephen     Added initializeDefaultCentury.
  17 *                             Removed getZoneIndex (added in DateFormatSymbols)
  18 *                             Removed subParseLong
  19 *                             Removed chk
  20 *  02/22/99     stephen     Removed character literals for EBCDIC safety
  21 *   10/14/99    aliu        Updated 2-digit year parsing so that only "00" thru
  22 *                           "99" are recognized. {j28 4182066}
  23 *   11/15/99    weiv        Added support for week of year/day of week format
  24 ********************************************************************************
  25 */
  26
  27 #define ZID_KEY_MAX 128
  28
  29 #include "unicode/utypes.h"
  30
  31 #if !UCONFIG_NO_FORMATTING
  32
  33 #include "unicode/smpdtfmt.h"
  34 #include "unicode/dtfmtsym.h"
  35 #include "unicode/ures.h"
  36 #include "unicode/msgfmt.h"
  37 #include "unicode/calendar.h"
  38 #include "unicode/gregocal.h"
  39 #include "unicode/timezone.h"
  40 #include "unicode/decimfmt.h"
  41 #include "unicode/dcfmtsym.h"
  42 #include "unicode/uchar.h"
  43 #include "unicode/uniset.h"
  44 #include "unicode/ustring.h"
  45 #include "unicode/basictz.h"
  46 #include "unicode/simpletz.h"
  47 #include "unicode/rbtz.h"
  48 #include "unicode/vtzone.h"
  49 #include "olsontz.h"
  50 #include "util.h"
  51 #include "gregoimp.h"
  52 #include "cstring.h"
  53 #include "uassert.h"
  54 #include "zstrfmt.h"
  55 #include "cmemory.h"
  56 #include "umutex.h"
  57 #include "smpdtfst.h"
  58 #include <float.h>
  59
  60 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
  61 #include <stdio.h>
  62 #endif
  63
  64 // *****************************************************************************
  65 // class SimpleDateFormat
  66 // *****************************************************************************
  67
  68 U_NAMESPACE_BEGIN
  69
// Base code point (0x40, '@') for pattern-letter table lookups.
// NOTE(review): presumably subtracted from a pattern character to index
// fgPatternCharToLevel, whose 64 entries start at '@' -- confirm at the use site.
static const UChar PATTERN_CHAR_BASE = 0x40;

/**
 * Last-resort strings to use for "GMT" when constructing time zone strings.
 * All arrays below are NUL-terminated UTF-16 spelled out as code units
 * (no character literals are used).
 */
// For time zones that have no names, use strings GMT+minutes and
// GMT-minutes. For instance, in France the time zone is GMT+60.
// Also accepted are GMT+H:MM or GMT-H:MM.
static const UChar gGmt[]      = {0x0047, 0x004D, 0x0054, 0x0000};         // "GMT"
static const UChar gGmtPlus[]  = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
// Default GMT message pattern and the default signed hour patterns.
static const UChar gDefGmtPat[]       = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
static const UChar gDefGmtNegHmPat[]  = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
static const UChar gDefGmtPosHmPat[]  = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
// Lengths, in UChars and not counting the terminating NUL, of the default
// strings above (gGmt, gDefGmtPat, then the four signed hour patterns).
typedef enum GmtPatSize {
    kGmtLen = 3,
    kGmtPatLen = 6,
    kNegHmsLen = 9,
    kNegHmLen = 6,
    kPosHmsLen = 9,
    kPosHmLen = 6
} GmtPatSize;

// This is a pattern-of-last-resort used when we can't load a usable pattern out
// of a resource.
static const UChar gDefaultPattern[] =
{
    0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
};  /* "yyyyMMdd hh:mm a" */

// This prefix is designed to NEVER MATCH real text, in order to
// suppress the parsing of negative numbers.  Adjust as needed (if
// this becomes valid Unicode).
static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};

/**
 * Resource bundle key under which the date/time pattern strings of a locale's
 * calendar data are looked up (see construct()).
 */
static const char gDateTimePatternsTag[]="DateTimePatterns";

static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
static const UChar QUOTE = 0x27; // Single quote
// Slot indices into the fGMTFormatters array, one per sign and precision
// (HMS = with seconds, HM = hours and minutes only).
// kNumGMTFormatters is the number of slots.
enum {
    kGMTNegativeHMS = 0,
    kGMTNegativeHM,
    kGMTPositiveHMS,
    kGMTPositiveHM,

    kNumGMTFormatters
};
 123
/*
 * The field range check bias for each UDateFormatField.
 * The bias is added to the minimum and maximum values
 * before they are compared to the parsed number.
 * For example, the calendar stores zero-based month numbers
 * but the parsed month numbers start at 1, so the bias is 1.
 *
 * A value of -1 means that the value is not checked.
 *
 * The entries are listed in UDateFormatField order; each line names the
 * pattern letter and the field it belongs to.
 */
static const int32_t gFieldRangeBias[] = {
    -1,  // 'G' - UDAT_ERA_FIELD
    -1,  // 'y' - UDAT_YEAR_FIELD
     1,  // 'M' - UDAT_MONTH_FIELD
     0,  // 'd' - UDAT_DATE_FIELD
    -1,  // 'k' - UDAT_HOUR_OF_DAY1_FIELD
    -1,  // 'H' - UDAT_HOUR_OF_DAY0_FIELD
     0,  // 'm' - UDAT_MINUTE_FIELD
     0,  // 's' - UDAT_SECOND_FIELD
    -1,  // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
    -1,  // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
    -1,  // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
    -1,  // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
    -1,  // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
    -1,  // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
    -1,  // 'a' - UDAT_AM_PM_FIELD
    -1,  // 'h' - UDAT_HOUR1_FIELD
    -1,  // 'K' - UDAT_HOUR0_FIELD
    -1,  // 'z' - UDAT_TIMEZONE_FIELD
    -1,  // 'Y' - UDAT_YEAR_WOY_FIELD
    -1,  // 'e' - UDAT_DOW_LOCAL_FIELD
    -1,  // 'u' - UDAT_EXTENDED_YEAR_FIELD
    -1,  // 'g' - UDAT_JULIAN_DAY_FIELD
    -1,  // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
    -1,  // 'Z' - UDAT_TIMEZONE_RFC_FIELD
    -1,  // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
     0,  // 'c' - UDAT_STANDALONE_DAY_FIELD
     1,  // 'L' - UDAT_STANDALONE_MONTH_FIELD
    -1,  // 'Q' - UDAT_QUARTER_FIELD (1-4?)
    -1,  // 'q' - UDAT_STANDALONE_QUARTER_FIELD
    -1   // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
};
 165
// File-scope mutex.
// NOTE(review): not referenced in the code visible here; presumably it
// serializes the lazy creation of the GMT formatters -- confirm in
// initGMTFormatters().
static UMTX LOCK;

// ICU RTTI boilerplate for this class.
// NOTE(review): presumably provides getStaticClassID()/getDynamicClassID() -- confirm.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
 169
 170 //----------------------------------------------------------------------
 171
 172 SimpleDateFormat::~SimpleDateFormat()
 173 {
 174     delete fSymbols;
 175     if (fGMTFormatters) {
 176         for (int32_t i = 0; i < kNumGMTFormatters; i++) {
 177             if (fGMTFormatters[i]) {
 178                 delete fGMTFormatters[i];
 179             }
 180         }
 181         uprv_free(fGMTFormatters);
 182     }
 183 }
 184
 185 //----------------------------------------------------------------------
 186
 187 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
 188   :   fLocale(Locale::getDefault()),
 189       fSymbols(NULL),
 190       fGMTFormatters(NULL)
 191 {
 192     construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
 193     initializeDefaultCentury();
 194 }
 195
 196 //----------------------------------------------------------------------
 197
 198 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 199                                    UErrorCode &status)
 200 :   fPattern(pattern),
 201     fLocale(Locale::getDefault()),
 202     fSymbols(NULL),
 203     fGMTFormatters(NULL)
 204 {
 205     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 206     initialize(fLocale, status);
 207     initializeDefaultCentury();
 208 }
 209
 210 //----------------------------------------------------------------------
 211
 212 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 213                                    const Locale& locale,
 214                                    UErrorCode& status)
 215 :   fPattern(pattern),
 216     fLocale(locale),
 217     fGMTFormatters(NULL)
 218 {
 219     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 220     initialize(fLocale, status);
 221     initializeDefaultCentury();
 222 }
 223
 224 //----------------------------------------------------------------------
 225
 226 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 227                                    DateFormatSymbols* symbolsToAdopt,
 228                                    UErrorCode& status)
 229 :   fPattern(pattern),
 230     fLocale(Locale::getDefault()),
 231     fSymbols(symbolsToAdopt),
 232     fGMTFormatters(NULL)
 233 {
 234     initializeCalendar(NULL,fLocale,status);
 235     initialize(fLocale, status);
 236     initializeDefaultCentury();
 237 }
 238
 239 //----------------------------------------------------------------------
 240
 241 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 242                                    const DateFormatSymbols& symbols,
 243                                    UErrorCode& status)
 244 :   fPattern(pattern),
 245     fLocale(Locale::getDefault()),
 246     fSymbols(new DateFormatSymbols(symbols)),
 247     fGMTFormatters(NULL)
 248 {
 249     initializeCalendar(NULL, fLocale, status);
 250     initialize(fLocale, status);
 251     initializeDefaultCentury();
 252 }
 253
 254 //----------------------------------------------------------------------
 255
 256 // Not for public consumption; used by DateFormat
 257 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
 258                                    EStyle dateStyle,
 259                                    const Locale& locale,
 260                                    UErrorCode& status)
 261 :   fLocale(locale),
 262     fSymbols(NULL),
 263     fGMTFormatters(NULL)
 264 {
 265     construct(timeStyle, dateStyle, fLocale, status);
 266     if(U_SUCCESS(status)) {
 267       initializeDefaultCentury();
 268     }
 269 }
 270
 271 //----------------------------------------------------------------------
 272
 273 /**
 274  * Not for public consumption; used by DateFormat.  This constructor
 275  * never fails.  If the resource data is not available, it uses the
 276  * the last resort symbols.
 277  */
 278 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
 279                                    UErrorCode& status)
 280 :   fPattern(gDefaultPattern),
 281     fLocale(locale),
 282     fSymbols(NULL),
 283     fGMTFormatters(NULL)
 284 {
 285     if (U_FAILURE(status)) return;
 286     initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status);
 287     if (U_FAILURE(status))
 288     {
 289         status = U_ZERO_ERROR;
 290         delete fSymbols;
 291         // This constructor doesn't fail; it uses last resort data
 292         fSymbols = new DateFormatSymbols(status);
 293         /* test for NULL */
 294         if (fSymbols == 0) {
 295             status = U_MEMORY_ALLOCATION_ERROR;
 296             return;
 297         }
 298     }
 299
 300     initialize(fLocale, status);
 301     if(U_SUCCESS(status)) {
 302       initializeDefaultCentury();
 303     }
 304 }
 305
 306 //----------------------------------------------------------------------
 307
 308 SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
 309 :   DateFormat(other),
 310     fSymbols(NULL),
 311     fGMTFormatters(NULL)
 312 {
 313     *this = other;
 314 }
 315
 316 //----------------------------------------------------------------------
 317
 318 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
 319 {
 320     if (this == &other) {
 321         return *this;
 322     }
 323     DateFormat::operator=(other);
 324
 325     delete fSymbols;
 326     fSymbols = NULL;
 327
 328     if (other.fSymbols)
 329         fSymbols = new DateFormatSymbols(*other.fSymbols);
 330
 331     fDefaultCenturyStart         = other.fDefaultCenturyStart;
 332     fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
 333     fHaveDefaultCentury          = other.fHaveDefaultCentury;
 334
 335     fPattern = other.fPattern;
 336
 337     return *this;
 338 }
 339
 340 //----------------------------------------------------------------------
 341
 342 Format*
 343 SimpleDateFormat::clone() const
 344 {
 345     return new SimpleDateFormat(*this);
 346 }
 347
 348 //----------------------------------------------------------------------
 349
 350 UBool
 351 SimpleDateFormat::operator==(const Format& other) const
 352 {
 353     if (DateFormat::operator==(other)) {
 354         // DateFormat::operator== guarantees following cast is safe
 355         SimpleDateFormat* that = (SimpleDateFormat*)&other;
 356         return (fPattern             == that->fPattern &&
 357                 fSymbols             != NULL && // Check for pathological object
 358                 that->fSymbols       != NULL && // Check for pathological object
 359                 *fSymbols            == *that->fSymbols &&
 360                 fHaveDefaultCentury  == that->fHaveDefaultCentury &&
 361                 fDefaultCenturyStart == that->fDefaultCenturyStart);
 362     }
 363     return FALSE;
 364 }
 365
 366 //----------------------------------------------------------------------
 367
 368 void SimpleDateFormat::construct(EStyle timeStyle,
 369                                  EStyle dateStyle,
 370                                  const Locale& locale,
 371                                  UErrorCode& status)
 372 {
 373     // called by several constructors to load pattern data from the resources
 374     if (U_FAILURE(status)) return;
 375
 376     // We will need the calendar to know what type of symbols to load.
 377     initializeCalendar(NULL, locale, status);
 378     if (U_FAILURE(status)) return;
 379
 380     CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status);
 381     UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status);
 382     if (U_FAILURE(status)) return;
 383
 384     if (ures_getSize(dateTimePatterns) <= kDateTime)
 385     {
 386         status = U_INVALID_FORMAT_ERROR;
 387         return;
 388     }
 389
 390     setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status),
 391                  ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status));
 392
 393     // create a symbols object from the locale
 394     initializeSymbols(locale,fCalendar, status);
 395     if (U_FAILURE(status)) return;
 396     /* test for NULL */
 397     if (fSymbols == 0) {
 398         status = U_MEMORY_ALLOCATION_ERROR;
 399         return;
 400     }
 401
 402     const UChar *resStr;
 403     int32_t resStrLen = 0;
 404
 405     // if the pattern should include both date and time information, use the date/time
 406     // pattern string as a guide to tell use how to glue together the appropriate date
 407     // and time pattern strings.  The actual gluing-together is handled by a convenience
 408     // method on MessageFormat.
 409     if ((timeStyle != kNone) && (dateStyle != kNone))
 410     {
 411         Formattable timeDateArray[2];
 412
 413         // use Formattable::adoptString() so that we can use fastCopyFrom()
 414         // instead of Formattable::setString()'s unaware, safe, deep string clone
 415         // see Jitterbug 2296
 416         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
 417         UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen);
 418         // NULL pointer check
 419         if (tempus1 == NULL) {
 420             status = U_MEMORY_ALLOCATION_ERROR;
 421             return;
 422         }
 423         timeDateArray[0].adoptString(tempus1);
 424
 425         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
 426         UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen);
 427         // Null pointer check
 428         if (tempus2 == NULL) {
 429             status = U_MEMORY_ALLOCATION_ERROR;
 430             return;
 431         }
 432         timeDateArray[1].adoptString(tempus2);
 433
 434         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)kDateTime, &resStrLen, &status);
 435         MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status);
 436     }
 437     // if the pattern includes just time data or just date date, load the appropriate
 438     // pattern string from the resources
 439     // setTo() - see DateFormatSymbols::assignArray comments
 440     else if (timeStyle != kNone) {
 441         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
 442         fPattern.setTo(TRUE, resStr, resStrLen);
 443     }
 444     else if (dateStyle != kNone) {
 445         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
 446         fPattern.setTo(TRUE, resStr, resStrLen);
 447     }
 448
 449     // and if it includes _neither_, that's an error
 450     else
 451         status = U_INVALID_FORMAT_ERROR;
 452
 453     // finally, finish initializing by creating a Calendar and a NumberFormat
 454     initialize(locale, status);
 455 }
 456
 457 //----------------------------------------------------------------------
 458
 459 Calendar*
 460 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
 461 {
 462     if(!U_FAILURE(status)) {
 463         fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status);
 464     }
 465     if (U_SUCCESS(status) && fCalendar == NULL) {
 466         status = U_MEMORY_ALLOCATION_ERROR;
 467     }
 468     return fCalendar;
 469 }
 470
 471 void
 472 SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status)
 473 {
 474   if(U_FAILURE(status)) {
 475     fSymbols = NULL;
 476   } else {
 477     // pass in calendar type - use NULL (default) if no calendar set (or err).
 478     fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
 479     // Null pointer check
 480     if (fSymbols == NULL) {
 481         status = U_MEMORY_ALLOCATION_ERROR;
 482         return;
 483     }
 484   }
 485 }
 486
 487 void
 488 SimpleDateFormat::initialize(const Locale& locale,
 489                              UErrorCode& status)
 490 {
 491     if (U_FAILURE(status)) return;
 492
 493     // We don't need to check that the row count is >= 1, since all 2d arrays have at
 494     // least one row
 495     fNumberFormat = NumberFormat::createInstance(locale, status);
 496     if (fNumberFormat != NULL && U_SUCCESS(status))
 497     {
 498         // no matter what the locale's default number format looked like, we want
 499         // to modify it so that it doesn't use thousands separators, doesn't always
 500         // show the decimal point, and recognizes integers only when parsing
 501
 502         fNumberFormat->setGroupingUsed(FALSE);
 503         if (fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID())
 504             ((DecimalFormat*)fNumberFormat)->setDecimalSeparatorAlwaysShown(FALSE);
 505         fNumberFormat->setParseIntegerOnly(TRUE);
 506         fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
 507
 508                 // TODO: Really, the default should be lenient...
 509                 fNumberFormat->setParseStrict(FALSE);
 510     }
 511     else if (U_SUCCESS(status))
 512     {
 513         status = U_MISSING_RESOURCE_ERROR;
 514     }
 515 }
 516
 517 /* Initialize the fields we use to disambiguate ambiguous years. Separate
 518  * so we can call it from readObject().
 519  */
 520 void SimpleDateFormat::initializeDefaultCentury()
 521 {
 522   if(fCalendar) {
 523     fHaveDefaultCentury = fCalendar->haveDefaultCentury();
 524     if(fHaveDefaultCentury) {
 525       fDefaultCenturyStart = fCalendar->defaultCenturyStart();
 526       fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
 527     } else {
 528       fDefaultCenturyStart = DBL_MIN;
 529       fDefaultCenturyStartYear = -1;
 530     }
 531   }
 532 }
 533
 534 /* Define one-century window into which to disambiguate dates using
 535  * two-digit years. Make public in JDK 1.2.
 536  */
 537 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
 538 {
 539     if(U_FAILURE(status)) {
 540         return;
 541     }
 542     if(!fCalendar) {
 543       status = U_ILLEGAL_ARGUMENT_ERROR;
 544       return;
 545     }
 546
 547     fCalendar->setTime(startDate, status);
 548     if(U_SUCCESS(status)) {
 549         fHaveDefaultCentury = TRUE;
 550         fDefaultCenturyStart = startDate;
 551         fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
 552     }
 553 }
 554
 555 //----------------------------------------------------------------------
 556
 557 UnicodeString&
 558 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
 559 {
 560     UErrorCode status = U_ZERO_ERROR;
 561     pos.setBeginIndex(0);
 562     pos.setEndIndex(0);
 563
 564     UBool inQuote = FALSE;
 565     UChar prevCh = 0;
 566     int32_t count = 0;
 567
 568     // loop through the pattern string character by character
 569     for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
 570         UChar ch = fPattern[i];
 571
 572         // Use subFormat() to format a repeated pattern character
 573         // when a different pattern or non-pattern character is seen
 574         if (ch != prevCh && count > 0) {
 575             subFormat(appendTo, prevCh, count, pos, cal, status);
 576             count = 0;
 577         }
 578         if (ch == QUOTE) {
 579             // Consecutive single quotes are a single quote literal,
 580             // either outside of quotes or between quotes
 581             if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
 582                 appendTo += (UChar)QUOTE;
 583                 ++i;
 584             } else {
 585                 inQuote = ! inQuote;
 586             }
 587         }
 588         else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
 589                     || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
 590             // ch is a date-time pattern character to be interpreted
 591             // by subFormat(); count the number of times it is repeated
 592             prevCh = ch;
 593             ++count;
 594         }
 595         else {
 596             // Append quoted characters and unquoted non-pattern characters
 597             appendTo += ch;
 598         }
 599     }
 600
 601     // Format the last item in the pattern, if any
 602     if (count > 0) {
 603         subFormat(appendTo, prevCh, count, pos, cal, status);
 604     }
 605
 606     // and if something failed (e.g., an invalid format character), reset our FieldPosition
 607     // to (0, 0) to show that
 608     // {sfb} look at this later- are these being set correctly?
 609     if (U_FAILURE(status)) {
 610         pos.setBeginIndex(0);
 611         pos.setEndIndex(0);
 612     }
 613
 614     return appendTo;
 615 }
 616
 617 UnicodeString&
 618 SimpleDateFormat::format(const Formattable& obj,
 619                          UnicodeString& appendTo,
 620                          FieldPosition& pos,
 621                          UErrorCode& status) const
 622 {
 623     // this is just here to get around the hiding problem
 624     // (the previous format() override would hide the version of
 625     // format() on DateFormat that this function correspond to, so we
 626     // have to redefine it here)
 627     return DateFormat::format(obj, appendTo, pos, status);
 628 }
 629
 630 //----------------------------------------------------------------------
 631
/* Maps a calendar field to its calendar field level.
 * The larger the level, the smaller the field unit.
 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
 * UCAL_MONTH level is 20.
 * The letters in each row comment are the pattern letters of the fields
 * in that row.
 * NOTE: if new fields are added, this table needs to be updated.
 */
const int32_t
SimpleDateFormat::fgCalendarFieldToLevel[] =
{
    /*GyM*/ 0, 10, 20,
    /*wW*/ 20, 30,
    /*dDEF*/ 30, 20, 30, 30,
    /*ahHm*/ 40, 50, 50, 60,
    /*sS..*/ 70, 80,
    /*z?Y*/ 0, 0, 10,
    /*eug*/ 30, 10, 0,
    /*A*/ 40
};
 650
 651
/* Maps a calendar field LETTER to its calendar field level.
 * The larger the level, the smaller the field unit.
 * The table has 64 entries covering code points 0x40 ('@') through 0x7F;
 * -1 marks a character that has no level.
 * NOTE: if new fields are added, this table needs to be updated.
 */
const int32_t
SimpleDateFormat::fgPatternCharToLevel[] = {
    //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
        -1, 40, -1, -1, 20, 30, 30,  0, 50, -1, -1, 50, 20, 20, -1, -1,
    //   P   Q   R   S   T   U   V   W   X   Y   Z
        -1, 20, -1, 80, -1, -1,  0, 30, -1, 10,  0, -1, -1, -1, -1, -1,
    //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
        -1, 40, -1, 30, 30, 30, -1,  0, 50, -1, -1, 50, -1, 60, -1, -1,
    //   p   q   r   s   t   u   v   w   x   y   z
        -1, 20, -1, 70, -1, 10,  0, 20, -1, 10,  0, -1, -1, -1, -1, -1
};
 667
 668
// Map index into pattern character string to Calendar field number.
// The letters in each row comment are the pattern characters for that row.
// Several pattern characters share one calendar field: 'k' and 'H' both map
// to UCAL_HOUR_OF_DAY, 'h' and 'K' to UCAL_HOUR, the time zone letters
// (z, Z, v, V) to UCAL_ZONE_OFFSET, and 'M', 'L', 'Q', 'q' to UCAL_MONTH.
const UCalendarDateFields
SimpleDateFormat::fgPatternIndexToCalendarField[] =
{
    /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
    /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
    /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
    /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
    /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
    /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
    /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
    /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
    /*v*/   UCAL_ZONE_OFFSET,
    /*c*/   UCAL_DOW_LOCAL,
    /*L*/   UCAL_MONTH,
    /*Q*/   UCAL_MONTH,
    /*q*/   UCAL_MONTH,
    /*V*/   UCAL_ZONE_OFFSET,
};
 688
// Map index into pattern character string to DateFormat field number.
// The rows follow the same pattern-character order as
// fgPatternIndexToCalendarField; here every pattern character has its own
// distinct UDateFormatField value.
const UDateFormatField
SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
    /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
    /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
    /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
    /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
    /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
    /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
    /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
    /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
    /*v*/   UDAT_TIMEZONE_GENERIC_FIELD,
    /*c*/   UDAT_STANDALONE_DAY_FIELD,
    /*L*/   UDAT_STANDALONE_MONTH_FIELD,
    /*Q*/   UDAT_QUARTER_FIELD,
    /*q*/   UDAT_STANDALONE_QUARTER_FIELD,
    /*V*/   UDAT_TIMEZONE_SPECIAL_FIELD,
};
 707
 708 //----------------------------------------------------------------------
 709
 710 /**
 711  * Append symbols[value] to dst.  Make sure the array index is not out
 712  * of bounds.
 713  */
 714 static inline void
 715 _appendSymbol(UnicodeString& dst,
 716               int32_t value,
 717               const UnicodeString* symbols,
 718               int32_t symbolsCount) {
 719     U_ASSERT(0 <= value && value < symbolsCount);
 720     if (0 <= value && value < symbolsCount) {
 721         dst += symbols[value];
 722     }
 723 }
 724
 725 //---------------------------------------------------------------------
 726 void
 727 SimpleDateFormat::appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const{
 728     int32_t offset = cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status);
 729     if (U_FAILURE(status)) {
 730         return;
 731     }
 732     if (isDefaultGMTFormat()) {
 733         formatGMTDefault(appendTo, offset);
 734     } else {
 735         ((SimpleDateFormat*)this)->initGMTFormatters(status);
 736         if (U_SUCCESS(status)) {
 737             int32_t type;
 738             if (offset < 0) {
 739                 offset = -offset;
 740                 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTNegativeHM : kGMTNegativeHMS;
 741             } else {
 742                 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTPositiveHM : kGMTPositiveHMS;
 743             }
 744             Formattable param(offset, Formattable::kIsDate);
 745             FieldPosition fpos(0);
 746             fGMTFormatters[type]->format(&param, 1, appendTo, fpos, status);
 747         }
 748     }
 749 }
 750
 751 int32_t
 752 SimpleDateFormat::parseGMT(const UnicodeString &text, ParsePosition &pos) const {
 753     if (!isDefaultGMTFormat()) {
 754         int32_t start = pos.getIndex();
 755
 756         // Quick check
 757         UBool prefixMatch = FALSE;
 758         int32_t prefixLen = fSymbols->fGmtFormat.indexOf((UChar)0x007B /* '{' */);
 759         if (prefixLen > 0 && text.compare(start, prefixLen, fSymbols->fGmtFormat, 0, prefixLen) == 0) {
 760             prefixMatch = TRUE;
 761         }
 762         if (prefixMatch) {
 763             // Prefix matched
 764             UErrorCode status = U_ZERO_ERROR;
 765             ((SimpleDateFormat*)this)->initGMTFormatters(status);
 766             if (U_SUCCESS(status)) {
 767                 Formattable parsed;
 768                 int32_t parsedCount;
 769
 770                 // Try negative Hms
 771                 fGMTFormatters[kGMTNegativeHMS]->parseObject(text, parsed, pos);
 772                 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
 773                     parsed.getArray(parsedCount);
 774                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
 775                         return (int32_t)(-1 * (int64_t)parsed[0].getDate());
 776                     }
 777                 }
 778
 779                 // Reset ParsePosition
 780                 pos.setIndex(start);
 781                 pos.setErrorIndex(-1);
 782
 783                 // Try positive Hms
 784                 fGMTFormatters[kGMTPositiveHMS]->parseObject(text, parsed, pos);
 785                 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
 786                     parsed.getArray(parsedCount);
 787                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
 788                         return (int32_t)((int64_t)parsed[0].getDate());
 789                     }
 790                 }
 791
 792                 // Reset ParsePosition
 793                 pos.setIndex(start);
 794                 pos.setErrorIndex(-1);
 795
 796                 // Try negative Hm
 797                 fGMTFormatters[kGMTNegativeHM]->parseObject(text, parsed, pos);
 798                 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
 799                     parsed.getArray(parsedCount);
 800                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
 801                         return (int32_t)(-1 * (int64_t)parsed[0].getDate());
 802                     }
 803                 }
 804
 805                 // Reset ParsePosition
 806                 pos.setIndex(start);
 807                 pos.setErrorIndex(-1);
 808
 809                 // Try positive Hm
 810                 fGMTFormatters[kGMTPositiveHM]->parseObject(text, parsed, pos);
 811                 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
 812                     parsed.getArray(parsedCount);
 813                     if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
 814                         return (int32_t)((int64_t)parsed[0].getDate());
 815                     }
 816                 }
 817
 818                 // Reset ParsePosition
 819                 pos.setIndex(start);
 820                 pos.setErrorIndex(-1);
 821             }
 822             // fall through to the default GMT parsing method
 823         }
 824     }
 825     return parseGMTDefault(text, pos);
 826 }
 827
 828 void
 829 SimpleDateFormat::formatGMTDefault(UnicodeString &appendTo, int32_t offset) const {
 830     if (offset < 0) {
 831         appendTo += gGmtMinus;
 832         offset = -offset; // suppress the '-' sign for text display.
 833     }else{
 834         appendTo += gGmtPlus;
 835     }
 836
 837     offset /= U_MILLIS_PER_SECOND; // now in seconds
 838     int32_t sec = offset % 60;
 839     offset /= 60;
 840     int32_t min = offset % 60;
 841     int32_t hour = offset / 60;
 842
 843
 844     zeroPaddingNumber(appendTo, hour, 2, 2);
 845     appendTo += (UChar)0x003A /*':'*/;
 846     zeroPaddingNumber(appendTo, min, 2, 2);
 847     if (sec != 0) {
 848         appendTo += (UChar)0x003A /*':'*/;
 849         zeroPaddingNumber(appendTo, sec, 2, 2);
 850     }
 851 }
 852
 853 int32_t
 854 SimpleDateFormat::parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const {
 855     int32_t start = pos.getIndex();
 856
 857     if (start + kGmtLen + 1 >= text.length()) {
 858         pos.setErrorIndex(start);
 859         return 0;
 860     }
 861
 862     int32_t cur = start;
 863     // "GMT"
 864     if (text.compare(start, kGmtLen, gGmt) != 0) {
 865         pos.setErrorIndex(start);
 866         return 0;
 867     }
 868     cur += kGmtLen;
 869     // Sign
 870     UBool negative = FALSE;
 871     if (text.charAt(cur) == (UChar)0x002D /* minus */) {
 872         negative = TRUE;
 873     } else if (text.charAt(cur) != (UChar)0x002B /* plus */) {
 874         pos.setErrorIndex(cur);
 875         return 0;
 876     }
 877     cur++;
 878
 879     // Numbers
 880     int32_t numLen;
 881     pos.setIndex(cur);
 882
 883     Formattable number;
 884     parseInt(text, number, 6, pos, FALSE);
 885     numLen = pos.getIndex() - cur;
 886
 887     if (numLen <= 0) {
 888         pos.setIndex(start);
 889         pos.setErrorIndex(cur);
 890         return 0;
 891     }
 892
 893     int32_t numVal = number.getLong();
 894
 895     int32_t hour = 0;
 896     int32_t min = 0;
 897     int32_t sec = 0;
 898
 899     if (numLen <= 2) {
 900         // H[H][:mm[:ss]]
 901         hour = numVal;
 902         cur += numLen;
 903         if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
 904             cur++;
 905             pos.setIndex(cur);
 906             parseInt(text, number, 2, pos, FALSE);
 907             numLen = pos.getIndex() - cur;
 908             if (numLen == 2) {
 909                 // got minute field
 910                 min = number.getLong();
 911                 cur += numLen;
 912                 if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
 913                     cur++;
 914                     pos.setIndex(cur);
 915                     parseInt(text, number, 2, pos, FALSE);
 916                     numLen = pos.getIndex() - cur;
 917                     if (numLen == 2) {
 918                         // got second field
 919                         sec = number.getLong();
 920                     } else {
 921                         // reset position
 922                         pos.setIndex(cur - 1);
 923                         pos.setErrorIndex(-1);
 924                     }
 925                 }
 926             } else {
 927                 // reset postion
 928                 pos.setIndex(cur - 1);
 929                 pos.setErrorIndex(-1);
 930             }
 931         }
 932     } else if (numLen == 3 || numLen == 4) {
 933         // Hmm or HHmm
 934         hour = numVal / 100;
 935         min = numVal % 100;
 936     } else if (numLen == 5 || numLen == 6) {
 937         // Hmmss or HHmmss
 938         hour = numVal / 10000;
 939         min = (numVal % 10000) / 100;
 940         sec = numVal % 100;
 941     } else {
 942         // HHmmss followed by bogus numbers
 943         pos.setIndex(cur + 6);
 944
 945         int32_t shift = numLen - 6;
 946         while (shift > 0) {
 947             numVal /= 10;
 948             shift--;
 949         }
 950         hour = numVal / 10000;
 951         min = (numVal % 10000) / 100;
 952         sec = numVal % 100;
 953     }
 954
 955     int32_t offset = ((hour*60 + min)*60 + sec)*1000;
 956     if (negative) {
 957         offset = -offset;
 958     }
 959     return offset;
 960 }
 961
 962 UBool
 963 SimpleDateFormat::isDefaultGMTFormat() const {
 964     // GMT pattern
 965     if (fSymbols->fGmtFormat.length() == 0) {
 966         // No GMT pattern is set
 967         return TRUE;
 968     } else if (fSymbols->fGmtFormat.compare(gDefGmtPat, kGmtPatLen) != 0) {
 969         return FALSE;
 970     }
 971     // Hour patterns
 972     if (fSymbols->fGmtHourFormats == NULL || fSymbols->fGmtHourFormatsCount != DateFormatSymbols::GMT_HOUR_COUNT) {
 973         // No Hour pattern is set
 974         return TRUE;
 975     } else if ((fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS].compare(gDefGmtNegHmsPat, kNegHmsLen) != 0)
 976         || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM].compare(gDefGmtNegHmPat, kNegHmLen) != 0)
 977         || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS].compare(gDefGmtPosHmsPat, kPosHmsLen) != 0)
 978         || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM].compare(gDefGmtPosHmPat, kPosHmLen) != 0)) {
 979         return FALSE;
 980     }
 981     return TRUE;
 982 }
 983
 984 void
 985 SimpleDateFormat::formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const {
 986     UChar sign = 0x002B /* '+' */;
 987     if (offset < 0) {
 988         offset = -offset;
 989         sign = 0x002D /* '-' */;
 990     }
 991     appendTo.append(sign);
 992
 993     int32_t offsetH = offset / U_MILLIS_PER_HOUR;
 994     offset = offset % U_MILLIS_PER_HOUR;
 995     int32_t offsetM = offset / U_MILLIS_PER_MINUTE;
 996     offset = offset % U_MILLIS_PER_MINUTE;
 997     int32_t offsetS = offset / U_MILLIS_PER_SECOND;
 998
 999     int32_t num = 0, denom = 0;
1000     if (offsetS == 0) {
1001         offset = offsetH*100 + offsetM; // HHmm
1002         num = offset % 10000;
1003         denom = 1000;
1004     } else {
1005         offset = offsetH*10000 + offsetM*100 + offsetS; // HHmmss
1006         num = offset % 1000000;
1007         denom = 100000;
1008     }
1009     while (denom >= 1) {
1010         UChar digit = (UChar)0x0030 + (num / denom);
1011         appendTo.append(digit);
1012         num = num % denom;
1013         denom /= 10;
1014     }
1015 }
1016
1017 void
1018 SimpleDateFormat::initGMTFormatters(UErrorCode &status) {
1019     if (U_FAILURE(status)) {
1020         return;
1021     }
1022     umtx_lock(&LOCK);
1023     if (fGMTFormatters == NULL) {
1024         fGMTFormatters = (MessageFormat**)uprv_malloc(kNumGMTFormatters * sizeof(MessageFormat*));
1025         if (fGMTFormatters) {
1026             for (int32_t i = 0; i < kNumGMTFormatters; i++) {
1027                 const UnicodeString *hourPattern = NULL; //initialized it to avoid warning
1028                 switch (i) {
1029                     case kGMTNegativeHMS:
1030                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS]);
1031                         break;
1032                     case kGMTNegativeHM:
1033                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM]);
1034                         break;
1035                     case kGMTPositiveHMS:
1036                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS]);
1037                         break;
1038                     case kGMTPositiveHM:
1039                         hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM]);
1040                         break;
1041                 }
1042                 fGMTFormatters[i] = new MessageFormat(fSymbols->fGmtFormat, status);
1043                 if (U_FAILURE(status)) {
1044                     break;
1045                 }
1046                 SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone();
1047                 sdf->adoptTimeZone(TimeZone::createTimeZone(UnicodeString(gEtcUTC)));
1048                 sdf->applyPattern(*hourPattern);
1049                 fGMTFormatters[i]->adoptFormat(0, sdf);
1050             }
1051         } else {
1052             status = U_MEMORY_ALLOCATION_ERROR;
1053         }
1054     }
1055     umtx_unlock(&LOCK);
1056 }
1057
1058 //---------------------------------------------------------------------
1059 void
1060 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1061                             UChar ch,
1062                             int32_t count,
1063                             FieldPosition& pos,
1064                             Calendar& cal,
1065                             UErrorCode& status) const
1066 {
1067     if (U_FAILURE(status)) {
1068         return;
1069     }
1070
1071     // this function gets called by format() to produce the appropriate substitution
1072     // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1073
1074     UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
1075     UDateFormatField patternCharIndex;
1076     const int32_t maxIntCount = 10;
1077     int32_t beginOffset = appendTo.length();
1078
1079     // if the pattern character is unrecognized, signal an error and dump out
1080     if (patternCharPtr == NULL)
1081     {
1082         status = U_INVALID_FORMAT_ERROR;
1083         return;
1084     }
1085
1086     patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
1087     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1088     int32_t value = cal.get(field, status);
1089     if (U_FAILURE(status)) {
1090         return;
1091     }
1092
1093     switch (patternCharIndex) {
1094
1095     // for any "G" symbol, write out the appropriate era string
1096     // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1097     case UDAT_ERA_FIELD:
1098         if (count == 5)
1099            _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1100         else if (count == 4)
1101            _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1102         else
1103            _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1104         break;
1105
1106     // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1107     // NEW: UTS#35:
1108 //Year         y     yy     yyy     yyyy     yyyyy
1109 //AD 1         1     01     001     0001     00001
1110 //AD 12       12     12     012     0012     00012
1111 //AD 123     123     23     123     0123     00123
1112 //AD 1234   1234     34    1234     1234     01234
1113 //AD 12345 12345     45   12345    12345     12345
1114         case UDAT_YEAR_FIELD:
1115     case UDAT_YEAR_WOY_FIELD:
1116         if(count == 2)
1117             zeroPaddingNumber(appendTo, value, 2, 2);
1118         else
1119             zeroPaddingNumber(appendTo, value, count, maxIntCount);
1120         break;
1121
1122     // for "MMMM", write out the whole month name, for "MMM", write out the month
1123     // abbreviation, for "M" or "MM", write out the month as a number with the
1124     // appropriate number of digits
1125     // for "MMMMM", use the narrow form
1126     case UDAT_MONTH_FIELD:
1127         if (count == 5)
1128             _appendSymbol(appendTo, value, fSymbols->fNarrowMonths,
1129                           fSymbols->fNarrowMonthsCount);
1130         else if (count == 4)
1131             _appendSymbol(appendTo, value, fSymbols->fMonths,
1132                           fSymbols->fMonthsCount);
1133         else if (count == 3)
1134             _appendSymbol(appendTo, value, fSymbols->fShortMonths,
1135                           fSymbols->fShortMonthsCount);
1136         else
1137             zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
1138         break;
1139
1140     // for "LLLL", write out the whole month name, for "LLL", write out the month
1141     // abbreviation, for "L" or "LL", write out the month as a number with the
1142     // appropriate number of digits
1143     // for "LLLLL", use the narrow form
1144     case UDAT_STANDALONE_MONTH_FIELD:
1145         if (count == 5)
1146             _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowMonths,
1147                           fSymbols->fStandaloneNarrowMonthsCount);
1148         else if (count == 4)
1149             _appendSymbol(appendTo, value, fSymbols->fStandaloneMonths,
1150                           fSymbols->fStandaloneMonthsCount);
1151         else if (count == 3)
1152             _appendSymbol(appendTo, value, fSymbols->fStandaloneShortMonths,
1153                           fSymbols->fStandaloneShortMonthsCount);
1154         else
1155             zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
1156         break;
1157
1158     // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1159     case UDAT_HOUR_OF_DAY1_FIELD:
1160         if (value == 0)
1161             zeroPaddingNumber(appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1162         else
1163             zeroPaddingNumber(appendTo, value, count, maxIntCount);
1164         break;
1165
1166     case UDAT_FRACTIONAL_SECOND_FIELD:
1167         // Fractional seconds left-justify
1168         {
1169             fNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count);
1170             fNumberFormat->setMaximumIntegerDigits(maxIntCount);
1171             if (count == 1) {
1172                 value = (value + 50) / 100;
1173             } else if (count == 2) {
1174                 value = (value + 5) / 10;
1175             }
1176             FieldPosition p(0);
1177             fNumberFormat->format(value, appendTo, p);
1178             if (count > 3) {
1179                 fNumberFormat->setMinimumIntegerDigits(count - 3);
1180                 fNumberFormat->format((int32_t)0, appendTo, p);
1181             }
1182         }
1183         break;
1184
1185     // for "ee" or "e", use local numeric day-of-the-week
1186     // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1187     // for "EEEE" or "eeee", write out the wide day-of-the-week name
1188     // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1189     case UDAT_DOW_LOCAL_FIELD:
1190         if ( count < 3 ) {
1191             zeroPaddingNumber(appendTo, value, count, maxIntCount);
1192             break;
1193         }
1194         // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1195         // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1196         value = cal.get(UCAL_DAY_OF_WEEK, status);
1197         if (U_FAILURE(status)) {
1198             return;
1199         }
1200         // fall through, do not break here
1201     case UDAT_DAY_OF_WEEK_FIELD:
1202         if (count == 5)
1203             _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1204                           fSymbols->fNarrowWeekdaysCount);
1205         else if (count == 4)
1206             _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1207                           fSymbols->fWeekdaysCount);
1208         else
1209             _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1210                           fSymbols->fShortWeekdaysCount);
1211         break;
1212
1213     // for "ccc", write out the abbreviated day-of-the-week name
1214     // for "cccc", write out the wide day-of-the-week name
1215     // for "ccccc", use the narrow day-of-the-week name
1216     case UDAT_STANDALONE_DAY_FIELD:
1217         if ( count < 3 ) {
1218             zeroPaddingNumber(appendTo, value, 1, maxIntCount);
1219             break;
1220         }
1221         // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1222         // we want standard day-of-week, so first fix value.
1223         value = cal.get(UCAL_DAY_OF_WEEK, status);
1224         if (U_FAILURE(status)) {
1225             return;
1226         }
1227         if (count == 5)
1228             _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1229                           fSymbols->fStandaloneNarrowWeekdaysCount);
1230         else if (count == 4)
1231             _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1232                           fSymbols->fStandaloneWeekdaysCount);
1233         else // count == 3
1234             _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1235                           fSymbols->fStandaloneShortWeekdaysCount);
1236         break;
1237
1238     // for and "a" symbol, write out the whole AM/PM string
1239     case UDAT_AM_PM_FIELD:
1240         _appendSymbol(appendTo, value, fSymbols->fAmPms,
1241                       fSymbols->fAmPmsCount);
1242         break;
1243
1244     // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1245     // as "12"
1246     case UDAT_HOUR1_FIELD:
1247         if (value == 0)
1248             zeroPaddingNumber(appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1249         else
1250             zeroPaddingNumber(appendTo, value, count, maxIntCount);
1251         break;
1252
1253     // for the "z" symbols, we have to check our time zone data first.  If we have a
1254     // localized name for the time zone, then "zzzz" / "zzz" indicate whether
1255     // daylight time is in effect (long/short) and "zz" / "z" do not (long/short).
1256     // If we don't have a localized time zone name,
1257     // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the
1258     // offset from GMT) regardless of how many z's were in the pattern symbol
1259     case UDAT_TIMEZONE_FIELD:
1260     case UDAT_TIMEZONE_GENERIC_FIELD:
1261     case UDAT_TIMEZONE_SPECIAL_FIELD:
1262         {
1263             UnicodeString zoneString;
1264             const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
1265             if (zsf) {
1266                 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1267                     if (count < 4) {
1268                         // "z", "zz", "zzz"
1269                         zsf->getSpecificShortString(cal, TRUE /*commonly used only*/,
1270                             zoneString, status);
1271                     } else {
1272                         // "zzzz"
1273                         zsf->getSpecificLongString(cal, zoneString, status);
1274                     }
1275                 } else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1276                     if (count == 1) {
1277                         // "v"
1278                         zsf->getGenericShortString(cal, TRUE /*commonly used only*/,
1279                             zoneString, status);
1280                     } else if (count == 4) {
1281                         // "vvvv"
1282                         zsf->getGenericLongString(cal, zoneString, status);
1283                     }
1284                 } else { // patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD
1285                     if (count == 1) {
1286                         // "V"
1287                         zsf->getSpecificShortString(cal, FALSE /*ignore commonly used*/,
1288                             zoneString, status);
1289                     } else if (count == 4) {
1290                         // "VVVV"
1291                         zsf->getGenericLocationString(cal, zoneString, status);
1292                     }
1293                 }
1294             }
1295             if (zoneString.isEmpty()) {
1296                 appendGMT(appendTo, cal, status);
1297             } else {
1298                 appendTo += zoneString;
1299             }
1300         }
1301         break;
1302
1303     case UDAT_TIMEZONE_RFC_FIELD: // 'Z' - TIMEZONE_RFC
1304         if (count < 4) {
1305             // RFC822 format, must use ASCII digits
1306             value = (cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status));
1307             formatRFC822TZ(appendTo, value);
1308         } else {
1309             // long form, localized GMT pattern
1310             appendGMT(appendTo, cal, status);
1311         }
1312         break;
1313
1314     case UDAT_QUARTER_FIELD:
1315         if (count >= 4)
1316             _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1317                           fSymbols->fQuartersCount);
1318         else if (count == 3)
1319             _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1320                           fSymbols->fShortQuartersCount);
1321         else
1322             zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount);
1323         break;
1324
1325     case UDAT_STANDALONE_QUARTER_FIELD:
1326         if (count >= 4)
1327             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1328                           fSymbols->fStandaloneQuartersCount);
1329         else if (count == 3)
1330             _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1331                           fSymbols->fStandaloneShortQuartersCount);
1332         else
1333             zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount);
1334         break;
1335
1336
1337     // all of the other pattern symbols can be formatted as simple numbers with
1338     // appropriate zero padding
1339     default:
1340         zeroPaddingNumber(appendTo, value, count, maxIntCount);
1341         break;
1342     }
1343
1344     // if the field we're formatting is the one the FieldPosition says it's interested
1345     // in, fill in the FieldPosition with this field's positions
1346     if (pos.getBeginIndex() == pos.getEndIndex() &&
1347         pos.getField() == fgPatternIndexToDateFormatField[patternCharIndex]) {
1348         pos.setBeginIndex(beginOffset);
1349         pos.setEndIndex(appendTo.length());
1350     }
1351 }
1352
1353 //----------------------------------------------------------------------
1354 void
1355 SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int32_t minDigits, int32_t maxDigits) const
1356 {
1357     if (fNumberFormat!=NULL) {
1358         FieldPosition pos(0);
1359
1360         fNumberFormat->setMinimumIntegerDigits(minDigits);
1361         fNumberFormat->setMaximumIntegerDigits(maxDigits);
1362         fNumberFormat->format(value, appendTo, pos);  // 3rd arg is there to speed up processing
1363     }
1364 }
1365
1366 //----------------------------------------------------------------------
1367
1368 /**
1369  * Format characters that indicate numeric fields.  The character
1370  * at index 0 is treated specially.
1371  */
1372 static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */
1373
1374 /**
1375  * Return true if the given format character, occuring count
1376  * times, represents a numeric field.
1377  */
1378 UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
1379     UnicodeString s(NUMERIC_FORMAT_CHARS);
1380     int32_t i = s.indexOf(formatChar);
1381     return (i > 0 || (i == 0 && count < 3));
1382 }
1383
1384 void
1385 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
1386 {
1387     UErrorCode status = U_ZERO_ERROR;
1388     int32_t pos = parsePos.getIndex();
1389     int32_t start = pos;
1390     UBool ambiguousYear[] = { FALSE };
1391     int32_t count = 0;
1392
1393     UBool lenient = isLenient();
1394
1395     // hack, reset tztype, cast away const
1396     ((SimpleDateFormat*)this)->tztype = TZTYPE_UNK;
1397
1398     // For parsing abutting numeric fields. 'abutPat' is the
1399     // offset into 'pattern' of the first of 2 or more abutting
1400     // numeric fields.  'abutStart' is the offset into 'text'
1401     // where parsing the fields begins. 'abutPass' starts off as 0
1402     // and increments each time we try to parse the fields.
1403     int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
1404     int32_t abutStart = 0;
1405     int32_t abutPass = 0;
1406     UBool inQuote = FALSE;
1407
1408     const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
1409
1410     for (int32_t i=0; i<fPattern.length(); ++i) {
1411         UChar ch = fPattern.charAt(i);
1412
1413         // Handle alphabetic field characters.
1414         if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z]
1415             int32_t fieldPat = i;
1416
1417             // Count the length of this field specifier
1418             count = 1;
1419             while ((i+1)<fPattern.length() &&
1420                    fPattern.charAt(i+1) == ch) {
1421                 ++count;
1422                 ++i;
1423             }
1424
1425             if (isNumeric(ch, count)) {
1426                 if (abutPat < 0) {
1427                     // Determine if there is an abutting numeric field.  For
1428                     // most fields we can just look at the next characters,
1429                     // but the 'm' field is either numeric or text,
1430                     // depending on the count, so we have to look ahead for
1431                     // that field.
1432                     if ((i+1)<fPattern.length()) {
1433                         UBool abutting;
1434                         UChar nextCh = fPattern.charAt(i+1);
1435                         int32_t k = numericFormatChars.indexOf(nextCh);
1436                         if (k == 0) {
1437                             int32_t j = i+2;
1438                             while (j<fPattern.length() &&
1439                                    fPattern.charAt(j) == nextCh) {
1440                                 ++j;
1441                             }
1442                             abutting = (j-i) < 4; // nextCount < 3
1443                         } else {
1444                             abutting = k > 0;
1445                         }
1446
1447                         // Record the start of a set of abutting numeric
1448                         // fields.
1449                         if (abutting) {
1450                             abutPat = fieldPat;
1451                             abutStart = pos;
1452                             abutPass = 0;
1453                         }
1454                     }
1455                 }
1456             } else {
1457                 abutPat = -1; // End of any abutting fields
1458             }
1459
1460             // Handle fields within a run of abutting numeric fields.  Take
1461             // the pattern "HHmmss" as an example. We will try to parse
1462             // 2/2/2 characters of the input text, then if that fails,
1463             // 1/2/2.  We only adjust the width of the leftmost field; the
1464             // others remain fixed.  This allows "123456" => 12:34:56, but
1465             // "12345" => 1:23:45.  Likewise, for the pattern "yyyyMMdd" we
1466             // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
1467             if (abutPat >= 0) {
1468                 // If we are at the start of a run of abutting fields, then
1469                 // shorten this field in each pass.  If we can't shorten
1470                 // this field any more, then the parse of this set of
1471                 // abutting numeric fields has failed.
1472                 if (fieldPat == abutPat) {
1473                     count -= abutPass++;
1474                     if (count == 0) {
1475                         parsePos.setIndex(start);
1476                         parsePos.setErrorIndex(pos);
1477                         return;
1478                     }
1479                 }
1480
1481                 pos = subParse(text, pos, ch, count,
1482                                TRUE, FALSE, ambiguousYear, cal);
1483
1484                 // If the parse fails anywhere in the run, back up to the
1485                 // start of the run and retry.
1486                 if (pos < 0) {
1487                     i = abutPat - 1;
1488                     pos = abutStart;
1489                     continue;
1490                 }
1491             }
1492
1493             // Handle non-numeric fields and non-abutting numeric
1494             // fields.
1495             else {
1496                 int32_t s = subParse(text, pos, ch, count,
1497                                      FALSE, TRUE, ambiguousYear, cal);
1498
1499                 if (s <= 0) {
1500                     status = U_PARSE_ERROR;
1501                     goto ExitParse;
1502                 }
1503                 pos = s;
1504             }
1505         }
1506
1507         // Handle literal pattern characters.  These are any
1508         // quoted characters and non-alphabetic unquoted
1509         // characters.
1510         else {
1511
1512             abutPat = -1; // End of any abutting fields
1513
1514             if (! matchLiterals(fPattern, i, text, pos, lenient)) {
1515                 status = U_PARSE_ERROR;
1516                 goto ExitParse;
1517             }
1518         }
1519     }
1520
1521     // At this point the fields of Calendar have been set.  Calendar
1522     // will fill in default values for missing fields when the time
1523     // is computed.
1524
1525     parsePos.setIndex(pos);
1526
1527     // This part is a problem:  When we call parsedDate.after, we compute the time.
1528     // Take the date April 3 2004 at 2:30 am.  When this is first set up, the year
1529     // will be wrong if we're parsing a 2-digit year pattern.  It will be 1904.
1530     // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day.  2:30 am
1531     // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
1532     // on that day.  It is therefore parsed out to fields as 3:30 am.  Then we
1533     // add 100 years, and get April 3 2004 at 3:30 am.  Note that April 3 2004 is
1534     // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
1535     /*
1536         UDate parsedDate = calendar.getTime();
1537         if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
1538             calendar.add(Calendar.YEAR, 100);
1539             parsedDate = calendar.getTime();
1540         }
1541     */
1542     // Because of the above condition, save off the fields in case we need to readjust.
1543     // The procedure we use here is not particularly efficient, but there is no other
1544     // way to do this given the API restrictions present in Calendar.  We minimize
1545     // inefficiency by only performing this computation when it might apply, that is,
1546     // when the two-digit year is equal to the start year, and thus might fall at the
1547     // front or the back of the default century.  This only works because we adjust
1548     // the year correctly to start with in other cases -- see subParse().
1549     if (ambiguousYear[0] || tztype != TZTYPE_UNK) // If this is true then the two-digit year == the default start year
1550     {
1551         // We need a copy of the fields, and we need to avoid triggering a call to
1552         // complete(), which will recalculate the fields.  Since we can't access
1553         // the fields[] array in Calendar, we clone the entire object.  This will
1554         // stop working if Calendar.clone() is ever rewritten to call complete().
1555         Calendar *copy;
1556         if (ambiguousYear[0]) {
1557             copy = cal.clone();
1558             // Check for failed cloning.
1559             if (copy == NULL) {
1560                 status = U_MEMORY_ALLOCATION_ERROR;
1561                 goto ExitParse;
1562             }
1563             UDate parsedDate = copy->getTime(status);
1564             // {sfb} check internalGetDefaultCenturyStart
1565             if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
1566                 // We can't use add here because that does a complete() first.
1567                 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
1568             }
1569             delete copy;
1570         }
1571
1572         if (tztype != TZTYPE_UNK) {
1573             copy = cal.clone();
1574             // Check for failed cloning.
1575             if (copy == NULL) {
1576                 status = U_MEMORY_ALLOCATION_ERROR;
1577                 goto ExitParse;
1578             }
1579             const TimeZone & tz = cal.getTimeZone();
1580             BasicTimeZone *btz = NULL;
1581
1582             if (tz.getDynamicClassID() == OlsonTimeZone::getStaticClassID()
1583                 || tz.getDynamicClassID() == SimpleTimeZone::getStaticClassID()
1584                 || tz.getDynamicClassID() == RuleBasedTimeZone::getStaticClassID()
1585                 || tz.getDynamicClassID() == VTimeZone::getStaticClassID()) {
1586                 btz = (BasicTimeZone*)&tz;
1587             }
1588
1589             // Get local millis
1590             copy->set(UCAL_ZONE_OFFSET, 0);
1591             copy->set(UCAL_DST_OFFSET, 0);
1592             UDate localMillis = copy->getTime(status);
1593
1594             // Make sure parsed time zone type (Standard or Daylight)
1595             // matches the rule used by the parsed time zone.
1596             int32_t raw, dst;
1597             if (btz != NULL) {
1598                 if (tztype == TZTYPE_STD) {
1599                     btz->getOffsetFromLocal(localMillis,
1600                         BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status);
1601                 } else {
1602                     btz->getOffsetFromLocal(localMillis,
1603                         BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status);
1604                 }
1605             } else {
1606                 // No good way to resolve ambiguous time at transition,
1607                 // but following code work in most case.
1608                 tz.getOffset(localMillis, TRUE, raw, dst, status);
1609             }
1610
1611             // Now, compare the results with parsed type, either standard or daylight saving time
1612             int32_t resolvedSavings = dst;
1613             if (tztype == TZTYPE_STD) {
1614                 if (dst != 0) {
1615                     // Override DST_OFFSET = 0 in the result calendar
1616                     resolvedSavings = 0;
1617                 }
1618             } else { // tztype == TZTYPE_DST
1619                 if (dst == 0) {
1620                     if (btz != NULL) {
1621                         UDate time = localMillis + raw;
1622                         // We use the nearest daylight saving time rule.
1623                         TimeZoneTransition beforeTrs, afterTrs;
1624                         UDate beforeT = time, afterT = time;
1625                         int32_t beforeSav = 0, afterSav = 0;
1626                         UBool beforeTrsAvail, afterTrsAvail;
1627
1628                         // Search for DST rule before or on the time
1629                         while (TRUE) {
1630                             beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs);
1631                             if (!beforeTrsAvail) {
1632                                 break;
1633                             }
1634                             beforeT = beforeTrs.getTime() - 1;
1635                             beforeSav = beforeTrs.getFrom()->getDSTSavings();
1636                             if (beforeSav != 0) {
1637                                 break;
1638                             }
1639                         }
1640
1641                         // Search for DST rule after the time
1642                         while (TRUE) {
1643                             afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs);
1644                             if (!afterTrsAvail) {
1645                                 break;
1646                             }
1647                             afterT = afterTrs.getTime();
1648                             afterSav = afterTrs.getTo()->getDSTSavings();
1649                             if (afterSav != 0) {
1650                                 break;
1651                             }
1652                         }
1653
1654                         if (beforeTrsAvail && afterTrsAvail) {
1655                             if (time - beforeT > afterT - time) {
1656                                 resolvedSavings = afterSav;
1657                             } else {
1658                                 resolvedSavings = beforeSav;
1659                             }
1660                         } else if (beforeTrsAvail && beforeSav != 0) {
1661                             resolvedSavings = beforeSav;
1662                         } else if (afterTrsAvail && afterSav != 0) {
1663                             resolvedSavings = afterSav;
1664                         } else {
1665                             resolvedSavings = btz->getDSTSavings();
1666                         }
1667                     } else {
1668                         resolvedSavings = tz.getDSTSavings();
1669                     }
1670                     if (resolvedSavings == 0) {
1671                         // final fallback
1672                         resolvedSavings = U_MILLIS_PER_HOUR;
1673                     }
1674                 }
1675             }
1676             cal.set(UCAL_ZONE_OFFSET, raw);
1677             cal.set(UCAL_DST_OFFSET, resolvedSavings);
1678             delete copy;
1679         }
1680     }
1681 ExitParse:
1682     // If any Calendar calls failed, we pretend that we
1683     // couldn't parse the string, when in reality this isn't quite accurate--
1684     // we did parse it; the Calendar calls just failed.
1685     if (U_FAILURE(status)) {
1686         parsePos.setErrorIndex(pos);
1687         parsePos.setIndex(start);
1688     }
1689 }
1690
/**
 * Parses a date from the given text by delegating to
 * DateFormat::parse(text, pos).
 *
 * @param text the string to parse; passed through unchanged.
 * @param pos  the parse position; passed through to DateFormat::parse().
 * @return whatever DateFormat::parse(text, pos) returns.
 */
UDate
SimpleDateFormat::parse( const UnicodeString& text,
                         ParsePosition& pos) const {
    // Redefined here because the other parse() function hides this function's
    // counterpart on DateFormat.
    return DateFormat::parse(text, pos);
}
1698
/**
 * Parses a date from the given text by delegating to
 * DateFormat::parse(text, status).
 *
 * @param text   the string to parse; passed through unchanged.
 * @param status the error code; passed through to DateFormat::parse().
 * @return whatever DateFormat::parse(text, status) returns.
 */
UDate
SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
{
    // Redefined here because the other parse() function hides this function's
    // counterpart on DateFormat.
    return DateFormat::parse(text, status);
}
1706 //----------------------------------------------------------------------
1707
1708 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
1709                               int32_t start,
1710                               UCalendarDateFields field,
1711                               const UnicodeString* data,
1712                               int32_t dataCount,
1713                               Calendar& cal) const
1714 {
1715     int32_t i = 0;
1716     int32_t count = dataCount;
1717
1718     // There may be multiple strings in the data[] array which begin with
1719     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1720     // We keep track of the longest match, and return that.  Note that this
1721     // unfortunately requires us to test all array elements.
1722     int32_t bestMatchLength = 0, bestMatch = -1;
1723
1724     // {sfb} kludge to support case-insensitive comparison
1725     // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1726     // the length of the match after case folding
1727     // {alan 20040607} don't case change the whole string, since the length
1728     // can change
1729     // TODO we need a case-insensitive startsWith function
1730     UnicodeString lcase, lcaseText;
1731     text.extract(start, INT32_MAX, lcaseText);
1732     lcaseText.foldCase();
1733
1734     for (; i < count; ++i)
1735     {
1736         // Always compare if we have no match yet; otherwise only compare
1737         // against potentially better matches (longer strings).
1738
1739         lcase.fastCopyFrom(data[i]).foldCase();
1740         int32_t length = lcase.length();
1741
1742         if (length > bestMatchLength &&
1743             lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
1744         {
1745             bestMatch = i;
1746             bestMatchLength = length;
1747         }
1748     }
1749     if (bestMatch >= 0)
1750     {
1751         cal.set(field, bestMatch * 3);
1752
1753         // Once we have a match, we have to determine the length of the
1754         // original source string.  This will usually be == the length of
1755         // the case folded string, but it may differ (e.g. sharp s).
1756         lcase.fastCopyFrom(data[bestMatch]).foldCase();
1757
1758         // Most of the time, the length will be the same as the length
1759         // of the string from the locale data.  Sometimes it will be
1760         // different, in which case we will have to figure it out by
1761         // adding a character at a time, until we have a match.  We do
1762         // this all in one loop, where we try 'len' first (at index
1763         // i==0).
1764         int32_t len = data[bestMatch].length(); // 99+% of the time
1765         int32_t n = text.length() - start;
1766         for (i=0; i<=n; ++i) {
1767             int32_t j=i;
1768             if (i == 0) {
1769                 j = len;
1770             } else if (i == len) {
1771                 continue; // already tried this when i was 0
1772             }
1773             text.extract(start, j, lcaseText);
1774             lcaseText.foldCase();
1775             if (lcase == lcaseText) {
1776                 return start + j;
1777             }
1778         }
1779     }
1780
1781     return -start;
1782 }
1783
1784 //----------------------------------------------------------------------
1785 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
1786                                       int32_t &patternOffset,
1787                                       const UnicodeString &text,
1788                                       int32_t &textOffset,
1789                                       UBool lenient)
1790 {
1791     UBool inQuote = FALSE;
1792     UnicodeString literal;
1793     int32_t i = patternOffset;
1794
1795     // scan pattern looking for contiguous literal characters
1796     for ( ; i < pattern.length(); i += 1) {
1797         UChar ch = pattern.charAt(i);
1798
1799         if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z]
1800             break;
1801         }
1802
1803         if (ch == QUOTE) {
1804             // Match a quote literal ('') inside OR outside of quotes
1805             if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
1806                 i += 1;
1807             } else {
1808                 inQuote = !inQuote;
1809                 continue;
1810             }
1811         }
1812
1813         literal += ch;
1814     }
1815
1816     // at this point, literal contains the literal text
1817     // and i is the index of the next non-literal pattern character.
1818     int32_t p;
1819     int32_t t = textOffset;
1820
1821     if (lenient) {
1822         // trim leading, trailing whitespace from
1823         // the literal text
1824         literal.trim();
1825
1826         // ignore any leading whitespace in the text
1827         while (t < text.length() && u_isWhitespace(text.charAt(t))) {
1828             t += 1;
1829         }
1830     }
1831
1832     for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) {
1833         UBool needWhitespace = FALSE;
1834
1835         while (p < literal.length() && uprv_isRuleWhiteSpace(literal.charAt(p))) {
1836             needWhitespace = TRUE;
1837             p += 1;
1838         }
1839
1840         if (needWhitespace) {
1841             int32_t tStart = t;
1842
1843             while (t < text.length()) {
1844                 UChar tch = text.charAt(t);
1845
1846                 if (!u_isUWhiteSpace(tch) && !uprv_isRuleWhiteSpace(tch)) {
1847                     break;
1848                 }
1849
1850                 t += 1;
1851             }
1852
1853             // TODO: should we require internal spaces
1854             // in lenient mode? (There won't be any
1855             // leading or trailing spaces)
1856             if (!lenient && t == tStart) {
1857                 // didn't find matching whitespace:
1858                 // an error in strict mode
1859                 return FALSE;
1860             }
1861
1862             // In strict mode, this run of whitespace
1863             // may have been at the end.
1864             if (p >= literal.length()) {
1865                 break;
1866             }
1867         }
1868
1869         if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
1870             // Ran out of text, or found a non-matching character:
1871             // OK in lenient mode, an error in strict mode.
1872             if (lenient) {
1873                 break;
1874             }
1875
1876             return FALSE;
1877         }
1878     }
1879
1880     // At this point if we're in strict mode we have a complete match.
1881     // If we're in lenient mode we may have a partial match, or no
1882     // match at all.
1883     if (p <= 0) {
1884         // no match. Pretend it matched a run of whitespace
1885         // and ignorables in the text.
1886         const  UnicodeSet *ignorables = NULL;
1887         UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i));
1888
1889         if (patternCharPtr != NULL) {
1890             UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars());
1891
1892             ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
1893         }
1894
1895         for (t = textOffset; t < text.length(); t += 1) {
1896             UChar ch = text.charAt(t);
1897
1898             if (ignorables == NULL || !ignorables->contains(ch)) {
1899                 break;
1900             }
1901         }
1902     }
1903
1904     // if we get here, we've got a complete match.
1905     patternOffset = i - 1;
1906     textOffset = t;
1907
1908     return TRUE;
1909 }
1910
1911 //----------------------------------------------------------------------
1912
1913 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
1914                               int32_t start,
1915                               UCalendarDateFields field,
1916                               const UnicodeString* data,
1917                               int32_t dataCount,
1918                               Calendar& cal) const
1919 {
1920     int32_t i = 0;
1921     int32_t count = dataCount;
1922
1923     if (field == UCAL_DAY_OF_WEEK) i = 1;
1924
1925     // There may be multiple strings in the data[] array which begin with
1926     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1927     // We keep track of the longest match, and return that.  Note that this
1928     // unfortunately requires us to test all array elements.
1929     int32_t bestMatchLength = 0, bestMatch = -1;
1930
1931     // {sfb} kludge to support case-insensitive comparison
1932     // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1933     // the length of the match after case folding
1934     // {alan 20040607} don't case change the whole string, since the length
1935     // can change
1936     // TODO we need a case-insensitive startsWith function
1937     UnicodeString lcase, lcaseText;
1938     text.extract(start, INT32_MAX, lcaseText);
1939     lcaseText.foldCase();
1940
1941     for (; i < count; ++i)
1942     {
1943         // Always compare if we have no match yet; otherwise only compare
1944         // against potentially better matches (longer strings).
1945
1946         lcase.fastCopyFrom(data[i]).foldCase();
1947         int32_t length = lcase.length();
1948
1949         if (length > bestMatchLength &&
1950             lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
1951         {
1952             bestMatch = i;
1953             bestMatchLength = length;
1954         }
1955     }
1956     if (bestMatch >= 0)
1957     {
1958         cal.set(field, bestMatch);
1959
1960         // Once we have a match, we have to determine the length of the
1961         // original source string.  This will usually be == the length of
1962         // the case folded string, but it may differ (e.g. sharp s).
1963         lcase.fastCopyFrom(data[bestMatch]).foldCase();
1964
1965         // Most of the time, the length will be the same as the length
1966         // of the string from the locale data.  Sometimes it will be
1967         // different, in which case we will have to figure it out by
1968         // adding a character at a time, until we have a match.  We do
1969         // this all in one loop, where we try 'len' first (at index
1970         // i==0).
1971         int32_t len = data[bestMatch].length(); // 99+% of the time
1972         int32_t n = text.length() - start;
1973         for (i=0; i<=n; ++i) {
1974             int32_t j=i;
1975             if (i == 0) {
1976                 j = len;
1977             } else if (i == len) {
1978                 continue; // already tried this when i was 0
1979             }
1980             text.extract(start, j, lcaseText);
1981             lcaseText.foldCase();
1982             if (lcase == lcaseText) {
1983                 return start + j;
1984             }
1985         }
1986     }
1987
1988     return -start;
1989 }
1990
1991 //----------------------------------------------------------------------
1992
/**
 * Forwards the given date and status to parseAmbiguousDatesAsAfter().
 * NOTE(review): judging by the names, this sets the start date of the
 * window in which two-digit years are interpreted -- see
 * parseAmbiguousDatesAsAfter() for the actual semantics.
 *
 * @param d      the date passed on to parseAmbiguousDatesAsAfter().
 * @param status the error code passed on to parseAmbiguousDatesAsAfter().
 */
void
SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
{
    parseAmbiguousDatesAsAfter(d, status);
}
1998
1999 /**
2000  * Private member function that converts the parsed date strings into
2001  * timeFields. Returns -start (for ParsePosition) if failed.
2002  * @param text the time text to be parsed.
2003  * @param start where to start parsing.
2004  * @param ch the pattern character for the date field text to be parsed.
2005  * @param count the count of a pattern character.
2006  * @return the new start position if matching succeeded; a negative number
2007  * indicating matching failure, otherwise.
2008  */
2009 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
2010                            UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const
2011 {
2012     Formattable number;
2013     int32_t value = 0;
2014     int32_t i;
2015     ParsePosition pos(0);
2016     int32_t patternCharIndex;
2017     UnicodeString temp;
2018     UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
2019     UBool lenient = isLenient();
2020     UBool gotNumber = FALSE;
2021
2022 #if defined (U_DEBUG_CAL)
2023     //fprintf(stderr, "%s:%d - [%c]  st=%d \n", __FILE__, __LINE__, (char) ch, start);
2024 #endif
2025
2026     if (patternCharPtr == NULL) {
2027         return -start;
2028     }
2029
2030     patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
2031
2032     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
2033
2034     // If there are any spaces here, skip over them.  If we hit the end
2035     // of the string, then fail.
2036     for (;;) {
2037         if (start >= text.length()) {
2038             return -start;
2039         }
2040
2041         UChar32 c = text.char32At(start);
2042
2043         if (!u_isUWhiteSpace(c)) {
2044             break;
2045         }
2046
2047         start += UTF_CHAR_LENGTH(c);
2048     }
2049
2050     pos.setIndex(start);
2051
2052     // We handle a few special cases here where we need to parse
2053     // a number value.  We handle further, more generic cases below.  We need
2054     // to handle some of them here because some fields require extra processing on
2055     // the parsed value.
2056     if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
2057         patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||
2058         patternCharIndex == UDAT_HOUR1_FIELD ||
2059         patternCharIndex == UDAT_HOUR0_FIELD ||
2060         patternCharIndex == UDAT_DOW_LOCAL_FIELD ||
2061         patternCharIndex == UDAT_STANDALONE_DAY_FIELD ||
2062         patternCharIndex == UDAT_MONTH_FIELD ||
2063         patternCharIndex == UDAT_STANDALONE_MONTH_FIELD ||
2064         patternCharIndex == UDAT_QUARTER_FIELD ||
2065         patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD ||
2066         patternCharIndex == UDAT_YEAR_FIELD ||
2067         patternCharIndex == UDAT_YEAR_WOY_FIELD ||
2068         patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
2069     {
2070         int32_t parseStart = pos.getIndex();
2071         // It would be good to unify this with the obeyCount logic below,
2072         // but that's going to be difficult.
2073         const UnicodeString* src;
2074
2075         if (obeyCount) {
2076             if ((start+count) > text.length()) {
2077                 return -start;
2078             }
2079
2080             text.extractBetween(0, start + count, temp);
2081             src = &temp;
2082         } else {
2083             src = &text;
2084         }
2085
2086         parseInt(*src, number, pos, allowNegative);
2087
2088         if (pos.getIndex() > parseStart) {
2089             value = number.getLong();
2090             gotNumber = TRUE;
2091
2092             // Check the range of the value
2093             int32_t bias = gFieldRangeBias[patternCharIndex];
2094
2095             if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
2096                 return -start;
2097             }
2098         }
2099
2100     }
2101
2102     // Make sure that we got a number if
2103     // we want one, and didn't get one
2104     // if we don't want one.
2105     switch (patternCharIndex) {
2106         case UDAT_HOUR_OF_DAY1_FIELD:
2107         case UDAT_HOUR_OF_DAY0_FIELD:
2108         case UDAT_HOUR1_FIELD:
2109         case UDAT_HOUR0_FIELD:
2110             // special range check for hours:
2111             if (value < 0 || value > 24) {
2112                 return -start;
2113             }
2114
2115             // fall through to gotNumber check
2116
2117         case UDAT_YEAR_FIELD:
2118         case UDAT_YEAR_WOY_FIELD:
2119         case UDAT_FRACTIONAL_SECOND_FIELD:
2120             // these must be a number
2121             if (! gotNumber) {
2122                 return -start;
2123             }
2124
2125             break;
2126
2127         case UDAT_DOW_LOCAL_FIELD:
2128         case UDAT_STANDALONE_DAY_FIELD:
2129         case UDAT_MONTH_FIELD:
2130         case UDAT_STANDALONE_MONTH_FIELD:
2131         case UDAT_QUARTER_FIELD:
2132         case UDAT_STANDALONE_QUARTER_FIELD:
2133             // in strict mode, these can only
2134             // be a number if count <= 2
2135             if (!lenient && gotNumber && count > 2) {
2136                 return -1;
2137             }
2138
2139             break;
2140
2141         default:
2142             // we check the rest of the fields below.
2143             break;
2144     }
2145
2146     switch (patternCharIndex) {
2147     case UDAT_ERA_FIELD:
2148         if (count == 5) {
2149             return matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal);
2150         }
2151         if (count == 4) {
2152             return matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal);
2153         }
2154
2155         return matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal);
2156
2157     case UDAT_YEAR_FIELD:
2158         // If there are 3 or more YEAR pattern characters, this indicates
2159         // that the year value is to be treated literally, without any
2160         // two-digit year adjustments (e.g., from "01" to 2001).  Otherwise
2161         // we made adjustments to place the 2-digit year in the proper
2162         // century, for parsed strings from "00" to "99".  Any other string
2163         // is treated literally:  "2250", "-1", "1", "002".
2164         if ((pos.getIndex() - start) == 2
2165             && u_isdigit(text.charAt(start))
2166             && u_isdigit(text.charAt(start+1)))
2167         {
2168             // Assume for example that the defaultCenturyStart is 6/18/1903.
2169             // This means that two-digit years will be forced into the range
2170             // 6/18/1903 to 6/17/2003.  As a result, years 00, 01, and 02
2171             // correspond to 2000, 2001, and 2002.  Years 04, 05, etc. correspond
2172             // to 1904, 1905, etc.  If the year is 03, then it is 2003 if the
2173             // other fields specify a date before 6/18, or 1903 if they specify a
2174             // date afterwards.  As a result, 03 is an ambiguous year.  All other
2175             // two-digit years are unambiguous.
2176           if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
2177               int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2178               ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2179               value += (fDefaultCenturyStartYear/100)*100 +
2180                 (value < ambiguousTwoDigitYear ? 100 : 0);
2181             }
2182         }
2183         cal.set(UCAL_YEAR, value);
2184         return pos.getIndex();
2185
2186     case UDAT_YEAR_WOY_FIELD:
2187         // Comment is the same as for UDAT_Year_FIELDs - look above
2188         if ((pos.getIndex() - start) == 2
2189             && u_isdigit(text.charAt(start))
2190             && u_isdigit(text.charAt(start+1))
2191             && fHaveDefaultCentury )
2192         {
2193             int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2194             ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2195             value += (fDefaultCenturyStartYear/100)*100 +
2196                 (value < ambiguousTwoDigitYear ? 100 : 0);
2197         }
2198         cal.set(UCAL_YEAR_WOY, value);
2199         return pos.getIndex();
2200
2201     case UDAT_MONTH_FIELD:
2202         if (gotNumber) // i.e., M or MM.
2203         {
2204             // Don't want to parse the month if it is a string
2205             // while pattern uses numeric style: M or MM.
2206             // [We computed 'value' above.]
2207             cal.set(UCAL_MONTH, value - 1);
2208             return pos.getIndex();
2209         } else {
2210             // count >= 3 // i.e., MMM or MMMM
2211             // Want to be able to parse both short and long forms.
2212             // Try count == 4 first:
2213             int32_t newStart = 0;
2214
2215             if ((newStart = matchString(text, start, UCAL_MONTH,
2216                                       fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
2217                 return newStart;
2218             else // count == 4 failed, now try count == 3
2219                 return matchString(text, start, UCAL_MONTH,
2220                                    fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal);
2221         }
2222
2223     case UDAT_STANDALONE_MONTH_FIELD:
2224         if (gotNumber) // i.e., L or LL.
2225         {
2226             // Don't want to parse the month if it is a string
2227             // while pattern uses numeric style: M or MM.
2228             // [We computed 'value' above.]
2229             cal.set(UCAL_MONTH, value - 1);
2230             return pos.getIndex();
2231         } else {
2232             // count >= 3 // i.e., LLL or LLLL
2233             // Want to be able to parse both short and long forms.
2234             // Try count == 4 first:
2235             int32_t newStart = 0;
2236
2237             if ((newStart = matchString(text, start, UCAL_MONTH,
2238                                       fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
2239                 return newStart;
2240             else // count == 4 failed, now try count == 3
2241                 return matchString(text, start, UCAL_MONTH,
2242                                    fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal);
2243         }
2244
2245     case UDAT_HOUR_OF_DAY1_FIELD:
2246         // [We computed 'value' above.]
2247         if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
2248             value = 0;
2249
2250         // fall through to set field
2251
2252     case UDAT_HOUR_OF_DAY0_FIELD:
2253         cal.set(UCAL_HOUR_OF_DAY, value);
2254         return pos.getIndex();
2255
2256     case UDAT_FRACTIONAL_SECOND_FIELD:
2257         // Fractional seconds left-justify
2258         i = pos.getIndex() - start;
2259         if (i < 3) {
2260             while (i < 3) {
2261                 value *= 10;
2262                 i++;
2263             }
2264         } else {
2265             int32_t a = 1;
2266             while (i > 3) {
2267                 a *= 10;
2268                 i--;
2269             }
2270             value = (value + (a>>1)) / a;
2271         }
2272         cal.set(UCAL_MILLISECOND, value);
2273         return pos.getIndex();
2274
2275     case UDAT_DOW_LOCAL_FIELD:
2276         if (gotNumber) // i.e., e or ee
2277         {
2278             // [We computed 'value' above.]
2279             cal.set(UCAL_DOW_LOCAL, value);
2280             return pos.getIndex();
2281         }
2282         // else for eee-eeeee fall through to handling of EEE-EEEEE
2283         // fall through, do not break here
2284     case UDAT_DAY_OF_WEEK_FIELD:
2285         {
2286             // Want to be able to parse both short and long forms.
2287             // Try count == 4 (EEEE) first:
2288             int32_t newStart = 0;
2289             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2290                                       fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0)
2291                 return newStart;
2292             // EEEE failed, now try EEE
2293             else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2294                                    fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
2295                 return newStart;
2296             // EEE failed, now try EEEEE
2297             else
2298                 return matchString(text, start, UCAL_DAY_OF_WEEK,
2299                                    fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal);
2300         }
2301
2302     case UDAT_STANDALONE_DAY_FIELD:
2303         {
2304             if (gotNumber) // c or cc
2305             {
2306                 // [We computed 'value' above.]
2307                 cal.set(UCAL_DOW_LOCAL, value);
2308                 return pos.getIndex();
2309             }
2310             // Want to be able to parse both short and long forms.
2311             // Try count == 4 (cccc) first:
2312             int32_t newStart = 0;
2313             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2314                                       fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
2315                 return newStart;
2316             else // cccc failed, now try ccc
2317                 return matchString(text, start, UCAL_DAY_OF_WEEK,
2318                                    fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal);
2319         }
2320
2321     case UDAT_AM_PM_FIELD:
2322         return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
2323
2324     case UDAT_HOUR1_FIELD:
2325         // [We computed 'value' above.]
2326         if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
2327             value = 0;
2328
2329         // fall through to set field
2330
2331     case UDAT_HOUR0_FIELD:
2332         cal.set(UCAL_HOUR, value);
2333         return pos.getIndex();
2334
2335     case UDAT_QUARTER_FIELD:
2336         if (gotNumber) // i.e., Q or QQ.
2337         {
2338             // Don't want to parse the month if it is a string
2339             // while pattern uses numeric style: Q or QQ.
2340             // [We computed 'value' above.]
2341             cal.set(UCAL_MONTH, (value - 1) * 3);
2342             return pos.getIndex();
2343         } else {
2344             // count >= 3 // i.e., QQQ or QQQQ
2345             // Want to be able to parse both short and long forms.
2346             // Try count == 4 first:
2347             int32_t newStart = 0;
2348
2349             if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2350                                       fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
2351                 return newStart;
2352             else // count == 4 failed, now try count == 3
2353                 return matchQuarterString(text, start, UCAL_MONTH,
2354                                    fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal);
2355         }
2356
2357     case UDAT_STANDALONE_QUARTER_FIELD:
2358         if (gotNumber) // i.e., q or qq.
2359         {
2360             // Don't want to parse the month if it is a string
2361             // while pattern uses numeric style: q or q.
2362             // [We computed 'value' above.]
2363             cal.set(UCAL_MONTH, (value - 1) * 3);
2364             return pos.getIndex();
2365         } else {
2366             // count >= 3 // i.e., qqq or qqqq
2367             // Want to be able to parse both short and long forms.
2368             // Try count == 4 first:
2369             int32_t newStart = 0;
2370
2371             if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2372                                       fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
2373                 return newStart;
2374             else // count == 4 failed, now try count == 3
2375                 return matchQuarterString(text, start, UCAL_MONTH,
2376                                    fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal);
2377         }
2378
2379     case UDAT_TIMEZONE_FIELD:
2380     case UDAT_TIMEZONE_RFC_FIELD:
2381     case UDAT_TIMEZONE_GENERIC_FIELD:
2382     case UDAT_TIMEZONE_SPECIAL_FIELD:
2383         {
2384             int32_t offset = 0;
2385             UBool parsed = FALSE;
2386
2387             // Step 1
2388             // Check if this is a long GMT offset string (either localized or default)
2389             offset = parseGMT(text, pos);
2390             if (pos.getIndex() - start > 0) {
2391                 parsed = TRUE;
2392             }
2393             if (!parsed) {
2394                 // Step 2
2395                 // Check if this is an RFC822 time zone offset.
2396                 // ICU supports the standard RFC822 format [+|-]HHmm
2397                 // and its extended form [+|-]HHmmSS.
2398                 do {
2399                     int32_t sign = 0;
2400                     UChar signChar = text.charAt(start);
2401                     if (signChar == (UChar)0x002B /* '+' */) {
2402                         sign = 1;
2403                     } else if (signChar == (UChar)0x002D /* '-' */) {
2404                         sign = -1;
2405                     } else {
2406                         // Not an RFC822 offset string
2407                         break;
2408                     }
2409
2410                     // Parse digits
2411                     int32_t orgPos = start + 1;
2412                     pos.setIndex(orgPos);
2413                     parseInt(text, number, 6, pos, FALSE);
2414                     int32_t numLen = pos.getIndex() - orgPos;
2415                     if (numLen <= 0) {
2416                         break;
2417                     }
2418
2419                     // Followings are possible format (excluding sign char)
2420                     // HHmmSS
2421                     // HmmSS
2422                     // HHmm
2423                     // Hmm
2424                     // HH
2425                     // H
2426                     int32_t val = number.getLong();
2427                     int32_t hour = 0, min = 0, sec = 0;
2428                     switch(numLen) {
2429                     case 1: // H
2430                     case 2: // HH
2431                         hour = val;
2432                         break;
2433                     case 3: // Hmm
2434                     case 4: // HHmm
2435                         hour = val / 100;
2436                         min = val % 100;
2437                         break;
2438                     case 5: // Hmmss
2439                     case 6: // HHmmss
2440                         hour = val / 10000;
2441                         min = (val % 10000) / 100;
2442                         sec = val % 100;
2443                         break;
2444                     }
2445                     if (hour > 23 || min > 59 || sec > 59) {
2446                         // Invalid value range
2447                         break;
2448                     }
2449                     offset = (((hour * 60) + min) * 60 + sec) * 1000 * sign;
2450                     parsed = TRUE;
2451                 } while (FALSE);
2452
2453                 if (!parsed) {
2454                     // Failed to parse.  Reset the position.
2455                     pos.setIndex(start);
2456                 }
2457             }
2458
2459             if (parsed) {
2460                 // offset was successfully parsed as either a long GMT string or RFC822 zone offset
2461                 // string.  Create normalized zone ID for the offset.
2462
2463                 UnicodeString tzID(gGmt);
2464                 formatRFC822TZ(tzID, offset);
2465                 //TimeZone *customTZ = TimeZone::createTimeZone(tzID);
2466                 TimeZone *customTZ = new SimpleTimeZone(offset, tzID);    // faster than TimeZone::createTimeZone
2467                 cal.adoptTimeZone(customTZ);
2468
2469                 return pos.getIndex();
2470             }
2471
2472             // Step 3
2473             // At this point, check for named time zones by looking through
2474             // the locale data from the DateFormatZoneData strings.
2475             // Want to be able to parse both short and long forms.
2476             // optimize for calendar's current time zone
2477             const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
2478             if (zsf) {
2479                 UErrorCode status = U_ZERO_ERROR;
2480                 const ZoneStringInfo *zsinfo = NULL;
2481                 int32_t matchLen;
2482
2483                 switch (patternCharIndex) {
2484                     case UDAT_TIMEZONE_FIELD: // 'z'
2485                         if (count < 4) {
2486                             zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
2487                         } else {
2488                             zsinfo = zsf->findSpecificLong(text, start, matchLen, status);
2489                         }
2490                         break;
2491                     case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
2492                         if (count == 1) {
2493                             zsinfo = zsf->findGenericShort(text, start, matchLen, status);
2494                         } else if (count == 4) {
2495                             zsinfo = zsf->findGenericLong(text, start, matchLen, status);
2496                         }
2497                         break;
2498                     case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
2499                         if (count == 1) {
2500                             zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
2501                         } else if (count == 4) {
2502                             zsinfo = zsf->findGenericLocation(text, start, matchLen, status);
2503                         }
2504                         break;
2505                 }
2506
2507                 if (U_SUCCESS(status) && zsinfo != NULL) {
2508                     if (zsinfo->isStandard()) {
2509                         ((SimpleDateFormat*)this)->tztype = TZTYPE_STD;
2510                     } else if (zsinfo->isDaylight()) {
2511                         ((SimpleDateFormat*)this)->tztype = TZTYPE_DST;
2512                     }
2513                     UnicodeString tzid;
2514                     zsinfo->getID(tzid);
2515
2516                     UnicodeString current;
2517                     cal.getTimeZone().getID(current);
2518                     if (tzid != current) {
2519                         TimeZone *tz = TimeZone::createTimeZone(tzid);
2520                         cal.adoptTimeZone(tz);
2521                     }
2522                     return start + matchLen;
2523                 }
2524             }
2525             // complete failure
2526             return -start;
2527         }
2528
2529     default:
2530         // Handle "generic" fields
2531         int32_t parseStart = pos.getIndex();
2532         const UnicodeString* src;
2533         if (obeyCount) {
2534             if ((start+count) > text.length()) {
2535                 return -start;
2536             }
2537             text.extractBetween(0, start + count, temp);
2538             src = &temp;
2539         } else {
2540             src = &text;
2541         }
2542         parseInt(*src, number, pos, allowNegative);
2543         if (pos.getIndex() != parseStart) {
2544             int32_t value = number.getLong();
2545
2546             // Check the range of the value
2547             int32_t bias = gFieldRangeBias[patternCharIndex];
2548
2549             if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) {
2550                 cal.set(field, value);
2551                 return pos.getIndex();
2552             }
2553         }
2554
2555         return -start;
2556     }
2557 }
2558
2559 /**
2560  * Parse an integer using fNumberFormat.  This method is semantically
2561  * const, but actually may modify fNumberFormat.
2562  */
2563 void SimpleDateFormat::parseInt(const UnicodeString& text,
2564                                 Formattable& number,
2565                                 ParsePosition& pos,
2566                                 UBool allowNegative) const {
2567     parseInt(text, number, -1, pos, allowNegative);
2568 }
2569
2570 /**
2571  * Parse an integer using fNumberFormat up to maxDigits.
2572  */
2573 void SimpleDateFormat::parseInt(const UnicodeString& text,
2574                                 Formattable& number,
2575                                 int32_t maxDigits,
2576                                 ParsePosition& pos,
2577                                 UBool allowNegative) const {
2578     UnicodeString oldPrefix;
2579     DecimalFormat* df = NULL;
2580     if (!allowNegative &&
2581         fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
2582         df = (DecimalFormat*)fNumberFormat;
2583         df->getNegativePrefix(oldPrefix);
2584         df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX);
2585     }
2586     int32_t oldPos = pos.getIndex();
2587     fNumberFormat->parse(text, number, pos);
2588     if (df != NULL) {
2589         df->setNegativePrefix(oldPrefix);
2590     }
2591
2592     if (maxDigits > 0) {
2593         // adjust the result to fit into
2594         // the maxDigits and move the position back
2595         int32_t nDigits = pos.getIndex() - oldPos;
2596         if (nDigits > maxDigits) {
2597             int32_t val = number.getLong();
2598             nDigits -= maxDigits;
2599             while (nDigits > 0) {
2600                 val /= 10;
2601                 nDigits--;
2602             }
2603             pos.setIndex(oldPos + maxDigits);
2604             number.setLong(val);
2605         }
2606     }
2607 }
2608
2609 //----------------------------------------------------------------------
2610
2611 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
2612                                         UnicodeString& translatedPattern,
2613                                         const UnicodeString& from,
2614                                         const UnicodeString& to,
2615                                         UErrorCode& status)
2616 {
2617   // run through the pattern and convert any pattern symbols from the version
2618   // in "from" to the corresponding character ion "to".  This code takes
2619   // quoted strings into account (it doesn't try to translate them), and it signals
2620   // an error if a particular "pattern character" doesn't appear in "from".
2621   // Depending on the values of "from" and "to" this can convert from generic
2622   // to localized patterns or localized to generic.
2623   if (U_FAILURE(status))
2624     return;
2625
2626   translatedPattern.remove();
2627   UBool inQuote = FALSE;
2628   for (int32_t i = 0; i < originalPattern.length(); ++i) {
2629     UChar c = originalPattern[i];
2630     if (inQuote) {
2631       if (c == QUOTE)
2632     inQuote = FALSE;
2633     }
2634     else {
2635       if (c == QUOTE)
2636     inQuote = TRUE;
2637       else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/
2638            || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) {
2639     int32_t ci = from.indexOf(c);
2640     if (ci == -1) {
2641       status = U_INVALID_FORMAT_ERROR;
2642       return;
2643     }
2644     c = to[ci];
2645       }
2646     }
2647     translatedPattern += c;
2648   }
2649   if (inQuote) {
2650     status = U_INVALID_FORMAT_ERROR;
2651     return;
2652   }
2653 }
2654
2655 //----------------------------------------------------------------------
2656
2657 UnicodeString&
2658 SimpleDateFormat::toPattern(UnicodeString& result) const
2659 {
2660     result = fPattern;
2661     return result;
2662 }
2663
2664 //----------------------------------------------------------------------
2665
2666 UnicodeString&
2667 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
2668                                      UErrorCode& status) const
2669 {
2670     translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status);
2671     return result;
2672 }
2673
2674 //----------------------------------------------------------------------
2675
2676 void
2677 SimpleDateFormat::applyPattern(const UnicodeString& pattern)
2678 {
2679     fPattern = pattern;
2680 }
2681
2682 //----------------------------------------------------------------------
2683
2684 void
2685 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
2686                                         UErrorCode &status)
2687 {
2688     translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status);
2689 }
2690
2691 //----------------------------------------------------------------------
2692
2693 const DateFormatSymbols*
2694 SimpleDateFormat::getDateFormatSymbols() const
2695 {
2696     return fSymbols;
2697 }
2698
2699 //----------------------------------------------------------------------
2700
2701 void
2702 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
2703 {
2704     delete fSymbols;
2705     fSymbols = newFormatSymbols;
2706 }
2707
2708 //----------------------------------------------------------------------
2709 void
2710 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
2711 {
2712     delete fSymbols;
2713     fSymbols = new DateFormatSymbols(newFormatSymbols);
2714 }
2715
2716
2717 //----------------------------------------------------------------------
2718
2719
2720 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
2721 {
2722   UErrorCode status = U_ZERO_ERROR;
2723   DateFormat::adoptCalendar(calendarToAdopt);
2724   delete fSymbols;
2725   fSymbols=NULL;
2726   initializeSymbols(fLocale, fCalendar, status);  // we need new symbols
2727   initializeDefaultCentury();  // we need a new century (possibly)
2728 }
2729
2730
2731 //----------------------------------------------------------------------
2732
2733
2734 UBool
2735 SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
2736     return isFieldUnitIgnored(fPattern, field);
2737 }
2738
2739
2740 UBool
2741 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
2742                                      UCalendarDateFields field) {
2743     int32_t fieldLevel = fgCalendarFieldToLevel[field];
2744     int32_t level;
2745     UChar ch;
2746     UBool inQuote = FALSE;
2747     UChar prevCh = 0;
2748     int32_t count = 0;
2749
2750     for (int32_t i = 0; i < pattern.length(); ++i) {
2751         ch = pattern[i];
2752         if (ch != prevCh && count > 0) {
2753             level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
2754             // the larger the level, the smaller the field unit.
2755             if ( fieldLevel <= level ) {
2756                 return FALSE;
2757             }
2758             count = 0;
2759         }
2760         if (ch == QUOTE) {
2761             if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
2762                 ++i;
2763             } else {
2764                 inQuote = ! inQuote;
2765             }
2766         }
2767         else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
2768                     || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
2769             prevCh = ch;
2770             ++count;
2771         }
2772     }
2773     if ( count > 0 ) {
2774         // last item
2775         level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
2776             if ( fieldLevel <= level ) {
2777                 return FALSE;
2778             }
2779     }
2780     return TRUE;
2781 }
2782
2783
2784
2785 const Locale&
2786 SimpleDateFormat::getSmpFmtLocale(void) const {
2787     return fLocale;
2788 }
2789
2790
2791
2792 U_NAMESPACE_END
2793
2794 #endif /* #if !UCONFIG_NO_FORMATTING */
2795
2796 //eof