icuSources/i18n/smpdtfmt.cpp

   1 /*
   2 *******************************************************************************
   3 * Copyright (C) 1997-2004, International Business Machines Corporation and    *
   4 * others. All Rights Reserved.                                                *
   5 *******************************************************************************
   6 *
   7 * File SMPDTFMT.CPP
   8 *
   9 * Modification History:
  10 *
  11 *   Date        Name        Description
  12 *   02/19/97    aliu        Converted from java.
  13 *   03/31/97    aliu        Modified extensively to work with 50 locales.
  14 *   04/01/97    aliu        Added support for centuries.
  15 *   07/09/97    helena      Made ParsePosition into a class.
  16 *   07/21/98    stephen     Added initializeDefaultCentury.
  17 *                             Removed getZoneIndex (added in DateFormatSymbols)
  18 *                             Removed subParseLong
  19 *                             Removed chk
  20 *  02/22/99     stephen     Removed character literals for EBCDIC safety
  21 *   10/14/99    aliu        Updated 2-digit year parsing so that only "00" thru
  22 *                           "99" are recognized. {j28 4182066}
  23 *   11/15/99    weiv        Added support for week of year/day of week format
  24 ********************************************************************************
  25 */
  26
  27 #include "unicode/utypes.h"
  28
  29 #if !UCONFIG_NO_FORMATTING
  30
  31 #include "unicode/smpdtfmt.h"
  32 #include "unicode/dtfmtsym.h"
  33 #include "unicode/ures.h"
  34 #include "unicode/msgfmt.h"
  35 #include "unicode/calendar.h"
  36 #include "unicode/gregocal.h"
  37 #include "unicode/timezone.h"
  38 #include "unicode/decimfmt.h"
  39 #include "unicode/dcfmtsym.h"
  40 #include "unicode/uchar.h"
  41 #include "unicode/ustring.h"
  42 #include "util.h"
  43 #include "gregoimp.h"
  44 #include "cstring.h"
  45 #include "uassert.h"
  46 #include <float.h>
  47
  48 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
  49 #include <stdio.h>
  50 #endif
  51
  52 // *****************************************************************************
  53 // class SimpleDateFormat
  54 // *****************************************************************************
  55
  56 U_NAMESPACE_BEGIN
  57
  58 /**
  59  * Last-resort string to use for "GMT" when constructing time zone strings.
  60  */
  61 // For time zones that have no names, use strings GMT+minutes and
  62 // GMT-minutes. For instance, in France the time zone is GMT+60.
  63 // Also accepted are GMT+H:MM or GMT-H:MM.
  64 static const UChar gGmt[]      = {0x0047, 0x004D, 0x0054, 0x0000};         // "GMT"
  65 static const UChar gGmtPlus[]  = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
  66 static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
  67
  68 // This is a pattern-of-last-resort used when we can't load a usable pattern out
  69 // of a resource.
  70 static const UChar gDefaultPattern[] =
  71 {
  72     0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
  73 };  /* "yyyyMMdd hh:mm a" */
  74
  75 // This prefix is designed to NEVER MATCH real text, in order to
  76 // suppress the parsing of negative numbers.  Adjust as needed (if
  77 // this becomes valid Unicode).
  78 static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
  79
  80 /**
  81  * These are the tags we expect to see in normal resource bundle files associated
  82  * with a locale.
  83  */
  84 static const char gDateTimePatternsTag[]="DateTimePatterns";
  85
  86 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
  87
  88 static const UChar QUOTE = 0x27; // Single quote
  89
  90 //----------------------------------------------------------------------
  91
  92 SimpleDateFormat::~SimpleDateFormat()
  93 {
  94     delete fSymbols;
  95 }
  96
  97 //----------------------------------------------------------------------
  98
  99 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
 100   :   fLocale(Locale::getDefault()),
 101       fSymbols(NULL)
 102 {
 103     construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
 104     initializeDefaultCentury();
 105 }
 106
 107 //----------------------------------------------------------------------
 108
 109 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 110                                    UErrorCode &status)
 111 :   fPattern(pattern),
 112     fLocale(Locale::getDefault()),
 113     fSymbols(NULL)
 114 {
 115     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 116     initialize(fLocale, status);
 117     initializeDefaultCentury();
 118 }
 119
 120 //----------------------------------------------------------------------
 121
 122 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 123                                    const Locale& locale,
 124                                    UErrorCode& status)
 125 :   fPattern(pattern),
 126     fLocale(locale)
 127 {
 128     initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
 129     initialize(fLocale, status);
 130     initializeDefaultCentury();
 131 }
 132
 133 //----------------------------------------------------------------------
 134
 135 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 136                                    DateFormatSymbols* symbolsToAdopt,
 137                                    UErrorCode& status)
 138 :   fPattern(pattern),
 139     fLocale(Locale::getDefault()),
 140     fSymbols(symbolsToAdopt)
 141 {
 142     initializeCalendar(NULL,fLocale,status);
 143     initialize(fLocale, status);
 144     initializeDefaultCentury();
 145 }
 146
 147 //----------------------------------------------------------------------
 148
 149 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
 150                                    const DateFormatSymbols& symbols,
 151                                    UErrorCode& status)
 152 :   fPattern(pattern),
 153     fLocale(Locale::getDefault()),
 154     fSymbols(new DateFormatSymbols(symbols))
 155 {
 156     initializeCalendar(NULL, fLocale, status);
 157     initialize(fLocale, status);
 158     initializeDefaultCentury();
 159 }
 160
 161 //----------------------------------------------------------------------
 162
 163 // Not for public consumption; used by DateFormat
 164 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
 165                                    EStyle dateStyle,
 166                                    const Locale& locale,
 167                                    UErrorCode& status)
 168 :   fLocale(locale),
 169     fSymbols(NULL)
 170 {
 171     construct(timeStyle, dateStyle, fLocale, status);
 172     if(U_SUCCESS(status)) {
 173       initializeDefaultCentury();
 174     }
 175 }
 176
 177 //----------------------------------------------------------------------
 178
 179 /**
 180  * Not for public consumption; used by DateFormat.  This constructor
 181  * never fails.  If the resource data is not available, it uses the
 182  * the last resort symbols.
 183  */
 184 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
 185                                    UErrorCode& status)
 186 :   fPattern(gDefaultPattern),
 187     fLocale(locale),
 188     fSymbols(NULL)
 189 {
 190     if (U_FAILURE(status)) return;
 191     initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status);
 192     if (U_FAILURE(status))
 193     {
 194         status = U_ZERO_ERROR;
 195         delete fSymbols;
 196         // This constructor doesn't fail; it uses last resort data
 197         fSymbols = new DateFormatSymbols(status);
 198         /* test for NULL */
 199         if (fSymbols == 0) {
 200             status = U_MEMORY_ALLOCATION_ERROR;
 201             return;
 202         }
 203     }
 204
 205     initialize(fLocale, status);
 206     if(U_SUCCESS(status)) {
 207       initializeDefaultCentury();
 208     }
 209 }
 210
 211 //----------------------------------------------------------------------
 212
 213 SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
 214 :   DateFormat(other),
 215     fSymbols(NULL)
 216 {
 217     *this = other;
 218 }
 219
 220 //----------------------------------------------------------------------
 221
 222 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
 223 {
 224     DateFormat::operator=(other);
 225
 226     delete fSymbols;
 227     fSymbols = NULL;
 228
 229     if (other.fSymbols)
 230         fSymbols = new DateFormatSymbols(*other.fSymbols);
 231
 232     fDefaultCenturyStart         = other.fDefaultCenturyStart;
 233     fDefaultCenturyStartYear     = other.fDefaultCenturyStartYear;
 234     fHaveDefaultCentury          = other.fHaveDefaultCentury;
 235
 236     fPattern = other.fPattern;
 237
 238     return *this;
 239 }
 240
 241 //----------------------------------------------------------------------
 242
 243 Format*
 244 SimpleDateFormat::clone() const
 245 {
 246     return new SimpleDateFormat(*this);
 247 }
 248
 249 //----------------------------------------------------------------------
 250
 251 UBool
 252 SimpleDateFormat::operator==(const Format& other) const
 253 {
 254     if (DateFormat::operator==(other)) {
 255         // DateFormat::operator== guarantees following cast is safe
 256         SimpleDateFormat* that = (SimpleDateFormat*)&other;
 257         return     (fPattern             == that->fPattern &&
 258                 fSymbols             != NULL && // Check for pathological object
 259                 that->fSymbols         != NULL && // Check for pathological object
 260                 *fSymbols             == *that->fSymbols &&
 261                     fHaveDefaultCentury == that->fHaveDefaultCentury &&
 262                 fDefaultCenturyStart == that->fDefaultCenturyStart);
 263     }
 264     return FALSE;
 265 }
 266
 267 //----------------------------------------------------------------------
 268
 269 void SimpleDateFormat::construct(EStyle timeStyle,
 270                                  EStyle dateStyle,
 271                                  const Locale& locale,
 272                                  UErrorCode& status)
 273 {
 274     // called by several constructors to load pattern data from the resources
 275     if (U_FAILURE(status)) return;
 276
 277     // We will need the calendar to know what type of symbols to load.
 278     initializeCalendar(NULL, locale, status);
 279
 280     CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status);
 281     UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status);
 282     if (U_FAILURE(status)) return;
 283
 284     if (ures_getSize(dateTimePatterns) <= kDateTime)
 285     {
 286         status = U_INVALID_FORMAT_ERROR;
 287         return;
 288     }
 289
 290     setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status),
 291                  ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status));
 292
 293     // create a symbols object from the locale
 294     initializeSymbols(locale,fCalendar, status);
 295     if (U_FAILURE(status)) return;
 296     /* test for NULL */
 297     if (fSymbols == 0) {
 298         status = U_MEMORY_ALLOCATION_ERROR;
 299         return;
 300     }
 301
 302     const UChar *resStr;
 303     int32_t resStrLen = 0;
 304
 305     // if the pattern should include both date and time information, use the date/time
 306     // pattern string as a guide to tell use how to glue together the appropriate date
 307     // and time pattern strings.  The actual gluing-together is handled by a convenience
 308     // method on MessageFormat.
 309     if ((timeStyle != kNone) && (dateStyle != kNone))
 310     {
 311         Formattable timeDateArray[2];
 312
 313         // use Formattable::adoptString() so that we can use fastCopyFrom()
 314         // instead of Formattable::setString()'s unaware, safe, deep string clone
 315         // see Jitterbug 2296
 316         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
 317         timeDateArray[0].adoptString(new UnicodeString(TRUE, resStr, resStrLen));
 318         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
 319         timeDateArray[1].adoptString(new UnicodeString(TRUE, resStr, resStrLen));
 320
 321         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)kDateTime, &resStrLen, &status);
 322         MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status);
 323     }
 324     // if the pattern includes just time data or just date date, load the appropriate
 325     // pattern string from the resources
 326     // setTo() - see DateFormatSymbols::assignArray comments
 327     else if (timeStyle != kNone) {
 328         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
 329         fPattern.setTo(TRUE, resStr, resStrLen);
 330     }
 331     else if (dateStyle != kNone) {
 332         resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
 333         fPattern.setTo(TRUE, resStr, resStrLen);
 334     }
 335
 336     // and if it includes _neither_, that's an error
 337     else
 338         status = U_INVALID_FORMAT_ERROR;
 339
 340     // finally, finish initializing by creating a Calendar and a NumberFormat
 341     initialize(locale, status);
 342 }
 343
 344 //----------------------------------------------------------------------
 345
 346 Calendar*
 347 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
 348 {
 349   if(!U_FAILURE(status)) {
 350     fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status);
 351   }
 352   return fCalendar;
 353 }
 354
 355 void
 356 SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status)
 357 {
 358   if(U_FAILURE(status)) {
 359     fSymbols = NULL;
 360   } else {
 361     // pass in calendar type - use NULL (default) if no calendar set (or err).
 362     fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
 363   }
 364 }
 365
 366 void
 367 SimpleDateFormat::initialize(const Locale& locale,
 368                              UErrorCode& status)
 369 {
 370     if (U_FAILURE(status)) return;
 371
 372     // {sfb} should this be here?
 373     if (fSymbols->fZoneStringsColCount < 1)
 374     {
 375         status = U_INVALID_FORMAT_ERROR; // Check for bogus locale data
 376         return;
 377     }
 378
 379     // We don't need to check that the row count is >= 1, since all 2d arrays have at
 380     // least one row
 381     fNumberFormat = NumberFormat::createInstance(locale, status);
 382     if (fNumberFormat != NULL && U_SUCCESS(status))
 383     {
 384         // no matter what the locale's default number format looked like, we want
 385         // to modify it so that it doesn't use thousands separators, doesn't always
 386         // show the decimal point, and recognizes integers only when parsing
 387
 388         fNumberFormat->setGroupingUsed(FALSE);
 389         if (fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID())
 390             ((DecimalFormat*)fNumberFormat)->setDecimalSeparatorAlwaysShown(FALSE);
 391         fNumberFormat->setParseIntegerOnly(TRUE);
 392         fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
 393     }
 394     else if (U_SUCCESS(status))
 395     {
 396         status = U_MISSING_RESOURCE_ERROR;
 397     }
 398 }
 399
 400 /* Initialize the fields we use to disambiguate ambiguous years. Separate
 401  * so we can call it from readObject().
 402  */
 403 void SimpleDateFormat::initializeDefaultCentury()
 404 {
 405   if(fCalendar) {
 406     fHaveDefaultCentury = fCalendar->haveDefaultCentury();
 407     if(fHaveDefaultCentury) {
 408       fDefaultCenturyStart = fCalendar->defaultCenturyStart();
 409       fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
 410     } else {
 411       fDefaultCenturyStart = DBL_MIN;
 412       fDefaultCenturyStartYear = -1;
 413     }
 414   }
 415 }
 416
 417 /* Define one-century window into which to disambiguate dates using
 418  * two-digit years. Make public in JDK 1.2.
 419  */
 420 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
 421 {
 422     if(U_FAILURE(status)) {
 423         return;
 424     }
 425     if(!fCalendar) {
 426       status = U_ILLEGAL_ARGUMENT_ERROR;
 427       return;
 428     }
 429
 430     fCalendar->setTime(startDate, status);
 431     if(U_SUCCESS(status)) {
 432         fHaveDefaultCentury = TRUE;
 433         fDefaultCenturyStart = startDate;
 434         fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
 435     }
 436 }
 437
 438 //----------------------------------------------------------------------
 439
 440 UnicodeString&
 441 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
 442 {
 443     UErrorCode status = U_ZERO_ERROR;
 444     pos.setBeginIndex(0);
 445     pos.setEndIndex(0);
 446
 447     UBool inQuote = FALSE;
 448     UChar prevCh = 0;
 449     int32_t count = 0;
 450
 451     // loop through the pattern string character by character
 452     for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
 453         UChar ch = fPattern[i];
 454
 455         // Use subFormat() to format a repeated pattern character
 456         // when a different pattern or non-pattern character is seen
 457         if (ch != prevCh && count > 0) {
 458             subFormat(appendTo, prevCh, count, pos, cal, status);
 459             count = 0;
 460         }
 461         if (ch == QUOTE) {
 462             // Consecutive single quotes are a single quote literal,
 463             // either outside of quotes or between quotes
 464             if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
 465                 appendTo += (UChar)QUOTE;
 466                 ++i;
 467             } else {
 468                 inQuote = ! inQuote;
 469             }
 470         }
 471         else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
 472                     || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
 473             // ch is a date-time pattern character to be interpreted
 474             // by subFormat(); count the number of times it is repeated
 475             prevCh = ch;
 476             ++count;
 477         }
 478         else {
 479             // Append quoted characters and unquoted non-pattern characters
 480             appendTo += ch;
 481         }
 482     }
 483
 484     // Format the last item in the pattern, if any
 485     if (count > 0) {
 486         subFormat(appendTo, prevCh, count, pos, cal, status);
 487     }
 488
 489     // and if something failed (e.g., an invalid format character), reset our FieldPosition
 490     // to (0, 0) to show that
 491     // {sfb} look at this later- are these being set correctly?
 492     if (U_FAILURE(status)) {
 493         pos.setBeginIndex(0);
 494         pos.setEndIndex(0);
 495     }
 496
 497     return appendTo;
 498 }
 499
 500 UnicodeString&
 501 SimpleDateFormat::format(const Formattable& obj,
 502                          UnicodeString& appendTo,
 503                          FieldPosition& pos,
 504                          UErrorCode& status) const
 505 {
 506     // this is just here to get around the hiding problem
 507     // (the previous format() override would hide the version of
 508     // format() on DateFormat that this function correspond to, so we
 509     // have to redefine it here)
 510     return DateFormat::format(obj, appendTo, pos, status);
 511 }
 512
 513 //----------------------------------------------------------------------
 514
 515 // Map index into pattern character string to Calendar field number.
 516 const UCalendarDateFields
 517 SimpleDateFormat::fgPatternIndexToCalendarField[] =
 518 {
 519     /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
 520     /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
 521     /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
 522     /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
 523     /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
 524     /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
 525     /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
 526     /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET
 527 };
 528
 529 // Map index into pattern character string to DateFormat field number
 530 const UDateFormatField
 531 SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
 532     /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
 533     /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
 534     /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
 535     /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
 536     /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
 537     /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
 538     /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
 539     /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD
 540 };
 541
 542 //----------------------------------------------------------------------
 543
 544 /**
 545  * Append symbols[value] to dst.  Make sure the array index is not out
 546  * of bounds.
 547  */
 548 inline void
 549 _appendSymbol(UnicodeString& dst,
 550               int32_t value,
 551               const UnicodeString* symbols,
 552               int32_t symbolsCount) {
 553     U_ASSERT(value >= 0 && value < symbolsCount);
 554     dst += symbols[value];
 555 }
 556
 557 void
 558 SimpleDateFormat::subFormat(UnicodeString &appendTo,
 559                             UChar ch,
 560                             int32_t count,
 561                             FieldPosition& pos,
 562                             Calendar& cal,
 563                             UErrorCode& status) const
 564 {
 565     if (U_FAILURE(status)) {
 566         return;
 567     }
 568
 569     // this function gets called by format() to produce the appropriate substitution
 570     // text for an individual pattern symbol (e.g., "HH" or "yyyy")
 571
 572     UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
 573     UDateFormatField patternCharIndex;
 574     const int32_t maxIntCount = 10;
 575     int32_t beginOffset = appendTo.length();
 576
 577     // if the pattern character is unrecognized, signal an error and dump out
 578     if (patternCharPtr == NULL)
 579     {
 580         status = U_INVALID_FORMAT_ERROR;
 581         return;
 582     }
 583
 584     patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
 585     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
 586     int32_t value = cal.get(field, status);
 587     if (U_FAILURE(status)) {
 588         return;
 589     }
 590
 591     switch (patternCharIndex) {
 592
 593     // for any "G" symbol, write out the appropriate era string
 594     case UDAT_ERA_FIELD:
 595         _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
 596         break;
 597
 598     // for "yyyy", write out the whole year; for "yy", write out the last 2 digits
 599     case UDAT_YEAR_FIELD:
 600     case UDAT_YEAR_WOY_FIELD:
 601         if (count >= 4)
 602             zeroPaddingNumber(appendTo, value, 4, maxIntCount);
 603         else if(count == 1)
 604             zeroPaddingNumber(appendTo, value, count, maxIntCount);
 605         else
 606             zeroPaddingNumber(appendTo, value, 2, 2);
 607         break;  // TODO: this needs to be synced with Java, with GCL/Shanghai's work
 608
 609     // for "MMMM", write out the whole month name, for "MMM", write out the month
 610     // abbreviation, for "M" or "MM", write out the month as a number with the
 611     // appropriate number of digits
 612     case UDAT_MONTH_FIELD:
 613         if (count >= 4)
 614             _appendSymbol(appendTo, value, fSymbols->fMonths,
 615                           fSymbols->fMonthsCount);
 616         else if (count == 3)
 617             _appendSymbol(appendTo, value, fSymbols->fShortMonths,
 618                           fSymbols->fShortMonthsCount);
 619         else
 620             zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
 621         break;
 622
 623     // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
 624     case UDAT_HOUR_OF_DAY1_FIELD:
 625         if (value == 0)
 626             zeroPaddingNumber(appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
 627         else
 628             zeroPaddingNumber(appendTo, value, count, maxIntCount);
 629         break;
 630
 631     case UDAT_FRACTIONAL_SECOND_FIELD:
 632         // Fractional seconds left-justify
 633         {
 634             fNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count);
 635             fNumberFormat->setMaximumIntegerDigits(maxIntCount);
 636             if (count == 1) {
 637                 value = (value + 50) / 100;
 638             } else if (count == 2) {
 639                 value = (value + 5) / 10;
 640             }
 641             FieldPosition p(0);
 642             fNumberFormat->format(value, appendTo, p);
 643             if (count > 3) {
 644                 fNumberFormat->setMinimumIntegerDigits(count - 3);
 645                 fNumberFormat->format((int32_t)0, appendTo, p);
 646             }
 647         }
 648         break;
 649
 650     // for "EEEE", write out the day-of-the-week name; otherwise, use the abbreviation
 651     case UDAT_DAY_OF_WEEK_FIELD:
 652         if (count >= 4)
 653             _appendSymbol(appendTo, value, fSymbols->fWeekdays,
 654                           fSymbols->fWeekdaysCount);
 655         else
 656             _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
 657                           fSymbols->fShortWeekdaysCount);
 658         break;
 659
 660     // for and "a" symbol, write out the whole AM/PM string
 661     case UDAT_AM_PM_FIELD:
 662         _appendSymbol(appendTo, value, fSymbols->fAmPms,
 663                       fSymbols->fAmPmsCount);
 664         break;
 665
 666     // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
 667     // as "12"
 668     case UDAT_HOUR1_FIELD:
 669         if (value == 0)
 670             zeroPaddingNumber(appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
 671         else
 672             zeroPaddingNumber(appendTo, value, count, maxIntCount);
 673         break;
 674
 675     // for the "z" symbols, we have to check our time zone data first.  If we have a
 676     // localized name for the time zone, then "zzzz" is the whole name and anything
 677     // shorter is the abbreviation (we also have to check for daylight savings time
 678     // since the name will be different).  If we don't have a localized time zone name,
 679     // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the
 680     // offset from GMT) regardless of how many z's were in the pattern symbol
 681     case UDAT_TIMEZONE_FIELD: {
 682         UnicodeString str;
 683         int32_t zoneIndex = fSymbols->getZoneIndex(cal.getTimeZone().getID(str));
 684         if (zoneIndex == -1) {
 685             value = cal.get(UCAL_ZONE_OFFSET, status) +
 686                     cal.get(UCAL_DST_OFFSET, status);
 687
 688             if (value < 0) {
 689                 appendTo += gGmtMinus;
 690                 value = -value; // suppress the '-' sign for text display.
 691             }
 692             else
 693                 appendTo += gGmtPlus;
 694
 695             zeroPaddingNumber(appendTo, (int32_t)(value/U_MILLIS_PER_HOUR), 2, 2);
 696             appendTo += (UChar)0x003A /*':'*/;
 697             zeroPaddingNumber(appendTo, (int32_t)((value%U_MILLIS_PER_HOUR)/U_MILLIS_PER_MINUTE), 2, 2);
 698         }
 699         else if (cal.get(UCAL_DST_OFFSET, status) != 0) {
 700             if (count >= 4)
 701                 appendTo += fSymbols->fZoneStrings[zoneIndex][3];
 702             else
 703                 appendTo += fSymbols->fZoneStrings[zoneIndex][4];
 704         }
 705         else {
 706             if (count >= 4)
 707                 appendTo += fSymbols->fZoneStrings[zoneIndex][1];
 708             else
 709                 appendTo += fSymbols->fZoneStrings[zoneIndex][2];
 710         }
 711         }
 712         break;
 713
 714     case 23: // 'Z' - TIMEZONE_RFC
 715         {
 716             UChar sign = 43/*'+'*/;
 717             value = (cal.get(UCAL_ZONE_OFFSET, status) +
 718                      cal.get(UCAL_DST_OFFSET, status)) / U_MILLIS_PER_MINUTE;
 719             if (value < 0) {
 720                 value = -value;
 721                 sign = 45/*'-'*/;
 722             }
 723             value = (value / 3) * 5 + (value % 60); // minutes => KKmm
 724             appendTo += sign;
 725             zeroPaddingNumber(appendTo, value, 4, 4);
 726         }
 727         break;
 728
 729     // all of the other pattern symbols can be formatted as simple numbers with
 730     // appropriate zero padding
 731     default:
 732         zeroPaddingNumber(appendTo, value, count, maxIntCount);
 733         break;
 734     }
 735
 736     // if the field we're formatting is the one the FieldPosition says it's interested
 737     // in, fill in the FieldPosition with this field's positions
 738     if (pos.getBeginIndex() == pos.getEndIndex() &&
 739         pos.getField() == fgPatternIndexToDateFormatField[patternCharIndex]) {
 740         pos.setBeginIndex(beginOffset);
 741         pos.setEndIndex(appendTo.length());
 742     }
 743 }
 744
 745 //----------------------------------------------------------------------
 746
 747 void
 748 SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int32_t minDigits, int32_t maxDigits) const
 749 {
 750     FieldPosition pos(0);
 751
 752     fNumberFormat->setMinimumIntegerDigits(minDigits);
 753     fNumberFormat->setMaximumIntegerDigits(maxDigits);
 754     fNumberFormat->format(value, appendTo, pos);  // 3rd arg is there to speed up processing
 755 }
 756
 757 //----------------------------------------------------------------------
 758
 759 /**
 760  * Format characters that indicate numeric fields.  The character
 761  * at index 0 is treated specially.
 762  */
 763 static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x79, 0x75, 0x64, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MyudhHmsSDFwWkK" */
 764
 765 /**
 766  * Return true if the given format character, occuring count
 767  * times, represents a numeric field.
 768  */
 769 UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
 770     UnicodeString s(NUMERIC_FORMAT_CHARS);
 771     int32_t i = s.indexOf(formatChar);
 772     return (i > 0 || (i == 0 && count < 3));
 773 }
 774
 775 void
 776 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
 777 {
 778     int32_t pos = parsePos.getIndex();
 779     int32_t start = pos;
 780     UBool ambiguousYear[] = { FALSE };
 781     int32_t count = 0;
 782
 783     // For parsing abutting numeric fields. 'abutPat' is the
 784     // offset into 'pattern' of the first of 2 or more abutting
 785     // numeric fields.  'abutStart' is the offset into 'text'
 786     // where parsing the fields begins. 'abutPass' starts off as 0
 787     // and increments each time we try to parse the fields.
 788     int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
 789     int32_t abutStart = 0;
 790     int32_t abutPass = 0;
 791     UBool inQuote = FALSE;
 792
 793     const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
 794
 795     for (int32_t i=0; i<fPattern.length(); ++i) {
 796         UChar ch = fPattern.charAt(i);
 797
 798         // Handle alphabetic field characters.
 799         if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z]
 800             int32_t fieldPat = i;
 801
 802             // Count the length of this field specifier
 803             count = 1;
 804             while ((i+1)<fPattern.length() &&
 805                    fPattern.charAt(i+1) == ch) {
 806                 ++count;
 807                 ++i;
 808             }
 809
 810             if (isNumeric(ch, count)) {
 811                 if (abutPat < 0) {
 812                     // Determine if there is an abutting numeric field.  For
 813                     // most fields we can just look at the next characters,
 814                     // but the 'm' field is either numeric or text,
 815                     // depending on the count, so we have to look ahead for
 816                     // that field.
 817                     if ((i+1)<fPattern.length()) {
 818                         UBool abutting;
 819                         UChar nextCh = fPattern.charAt(i+1);
 820                         int32_t k = numericFormatChars.indexOf(nextCh);
 821                         if (k == 0) {
 822                             int32_t j = i+2;
 823                             while (j<fPattern.length() &&
 824                                    fPattern.charAt(j) == nextCh) {
 825                                 ++j;
 826                             }
 827                             abutting = (j-i) < 4; // nextCount < 3
 828                         } else {
 829                             abutting = k > 0;
 830                         }
 831
 832                         // Record the start of a set of abutting numeric
 833                         // fields.
 834                         if (abutting) {
 835                             abutPat = fieldPat;
 836                             abutStart = pos;
 837                             abutPass = 0;
 838                         }
 839                     }
 840                 }
 841             } else {
 842                 abutPat = -1; // End of any abutting fields
 843             }
 844
 845             // Handle fields within a run of abutting numeric fields.  Take
 846             // the pattern "HHmmss" as an example. We will try to parse
 847             // 2/2/2 characters of the input text, then if that fails,
 848             // 1/2/2.  We only adjust the width of the leftmost field; the
 849             // others remain fixed.  This allows "123456" => 12:34:56, but
 850             // "12345" => 1:23:45.  Likewise, for the pattern "yyyyMMdd" we
 851             // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
 852             if (abutPat >= 0) {
 853                 // If we are at the start of a run of abutting fields, then
 854                 // shorten this field in each pass.  If we can't shorten
 855                 // this field any more, then the parse of this set of
 856                 // abutting numeric fields has failed.
 857                 if (fieldPat == abutPat) {
 858                     count -= abutPass++;
 859                     if (count == 0) {
 860                         parsePos.setIndex(start);
 861                         parsePos.setErrorIndex(pos);
 862                         return;
 863                     }
 864                 }
 865
 866                 pos = subParse(text, pos, ch, count,
 867                                TRUE, FALSE, ambiguousYear, cal);
 868
 869                 // If the parse fails anywhere in the run, back up to the
 870                 // start of the run and retry.
 871                 if (pos < 0) {
 872                     i = abutPat - 1;
 873                     pos = abutStart;
 874                     continue;
 875                 }
 876             }
 877
 878             // Handle non-numeric fields and non-abutting numeric
 879             // fields.
 880             else {
 881                 int32_t s = pos;
 882                 pos = subParse(text, pos, ch, count,
 883                                FALSE, TRUE, ambiguousYear, cal);
 884
 885                 if (pos < 0) {
 886                     parsePos.setErrorIndex(s);
 887                     parsePos.setIndex(start);
 888                     return;
 889                 }
 890             }
 891         }
 892
 893         // Handle literal pattern characters.  These are any
 894         // quoted characters and non-alphabetic unquoted
 895         // characters.
 896         else {
 897
 898             abutPat = -1; // End of any abutting fields
 899
 900             // Handle quotes.  Two consecutive quotes is a quote
 901             // literal, inside or outside of quotes.  Otherwise a
 902             // quote indicates entry or exit from a quoted region.
 903             if (ch == QUOTE) {
 904                 // Match a quote literal '' within OR outside of quotes
 905                 if ((i+1)<fPattern.length() && fPattern.charAt(i+1)==ch) {
 906                     ++i; // Skip over doubled quote
 907                     // Fall through and treat quote as a literal
 908                 } else {
 909                     // Enter or exit quoted region
 910                     inQuote = !inQuote;
 911                     continue;
 912                 }
 913             }
 914
 915             // A run of white space in the pattern matches a run
 916             // of white space in the input text.
 917             if (uprv_isRuleWhiteSpace(ch)) {
 918                 // Advance over run in pattern
 919                 while ((i+1)<fPattern.length() &&
 920                        uprv_isRuleWhiteSpace(fPattern.charAt(i+1))) {
 921                     ++i;
 922                 }
 923
 924                 // Advance over run in input text
 925                 int32_t s = pos;
 926                 while (pos<text.length() &&
 927                        u_isUWhiteSpace(text.charAt(pos))) {
 928                     ++pos;
 929                 }
 930
 931                 // Must see at least one white space char in input
 932                 if (pos > s) {
 933                     continue;
 934                 }
 935             } else if (pos<text.length() && text.charAt(pos)==ch) {
 936                 // Match a literal
 937                 ++pos;
 938                 continue;
 939             }
 940
 941             // We fall through to this point if the match fails
 942             parsePos.setIndex(start);
 943             parsePos.setErrorIndex(pos);
 944             return;
 945         }
 946     }
 947
 948     // At this point the fields of Calendar have been set.  Calendar
 949     // will fill in default values for missing fields when the time
 950     // is computed.
 951
 952     parsePos.setIndex(pos);
 953
 954     // This part is a problem:  When we call parsedDate.after, we compute the time.
 955     // Take the date April 3 2004 at 2:30 am.  When this is first set up, the year
 956     // will be wrong if we're parsing a 2-digit year pattern.  It will be 1904.
 957     // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day.  2:30 am
 958     // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
 959     // on that day.  It is therefore parsed out to fields as 3:30 am.  Then we
 960     // add 100 years, and get April 3 2004 at 3:30 am.  Note that April 3 2004 is
 961     // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
 962     /*
 963         UDate parsedDate = calendar.getTime();
 964         if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
 965             calendar.add(Calendar.YEAR, 100);
 966             parsedDate = calendar.getTime();
 967         }
 968     */
 969     // Because of the above condition, save off the fields in case we need to readjust.
 970     // The procedure we use here is not particularly efficient, but there is no other
 971     // way to do this given the API restrictions present in Calendar.  We minimize
 972     // inefficiency by only performing this computation when it might apply, that is,
 973     // when the two-digit year is equal to the start year, and thus might fall at the
 974     // front or the back of the default century.  This only works because we adjust
 975     // the year correctly to start with in other cases -- see subParse().
 976     UErrorCode status = U_ZERO_ERROR;
 977     if (ambiguousYear[0]) // If this is true then the two-digit year == the default start year
 978     {
 979         // We need a copy of the fields, and we need to avoid triggering a call to
 980         // complete(), which will recalculate the fields.  Since we can't access
 981         // the fields[] array in Calendar, we clone the entire object.  This will
 982         // stop working if Calendar.clone() is ever rewritten to call complete().
 983         Calendar *copy = cal.clone();
 984         UDate parsedDate = copy->getTime(status);
 985         // {sfb} check internalGetDefaultCenturyStart
 986         if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart))
 987         {
 988             // We can't use add here because that does a complete() first.
 989             cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
 990         }
 991         delete copy;
 992     }
 993
 994     // If any Calendar calls failed, we pretend that we
 995     // couldn't parse the string, when in reality this isn't quite accurate--
 996     // we did parse it; the Calendar calls just failed.
 997     if (U_FAILURE(status)) {
 998         parsePos.setErrorIndex(pos);
 999         parsePos.setIndex(start);
1000     }
1001 }
1002
1003 UDate
1004 SimpleDateFormat::parse( const UnicodeString& text,
1005                          ParsePosition& pos) const {
1006     // redefined here because the other parse() function hides this function's
1007     // cunterpart on DateFormat
1008     return DateFormat::parse(text, pos);
1009 }
1010
1011 UDate
1012 SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
1013 {
1014     // redefined here because the other parse() function hides this function's
1015     // counterpart on DateFormat
1016     return DateFormat::parse(text, status);
1017 }
1018 //----------------------------------------------------------------------
1019
1020 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
1021                               int32_t start,
1022                               UCalendarDateFields field,
1023                               const UnicodeString* data,
1024                               int32_t dataCount,
1025                               Calendar& cal) const
1026 {
1027     int32_t i = 0;
1028     int32_t count = dataCount;
1029
1030     if (field == UCAL_DAY_OF_WEEK) i = 1;
1031
1032     // There may be multiple strings in the data[] array which begin with
1033     // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1034     // We keep track of the longest match, and return that.  Note that this
1035     // unfortunately requires us to test all array elements.
1036     int32_t bestMatchLength = 0, bestMatch = -1;
1037
1038     // {sfb} kludge to support case-insensitive comparison
1039     // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1040     // the length of the match after case folding
1041     // {alan 20040607} don't case change the whole string, since the length
1042     // can change
1043     // TODO we need a case-insensitive startsWith function
1044     UnicodeString lcase, lcaseText;
1045     text.extract(start, INT32_MAX, lcaseText);
1046     lcaseText.foldCase();
1047
1048     for (; i < count; ++i)
1049     {
1050         // Always compare if we have no match yet; otherwise only compare
1051         // against potentially better matches (longer strings).
1052
1053         lcase.fastCopyFrom(data[i]).foldCase();
1054         int32_t length = lcase.length();
1055
1056         if (length > bestMatchLength &&
1057             lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
1058         {
1059             bestMatch = i;
1060             bestMatchLength = length;
1061         }
1062     }
1063     if (bestMatch >= 0)
1064     {
1065         cal.set(field, bestMatch);
1066
1067         // Once we have a match, we have to determine the length of the
1068         // original source string.  This will usually be == the length of
1069         // the case folded string, but it may differ (e.g. sharp s).
1070         lcase.fastCopyFrom(data[bestMatch]).foldCase();
1071
1072         // Most of the time, the length will be the same as the length
1073         // of the string from the locale data.  Sometimes it will be
1074         // different, in which case we will have to figure it out by
1075         // adding a character at a time, until we have a match.  We do
1076         // this all in one loop, where we try 'len' first (at index
1077         // i==0).
1078         int32_t len = data[bestMatch].length(); // 99+% of the time
1079         int32_t n = text.length() - start;
1080         for (i=0; i<=n; ++i) {
1081             int32_t j=i;
1082             if (i == 0) {
1083                 j = len;
1084             } else if (i == len) {
1085                 continue; // already tried this when i was 0
1086             }
1087             text.extract(start, j, lcaseText);
1088             lcaseText.foldCase();
1089             if (lcase == lcaseText) {
1090                 return start + j;
1091             }
1092         }
1093     }
1094
1095     return -start;
1096 }
1097
1098 //----------------------------------------------------------------------
1099
1100 void
1101 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
1102 {
1103     parseAmbiguousDatesAsAfter(d, status);
1104 }
1105
1106 /**
1107  * Private member function that converts the parsed date strings into
1108  * timeFields. Returns -start (for ParsePosition) if failed.
1109  * @param text the time text to be parsed.
1110  * @param start where to start parsing.
1111  * @param ch the pattern character for the date field text to be parsed.
1112  * @param count the count of a pattern character.
1113  * @return the new start position if matching succeeded; a negative number
1114  * indicating matching failure, otherwise.
1115  */
1116 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
1117                            UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const
1118 {
1119     Formattable number;
1120     int32_t value = 0;
1121     int32_t i;
1122     ParsePosition pos(0);
1123     int32_t patternCharIndex;
1124     UnicodeString temp;
1125     UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
1126
1127 #if defined (U_DEBUG_CAL)
1128     //fprintf(stderr, "%s:%d - [%c]  st=%d \n", __FILE__, __LINE__, (char) ch, start);
1129 #endif
1130
1131     if (patternCharPtr == NULL) {
1132         return -start;
1133     }
1134
1135     patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
1136
1137     UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1138
1139     // If there are any spaces here, skip over them.  If we hit the end
1140     // of the string, then fail.
1141     for (;;) {
1142         if (start >= text.length()) {
1143             return -start;
1144         }
1145         UChar32 c = text.char32At(start);
1146         if (!u_isUWhiteSpace(c)) {
1147             break;
1148         }
1149         start += UTF_CHAR_LENGTH(c);
1150     }
1151     pos.setIndex(start);
1152
1153     // We handle a few special cases here where we need to parse
1154     // a number value.  We handle further, more generic cases below.  We need
1155     // to handle some of them here because some fields require extra processing on
1156     // the parsed value.
1157     if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
1158         patternCharIndex == UDAT_HOUR1_FIELD ||
1159         (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) ||
1160         patternCharIndex == UDAT_YEAR_FIELD ||
1161         patternCharIndex == UDAT_YEAR_WOY_FIELD ||
1162         patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
1163     {
1164         int32_t parseStart = pos.getIndex();
1165         // It would be good to unify this with the obeyCount logic below,
1166         // but that's going to be difficult.
1167         const UnicodeString* src;
1168         if (obeyCount) {
1169             if ((start+count) > text.length()) {
1170                 return -start;
1171             }
1172             text.extractBetween(0, start + count, temp);
1173             src = &temp;
1174         } else {
1175             src = &text;
1176         }
1177         parseInt(*src, number, pos, allowNegative);
1178         if (pos.getIndex() == parseStart)
1179             return -start;
1180         value = number.getLong();
1181     }
1182
1183     switch (patternCharIndex) {
1184     case UDAT_ERA_FIELD:
1185         return matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal);
1186     case UDAT_YEAR_FIELD:
1187         // If there are 3 or more YEAR pattern characters, this indicates
1188         // that the year value is to be treated literally, without any
1189         // two-digit year adjustments (e.g., from "01" to 2001).  Otherwise
1190         // we made adjustments to place the 2-digit year in the proper
1191         // century, for parsed strings from "00" to "99".  Any other string
1192         // is treated literally:  "2250", "-1", "1", "002".
1193         if (count <= 2 && (pos.getIndex() - start) == 2
1194             && u_isdigit(text.charAt(start))
1195             && u_isdigit(text.charAt(start+1)))
1196         {
1197             // Assume for example that the defaultCenturyStart is 6/18/1903.
1198             // This means that two-digit years will be forced into the range
1199             // 6/18/1903 to 6/17/2003.  As a result, years 00, 01, and 02
1200             // correspond to 2000, 2001, and 2002.  Years 04, 05, etc. correspond
1201             // to 1904, 1905, etc.  If the year is 03, then it is 2003 if the
1202             // other fields specify a date before 6/18, or 1903 if they specify a
1203             // date afterwards.  As a result, 03 is an ambiguous year.  All other
1204             // two-digit years are unambiguous.
1205           if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
1206               int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
1207               ambiguousYear[0] = (value == ambiguousTwoDigitYear);
1208               value += (fDefaultCenturyStartYear/100)*100 +
1209                 (value < ambiguousTwoDigitYear ? 100 : 0);
1210             }
1211         }
1212         cal.set(UCAL_YEAR, value);
1213         return pos.getIndex();
1214     case UDAT_YEAR_WOY_FIELD:
1215         // Comment is the same as for UDAT_Year_FIELDs - look above
1216         if (count <= 2 && (pos.getIndex() - start) == 2
1217             && u_isdigit(text.charAt(start))
1218             && u_isdigit(text.charAt(start+1))
1219             && fHaveDefaultCentury )
1220         {
1221             int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
1222             ambiguousYear[0] = (value == ambiguousTwoDigitYear);
1223             value += (fDefaultCenturyStartYear/100)*100 +
1224                 (value < ambiguousTwoDigitYear ? 100 : 0);
1225         }
1226         cal.set(UCAL_YEAR_WOY, value);
1227         return pos.getIndex();
1228     case UDAT_MONTH_FIELD:
1229         if (count <= 2) // i.e., M or MM.
1230         {
1231             // Don't want to parse the month if it is a string
1232             // while pattern uses numeric style: M or MM.
1233             // [We computed 'value' above.]
1234             cal.set(UCAL_MONTH, value - 1);
1235             return pos.getIndex();
1236         }
1237         else
1238         {
1239             // count >= 3 // i.e., MMM or MMMM
1240             // Want to be able to parse both short and long forms.
1241             // Try count == 4 first:
1242             int32_t newStart = 0;
1243             if ((newStart = matchString(text, start, UCAL_MONTH,
1244                                       fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
1245                 return newStart;
1246             else // count == 4 failed, now try count == 3
1247                 return matchString(text, start, UCAL_MONTH,
1248                                    fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal);
1249         }
1250     case UDAT_HOUR_OF_DAY1_FIELD:
1251         // [We computed 'value' above.]
1252         if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
1253             value = 0;
1254         cal.set(UCAL_HOUR_OF_DAY, value);
1255         return pos.getIndex();
1256     case UDAT_FRACTIONAL_SECOND_FIELD:
1257         // Fractional seconds left-justify
1258         i = pos.getIndex() - start;
1259         if (i < 3) {
1260             while (i < 3) {
1261                 value *= 10;
1262                 i++;
1263             }
1264         } else {
1265             int32_t a = 1;
1266             while (i > 3) {
1267                 a *= 10;
1268                 i--;
1269             }
1270             value = (value + (a>>1)) / a;
1271         }
1272         cal.set(UCAL_MILLISECOND, value);
1273         return pos.getIndex();
1274     case UDAT_DAY_OF_WEEK_FIELD:
1275         {
1276             // Want to be able to parse both short and long forms.
1277             // Try count == 4 (DDDD) first:
1278             int32_t newStart = 0;
1279             if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
1280                                       fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0)
1281                 return newStart;
1282             else // DDDD failed, now try DDD
1283                 return matchString(text, start, UCAL_DAY_OF_WEEK,
1284                                    fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal);
1285         }
1286     case UDAT_AM_PM_FIELD:
1287         return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
1288     case UDAT_HOUR1_FIELD:
1289         // [We computed 'value' above.]
1290         if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
1291             value = 0;
1292         cal.set(UCAL_HOUR, value);
1293         return pos.getIndex();
1294     case UDAT_TIMEZONE_FIELD:
1295     case UDAT_TIMEZONE_RFC_FIELD:
1296         {
1297         // First try to parse generic forms such as GMT-07:00. Do this first
1298         // in case localized DateFormatZoneData contains the string "GMT"
1299         // for a zone; in that case, we don't want to match the first three
1300         // characters of GMT+/-HH:MM etc.
1301
1302         UnicodeString lcaseText(text);
1303         UnicodeString lcaseGMT(gGmt);
1304         int32_t sign = 0;
1305         int32_t offset;
1306         int32_t gmtLen = lcaseGMT.length();
1307
1308         // For time zones that have no known names, look for strings
1309         // of the form:
1310         //    GMT[+-]hours:minutes or
1311         //    GMT[+-]hhmm or
1312         //    GMT.
1313
1314         // {sfb} kludge for case-insensitive compare
1315         lcaseText.toLower();
1316         lcaseGMT.toLower();
1317
1318         if ((text.length() - start) > gmtLen &&
1319             (lcaseText.compare(start, gmtLen, lcaseGMT, 0, gmtLen)) == 0)
1320         {
1321             cal.set(UCAL_DST_OFFSET, 0);
1322
1323             pos.setIndex(start + gmtLen);
1324
1325             if( text[pos.getIndex()] == 0x002B /*'+'*/ )
1326                 sign = 1;
1327             else if( text[pos.getIndex()] == 0x002D /*'-'*/ )
1328                 sign = -1;
1329             else {
1330                 cal.set(UCAL_ZONE_OFFSET, 0 );
1331                 return pos.getIndex();
1332             }
1333
1334             // Look for hours:minutes or hhmm.
1335             pos.setIndex(pos.getIndex() + 1);
1336             int32_t parseStart = pos.getIndex();
1337             Formattable tzNumber;
1338             fNumberFormat->parse(text, tzNumber, pos);
1339             if( pos.getIndex() == parseStart) {
1340                 return -start;
1341             }
1342             if( text[pos.getIndex()] == 0x003A /*':'*/ ) {
1343                 // This is the hours:minutes case
1344                 offset = tzNumber.getLong() * 60;
1345                 pos.setIndex(pos.getIndex() + 1);
1346                 parseStart = pos.getIndex();
1347                 fNumberFormat->parse(text, tzNumber, pos);
1348                 if( pos.getIndex() == parseStart) {
1349                     return -start;
1350                 }
1351                 offset += tzNumber.getLong();
1352             }
1353             else {
1354                 // This is the hhmm case.
1355                 offset = tzNumber.getLong();
1356                 if( offset < 24 )
1357                     offset *= 60;
1358                 else
1359                     offset = offset % 100 + offset / 100 * 60;
1360             }
1361
1362             // Fall through for final processing below of 'offset' and 'sign'.
1363         }
1364         else {
1365             // At this point, check for named time zones by looking through
1366             // the locale data from the DateFormatZoneData strings.
1367             // Want to be able to parse both short and long forms.
1368             const UnicodeString *zs;
1369             int32_t j;
1370
1371             for (i = 0; i < fSymbols->fZoneStringsRowCount; i++)
1372             {
1373                 // Checking long and short zones [1 & 2],
1374                 // and long and short daylight [3 & 4].
1375                 for (j = 1; j <= 4; ++j)
1376                 {
1377                     zs = &fSymbols->fZoneStrings[i][j];
1378                     // ### TODO markus 20021014: This use of caseCompare() will fail
1379                     // if the text contains a character that case-folds into multiple
1380                     // characters. In that case, zs->length() may be too long, and it does not match.
1381                     // We need a case-insensitive version of startsWith().
1382                     // There are similar cases of such caseCompare() uses elsewhere in ICU.
1383                     if (0 == (text.caseCompare(start, zs->length(), *zs, 0))) {
1384                         TimeZone *tz = TimeZone::createTimeZone(fSymbols->fZoneStrings[i][0]);
1385                         cal.set(UCAL_ZONE_OFFSET, tz->getRawOffset());
1386                         // Must call set() with something -- TODO -- Fix this to
1387                         // use the correct DST SAVINGS for the zone.
1388                         delete tz;
1389                         cal.set(UCAL_DST_OFFSET, j >= 3 ? U_MILLIS_PER_HOUR : 0);
1390                         return (start + fSymbols->fZoneStrings[i][j].length());
1391                     }
1392                 }
1393             }
1394
1395             // As a last resort, look for numeric timezones of the form
1396             // [+-]hhmm as specified by RFC 822.  This code is actually
1397             // a little more permissive than RFC 822.  It will try to do
1398             // its best with numbers that aren't strictly 4 digits long.
1399             UErrorCode status = U_ZERO_ERROR;
1400             DecimalFormat fmt(UNICODE_STRING_SIMPLE("+####;-####"), status);
1401             if(U_FAILURE(status))
1402                 return -start;
1403             fmt.setParseIntegerOnly(TRUE);
1404             int32_t parseStart = pos.getIndex();
1405             Formattable tzNumber;
1406             fmt.parse( text, tzNumber, pos );
1407             if( pos.getIndex() == parseStart) {
1408                 return -start;   // Wasn't actually a number.
1409             }
1410             offset = tzNumber.getLong();
1411             sign = 1;
1412             if( offset < 0 ) {
1413                 sign = -1;
1414                 offset = -offset;
1415             }
1416             if( offset < 24 )
1417                 offset = offset * 60;
1418             else
1419                 offset = offset % 100 + offset / 100 * 60;
1420
1421             // Fall through for final processing below of 'offset' and 'sign'.
1422         }
1423
1424         // Do the final processing for both of the above cases.  We only
1425         // arrive here if the form GMT+/-... or an RFC 822 form was seen.
1426         if (sign != 0)
1427         {
1428             offset *= U_MILLIS_PER_MINUTE * sign;
1429
1430             if (cal.getTimeZone().useDaylightTime())
1431             {
1432                 cal.set(UCAL_DST_OFFSET, U_MILLIS_PER_HOUR);
1433                 offset -= U_MILLIS_PER_HOUR;
1434             }
1435             cal.set(UCAL_ZONE_OFFSET, offset);
1436
1437             return pos.getIndex();
1438         }
1439
1440         // All efforts to parse a zone failed.
1441         return -start;
1442         }
1443     default:
1444         // Handle "generic" fields
1445         int32_t parseStart = pos.getIndex();
1446         const UnicodeString* src;
1447         if (obeyCount) {
1448             if ((start+count) > text.length()) {
1449                 return -start;
1450             }
1451             text.extractBetween(0, start + count, temp);
1452             src = &temp;
1453         } else {
1454             src = &text;
1455         }
1456         parseInt(*src, number, pos, allowNegative);
1457         if (pos.getIndex() != parseStart) {
1458             cal.set(field, number.getLong());
1459             return pos.getIndex();
1460         }
1461         return -start;
1462     }
1463 }
1464
1465 /**
1466  * Parse an integer using fNumberFormat.  This method is semantically
1467  * const, but actually may modify fNumberFormat.
1468  */
1469 void SimpleDateFormat::parseInt(const UnicodeString& text,
1470                                 Formattable& number,
1471                                 ParsePosition& pos,
1472                                 UBool allowNegative) const {
1473     UnicodeString oldPrefix;
1474     DecimalFormat* df = NULL;
1475     if (!allowNegative &&
1476         fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1477         df = (DecimalFormat*)fNumberFormat;
1478         df->getNegativePrefix(oldPrefix);
1479         df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX);
1480     }
1481     fNumberFormat->parse(text, number, pos);
1482     if (df != NULL) {
1483         df->setNegativePrefix(oldPrefix);
1484     }
1485 }
1486
1487 //----------------------------------------------------------------------
1488
1489 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
1490                                         UnicodeString& translatedPattern,
1491                                         const UnicodeString& from,
1492                                         const UnicodeString& to,
1493                                         UErrorCode& status)
1494 {
1495   // run through the pattern and convert any pattern symbols from the version
1496   // in "from" to the corresponding character ion "to".  This code takes
1497   // quoted strings into account (it doesn't try to translate them), and it signals
1498   // an error if a particular "pattern character" doesn't appear in "from".
1499   // Depending on the values of "from" and "to" this can convert from generic
1500   // to localized patterns or localized to generic.
1501   if (U_FAILURE(status))
1502     return;
1503
1504   translatedPattern.remove();
1505   UBool inQuote = FALSE;
1506   for (int32_t i = 0; i < originalPattern.length(); ++i) {
1507     UChar c = originalPattern[i];
1508     if (inQuote) {
1509       if (c == QUOTE)
1510     inQuote = FALSE;
1511     }
1512     else {
1513       if (c == QUOTE)
1514     inQuote = TRUE;
1515       else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/
1516            || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) {
1517     int32_t ci = from.indexOf(c);
1518     if (ci == -1) {
1519       status = U_INVALID_FORMAT_ERROR;
1520       return;
1521     }
1522     c = to[ci];
1523       }
1524     }
1525     translatedPattern += c;
1526   }
1527   if (inQuote) {
1528     status = U_INVALID_FORMAT_ERROR;
1529     return;
1530   }
1531 }
1532
1533 //----------------------------------------------------------------------
1534
1535 UnicodeString&
1536 SimpleDateFormat::toPattern(UnicodeString& result) const
1537 {
1538     result = fPattern;
1539     return result;
1540 }
1541
1542 //----------------------------------------------------------------------
1543
1544 UnicodeString&
1545 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
1546                                      UErrorCode& status) const
1547 {
1548     translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status);
1549     return result;
1550 }
1551
1552 //----------------------------------------------------------------------
1553
1554 void
1555 SimpleDateFormat::applyPattern(const UnicodeString& pattern)
1556 {
1557     fPattern = pattern;
1558 }
1559
1560 //----------------------------------------------------------------------
1561
1562 void
1563 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
1564                                         UErrorCode &status)
1565 {
1566     translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status);
1567 }
1568
1569 //----------------------------------------------------------------------
1570
1571 const DateFormatSymbols*
1572 SimpleDateFormat::getDateFormatSymbols() const
1573 {
1574     return fSymbols;
1575 }
1576
1577 //----------------------------------------------------------------------
1578
1579 void
1580 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
1581 {
1582     delete fSymbols;
1583     fSymbols = newFormatSymbols;
1584 }
1585
1586 //----------------------------------------------------------------------
1587 void
1588 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
1589 {
1590     delete fSymbols;
1591     fSymbols = new DateFormatSymbols(newFormatSymbols);
1592 }
1593
1594
1595 //----------------------------------------------------------------------
1596
1597
1598 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
1599 {
1600   UErrorCode status = U_ZERO_ERROR;
1601   DateFormat::adoptCalendar(calendarToAdopt);
1602   delete fSymbols;
1603   fSymbols=NULL;
1604   initializeSymbols(fLocale, fCalendar, status);  // we need new symbols
1605   initializeDefaultCentury();  // we need a new century (possibly)
1606 }
1607
1608 U_NAMESPACE_END
1609
1610 #endif /* #if !UCONFIG_NO_FORMATTING */
1611
1612 //eof