]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/smpdtfmt.cpp
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / i18n / smpdtfmt.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
46f4442e 3* Copyright (C) 1997-2009, International Business Machines Corporation and *
b75a7d8f
A
4* others. All Rights Reserved. *
5*******************************************************************************
6*
7* File SMPDTFMT.CPP
8*
9* Modification History:
10*
11* Date Name Description
12* 02/19/97 aliu Converted from java.
13* 03/31/97 aliu Modified extensively to work with 50 locales.
14* 04/01/97 aliu Added support for centuries.
15* 07/09/97 helena Made ParsePosition into a class.
16* 07/21/98 stephen Added initializeDefaultCentury.
17* Removed getZoneIndex (added in DateFormatSymbols)
18* Removed subParseLong
19* Removed chk
20* 02/22/99 stephen Removed character literals for EBCDIC safety
21* 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru
22* "99" are recognized. {j28 4182066}
23* 11/15/99 weiv Added support for week of year/day of week format
24********************************************************************************
25*/
26
46f4442e
A
27#define ZID_KEY_MAX 128
28
b75a7d8f
A
29#include "unicode/utypes.h"
30
31#if !UCONFIG_NO_FORMATTING
32
33#include "unicode/smpdtfmt.h"
34#include "unicode/dtfmtsym.h"
374ca955 35#include "unicode/ures.h"
b75a7d8f
A
36#include "unicode/msgfmt.h"
37#include "unicode/calendar.h"
38#include "unicode/gregocal.h"
39#include "unicode/timezone.h"
40#include "unicode/decimfmt.h"
41#include "unicode/dcfmtsym.h"
42#include "unicode/uchar.h"
46f4442e 43#include "unicode/uniset.h"
b75a7d8f 44#include "unicode/ustring.h"
46f4442e
A
45#include "unicode/basictz.h"
46#include "unicode/simpletz.h"
47#include "unicode/rbtz.h"
48#include "unicode/vtzone.h"
49#include "olsontz.h"
374ca955
A
50#include "util.h"
51#include "gregoimp.h"
52#include "cstring.h"
53#include "uassert.h"
46f4442e
A
54#include "zstrfmt.h"
55#include "cmemory.h"
56#include "umutex.h"
57#include "smpdtfst.h"
b75a7d8f
A
58#include <float.h>
59
374ca955
A
60#if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
61#include <stdio.h>
62#endif
63
b75a7d8f
A
64// *****************************************************************************
65// class SimpleDateFormat
66// *****************************************************************************
67
68U_NAMESPACE_BEGIN
69
46f4442e
A
70static const UChar PATTERN_CHAR_BASE = 0x40;
71
374ca955
A
72/**
73 * Last-resort string to use for "GMT" when constructing time zone strings.
74 */
b75a7d8f
A
75// For time zones that have no names, the default text form is GMT+HH:mm or
76// GMT-HH:mm (with :ss appended when the seconds are non-zero), e.g. GMT+01:00.
77// Parsing also accepts GMT+H[H][:mm[:ss]], GMT+[H]Hmm and GMT+[H]Hmmss.
374ca955
A
78static const UChar gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT"
79static const UChar gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
80static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
46f4442e
A
81static const UChar gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
82static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
83static const UChar gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
84static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
85static const UChar gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
86typedef enum GmtPatSize {
87 kGmtLen = 3,
88 kGmtPatLen = 6,
89 kNegHmsLen = 9,
90 kNegHmLen = 6,
91 kPosHmsLen = 9,
92 kPosHmLen = 6
93} GmtPatSize;
b75a7d8f
A
94
95// This is a pattern-of-last-resort used when we can't load a usable pattern out
96// of a resource.
374ca955 97static const UChar gDefaultPattern[] =
b75a7d8f
A
98{
99 0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
100}; /* "yyyyMMdd hh:mm a" */
101
102// This prefix is designed to NEVER MATCH real text, in order to
103// suppress the parsing of negative numbers. Adjust as needed (if
104// this becomes valid Unicode).
105static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
106
107/**
108 * These are the tags we expect to see in normal resource bundle files associated
109 * with a locale.
110 */
374ca955 111static const char gDateTimePatternsTag[]="DateTimePatterns";
b75a7d8f 112
46f4442e 113static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
b75a7d8f 114static const UChar QUOTE = 0x27; // Single quote
46f4442e
A
115enum {
116 kGMTNegativeHMS = 0,
117 kGMTNegativeHM,
118 kGMTPositiveHMS,
119 kGMTPositiveHM,
120
121 kNumGMTFormatters
122};
123
124/*
125 * The field range check bias for each UDateFormatField.
126 * The bias is added to the minimum and maximum values
127 * before they are compared to the parsed number.
128 * For example, the calendar stores zero-based month numbers
129 * but the parsed month numbers start at 1, so the bias is 1.
130 *
131 * A value of -1 means that the value is not checked.
132 */
133static const int32_t gFieldRangeBias[] = {
134 -1, // 'G' - UDAT_ERA_FIELD
135 -1, // 'y' - UDAT_YEAR_FIELD
136 1, // 'M' - UDAT_MONTH_FIELD
137 0, // 'd' - UDAT_DATE_FIELD
138 -1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD
139 -1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD
140 0, // 'm' - UDAT_MINUTE_FIELD
141 0, // 's' - UDAT_SEOND_FIELD
142 -1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
143 -1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
144 -1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
145 -1, // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
146 -1, // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
147 -1, // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
148 -1, // 'a' - UDAT_AM_PM_FIELD
149 -1, // 'h' - UDAT_HOUR1_FIELD
150 -1, // 'K' - UDAT_HOUR0_FIELD
151 -1, // 'z' - UDAT_TIMEZONE_FIELD
152 -1, // 'Y' - UDAT_YEAR_WOY_FIELD
153 -1, // 'e' - UDAT_DOW_LOCAL_FIELD
154 -1, // 'u' - UDAT_EXTENDED_YEAR_FIELD
155 -1, // 'g' - UDAT_JULIAN_DAY_FIELD
156 -1, // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
157 -1, // 'Z' - UDAT_TIMEZONE_RFC_FIELD
158 -1, // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
159 0, // 'c' - UDAT_STANDALONE_DAY_FIELD
160 1, // 'L' - UDAT_STANDALONE_MONTH_FIELD
161 -1, // 'Q' - UDAT_QUARTER_FIELD (1-4?)
162 -1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD
163 -1 // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
164};
165
166static UMTX LOCK;
167
168UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
b75a7d8f
A
169
170//----------------------------------------------------------------------
171
172SimpleDateFormat::~SimpleDateFormat()
173{
174 delete fSymbols;
46f4442e
A
175 if (fGMTFormatters) {
176 for (int32_t i = 0; i < kNumGMTFormatters; i++) {
177 if (fGMTFormatters[i]) {
178 delete fGMTFormatters[i];
179 }
180 }
181 uprv_free(fGMTFormatters);
182 }
b75a7d8f
A
183}
184
185//----------------------------------------------------------------------
186
187SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
188 : fLocale(Locale::getDefault()),
73c04bcf 189 fSymbols(NULL),
46f4442e 190 fGMTFormatters(NULL)
b75a7d8f
A
191{
192 construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
193 initializeDefaultCentury();
194}
195
196//----------------------------------------------------------------------
197
198SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
199 UErrorCode &status)
200: fPattern(pattern),
201 fLocale(Locale::getDefault()),
73c04bcf 202 fSymbols(NULL),
46f4442e 203 fGMTFormatters(NULL)
b75a7d8f
A
204{
205 initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
206 initialize(fLocale, status);
207 initializeDefaultCentury();
208}
209
210//----------------------------------------------------------------------
211
212SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
213 const Locale& locale,
214 UErrorCode& status)
215: fPattern(pattern),
73c04bcf 216 fLocale(locale),
46f4442e 217 fGMTFormatters(NULL)
b75a7d8f
A
218{
219 initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
220 initialize(fLocale, status);
221 initializeDefaultCentury();
222}
223
224//----------------------------------------------------------------------
225
226SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
227 DateFormatSymbols* symbolsToAdopt,
228 UErrorCode& status)
229: fPattern(pattern),
230 fLocale(Locale::getDefault()),
73c04bcf 231 fSymbols(symbolsToAdopt),
46f4442e 232 fGMTFormatters(NULL)
b75a7d8f
A
233{
234 initializeCalendar(NULL,fLocale,status);
235 initialize(fLocale, status);
236 initializeDefaultCentury();
237}
238
239//----------------------------------------------------------------------
240
241SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
242 const DateFormatSymbols& symbols,
243 UErrorCode& status)
244: fPattern(pattern),
245 fLocale(Locale::getDefault()),
73c04bcf 246 fSymbols(new DateFormatSymbols(symbols)),
46f4442e 247 fGMTFormatters(NULL)
b75a7d8f
A
248{
249 initializeCalendar(NULL, fLocale, status);
250 initialize(fLocale, status);
251 initializeDefaultCentury();
252}
253
254//----------------------------------------------------------------------
255
256// Not for public consumption; used by DateFormat
257SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
258 EStyle dateStyle,
259 const Locale& locale,
260 UErrorCode& status)
261: fLocale(locale),
73c04bcf 262 fSymbols(NULL),
46f4442e 263 fGMTFormatters(NULL)
b75a7d8f
A
264{
265 construct(timeStyle, dateStyle, fLocale, status);
266 if(U_SUCCESS(status)) {
267 initializeDefaultCentury();
268 }
269}
270
271//----------------------------------------------------------------------
272
273/**
274 * Not for public consumption; used by DateFormat. This constructor
275 * never fails. If the resource data is not available, it uses the
276 * the last resort symbols.
277 */
278SimpleDateFormat::SimpleDateFormat(const Locale& locale,
279 UErrorCode& status)
374ca955 280: fPattern(gDefaultPattern),
b75a7d8f 281 fLocale(locale),
73c04bcf 282 fSymbols(NULL),
46f4442e 283 fGMTFormatters(NULL)
b75a7d8f
A
284{
285 if (U_FAILURE(status)) return;
286 initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status);
287 if (U_FAILURE(status))
288 {
289 status = U_ZERO_ERROR;
290 delete fSymbols;
291 // This constructor doesn't fail; it uses last resort data
292 fSymbols = new DateFormatSymbols(status);
293 /* test for NULL */
294 if (fSymbols == 0) {
295 status = U_MEMORY_ALLOCATION_ERROR;
296 return;
297 }
298 }
299
300 initialize(fLocale, status);
301 if(U_SUCCESS(status)) {
302 initializeDefaultCentury();
303 }
304}
305
306//----------------------------------------------------------------------
307
308SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
309: DateFormat(other),
73c04bcf 310 fSymbols(NULL),
46f4442e 311 fGMTFormatters(NULL)
b75a7d8f
A
312{
313 *this = other;
314}
315
316//----------------------------------------------------------------------
317
318SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
319{
46f4442e
A
320 if (this == &other) {
321 return *this;
322 }
b75a7d8f
A
323 DateFormat::operator=(other);
324
325 delete fSymbols;
326 fSymbols = NULL;
327
328 if (other.fSymbols)
329 fSymbols = new DateFormatSymbols(*other.fSymbols);
330
331 fDefaultCenturyStart = other.fDefaultCenturyStart;
332 fDefaultCenturyStartYear = other.fDefaultCenturyStartYear;
333 fHaveDefaultCentury = other.fHaveDefaultCentury;
334
335 fPattern = other.fPattern;
336
337 return *this;
338}
339
340//----------------------------------------------------------------------
341
342Format*
343SimpleDateFormat::clone() const
344{
345 return new SimpleDateFormat(*this);
346}
347
348//----------------------------------------------------------------------
349
350UBool
351SimpleDateFormat::operator==(const Format& other) const
352{
374ca955
A
353 if (DateFormat::operator==(other)) {
354 // DateFormat::operator== guarantees following cast is safe
b75a7d8f 355 SimpleDateFormat* that = (SimpleDateFormat*)&other;
73c04bcf 356 return (fPattern == that->fPattern &&
b75a7d8f 357 fSymbols != NULL && // Check for pathological object
73c04bcf
A
358 that->fSymbols != NULL && // Check for pathological object
359 *fSymbols == *that->fSymbols &&
360 fHaveDefaultCentury == that->fHaveDefaultCentury &&
b75a7d8f
A
361 fDefaultCenturyStart == that->fDefaultCenturyStart);
362 }
363 return FALSE;
364}
365
366//----------------------------------------------------------------------
367
368void SimpleDateFormat::construct(EStyle timeStyle,
369 EStyle dateStyle,
370 const Locale& locale,
371 UErrorCode& status)
372{
373 // called by several constructors to load pattern data from the resources
b75a7d8f
A
374 if (U_FAILURE(status)) return;
375
b75a7d8f
A
376 // We will need the calendar to know what type of symbols to load.
377 initializeCalendar(NULL, locale, status);
73c04bcf 378 if (U_FAILURE(status)) return;
b75a7d8f 379
374ca955
A
380 CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status);
381 UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status);
b75a7d8f
A
382 if (U_FAILURE(status)) return;
383
374ca955 384 if (ures_getSize(dateTimePatterns) <= kDateTime)
b75a7d8f
A
385 {
386 status = U_INVALID_FORMAT_ERROR;
387 return;
388 }
389
374ca955
A
390 setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status),
391 ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status));
392
b75a7d8f
A
393 // create a symbols object from the locale
394 initializeSymbols(locale,fCalendar, status);
395 if (U_FAILURE(status)) return;
396 /* test for NULL */
397 if (fSymbols == 0) {
398 status = U_MEMORY_ALLOCATION_ERROR;
399 return;
400 }
401
374ca955
A
402 const UChar *resStr;
403 int32_t resStrLen = 0;
b75a7d8f
A
404
405 // if the pattern should include both date and time information, use the date/time
406 // pattern string as a guide to tell use how to glue together the appropriate date
407 // and time pattern strings. The actual gluing-together is handled by a convenience
408 // method on MessageFormat.
374ca955 409 if ((timeStyle != kNone) && (dateStyle != kNone))
b75a7d8f 410 {
374ca955 411 Formattable timeDateArray[2];
b75a7d8f 412
b75a7d8f
A
413 // use Formattable::adoptString() so that we can use fastCopyFrom()
414 // instead of Formattable::setString()'s unaware, safe, deep string clone
415 // see Jitterbug 2296
374ca955 416 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
46f4442e
A
417 UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen);
418 // NULL pointer check
419 if (tempus1 == NULL) {
420 status = U_MEMORY_ALLOCATION_ERROR;
421 return;
422 }
423 timeDateArray[0].adoptString(tempus1);
424
374ca955 425 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
46f4442e
A
426 UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen);
427 // Null pointer check
428 if (tempus2 == NULL) {
429 status = U_MEMORY_ALLOCATION_ERROR;
430 return;
431 }
432 timeDateArray[1].adoptString(tempus2);
b75a7d8f 433
374ca955
A
434 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)kDateTime, &resStrLen, &status);
435 MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status);
b75a7d8f 436 }
b75a7d8f
A
437 // if the pattern includes just time data or just date date, load the appropriate
438 // pattern string from the resources
374ca955
A
439 // setTo() - see DateFormatSymbols::assignArray comments
440 else if (timeStyle != kNone) {
441 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
442 fPattern.setTo(TRUE, resStr, resStrLen);
443 }
444 else if (dateStyle != kNone) {
445 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
446 fPattern.setTo(TRUE, resStr, resStrLen);
447 }
b75a7d8f
A
448
449 // and if it includes _neither_, that's an error
374ca955
A
450 else
451 status = U_INVALID_FORMAT_ERROR;
b75a7d8f
A
452
453 // finally, finish initializing by creating a Calendar and a NumberFormat
454 initialize(locale, status);
455}
456
457//----------------------------------------------------------------------
458
459Calendar*
460SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
461{
73c04bcf
A
462 if(!U_FAILURE(status)) {
463 fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status);
464 }
465 if (U_SUCCESS(status) && fCalendar == NULL) {
466 status = U_MEMORY_ALLOCATION_ERROR;
467 }
468 return fCalendar;
b75a7d8f
A
469}
470
471void
472SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status)
473{
474 if(U_FAILURE(status)) {
475 fSymbols = NULL;
476 } else {
477 // pass in calendar type - use NULL (default) if no calendar set (or err).
478 fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
46f4442e
A
479 // Null pointer check
480 if (fSymbols == NULL) {
481 status = U_MEMORY_ALLOCATION_ERROR;
482 return;
483 }
b75a7d8f
A
484 }
485}
486
487void
488SimpleDateFormat::initialize(const Locale& locale,
489 UErrorCode& status)
490{
491 if (U_FAILURE(status)) return;
492
b75a7d8f
A
493 // We don't need to check that the row count is >= 1, since all 2d arrays have at
494 // least one row
495 fNumberFormat = NumberFormat::createInstance(locale, status);
496 if (fNumberFormat != NULL && U_SUCCESS(status))
497 {
498 // no matter what the locale's default number format looked like, we want
499 // to modify it so that it doesn't use thousands separators, doesn't always
500 // show the decimal point, and recognizes integers only when parsing
501
502 fNumberFormat->setGroupingUsed(FALSE);
503 if (fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID())
504 ((DecimalFormat*)fNumberFormat)->setDecimalSeparatorAlwaysShown(FALSE);
505 fNumberFormat->setParseIntegerOnly(TRUE);
506 fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
46f4442e
A
507
508 // TODO: Really, the default should be lenient...
509 fNumberFormat->setParseStrict(FALSE);
b75a7d8f
A
510 }
511 else if (U_SUCCESS(status))
512 {
513 status = U_MISSING_RESOURCE_ERROR;
514 }
515}
516
517/* Initialize the fields we use to disambiguate ambiguous years. Separate
518 * so we can call it from readObject().
519 */
520void SimpleDateFormat::initializeDefaultCentury()
521{
522 if(fCalendar) {
523 fHaveDefaultCentury = fCalendar->haveDefaultCentury();
524 if(fHaveDefaultCentury) {
525 fDefaultCenturyStart = fCalendar->defaultCenturyStart();
526 fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
527 } else {
528 fDefaultCenturyStart = DBL_MIN;
529 fDefaultCenturyStartYear = -1;
530 }
531 }
532}
533
534/* Define one-century window into which to disambiguate dates using
535 * two-digit years. Make public in JDK 1.2.
536 */
537void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
538{
539 if(U_FAILURE(status)) {
540 return;
541 }
542 if(!fCalendar) {
543 status = U_ILLEGAL_ARGUMENT_ERROR;
544 return;
545 }
546
547 fCalendar->setTime(startDate, status);
548 if(U_SUCCESS(status)) {
549 fHaveDefaultCentury = TRUE;
550 fDefaultCenturyStart = startDate;
551 fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
552 }
553}
554
555//----------------------------------------------------------------------
556
557UnicodeString&
558SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
559{
560 UErrorCode status = U_ZERO_ERROR;
561 pos.setBeginIndex(0);
562 pos.setEndIndex(0);
563
564 UBool inQuote = FALSE;
565 UChar prevCh = 0;
566 int32_t count = 0;
567
568 // loop through the pattern string character by character
569 for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
570 UChar ch = fPattern[i];
571
572 // Use subFormat() to format a repeated pattern character
573 // when a different pattern or non-pattern character is seen
574 if (ch != prevCh && count > 0) {
575 subFormat(appendTo, prevCh, count, pos, cal, status);
576 count = 0;
577 }
578 if (ch == QUOTE) {
579 // Consecutive single quotes are a single quote literal,
580 // either outside of quotes or between quotes
581 if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
582 appendTo += (UChar)QUOTE;
583 ++i;
584 } else {
585 inQuote = ! inQuote;
586 }
587 }
588 else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
589 || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
590 // ch is a date-time pattern character to be interpreted
591 // by subFormat(); count the number of times it is repeated
592 prevCh = ch;
593 ++count;
594 }
595 else {
596 // Append quoted characters and unquoted non-pattern characters
597 appendTo += ch;
598 }
599 }
600
601 // Format the last item in the pattern, if any
602 if (count > 0) {
603 subFormat(appendTo, prevCh, count, pos, cal, status);
604 }
605
606 // and if something failed (e.g., an invalid format character), reset our FieldPosition
607 // to (0, 0) to show that
608 // {sfb} look at this later- are these being set correctly?
609 if (U_FAILURE(status)) {
610 pos.setBeginIndex(0);
611 pos.setEndIndex(0);
612 }
613
614 return appendTo;
615}
616
617UnicodeString&
618SimpleDateFormat::format(const Formattable& obj,
619 UnicodeString& appendTo,
620 FieldPosition& pos,
621 UErrorCode& status) const
622{
623 // this is just here to get around the hiding problem
624 // (the previous format() override would hide the version of
625 // format() on DateFormat that this function correspond to, so we
626 // have to redefine it here)
627 return DateFormat::format(obj, appendTo, pos, status);
628}
629
630//----------------------------------------------------------------------
631
46f4442e
A
632/* Map calendar field into calendar field level.
633 * the larger the level, the smaller the field unit.
634 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
635 * UCAL_MONTH level is 20.
636 * NOTE: if new fields adds in, the table needs to update.
637 */
638const int32_t
639SimpleDateFormat::fgCalendarFieldToLevel[] =
640{
641 /*GyM*/ 0, 10, 20,
642 /*wW*/ 20, 30,
643 /*dDEF*/ 30, 20, 30, 30,
644 /*ahHm*/ 40, 50, 50, 60,
645 /*sS..*/ 70, 80,
646 /*z?Y*/ 0, 0, 10,
647 /*eug*/ 30, 10, 0,
648 /*A*/ 40
649};
650
651
652/* Map calendar field LETTER into calendar field level.
653 * the larger the level, the smaller the field unit.
654 * NOTE: if new fields adds in, the table needs to update.
655 */
656const int32_t
657SimpleDateFormat::fgPatternCharToLevel[] = {
658 // A B C D E F G H I J K L M N O
659 -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, -1,
660 // P Q R S T U V W X Y Z
661 -1, 20, -1, 80, -1, -1, 0, 30, -1, 10, 0, -1, -1, -1, -1, -1,
662 // a b c d e f g h i j k l m n o
663 -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, -1, 60, -1, -1,
664 // p q r s t u v w x y z
665 -1, 20, -1, 70, -1, 10, 0, 20, -1, 10, 0, -1, -1, -1, -1, -1
666};
667
668
b75a7d8f
A
669// Map index into pattern character string to Calendar field number.
670const UCalendarDateFields
671SimpleDateFormat::fgPatternIndexToCalendarField[] =
672{
374ca955
A
673 /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
674 /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
675 /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
676 /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
677 /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
678 /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
679 /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
73c04bcf
A
680 /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
681 /*v*/ UCAL_ZONE_OFFSET,
46f4442e 682 /*c*/ UCAL_DOW_LOCAL,
73c04bcf
A
683 /*L*/ UCAL_MONTH,
684 /*Q*/ UCAL_MONTH,
685 /*q*/ UCAL_MONTH,
46f4442e 686 /*V*/ UCAL_ZONE_OFFSET,
b75a7d8f
A
687};
688
689// Map index into pattern character string to DateFormat field number
374ca955 690const UDateFormatField
b75a7d8f 691SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
374ca955
A
692 /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
693 /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
694 /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
695 /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
696 /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
697 /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
698 /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
73c04bcf
A
699 /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
700 /*v*/ UDAT_TIMEZONE_GENERIC_FIELD,
701 /*c*/ UDAT_STANDALONE_DAY_FIELD,
702 /*L*/ UDAT_STANDALONE_MONTH_FIELD,
703 /*Q*/ UDAT_QUARTER_FIELD,
704 /*q*/ UDAT_STANDALONE_QUARTER_FIELD,
46f4442e 705 /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD,
b75a7d8f
A
706};
707
b75a7d8f
A
708//----------------------------------------------------------------------
709
374ca955
A
710/**
711 * Append symbols[value] to dst. Make sure the array index is not out
712 * of bounds.
713 */
73c04bcf 714static inline void
374ca955
A
715_appendSymbol(UnicodeString& dst,
716 int32_t value,
717 const UnicodeString* symbols,
718 int32_t symbolsCount) {
73c04bcf
A
719 U_ASSERT(0 <= value && value < symbolsCount);
720 if (0 <= value && value < symbolsCount) {
721 dst += symbols[value];
722 }
723}
724
725//---------------------------------------------------------------------
46f4442e
A
726void
727SimpleDateFormat::appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const{
728 int32_t offset = cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status);
729 if (U_FAILURE(status)) {
730 return;
731 }
732 if (isDefaultGMTFormat()) {
733 formatGMTDefault(appendTo, offset);
734 } else {
735 ((SimpleDateFormat*)this)->initGMTFormatters(status);
736 if (U_SUCCESS(status)) {
737 int32_t type;
738 if (offset < 0) {
739 offset = -offset;
740 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTNegativeHM : kGMTNegativeHMS;
741 } else {
742 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTPositiveHM : kGMTPositiveHMS;
743 }
744 Formattable param(offset, Formattable::kIsDate);
745 FieldPosition fpos(0);
746 fGMTFormatters[type]->format(&param, 1, appendTo, fpos, status);
747 }
748 }
749}
750
751int32_t
752SimpleDateFormat::parseGMT(const UnicodeString &text, ParsePosition &pos) const {
753 if (!isDefaultGMTFormat()) {
754 int32_t start = pos.getIndex();
755
756 // Quick check
757 UBool prefixMatch = FALSE;
758 int32_t prefixLen = fSymbols->fGmtFormat.indexOf((UChar)0x007B /* '{' */);
759 if (prefixLen > 0 && text.compare(start, prefixLen, fSymbols->fGmtFormat, 0, prefixLen) == 0) {
760 prefixMatch = TRUE;
761 }
762 if (prefixMatch) {
763 // Prefix matched
764 UErrorCode status = U_ZERO_ERROR;
765 ((SimpleDateFormat*)this)->initGMTFormatters(status);
766 if (U_SUCCESS(status)) {
767 Formattable parsed;
768 int32_t parsedCount;
769
770 // Try negative Hms
771 fGMTFormatters[kGMTNegativeHMS]->parseObject(text, parsed, pos);
772 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
773 parsed.getArray(parsedCount);
774 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
775 return (int32_t)(-1 * (int64_t)parsed[0].getDate());
776 }
777 }
778
779 // Reset ParsePosition
780 pos.setIndex(start);
781 pos.setErrorIndex(-1);
782
783 // Try positive Hms
784 fGMTFormatters[kGMTPositiveHMS]->parseObject(text, parsed, pos);
785 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
786 parsed.getArray(parsedCount);
787 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
788 return (int32_t)((int64_t)parsed[0].getDate());
789 }
790 }
791
792 // Reset ParsePosition
793 pos.setIndex(start);
794 pos.setErrorIndex(-1);
795
796 // Try negative Hm
797 fGMTFormatters[kGMTNegativeHM]->parseObject(text, parsed, pos);
798 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
799 parsed.getArray(parsedCount);
800 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
801 return (int32_t)(-1 * (int64_t)parsed[0].getDate());
802 }
803 }
73c04bcf 804
46f4442e
A
805 // Reset ParsePosition
806 pos.setIndex(start);
807 pos.setErrorIndex(-1);
808
809 // Try positive Hm
810 fGMTFormatters[kGMTPositiveHM]->parseObject(text, parsed, pos);
811 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
812 parsed.getArray(parsedCount);
813 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
814 return (int32_t)((int64_t)parsed[0].getDate());
815 }
816 }
817
818 // Reset ParsePosition
819 pos.setIndex(start);
820 pos.setErrorIndex(-1);
821 }
822 // fall through to the default GMT parsing method
823 }
824 }
825 return parseGMTDefault(text, pos);
826}
827
828void
829SimpleDateFormat::formatGMTDefault(UnicodeString &appendTo, int32_t offset) const {
830 if (offset < 0) {
73c04bcf 831 appendTo += gGmtMinus;
46f4442e 832 offset = -offset; // suppress the '-' sign for text display.
73c04bcf
A
833 }else{
834 appendTo += gGmtPlus;
835 }
836
46f4442e
A
837 offset /= U_MILLIS_PER_SECOND; // now in seconds
838 int32_t sec = offset % 60;
839 offset /= 60;
840 int32_t min = offset % 60;
841 int32_t hour = offset / 60;
842
843
844 zeroPaddingNumber(appendTo, hour, 2, 2);
73c04bcf 845 appendTo += (UChar)0x003A /*':'*/;
46f4442e
A
846 zeroPaddingNumber(appendTo, min, 2, 2);
847 if (sec != 0) {
848 appendTo += (UChar)0x003A /*':'*/;
849 zeroPaddingNumber(appendTo, sec, 2, 2);
850 }
851}
852
853int32_t
854SimpleDateFormat::parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const {
855 int32_t start = pos.getIndex();
856
857 if (start + kGmtLen + 1 >= text.length()) {
858 pos.setErrorIndex(start);
859 return 0;
860 }
861
862 int32_t cur = start;
863 // "GMT"
864 if (text.compare(start, kGmtLen, gGmt) != 0) {
865 pos.setErrorIndex(start);
866 return 0;
867 }
868 cur += kGmtLen;
869 // Sign
870 UBool negative = FALSE;
871 if (text.charAt(cur) == (UChar)0x002D /* minus */) {
872 negative = TRUE;
873 } else if (text.charAt(cur) != (UChar)0x002B /* plus */) {
874 pos.setErrorIndex(cur);
875 return 0;
876 }
877 cur++;
878
879 // Numbers
880 int32_t numLen;
881 pos.setIndex(cur);
882
883 Formattable number;
884 parseInt(text, number, 6, pos, FALSE);
885 numLen = pos.getIndex() - cur;
886
887 if (numLen <= 0) {
888 pos.setIndex(start);
889 pos.setErrorIndex(cur);
890 return 0;
891 }
892
893 int32_t numVal = number.getLong();
894
895 int32_t hour = 0;
896 int32_t min = 0;
897 int32_t sec = 0;
898
899 if (numLen <= 2) {
900 // H[H][:mm[:ss]]
901 hour = numVal;
902 cur += numLen;
903 if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
904 cur++;
905 pos.setIndex(cur);
906 parseInt(text, number, 2, pos, FALSE);
907 numLen = pos.getIndex() - cur;
908 if (numLen == 2) {
909 // got minute field
910 min = number.getLong();
911 cur += numLen;
912 if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
913 cur++;
914 pos.setIndex(cur);
915 parseInt(text, number, 2, pos, FALSE);
916 numLen = pos.getIndex() - cur;
917 if (numLen == 2) {
918 // got second field
919 sec = number.getLong();
920 } else {
921 // reset position
922 pos.setIndex(cur - 1);
923 pos.setErrorIndex(-1);
924 }
925 }
926 } else {
927 // reset postion
928 pos.setIndex(cur - 1);
929 pos.setErrorIndex(-1);
930 }
931 }
932 } else if (numLen == 3 || numLen == 4) {
933 // Hmm or HHmm
934 hour = numVal / 100;
935 min = numVal % 100;
936 } else if (numLen == 5 || numLen == 6) {
937 // Hmmss or HHmmss
938 hour = numVal / 10000;
939 min = (numVal % 10000) / 100;
940 sec = numVal % 100;
941 } else {
942 // HHmmss followed by bogus numbers
943 pos.setIndex(cur + 6);
944
945 int32_t shift = numLen - 6;
946 while (shift > 0) {
947 numVal /= 10;
948 shift--;
949 }
950 hour = numVal / 10000;
951 min = (numVal % 10000) / 100;
952 sec = numVal % 100;
953 }
954
955 int32_t offset = ((hour*60 + min)*60 + sec)*1000;
956 if (negative) {
957 offset = -offset;
958 }
959 return offset;
960}
961
962UBool
963SimpleDateFormat::isDefaultGMTFormat() const {
964 // GMT pattern
965 if (fSymbols->fGmtFormat.length() == 0) {
966 // No GMT pattern is set
967 return TRUE;
968 } else if (fSymbols->fGmtFormat.compare(gDefGmtPat, kGmtPatLen) != 0) {
969 return FALSE;
970 }
971 // Hour patterns
972 if (fSymbols->fGmtHourFormats == NULL || fSymbols->fGmtHourFormatsCount != DateFormatSymbols::GMT_HOUR_COUNT) {
973 // No Hour pattern is set
974 return TRUE;
975 } else if ((fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS].compare(gDefGmtNegHmsPat, kNegHmsLen) != 0)
976 || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM].compare(gDefGmtNegHmPat, kNegHmLen) != 0)
977 || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS].compare(gDefGmtPosHmsPat, kPosHmsLen) != 0)
978 || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM].compare(gDefGmtPosHmPat, kPosHmLen) != 0)) {
979 return FALSE;
980 }
981 return TRUE;
982}
983
984void
985SimpleDateFormat::formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const {
986 UChar sign = 0x002B /* '+' */;
987 if (offset < 0) {
988 offset = -offset;
989 sign = 0x002D /* '-' */;
990 }
991 appendTo.append(sign);
992
993 int32_t offsetH = offset / U_MILLIS_PER_HOUR;
994 offset = offset % U_MILLIS_PER_HOUR;
995 int32_t offsetM = offset / U_MILLIS_PER_MINUTE;
996 offset = offset % U_MILLIS_PER_MINUTE;
997 int32_t offsetS = offset / U_MILLIS_PER_SECOND;
998
999 int32_t num = 0, denom = 0;
1000 if (offsetS == 0) {
1001 offset = offsetH*100 + offsetM; // HHmm
1002 num = offset % 10000;
1003 denom = 1000;
1004 } else {
1005 offset = offsetH*10000 + offsetM*100 + offsetS; // HHmmss
1006 num = offset % 1000000;
1007 denom = 100000;
1008 }
1009 while (denom >= 1) {
1010 UChar digit = (UChar)0x0030 + (num / denom);
1011 appendTo.append(digit);
1012 num = num % denom;
1013 denom /= 10;
1014 }
1015}
1016
1017void
1018SimpleDateFormat::initGMTFormatters(UErrorCode &status) {
1019 if (U_FAILURE(status)) {
1020 return;
1021 }
1022 umtx_lock(&LOCK);
1023 if (fGMTFormatters == NULL) {
1024 fGMTFormatters = (MessageFormat**)uprv_malloc(kNumGMTFormatters * sizeof(MessageFormat*));
1025 if (fGMTFormatters) {
1026 for (int32_t i = 0; i < kNumGMTFormatters; i++) {
1027 const UnicodeString *hourPattern = NULL; //initialized it to avoid warning
1028 switch (i) {
1029 case kGMTNegativeHMS:
1030 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS]);
1031 break;
1032 case kGMTNegativeHM:
1033 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM]);
1034 break;
1035 case kGMTPositiveHMS:
1036 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS]);
1037 break;
1038 case kGMTPositiveHM:
1039 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM]);
1040 break;
1041 }
1042 fGMTFormatters[i] = new MessageFormat(fSymbols->fGmtFormat, status);
1043 if (U_FAILURE(status)) {
1044 break;
1045 }
1046 SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone();
1047 sdf->adoptTimeZone(TimeZone::createTimeZone(UnicodeString(gEtcUTC)));
1048 sdf->applyPattern(*hourPattern);
1049 fGMTFormatters[i]->adoptFormat(0, sdf);
1050 }
1051 } else {
1052 status = U_MEMORY_ALLOCATION_ERROR;
1053 }
1054 }
1055 umtx_unlock(&LOCK);
374ca955
A
1056}
1057
73c04bcf 1058//---------------------------------------------------------------------
b75a7d8f
A
1059void
1060SimpleDateFormat::subFormat(UnicodeString &appendTo,
1061 UChar ch,
1062 int32_t count,
1063 FieldPosition& pos,
1064 Calendar& cal,
1065 UErrorCode& status) const
1066{
374ca955
A
1067 if (U_FAILURE(status)) {
1068 return;
1069 }
1070
b75a7d8f
A
1071 // this function gets called by format() to produce the appropriate substitution
1072 // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1073
1074 UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
374ca955 1075 UDateFormatField patternCharIndex;
b75a7d8f
A
1076 const int32_t maxIntCount = 10;
1077 int32_t beginOffset = appendTo.length();
1078
1079 // if the pattern character is unrecognized, signal an error and dump out
1080 if (patternCharPtr == NULL)
1081 {
1082 status = U_INVALID_FORMAT_ERROR;
374ca955 1083 return;
b75a7d8f
A
1084 }
1085
374ca955 1086 patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
b75a7d8f
A
1087 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1088 int32_t value = cal.get(field, status);
1089 if (U_FAILURE(status)) {
1090 return;
1091 }
1092
1093 switch (patternCharIndex) {
1094
1095 // for any "G" symbol, write out the appropriate era string
46f4442e 1096 // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
374ca955 1097 case UDAT_ERA_FIELD:
46f4442e
A
1098 if (count == 5)
1099 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1100 else if (count == 4)
73c04bcf
A
1101 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1102 else
1103 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
b75a7d8f
A
1104 break;
1105
46f4442e
A
1106 // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1107 // NEW: UTS#35:
1108//Year y yy yyy yyyy yyyyy
1109//AD 1 1 01 001 0001 00001
1110//AD 12 12 12 012 0012 00012
1111//AD 123 123 23 123 0123 00123
1112//AD 1234 1234 34 1234 1234 01234
1113//AD 12345 12345 45 12345 12345 12345
1114 case UDAT_YEAR_FIELD:
374ca955 1115 case UDAT_YEAR_WOY_FIELD:
46f4442e 1116 if(count == 2)
b75a7d8f 1117 zeroPaddingNumber(appendTo, value, 2, 2);
46f4442e
A
1118 else
1119 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1120 break;
b75a7d8f
A
1121
1122 // for "MMMM", write out the whole month name, for "MMM", write out the month
1123 // abbreviation, for "M" or "MM", write out the month as a number with the
1124 // appropriate number of digits
73c04bcf 1125 // for "MMMMM", use the narrow form
374ca955 1126 case UDAT_MONTH_FIELD:
73c04bcf
A
1127 if (count == 5)
1128 _appendSymbol(appendTo, value, fSymbols->fNarrowMonths,
1129 fSymbols->fNarrowMonthsCount);
1130 else if (count == 4)
374ca955
A
1131 _appendSymbol(appendTo, value, fSymbols->fMonths,
1132 fSymbols->fMonthsCount);
b75a7d8f 1133 else if (count == 3)
374ca955
A
1134 _appendSymbol(appendTo, value, fSymbols->fShortMonths,
1135 fSymbols->fShortMonthsCount);
b75a7d8f
A
1136 else
1137 zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
1138 break;
1139
73c04bcf
A
1140 // for "LLLL", write out the whole month name, for "LLL", write out the month
1141 // abbreviation, for "L" or "LL", write out the month as a number with the
1142 // appropriate number of digits
1143 // for "LLLLL", use the narrow form
1144 case UDAT_STANDALONE_MONTH_FIELD:
1145 if (count == 5)
1146 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowMonths,
1147 fSymbols->fStandaloneNarrowMonthsCount);
1148 else if (count == 4)
1149 _appendSymbol(appendTo, value, fSymbols->fStandaloneMonths,
1150 fSymbols->fStandaloneMonthsCount);
1151 else if (count == 3)
1152 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortMonths,
1153 fSymbols->fStandaloneShortMonthsCount);
1154 else
1155 zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
1156 break;
1157
b75a7d8f 1158 // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
374ca955 1159 case UDAT_HOUR_OF_DAY1_FIELD:
b75a7d8f
A
1160 if (value == 0)
1161 zeroPaddingNumber(appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1162 else
1163 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1164 break;
1165
374ca955
A
1166 case UDAT_FRACTIONAL_SECOND_FIELD:
1167 // Fractional seconds left-justify
1168 {
1169 fNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count);
1170 fNumberFormat->setMaximumIntegerDigits(maxIntCount);
1171 if (count == 1) {
1172 value = (value + 50) / 100;
1173 } else if (count == 2) {
1174 value = (value + 5) / 10;
1175 }
1176 FieldPosition p(0);
1177 fNumberFormat->format(value, appendTo, p);
1178 if (count > 3) {
1179 fNumberFormat->setMinimumIntegerDigits(count - 3);
1180 fNumberFormat->format((int32_t)0, appendTo, p);
1181 }
1182 }
b75a7d8f
A
1183 break;
1184
46f4442e
A
1185 // for "ee" or "e", use local numeric day-of-the-week
1186 // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1187 // for "EEEE" or "eeee", write out the wide day-of-the-week name
1188 // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1189 case UDAT_DOW_LOCAL_FIELD:
1190 if ( count < 3 ) {
1191 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1192 break;
1193 }
1194 // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1195 // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1196 value = cal.get(UCAL_DAY_OF_WEEK, status);
1197 if (U_FAILURE(status)) {
1198 return;
1199 }
1200 // fall through, do not break here
374ca955 1201 case UDAT_DAY_OF_WEEK_FIELD:
73c04bcf
A
1202 if (count == 5)
1203 _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1204 fSymbols->fNarrowWeekdaysCount);
1205 else if (count == 4)
374ca955
A
1206 _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1207 fSymbols->fWeekdaysCount);
73c04bcf 1208 else
374ca955
A
1209 _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1210 fSymbols->fShortWeekdaysCount);
b75a7d8f
A
1211 break;
1212
73c04bcf
A
1213 // for "ccc", write out the abbreviated day-of-the-week name
1214 // for "cccc", write out the wide day-of-the-week name
1215 // for "ccccc", use the narrow day-of-the-week name
1216 case UDAT_STANDALONE_DAY_FIELD:
46f4442e
A
1217 if ( count < 3 ) {
1218 zeroPaddingNumber(appendTo, value, 1, maxIntCount);
1219 break;
1220 }
1221 // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1222 // we want standard day-of-week, so first fix value.
1223 value = cal.get(UCAL_DAY_OF_WEEK, status);
1224 if (U_FAILURE(status)) {
1225 return;
1226 }
73c04bcf
A
1227 if (count == 5)
1228 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1229 fSymbols->fStandaloneNarrowWeekdaysCount);
1230 else if (count == 4)
1231 _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1232 fSymbols->fStandaloneWeekdaysCount);
46f4442e 1233 else // count == 3
73c04bcf
A
1234 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1235 fSymbols->fStandaloneShortWeekdaysCount);
73c04bcf
A
1236 break;
1237
b75a7d8f 1238 // for and "a" symbol, write out the whole AM/PM string
374ca955
A
1239 case UDAT_AM_PM_FIELD:
1240 _appendSymbol(appendTo, value, fSymbols->fAmPms,
1241 fSymbols->fAmPmsCount);
b75a7d8f
A
1242 break;
1243
1244 // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1245 // as "12"
374ca955 1246 case UDAT_HOUR1_FIELD:
b75a7d8f
A
1247 if (value == 0)
1248 zeroPaddingNumber(appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1249 else
1250 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1251 break;
1252
1253 // for the "z" symbols, we have to check our time zone data first. If we have a
73c04bcf
A
1254 // localized name for the time zone, then "zzzz" / "zzz" indicate whether
1255 // daylight time is in effect (long/short) and "zz" / "z" do not (long/short).
1256 // If we don't have a localized time zone name,
b75a7d8f
A
1257 // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the
1258 // offset from GMT) regardless of how many z's were in the pattern symbol
73c04bcf 1259 case UDAT_TIMEZONE_FIELD:
46f4442e
A
1260 case UDAT_TIMEZONE_GENERIC_FIELD:
1261 case UDAT_TIMEZONE_SPECIAL_FIELD:
1262 {
1263 UnicodeString zoneString;
1264 const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
1265 if (zsf) {
1266 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1267 if (count < 4) {
1268 // "z", "zz", "zzz"
1269 zsf->getSpecificShortString(cal, TRUE /*commonly used only*/,
1270 zoneString, status);
1271 } else {
1272 // "zzzz"
1273 zsf->getSpecificLongString(cal, zoneString, status);
73c04bcf 1274 }
46f4442e
A
1275 } else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1276 if (count == 1) {
1277 // "v"
1278 zsf->getGenericShortString(cal, TRUE /*commonly used only*/,
1279 zoneString, status);
1280 } else if (count == 4) {
1281 // "vvvv"
1282 zsf->getGenericLongString(cal, zoneString, status);
1283 }
1284 } else { // patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD
1285 if (count == 1) {
1286 // "V"
1287 zsf->getSpecificShortString(cal, FALSE /*ignore commonly used*/,
1288 zoneString, status);
1289 } else if (count == 4) {
1290 // "VVVV"
1291 zsf->getGenericLocationString(cal, zoneString, status);
73c04bcf
A
1292 }
1293 }
1294 }
46f4442e 1295 if (zoneString.isEmpty()) {
73c04bcf 1296 appendGMT(appendTo, cal, status);
46f4442e
A
1297 } else {
1298 appendTo += zoneString;
73c04bcf 1299 }
b75a7d8f 1300 }
46f4442e
A
1301 break;
1302
1303 case UDAT_TIMEZONE_RFC_FIELD: // 'Z' - TIMEZONE_RFC
1304 if (count < 4) {
1305 // RFC822 format, must use ASCII digits
1306 value = (cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status));
1307 formatRFC822TZ(appendTo, value);
1308 } else {
1309 // long form, localized GMT pattern
1310 appendGMT(appendTo, cal, status);
374ca955
A
1311 }
1312 break;
1313
73c04bcf
A
1314 case UDAT_QUARTER_FIELD:
1315 if (count >= 4)
1316 _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1317 fSymbols->fQuartersCount);
1318 else if (count == 3)
1319 _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1320 fSymbols->fShortQuartersCount);
1321 else
1322 zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount);
1323 break;
1324
1325 case UDAT_STANDALONE_QUARTER_FIELD:
1326 if (count >= 4)
1327 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1328 fSymbols->fStandaloneQuartersCount);
1329 else if (count == 3)
1330 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1331 fSymbols->fStandaloneShortQuartersCount);
1332 else
1333 zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount);
1334 break;
1335
1336
b75a7d8f
A
1337 // all of the other pattern symbols can be formatted as simple numbers with
1338 // appropriate zero padding
1339 default:
b75a7d8f
A
1340 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1341 break;
1342 }
1343
1344 // if the field we're formatting is the one the FieldPosition says it's interested
1345 // in, fill in the FieldPosition with this field's positions
374ca955
A
1346 if (pos.getBeginIndex() == pos.getEndIndex() &&
1347 pos.getField() == fgPatternIndexToDateFormatField[patternCharIndex]) {
1348 pos.setBeginIndex(beginOffset);
1349 pos.setEndIndex(appendTo.length());
b75a7d8f
A
1350 }
1351}
1352
1353//----------------------------------------------------------------------
b75a7d8f
A
1354void
1355SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int32_t minDigits, int32_t maxDigits) const
1356{
46f4442e
A
1357 if (fNumberFormat!=NULL) {
1358 FieldPosition pos(0);
b75a7d8f 1359
46f4442e
A
1360 fNumberFormat->setMinimumIntegerDigits(minDigits);
1361 fNumberFormat->setMaximumIntegerDigits(maxDigits);
1362 fNumberFormat->format(value, appendTo, pos); // 3rd arg is there to speed up processing
1363 }
b75a7d8f
A
1364}
1365
1366//----------------------------------------------------------------------
1367
1368/**
1369 * Format characters that indicate numeric fields. The character
1370 * at index 0 is treated specially.
1371 */
46f4442e 1372static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */
b75a7d8f
A
1373
1374/**
1375 * Return true if the given format character, occuring count
1376 * times, represents a numeric field.
1377 */
1378UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
1379 UnicodeString s(NUMERIC_FORMAT_CHARS);
1380 int32_t i = s.indexOf(formatChar);
1381 return (i > 0 || (i == 0 && count < 3));
1382}
1383
/**
 * Parses text, starting at parsePos.getIndex(), according to fPattern and
 * stores the parsed fields into cal.
 *
 * The pattern is walked character by character.  Runs of unquoted ASCII
 * letters are field specifiers handled by subParse(); everything else is
 * handed to matchLiterals().  Runs of abutting numeric fields are retried
 * with a progressively shorter first field (see the comments in the loop).
 *
 * On success parsePos's index is set to the position reached in text.
 * On failure parsePos's index is restored to the starting position and its
 * error index is set to the position reached when the failure occurred.
 *
 * After the pattern has been consumed, two fix-ups may be applied to cal:
 * the year is moved forward when the two-digit year was flagged ambiguous
 * and the resulting date precedes fDefaultCenturyStart, and the zone/DST
 * offsets are set explicitly when tztype is no longer TZTYPE_UNK.
 *
 * @param text      the text to parse
 * @param cal       calendar receiving the parsed fields
 * @param parsePos  in/out parse position, see above
 */
1384void
1385SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
1386{
46f4442e 1387 UErrorCode status = U_ZERO_ERROR;
b75a7d8f
A
1388 int32_t pos = parsePos.getIndex();
1389 int32_t start = pos;
1390 UBool ambiguousYear[] = { FALSE };
1391 int32_t count = 0;
46f4442e
A
1392
1393 UBool lenient = isLenient();
b75a7d8f 1394
46f4442e
A
1395 // hack, reset tztype, cast away const
// NOTE(review): tztype is only reset and read in this function; presumably
// subParse() updates it when a time zone field is parsed -- confirm there.
1396 ((SimpleDateFormat*)this)->tztype = TZTYPE_UNK;
73c04bcf 1397
b75a7d8f
A
1398 // For parsing abutting numeric fields. 'abutPat' is the
1399 // offset into 'pattern' of the first of 2 or more abutting
1400 // numeric fields. 'abutStart' is the offset into 'text'
1401 // where parsing the fields begins. 'abutPass' starts off as 0
1402 // and increments each time we try to parse the fields.
1403 int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
1404 int32_t abutStart = 0;
1405 int32_t abutPass = 0;
1406 UBool inQuote = FALSE;
1407
1408 const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
1409
1410 for (int32_t i=0; i<fPattern.length(); ++i) {
1411 UChar ch = fPattern.charAt(i);
1412
1413 // Handle alphabetic field characters.
1414 if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z]
1415 int32_t fieldPat = i;
1416
1417 // Count the length of this field specifier
1418 count = 1;
1419 while ((i+1)<fPattern.length() &&
1420 fPattern.charAt(i+1) == ch) {
1421 ++count;
1422 ++i;
1423 }
1424
1425 if (isNumeric(ch, count)) {
1426 if (abutPat < 0) {
1427 // Determine if there is an abutting numeric field. For
1428 // most fields we can just look at the next characters,
1429 // but the 'm' field is either numeric or text,
1430 // depending on the count, so we have to look ahead for
1431 // that field.
1432 if ((i+1)<fPattern.length()) {
1433 UBool abutting;
1434 UChar nextCh = fPattern.charAt(i+1);
1435 int32_t k = numericFormatChars.indexOf(nextCh);
// k == 0 is the special first entry of NUMERIC_FORMAT_CHARS, which is
// numeric only for counts below 3, so its run length must be measured.
1436 if (k == 0) {
1437 int32_t j = i+2;
1438 while (j<fPattern.length() &&
1439 fPattern.charAt(j) == nextCh) {
1440 ++j;
1441 }
1442 abutting = (j-i) < 4; // nextCount < 3
1443 } else {
1444 abutting = k > 0;
1445 }
1446
1447 // Record the start of a set of abutting numeric
1448 // fields.
1449 if (abutting) {
1450 abutPat = fieldPat;
1451 abutStart = pos;
1452 abutPass = 0;
1453 }
1454 }
1455 }
1456 } else {
1457 abutPat = -1; // End of any abutting fields
1458 }
1459
1460 // Handle fields within a run of abutting numeric fields. Take
1461 // the pattern "HHmmss" as an example. We will try to parse
1462 // 2/2/2 characters of the input text, then if that fails,
1463 // 1/2/2. We only adjust the width of the leftmost field; the
1464 // others remain fixed. This allows "123456" => 12:34:56, but
1465 // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we
1466 // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
1467 if (abutPat >= 0) {
1468 // If we are at the start of a run of abutting fields, then
1469 // shorten this field in each pass. If we can't shorten
1470 // this field any more, then the parse of this set of
1471 // abutting numeric fields has failed.
1472 if (fieldPat == abutPat) {
1473 count -= abutPass++;
1474 if (count == 0) {
1475 parsePos.setIndex(start);
1476 parsePos.setErrorIndex(pos);
1477 return;
1478 }
1479 }
1480
1481 pos = subParse(text, pos, ch, count,
1482 TRUE, FALSE, ambiguousYear, cal);
1483
1484 // If the parse fails anywhere in the run, back up to the
1485 // start of the run and retry.
1486 if (pos < 0) {
// i is set one before the run start because the loop's ++i runs next.
1487 i = abutPat - 1;
1488 pos = abutStart;
1489 continue;
1490 }
1491 }
1492
1493 // Handle non-numeric fields and non-abutting numeric
1494 // fields.
1495 else {
46f4442e
A
1496 int32_t s = subParse(text, pos, ch, count,
1497 FALSE, TRUE, ambiguousYear, cal);
1498
// A non-positive result is treated as a parse failure here.
1499 if (s <= 0) {
1500 status = U_PARSE_ERROR;
1501 goto ExitParse;
b75a7d8f 1502 }
46f4442e 1503 pos = s;
b75a7d8f
A
1504 }
1505 }
1506
1507 // Handle literal pattern characters. These are any
1508 // quoted characters and non-alphabetic unquoted
1509 // characters.
1510 else {
1511
1512 abutPat = -1; // End of any abutting fields
46f4442e
A
1513
// matchLiterals() takes i and pos by reference and may advance both.
1514 if (! matchLiterals(fPattern, i, text, pos, lenient)) {
1515 status = U_PARSE_ERROR;
1516 goto ExitParse;
b75a7d8f 1517 }
b75a7d8f
A
1518 }
1519 }
1520
1521 // At this point the fields of Calendar have been set. Calendar
1522 // will fill in default values for missing fields when the time
1523 // is computed.
1524
1525 parsePos.setIndex(pos);
1526
1527 // This part is a problem: When we call parsedDate.after, we compute the time.
1528 // Take the date April 3 2004 at 2:30 am. When this is first set up, the year
1529 // will be wrong if we're parsing a 2-digit year pattern. It will be 1904.
1530 // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am
1531 // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
1532 // on that day. It is therefore parsed out to fields as 3:30 am. Then we
1533 // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is
1534 // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
1535 /*
1536 UDate parsedDate = calendar.getTime();
1537 if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
1538 calendar.add(Calendar.YEAR, 100);
1539 parsedDate = calendar.getTime();
1540 }
1541 */
1542 // Because of the above condition, save off the fields in case we need to readjust.
1543 // The procedure we use here is not particularly efficient, but there is no other
1544 // way to do this given the API restrictions present in Calendar. We minimize
1545 // inefficiency by only performing this computation when it might apply, that is,
1546 // when the two-digit year is equal to the start year, and thus might fall at the
1547 // front or the back of the default century. This only works because we adjust
1548 // the year correctly to start with in other cases -- see subParse().
// The block below is entered for either fix-up: the ambiguous two-digit
// year, or a parsed time zone type (tztype != TZTYPE_UNK).
46f4442e 1549 if (ambiguousYear[0] || tztype != TZTYPE_UNK) // If this is true then the two-digit year == the default start year
b75a7d8f
A
1550 {
1551 // We need a copy of the fields, and we need to avoid triggering a call to
1552 // complete(), which will recalculate the fields. Since we can't access
1553 // the fields[] array in Calendar, we clone the entire object. This will
1554 // stop working if Calendar.clone() is ever rewritten to call complete().
46f4442e 1555 Calendar *copy;
73c04bcf 1556 if (ambiguousYear[0]) {
46f4442e
A
1557 copy = cal.clone();
1558 // Check for failed cloning.
1559 if (copy == NULL) {
1560 status = U_MEMORY_ALLOCATION_ERROR;
1561 goto ExitParse;
1562 }
73c04bcf
A
1563 UDate parsedDate = copy->getTime(status);
1564 // {sfb} check internalGetDefaultCenturyStart
1565 if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
1566 // We can't use add here because that does a complete() first.
1567 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
1568 }
46f4442e 1569 delete copy;
b75a7d8f 1570 }
73c04bcf 1571
46f4442e
A
1572 if (tztype != TZTYPE_UNK) {
1573 copy = cal.clone();
1574 // Check for failed cloning.
1575 if (copy == NULL) {
1576 status = U_MEMORY_ALLOCATION_ERROR;
1577 goto ExitParse;
1578 }
1579 const TimeZone & tz = cal.getTimeZone();
1580 BasicTimeZone *btz = NULL;
1581
// btz is set only for the four concrete zone classes checked below;
// for any other class it stays NULL and the TimeZone API is used.
1582 if (tz.getDynamicClassID() == OlsonTimeZone::getStaticClassID()
1583 || tz.getDynamicClassID() == SimpleTimeZone::getStaticClassID()
1584 || tz.getDynamicClassID() == RuleBasedTimeZone::getStaticClassID()
1585 || tz.getDynamicClassID() == VTimeZone::getStaticClassID()) {
1586 btz = (BasicTimeZone*)&tz;
1587 }
73c04bcf 1588
46f4442e
A
1589 // Get local millis
1590 copy->set(UCAL_ZONE_OFFSET, 0);
1591 copy->set(UCAL_DST_OFFSET, 0);
1592 UDate localMillis = copy->getTime(status);
1593
1594 // Make sure parsed time zone type (Standard or Daylight)
1595 // matches the rule used by the parsed time zone.
1596 int32_t raw, dst;
1597 if (btz != NULL) {
1598 if (tztype == TZTYPE_STD) {
1599 btz->getOffsetFromLocal(localMillis,
1600 BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status);
1601 } else {
1602 btz->getOffsetFromLocal(localMillis,
1603 BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status);
1604 }
1605 } else {
1606 // No good way to resolve ambiguous time at transition,
1607 // but following code work in most case.
1608 tz.getOffset(localMillis, TRUE, raw, dst, status);
73c04bcf 1609 }
73c04bcf 1610
46f4442e
A
1611 // Now, compare the results with parsed type, either standard or daylight saving time
1612 int32_t resolvedSavings = dst;
1613 if (tztype == TZTYPE_STD) {
1614 if (dst != 0) {
1615 // Override DST_OFFSET = 0 in the result calendar
1616 resolvedSavings = 0;
1617 }
1618 } else { // tztype == TZTYPE_DST
1619 if (dst == 0) {
1620 if (btz != NULL) {
1621 UDate time = localMillis + raw;
1622 // We use the nearest daylight saving time rule.
1623 TimeZoneTransition beforeTrs, afterTrs;
1624 UDate beforeT = time, afterT = time;
1625 int32_t beforeSav = 0, afterSav = 0;
1626 UBool beforeTrsAvail, afterTrsAvail;
1627
1628 // Search for DST rule before or on the time
1629 while (TRUE) {
1630 beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs);
1631 if (!beforeTrsAvail) {
1632 break;
1633 }
1634 beforeT = beforeTrs.getTime() - 1;
1635 beforeSav = beforeTrs.getFrom()->getDSTSavings();
1636 if (beforeSav != 0) {
1637 break;
1638 }
1639 }
b75a7d8f 1640
46f4442e
A
1641 // Search for DST rule after the time
1642 while (TRUE) {
1643 afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs);
1644 if (!afterTrsAvail) {
1645 break;
1646 }
1647 afterT = afterTrs.getTime();
1648 afterSav = afterTrs.getTo()->getDSTSavings();
1649 if (afterSav != 0) {
1650 break;
1651 }
1652 }
1653
// Both loops end either with no transition available or with a non-zero
// savings value; when both found one, the closer transition wins.
1654 if (beforeTrsAvail && afterTrsAvail) {
1655 if (time - beforeT > afterT - time) {
1656 resolvedSavings = afterSav;
1657 } else {
1658 resolvedSavings = beforeSav;
1659 }
1660 } else if (beforeTrsAvail && beforeSav != 0) {
1661 resolvedSavings = beforeSav;
1662 } else if (afterTrsAvail && afterSav != 0) {
1663 resolvedSavings = afterSav;
1664 } else {
1665 resolvedSavings = btz->getDSTSavings();
1666 }
1667 } else {
1668 resolvedSavings = tz.getDSTSavings();
1669 }
1670 if (resolvedSavings == 0) {
1671 // final fallback
1672 resolvedSavings = U_MILLIS_PER_HOUR;
1673 }
1674 }
1675 }
// Pin the resolved offsets on the caller's calendar.
1676 cal.set(UCAL_ZONE_OFFSET, raw);
1677 cal.set(UCAL_DST_OFFSET, resolvedSavings);
1678 delete copy;
1679 }
1680 }
1681ExitParse:
b75a7d8f
A
1682 // If any Calendar calls failed, we pretend that we
1683 // couldn't parse the string, when in reality this isn't quite accurate--
1684 // we did parse it; the Calendar calls just failed.
1685 if (U_FAILURE(status)) {
1686 parsePos.setErrorIndex(pos);
1687 parsePos.setIndex(start);
1688 }
1689}
1690
1691UDate
1692SimpleDateFormat::parse( const UnicodeString& text,
1693 ParsePosition& pos) const {
1694 // redefined here because the other parse() function hides this function's
1695 // cunterpart on DateFormat
1696 return DateFormat::parse(text, pos);
1697}
1698
1699UDate
1700SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
1701{
1702 // redefined here because the other parse() function hides this function's
1703 // counterpart on DateFormat
1704 return DateFormat::parse(text, status);
1705}
1706//----------------------------------------------------------------------
1707
73c04bcf
A
1708int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
1709 int32_t start,
1710 UCalendarDateFields field,
1711 const UnicodeString* data,
1712 int32_t dataCount,
1713 Calendar& cal) const
1714{
1715 int32_t i = 0;
1716 int32_t count = dataCount;
1717
1718 // There may be multiple strings in the data[] array which begin with
1719 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1720 // We keep track of the longest match, and return that. Note that this
1721 // unfortunately requires us to test all array elements.
1722 int32_t bestMatchLength = 0, bestMatch = -1;
1723
1724 // {sfb} kludge to support case-insensitive comparison
1725 // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1726 // the length of the match after case folding
1727 // {alan 20040607} don't case change the whole string, since the length
1728 // can change
1729 // TODO we need a case-insensitive startsWith function
1730 UnicodeString lcase, lcaseText;
1731 text.extract(start, INT32_MAX, lcaseText);
1732 lcaseText.foldCase();
1733
1734 for (; i < count; ++i)
1735 {
1736 // Always compare if we have no match yet; otherwise only compare
1737 // against potentially better matches (longer strings).
1738
1739 lcase.fastCopyFrom(data[i]).foldCase();
1740 int32_t length = lcase.length();
1741
1742 if (length > bestMatchLength &&
1743 lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
1744 {
1745 bestMatch = i;
1746 bestMatchLength = length;
1747 }
1748 }
1749 if (bestMatch >= 0)
1750 {
1751 cal.set(field, bestMatch * 3);
1752
1753 // Once we have a match, we have to determine the length of the
1754 // original source string. This will usually be == the length of
1755 // the case folded string, but it may differ (e.g. sharp s).
1756 lcase.fastCopyFrom(data[bestMatch]).foldCase();
1757
1758 // Most of the time, the length will be the same as the length
1759 // of the string from the locale data. Sometimes it will be
1760 // different, in which case we will have to figure it out by
1761 // adding a character at a time, until we have a match. We do
1762 // this all in one loop, where we try 'len' first (at index
1763 // i==0).
1764 int32_t len = data[bestMatch].length(); // 99+% of the time
1765 int32_t n = text.length() - start;
1766 for (i=0; i<=n; ++i) {
1767 int32_t j=i;
1768 if (i == 0) {
1769 j = len;
1770 } else if (i == len) {
1771 continue; // already tried this when i was 0
1772 }
1773 text.extract(start, j, lcaseText);
1774 lcaseText.foldCase();
1775 if (lcase == lcaseText) {
1776 return start + j;
1777 }
1778 }
1779 }
1780
1781 return -start;
1782}
1783
46f4442e
A
1784//----------------------------------------------------------------------
1785UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
1786 int32_t &patternOffset,
1787 const UnicodeString &text,
1788 int32_t &textOffset,
1789 UBool lenient)
1790{
1791 UBool inQuote = FALSE;
1792 UnicodeString literal;
1793 int32_t i = patternOffset;
1794
1795 // scan pattern looking for contiguous literal characters
1796 for ( ; i < pattern.length(); i += 1) {
1797 UChar ch = pattern.charAt(i);
1798
1799 if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z]
1800 break;
1801 }
1802
1803 if (ch == QUOTE) {
1804 // Match a quote literal ('') inside OR outside of quotes
1805 if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
1806 i += 1;
1807 } else {
1808 inQuote = !inQuote;
1809 continue;
1810 }
1811 }
1812
1813 literal += ch;
1814 }
1815
1816 // at this point, literal contains the literal text
1817 // and i is the index of the next non-literal pattern character.
1818 int32_t p;
1819 int32_t t = textOffset;
1820
1821 if (lenient) {
1822 // trim leading, trailing whitespace from
1823 // the literal text
1824 literal.trim();
1825
1826 // ignore any leading whitespace in the text
1827 while (t < text.length() && u_isWhitespace(text.charAt(t))) {
1828 t += 1;
1829 }
1830 }
1831
1832 for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) {
1833 UBool needWhitespace = FALSE;
1834
1835 while (p < literal.length() && uprv_isRuleWhiteSpace(literal.charAt(p))) {
1836 needWhitespace = TRUE;
1837 p += 1;
1838 }
1839
1840 if (needWhitespace) {
1841 int32_t tStart = t;
1842
1843 while (t < text.length()) {
1844 UChar tch = text.charAt(t);
1845
1846 if (!u_isUWhiteSpace(tch) && !uprv_isRuleWhiteSpace(tch)) {
1847 break;
1848 }
1849
1850 t += 1;
1851 }
1852
1853 // TODO: should we require internal spaces
1854 // in lenient mode? (There won't be any
1855 // leading or trailing spaces)
1856 if (!lenient && t == tStart) {
1857 // didn't find matching whitespace:
1858 // an error in strict mode
1859 return FALSE;
1860 }
1861
1862 // In strict mode, this run of whitespace
1863 // may have been at the end.
1864 if (p >= literal.length()) {
1865 break;
1866 }
1867 }
1868
1869 if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
1870 // Ran out of text, or found a non-matching character:
1871 // OK in lenient mode, an error in strict mode.
1872 if (lenient) {
1873 break;
1874 }
1875
1876 return FALSE;
1877 }
1878 }
1879
1880 // At this point if we're in strict mode we have a complete match.
1881 // If we're in lenient mode we may have a partial match, or no
1882 // match at all.
1883 if (p <= 0) {
1884 // no match. Pretend it matched a run of whitespace
1885 // and ignorables in the text.
1886 const UnicodeSet *ignorables = NULL;
1887 UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i));
1888
1889 if (patternCharPtr != NULL) {
1890 UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars());
1891
1892 ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
1893 }
1894
1895 for (t = textOffset; t < text.length(); t += 1) {
1896 UChar ch = text.charAt(t);
1897
1898 if (ignorables == NULL || !ignorables->contains(ch)) {
1899 break;
1900 }
1901 }
1902 }
1903
1904 // if we get here, we've got a complete match.
1905 patternOffset = i - 1;
1906 textOffset = t;
1907
1908 return TRUE;
1909}
1910
73c04bcf
A
1911//----------------------------------------------------------------------
1912
b75a7d8f
A
1913int32_t SimpleDateFormat::matchString(const UnicodeString& text,
1914 int32_t start,
1915 UCalendarDateFields field,
1916 const UnicodeString* data,
1917 int32_t dataCount,
1918 Calendar& cal) const
1919{
1920 int32_t i = 0;
1921 int32_t count = dataCount;
1922
1923 if (field == UCAL_DAY_OF_WEEK) i = 1;
1924
1925 // There may be multiple strings in the data[] array which begin with
1926 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1927 // We keep track of the longest match, and return that. Note that this
1928 // unfortunately requires us to test all array elements.
1929 int32_t bestMatchLength = 0, bestMatch = -1;
1930
1931 // {sfb} kludge to support case-insensitive comparison
1932 // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1933 // the length of the match after case folding
374ca955
A
1934 // {alan 20040607} don't case change the whole string, since the length
1935 // can change
1936 // TODO we need a case-insensitive startsWith function
1937 UnicodeString lcase, lcaseText;
1938 text.extract(start, INT32_MAX, lcaseText);
1939 lcaseText.foldCase();
b75a7d8f
A
1940
1941 for (; i < count; ++i)
1942 {
b75a7d8f
A
1943 // Always compare if we have no match yet; otherwise only compare
1944 // against potentially better matches (longer strings).
1945
b75a7d8f 1946 lcase.fastCopyFrom(data[i]).foldCase();
374ca955 1947 int32_t length = lcase.length();
b75a7d8f 1948
374ca955
A
1949 if (length > bestMatchLength &&
1950 lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
b75a7d8f
A
1951 {
1952 bestMatch = i;
1953 bestMatchLength = length;
1954 }
1955 }
1956 if (bestMatch >= 0)
1957 {
1958 cal.set(field, bestMatch);
374ca955
A
1959
1960 // Once we have a match, we have to determine the length of the
1961 // original source string. This will usually be == the length of
1962 // the case folded string, but it may differ (e.g. sharp s).
1963 lcase.fastCopyFrom(data[bestMatch]).foldCase();
1964
1965 // Most of the time, the length will be the same as the length
1966 // of the string from the locale data. Sometimes it will be
1967 // different, in which case we will have to figure it out by
1968 // adding a character at a time, until we have a match. We do
1969 // this all in one loop, where we try 'len' first (at index
1970 // i==0).
1971 int32_t len = data[bestMatch].length(); // 99+% of the time
1972 int32_t n = text.length() - start;
1973 for (i=0; i<=n; ++i) {
1974 int32_t j=i;
1975 if (i == 0) {
1976 j = len;
1977 } else if (i == len) {
1978 continue; // already tried this when i was 0
1979 }
1980 text.extract(start, j, lcaseText);
1981 lcaseText.foldCase();
1982 if (lcase == lcaseText) {
1983 return start + j;
1984 }
1985 }
b75a7d8f
A
1986 }
1987
1988 return -start;
1989}
1990
1991//----------------------------------------------------------------------
1992
1993void
1994SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
1995{
1996 parseAmbiguousDatesAsAfter(d, status);
1997}
1998
1999/**
2000 * Private member function that converts the parsed date strings into
2001 * timeFields. Returns -start (for ParsePosition) if failed.
2002 * @param text the time text to be parsed.
2003 * @param start where to start parsing.
2004 * @param ch the pattern character for the date field text to be parsed.
2005 * @param count the count of a pattern character.
2006 * @return the new start position if matching succeeded; a negative number
2007 * indicating matching failure, otherwise.
2008 */
2009int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
2010 UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const
2011{
2012 Formattable number;
2013 int32_t value = 0;
2014 int32_t i;
2015 ParsePosition pos(0);
2016 int32_t patternCharIndex;
2017 UnicodeString temp;
2018 UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
46f4442e
A
2019 UBool lenient = isLenient();
2020 UBool gotNumber = FALSE;
b75a7d8f 2021
374ca955
A
2022#if defined (U_DEBUG_CAL)
2023 //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start);
2024#endif
2025
b75a7d8f
A
2026 if (patternCharPtr == NULL) {
2027 return -start;
2028 }
2029
374ca955 2030 patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
b75a7d8f
A
2031
2032 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
2033
2034 // If there are any spaces here, skip over them. If we hit the end
2035 // of the string, then fail.
2036 for (;;) {
2037 if (start >= text.length()) {
2038 return -start;
2039 }
46f4442e 2040
b75a7d8f 2041 UChar32 c = text.char32At(start);
46f4442e 2042
b75a7d8f
A
2043 if (!u_isUWhiteSpace(c)) {
2044 break;
2045 }
46f4442e 2046
b75a7d8f
A
2047 start += UTF_CHAR_LENGTH(c);
2048 }
46f4442e 2049
b75a7d8f
A
2050 pos.setIndex(start);
2051
2052 // We handle a few special cases here where we need to parse
2053 // a number value. We handle further, more generic cases below. We need
2054 // to handle some of them here because some fields require extra processing on
2055 // the parsed value.
374ca955 2056 if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
46f4442e 2057 patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||
374ca955 2058 patternCharIndex == UDAT_HOUR1_FIELD ||
46f4442e
A
2059 patternCharIndex == UDAT_HOUR0_FIELD ||
2060 patternCharIndex == UDAT_DOW_LOCAL_FIELD ||
2061 patternCharIndex == UDAT_STANDALONE_DAY_FIELD ||
2062 patternCharIndex == UDAT_MONTH_FIELD ||
2063 patternCharIndex == UDAT_STANDALONE_MONTH_FIELD ||
2064 patternCharIndex == UDAT_QUARTER_FIELD ||
2065 patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD ||
374ca955
A
2066 patternCharIndex == UDAT_YEAR_FIELD ||
2067 patternCharIndex == UDAT_YEAR_WOY_FIELD ||
2068 patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
b75a7d8f 2069 {
374ca955 2070 int32_t parseStart = pos.getIndex();
b75a7d8f
A
2071 // It would be good to unify this with the obeyCount logic below,
2072 // but that's going to be difficult.
2073 const UnicodeString* src;
73c04bcf 2074
b75a7d8f
A
2075 if (obeyCount) {
2076 if ((start+count) > text.length()) {
2077 return -start;
2078 }
73c04bcf 2079
b75a7d8f
A
2080 text.extractBetween(0, start + count, temp);
2081 src = &temp;
2082 } else {
2083 src = &text;
2084 }
73c04bcf 2085
b75a7d8f 2086 parseInt(*src, number, pos, allowNegative);
73c04bcf 2087
46f4442e
A
2088 if (pos.getIndex() > parseStart) {
2089 value = number.getLong();
2090 gotNumber = TRUE;
2091
2092 // Check the range of the value
2093 int32_t bias = gFieldRangeBias[patternCharIndex];
2094
2095 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
2096 return -start;
2097 }
2098 }
2099
b75a7d8f 2100 }
46f4442e
A
2101
2102 // Make sure that we got a number if
2103 // we want one, and didn't get one
2104 // if we don't want one.
2105 switch (patternCharIndex) {
2106 case UDAT_HOUR_OF_DAY1_FIELD:
2107 case UDAT_HOUR_OF_DAY0_FIELD:
2108 case UDAT_HOUR1_FIELD:
2109 case UDAT_HOUR0_FIELD:
2110 // special range check for hours:
2111 if (value < 0 || value > 24) {
2112 return -start;
2113 }
2114
2115 // fall through to gotNumber check
2116
2117 case UDAT_YEAR_FIELD:
2118 case UDAT_YEAR_WOY_FIELD:
2119 case UDAT_FRACTIONAL_SECOND_FIELD:
2120 // these must be a number
2121 if (! gotNumber) {
2122 return -start;
2123 }
2124
2125 break;
2126
2127 case UDAT_DOW_LOCAL_FIELD:
2128 case UDAT_STANDALONE_DAY_FIELD:
2129 case UDAT_MONTH_FIELD:
2130 case UDAT_STANDALONE_MONTH_FIELD:
2131 case UDAT_QUARTER_FIELD:
2132 case UDAT_STANDALONE_QUARTER_FIELD:
2133 // in strict mode, these can only
2134 // be a number if count <= 2
2135 if (!lenient && gotNumber && count > 2) {
7393aa2f
A
2136 // We have a string pattern in strict mode
2137 // but the input parsed as a number. Ignore
2138 // the fact that the input parsed as a number
2139 // and try to match it as a string. (Some
2140 // locales have numbers for the month names.)
2141 gotNumber = FALSE;
2142 pos.setIndex(start);
46f4442e
A
2143 }
2144
2145 break;
2146
2147 default:
2148 // we check the rest of the fields below.
2149 break;
2150 }
2151
b75a7d8f 2152 switch (patternCharIndex) {
374ca955 2153 case UDAT_ERA_FIELD:
46f4442e
A
2154 if (count == 5) {
2155 return matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal);
2156 }
73c04bcf
A
2157 if (count == 4) {
2158 return matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal);
2159 }
2160
b75a7d8f 2161 return matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal);
73c04bcf 2162
374ca955 2163 case UDAT_YEAR_FIELD:
b75a7d8f
A
2164 // If there are 3 or more YEAR pattern characters, this indicates
2165 // that the year value is to be treated literally, without any
2166 // two-digit year adjustments (e.g., from "01" to 2001). Otherwise
2167 // we made adjustments to place the 2-digit year in the proper
2168 // century, for parsed strings from "00" to "99". Any other string
2169 // is treated literally: "2250", "-1", "1", "002".
46f4442e 2170 if ((pos.getIndex() - start) == 2
b75a7d8f
A
2171 && u_isdigit(text.charAt(start))
2172 && u_isdigit(text.charAt(start+1)))
2173 {
2174 // Assume for example that the defaultCenturyStart is 6/18/1903.
2175 // This means that two-digit years will be forced into the range
2176 // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02
2177 // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond
2178 // to 1904, 1905, etc. If the year is 03, then it is 2003 if the
2179 // other fields specify a date before 6/18, or 1903 if they specify a
2180 // date afterwards. As a result, 03 is an ambiguous year. All other
2181 // two-digit years are unambiguous.
2182 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
2183 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2184 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2185 value += (fDefaultCenturyStartYear/100)*100 +
2186 (value < ambiguousTwoDigitYear ? 100 : 0);
2187 }
2188 }
2189 cal.set(UCAL_YEAR, value);
2190 return pos.getIndex();
73c04bcf 2191
374ca955
A
2192 case UDAT_YEAR_WOY_FIELD:
2193 // Comment is the same as for UDAT_Year_FIELDs - look above
46f4442e 2194 if ((pos.getIndex() - start) == 2
b75a7d8f
A
2195 && u_isdigit(text.charAt(start))
2196 && u_isdigit(text.charAt(start+1))
2197 && fHaveDefaultCentury )
2198 {
2199 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2200 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2201 value += (fDefaultCenturyStartYear/100)*100 +
2202 (value < ambiguousTwoDigitYear ? 100 : 0);
2203 }
2204 cal.set(UCAL_YEAR_WOY, value);
2205 return pos.getIndex();
73c04bcf 2206
374ca955 2207 case UDAT_MONTH_FIELD:
46f4442e 2208 if (gotNumber) // i.e., M or MM.
b75a7d8f
A
2209 {
2210 // Don't want to parse the month if it is a string
2211 // while pattern uses numeric style: M or MM.
2212 // [We computed 'value' above.]
2213 cal.set(UCAL_MONTH, value - 1);
2214 return pos.getIndex();
73c04bcf 2215 } else {
b75a7d8f
A
2216 // count >= 3 // i.e., MMM or MMMM
2217 // Want to be able to parse both short and long forms.
2218 // Try count == 4 first:
2219 int32_t newStart = 0;
73c04bcf 2220
b75a7d8f
A
2221 if ((newStart = matchString(text, start, UCAL_MONTH,
2222 fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
2223 return newStart;
2224 else // count == 4 failed, now try count == 3
2225 return matchString(text, start, UCAL_MONTH,
2226 fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal);
2227 }
73c04bcf
A
2228
2229 case UDAT_STANDALONE_MONTH_FIELD:
46f4442e 2230 if (gotNumber) // i.e., L or LL.
73c04bcf
A
2231 {
2232 // Don't want to parse the month if it is a string
2233 // while pattern uses numeric style: M or MM.
2234 // [We computed 'value' above.]
2235 cal.set(UCAL_MONTH, value - 1);
2236 return pos.getIndex();
2237 } else {
2238 // count >= 3 // i.e., LLL or LLLL
2239 // Want to be able to parse both short and long forms.
2240 // Try count == 4 first:
2241 int32_t newStart = 0;
2242
2243 if ((newStart = matchString(text, start, UCAL_MONTH,
2244 fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
2245 return newStart;
2246 else // count == 4 failed, now try count == 3
2247 return matchString(text, start, UCAL_MONTH,
2248 fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal);
2249 }
2250
374ca955 2251 case UDAT_HOUR_OF_DAY1_FIELD:
b75a7d8f
A
2252 // [We computed 'value' above.]
2253 if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
2254 value = 0;
46f4442e
A
2255
2256 // fall through to set field
2257
2258 case UDAT_HOUR_OF_DAY0_FIELD:
b75a7d8f
A
2259 cal.set(UCAL_HOUR_OF_DAY, value);
2260 return pos.getIndex();
73c04bcf 2261
374ca955
A
2262 case UDAT_FRACTIONAL_SECOND_FIELD:
2263 // Fractional seconds left-justify
2264 i = pos.getIndex() - start;
2265 if (i < 3) {
2266 while (i < 3) {
2267 value *= 10;
2268 i++;
2269 }
2270 } else {
2271 int32_t a = 1;
2272 while (i > 3) {
2273 a *= 10;
2274 i--;
2275 }
2276 value = (value + (a>>1)) / a;
2277 }
2278 cal.set(UCAL_MILLISECOND, value);
2279 return pos.getIndex();
73c04bcf 2280
46f4442e
A
2281 case UDAT_DOW_LOCAL_FIELD:
2282 if (gotNumber) // i.e., e or ee
2283 {
2284 // [We computed 'value' above.]
2285 cal.set(UCAL_DOW_LOCAL, value);
2286 return pos.getIndex();
2287 }
2288 // else for eee-eeeee fall through to handling of EEE-EEEEE
2289 // fall through, do not break here
374ca955 2290 case UDAT_DAY_OF_WEEK_FIELD:
b75a7d8f
A
2291 {
2292 // Want to be able to parse both short and long forms.
46f4442e 2293 // Try count == 4 (EEEE) first:
b75a7d8f
A
2294 int32_t newStart = 0;
2295 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2296 fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0)
2297 return newStart;
46f4442e
A
2298 // EEEE failed, now try EEE
2299 else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2300 fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
2301 return newStart;
2302 // EEE failed, now try EEEEE
2303 else
b75a7d8f 2304 return matchString(text, start, UCAL_DAY_OF_WEEK,
46f4442e 2305 fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal);
b75a7d8f 2306 }
73c04bcf
A
2307
2308 case UDAT_STANDALONE_DAY_FIELD:
2309 {
46f4442e
A
2310 if (gotNumber) // c or cc
2311 {
2312 // [We computed 'value' above.]
2313 cal.set(UCAL_DOW_LOCAL, value);
2314 return pos.getIndex();
2315 }
73c04bcf 2316 // Want to be able to parse both short and long forms.
46f4442e 2317 // Try count == 4 (cccc) first:
73c04bcf
A
2318 int32_t newStart = 0;
2319 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2320 fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
2321 return newStart;
46f4442e 2322 else // cccc failed, now try ccc
73c04bcf
A
2323 return matchString(text, start, UCAL_DAY_OF_WEEK,
2324 fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal);
2325 }
2326
374ca955 2327 case UDAT_AM_PM_FIELD:
b75a7d8f 2328 return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
73c04bcf 2329
374ca955 2330 case UDAT_HOUR1_FIELD:
b75a7d8f
A
2331 // [We computed 'value' above.]
2332 if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
2333 value = 0;
46f4442e
A
2334
2335 // fall through to set field
2336
2337 case UDAT_HOUR0_FIELD:
b75a7d8f
A
2338 cal.set(UCAL_HOUR, value);
2339 return pos.getIndex();
73c04bcf
A
2340
2341 case UDAT_QUARTER_FIELD:
46f4442e 2342 if (gotNumber) // i.e., Q or QQ.
73c04bcf
A
2343 {
2344 // Don't want to parse the month if it is a string
2345 // while pattern uses numeric style: Q or QQ.
2346 // [We computed 'value' above.]
2347 cal.set(UCAL_MONTH, (value - 1) * 3);
2348 return pos.getIndex();
2349 } else {
2350 // count >= 3 // i.e., QQQ or QQQQ
2351 // Want to be able to parse both short and long forms.
2352 // Try count == 4 first:
2353 int32_t newStart = 0;
2354
2355 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2356 fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
2357 return newStart;
2358 else // count == 4 failed, now try count == 3
2359 return matchQuarterString(text, start, UCAL_MONTH,
2360 fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal);
2361 }
2362
2363 case UDAT_STANDALONE_QUARTER_FIELD:
46f4442e 2364 if (gotNumber) // i.e., q or qq.
73c04bcf
A
2365 {
2366 // Don't want to parse the month if it is a string
2367 // while pattern uses numeric style: q or q.
2368 // [We computed 'value' above.]
2369 cal.set(UCAL_MONTH, (value - 1) * 3);
2370 return pos.getIndex();
2371 } else {
2372 // count >= 3 // i.e., qqq or qqqq
2373 // Want to be able to parse both short and long forms.
2374 // Try count == 4 first:
2375 int32_t newStart = 0;
2376
2377 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2378 fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
2379 return newStart;
2380 else // count == 4 failed, now try count == 3
2381 return matchQuarterString(text, start, UCAL_MONTH,
2382 fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal);
2383 }
2384
374ca955
A
2385 case UDAT_TIMEZONE_FIELD:
2386 case UDAT_TIMEZONE_RFC_FIELD:
73c04bcf 2387 case UDAT_TIMEZONE_GENERIC_FIELD:
46f4442e 2388 case UDAT_TIMEZONE_SPECIAL_FIELD:
b75a7d8f 2389 {
46f4442e
A
2390 int32_t offset = 0;
2391 UBool parsed = FALSE;
2392
2393 // Step 1
2394 // Check if this is a long GMT offset string (either localized or default)
2395 offset = parseGMT(text, pos);
2396 if (pos.getIndex() - start > 0) {
2397 parsed = TRUE;
2398 }
2399 if (!parsed) {
2400 // Step 2
2401 // Check if this is an RFC822 time zone offset.
2402 // ICU supports the standard RFC822 format [+|-]HHmm
2403 // and its extended form [+|-]HHmmSS.
2404 do {
2405 int32_t sign = 0;
2406 UChar signChar = text.charAt(start);
2407 if (signChar == (UChar)0x002B /* '+' */) {
2408 sign = 1;
2409 } else if (signChar == (UChar)0x002D /* '-' */) {
2410 sign = -1;
2411 } else {
2412 // Not an RFC822 offset string
2413 break;
2414 }
b75a7d8f 2415
46f4442e
A
2416 // Parse digits
2417 int32_t orgPos = start + 1;
2418 pos.setIndex(orgPos);
2419 parseInt(text, number, 6, pos, FALSE);
2420 int32_t numLen = pos.getIndex() - orgPos;
2421 if (numLen <= 0) {
2422 break;
2423 }
b75a7d8f 2424
46f4442e
A
2425 // Followings are possible format (excluding sign char)
2426 // HHmmSS
2427 // HmmSS
2428 // HHmm
2429 // Hmm
2430 // HH
2431 // H
2432 int32_t val = number.getLong();
2433 int32_t hour = 0, min = 0, sec = 0;
2434 switch(numLen) {
2435 case 1: // H
2436 case 2: // HH
2437 hour = val;
2438 break;
2439 case 3: // Hmm
2440 case 4: // HHmm
2441 hour = val / 100;
2442 min = val % 100;
2443 break;
2444 case 5: // Hmmss
2445 case 6: // HHmmss
2446 hour = val / 10000;
2447 min = (val % 10000) / 100;
2448 sec = val % 100;
2449 break;
2450 }
2451 if (hour > 23 || min > 59 || sec > 59) {
2452 // Invalid value range
2453 break;
2454 }
2455 offset = (((hour * 60) + min) * 60 + sec) * 1000 * sign;
2456 parsed = TRUE;
2457 } while (FALSE);
b75a7d8f 2458
46f4442e
A
2459 if (!parsed) {
2460 // Failed to parse. Reset the position.
2461 pos.setIndex(start);
2462 }
b75a7d8f 2463 }
46f4442e
A
2464
2465 if (parsed) {
2466 // offset was successfully parsed as either a long GMT string or RFC822 zone offset
2467 // string. Create normalized zone ID for the offset.
2468
2469 UnicodeString tzID(gGmt);
2470 formatRFC822TZ(tzID, offset);
2471 //TimeZone *customTZ = TimeZone::createTimeZone(tzID);
2472 TimeZone *customTZ = new SimpleTimeZone(offset, tzID); // faster than TimeZone::createTimeZone
2473 cal.adoptTimeZone(customTZ);
2474
2475 return pos.getIndex();
b75a7d8f
A
2476 }
2477
46f4442e 2478 // Step 3
b75a7d8f
A
2479 // At this point, check for named time zones by looking through
2480 // the locale data from the DateFormatZoneData strings.
2481 // Want to be able to parse both short and long forms.
46f4442e
A
2482 // optimize for calendar's current time zone
2483 const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
2484 if (zsf) {
2485 UErrorCode status = U_ZERO_ERROR;
2486 const ZoneStringInfo *zsinfo = NULL;
2487 int32_t matchLen;
2488
2489 switch (patternCharIndex) {
2490 case UDAT_TIMEZONE_FIELD: // 'z'
2491 if (count < 4) {
2492 zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
2493 } else {
2494 zsinfo = zsf->findSpecificLong(text, start, matchLen, status);
2495 }
2496 break;
2497 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
2498 if (count == 1) {
2499 zsinfo = zsf->findGenericShort(text, start, matchLen, status);
2500 } else if (count == 4) {
2501 zsinfo = zsf->findGenericLong(text, start, matchLen, status);
2502 }
2503 break;
2504 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
2505 if (count == 1) {
2506 zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
2507 } else if (count == 4) {
2508 zsinfo = zsf->findGenericLocation(text, start, matchLen, status);
2509 }
2510 break;
2511 }
b75a7d8f 2512
46f4442e
A
2513 if (U_SUCCESS(status) && zsinfo != NULL) {
2514 if (zsinfo->isStandard()) {
2515 ((SimpleDateFormat*)this)->tztype = TZTYPE_STD;
2516 } else if (zsinfo->isDaylight()) {
2517 ((SimpleDateFormat*)this)->tztype = TZTYPE_DST;
2518 }
2519 UnicodeString tzid;
2520 zsinfo->getID(tzid);
2521
2522 UnicodeString current;
2523 cal.getTimeZone().getID(current);
2524 if (tzid != current) {
2525 TimeZone *tz = TimeZone::createTimeZone(tzid);
2526 cal.adoptTimeZone(tz);
2527 }
2528 return start + matchLen;
2529 }
b75a7d8f 2530 }
46f4442e
A
2531 // complete failure
2532 return -start;
b75a7d8f 2533 }
73c04bcf 2534
b75a7d8f 2535 default:
b75a7d8f 2536 // Handle "generic" fields
374ca955 2537 int32_t parseStart = pos.getIndex();
b75a7d8f
A
2538 const UnicodeString* src;
2539 if (obeyCount) {
2540 if ((start+count) > text.length()) {
2541 return -start;
2542 }
2543 text.extractBetween(0, start + count, temp);
2544 src = &temp;
2545 } else {
2546 src = &text;
2547 }
2548 parseInt(*src, number, pos, allowNegative);
2549 if (pos.getIndex() != parseStart) {
46f4442e
A
2550 int32_t value = number.getLong();
2551
2552 // Check the range of the value
2553 int32_t bias = gFieldRangeBias[patternCharIndex];
2554
2555 if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) {
2556 cal.set(field, value);
2557 return pos.getIndex();
2558 }
b75a7d8f 2559 }
46f4442e 2560
b75a7d8f
A
2561 return -start;
2562 }
2563}
2564
2565/**
2566 * Parse an integer using fNumberFormat. This method is semantically
2567 * const, but actually may modify fNumberFormat.
2568 */
2569void SimpleDateFormat::parseInt(const UnicodeString& text,
2570 Formattable& number,
2571 ParsePosition& pos,
2572 UBool allowNegative) const {
46f4442e
A
2573 parseInt(text, number, -1, pos, allowNegative);
2574}
2575
2576/**
2577 * Parse an integer using fNumberFormat up to maxDigits.
2578 */
2579void SimpleDateFormat::parseInt(const UnicodeString& text,
2580 Formattable& number,
2581 int32_t maxDigits,
2582 ParsePosition& pos,
2583 UBool allowNegative) const {
b75a7d8f
A
2584 UnicodeString oldPrefix;
2585 DecimalFormat* df = NULL;
2586 if (!allowNegative &&
2587 fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
2588 df = (DecimalFormat*)fNumberFormat;
2589 df->getNegativePrefix(oldPrefix);
2590 df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX);
2591 }
46f4442e 2592 int32_t oldPos = pos.getIndex();
b75a7d8f
A
2593 fNumberFormat->parse(text, number, pos);
2594 if (df != NULL) {
2595 df->setNegativePrefix(oldPrefix);
2596 }
46f4442e
A
2597
2598 if (maxDigits > 0) {
2599 // adjust the result to fit into
2600 // the maxDigits and move the position back
2601 int32_t nDigits = pos.getIndex() - oldPos;
2602 if (nDigits > maxDigits) {
2603 int32_t val = number.getLong();
2604 nDigits -= maxDigits;
2605 while (nDigits > 0) {
2606 val /= 10;
2607 nDigits--;
2608 }
2609 pos.setIndex(oldPos + maxDigits);
2610 number.setLong(val);
2611 }
2612 }
b75a7d8f
A
2613}
2614
2615//----------------------------------------------------------------------
2616
2617void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
2618 UnicodeString& translatedPattern,
2619 const UnicodeString& from,
2620 const UnicodeString& to,
2621 UErrorCode& status)
2622{
2623 // run through the pattern and convert any pattern symbols from the version
2624 // in "from" to the corresponding character ion "to". This code takes
2625 // quoted strings into account (it doesn't try to translate them), and it signals
2626 // an error if a particular "pattern character" doesn't appear in "from".
2627 // Depending on the values of "from" and "to" this can convert from generic
2628 // to localized patterns or localized to generic.
2629 if (U_FAILURE(status))
2630 return;
2631
2632 translatedPattern.remove();
2633 UBool inQuote = FALSE;
2634 for (int32_t i = 0; i < originalPattern.length(); ++i) {
2635 UChar c = originalPattern[i];
2636 if (inQuote) {
2637 if (c == QUOTE)
2638 inQuote = FALSE;
2639 }
2640 else {
2641 if (c == QUOTE)
2642 inQuote = TRUE;
2643 else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/
2644 || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) {
2645 int32_t ci = from.indexOf(c);
2646 if (ci == -1) {
2647 status = U_INVALID_FORMAT_ERROR;
2648 return;
2649 }
2650 c = to[ci];
2651 }
2652 }
2653 translatedPattern += c;
2654 }
2655 if (inQuote) {
2656 status = U_INVALID_FORMAT_ERROR;
2657 return;
2658 }
2659}
2660
2661//----------------------------------------------------------------------
2662
2663UnicodeString&
2664SimpleDateFormat::toPattern(UnicodeString& result) const
2665{
2666 result = fPattern;
2667 return result;
2668}
2669
2670//----------------------------------------------------------------------
2671
2672UnicodeString&
2673SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
2674 UErrorCode& status) const
2675{
2676 translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status);
2677 return result;
2678}
2679
2680//----------------------------------------------------------------------
2681
2682void
2683SimpleDateFormat::applyPattern(const UnicodeString& pattern)
2684{
2685 fPattern = pattern;
2686}
2687
2688//----------------------------------------------------------------------
2689
2690void
2691SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
2692 UErrorCode &status)
2693{
2694 translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status);
2695}
2696
2697//----------------------------------------------------------------------
2698
2699const DateFormatSymbols*
2700SimpleDateFormat::getDateFormatSymbols() const
2701{
2702 return fSymbols;
2703}
2704
2705//----------------------------------------------------------------------
2706
2707void
2708SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
2709{
2710 delete fSymbols;
2711 fSymbols = newFormatSymbols;
2712}
2713
2714//----------------------------------------------------------------------
2715void
2716SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
2717{
2718 delete fSymbols;
2719 fSymbols = new DateFormatSymbols(newFormatSymbols);
2720}
2721
2722
2723//----------------------------------------------------------------------
2724
2725
2726void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
2727{
2728 UErrorCode status = U_ZERO_ERROR;
2729 DateFormat::adoptCalendar(calendarToAdopt);
2730 delete fSymbols;
2731 fSymbols=NULL;
2732 initializeSymbols(fLocale, fCalendar, status); // we need new symbols
2733 initializeDefaultCentury(); // we need a new century (possibly)
2734}
2735
46f4442e
A
2736
2737//----------------------------------------------------------------------
2738
2739
2740UBool
2741SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
2742 return isFieldUnitIgnored(fPattern, field);
2743}
2744
2745
2746UBool
2747SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
2748 UCalendarDateFields field) {
2749 int32_t fieldLevel = fgCalendarFieldToLevel[field];
2750 int32_t level;
2751 UChar ch;
2752 UBool inQuote = FALSE;
2753 UChar prevCh = 0;
2754 int32_t count = 0;
2755
2756 for (int32_t i = 0; i < pattern.length(); ++i) {
2757 ch = pattern[i];
2758 if (ch != prevCh && count > 0) {
2759 level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
2760 // the larger the level, the smaller the field unit.
2761 if ( fieldLevel <= level ) {
2762 return FALSE;
2763 }
2764 count = 0;
2765 }
2766 if (ch == QUOTE) {
2767 if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
2768 ++i;
2769 } else {
2770 inQuote = ! inQuote;
2771 }
2772 }
2773 else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
2774 || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
2775 prevCh = ch;
2776 ++count;
2777 }
2778 }
2779 if ( count > 0 ) {
2780 // last item
2781 level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
2782 if ( fieldLevel <= level ) {
2783 return FALSE;
2784 }
2785 }
2786 return TRUE;
2787}
2788
2789
2790
2791const Locale&
2792SimpleDateFormat::getSmpFmtLocale(void) const {
2793 return fLocale;
2794}
2795
2796
2797
b75a7d8f
A
2798U_NAMESPACE_END
2799
2800#endif /* #if !UCONFIG_NO_FORMATTING */
2801
2802//eof