]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/smpdtfmt.cpp
7f98827fa245326d9fdd6766042cae3506efda0c
[apple/icu.git] / icuSources / i18n / smpdtfmt.cpp
1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2009, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 *
7 * File SMPDTFMT.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/31/97 aliu Modified extensively to work with 50 locales.
14 * 04/01/97 aliu Added support for centuries.
15 * 07/09/97 helena Made ParsePosition into a class.
16 * 07/21/98 stephen Added initializeDefaultCentury.
17 * Removed getZoneIndex (added in DateFormatSymbols)
18 * Removed subParseLong
19 * Removed chk
20 * 02/22/99 stephen Removed character literals for EBCDIC safety
21 * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru
22 * "99" are recognized. {j28 4182066}
23 * 11/15/99 weiv Added support for week of year/day of week format
24 ********************************************************************************
25 */
26
27 #define ZID_KEY_MAX 128
28
29 #include "unicode/utypes.h"
30
31 #if !UCONFIG_NO_FORMATTING
32
33 #include "unicode/smpdtfmt.h"
34 #include "unicode/dtfmtsym.h"
35 #include "unicode/ures.h"
36 #include "unicode/msgfmt.h"
37 #include "unicode/calendar.h"
38 #include "unicode/gregocal.h"
39 #include "unicode/timezone.h"
40 #include "unicode/decimfmt.h"
41 #include "unicode/dcfmtsym.h"
42 #include "unicode/uchar.h"
43 #include "unicode/uniset.h"
44 #include "unicode/ustring.h"
45 #include "unicode/basictz.h"
46 #include "unicode/simpletz.h"
47 #include "unicode/rbtz.h"
48 #include "unicode/vtzone.h"
49 #include "olsontz.h"
50 #include "util.h"
51 #include "gregoimp.h"
52 #include "cstring.h"
53 #include "uassert.h"
54 #include "zstrfmt.h"
55 #include "cmemory.h"
56 #include "umutex.h"
57 #include "smpdtfst.h"
58 #include <float.h>
59
60 #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL)
61 #include <stdio.h>
62 #endif
63
64 // *****************************************************************************
65 // class SimpleDateFormat
66 // *****************************************************************************
67
68 U_NAMESPACE_BEGIN
69
// Code unit 0x40, one below 'A' (0x41). Presumably the base subtracted from a
// pattern letter to index the per-letter tables in this file - confirm at the
// lookup sites.
70 static const UChar PATTERN_CHAR_BASE = 0x40;
71
72 /**
73 * Last-resort string to use for "GMT" when constructing time zone strings.
74 */
75 // When no localized GMT pattern applies, formatGMTDefault() writes offsets as
76 // GMT+HH:mm or GMT-HH:mm, adding :ss only when the seconds are non-zero.
77 // parseGMTDefault() additionally accepts H[H][:mm[:ss]] and packed digit forms.
78 static const UChar gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT"
79 static const UChar gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
80 static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
// The five patterns below are what isDefaultGMTFormat() compares the locale's
// GMT pattern and hour patterns against.
81 static const UChar gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
82 static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
83 static const UChar gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
84 static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
85 static const UChar gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
// Lengths, in UChar units and excluding the terminating NUL, of the strings
// above: gGmt, gDefGmtPat and the four hour patterns respectively.
86 typedef enum GmtPatSize {
87 kGmtLen = 3,
88 kGmtPatLen = 6,
89 kNegHmsLen = 9,
90 kNegHmLen = 6,
91 kPosHmsLen = 9,
92 kPosHmLen = 6
93 } GmtPatSize;
94
95 // This is the pattern of last resort, used when no usable pattern can be
96 // loaded from a resource.
97 static const UChar gDefaultPattern[] =
98 {
99 0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
100 }; /* "yyyyMMdd hh:mm a" */
101
102 // This prefix is designed to NEVER MATCH real text, in order to
103 // suppress the parsing of negative numbers. Adjust as needed (if
104 // this becomes valid Unicode).
105 static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
106
107 /**
108 * These are the tags we expect to see in normal resource bundle files associated
109 * with a locale.
110 */
111 static const char gDateTimePatternsTag[]="DateTimePatterns";
112
113 static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
114 static const UChar QUOTE = 0x27; // Single quote; toggles literal text in format()
// Slot indices into the fGMTFormatters array, one MessageFormat per
// sign / precision combination. kNumGMTFormatters is the array length.
115 enum {
116 kGMTNegativeHMS = 0,
117 kGMTNegativeHM,
118 kGMTPositiveHMS,
119 kGMTPositiveHM,
120
121 kNumGMTFormatters
122 };
123
124 /*
125 * The field range check bias for each UDateFormatField.
126 * The bias is added to the minimum and maximum values
127 * before they are compared to the parsed number.
128 * For example, the calendar stores zero-based month numbers
129 * but the parsed month numbers start at 1, so the bias is 1.
130 *
131 * A value of -1 means that the value is not checked.
132 *
 * The entries are listed in the same order as the pattern-index tables
 * (fgPatternIndexToDateFormatField) later in this file.
 */
133 static const int32_t gFieldRangeBias[] = {
134 -1, // 'G' - UDAT_ERA_FIELD
135 -1, // 'y' - UDAT_YEAR_FIELD
136 1, // 'M' - UDAT_MONTH_FIELD
137 0, // 'd' - UDAT_DATE_FIELD
138 -1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD
139 -1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD
140 0, // 'm' - UDAT_MINUTE_FIELD
141 0, // 's' - UDAT_SECOND_FIELD
142 -1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
143 -1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
144 -1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
145 -1, // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
146 -1, // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
147 -1, // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
148 -1, // 'a' - UDAT_AM_PM_FIELD
149 -1, // 'h' - UDAT_HOUR1_FIELD
150 -1, // 'K' - UDAT_HOUR0_FIELD
151 -1, // 'z' - UDAT_TIMEZONE_FIELD
152 -1, // 'Y' - UDAT_YEAR_WOY_FIELD
153 -1, // 'e' - UDAT_DOW_LOCAL_FIELD
154 -1, // 'u' - UDAT_EXTENDED_YEAR_FIELD
155 -1, // 'g' - UDAT_JULIAN_DAY_FIELD
156 -1, // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
157 -1, // 'Z' - UDAT_TIMEZONE_RFC_FIELD
158 -1, // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
159 0, // 'c' - UDAT_STANDALONE_DAY_FIELD
160 1, // 'L' - UDAT_STANDALONE_MONTH_FIELD
161 -1, // 'Q' - UDAT_QUARTER_FIELD (1-4?)
162 -1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD
163 -1 // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
164 };
165
// File-scope mutex. initGMTFormatters() locks it before checking for and
// creating the fGMTFormatters array.
166 static UMTX LOCK;
167
168 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
169
170 //----------------------------------------------------------------------
171
172 SimpleDateFormat::~SimpleDateFormat()
173 {
174 delete fSymbols;
175 if (fGMTFormatters) {
176 for (int32_t i = 0; i < kNumGMTFormatters; i++) {
177 if (fGMTFormatters[i]) {
178 delete fGMTFormatters[i];
179 }
180 }
181 uprv_free(fGMTFormatters);
182 }
183 }
184
185 //----------------------------------------------------------------------
186
187 SimpleDateFormat::SimpleDateFormat(UErrorCode& status)
188 : fLocale(Locale::getDefault()),
189 fSymbols(NULL),
190 fGMTFormatters(NULL)
191 {
192 construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status);
193 initializeDefaultCentury();
194 }
195
196 //----------------------------------------------------------------------
197
198 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
199 UErrorCode &status)
200 : fPattern(pattern),
201 fLocale(Locale::getDefault()),
202 fSymbols(NULL),
203 fGMTFormatters(NULL)
204 {
205 initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
206 initialize(fLocale, status);
207 initializeDefaultCentury();
208 }
209
210 //----------------------------------------------------------------------
211
212 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
213 const Locale& locale,
214 UErrorCode& status)
215 : fPattern(pattern),
216 fLocale(locale),
217 fGMTFormatters(NULL)
218 {
219 initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status);
220 initialize(fLocale, status);
221 initializeDefaultCentury();
222 }
223
224 //----------------------------------------------------------------------
225
226 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
227 DateFormatSymbols* symbolsToAdopt,
228 UErrorCode& status)
229 : fPattern(pattern),
230 fLocale(Locale::getDefault()),
231 fSymbols(symbolsToAdopt),
232 fGMTFormatters(NULL)
233 {
234 initializeCalendar(NULL,fLocale,status);
235 initialize(fLocale, status);
236 initializeDefaultCentury();
237 }
238
239 //----------------------------------------------------------------------
240
241 SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern,
242 const DateFormatSymbols& symbols,
243 UErrorCode& status)
244 : fPattern(pattern),
245 fLocale(Locale::getDefault()),
246 fSymbols(new DateFormatSymbols(symbols)),
247 fGMTFormatters(NULL)
248 {
249 initializeCalendar(NULL, fLocale, status);
250 initialize(fLocale, status);
251 initializeDefaultCentury();
252 }
253
254 //----------------------------------------------------------------------
255
256 // Not for public consumption; used by DateFormat
257 SimpleDateFormat::SimpleDateFormat(EStyle timeStyle,
258 EStyle dateStyle,
259 const Locale& locale,
260 UErrorCode& status)
261 : fLocale(locale),
262 fSymbols(NULL),
263 fGMTFormatters(NULL)
264 {
265 construct(timeStyle, dateStyle, fLocale, status);
266 if(U_SUCCESS(status)) {
267 initializeDefaultCentury();
268 }
269 }
270
271 //----------------------------------------------------------------------
272
273 /**
274 * Not for public consumption; used by DateFormat. This constructor
275 * never fails. If the resource data is not available, it uses the
276 * the last resort symbols.
277 */
278 SimpleDateFormat::SimpleDateFormat(const Locale& locale,
279 UErrorCode& status)
280 : fPattern(gDefaultPattern),
281 fLocale(locale),
282 fSymbols(NULL),
283 fGMTFormatters(NULL)
284 {
285 if (U_FAILURE(status)) return;
286 initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status);
287 if (U_FAILURE(status))
288 {
289 status = U_ZERO_ERROR;
290 delete fSymbols;
291 // This constructor doesn't fail; it uses last resort data
292 fSymbols = new DateFormatSymbols(status);
293 /* test for NULL */
294 if (fSymbols == 0) {
295 status = U_MEMORY_ALLOCATION_ERROR;
296 return;
297 }
298 }
299
300 initialize(fLocale, status);
301 if(U_SUCCESS(status)) {
302 initializeDefaultCentury();
303 }
304 }
305
306 //----------------------------------------------------------------------
307
308 SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other)
309 : DateFormat(other),
310 fSymbols(NULL),
311 fGMTFormatters(NULL)
312 {
313 *this = other;
314 }
315
316 //----------------------------------------------------------------------
317
318 SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other)
319 {
320 if (this == &other) {
321 return *this;
322 }
323 DateFormat::operator=(other);
324
325 delete fSymbols;
326 fSymbols = NULL;
327
328 if (other.fSymbols)
329 fSymbols = new DateFormatSymbols(*other.fSymbols);
330
331 fDefaultCenturyStart = other.fDefaultCenturyStart;
332 fDefaultCenturyStartYear = other.fDefaultCenturyStartYear;
333 fHaveDefaultCentury = other.fHaveDefaultCentury;
334
335 fPattern = other.fPattern;
336
337 return *this;
338 }
339
340 //----------------------------------------------------------------------
341
342 Format*
343 SimpleDateFormat::clone() const
344 {
345 return new SimpleDateFormat(*this);
346 }
347
348 //----------------------------------------------------------------------
349
350 UBool
351 SimpleDateFormat::operator==(const Format& other) const
352 {
353 if (DateFormat::operator==(other)) {
354 // DateFormat::operator== guarantees following cast is safe
355 SimpleDateFormat* that = (SimpleDateFormat*)&other;
356 return (fPattern == that->fPattern &&
357 fSymbols != NULL && // Check for pathological object
358 that->fSymbols != NULL && // Check for pathological object
359 *fSymbols == *that->fSymbols &&
360 fHaveDefaultCentury == that->fHaveDefaultCentury &&
361 fDefaultCenturyStart == that->fDefaultCenturyStart);
362 }
363 return FALSE;
364 }
365
366 //----------------------------------------------------------------------
367
368 void SimpleDateFormat::construct(EStyle timeStyle,
369 EStyle dateStyle,
370 const Locale& locale,
371 UErrorCode& status)
372 {
373 // called by several constructors to load pattern data from the resources
374 if (U_FAILURE(status)) return;
375
376 // We will need the calendar to know what type of symbols to load.
377 initializeCalendar(NULL, locale, status);
378 if (U_FAILURE(status)) return;
379
380 CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status);
381 UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status);
382 if (U_FAILURE(status)) return;
383
384 if (ures_getSize(dateTimePatterns) <= kDateTime)
385 {
386 status = U_INVALID_FORMAT_ERROR;
387 return;
388 }
389
390 setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status),
391 ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status));
392
393 // create a symbols object from the locale
394 initializeSymbols(locale,fCalendar, status);
395 if (U_FAILURE(status)) return;
396 /* test for NULL */
397 if (fSymbols == 0) {
398 status = U_MEMORY_ALLOCATION_ERROR;
399 return;
400 }
401
402 const UChar *resStr;
403 int32_t resStrLen = 0;
404
405 // if the pattern should include both date and time information, use the date/time
406 // pattern string as a guide to tell use how to glue together the appropriate date
407 // and time pattern strings. The actual gluing-together is handled by a convenience
408 // method on MessageFormat.
409 if ((timeStyle != kNone) && (dateStyle != kNone))
410 {
411 Formattable timeDateArray[2];
412
413 // use Formattable::adoptString() so that we can use fastCopyFrom()
414 // instead of Formattable::setString()'s unaware, safe, deep string clone
415 // see Jitterbug 2296
416 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
417 UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen);
418 // NULL pointer check
419 if (tempus1 == NULL) {
420 status = U_MEMORY_ALLOCATION_ERROR;
421 return;
422 }
423 timeDateArray[0].adoptString(tempus1);
424
425 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
426 UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen);
427 // Null pointer check
428 if (tempus2 == NULL) {
429 status = U_MEMORY_ALLOCATION_ERROR;
430 return;
431 }
432 timeDateArray[1].adoptString(tempus2);
433
434 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)kDateTime, &resStrLen, &status);
435 MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status);
436 }
437 // if the pattern includes just time data or just date date, load the appropriate
438 // pattern string from the resources
439 // setTo() - see DateFormatSymbols::assignArray comments
440 else if (timeStyle != kNone) {
441 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status);
442 fPattern.setTo(TRUE, resStr, resStrLen);
443 }
444 else if (dateStyle != kNone) {
445 resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status);
446 fPattern.setTo(TRUE, resStr, resStrLen);
447 }
448
449 // and if it includes _neither_, that's an error
450 else
451 status = U_INVALID_FORMAT_ERROR;
452
453 // finally, finish initializing by creating a Calendar and a NumberFormat
454 initialize(locale, status);
455 }
456
457 //----------------------------------------------------------------------
458
459 Calendar*
460 SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status)
461 {
462 if(!U_FAILURE(status)) {
463 fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status);
464 }
465 if (U_SUCCESS(status) && fCalendar == NULL) {
466 status = U_MEMORY_ALLOCATION_ERROR;
467 }
468 return fCalendar;
469 }
470
471 void
472 SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status)
473 {
474 if(U_FAILURE(status)) {
475 fSymbols = NULL;
476 } else {
477 // pass in calendar type - use NULL (default) if no calendar set (or err).
478 fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status);
479 // Null pointer check
480 if (fSymbols == NULL) {
481 status = U_MEMORY_ALLOCATION_ERROR;
482 return;
483 }
484 }
485 }
486
487 void
488 SimpleDateFormat::initialize(const Locale& locale,
489 UErrorCode& status)
490 {
491 if (U_FAILURE(status)) return;
492
493 // We don't need to check that the row count is >= 1, since all 2d arrays have at
494 // least one row
495 fNumberFormat = NumberFormat::createInstance(locale, status);
496 if (fNumberFormat != NULL && U_SUCCESS(status))
497 {
498 // no matter what the locale's default number format looked like, we want
499 // to modify it so that it doesn't use thousands separators, doesn't always
500 // show the decimal point, and recognizes integers only when parsing
501
502 fNumberFormat->setGroupingUsed(FALSE);
503 if (fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID())
504 ((DecimalFormat*)fNumberFormat)->setDecimalSeparatorAlwaysShown(FALSE);
505 fNumberFormat->setParseIntegerOnly(TRUE);
506 fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00"
507
508 // TODO: Really, the default should be lenient...
509 fNumberFormat->setParseStrict(FALSE);
510 }
511 else if (U_SUCCESS(status))
512 {
513 status = U_MISSING_RESOURCE_ERROR;
514 }
515 }
516
517 /* Initialize the fields we use to disambiguate ambiguous years. Separate
518 * so we can call it from readObject().
519 */
520 void SimpleDateFormat::initializeDefaultCentury()
521 {
522 if(fCalendar) {
523 fHaveDefaultCentury = fCalendar->haveDefaultCentury();
524 if(fHaveDefaultCentury) {
525 fDefaultCenturyStart = fCalendar->defaultCenturyStart();
526 fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear();
527 } else {
528 fDefaultCenturyStart = DBL_MIN;
529 fDefaultCenturyStartYear = -1;
530 }
531 }
532 }
533
534 /* Define one-century window into which to disambiguate dates using
535 * two-digit years. Make public in JDK 1.2.
536 */
537 void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status)
538 {
539 if(U_FAILURE(status)) {
540 return;
541 }
542 if(!fCalendar) {
543 status = U_ILLEGAL_ARGUMENT_ERROR;
544 return;
545 }
546
547 fCalendar->setTime(startDate, status);
548 if(U_SUCCESS(status)) {
549 fHaveDefaultCentury = TRUE;
550 fDefaultCenturyStart = startDate;
551 fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status);
552 }
553 }
554
555 //----------------------------------------------------------------------
556
557 UnicodeString&
558 SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const
559 {
560 UErrorCode status = U_ZERO_ERROR;
561 pos.setBeginIndex(0);
562 pos.setEndIndex(0);
563
564 UBool inQuote = FALSE;
565 UChar prevCh = 0;
566 int32_t count = 0;
567
568 // loop through the pattern string character by character
569 for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) {
570 UChar ch = fPattern[i];
571
572 // Use subFormat() to format a repeated pattern character
573 // when a different pattern or non-pattern character is seen
574 if (ch != prevCh && count > 0) {
575 subFormat(appendTo, prevCh, count, pos, cal, status);
576 count = 0;
577 }
578 if (ch == QUOTE) {
579 // Consecutive single quotes are a single quote literal,
580 // either outside of quotes or between quotes
581 if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) {
582 appendTo += (UChar)QUOTE;
583 ++i;
584 } else {
585 inQuote = ! inQuote;
586 }
587 }
588 else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
589 || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
590 // ch is a date-time pattern character to be interpreted
591 // by subFormat(); count the number of times it is repeated
592 prevCh = ch;
593 ++count;
594 }
595 else {
596 // Append quoted characters and unquoted non-pattern characters
597 appendTo += ch;
598 }
599 }
600
601 // Format the last item in the pattern, if any
602 if (count > 0) {
603 subFormat(appendTo, prevCh, count, pos, cal, status);
604 }
605
606 // and if something failed (e.g., an invalid format character), reset our FieldPosition
607 // to (0, 0) to show that
608 // {sfb} look at this later- are these being set correctly?
609 if (U_FAILURE(status)) {
610 pos.setBeginIndex(0);
611 pos.setEndIndex(0);
612 }
613
614 return appendTo;
615 }
616
617 UnicodeString&
618 SimpleDateFormat::format(const Formattable& obj,
619 UnicodeString& appendTo,
620 FieldPosition& pos,
621 UErrorCode& status) const
622 {
623 // this is just here to get around the hiding problem
624 // (the previous format() override would hide the version of
625 // format() on DateFormat that this function correspond to, so we
626 // have to redefine it here)
627 return DateFormat::format(obj, appendTo, pos, status);
628 }
629
630 //----------------------------------------------------------------------
631
632 /* Map calendar field into calendar field level.
633 * The larger the level, the smaller the field unit.
634 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
635 * UCAL_MONTH level is 20.
636 * NOTE: if new fields are added, this table needs to be updated.
 * The row comments give the pattern letter of each field; presumably the
 * entries follow UCalendarDateFields order - confirm against the enum.
637 */
638 const int32_t
639 SimpleDateFormat::fgCalendarFieldToLevel[] =
640 {
641 /*GyM*/ 0, 10, 20,
642 /*wW*/ 20, 30,
643 /*dDEF*/ 30, 20, 30, 30,
644 /*ahHm*/ 40, 50, 50, 60,
645 /*sS..*/ 70, 80,
646 /*z?Y*/ 0, 0, 10,
647 /*eug*/ 30, 10, 0,
648 /*A*/ 40
649 };
650
651
652 /* Map calendar field LETTER into calendar field level.
653 * The larger the level, the smaller the field unit.
654 * NOTE: if new fields are added, this table needs to be updated.
 * The table has four rows of 16 entries. The first entry of each row
 * precedes the letters named in the row comment, so the rows start at
 * code units 0x40, 0x50, 0x60 and 0x70. Presumably the table is indexed by
 * (pattern letter - PATTERN_CHAR_BASE) - confirm at the lookup site.
 * -1 marks a character with no level.
655 */
656 const int32_t
657 SimpleDateFormat::fgPatternCharToLevel[] = {
658 // A B C D E F G H I J K L M N O
659 -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, -1,
660 // P Q R S T U V W X Y Z
661 -1, 20, -1, 80, -1, -1, 0, 30, -1, 10, 0, -1, -1, -1, -1, -1,
662 // a b c d e f g h i j k l m n o
663 -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, -1, 60, -1, -1,
664 // p q r s t u v w x y z
665 -1, 20, -1, 70, -1, 10, 0, 20, -1, 10, 0, -1, -1, -1, -1, -1
666 };
667
668
669 // Map index into pattern character string to Calendar field number.
// Entries are in the pattern-letter order given by the row comments
// (G, y, M, d, k, H, ...). Several letters share one calendar field:
// k/H, h/K, z/Z/v/V and M/L/Q/q.
670 const UCalendarDateFields
671 SimpleDateFormat::fgPatternIndexToCalendarField[] =
672 {
673 /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
674 /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
675 /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
676 /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
677 /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
678 /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
679 /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
680 /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
681 /*v*/ UCAL_ZONE_OFFSET,
682 /*c*/ UCAL_DOW_LOCAL,
683 /*L*/ UCAL_MONTH,
684 /*Q*/ UCAL_MONTH,
685 /*q*/ UCAL_MONTH,
686 /*V*/ UCAL_ZONE_OFFSET,
687 };
688
689 // Map index into pattern character string to DateFormat field number.
// Entries are in the same pattern-letter order as
// fgPatternIndexToCalendarField; each letter has its own UDateFormatField.
690 const UDateFormatField
691 SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
692 /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
693 /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
694 /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
695 /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
696 /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
697 /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
698 /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
699 /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
700 /*v*/ UDAT_TIMEZONE_GENERIC_FIELD,
701 /*c*/ UDAT_STANDALONE_DAY_FIELD,
702 /*L*/ UDAT_STANDALONE_MONTH_FIELD,
703 /*Q*/ UDAT_QUARTER_FIELD,
704 /*q*/ UDAT_STANDALONE_QUARTER_FIELD,
705 /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD,
706 };
707
708 //----------------------------------------------------------------------
709
710 /**
711 * Append symbols[value] to dst. Make sure the array index is not out
712 * of bounds.
713 */
714 static inline void
715 _appendSymbol(UnicodeString& dst,
716 int32_t value,
717 const UnicodeString* symbols,
718 int32_t symbolsCount) {
719 U_ASSERT(0 <= value && value < symbolsCount);
720 if (0 <= value && value < symbolsCount) {
721 dst += symbols[value];
722 }
723 }
724
725 //---------------------------------------------------------------------
726 void
727 SimpleDateFormat::appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const{
728 int32_t offset = cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status);
729 if (U_FAILURE(status)) {
730 return;
731 }
732 if (isDefaultGMTFormat()) {
733 formatGMTDefault(appendTo, offset);
734 } else {
735 ((SimpleDateFormat*)this)->initGMTFormatters(status);
736 if (U_SUCCESS(status)) {
737 int32_t type;
738 if (offset < 0) {
739 offset = -offset;
740 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTNegativeHM : kGMTNegativeHMS;
741 } else {
742 type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTPositiveHM : kGMTPositiveHMS;
743 }
744 Formattable param(offset, Formattable::kIsDate);
745 FieldPosition fpos(0);
746 fGMTFormatters[type]->format(&param, 1, appendTo, fpos, status);
747 }
748 }
749 }
750
751 int32_t
752 SimpleDateFormat::parseGMT(const UnicodeString &text, ParsePosition &pos) const {
753 if (!isDefaultGMTFormat()) {
754 int32_t start = pos.getIndex();
755
756 // Quick check
757 UBool prefixMatch = FALSE;
758 int32_t prefixLen = fSymbols->fGmtFormat.indexOf((UChar)0x007B /* '{' */);
759 if (prefixLen > 0 && text.compare(start, prefixLen, fSymbols->fGmtFormat, 0, prefixLen) == 0) {
760 prefixMatch = TRUE;
761 }
762 if (prefixMatch) {
763 // Prefix matched
764 UErrorCode status = U_ZERO_ERROR;
765 ((SimpleDateFormat*)this)->initGMTFormatters(status);
766 if (U_SUCCESS(status)) {
767 Formattable parsed;
768 int32_t parsedCount;
769
770 // Try negative Hms
771 fGMTFormatters[kGMTNegativeHMS]->parseObject(text, parsed, pos);
772 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
773 parsed.getArray(parsedCount);
774 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
775 return (int32_t)(-1 * (int64_t)parsed[0].getDate());
776 }
777 }
778
779 // Reset ParsePosition
780 pos.setIndex(start);
781 pos.setErrorIndex(-1);
782
783 // Try positive Hms
784 fGMTFormatters[kGMTPositiveHMS]->parseObject(text, parsed, pos);
785 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
786 parsed.getArray(parsedCount);
787 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
788 return (int32_t)((int64_t)parsed[0].getDate());
789 }
790 }
791
792 // Reset ParsePosition
793 pos.setIndex(start);
794 pos.setErrorIndex(-1);
795
796 // Try negative Hm
797 fGMTFormatters[kGMTNegativeHM]->parseObject(text, parsed, pos);
798 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
799 parsed.getArray(parsedCount);
800 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
801 return (int32_t)(-1 * (int64_t)parsed[0].getDate());
802 }
803 }
804
805 // Reset ParsePosition
806 pos.setIndex(start);
807 pos.setErrorIndex(-1);
808
809 // Try positive Hm
810 fGMTFormatters[kGMTPositiveHM]->parseObject(text, parsed, pos);
811 if (pos.getErrorIndex() == -1 && pos.getIndex() > start) {
812 parsed.getArray(parsedCount);
813 if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) {
814 return (int32_t)((int64_t)parsed[0].getDate());
815 }
816 }
817
818 // Reset ParsePosition
819 pos.setIndex(start);
820 pos.setErrorIndex(-1);
821 }
822 // fall through to the default GMT parsing method
823 }
824 }
825 return parseGMTDefault(text, pos);
826 }
827
828 void
829 SimpleDateFormat::formatGMTDefault(UnicodeString &appendTo, int32_t offset) const {
830 if (offset < 0) {
831 appendTo += gGmtMinus;
832 offset = -offset; // suppress the '-' sign for text display.
833 }else{
834 appendTo += gGmtPlus;
835 }
836
837 offset /= U_MILLIS_PER_SECOND; // now in seconds
838 int32_t sec = offset % 60;
839 offset /= 60;
840 int32_t min = offset % 60;
841 int32_t hour = offset / 60;
842
843
844 zeroPaddingNumber(appendTo, hour, 2, 2);
845 appendTo += (UChar)0x003A /*':'*/;
846 zeroPaddingNumber(appendTo, min, 2, 2);
847 if (sec != 0) {
848 appendTo += (UChar)0x003A /*':'*/;
849 zeroPaddingNumber(appendTo, sec, 2, 2);
850 }
851 }
852
// Parses a default-style GMT offset starting at pos: the literal "GMT", a
// '+' or '-' sign, then the offset digits in one of these shapes:
//   H[H][:mm[:ss]]   one or two hour digits with optional colon fields
//   Hmm / HHmm       three or four packed digits
//   Hmmss / HHmmss   five or six packed digits
// Returns the offset in milliseconds, negated for '-'. On failure returns 0
// and sets the error index on pos. The position index is advanced by the
// parseInt() calls (and the explicit setIndex calls) below.
853 int32_t
854 SimpleDateFormat::parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const {
855 int32_t start = pos.getIndex();
856
// Need room for "GMT", the sign and at least one more character.
857 if (start + kGmtLen + 1 >= text.length()) {
858 pos.setErrorIndex(start);
859 return 0;
860 }
861
862 int32_t cur = start;
863 // "GMT"
864 if (text.compare(start, kGmtLen, gGmt) != 0) {
865 pos.setErrorIndex(start);
866 return 0;
867 }
868 cur += kGmtLen;
869 // Sign
870 UBool negative = FALSE;
871 if (text.charAt(cur) == (UChar)0x002D /* minus */) {
872 negative = TRUE;
873 } else if (text.charAt(cur) != (UChar)0x002B /* plus */) {
874 pos.setErrorIndex(cur);
875 return 0;
876 }
877 cur++;
878
879 // Numbers
880 int32_t numLen;
881 pos.setIndex(cur);
882
883 Formattable number;
// The third argument (6 here, 2 below) is presumably the maximum number of
// digits parseInt() consumes - confirm against parseInt().
884 parseInt(text, number, 6, pos, FALSE);
885 numLen = pos.getIndex() - cur;
886
// No digits after the sign: restore the index and flag the error there.
887 if (numLen <= 0) {
888 pos.setIndex(start);
889 pos.setErrorIndex(cur);
890 return 0;
891 }
892
893 int32_t numVal = number.getLong();
894
895 int32_t hour = 0;
896 int32_t min = 0;
897 int32_t sec = 0;
898
899 if (numLen <= 2) {
900 // H[H][:mm[:ss]]
901 hour = numVal;
902 cur += numLen;
// A colon only counts when at least two more characters follow it.
903 if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
904 cur++;
905 pos.setIndex(cur);
906 parseInt(text, number, 2, pos, FALSE);
907 numLen = pos.getIndex() - cur;
908 if (numLen == 2) {
909 // got minute field
910 min = number.getLong();
911 cur += numLen;
912 if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) {
913 cur++;
914 pos.setIndex(cur);
915 parseInt(text, number, 2, pos, FALSE);
916 numLen = pos.getIndex() - cur;
917 if (numLen == 2) {
918 // got second field
919 sec = number.getLong();
920 } else {
921 // reset position to the colon before the seconds; it is not part of the offset
922 pos.setIndex(cur - 1);
923 pos.setErrorIndex(-1);
924 }
925 }
926 } else {
927 // reset position to the colon before the minutes; it is not part of the offset
928 pos.setIndex(cur - 1);
929 pos.setErrorIndex(-1);
930 }
931 }
932 } else if (numLen == 3 || numLen == 4) {
933 // Hmm or HHmm
934 hour = numVal / 100;
935 min = numVal % 100;
936 } else if (numLen == 5 || numLen == 6) {
937 // Hmmss or HHmmss
938 hour = numVal / 10000;
939 min = (numVal % 10000) / 100;
940 sec = numVal % 100;
941 } else {
942 // HHmmss followed by bogus numbers: keep the first six digits only
943 pos.setIndex(cur + 6);
944
// Drop the extra low-order digits so that numVal holds HHmmss.
945 int32_t shift = numLen - 6;
946 while (shift > 0) {
947 numVal /= 10;
948 shift--;
949 }
950 hour = numVal / 10000;
951 min = (numVal % 10000) / 100;
952 sec = numVal % 100;
953 }
954
// Combine the fields into milliseconds and apply the sign.
955 int32_t offset = ((hour*60 + min)*60 + sec)*1000;
956 if (negative) {
957 offset = -offset;
958 }
959 return offset;
960 }
961
962 UBool
963 SimpleDateFormat::isDefaultGMTFormat() const {
964 // GMT pattern
965 if (fSymbols->fGmtFormat.length() == 0) {
966 // No GMT pattern is set
967 return TRUE;
968 } else if (fSymbols->fGmtFormat.compare(gDefGmtPat, kGmtPatLen) != 0) {
969 return FALSE;
970 }
971 // Hour patterns
972 if (fSymbols->fGmtHourFormats == NULL || fSymbols->fGmtHourFormatsCount != DateFormatSymbols::GMT_HOUR_COUNT) {
973 // No Hour pattern is set
974 return TRUE;
975 } else if ((fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS].compare(gDefGmtNegHmsPat, kNegHmsLen) != 0)
976 || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM].compare(gDefGmtNegHmPat, kNegHmLen) != 0)
977 || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS].compare(gDefGmtPosHmsPat, kPosHmsLen) != 0)
978 || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM].compare(gDefGmtPosHmPat, kPosHmLen) != 0)) {
979 return FALSE;
980 }
981 return TRUE;
982 }
983
984 void
985 SimpleDateFormat::formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const {
986 UChar sign = 0x002B /* '+' */;
987 if (offset < 0) {
988 offset = -offset;
989 sign = 0x002D /* '-' */;
990 }
991 appendTo.append(sign);
992
993 int32_t offsetH = offset / U_MILLIS_PER_HOUR;
994 offset = offset % U_MILLIS_PER_HOUR;
995 int32_t offsetM = offset / U_MILLIS_PER_MINUTE;
996 offset = offset % U_MILLIS_PER_MINUTE;
997 int32_t offsetS = offset / U_MILLIS_PER_SECOND;
998
999 int32_t num = 0, denom = 0;
1000 if (offsetS == 0) {
1001 offset = offsetH*100 + offsetM; // HHmm
1002 num = offset % 10000;
1003 denom = 1000;
1004 } else {
1005 offset = offsetH*10000 + offsetM*100 + offsetS; // HHmmss
1006 num = offset % 1000000;
1007 denom = 100000;
1008 }
1009 while (denom >= 1) {
1010 UChar digit = (UChar)0x0030 + (num / denom);
1011 appendTo.append(digit);
1012 num = num % denom;
1013 denom /= 10;
1014 }
1015 }
1016
1017 void
1018 SimpleDateFormat::initGMTFormatters(UErrorCode &status) {
1019 if (U_FAILURE(status)) {
1020 return;
1021 }
1022 umtx_lock(&LOCK);
1023 if (fGMTFormatters == NULL) {
1024 fGMTFormatters = (MessageFormat**)uprv_malloc(kNumGMTFormatters * sizeof(MessageFormat*));
1025 if (fGMTFormatters) {
1026 for (int32_t i = 0; i < kNumGMTFormatters; i++) {
1027 const UnicodeString *hourPattern = NULL; //initialized it to avoid warning
1028 switch (i) {
1029 case kGMTNegativeHMS:
1030 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS]);
1031 break;
1032 case kGMTNegativeHM:
1033 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM]);
1034 break;
1035 case kGMTPositiveHMS:
1036 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS]);
1037 break;
1038 case kGMTPositiveHM:
1039 hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM]);
1040 break;
1041 }
1042 fGMTFormatters[i] = new MessageFormat(fSymbols->fGmtFormat, status);
1043 if (U_FAILURE(status)) {
1044 break;
1045 }
1046 SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone();
1047 sdf->adoptTimeZone(TimeZone::createTimeZone(UnicodeString(gEtcUTC)));
1048 sdf->applyPattern(*hourPattern);
1049 fGMTFormatters[i]->adoptFormat(0, sdf);
1050 }
1051 } else {
1052 status = U_MEMORY_ALLOCATION_ERROR;
1053 }
1054 }
1055 umtx_unlock(&LOCK);
1056 }
1057
1058 //---------------------------------------------------------------------
1059 void
1060 SimpleDateFormat::subFormat(UnicodeString &appendTo,
1061 UChar ch,
1062 int32_t count,
1063 FieldPosition& pos,
1064 Calendar& cal,
1065 UErrorCode& status) const
1066 {
1067 if (U_FAILURE(status)) {
1068 return;
1069 }
1070
1071 // this function gets called by format() to produce the appropriate substitution
1072 // text for an individual pattern symbol (e.g., "HH" or "yyyy")
1073
1074 UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
1075 UDateFormatField patternCharIndex;
1076 const int32_t maxIntCount = 10;
1077 int32_t beginOffset = appendTo.length();
1078
1079 // if the pattern character is unrecognized, signal an error and dump out
1080 if (patternCharPtr == NULL)
1081 {
1082 status = U_INVALID_FORMAT_ERROR;
1083 return;
1084 }
1085
1086 patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
1087 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
1088 int32_t value = cal.get(field, status);
1089 if (U_FAILURE(status)) {
1090 return;
1091 }
1092
1093 switch (patternCharIndex) {
1094
1095 // for any "G" symbol, write out the appropriate era string
1096 // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name
1097 case UDAT_ERA_FIELD:
1098 if (count == 5)
1099 _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount);
1100 else if (count == 4)
1101 _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount);
1102 else
1103 _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount);
1104 break;
1105
1106 // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits
1107 // NEW: UTS#35:
1108 //Year y yy yyy yyyy yyyyy
1109 //AD 1 1 01 001 0001 00001
1110 //AD 12 12 12 012 0012 00012
1111 //AD 123 123 23 123 0123 00123
1112 //AD 1234 1234 34 1234 1234 01234
1113 //AD 12345 12345 45 12345 12345 12345
1114 case UDAT_YEAR_FIELD:
1115 case UDAT_YEAR_WOY_FIELD:
1116 if(count == 2)
1117 zeroPaddingNumber(appendTo, value, 2, 2);
1118 else
1119 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1120 break;
1121
1122 // for "MMMM", write out the whole month name, for "MMM", write out the month
1123 // abbreviation, for "M" or "MM", write out the month as a number with the
1124 // appropriate number of digits
1125 // for "MMMMM", use the narrow form
1126 case UDAT_MONTH_FIELD:
1127 if (count == 5)
1128 _appendSymbol(appendTo, value, fSymbols->fNarrowMonths,
1129 fSymbols->fNarrowMonthsCount);
1130 else if (count == 4)
1131 _appendSymbol(appendTo, value, fSymbols->fMonths,
1132 fSymbols->fMonthsCount);
1133 else if (count == 3)
1134 _appendSymbol(appendTo, value, fSymbols->fShortMonths,
1135 fSymbols->fShortMonthsCount);
1136 else
1137 zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
1138 break;
1139
1140 // for "LLLL", write out the whole month name, for "LLL", write out the month
1141 // abbreviation, for "L" or "LL", write out the month as a number with the
1142 // appropriate number of digits
1143 // for "LLLLL", use the narrow form
1144 case UDAT_STANDALONE_MONTH_FIELD:
1145 if (count == 5)
1146 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowMonths,
1147 fSymbols->fStandaloneNarrowMonthsCount);
1148 else if (count == 4)
1149 _appendSymbol(appendTo, value, fSymbols->fStandaloneMonths,
1150 fSymbols->fStandaloneMonthsCount);
1151 else if (count == 3)
1152 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortMonths,
1153 fSymbols->fStandaloneShortMonthsCount);
1154 else
1155 zeroPaddingNumber(appendTo, value + 1, count, maxIntCount);
1156 break;
1157
1158 // for "k" and "kk", write out the hour, adjusting midnight to appear as "24"
1159 case UDAT_HOUR_OF_DAY1_FIELD:
1160 if (value == 0)
1161 zeroPaddingNumber(appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount);
1162 else
1163 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1164 break;
1165
1166 case UDAT_FRACTIONAL_SECOND_FIELD:
1167 // Fractional seconds left-justify
1168 {
1169 fNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count);
1170 fNumberFormat->setMaximumIntegerDigits(maxIntCount);
1171 if (count == 1) {
1172 value = (value + 50) / 100;
1173 } else if (count == 2) {
1174 value = (value + 5) / 10;
1175 }
1176 FieldPosition p(0);
1177 fNumberFormat->format(value, appendTo, p);
1178 if (count > 3) {
1179 fNumberFormat->setMinimumIntegerDigits(count - 3);
1180 fNumberFormat->format((int32_t)0, appendTo, p);
1181 }
1182 }
1183 break;
1184
1185 // for "ee" or "e", use local numeric day-of-the-week
1186 // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name
1187 // for "EEEE" or "eeee", write out the wide day-of-the-week name
1188 // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name
1189 case UDAT_DOW_LOCAL_FIELD:
1190 if ( count < 3 ) {
1191 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1192 break;
1193 }
1194 // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week,
1195 // we want standard day-of-week, so first fix value to work for EEEEE-EEE.
1196 value = cal.get(UCAL_DAY_OF_WEEK, status);
1197 if (U_FAILURE(status)) {
1198 return;
1199 }
1200 // fall through, do not break here
1201 case UDAT_DAY_OF_WEEK_FIELD:
1202 if (count == 5)
1203 _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays,
1204 fSymbols->fNarrowWeekdaysCount);
1205 else if (count == 4)
1206 _appendSymbol(appendTo, value, fSymbols->fWeekdays,
1207 fSymbols->fWeekdaysCount);
1208 else
1209 _appendSymbol(appendTo, value, fSymbols->fShortWeekdays,
1210 fSymbols->fShortWeekdaysCount);
1211 break;
1212
1213 // for "ccc", write out the abbreviated day-of-the-week name
1214 // for "cccc", write out the wide day-of-the-week name
1215 // for "ccccc", use the narrow day-of-the-week name
1216 case UDAT_STANDALONE_DAY_FIELD:
1217 if ( count < 3 ) {
1218 zeroPaddingNumber(appendTo, value, 1, maxIntCount);
1219 break;
1220 }
1221 // fall through to alpha DOW handling, but for that we don't want local day-of-week,
1222 // we want standard day-of-week, so first fix value.
1223 value = cal.get(UCAL_DAY_OF_WEEK, status);
1224 if (U_FAILURE(status)) {
1225 return;
1226 }
1227 if (count == 5)
1228 _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays,
1229 fSymbols->fStandaloneNarrowWeekdaysCount);
1230 else if (count == 4)
1231 _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays,
1232 fSymbols->fStandaloneWeekdaysCount);
1233 else // count == 3
1234 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays,
1235 fSymbols->fStandaloneShortWeekdaysCount);
1236 break;
1237
1238 // for and "a" symbol, write out the whole AM/PM string
1239 case UDAT_AM_PM_FIELD:
1240 _appendSymbol(appendTo, value, fSymbols->fAmPms,
1241 fSymbols->fAmPmsCount);
1242 break;
1243
1244 // for "h" and "hh", write out the hour, adjusting noon and midnight to show up
1245 // as "12"
1246 case UDAT_HOUR1_FIELD:
1247 if (value == 0)
1248 zeroPaddingNumber(appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount);
1249 else
1250 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1251 break;
1252
1253 // for the "z" symbols, we have to check our time zone data first. If we have a
1254 // localized name for the time zone, then "zzzz" / "zzz" indicate whether
1255 // daylight time is in effect (long/short) and "zz" / "z" do not (long/short).
1256 // If we don't have a localized time zone name,
1257 // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the
1258 // offset from GMT) regardless of how many z's were in the pattern symbol
1259 case UDAT_TIMEZONE_FIELD:
1260 case UDAT_TIMEZONE_GENERIC_FIELD:
1261 case UDAT_TIMEZONE_SPECIAL_FIELD:
1262 {
1263 UnicodeString zoneString;
1264 const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
1265 if (zsf) {
1266 if (patternCharIndex == UDAT_TIMEZONE_FIELD) {
1267 if (count < 4) {
1268 // "z", "zz", "zzz"
1269 zsf->getSpecificShortString(cal, TRUE /*commonly used only*/,
1270 zoneString, status);
1271 } else {
1272 // "zzzz"
1273 zsf->getSpecificLongString(cal, zoneString, status);
1274 }
1275 } else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) {
1276 if (count == 1) {
1277 // "v"
1278 zsf->getGenericShortString(cal, TRUE /*commonly used only*/,
1279 zoneString, status);
1280 } else if (count == 4) {
1281 // "vvvv"
1282 zsf->getGenericLongString(cal, zoneString, status);
1283 }
1284 } else { // patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD
1285 if (count == 1) {
1286 // "V"
1287 zsf->getSpecificShortString(cal, FALSE /*ignore commonly used*/,
1288 zoneString, status);
1289 } else if (count == 4) {
1290 // "VVVV"
1291 zsf->getGenericLocationString(cal, zoneString, status);
1292 }
1293 }
1294 }
1295 if (zoneString.isEmpty()) {
1296 appendGMT(appendTo, cal, status);
1297 } else {
1298 appendTo += zoneString;
1299 }
1300 }
1301 break;
1302
1303 case UDAT_TIMEZONE_RFC_FIELD: // 'Z' - TIMEZONE_RFC
1304 if (count < 4) {
1305 // RFC822 format, must use ASCII digits
1306 value = (cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status));
1307 formatRFC822TZ(appendTo, value);
1308 } else {
1309 // long form, localized GMT pattern
1310 appendGMT(appendTo, cal, status);
1311 }
1312 break;
1313
1314 case UDAT_QUARTER_FIELD:
1315 if (count >= 4)
1316 _appendSymbol(appendTo, value/3, fSymbols->fQuarters,
1317 fSymbols->fQuartersCount);
1318 else if (count == 3)
1319 _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters,
1320 fSymbols->fShortQuartersCount);
1321 else
1322 zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount);
1323 break;
1324
1325 case UDAT_STANDALONE_QUARTER_FIELD:
1326 if (count >= 4)
1327 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters,
1328 fSymbols->fStandaloneQuartersCount);
1329 else if (count == 3)
1330 _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters,
1331 fSymbols->fStandaloneShortQuartersCount);
1332 else
1333 zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount);
1334 break;
1335
1336
1337 // all of the other pattern symbols can be formatted as simple numbers with
1338 // appropriate zero padding
1339 default:
1340 zeroPaddingNumber(appendTo, value, count, maxIntCount);
1341 break;
1342 }
1343
1344 // if the field we're formatting is the one the FieldPosition says it's interested
1345 // in, fill in the FieldPosition with this field's positions
1346 if (pos.getBeginIndex() == pos.getEndIndex() &&
1347 pos.getField() == fgPatternIndexToDateFormatField[patternCharIndex]) {
1348 pos.setBeginIndex(beginOffset);
1349 pos.setEndIndex(appendTo.length());
1350 }
1351 }
1352
1353 //----------------------------------------------------------------------
1354 void
1355 SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int32_t minDigits, int32_t maxDigits) const
1356 {
1357 if (fNumberFormat!=NULL) {
1358 FieldPosition pos(0);
1359
1360 fNumberFormat->setMinimumIntegerDigits(minDigits);
1361 fNumberFormat->setMaximumIntegerDigits(maxDigits);
1362 fNumberFormat->format(value, appendTo, pos); // 3rd arg is there to speed up processing
1363 }
1364 }
1365
1366 //----------------------------------------------------------------------
1367
1368 /**
1369 * Format characters that indicate numeric fields. The character
1370 * at index 0 is treated specially.
1371 */
1372 static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */
1373
1374 /**
1375 * Return true if the given format character, occuring count
1376 * times, represents a numeric field.
1377 */
1378 UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) {
1379 UnicodeString s(NUMERIC_FORMAT_CHARS);
1380 int32_t i = s.indexOf(formatChar);
1381 return (i > 0 || (i == 0 && count < 3));
1382 }
1383
1384 void
1385 SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const
1386 {
1387 UErrorCode status = U_ZERO_ERROR;
1388 int32_t pos = parsePos.getIndex();
1389 int32_t start = pos;
1390 UBool ambiguousYear[] = { FALSE };
1391 int32_t count = 0;
1392
1393 UBool lenient = isLenient();
1394
1395 // hack, reset tztype, cast away const
1396 ((SimpleDateFormat*)this)->tztype = TZTYPE_UNK;
1397
1398 // For parsing abutting numeric fields. 'abutPat' is the
1399 // offset into 'pattern' of the first of 2 or more abutting
1400 // numeric fields. 'abutStart' is the offset into 'text'
1401 // where parsing the fields begins. 'abutPass' starts off as 0
1402 // and increments each time we try to parse the fields.
1403 int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields
1404 int32_t abutStart = 0;
1405 int32_t abutPass = 0;
1406 UBool inQuote = FALSE;
1407
1408 const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS);
1409
1410 for (int32_t i=0; i<fPattern.length(); ++i) {
1411 UChar ch = fPattern.charAt(i);
1412
1413 // Handle alphabetic field characters.
1414 if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z]
1415 int32_t fieldPat = i;
1416
1417 // Count the length of this field specifier
1418 count = 1;
1419 while ((i+1)<fPattern.length() &&
1420 fPattern.charAt(i+1) == ch) {
1421 ++count;
1422 ++i;
1423 }
1424
1425 if (isNumeric(ch, count)) {
1426 if (abutPat < 0) {
1427 // Determine if there is an abutting numeric field. For
1428 // most fields we can just look at the next characters,
1429 // but the 'm' field is either numeric or text,
1430 // depending on the count, so we have to look ahead for
1431 // that field.
1432 if ((i+1)<fPattern.length()) {
1433 UBool abutting;
1434 UChar nextCh = fPattern.charAt(i+1);
1435 int32_t k = numericFormatChars.indexOf(nextCh);
1436 if (k == 0) {
1437 int32_t j = i+2;
1438 while (j<fPattern.length() &&
1439 fPattern.charAt(j) == nextCh) {
1440 ++j;
1441 }
1442 abutting = (j-i) < 4; // nextCount < 3
1443 } else {
1444 abutting = k > 0;
1445 }
1446
1447 // Record the start of a set of abutting numeric
1448 // fields.
1449 if (abutting) {
1450 abutPat = fieldPat;
1451 abutStart = pos;
1452 abutPass = 0;
1453 }
1454 }
1455 }
1456 } else {
1457 abutPat = -1; // End of any abutting fields
1458 }
1459
1460 // Handle fields within a run of abutting numeric fields. Take
1461 // the pattern "HHmmss" as an example. We will try to parse
1462 // 2/2/2 characters of the input text, then if that fails,
1463 // 1/2/2. We only adjust the width of the leftmost field; the
1464 // others remain fixed. This allows "123456" => 12:34:56, but
1465 // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we
1466 // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
1467 if (abutPat >= 0) {
1468 // If we are at the start of a run of abutting fields, then
1469 // shorten this field in each pass. If we can't shorten
1470 // this field any more, then the parse of this set of
1471 // abutting numeric fields has failed.
1472 if (fieldPat == abutPat) {
1473 count -= abutPass++;
1474 if (count == 0) {
1475 parsePos.setIndex(start);
1476 parsePos.setErrorIndex(pos);
1477 return;
1478 }
1479 }
1480
1481 pos = subParse(text, pos, ch, count,
1482 TRUE, FALSE, ambiguousYear, cal);
1483
1484 // If the parse fails anywhere in the run, back up to the
1485 // start of the run and retry.
1486 if (pos < 0) {
1487 i = abutPat - 1;
1488 pos = abutStart;
1489 continue;
1490 }
1491 }
1492
1493 // Handle non-numeric fields and non-abutting numeric
1494 // fields.
1495 else {
1496 int32_t s = subParse(text, pos, ch, count,
1497 FALSE, TRUE, ambiguousYear, cal);
1498
1499 if (s <= 0) {
1500 status = U_PARSE_ERROR;
1501 goto ExitParse;
1502 }
1503 pos = s;
1504 }
1505 }
1506
1507 // Handle literal pattern characters. These are any
1508 // quoted characters and non-alphabetic unquoted
1509 // characters.
1510 else {
1511
1512 abutPat = -1; // End of any abutting fields
1513
1514 if (! matchLiterals(fPattern, i, text, pos, lenient)) {
1515 status = U_PARSE_ERROR;
1516 goto ExitParse;
1517 }
1518 }
1519 }
1520
1521 // At this point the fields of Calendar have been set. Calendar
1522 // will fill in default values for missing fields when the time
1523 // is computed.
1524
1525 parsePos.setIndex(pos);
1526
1527 // This part is a problem: When we call parsedDate.after, we compute the time.
1528 // Take the date April 3 2004 at 2:30 am. When this is first set up, the year
1529 // will be wrong if we're parsing a 2-digit year pattern. It will be 1904.
1530 // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am
1531 // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am
1532 // on that day. It is therefore parsed out to fields as 3:30 am. Then we
1533 // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is
1534 // a Saturday, so it can have a 2:30 am -- and it should. [LIU]
1535 /*
1536 UDate parsedDate = calendar.getTime();
1537 if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) {
1538 calendar.add(Calendar.YEAR, 100);
1539 parsedDate = calendar.getTime();
1540 }
1541 */
1542 // Because of the above condition, save off the fields in case we need to readjust.
1543 // The procedure we use here is not particularly efficient, but there is no other
1544 // way to do this given the API restrictions present in Calendar. We minimize
1545 // inefficiency by only performing this computation when it might apply, that is,
1546 // when the two-digit year is equal to the start year, and thus might fall at the
1547 // front or the back of the default century. This only works because we adjust
1548 // the year correctly to start with in other cases -- see subParse().
1549 if (ambiguousYear[0] || tztype != TZTYPE_UNK) // If this is true then the two-digit year == the default start year
1550 {
1551 // We need a copy of the fields, and we need to avoid triggering a call to
1552 // complete(), which will recalculate the fields. Since we can't access
1553 // the fields[] array in Calendar, we clone the entire object. This will
1554 // stop working if Calendar.clone() is ever rewritten to call complete().
1555 Calendar *copy;
1556 if (ambiguousYear[0]) {
1557 copy = cal.clone();
1558 // Check for failed cloning.
1559 if (copy == NULL) {
1560 status = U_MEMORY_ALLOCATION_ERROR;
1561 goto ExitParse;
1562 }
1563 UDate parsedDate = copy->getTime(status);
1564 // {sfb} check internalGetDefaultCenturyStart
1565 if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) {
1566 // We can't use add here because that does a complete() first.
1567 cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100);
1568 }
1569 delete copy;
1570 }
1571
1572 if (tztype != TZTYPE_UNK) {
1573 copy = cal.clone();
1574 // Check for failed cloning.
1575 if (copy == NULL) {
1576 status = U_MEMORY_ALLOCATION_ERROR;
1577 goto ExitParse;
1578 }
1579 const TimeZone & tz = cal.getTimeZone();
1580 BasicTimeZone *btz = NULL;
1581
1582 if (tz.getDynamicClassID() == OlsonTimeZone::getStaticClassID()
1583 || tz.getDynamicClassID() == SimpleTimeZone::getStaticClassID()
1584 || tz.getDynamicClassID() == RuleBasedTimeZone::getStaticClassID()
1585 || tz.getDynamicClassID() == VTimeZone::getStaticClassID()) {
1586 btz = (BasicTimeZone*)&tz;
1587 }
1588
1589 // Get local millis
1590 copy->set(UCAL_ZONE_OFFSET, 0);
1591 copy->set(UCAL_DST_OFFSET, 0);
1592 UDate localMillis = copy->getTime(status);
1593
1594 // Make sure parsed time zone type (Standard or Daylight)
1595 // matches the rule used by the parsed time zone.
1596 int32_t raw, dst;
1597 if (btz != NULL) {
1598 if (tztype == TZTYPE_STD) {
1599 btz->getOffsetFromLocal(localMillis,
1600 BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status);
1601 } else {
1602 btz->getOffsetFromLocal(localMillis,
1603 BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status);
1604 }
1605 } else {
1606 // No good way to resolve ambiguous time at transition,
1607 // but following code work in most case.
1608 tz.getOffset(localMillis, TRUE, raw, dst, status);
1609 }
1610
1611 // Now, compare the results with parsed type, either standard or daylight saving time
1612 int32_t resolvedSavings = dst;
1613 if (tztype == TZTYPE_STD) {
1614 if (dst != 0) {
1615 // Override DST_OFFSET = 0 in the result calendar
1616 resolvedSavings = 0;
1617 }
1618 } else { // tztype == TZTYPE_DST
1619 if (dst == 0) {
1620 if (btz != NULL) {
1621 UDate time = localMillis + raw;
1622 // We use the nearest daylight saving time rule.
1623 TimeZoneTransition beforeTrs, afterTrs;
1624 UDate beforeT = time, afterT = time;
1625 int32_t beforeSav = 0, afterSav = 0;
1626 UBool beforeTrsAvail, afterTrsAvail;
1627
1628 // Search for DST rule before or on the time
1629 while (TRUE) {
1630 beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs);
1631 if (!beforeTrsAvail) {
1632 break;
1633 }
1634 beforeT = beforeTrs.getTime() - 1;
1635 beforeSav = beforeTrs.getFrom()->getDSTSavings();
1636 if (beforeSav != 0) {
1637 break;
1638 }
1639 }
1640
1641 // Search for DST rule after the time
1642 while (TRUE) {
1643 afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs);
1644 if (!afterTrsAvail) {
1645 break;
1646 }
1647 afterT = afterTrs.getTime();
1648 afterSav = afterTrs.getTo()->getDSTSavings();
1649 if (afterSav != 0) {
1650 break;
1651 }
1652 }
1653
1654 if (beforeTrsAvail && afterTrsAvail) {
1655 if (time - beforeT > afterT - time) {
1656 resolvedSavings = afterSav;
1657 } else {
1658 resolvedSavings = beforeSav;
1659 }
1660 } else if (beforeTrsAvail && beforeSav != 0) {
1661 resolvedSavings = beforeSav;
1662 } else if (afterTrsAvail && afterSav != 0) {
1663 resolvedSavings = afterSav;
1664 } else {
1665 resolvedSavings = btz->getDSTSavings();
1666 }
1667 } else {
1668 resolvedSavings = tz.getDSTSavings();
1669 }
1670 if (resolvedSavings == 0) {
1671 // final fallback
1672 resolvedSavings = U_MILLIS_PER_HOUR;
1673 }
1674 }
1675 }
1676 cal.set(UCAL_ZONE_OFFSET, raw);
1677 cal.set(UCAL_DST_OFFSET, resolvedSavings);
1678 delete copy;
1679 }
1680 }
1681 ExitParse:
1682 // If any Calendar calls failed, we pretend that we
1683 // couldn't parse the string, when in reality this isn't quite accurate--
1684 // we did parse it; the Calendar calls just failed.
1685 if (U_FAILURE(status)) {
1686 parsePos.setErrorIndex(pos);
1687 parsePos.setIndex(start);
1688 }
1689 }
1690
1691 UDate
1692 SimpleDateFormat::parse( const UnicodeString& text,
1693 ParsePosition& pos) const {
1694 // redefined here because the other parse() function hides this function's
1695 // cunterpart on DateFormat
1696 return DateFormat::parse(text, pos);
1697 }
1698
1699 UDate
1700 SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const
1701 {
1702 // redefined here because the other parse() function hides this function's
1703 // counterpart on DateFormat
1704 return DateFormat::parse(text, status);
1705 }
1706 //----------------------------------------------------------------------
1707
1708 int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text,
1709 int32_t start,
1710 UCalendarDateFields field,
1711 const UnicodeString* data,
1712 int32_t dataCount,
1713 Calendar& cal) const
1714 {
1715 int32_t i = 0;
1716 int32_t count = dataCount;
1717
1718 // There may be multiple strings in the data[] array which begin with
1719 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1720 // We keep track of the longest match, and return that. Note that this
1721 // unfortunately requires us to test all array elements.
1722 int32_t bestMatchLength = 0, bestMatch = -1;
1723
1724 // {sfb} kludge to support case-insensitive comparison
1725 // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1726 // the length of the match after case folding
1727 // {alan 20040607} don't case change the whole string, since the length
1728 // can change
1729 // TODO we need a case-insensitive startsWith function
1730 UnicodeString lcase, lcaseText;
1731 text.extract(start, INT32_MAX, lcaseText);
1732 lcaseText.foldCase();
1733
1734 for (; i < count; ++i)
1735 {
1736 // Always compare if we have no match yet; otherwise only compare
1737 // against potentially better matches (longer strings).
1738
1739 lcase.fastCopyFrom(data[i]).foldCase();
1740 int32_t length = lcase.length();
1741
1742 if (length > bestMatchLength &&
1743 lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
1744 {
1745 bestMatch = i;
1746 bestMatchLength = length;
1747 }
1748 }
1749 if (bestMatch >= 0)
1750 {
1751 cal.set(field, bestMatch * 3);
1752
1753 // Once we have a match, we have to determine the length of the
1754 // original source string. This will usually be == the length of
1755 // the case folded string, but it may differ (e.g. sharp s).
1756 lcase.fastCopyFrom(data[bestMatch]).foldCase();
1757
1758 // Most of the time, the length will be the same as the length
1759 // of the string from the locale data. Sometimes it will be
1760 // different, in which case we will have to figure it out by
1761 // adding a character at a time, until we have a match. We do
1762 // this all in one loop, where we try 'len' first (at index
1763 // i==0).
1764 int32_t len = data[bestMatch].length(); // 99+% of the time
1765 int32_t n = text.length() - start;
1766 for (i=0; i<=n; ++i) {
1767 int32_t j=i;
1768 if (i == 0) {
1769 j = len;
1770 } else if (i == len) {
1771 continue; // already tried this when i was 0
1772 }
1773 text.extract(start, j, lcaseText);
1774 lcaseText.foldCase();
1775 if (lcase == lcaseText) {
1776 return start + j;
1777 }
1778 }
1779 }
1780
1781 return -start;
1782 }
1783
1784 //----------------------------------------------------------------------
1785 UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern,
1786 int32_t &patternOffset,
1787 const UnicodeString &text,
1788 int32_t &textOffset,
1789 UBool lenient)
1790 {
1791 UBool inQuote = FALSE;
1792 UnicodeString literal;
1793 int32_t i = patternOffset;
1794
1795 // scan pattern looking for contiguous literal characters
1796 for ( ; i < pattern.length(); i += 1) {
1797 UChar ch = pattern.charAt(i);
1798
1799 if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z]
1800 break;
1801 }
1802
1803 if (ch == QUOTE) {
1804 // Match a quote literal ('') inside OR outside of quotes
1805 if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) {
1806 i += 1;
1807 } else {
1808 inQuote = !inQuote;
1809 continue;
1810 }
1811 }
1812
1813 literal += ch;
1814 }
1815
1816 // at this point, literal contains the literal text
1817 // and i is the index of the next non-literal pattern character.
1818 int32_t p;
1819 int32_t t = textOffset;
1820
1821 if (lenient) {
1822 // trim leading, trailing whitespace from
1823 // the literal text
1824 literal.trim();
1825
1826 // ignore any leading whitespace in the text
1827 while (t < text.length() && u_isWhitespace(text.charAt(t))) {
1828 t += 1;
1829 }
1830 }
1831
1832 for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) {
1833 UBool needWhitespace = FALSE;
1834
1835 while (p < literal.length() && uprv_isRuleWhiteSpace(literal.charAt(p))) {
1836 needWhitespace = TRUE;
1837 p += 1;
1838 }
1839
1840 if (needWhitespace) {
1841 int32_t tStart = t;
1842
1843 while (t < text.length()) {
1844 UChar tch = text.charAt(t);
1845
1846 if (!u_isUWhiteSpace(tch) && !uprv_isRuleWhiteSpace(tch)) {
1847 break;
1848 }
1849
1850 t += 1;
1851 }
1852
1853 // TODO: should we require internal spaces
1854 // in lenient mode? (There won't be any
1855 // leading or trailing spaces)
1856 if (!lenient && t == tStart) {
1857 // didn't find matching whitespace:
1858 // an error in strict mode
1859 return FALSE;
1860 }
1861
1862 // In strict mode, this run of whitespace
1863 // may have been at the end.
1864 if (p >= literal.length()) {
1865 break;
1866 }
1867 }
1868
1869 if (t >= text.length() || literal.charAt(p) != text.charAt(t)) {
1870 // Ran out of text, or found a non-matching character:
1871 // OK in lenient mode, an error in strict mode.
1872 if (lenient) {
1873 break;
1874 }
1875
1876 return FALSE;
1877 }
1878 }
1879
1880 // At this point if we're in strict mode we have a complete match.
1881 // If we're in lenient mode we may have a partial match, or no
1882 // match at all.
1883 if (p <= 0) {
1884 // no match. Pretend it matched a run of whitespace
1885 // and ignorables in the text.
1886 const UnicodeSet *ignorables = NULL;
1887 UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i));
1888
1889 if (patternCharPtr != NULL) {
1890 UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars());
1891
1892 ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex);
1893 }
1894
1895 for (t = textOffset; t < text.length(); t += 1) {
1896 UChar ch = text.charAt(t);
1897
1898 if (ignorables == NULL || !ignorables->contains(ch)) {
1899 break;
1900 }
1901 }
1902 }
1903
1904 // if we get here, we've got a complete match.
1905 patternOffset = i - 1;
1906 textOffset = t;
1907
1908 return TRUE;
1909 }
1910
1911 //----------------------------------------------------------------------
1912
1913 int32_t SimpleDateFormat::matchString(const UnicodeString& text,
1914 int32_t start,
1915 UCalendarDateFields field,
1916 const UnicodeString* data,
1917 int32_t dataCount,
1918 Calendar& cal) const
1919 {
1920 int32_t i = 0;
1921 int32_t count = dataCount;
1922
1923 if (field == UCAL_DAY_OF_WEEK) i = 1;
1924
1925 // There may be multiple strings in the data[] array which begin with
1926 // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech).
1927 // We keep track of the longest match, and return that. Note that this
1928 // unfortunately requires us to test all array elements.
1929 int32_t bestMatchLength = 0, bestMatch = -1;
1930
1931 // {sfb} kludge to support case-insensitive comparison
1932 // {markus 2002oct11} do not just use caseCompareBetween because we do not know
1933 // the length of the match after case folding
1934 // {alan 20040607} don't case change the whole string, since the length
1935 // can change
1936 // TODO we need a case-insensitive startsWith function
1937 UnicodeString lcase, lcaseText;
1938 text.extract(start, INT32_MAX, lcaseText);
1939 lcaseText.foldCase();
1940
1941 for (; i < count; ++i)
1942 {
1943 // Always compare if we have no match yet; otherwise only compare
1944 // against potentially better matches (longer strings).
1945
1946 lcase.fastCopyFrom(data[i]).foldCase();
1947 int32_t length = lcase.length();
1948
1949 if (length > bestMatchLength &&
1950 lcaseText.compareBetween(0, length, lcase, 0, length) == 0)
1951 {
1952 bestMatch = i;
1953 bestMatchLength = length;
1954 }
1955 }
1956 if (bestMatch >= 0)
1957 {
1958 cal.set(field, bestMatch);
1959
1960 // Once we have a match, we have to determine the length of the
1961 // original source string. This will usually be == the length of
1962 // the case folded string, but it may differ (e.g. sharp s).
1963 lcase.fastCopyFrom(data[bestMatch]).foldCase();
1964
1965 // Most of the time, the length will be the same as the length
1966 // of the string from the locale data. Sometimes it will be
1967 // different, in which case we will have to figure it out by
1968 // adding a character at a time, until we have a match. We do
1969 // this all in one loop, where we try 'len' first (at index
1970 // i==0).
1971 int32_t len = data[bestMatch].length(); // 99+% of the time
1972 int32_t n = text.length() - start;
1973 for (i=0; i<=n; ++i) {
1974 int32_t j=i;
1975 if (i == 0) {
1976 j = len;
1977 } else if (i == len) {
1978 continue; // already tried this when i was 0
1979 }
1980 text.extract(start, j, lcaseText);
1981 lcaseText.foldCase();
1982 if (lcase == lcaseText) {
1983 return start + j;
1984 }
1985 }
1986 }
1987
1988 return -start;
1989 }
1990
1991 //----------------------------------------------------------------------
1992
1993 void
1994 SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status)
1995 {
1996 parseAmbiguousDatesAsAfter(d, status);
1997 }
1998
1999 /**
2000 * Private member function that converts the parsed date strings into
2001 * timeFields. Returns -start (for ParsePosition) if failed.
2002 * @param text the time text to be parsed.
2003 * @param start where to start parsing.
 *        Passed by reference: it is advanced past any leading white space
 *        before the field itself is parsed.
2004 * @param ch the pattern character for the date field text to be parsed.
2005 * @param count the count of a pattern character.
 * @param obeyCount when TRUE, numeric parsing only sees the text before
 *        offset start + count, and parsing fails if the text is shorter
 *        than start + count.
 * @param allowNegative forwarded to parseInt() for the numeric fields.
 * @param ambiguousYear element [0] is written when exactly two digits were
 *        parsed for a year field and a default century is available: TRUE if
 *        the parsed value equals fDefaultCenturyStartYear % 100.
 * @param cal calendar that receives the parsed field value; for time zone
 *        fields it may adopt a newly created TimeZone.
2006 * @return the new start position if matching succeeded; a negative number
2007 * indicating matching failure, otherwise.
 *        NOTE(review): most failure paths return -start, which is 0 (not
 *        negative) when start == 0; one strict-mode path returns -1.
 *        Confirm how callers test the result.
2008 */
2009 int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count,
2010 UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const
2011 {
2012 Formattable number;
2013 int32_t value = 0;
2014 int32_t i;
2015 ParsePosition pos(0);
2016 int32_t patternCharIndex;
2017 UnicodeString temp;
2018 UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch);
2019 UBool lenient = isLenient();
2020 UBool gotNumber = FALSE;
2021
2022 #if defined (U_DEBUG_CAL)
2023 //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start);
2024 #endif
2025
// ch is not one of the known pattern characters.
2026 if (patternCharPtr == NULL) {
2027 return -start;
2028 }
2029
// The offset of ch within the pattern-character string is used as the
// UDateFormatField value.
2030 patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars());
2031
2032 UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex];
2033
2034 // If there are any spaces here, skip over them. If we hit the end
2035 // of the string, then fail.
2036 for (;;) {
2037 if (start >= text.length()) {
2038 return -start;
2039 }
2040
2041 UChar32 c = text.char32At(start);
2042
2043 if (!u_isUWhiteSpace(c)) {
2044 break;
2045 }
2046
2047 start += UTF_CHAR_LENGTH(c);
2048 }
2049
2050 pos.setIndex(start);
2051
2052 // We handle a few special cases here where we need to parse
2053 // a number value. We handle further, more generic cases below. We need
2054 // to handle some of them here because some fields require extra processing on
2055 // the parsed value.
2056 if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD ||
2057 patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD ||
2058 patternCharIndex == UDAT_HOUR1_FIELD ||
2059 patternCharIndex == UDAT_HOUR0_FIELD ||
2060 patternCharIndex == UDAT_DOW_LOCAL_FIELD ||
2061 patternCharIndex == UDAT_STANDALONE_DAY_FIELD ||
2062 patternCharIndex == UDAT_MONTH_FIELD ||
2063 patternCharIndex == UDAT_STANDALONE_MONTH_FIELD ||
2064 patternCharIndex == UDAT_QUARTER_FIELD ||
2065 patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD ||
2066 patternCharIndex == UDAT_YEAR_FIELD ||
2067 patternCharIndex == UDAT_YEAR_WOY_FIELD ||
2068 patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD)
2069 {
2070 int32_t parseStart = pos.getIndex();
2071 // It would be good to unify this with the obeyCount logic below,
2072 // but that's going to be difficult.
2073 const UnicodeString* src;
2074
2075 if (obeyCount) {
2076 if ((start+count) > text.length()) {
2077 return -start;
2078 }
2079
// Parse from a copy truncated at start + count so the number parser
// cannot read past the end of this field.
2080 text.extractBetween(0, start + count, temp);
2081 src = &temp;
2082 } else {
2083 src = &text;
2084 }
2085
2086 parseInt(*src, number, pos, allowNegative);
2087
// The parse position only advances when a number was consumed.
2088 if (pos.getIndex() > parseStart) {
2089 value = number.getLong();
2090 gotNumber = TRUE;
2091
2092 // Check the range of the value
// A negative bias disables the range check for this field.
2093 int32_t bias = gFieldRangeBias[patternCharIndex];
2094
2095 if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) {
2096 return -start;
2097 }
2098 }
2099
2100 }
2101
2102 // Make sure that we got a number if
2103 // we want one, and didn't get one
2104 // if we don't want one.
2105 switch (patternCharIndex) {
2106 case UDAT_HOUR_OF_DAY1_FIELD:
2107 case UDAT_HOUR_OF_DAY0_FIELD:
2108 case UDAT_HOUR1_FIELD:
2109 case UDAT_HOUR0_FIELD:
2110 // special range check for hours:
2111 if (value < 0 || value > 24) {
2112 return -start;
2113 }
2114
2115 // fall through to gotNumber check
2116
2117 case UDAT_YEAR_FIELD:
2118 case UDAT_YEAR_WOY_FIELD:
2119 case UDAT_FRACTIONAL_SECOND_FIELD:
2120 // these must be a number
2121 if (! gotNumber) {
2122 return -start;
2123 }
2124
2125 break;
2126
2127 case UDAT_DOW_LOCAL_FIELD:
2128 case UDAT_STANDALONE_DAY_FIELD:
2129 case UDAT_MONTH_FIELD:
2130 case UDAT_STANDALONE_MONTH_FIELD:
2131 case UDAT_QUARTER_FIELD:
2132 case UDAT_STANDALONE_QUARTER_FIELD:
2133 // in strict mode, these can only
2134 // be a number if count <= 2
2135 if (!lenient && gotNumber && count > 2) {
// NOTE(review): this is the only failure path that returns -1 instead
// of -start; confirm callers only test the sign of the result.
2136 return -1;
2137 }
2138
2139 break;
2140
2141 default:
2142 // we check the rest of the fields below.
2143 break;
2144 }
2145
2146 switch (patternCharIndex) {
2147 case UDAT_ERA_FIELD:
// Pattern length selects the era name table: 5 = narrow, 4 = full names,
// anything else = abbreviated eras.
2148 if (count == 5) {
2149 return matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal);
2150 }
2151 if (count == 4) {
2152 return matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal);
2153 }
2154
2155 return matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal);
2156
2157 case UDAT_YEAR_FIELD:
2158 // If there are 3 or more YEAR pattern characters, this indicates
2159 // that the year value is to be treated literally, without any
2160 // two-digit year adjustments (e.g., from "01" to 2001). Otherwise
2161 // we made adjustments to place the 2-digit year in the proper
2162 // century, for parsed strings from "00" to "99". Any other string
2163 // is treated literally: "2250", "-1", "1", "002".
2164 if ((pos.getIndex() - start) == 2
2165 && u_isdigit(text.charAt(start))
2166 && u_isdigit(text.charAt(start+1)))
2167 {
2168 // Assume for example that the defaultCenturyStart is 6/18/1903.
2169 // This means that two-digit years will be forced into the range
2170 // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02
2171 // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond
2172 // to 1904, 1905, etc. If the year is 03, then it is 2003 if the
2173 // other fields specify a date before 6/18, or 1903 if they specify a
2174 // date afterwards. As a result, 03 is an ambiguous year. All other
2175 // two-digit years are unambiguous.
2176 if(fHaveDefaultCentury) { // check if this formatter even has a pivot year
2177 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2178 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2179 value += (fDefaultCenturyStartYear/100)*100 +
2180 (value < ambiguousTwoDigitYear ? 100 : 0);
2181 }
2182 }
2183 cal.set(UCAL_YEAR, value);
2184 return pos.getIndex();
2185
2186 case UDAT_YEAR_WOY_FIELD:
2187 // Comment is the same as for UDAT_Year_FIELDs - look above
2188 if ((pos.getIndex() - start) == 2
2189 && u_isdigit(text.charAt(start))
2190 && u_isdigit(text.charAt(start+1))
2191 && fHaveDefaultCentury )
2192 {
2193 int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100;
2194 ambiguousYear[0] = (value == ambiguousTwoDigitYear);
2195 value += (fDefaultCenturyStartYear/100)*100 +
2196 (value < ambiguousTwoDigitYear ? 100 : 0);
2197 }
2198 cal.set(UCAL_YEAR_WOY, value);
2199 return pos.getIndex();
2200
2201 case UDAT_MONTH_FIELD:
2202 if (gotNumber) // i.e., M or MM.
2203 {
2204 // Don't want to parse the month if it is a string
2205 // while pattern uses numeric style: M or MM.
2206 // [We computed 'value' above.]
// The parsed month number is 1-based; UCAL_MONTH is set to value - 1.
2207 cal.set(UCAL_MONTH, value - 1);
2208 return pos.getIndex();
2209 } else {
2210 // count >= 3 // i.e., MMM or MMMM
2211 // Want to be able to parse both short and long forms.
2212 // Try count == 4 first:
2213 int32_t newStart = 0;
2214
2215 if ((newStart = matchString(text, start, UCAL_MONTH,
2216 fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0)
2217 return newStart;
2218 else // count == 4 failed, now try count == 3
2219 return matchString(text, start, UCAL_MONTH,
2220 fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal);
2221 }
2222
2223 case UDAT_STANDALONE_MONTH_FIELD:
2224 if (gotNumber) // i.e., L or LL.
2225 {
2226 // Don't want to parse the month if it is a string
2227 // while pattern uses numeric style: M or MM.
2228 // [We computed 'value' above.]
2229 cal.set(UCAL_MONTH, value - 1);
2230 return pos.getIndex();
2231 } else {
2232 // count >= 3 // i.e., LLL or LLLL
2233 // Want to be able to parse both short and long forms.
2234 // Try count == 4 first:
2235 int32_t newStart = 0;
2236
2237 if ((newStart = matchString(text, start, UCAL_MONTH,
2238 fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0)
2239 return newStart;
2240 else // count == 4 failed, now try count == 3
2241 return matchString(text, start, UCAL_MONTH,
2242 fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal);
2243 }
2244
2245 case UDAT_HOUR_OF_DAY1_FIELD:
2246 // [We computed 'value' above.]
// A value one past the calendar's maximum hour-of-day is stored as 0.
2247 if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1)
2248 value = 0;
2249
2250 // fall through to set field
2251
2252 case UDAT_HOUR_OF_DAY0_FIELD:
2253 cal.set(UCAL_HOUR_OF_DAY, value);
2254 return pos.getIndex();
2255
2256 case UDAT_FRACTIONAL_SECOND_FIELD:
2257 // Fractional seconds left-justify
// i is the number of text positions consumed. With fewer than three the
// value is scaled up by powers of ten; with more than three it is divided
// down with round-half-up.
// NOTE(review): the rounding can yield 1000 (e.g. "9995" -> (9995+5)/10),
// and 'a' can overflow for very long digit runs -- confirm acceptable.
2258 i = pos.getIndex() - start;
2259 if (i < 3) {
2260 while (i < 3) {
2261 value *= 10;
2262 i++;
2263 }
2264 } else {
2265 int32_t a = 1;
2266 while (i > 3) {
2267 a *= 10;
2268 i--;
2269 }
2270 value = (value + (a>>1)) / a;
2271 }
2272 cal.set(UCAL_MILLISECOND, value);
2273 return pos.getIndex();
2274
2275 case UDAT_DOW_LOCAL_FIELD:
2276 if (gotNumber) // i.e., e or ee
2277 {
2278 // [We computed 'value' above.]
2279 cal.set(UCAL_DOW_LOCAL, value);
2280 return pos.getIndex();
2281 }
2282 // else for eee-eeeee fall through to handling of EEE-EEEEE
2283 // fall through, do not break here
2284 case UDAT_DAY_OF_WEEK_FIELD:
2285 {
2286 // Want to be able to parse both short and long forms.
2287 // Try count == 4 (EEEE) first:
2288 int32_t newStart = 0;
2289 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2290 fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0)
2291 return newStart;
2292 // EEEE failed, now try EEE
2293 else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2294 fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0)
2295 return newStart;
2296 // EEE failed, now try EEEEE
2297 else
2298 return matchString(text, start, UCAL_DAY_OF_WEEK,
2299 fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal);
2300 }
2301
2302 case UDAT_STANDALONE_DAY_FIELD:
2303 {
2304 if (gotNumber) // c or cc
2305 {
2306 // [We computed 'value' above.]
2307 cal.set(UCAL_DOW_LOCAL, value);
2308 return pos.getIndex();
2309 }
2310 // Want to be able to parse both short and long forms.
2311 // Try count == 4 (cccc) first:
2312 int32_t newStart = 0;
2313 if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK,
2314 fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0)
2315 return newStart;
2316 else // cccc failed, now try ccc
2317 return matchString(text, start, UCAL_DAY_OF_WEEK,
2318 fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal);
2319 }
2320
2321 case UDAT_AM_PM_FIELD:
2322 return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal);
2323
2324 case UDAT_HOUR1_FIELD:
2325 // [We computed 'value' above.]
2326 if (value == cal.getLeastMaximum(UCAL_HOUR)+1)
2327 value = 0;
2328
2329 // fall through to set field
2330
2331 case UDAT_HOUR0_FIELD:
2332 cal.set(UCAL_HOUR, value);
2333 return pos.getIndex();
2334
2335 case UDAT_QUARTER_FIELD:
2336 if (gotNumber) // i.e., Q or QQ.
2337 {
2338 // Don't want to parse the month if it is a string
2339 // while pattern uses numeric style: Q or QQ.
2340 // [We computed 'value' above.]
// A numeric quarter is stored as the first month of that quarter.
2341 cal.set(UCAL_MONTH, (value - 1) * 3);
2342 return pos.getIndex();
2343 } else {
2344 // count >= 3 // i.e., QQQ or QQQQ
2345 // Want to be able to parse both short and long forms.
2346 // Try count == 4 first:
2347 int32_t newStart = 0;
2348
2349 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2350 fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0)
2351 return newStart;
2352 else // count == 4 failed, now try count == 3
2353 return matchQuarterString(text, start, UCAL_MONTH,
2354 fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal);
2355 }
2356
2357 case UDAT_STANDALONE_QUARTER_FIELD:
2358 if (gotNumber) // i.e., q or qq.
2359 {
2360 // Don't want to parse the month if it is a string
2361 // while pattern uses numeric style: q or q.
2362 // [We computed 'value' above.]
2363 cal.set(UCAL_MONTH, (value - 1) * 3);
2364 return pos.getIndex();
2365 } else {
2366 // count >= 3 // i.e., qqq or qqqq
2367 // Want to be able to parse both short and long forms.
2368 // Try count == 4 first:
2369 int32_t newStart = 0;
2370
2371 if ((newStart = matchQuarterString(text, start, UCAL_MONTH,
2372 fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0)
2373 return newStart;
2374 else // count == 4 failed, now try count == 3
2375 return matchQuarterString(text, start, UCAL_MONTH,
2376 fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal);
2377 }
2378
2379 case UDAT_TIMEZONE_FIELD:
2380 case UDAT_TIMEZONE_RFC_FIELD:
2381 case UDAT_TIMEZONE_GENERIC_FIELD:
2382 case UDAT_TIMEZONE_SPECIAL_FIELD:
2383 {
2384 int32_t offset = 0;
2385 UBool parsed = FALSE;
2386
2387 // Step 1
2388 // Check if this is a long GMT offset string (either localized or default)
2389 offset = parseGMT(text, pos);
2390 if (pos.getIndex() - start > 0) {
2391 parsed = TRUE;
2392 }
2393 if (!parsed) {
2394 // Step 2
2395 // Check if this is an RFC822 time zone offset.
2396 // ICU supports the standard RFC822 format [+|-]HHmm
2397 // and its extended form [+|-]HHmmSS.
// The do { ... } while (FALSE) wrapper lets each rejection 'break' out
// with 'parsed' still FALSE.
2398 do {
2399 int32_t sign = 0;
2400 UChar signChar = text.charAt(start);
2401 if (signChar == (UChar)0x002B /* '+' */) {
2402 sign = 1;
2403 } else if (signChar == (UChar)0x002D /* '-' */) {
2404 sign = -1;
2405 } else {
2406 // Not an RFC822 offset string
2407 break;
2408 }
2409
2410 // Parse digits
2411 int32_t orgPos = start + 1;
2412 pos.setIndex(orgPos);
2413 parseInt(text, number, 6, pos, FALSE);
2414 int32_t numLen = pos.getIndex() - orgPos;
2415 if (numLen <= 0) {
2416 break;
2417 }
2418
2419 // The following formats are possible (excluding the sign char)
2420 // HHmmSS
2421 // HmmSS
2422 // HHmm
2423 // Hmm
2424 // HH
2425 // H
2426 int32_t val = number.getLong();
2427 int32_t hour = 0, min = 0, sec = 0;
2428 switch(numLen) {
2429 case 1: // H
2430 case 2: // HH
2431 hour = val;
2432 break;
2433 case 3: // Hmm
2434 case 4: // HHmm
2435 hour = val / 100;
2436 min = val % 100;
2437 break;
2438 case 5: // Hmmss
2439 case 6: // HHmmss
2440 hour = val / 10000;
2441 min = (val % 10000) / 100;
2442 sec = val % 100;
2443 break;
2444 }
2445 if (hour > 23 || min > 59 || sec > 59) {
2446 // Invalid value range
2447 break;
2448 }
// Signed offset in milliseconds.
2449 offset = (((hour * 60) + min) * 60 + sec) * 1000 * sign;
2450 parsed = TRUE;
2451 } while (FALSE);
2452
2453 if (!parsed) {
2454 // Failed to parse. Reset the position.
2455 pos.setIndex(start);
2456 }
2457 }
2458
2459 if (parsed) {
2460 // offset was successfully parsed as either a long GMT string or RFC822 zone offset
2461 // string. Create normalized zone ID for the offset.
2462
2463 UnicodeString tzID(gGmt);
2464 formatRFC822TZ(tzID, offset);
2465 //TimeZone *customTZ = TimeZone::createTimeZone(tzID);
2466 TimeZone *customTZ = new SimpleTimeZone(offset, tzID); // faster than TimeZone::createTimeZone
// Ownership of customTZ passes to the calendar.
2467 cal.adoptTimeZone(customTZ);
2468
2469 return pos.getIndex();
2470 }
2471
2472 // Step 3
2473 // At this point, check for named time zones by looking through
2474 // the locale data from the DateFormatZoneData strings.
2475 // Want to be able to parse both short and long forms.
2476 // optimize for calendar's current time zone
2477 const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat();
2478 if (zsf) {
2479 UErrorCode status = U_ZERO_ERROR;
2480 const ZoneStringInfo *zsinfo = NULL;
// matchLen is only read below when zsinfo is non-NULL.
2481 int32_t matchLen;
2482
// No lookup is attempted for UDAT_TIMEZONE_RFC_FIELD or for pattern
// lengths not listed below; zsinfo then stays NULL and parsing fails.
2483 switch (patternCharIndex) {
2484 case UDAT_TIMEZONE_FIELD: // 'z'
2485 if (count < 4) {
2486 zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
2487 } else {
2488 zsinfo = zsf->findSpecificLong(text, start, matchLen, status);
2489 }
2490 break;
2491 case UDAT_TIMEZONE_GENERIC_FIELD: // 'v'
2492 if (count == 1) {
2493 zsinfo = zsf->findGenericShort(text, start, matchLen, status);
2494 } else if (count == 4) {
2495 zsinfo = zsf->findGenericLong(text, start, matchLen, status);
2496 }
2497 break;
2498 case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V'
2499 if (count == 1) {
2500 zsinfo = zsf->findSpecificShort(text, start, matchLen, status);
2501 } else if (count == 4) {
2502 zsinfo = zsf->findGenericLocation(text, start, matchLen, status);
2503 }
2504 break;
2505 }
2506
2507 if (U_SUCCESS(status) && zsinfo != NULL) {
// Casts away const to record on this formatter whether the matched name
// was a standard or daylight name.
2508 if (zsinfo->isStandard()) {
2509 ((SimpleDateFormat*)this)->tztype = TZTYPE_STD;
2510 } else if (zsinfo->isDaylight()) {
2511 ((SimpleDateFormat*)this)->tztype = TZTYPE_DST;
2512 }
2513 UnicodeString tzid;
2514 zsinfo->getID(tzid);
2515
2516 UnicodeString current;
2517 cal.getTimeZone().getID(current);
// Only create and adopt a new zone when the ID differs from the
// calendar's current zone.
2518 if (tzid != current) {
2519 TimeZone *tz = TimeZone::createTimeZone(tzid);
2520 cal.adoptTimeZone(tz);
2521 }
2522 return start + matchLen;
2523 }
2524 }
2525 // complete failure
2526 return -start;
2527 }
2528
2529 default:
2530 // Handle "generic" fields
2531 int32_t parseStart = pos.getIndex();
2532 const UnicodeString* src;
2533 if (obeyCount) {
2534 if ((start+count) > text.length()) {
2535 return -start;
2536 }
2537 text.extractBetween(0, start + count, temp);
2538 src = &temp;
2539 } else {
2540 src = &text;
2541 }
2542 parseInt(*src, number, pos, allowNegative);
2543 if (pos.getIndex() != parseStart) {
// This 'value' shadows the function-level variable of the same name.
2544 int32_t value = number.getLong();
2545
2546 // Check the range of the value
2547 int32_t bias = gFieldRangeBias[patternCharIndex];
2548
2549 if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) {
2550 cal.set(field, value);
2551 return pos.getIndex();
2552 }
2553 }
2554
2555 return -start;
2556 }
2557 }
2558
2559 /**
2560 * Parse an integer using fNumberFormat. This method is semantically
2561 * const, but actually may modify fNumberFormat.
2562 */
2563 void SimpleDateFormat::parseInt(const UnicodeString& text,
2564 Formattable& number,
2565 ParsePosition& pos,
2566 UBool allowNegative) const {
2567 parseInt(text, number, -1, pos, allowNegative);
2568 }
2569
2570 /**
2571 * Parse an integer using fNumberFormat up to maxDigits.
2572 */
2573 void SimpleDateFormat::parseInt(const UnicodeString& text,
2574 Formattable& number,
2575 int32_t maxDigits,
2576 ParsePosition& pos,
2577 UBool allowNegative) const {
2578 UnicodeString oldPrefix;
2579 DecimalFormat* df = NULL;
2580 if (!allowNegative &&
2581 fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
2582 df = (DecimalFormat*)fNumberFormat;
2583 df->getNegativePrefix(oldPrefix);
2584 df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX);
2585 }
2586 int32_t oldPos = pos.getIndex();
2587 fNumberFormat->parse(text, number, pos);
2588 if (df != NULL) {
2589 df->setNegativePrefix(oldPrefix);
2590 }
2591
2592 if (maxDigits > 0) {
2593 // adjust the result to fit into
2594 // the maxDigits and move the position back
2595 int32_t nDigits = pos.getIndex() - oldPos;
2596 if (nDigits > maxDigits) {
2597 int32_t val = number.getLong();
2598 nDigits -= maxDigits;
2599 while (nDigits > 0) {
2600 val /= 10;
2601 nDigits--;
2602 }
2603 pos.setIndex(oldPos + maxDigits);
2604 number.setLong(val);
2605 }
2606 }
2607 }
2608
2609 //----------------------------------------------------------------------
2610
2611 void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern,
2612 UnicodeString& translatedPattern,
2613 const UnicodeString& from,
2614 const UnicodeString& to,
2615 UErrorCode& status)
2616 {
2617 // run through the pattern and convert any pattern symbols from the version
2618 // in "from" to the corresponding character ion "to". This code takes
2619 // quoted strings into account (it doesn't try to translate them), and it signals
2620 // an error if a particular "pattern character" doesn't appear in "from".
2621 // Depending on the values of "from" and "to" this can convert from generic
2622 // to localized patterns or localized to generic.
2623 if (U_FAILURE(status))
2624 return;
2625
2626 translatedPattern.remove();
2627 UBool inQuote = FALSE;
2628 for (int32_t i = 0; i < originalPattern.length(); ++i) {
2629 UChar c = originalPattern[i];
2630 if (inQuote) {
2631 if (c == QUOTE)
2632 inQuote = FALSE;
2633 }
2634 else {
2635 if (c == QUOTE)
2636 inQuote = TRUE;
2637 else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/
2638 || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) {
2639 int32_t ci = from.indexOf(c);
2640 if (ci == -1) {
2641 status = U_INVALID_FORMAT_ERROR;
2642 return;
2643 }
2644 c = to[ci];
2645 }
2646 }
2647 translatedPattern += c;
2648 }
2649 if (inQuote) {
2650 status = U_INVALID_FORMAT_ERROR;
2651 return;
2652 }
2653 }
2654
2655 //----------------------------------------------------------------------
2656
2657 UnicodeString&
2658 SimpleDateFormat::toPattern(UnicodeString& result) const
2659 {
2660 result = fPattern;
2661 return result;
2662 }
2663
2664 //----------------------------------------------------------------------
2665
2666 UnicodeString&
2667 SimpleDateFormat::toLocalizedPattern(UnicodeString& result,
2668 UErrorCode& status) const
2669 {
2670 translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status);
2671 return result;
2672 }
2673
2674 //----------------------------------------------------------------------
2675
2676 void
2677 SimpleDateFormat::applyPattern(const UnicodeString& pattern)
2678 {
2679 fPattern = pattern;
2680 }
2681
2682 //----------------------------------------------------------------------
2683
2684 void
2685 SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern,
2686 UErrorCode &status)
2687 {
2688 translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status);
2689 }
2690
2691 //----------------------------------------------------------------------
2692
2693 const DateFormatSymbols*
2694 SimpleDateFormat::getDateFormatSymbols() const
2695 {
2696 return fSymbols;
2697 }
2698
2699 //----------------------------------------------------------------------
2700
2701 void
2702 SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols)
2703 {
2704 delete fSymbols;
2705 fSymbols = newFormatSymbols;
2706 }
2707
2708 //----------------------------------------------------------------------
2709 void
2710 SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols)
2711 {
2712 delete fSymbols;
2713 fSymbols = new DateFormatSymbols(newFormatSymbols);
2714 }
2715
2716
2717 //----------------------------------------------------------------------
2718
2719
2720 void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt)
2721 {
2722 UErrorCode status = U_ZERO_ERROR;
2723 DateFormat::adoptCalendar(calendarToAdopt);
2724 delete fSymbols;
2725 fSymbols=NULL;
2726 initializeSymbols(fLocale, fCalendar, status); // we need new symbols
2727 initializeDefaultCentury(); // we need a new century (possibly)
2728 }
2729
2730
2731 //----------------------------------------------------------------------
2732
2733
2734 UBool
2735 SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const {
2736 return isFieldUnitIgnored(fPattern, field);
2737 }
2738
2739
2740 UBool
2741 SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern,
2742 UCalendarDateFields field) {
2743 int32_t fieldLevel = fgCalendarFieldToLevel[field];
2744 int32_t level;
2745 UChar ch;
2746 UBool inQuote = FALSE;
2747 UChar prevCh = 0;
2748 int32_t count = 0;
2749
2750 for (int32_t i = 0; i < pattern.length(); ++i) {
2751 ch = pattern[i];
2752 if (ch != prevCh && count > 0) {
2753 level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
2754 // the larger the level, the smaller the field unit.
2755 if ( fieldLevel <= level ) {
2756 return FALSE;
2757 }
2758 count = 0;
2759 }
2760 if (ch == QUOTE) {
2761 if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) {
2762 ++i;
2763 } else {
2764 inQuote = ! inQuote;
2765 }
2766 }
2767 else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/)
2768 || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) {
2769 prevCh = ch;
2770 ++count;
2771 }
2772 }
2773 if ( count > 0 ) {
2774 // last item
2775 level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE];
2776 if ( fieldLevel <= level ) {
2777 return FALSE;
2778 }
2779 }
2780 return TRUE;
2781 }
2782
2783
2784
2785 const Locale&
2786 SimpleDateFormat::getSmpFmtLocale(void) const {
2787 return fLocale;
2788 }
2789
2790
2791
2792 U_NAMESPACE_END
2793
2794 #endif /* #if !UCONFIG_NO_FORMATTING */
2795
2796 //eof