]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
46f4442e | 3 | * Copyright (C) 1997-2009, International Business Machines Corporation and * |
b75a7d8f A |
4 | * others. All Rights Reserved. * |
5 | ******************************************************************************* | |
6 | * | |
7 | * File SMPDTFMT.CPP | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 02/19/97 aliu Converted from java. | |
13 | * 03/31/97 aliu Modified extensively to work with 50 locales. | |
14 | * 04/01/97 aliu Added support for centuries. | |
15 | * 07/09/97 helena Made ParsePosition into a class. | |
16 | * 07/21/98 stephen Added initializeDefaultCentury. | |
17 | * Removed getZoneIndex (added in DateFormatSymbols) | |
18 | * Removed subParseLong | |
19 | * Removed chk | |
20 | * 02/22/99 stephen Removed character literals for EBCDIC safety | |
21 | * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru | |
22 | * "99" are recognized. {j28 4182066} | |
23 | * 11/15/99 weiv Added support for week of year/day of week format | |
24 | ******************************************************************************** | |
25 | */ | |
26 | ||
46f4442e A |
27 | #define ZID_KEY_MAX 128 |
28 | ||
b75a7d8f A |
29 | #include "unicode/utypes.h" |
30 | ||
31 | #if !UCONFIG_NO_FORMATTING | |
32 | ||
33 | #include "unicode/smpdtfmt.h" | |
34 | #include "unicode/dtfmtsym.h" | |
374ca955 | 35 | #include "unicode/ures.h" |
b75a7d8f A |
36 | #include "unicode/msgfmt.h" |
37 | #include "unicode/calendar.h" | |
38 | #include "unicode/gregocal.h" | |
39 | #include "unicode/timezone.h" | |
40 | #include "unicode/decimfmt.h" | |
41 | #include "unicode/dcfmtsym.h" | |
42 | #include "unicode/uchar.h" | |
46f4442e | 43 | #include "unicode/uniset.h" |
b75a7d8f | 44 | #include "unicode/ustring.h" |
46f4442e A |
45 | #include "unicode/basictz.h" |
46 | #include "unicode/simpletz.h" | |
47 | #include "unicode/rbtz.h" | |
48 | #include "unicode/vtzone.h" | |
49 | #include "olsontz.h" | |
374ca955 A |
50 | #include "util.h" |
51 | #include "gregoimp.h" | |
52 | #include "cstring.h" | |
53 | #include "uassert.h" | |
46f4442e A |
54 | #include "zstrfmt.h" |
55 | #include "cmemory.h" | |
56 | #include "umutex.h" | |
57 | #include "smpdtfst.h" | |
b75a7d8f A |
58 | #include <float.h> |
59 | ||
374ca955 A |
60 | #if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL) |
61 | #include <stdio.h> | |
62 | #endif | |
63 | ||
b75a7d8f A |
64 | // ***************************************************************************** |
65 | // class SimpleDateFormat | |
66 | // ***************************************************************************** | |
67 | ||
68 | U_NAMESPACE_BEGIN | |
69 | ||
46f4442e A |
// 0x40 is '@', the code unit immediately before 'A'.
// NOTE(review): presumably subtracted from a pattern letter to index
// fgPatternCharToLevel, whose first entry sits one slot before 'A' -- confirm at the use site.
static const UChar PATTERN_CHAR_BASE = 0x40;

/**
 * Last-resort string to use for "GMT" when constructing time zone strings.
 */
// For time zones that have no names, use strings GMT+minutes and
// GMT-minutes. For instance, in France the time zone is GMT+60.
// Also accepted are GMT+H:MM or GMT-H:MM.
// All strings below are NUL-terminated UTF-16 spelled out as code units
// (no character literals are used).
static const UChar gGmt[]      = {0x0047, 0x004D, 0x0054, 0x0000};         // "GMT"
static const UChar gGmtPlus[]  = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+"
static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-"
static const UChar gDefGmtPat[]       = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */
static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */
static const UChar gDefGmtNegHmPat[]  = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */
static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */
static const UChar gDefGmtPosHmPat[]  = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */
// Lengths, in UChars and excluding the terminating NUL, of the strings above.
// These must be kept in sync by hand if any of the strings is edited.
typedef enum GmtPatSize {
    kGmtLen = 3,     // gGmt
    kGmtPatLen = 6,  // gDefGmtPat
    kNegHmsLen = 9,  // gDefGmtNegHmsPat
    kNegHmLen = 6,   // gDefGmtNegHmPat
    kPosHmsLen = 9,  // gDefGmtPosHmsPat
    kPosHmLen = 6    // gDefGmtPosHmPat
} GmtPatSize;
b75a7d8f A |
94 | |
// This is a pattern-of-last-resort used when we can't load a usable pattern out
// of a resource. It is the initial fPattern of the (locale, status) constructor.
static const UChar gDefaultPattern[] =
{
    0x79, 0x79, 0x79, 0x79, 0x4D, 0x4D, 0x64, 0x64, 0x20, 0x68, 0x68, 0x3A, 0x6D, 0x6D, 0x20, 0x61, 0
};  /* "yyyyMMdd hh:mm a" */

// This prefix is designed to NEVER MATCH real text, in order to
// suppress the parsing of negative numbers. Adjust as needed (if
// this becomes valid Unicode).
// The string is the single code unit U+AB00 followed by a NUL terminator.
static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0};
106 | ||
/**
 * These are the tags we expect to see in normal resource bundle files associated
 * with a locale.
 */
// Key passed to CalendarData::getByKey() in construct() to fetch the pattern array.
static const char gDateTimePatternsTag[]="DateTimePatterns";

static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC"
static const UChar QUOTE = 0x27; // Single quote

// Indexes into fGMTFormatters: one formatter per sign (negative/positive) and
// per precision (HMS = with seconds, HM = hours and minutes only).
// kNumGMTFormatters is the number of slots and must remain the last enumerator.
enum {
    kGMTNegativeHMS = 0,
    kGMTNegativeHM,
    kGMTPositiveHMS,
    kGMTPositiveHM,

    kNumGMTFormatters
};
123 | ||
/*
 * The field range check bias for each UDateFormatField.
 * The bias is added to the minimum and maximum values
 * before they are compared to the parsed number.
 * For example, the calendar stores zero-based month numbers
 * but the parsed month numbers start at 1, so the bias is 1.
 *
 * A value of -1 means that the value is not checked.
 *
 * The entries are listed in the same order as the pattern-index tables
 * fgPatternIndexToCalendarField / fgPatternIndexToDateFormatField; a new
 * field must be added to all of them.
 */
static const int32_t gFieldRangeBias[] = {
    -1,  // 'G' - UDAT_ERA_FIELD
    -1,  // 'y' - UDAT_YEAR_FIELD
     1,  // 'M' - UDAT_MONTH_FIELD
     0,  // 'd' - UDAT_DATE_FIELD
    -1,  // 'k' - UDAT_HOUR_OF_DAY1_FIELD
    -1,  // 'H' - UDAT_HOUR_OF_DAY0_FIELD
     0,  // 'm' - UDAT_MINUTE_FIELD
     0,  // 's' - UDAT_SECOND_FIELD
    -1,  // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?)
    -1,  // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?)
    -1,  // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?)
    -1,  // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?)
    -1,  // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?)
    -1,  // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?)
    -1,  // 'a' - UDAT_AM_PM_FIELD
    -1,  // 'h' - UDAT_HOUR1_FIELD
    -1,  // 'K' - UDAT_HOUR0_FIELD
    -1,  // 'z' - UDAT_TIMEZONE_FIELD
    -1,  // 'Y' - UDAT_YEAR_WOY_FIELD
    -1,  // 'e' - UDAT_DOW_LOCAL_FIELD
    -1,  // 'u' - UDAT_EXTENDED_YEAR_FIELD
    -1,  // 'g' - UDAT_JULIAN_DAY_FIELD
    -1,  // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD
    -1,  // 'Z' - UDAT_TIMEZONE_RFC_FIELD
    -1,  // 'v' - UDAT_TIMEZONE_GENERIC_FIELD
     0,  // 'c' - UDAT_STANDALONE_DAY_FIELD
     1,  // 'L' - UDAT_STANDALONE_MONTH_FIELD
    -1,  // 'Q' - UDAT_QUARTER_FIELD (1-4?)
    -1,  // 'q' - UDAT_STANDALONE_QUARTER_FIELD
    -1   // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD
};
165 | ||
// File-scope mutex. Its users are not visible in this part of the file.
// NOTE(review): presumably guards the lazy creation of fGMTFormatters -- confirm in initGMTFormatters().
static UMTX LOCK;

// NOTE(review): expected to supply the class-ID (RTTI) functions for SimpleDateFormat;
// see the macro definition to confirm exactly what it emits.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat)
b75a7d8f A |
169 | |
170 | //---------------------------------------------------------------------- | |
171 | ||
172 | SimpleDateFormat::~SimpleDateFormat() | |
173 | { | |
174 | delete fSymbols; | |
46f4442e A |
175 | if (fGMTFormatters) { |
176 | for (int32_t i = 0; i < kNumGMTFormatters; i++) { | |
177 | if (fGMTFormatters[i]) { | |
178 | delete fGMTFormatters[i]; | |
179 | } | |
180 | } | |
181 | uprv_free(fGMTFormatters); | |
182 | } | |
b75a7d8f A |
183 | } |
184 | ||
185 | //---------------------------------------------------------------------- | |
186 | ||
187 | SimpleDateFormat::SimpleDateFormat(UErrorCode& status) | |
188 | : fLocale(Locale::getDefault()), | |
73c04bcf | 189 | fSymbols(NULL), |
46f4442e | 190 | fGMTFormatters(NULL) |
b75a7d8f A |
191 | { |
192 | construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status); | |
193 | initializeDefaultCentury(); | |
194 | } | |
195 | ||
196 | //---------------------------------------------------------------------- | |
197 | ||
198 | SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, | |
199 | UErrorCode &status) | |
200 | : fPattern(pattern), | |
201 | fLocale(Locale::getDefault()), | |
73c04bcf | 202 | fSymbols(NULL), |
46f4442e | 203 | fGMTFormatters(NULL) |
b75a7d8f A |
204 | { |
205 | initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); | |
206 | initialize(fLocale, status); | |
207 | initializeDefaultCentury(); | |
208 | } | |
209 | ||
210 | //---------------------------------------------------------------------- | |
211 | ||
212 | SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, | |
213 | const Locale& locale, | |
214 | UErrorCode& status) | |
215 | : fPattern(pattern), | |
73c04bcf | 216 | fLocale(locale), |
46f4442e | 217 | fGMTFormatters(NULL) |
b75a7d8f A |
218 | { |
219 | initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); | |
220 | initialize(fLocale, status); | |
221 | initializeDefaultCentury(); | |
222 | } | |
223 | ||
224 | //---------------------------------------------------------------------- | |
225 | ||
226 | SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, | |
227 | DateFormatSymbols* symbolsToAdopt, | |
228 | UErrorCode& status) | |
229 | : fPattern(pattern), | |
230 | fLocale(Locale::getDefault()), | |
73c04bcf | 231 | fSymbols(symbolsToAdopt), |
46f4442e | 232 | fGMTFormatters(NULL) |
b75a7d8f A |
233 | { |
234 | initializeCalendar(NULL,fLocale,status); | |
235 | initialize(fLocale, status); | |
236 | initializeDefaultCentury(); | |
237 | } | |
238 | ||
239 | //---------------------------------------------------------------------- | |
240 | ||
241 | SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, | |
242 | const DateFormatSymbols& symbols, | |
243 | UErrorCode& status) | |
244 | : fPattern(pattern), | |
245 | fLocale(Locale::getDefault()), | |
73c04bcf | 246 | fSymbols(new DateFormatSymbols(symbols)), |
46f4442e | 247 | fGMTFormatters(NULL) |
b75a7d8f A |
248 | { |
249 | initializeCalendar(NULL, fLocale, status); | |
250 | initialize(fLocale, status); | |
251 | initializeDefaultCentury(); | |
252 | } | |
253 | ||
254 | //---------------------------------------------------------------------- | |
255 | ||
256 | // Not for public consumption; used by DateFormat | |
257 | SimpleDateFormat::SimpleDateFormat(EStyle timeStyle, | |
258 | EStyle dateStyle, | |
259 | const Locale& locale, | |
260 | UErrorCode& status) | |
261 | : fLocale(locale), | |
73c04bcf | 262 | fSymbols(NULL), |
46f4442e | 263 | fGMTFormatters(NULL) |
b75a7d8f A |
264 | { |
265 | construct(timeStyle, dateStyle, fLocale, status); | |
266 | if(U_SUCCESS(status)) { | |
267 | initializeDefaultCentury(); | |
268 | } | |
269 | } | |
270 | ||
271 | //---------------------------------------------------------------------- | |
272 | ||
273 | /** | |
274 | * Not for public consumption; used by DateFormat. This constructor | |
275 | * never fails. If the resource data is not available, it uses the | |
276 | * the last resort symbols. | |
277 | */ | |
278 | SimpleDateFormat::SimpleDateFormat(const Locale& locale, | |
279 | UErrorCode& status) | |
374ca955 | 280 | : fPattern(gDefaultPattern), |
b75a7d8f | 281 | fLocale(locale), |
73c04bcf | 282 | fSymbols(NULL), |
46f4442e | 283 | fGMTFormatters(NULL) |
b75a7d8f A |
284 | { |
285 | if (U_FAILURE(status)) return; | |
286 | initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status); | |
287 | if (U_FAILURE(status)) | |
288 | { | |
289 | status = U_ZERO_ERROR; | |
290 | delete fSymbols; | |
291 | // This constructor doesn't fail; it uses last resort data | |
292 | fSymbols = new DateFormatSymbols(status); | |
293 | /* test for NULL */ | |
294 | if (fSymbols == 0) { | |
295 | status = U_MEMORY_ALLOCATION_ERROR; | |
296 | return; | |
297 | } | |
298 | } | |
299 | ||
300 | initialize(fLocale, status); | |
301 | if(U_SUCCESS(status)) { | |
302 | initializeDefaultCentury(); | |
303 | } | |
304 | } | |
305 | ||
306 | //---------------------------------------------------------------------- | |
307 | ||
308 | SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other) | |
309 | : DateFormat(other), | |
73c04bcf | 310 | fSymbols(NULL), |
46f4442e | 311 | fGMTFormatters(NULL) |
b75a7d8f A |
312 | { |
313 | *this = other; | |
314 | } | |
315 | ||
316 | //---------------------------------------------------------------------- | |
317 | ||
318 | SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other) | |
319 | { | |
46f4442e A |
320 | if (this == &other) { |
321 | return *this; | |
322 | } | |
b75a7d8f A |
323 | DateFormat::operator=(other); |
324 | ||
325 | delete fSymbols; | |
326 | fSymbols = NULL; | |
327 | ||
328 | if (other.fSymbols) | |
329 | fSymbols = new DateFormatSymbols(*other.fSymbols); | |
330 | ||
331 | fDefaultCenturyStart = other.fDefaultCenturyStart; | |
332 | fDefaultCenturyStartYear = other.fDefaultCenturyStartYear; | |
333 | fHaveDefaultCentury = other.fHaveDefaultCentury; | |
334 | ||
335 | fPattern = other.fPattern; | |
336 | ||
337 | return *this; | |
338 | } | |
339 | ||
340 | //---------------------------------------------------------------------- | |
341 | ||
342 | Format* | |
343 | SimpleDateFormat::clone() const | |
344 | { | |
345 | return new SimpleDateFormat(*this); | |
346 | } | |
347 | ||
348 | //---------------------------------------------------------------------- | |
349 | ||
350 | UBool | |
351 | SimpleDateFormat::operator==(const Format& other) const | |
352 | { | |
374ca955 A |
353 | if (DateFormat::operator==(other)) { |
354 | // DateFormat::operator== guarantees following cast is safe | |
b75a7d8f | 355 | SimpleDateFormat* that = (SimpleDateFormat*)&other; |
73c04bcf | 356 | return (fPattern == that->fPattern && |
b75a7d8f | 357 | fSymbols != NULL && // Check for pathological object |
73c04bcf A |
358 | that->fSymbols != NULL && // Check for pathological object |
359 | *fSymbols == *that->fSymbols && | |
360 | fHaveDefaultCentury == that->fHaveDefaultCentury && | |
b75a7d8f A |
361 | fDefaultCenturyStart == that->fDefaultCenturyStart); |
362 | } | |
363 | return FALSE; | |
364 | } | |
365 | ||
366 | //---------------------------------------------------------------------- | |
367 | ||
368 | void SimpleDateFormat::construct(EStyle timeStyle, | |
369 | EStyle dateStyle, | |
370 | const Locale& locale, | |
371 | UErrorCode& status) | |
372 | { | |
373 | // called by several constructors to load pattern data from the resources | |
b75a7d8f A |
374 | if (U_FAILURE(status)) return; |
375 | ||
b75a7d8f A |
376 | // We will need the calendar to know what type of symbols to load. |
377 | initializeCalendar(NULL, locale, status); | |
73c04bcf | 378 | if (U_FAILURE(status)) return; |
b75a7d8f | 379 | |
374ca955 A |
380 | CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status); |
381 | UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status); | |
b75a7d8f A |
382 | if (U_FAILURE(status)) return; |
383 | ||
374ca955 | 384 | if (ures_getSize(dateTimePatterns) <= kDateTime) |
b75a7d8f A |
385 | { |
386 | status = U_INVALID_FORMAT_ERROR; | |
387 | return; | |
388 | } | |
389 | ||
374ca955 A |
390 | setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status), |
391 | ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status)); | |
392 | ||
b75a7d8f A |
393 | // create a symbols object from the locale |
394 | initializeSymbols(locale,fCalendar, status); | |
395 | if (U_FAILURE(status)) return; | |
396 | /* test for NULL */ | |
397 | if (fSymbols == 0) { | |
398 | status = U_MEMORY_ALLOCATION_ERROR; | |
399 | return; | |
400 | } | |
401 | ||
374ca955 A |
402 | const UChar *resStr; |
403 | int32_t resStrLen = 0; | |
b75a7d8f A |
404 | |
405 | // if the pattern should include both date and time information, use the date/time | |
406 | // pattern string as a guide to tell use how to glue together the appropriate date | |
407 | // and time pattern strings. The actual gluing-together is handled by a convenience | |
408 | // method on MessageFormat. | |
374ca955 | 409 | if ((timeStyle != kNone) && (dateStyle != kNone)) |
b75a7d8f | 410 | { |
374ca955 | 411 | Formattable timeDateArray[2]; |
b75a7d8f | 412 | |
b75a7d8f A |
413 | // use Formattable::adoptString() so that we can use fastCopyFrom() |
414 | // instead of Formattable::setString()'s unaware, safe, deep string clone | |
415 | // see Jitterbug 2296 | |
374ca955 | 416 | resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status); |
46f4442e A |
417 | UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen); |
418 | // NULL pointer check | |
419 | if (tempus1 == NULL) { | |
420 | status = U_MEMORY_ALLOCATION_ERROR; | |
421 | return; | |
422 | } | |
423 | timeDateArray[0].adoptString(tempus1); | |
424 | ||
374ca955 | 425 | resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status); |
46f4442e A |
426 | UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen); |
427 | // Null pointer check | |
428 | if (tempus2 == NULL) { | |
429 | status = U_MEMORY_ALLOCATION_ERROR; | |
430 | return; | |
431 | } | |
432 | timeDateArray[1].adoptString(tempus2); | |
b75a7d8f | 433 | |
374ca955 A |
434 | resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)kDateTime, &resStrLen, &status); |
435 | MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status); | |
b75a7d8f | 436 | } |
b75a7d8f A |
437 | // if the pattern includes just time data or just date date, load the appropriate |
438 | // pattern string from the resources | |
374ca955 A |
439 | // setTo() - see DateFormatSymbols::assignArray comments |
440 | else if (timeStyle != kNone) { | |
441 | resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status); | |
442 | fPattern.setTo(TRUE, resStr, resStrLen); | |
443 | } | |
444 | else if (dateStyle != kNone) { | |
445 | resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status); | |
446 | fPattern.setTo(TRUE, resStr, resStrLen); | |
447 | } | |
b75a7d8f A |
448 | |
449 | // and if it includes _neither_, that's an error | |
374ca955 A |
450 | else |
451 | status = U_INVALID_FORMAT_ERROR; | |
b75a7d8f A |
452 | |
453 | // finally, finish initializing by creating a Calendar and a NumberFormat | |
454 | initialize(locale, status); | |
455 | } | |
456 | ||
457 | //---------------------------------------------------------------------- | |
458 | ||
459 | Calendar* | |
460 | SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, UErrorCode& status) | |
461 | { | |
73c04bcf A |
462 | if(!U_FAILURE(status)) { |
463 | fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status); | |
464 | } | |
465 | if (U_SUCCESS(status) && fCalendar == NULL) { | |
466 | status = U_MEMORY_ALLOCATION_ERROR; | |
467 | } | |
468 | return fCalendar; | |
b75a7d8f A |
469 | } |
470 | ||
471 | void | |
472 | SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status) | |
473 | { | |
474 | if(U_FAILURE(status)) { | |
475 | fSymbols = NULL; | |
476 | } else { | |
477 | // pass in calendar type - use NULL (default) if no calendar set (or err). | |
478 | fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status); | |
46f4442e A |
479 | // Null pointer check |
480 | if (fSymbols == NULL) { | |
481 | status = U_MEMORY_ALLOCATION_ERROR; | |
482 | return; | |
483 | } | |
b75a7d8f A |
484 | } |
485 | } | |
486 | ||
487 | void | |
488 | SimpleDateFormat::initialize(const Locale& locale, | |
489 | UErrorCode& status) | |
490 | { | |
491 | if (U_FAILURE(status)) return; | |
492 | ||
b75a7d8f A |
493 | // We don't need to check that the row count is >= 1, since all 2d arrays have at |
494 | // least one row | |
495 | fNumberFormat = NumberFormat::createInstance(locale, status); | |
496 | if (fNumberFormat != NULL && U_SUCCESS(status)) | |
497 | { | |
498 | // no matter what the locale's default number format looked like, we want | |
499 | // to modify it so that it doesn't use thousands separators, doesn't always | |
500 | // show the decimal point, and recognizes integers only when parsing | |
501 | ||
502 | fNumberFormat->setGroupingUsed(FALSE); | |
503 | if (fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) | |
504 | ((DecimalFormat*)fNumberFormat)->setDecimalSeparatorAlwaysShown(FALSE); | |
505 | fNumberFormat->setParseIntegerOnly(TRUE); | |
506 | fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00" | |
46f4442e A |
507 | |
508 | // TODO: Really, the default should be lenient... | |
509 | fNumberFormat->setParseStrict(FALSE); | |
b75a7d8f A |
510 | } |
511 | else if (U_SUCCESS(status)) | |
512 | { | |
513 | status = U_MISSING_RESOURCE_ERROR; | |
514 | } | |
515 | } | |
516 | ||
517 | /* Initialize the fields we use to disambiguate ambiguous years. Separate | |
518 | * so we can call it from readObject(). | |
519 | */ | |
520 | void SimpleDateFormat::initializeDefaultCentury() | |
521 | { | |
522 | if(fCalendar) { | |
523 | fHaveDefaultCentury = fCalendar->haveDefaultCentury(); | |
524 | if(fHaveDefaultCentury) { | |
525 | fDefaultCenturyStart = fCalendar->defaultCenturyStart(); | |
526 | fDefaultCenturyStartYear = fCalendar->defaultCenturyStartYear(); | |
527 | } else { | |
528 | fDefaultCenturyStart = DBL_MIN; | |
529 | fDefaultCenturyStartYear = -1; | |
530 | } | |
531 | } | |
532 | } | |
533 | ||
534 | /* Define one-century window into which to disambiguate dates using | |
535 | * two-digit years. Make public in JDK 1.2. | |
536 | */ | |
537 | void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status) | |
538 | { | |
539 | if(U_FAILURE(status)) { | |
540 | return; | |
541 | } | |
542 | if(!fCalendar) { | |
543 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
544 | return; | |
545 | } | |
546 | ||
547 | fCalendar->setTime(startDate, status); | |
548 | if(U_SUCCESS(status)) { | |
549 | fHaveDefaultCentury = TRUE; | |
550 | fDefaultCenturyStart = startDate; | |
551 | fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status); | |
552 | } | |
553 | } | |
554 | ||
555 | //---------------------------------------------------------------------- | |
556 | ||
557 | UnicodeString& | |
558 | SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const | |
559 | { | |
560 | UErrorCode status = U_ZERO_ERROR; | |
561 | pos.setBeginIndex(0); | |
562 | pos.setEndIndex(0); | |
563 | ||
564 | UBool inQuote = FALSE; | |
565 | UChar prevCh = 0; | |
566 | int32_t count = 0; | |
567 | ||
568 | // loop through the pattern string character by character | |
569 | for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) { | |
570 | UChar ch = fPattern[i]; | |
571 | ||
572 | // Use subFormat() to format a repeated pattern character | |
573 | // when a different pattern or non-pattern character is seen | |
574 | if (ch != prevCh && count > 0) { | |
575 | subFormat(appendTo, prevCh, count, pos, cal, status); | |
576 | count = 0; | |
577 | } | |
578 | if (ch == QUOTE) { | |
579 | // Consecutive single quotes are a single quote literal, | |
580 | // either outside of quotes or between quotes | |
581 | if ((i+1) < fPattern.length() && fPattern[i+1] == QUOTE) { | |
582 | appendTo += (UChar)QUOTE; | |
583 | ++i; | |
584 | } else { | |
585 | inQuote = ! inQuote; | |
586 | } | |
587 | } | |
588 | else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) | |
589 | || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) { | |
590 | // ch is a date-time pattern character to be interpreted | |
591 | // by subFormat(); count the number of times it is repeated | |
592 | prevCh = ch; | |
593 | ++count; | |
594 | } | |
595 | else { | |
596 | // Append quoted characters and unquoted non-pattern characters | |
597 | appendTo += ch; | |
598 | } | |
599 | } | |
600 | ||
601 | // Format the last item in the pattern, if any | |
602 | if (count > 0) { | |
603 | subFormat(appendTo, prevCh, count, pos, cal, status); | |
604 | } | |
605 | ||
606 | // and if something failed (e.g., an invalid format character), reset our FieldPosition | |
607 | // to (0, 0) to show that | |
608 | // {sfb} look at this later- are these being set correctly? | |
609 | if (U_FAILURE(status)) { | |
610 | pos.setBeginIndex(0); | |
611 | pos.setEndIndex(0); | |
612 | } | |
613 | ||
614 | return appendTo; | |
615 | } | |
616 | ||
617 | UnicodeString& | |
618 | SimpleDateFormat::format(const Formattable& obj, | |
619 | UnicodeString& appendTo, | |
620 | FieldPosition& pos, | |
621 | UErrorCode& status) const | |
622 | { | |
623 | // this is just here to get around the hiding problem | |
624 | // (the previous format() override would hide the version of | |
625 | // format() on DateFormat that this function correspond to, so we | |
626 | // have to redefine it here) | |
627 | return DateFormat::format(obj, appendTo, pos, status); | |
628 | } | |
629 | ||
630 | //---------------------------------------------------------------------- | |
631 | ||
46f4442e A |
/* Map calendar field into calendar field level.
 * the larger the level, the smaller the field unit.
 * For example, UCAL_ERA level is 0, UCAL_YEAR level is 10,
 * UCAL_MONTH level is 20.
 * NOTE: if new fields adds in, the table needs to update.
 *
 * The table is indexed by calendar field number; the letters in the
 * row comments are the pattern letters of the fields in that row.
 * NOTE(review): the '.', '..' and '?' placeholders appear to stand for fields
 * without a pattern letter -- confirm against the UCalendarDateFields order.
 */
const int32_t
SimpleDateFormat::fgCalendarFieldToLevel[] =
{
    /*GyM*/ 0, 10, 20,
    /*wW*/ 20, 30,
    /*dDEF*/ 30, 20, 30, 30,
    /*ahHm*/ 40, 50, 50, 60,
    /*sS..*/ 70, 80,
    /*z?Y*/ 0, 0, 10,
    /*eug*/ 30, 10, 0,
    /*A*/ 40
};
650 | ||
651 | ||
/* Map calendar field LETTER into calendar field level.
 * the larger the level, the smaller the field unit.
 * NOTE: if new fields adds in, the table needs to update.
 *
 * The table has 64 entries in four rows of 16. Going by the letter headers,
 * the first entry of the first row is the slot before 'A', and the entries
 * after 'Z' and 'z' are filler. -1 marks a slot with no level.
 */
const int32_t
SimpleDateFormat::fgPatternCharToLevel[] = {
    //       A   B   C   D   E   F   G   H   I   J   K   L   M   N   O
        -1, 40, -1, -1, 20, 30, 30,  0, 50, -1, -1, 50, 20, 20, -1, -1,
    //   P   Q   R   S   T   U   V   W   X   Y   Z
        -1, 20, -1, 80, -1, -1,  0, 30, -1, 10,  0, -1, -1, -1, -1, -1,
    //       a   b   c   d   e   f   g   h   i   j   k   l   m   n   o
        -1, 40, -1, 30, 30, 30, -1,  0, 50, -1, -1, 50, -1, 60, -1, -1,
    //   p   q   r   s   t   u   v   w   x   y   z
        -1, 20, -1, 70, -1, 10,  0, 20, -1, 10,  0, -1, -1, -1, -1, -1
};
667 | ||
668 | ||
b75a7d8f A |
// Map index into pattern character string to Calendar field number.
// The letters in each row comment are the pattern characters for that row's
// entries, in order. Several pattern characters share one calendar field
// (for example k and H both map to UCAL_HOUR_OF_DAY, and z, Z, v and V all map
// to UCAL_ZONE_OFFSET).
// Keep the order in sync with fgPatternIndexToDateFormatField.
const UCalendarDateFields
SimpleDateFormat::fgPatternIndexToCalendarField[] =
{
    /*GyM*/ UCAL_ERA, UCAL_YEAR, UCAL_MONTH,
    /*dkH*/ UCAL_DATE, UCAL_HOUR_OF_DAY, UCAL_HOUR_OF_DAY,
    /*msS*/ UCAL_MINUTE, UCAL_SECOND, UCAL_MILLISECOND,
    /*EDF*/ UCAL_DAY_OF_WEEK, UCAL_DAY_OF_YEAR, UCAL_DAY_OF_WEEK_IN_MONTH,
    /*wWa*/ UCAL_WEEK_OF_YEAR, UCAL_WEEK_OF_MONTH, UCAL_AM_PM,
    /*hKz*/ UCAL_HOUR, UCAL_HOUR, UCAL_ZONE_OFFSET,
    /*Yeu*/ UCAL_YEAR_WOY, UCAL_DOW_LOCAL, UCAL_EXTENDED_YEAR,
    /*gAZ*/ UCAL_JULIAN_DAY, UCAL_MILLISECONDS_IN_DAY, UCAL_ZONE_OFFSET,
    /*v*/   UCAL_ZONE_OFFSET,
    /*c*/   UCAL_DOW_LOCAL,
    /*L*/   UCAL_MONTH,
    /*Q*/   UCAL_MONTH,   // quarters have no calendar field of their own here
    /*q*/   UCAL_MONTH,
    /*V*/   UCAL_ZONE_OFFSET,
};
688 | ||
// Map index into pattern character string to DateFormat field number
// The letters in each row comment are the pattern characters for that row's
// entries, in order. Unlike the calendar-field table, every pattern character
// has its own distinct UDAT_* value here.
// Keep the order in sync with fgPatternIndexToCalendarField.
const UDateFormatField
SimpleDateFormat::fgPatternIndexToDateFormatField[] = {
    /*GyM*/ UDAT_ERA_FIELD, UDAT_YEAR_FIELD, UDAT_MONTH_FIELD,
    /*dkH*/ UDAT_DATE_FIELD, UDAT_HOUR_OF_DAY1_FIELD, UDAT_HOUR_OF_DAY0_FIELD,
    /*msS*/ UDAT_MINUTE_FIELD, UDAT_SECOND_FIELD, UDAT_FRACTIONAL_SECOND_FIELD,
    /*EDF*/ UDAT_DAY_OF_WEEK_FIELD, UDAT_DAY_OF_YEAR_FIELD, UDAT_DAY_OF_WEEK_IN_MONTH_FIELD,
    /*wWa*/ UDAT_WEEK_OF_YEAR_FIELD, UDAT_WEEK_OF_MONTH_FIELD, UDAT_AM_PM_FIELD,
    /*hKz*/ UDAT_HOUR1_FIELD, UDAT_HOUR0_FIELD, UDAT_TIMEZONE_FIELD,
    /*Yeu*/ UDAT_YEAR_WOY_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_EXTENDED_YEAR_FIELD,
    /*gAZ*/ UDAT_JULIAN_DAY_FIELD, UDAT_MILLISECONDS_IN_DAY_FIELD, UDAT_TIMEZONE_RFC_FIELD,
    /*v*/   UDAT_TIMEZONE_GENERIC_FIELD,
    /*c*/   UDAT_STANDALONE_DAY_FIELD,
    /*L*/   UDAT_STANDALONE_MONTH_FIELD,
    /*Q*/   UDAT_QUARTER_FIELD,
    /*q*/   UDAT_STANDALONE_QUARTER_FIELD,
    /*V*/   UDAT_TIMEZONE_SPECIAL_FIELD,
};
707 | ||
b75a7d8f A |
708 | //---------------------------------------------------------------------- |
709 | ||
374ca955 A |
710 | /** |
711 | * Append symbols[value] to dst. Make sure the array index is not out | |
712 | * of bounds. | |
713 | */ | |
73c04bcf | 714 | static inline void |
374ca955 A |
715 | _appendSymbol(UnicodeString& dst, |
716 | int32_t value, | |
717 | const UnicodeString* symbols, | |
718 | int32_t symbolsCount) { | |
73c04bcf A |
719 | U_ASSERT(0 <= value && value < symbolsCount); |
720 | if (0 <= value && value < symbolsCount) { | |
721 | dst += symbols[value]; | |
722 | } | |
723 | } | |
724 | ||
725 | //--------------------------------------------------------------------- | |
46f4442e A |
726 | void |
727 | SimpleDateFormat::appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const{ | |
728 | int32_t offset = cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status); | |
729 | if (U_FAILURE(status)) { | |
730 | return; | |
731 | } | |
732 | if (isDefaultGMTFormat()) { | |
733 | formatGMTDefault(appendTo, offset); | |
734 | } else { | |
735 | ((SimpleDateFormat*)this)->initGMTFormatters(status); | |
736 | if (U_SUCCESS(status)) { | |
737 | int32_t type; | |
738 | if (offset < 0) { | |
739 | offset = -offset; | |
740 | type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTNegativeHM : kGMTNegativeHMS; | |
741 | } else { | |
742 | type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTPositiveHM : kGMTPositiveHMS; | |
743 | } | |
744 | Formattable param(offset, Formattable::kIsDate); | |
745 | FieldPosition fpos(0); | |
746 | fGMTFormatters[type]->format(¶m, 1, appendTo, fpos, status); | |
747 | } | |
748 | } | |
749 | } | |
750 | ||
751 | int32_t | |
752 | SimpleDateFormat::parseGMT(const UnicodeString &text, ParsePosition &pos) const { | |
753 | if (!isDefaultGMTFormat()) { | |
754 | int32_t start = pos.getIndex(); | |
755 | ||
756 | // Quick check | |
757 | UBool prefixMatch = FALSE; | |
758 | int32_t prefixLen = fSymbols->fGmtFormat.indexOf((UChar)0x007B /* '{' */); | |
759 | if (prefixLen > 0 && text.compare(start, prefixLen, fSymbols->fGmtFormat, 0, prefixLen) == 0) { | |
760 | prefixMatch = TRUE; | |
761 | } | |
762 | if (prefixMatch) { | |
763 | // Prefix matched | |
764 | UErrorCode status = U_ZERO_ERROR; | |
765 | ((SimpleDateFormat*)this)->initGMTFormatters(status); | |
766 | if (U_SUCCESS(status)) { | |
767 | Formattable parsed; | |
768 | int32_t parsedCount; | |
769 | ||
770 | // Try negative Hms | |
771 | fGMTFormatters[kGMTNegativeHMS]->parseObject(text, parsed, pos); | |
772 | if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { | |
773 | parsed.getArray(parsedCount); | |
774 | if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { | |
775 | return (int32_t)(-1 * (int64_t)parsed[0].getDate()); | |
776 | } | |
777 | } | |
778 | ||
779 | // Reset ParsePosition | |
780 | pos.setIndex(start); | |
781 | pos.setErrorIndex(-1); | |
782 | ||
783 | // Try positive Hms | |
784 | fGMTFormatters[kGMTPositiveHMS]->parseObject(text, parsed, pos); | |
785 | if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { | |
786 | parsed.getArray(parsedCount); | |
787 | if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { | |
788 | return (int32_t)((int64_t)parsed[0].getDate()); | |
789 | } | |
790 | } | |
791 | ||
792 | // Reset ParsePosition | |
793 | pos.setIndex(start); | |
794 | pos.setErrorIndex(-1); | |
795 | ||
796 | // Try negative Hm | |
797 | fGMTFormatters[kGMTNegativeHM]->parseObject(text, parsed, pos); | |
798 | if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { | |
799 | parsed.getArray(parsedCount); | |
800 | if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { | |
801 | return (int32_t)(-1 * (int64_t)parsed[0].getDate()); | |
802 | } | |
803 | } | |
73c04bcf | 804 | |
46f4442e A |
805 | // Reset ParsePosition |
806 | pos.setIndex(start); | |
807 | pos.setErrorIndex(-1); | |
808 | ||
809 | // Try positive Hm | |
810 | fGMTFormatters[kGMTPositiveHM]->parseObject(text, parsed, pos); | |
811 | if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { | |
812 | parsed.getArray(parsedCount); | |
813 | if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { | |
814 | return (int32_t)((int64_t)parsed[0].getDate()); | |
815 | } | |
816 | } | |
817 | ||
818 | // Reset ParsePosition | |
819 | pos.setIndex(start); | |
820 | pos.setErrorIndex(-1); | |
821 | } | |
822 | // fall through to the default GMT parsing method | |
823 | } | |
824 | } | |
825 | return parseGMTDefault(text, pos); | |
826 | } | |
827 | ||
828 | void | |
829 | SimpleDateFormat::formatGMTDefault(UnicodeString &appendTo, int32_t offset) const { | |
830 | if (offset < 0) { | |
73c04bcf | 831 | appendTo += gGmtMinus; |
46f4442e | 832 | offset = -offset; // suppress the '-' sign for text display. |
73c04bcf A |
833 | }else{ |
834 | appendTo += gGmtPlus; | |
835 | } | |
836 | ||
46f4442e A |
837 | offset /= U_MILLIS_PER_SECOND; // now in seconds |
838 | int32_t sec = offset % 60; | |
839 | offset /= 60; | |
840 | int32_t min = offset % 60; | |
841 | int32_t hour = offset / 60; | |
842 | ||
843 | ||
844 | zeroPaddingNumber(appendTo, hour, 2, 2); | |
73c04bcf | 845 | appendTo += (UChar)0x003A /*':'*/; |
46f4442e A |
846 | zeroPaddingNumber(appendTo, min, 2, 2); |
847 | if (sec != 0) { | |
848 | appendTo += (UChar)0x003A /*':'*/; | |
849 | zeroPaddingNumber(appendTo, sec, 2, 2); | |
850 | } | |
851 | } | |
852 | ||
853 | int32_t | |
854 | SimpleDateFormat::parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const { | |
855 | int32_t start = pos.getIndex(); | |
856 | ||
857 | if (start + kGmtLen + 1 >= text.length()) { | |
858 | pos.setErrorIndex(start); | |
859 | return 0; | |
860 | } | |
861 | ||
862 | int32_t cur = start; | |
863 | // "GMT" | |
864 | if (text.compare(start, kGmtLen, gGmt) != 0) { | |
865 | pos.setErrorIndex(start); | |
866 | return 0; | |
867 | } | |
868 | cur += kGmtLen; | |
869 | // Sign | |
870 | UBool negative = FALSE; | |
871 | if (text.charAt(cur) == (UChar)0x002D /* minus */) { | |
872 | negative = TRUE; | |
873 | } else if (text.charAt(cur) != (UChar)0x002B /* plus */) { | |
874 | pos.setErrorIndex(cur); | |
875 | return 0; | |
876 | } | |
877 | cur++; | |
878 | ||
879 | // Numbers | |
880 | int32_t numLen; | |
881 | pos.setIndex(cur); | |
882 | ||
883 | Formattable number; | |
884 | parseInt(text, number, 6, pos, FALSE); | |
885 | numLen = pos.getIndex() - cur; | |
886 | ||
887 | if (numLen <= 0) { | |
888 | pos.setIndex(start); | |
889 | pos.setErrorIndex(cur); | |
890 | return 0; | |
891 | } | |
892 | ||
893 | int32_t numVal = number.getLong(); | |
894 | ||
895 | int32_t hour = 0; | |
896 | int32_t min = 0; | |
897 | int32_t sec = 0; | |
898 | ||
899 | if (numLen <= 2) { | |
900 | // H[H][:mm[:ss]] | |
901 | hour = numVal; | |
902 | cur += numLen; | |
903 | if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) { | |
904 | cur++; | |
905 | pos.setIndex(cur); | |
906 | parseInt(text, number, 2, pos, FALSE); | |
907 | numLen = pos.getIndex() - cur; | |
908 | if (numLen == 2) { | |
909 | // got minute field | |
910 | min = number.getLong(); | |
911 | cur += numLen; | |
912 | if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) { | |
913 | cur++; | |
914 | pos.setIndex(cur); | |
915 | parseInt(text, number, 2, pos, FALSE); | |
916 | numLen = pos.getIndex() - cur; | |
917 | if (numLen == 2) { | |
918 | // got second field | |
919 | sec = number.getLong(); | |
920 | } else { | |
921 | // reset position | |
922 | pos.setIndex(cur - 1); | |
923 | pos.setErrorIndex(-1); | |
924 | } | |
925 | } | |
926 | } else { | |
927 | // reset postion | |
928 | pos.setIndex(cur - 1); | |
929 | pos.setErrorIndex(-1); | |
930 | } | |
931 | } | |
932 | } else if (numLen == 3 || numLen == 4) { | |
933 | // Hmm or HHmm | |
934 | hour = numVal / 100; | |
935 | min = numVal % 100; | |
936 | } else if (numLen == 5 || numLen == 6) { | |
937 | // Hmmss or HHmmss | |
938 | hour = numVal / 10000; | |
939 | min = (numVal % 10000) / 100; | |
940 | sec = numVal % 100; | |
941 | } else { | |
942 | // HHmmss followed by bogus numbers | |
943 | pos.setIndex(cur + 6); | |
944 | ||
945 | int32_t shift = numLen - 6; | |
946 | while (shift > 0) { | |
947 | numVal /= 10; | |
948 | shift--; | |
949 | } | |
950 | hour = numVal / 10000; | |
951 | min = (numVal % 10000) / 100; | |
952 | sec = numVal % 100; | |
953 | } | |
954 | ||
955 | int32_t offset = ((hour*60 + min)*60 + sec)*1000; | |
956 | if (negative) { | |
957 | offset = -offset; | |
958 | } | |
959 | return offset; | |
960 | } | |
961 | ||
962 | UBool | |
963 | SimpleDateFormat::isDefaultGMTFormat() const { | |
964 | // GMT pattern | |
965 | if (fSymbols->fGmtFormat.length() == 0) { | |
966 | // No GMT pattern is set | |
967 | return TRUE; | |
968 | } else if (fSymbols->fGmtFormat.compare(gDefGmtPat, kGmtPatLen) != 0) { | |
969 | return FALSE; | |
970 | } | |
971 | // Hour patterns | |
972 | if (fSymbols->fGmtHourFormats == NULL || fSymbols->fGmtHourFormatsCount != DateFormatSymbols::GMT_HOUR_COUNT) { | |
973 | // No Hour pattern is set | |
974 | return TRUE; | |
975 | } else if ((fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS].compare(gDefGmtNegHmsPat, kNegHmsLen) != 0) | |
976 | || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM].compare(gDefGmtNegHmPat, kNegHmLen) != 0) | |
977 | || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS].compare(gDefGmtPosHmsPat, kPosHmsLen) != 0) | |
978 | || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM].compare(gDefGmtPosHmPat, kPosHmLen) != 0)) { | |
979 | return FALSE; | |
980 | } | |
981 | return TRUE; | |
982 | } | |
983 | ||
984 | void | |
985 | SimpleDateFormat::formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const { | |
986 | UChar sign = 0x002B /* '+' */; | |
987 | if (offset < 0) { | |
988 | offset = -offset; | |
989 | sign = 0x002D /* '-' */; | |
990 | } | |
991 | appendTo.append(sign); | |
992 | ||
993 | int32_t offsetH = offset / U_MILLIS_PER_HOUR; | |
994 | offset = offset % U_MILLIS_PER_HOUR; | |
995 | int32_t offsetM = offset / U_MILLIS_PER_MINUTE; | |
996 | offset = offset % U_MILLIS_PER_MINUTE; | |
997 | int32_t offsetS = offset / U_MILLIS_PER_SECOND; | |
998 | ||
999 | int32_t num = 0, denom = 0; | |
1000 | if (offsetS == 0) { | |
1001 | offset = offsetH*100 + offsetM; // HHmm | |
1002 | num = offset % 10000; | |
1003 | denom = 1000; | |
1004 | } else { | |
1005 | offset = offsetH*10000 + offsetM*100 + offsetS; // HHmmss | |
1006 | num = offset % 1000000; | |
1007 | denom = 100000; | |
1008 | } | |
1009 | while (denom >= 1) { | |
1010 | UChar digit = (UChar)0x0030 + (num / denom); | |
1011 | appendTo.append(digit); | |
1012 | num = num % denom; | |
1013 | denom /= 10; | |
1014 | } | |
1015 | } | |
1016 | ||
1017 | void | |
1018 | SimpleDateFormat::initGMTFormatters(UErrorCode &status) { | |
1019 | if (U_FAILURE(status)) { | |
1020 | return; | |
1021 | } | |
1022 | umtx_lock(&LOCK); | |
1023 | if (fGMTFormatters == NULL) { | |
1024 | fGMTFormatters = (MessageFormat**)uprv_malloc(kNumGMTFormatters * sizeof(MessageFormat*)); | |
1025 | if (fGMTFormatters) { | |
1026 | for (int32_t i = 0; i < kNumGMTFormatters; i++) { | |
1027 | const UnicodeString *hourPattern = NULL; //initialized it to avoid warning | |
1028 | switch (i) { | |
1029 | case kGMTNegativeHMS: | |
1030 | hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS]); | |
1031 | break; | |
1032 | case kGMTNegativeHM: | |
1033 | hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM]); | |
1034 | break; | |
1035 | case kGMTPositiveHMS: | |
1036 | hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS]); | |
1037 | break; | |
1038 | case kGMTPositiveHM: | |
1039 | hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM]); | |
1040 | break; | |
1041 | } | |
1042 | fGMTFormatters[i] = new MessageFormat(fSymbols->fGmtFormat, status); | |
1043 | if (U_FAILURE(status)) { | |
1044 | break; | |
1045 | } | |
1046 | SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone(); | |
1047 | sdf->adoptTimeZone(TimeZone::createTimeZone(UnicodeString(gEtcUTC))); | |
1048 | sdf->applyPattern(*hourPattern); | |
1049 | fGMTFormatters[i]->adoptFormat(0, sdf); | |
1050 | } | |
1051 | } else { | |
1052 | status = U_MEMORY_ALLOCATION_ERROR; | |
1053 | } | |
1054 | } | |
1055 | umtx_unlock(&LOCK); | |
374ca955 A |
1056 | } |
1057 | ||
73c04bcf | 1058 | //--------------------------------------------------------------------- |
b75a7d8f A |
1059 | void |
1060 | SimpleDateFormat::subFormat(UnicodeString &appendTo, | |
1061 | UChar ch, | |
1062 | int32_t count, | |
1063 | FieldPosition& pos, | |
1064 | Calendar& cal, | |
1065 | UErrorCode& status) const | |
1066 | { | |
374ca955 A |
1067 | if (U_FAILURE(status)) { |
1068 | return; | |
1069 | } | |
1070 | ||
b75a7d8f A |
1071 | // this function gets called by format() to produce the appropriate substitution |
1072 | // text for an individual pattern symbol (e.g., "HH" or "yyyy") | |
1073 | ||
1074 | UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch); | |
374ca955 | 1075 | UDateFormatField patternCharIndex; |
b75a7d8f A |
1076 | const int32_t maxIntCount = 10; |
1077 | int32_t beginOffset = appendTo.length(); | |
1078 | ||
1079 | // if the pattern character is unrecognized, signal an error and dump out | |
1080 | if (patternCharPtr == NULL) | |
1081 | { | |
1082 | status = U_INVALID_FORMAT_ERROR; | |
374ca955 | 1083 | return; |
b75a7d8f A |
1084 | } |
1085 | ||
374ca955 | 1086 | patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars()); |
b75a7d8f A |
1087 | UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; |
1088 | int32_t value = cal.get(field, status); | |
1089 | if (U_FAILURE(status)) { | |
1090 | return; | |
1091 | } | |
1092 | ||
1093 | switch (patternCharIndex) { | |
1094 | ||
1095 | // for any "G" symbol, write out the appropriate era string | |
46f4442e | 1096 | // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name |
374ca955 | 1097 | case UDAT_ERA_FIELD: |
46f4442e A |
1098 | if (count == 5) |
1099 | _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount); | |
1100 | else if (count == 4) | |
73c04bcf A |
1101 | _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount); |
1102 | else | |
1103 | _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount); | |
b75a7d8f A |
1104 | break; |
1105 | ||
46f4442e A |
1106 | // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits |
1107 | // NEW: UTS#35: | |
1108 | //Year y yy yyy yyyy yyyyy | |
1109 | //AD 1 1 01 001 0001 00001 | |
1110 | //AD 12 12 12 012 0012 00012 | |
1111 | //AD 123 123 23 123 0123 00123 | |
1112 | //AD 1234 1234 34 1234 1234 01234 | |
1113 | //AD 12345 12345 45 12345 12345 12345 | |
1114 | case UDAT_YEAR_FIELD: | |
374ca955 | 1115 | case UDAT_YEAR_WOY_FIELD: |
46f4442e | 1116 | if(count == 2) |
b75a7d8f | 1117 | zeroPaddingNumber(appendTo, value, 2, 2); |
46f4442e A |
1118 | else |
1119 | zeroPaddingNumber(appendTo, value, count, maxIntCount); | |
1120 | break; | |
b75a7d8f A |
1121 | |
1122 | // for "MMMM", write out the whole month name, for "MMM", write out the month | |
1123 | // abbreviation, for "M" or "MM", write out the month as a number with the | |
1124 | // appropriate number of digits | |
73c04bcf | 1125 | // for "MMMMM", use the narrow form |
374ca955 | 1126 | case UDAT_MONTH_FIELD: |
73c04bcf A |
1127 | if (count == 5) |
1128 | _appendSymbol(appendTo, value, fSymbols->fNarrowMonths, | |
1129 | fSymbols->fNarrowMonthsCount); | |
1130 | else if (count == 4) | |
374ca955 A |
1131 | _appendSymbol(appendTo, value, fSymbols->fMonths, |
1132 | fSymbols->fMonthsCount); | |
b75a7d8f | 1133 | else if (count == 3) |
374ca955 A |
1134 | _appendSymbol(appendTo, value, fSymbols->fShortMonths, |
1135 | fSymbols->fShortMonthsCount); | |
b75a7d8f A |
1136 | else |
1137 | zeroPaddingNumber(appendTo, value + 1, count, maxIntCount); | |
1138 | break; | |
1139 | ||
73c04bcf A |
1140 | // for "LLLL", write out the whole month name, for "LLL", write out the month |
1141 | // abbreviation, for "L" or "LL", write out the month as a number with the | |
1142 | // appropriate number of digits | |
1143 | // for "LLLLL", use the narrow form | |
1144 | case UDAT_STANDALONE_MONTH_FIELD: | |
1145 | if (count == 5) | |
1146 | _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowMonths, | |
1147 | fSymbols->fStandaloneNarrowMonthsCount); | |
1148 | else if (count == 4) | |
1149 | _appendSymbol(appendTo, value, fSymbols->fStandaloneMonths, | |
1150 | fSymbols->fStandaloneMonthsCount); | |
1151 | else if (count == 3) | |
1152 | _appendSymbol(appendTo, value, fSymbols->fStandaloneShortMonths, | |
1153 | fSymbols->fStandaloneShortMonthsCount); | |
1154 | else | |
1155 | zeroPaddingNumber(appendTo, value + 1, count, maxIntCount); | |
1156 | break; | |
1157 | ||
b75a7d8f | 1158 | // for "k" and "kk", write out the hour, adjusting midnight to appear as "24" |
374ca955 | 1159 | case UDAT_HOUR_OF_DAY1_FIELD: |
b75a7d8f A |
1160 | if (value == 0) |
1161 | zeroPaddingNumber(appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount); | |
1162 | else | |
1163 | zeroPaddingNumber(appendTo, value, count, maxIntCount); | |
1164 | break; | |
1165 | ||
374ca955 A |
1166 | case UDAT_FRACTIONAL_SECOND_FIELD: |
1167 | // Fractional seconds left-justify | |
1168 | { | |
1169 | fNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count); | |
1170 | fNumberFormat->setMaximumIntegerDigits(maxIntCount); | |
1171 | if (count == 1) { | |
1172 | value = (value + 50) / 100; | |
1173 | } else if (count == 2) { | |
1174 | value = (value + 5) / 10; | |
1175 | } | |
1176 | FieldPosition p(0); | |
1177 | fNumberFormat->format(value, appendTo, p); | |
1178 | if (count > 3) { | |
1179 | fNumberFormat->setMinimumIntegerDigits(count - 3); | |
1180 | fNumberFormat->format((int32_t)0, appendTo, p); | |
1181 | } | |
1182 | } | |
b75a7d8f A |
1183 | break; |
1184 | ||
46f4442e A |
1185 | // for "ee" or "e", use local numeric day-of-the-week |
1186 | // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name | |
1187 | // for "EEEE" or "eeee", write out the wide day-of-the-week name | |
1188 | // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name | |
1189 | case UDAT_DOW_LOCAL_FIELD: | |
1190 | if ( count < 3 ) { | |
1191 | zeroPaddingNumber(appendTo, value, count, maxIntCount); | |
1192 | break; | |
1193 | } | |
1194 | // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week, | |
1195 | // we want standard day-of-week, so first fix value to work for EEEEE-EEE. | |
1196 | value = cal.get(UCAL_DAY_OF_WEEK, status); | |
1197 | if (U_FAILURE(status)) { | |
1198 | return; | |
1199 | } | |
1200 | // fall through, do not break here | |
374ca955 | 1201 | case UDAT_DAY_OF_WEEK_FIELD: |
73c04bcf A |
1202 | if (count == 5) |
1203 | _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays, | |
1204 | fSymbols->fNarrowWeekdaysCount); | |
1205 | else if (count == 4) | |
374ca955 A |
1206 | _appendSymbol(appendTo, value, fSymbols->fWeekdays, |
1207 | fSymbols->fWeekdaysCount); | |
73c04bcf | 1208 | else |
374ca955 A |
1209 | _appendSymbol(appendTo, value, fSymbols->fShortWeekdays, |
1210 | fSymbols->fShortWeekdaysCount); | |
b75a7d8f A |
1211 | break; |
1212 | ||
73c04bcf A |
1213 | // for "ccc", write out the abbreviated day-of-the-week name |
1214 | // for "cccc", write out the wide day-of-the-week name | |
1215 | // for "ccccc", use the narrow day-of-the-week name | |
1216 | case UDAT_STANDALONE_DAY_FIELD: | |
46f4442e A |
1217 | if ( count < 3 ) { |
1218 | zeroPaddingNumber(appendTo, value, 1, maxIntCount); | |
1219 | break; | |
1220 | } | |
1221 | // fall through to alpha DOW handling, but for that we don't want local day-of-week, | |
1222 | // we want standard day-of-week, so first fix value. | |
1223 | value = cal.get(UCAL_DAY_OF_WEEK, status); | |
1224 | if (U_FAILURE(status)) { | |
1225 | return; | |
1226 | } | |
73c04bcf A |
1227 | if (count == 5) |
1228 | _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays, | |
1229 | fSymbols->fStandaloneNarrowWeekdaysCount); | |
1230 | else if (count == 4) | |
1231 | _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays, | |
1232 | fSymbols->fStandaloneWeekdaysCount); | |
46f4442e | 1233 | else // count == 3 |
73c04bcf A |
1234 | _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays, |
1235 | fSymbols->fStandaloneShortWeekdaysCount); | |
73c04bcf A |
1236 | break; |
1237 | ||
b75a7d8f | 1238 | // for and "a" symbol, write out the whole AM/PM string |
374ca955 A |
1239 | case UDAT_AM_PM_FIELD: |
1240 | _appendSymbol(appendTo, value, fSymbols->fAmPms, | |
1241 | fSymbols->fAmPmsCount); | |
b75a7d8f A |
1242 | break; |
1243 | ||
1244 | // for "h" and "hh", write out the hour, adjusting noon and midnight to show up | |
1245 | // as "12" | |
374ca955 | 1246 | case UDAT_HOUR1_FIELD: |
b75a7d8f A |
1247 | if (value == 0) |
1248 | zeroPaddingNumber(appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount); | |
1249 | else | |
1250 | zeroPaddingNumber(appendTo, value, count, maxIntCount); | |
1251 | break; | |
1252 | ||
1253 | // for the "z" symbols, we have to check our time zone data first. If we have a | |
73c04bcf A |
1254 | // localized name for the time zone, then "zzzz" / "zzz" indicate whether |
1255 | // daylight time is in effect (long/short) and "zz" / "z" do not (long/short). | |
1256 | // If we don't have a localized time zone name, | |
b75a7d8f A |
1257 | // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the |
1258 | // offset from GMT) regardless of how many z's were in the pattern symbol | |
73c04bcf | 1259 | case UDAT_TIMEZONE_FIELD: |
46f4442e A |
1260 | case UDAT_TIMEZONE_GENERIC_FIELD: |
1261 | case UDAT_TIMEZONE_SPECIAL_FIELD: | |
1262 | { | |
1263 | UnicodeString zoneString; | |
1264 | const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat(); | |
1265 | if (zsf) { | |
1266 | if (patternCharIndex == UDAT_TIMEZONE_FIELD) { | |
1267 | if (count < 4) { | |
1268 | // "z", "zz", "zzz" | |
1269 | zsf->getSpecificShortString(cal, TRUE /*commonly used only*/, | |
1270 | zoneString, status); | |
1271 | } else { | |
1272 | // "zzzz" | |
1273 | zsf->getSpecificLongString(cal, zoneString, status); | |
73c04bcf | 1274 | } |
46f4442e A |
1275 | } else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) { |
1276 | if (count == 1) { | |
1277 | // "v" | |
1278 | zsf->getGenericShortString(cal, TRUE /*commonly used only*/, | |
1279 | zoneString, status); | |
1280 | } else if (count == 4) { | |
1281 | // "vvvv" | |
1282 | zsf->getGenericLongString(cal, zoneString, status); | |
1283 | } | |
1284 | } else { // patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD | |
1285 | if (count == 1) { | |
1286 | // "V" | |
1287 | zsf->getSpecificShortString(cal, FALSE /*ignore commonly used*/, | |
1288 | zoneString, status); | |
1289 | } else if (count == 4) { | |
1290 | // "VVVV" | |
1291 | zsf->getGenericLocationString(cal, zoneString, status); | |
73c04bcf A |
1292 | } |
1293 | } | |
1294 | } | |
46f4442e | 1295 | if (zoneString.isEmpty()) { |
73c04bcf | 1296 | appendGMT(appendTo, cal, status); |
46f4442e A |
1297 | } else { |
1298 | appendTo += zoneString; | |
73c04bcf | 1299 | } |
b75a7d8f | 1300 | } |
46f4442e A |
1301 | break; |
1302 | ||
1303 | case UDAT_TIMEZONE_RFC_FIELD: // 'Z' - TIMEZONE_RFC | |
1304 | if (count < 4) { | |
1305 | // RFC822 format, must use ASCII digits | |
1306 | value = (cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status)); | |
1307 | formatRFC822TZ(appendTo, value); | |
1308 | } else { | |
1309 | // long form, localized GMT pattern | |
1310 | appendGMT(appendTo, cal, status); | |
374ca955 A |
1311 | } |
1312 | break; | |
1313 | ||
73c04bcf A |
1314 | case UDAT_QUARTER_FIELD: |
1315 | if (count >= 4) | |
1316 | _appendSymbol(appendTo, value/3, fSymbols->fQuarters, | |
1317 | fSymbols->fQuartersCount); | |
1318 | else if (count == 3) | |
1319 | _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters, | |
1320 | fSymbols->fShortQuartersCount); | |
1321 | else | |
1322 | zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount); | |
1323 | break; | |
1324 | ||
1325 | case UDAT_STANDALONE_QUARTER_FIELD: | |
1326 | if (count >= 4) | |
1327 | _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters, | |
1328 | fSymbols->fStandaloneQuartersCount); | |
1329 | else if (count == 3) | |
1330 | _appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters, | |
1331 | fSymbols->fStandaloneShortQuartersCount); | |
1332 | else | |
1333 | zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount); | |
1334 | break; | |
1335 | ||
1336 | ||
b75a7d8f A |
1337 | // all of the other pattern symbols can be formatted as simple numbers with |
1338 | // appropriate zero padding | |
1339 | default: | |
b75a7d8f A |
1340 | zeroPaddingNumber(appendTo, value, count, maxIntCount); |
1341 | break; | |
1342 | } | |
1343 | ||
1344 | // if the field we're formatting is the one the FieldPosition says it's interested | |
1345 | // in, fill in the FieldPosition with this field's positions | |
374ca955 A |
1346 | if (pos.getBeginIndex() == pos.getEndIndex() && |
1347 | pos.getField() == fgPatternIndexToDateFormatField[patternCharIndex]) { | |
1348 | pos.setBeginIndex(beginOffset); | |
1349 | pos.setEndIndex(appendTo.length()); | |
b75a7d8f A |
1350 | } |
1351 | } | |
1352 | ||
1353 | //---------------------------------------------------------------------- | |
b75a7d8f A |
1354 | void |
1355 | SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int32_t minDigits, int32_t maxDigits) const | |
1356 | { | |
46f4442e A |
1357 | if (fNumberFormat!=NULL) { |
1358 | FieldPosition pos(0); | |
b75a7d8f | 1359 | |
46f4442e A |
1360 | fNumberFormat->setMinimumIntegerDigits(minDigits); |
1361 | fNumberFormat->setMaximumIntegerDigits(maxDigits); | |
1362 | fNumberFormat->format(value, appendTo, pos); // 3rd arg is there to speed up processing | |
1363 | } | |
b75a7d8f A |
1364 | } |
1365 | ||
1366 | //---------------------------------------------------------------------- | |
1367 | ||
1368 | /** | |
1369 | * Format characters that indicate numeric fields. The character | |
1370 | * at index 0 is treated specially. | |
1371 | */ | |
46f4442e | 1372 | static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */ |
b75a7d8f A |
1373 | |
1374 | /** | |
1375 | * Return true if the given format character, occuring count | |
1376 | * times, represents a numeric field. | |
1377 | */ | |
1378 | UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) { | |
1379 | UnicodeString s(NUMERIC_FORMAT_CHARS); | |
1380 | int32_t i = s.indexOf(formatChar); | |
1381 | return (i > 0 || (i == 0 && count < 3)); | |
1382 | } | |
1383 | ||
1384 | void | |
1385 | SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& parsePos) const | |
1386 | { | |
46f4442e | 1387 | UErrorCode status = U_ZERO_ERROR; |
b75a7d8f A |
1388 | int32_t pos = parsePos.getIndex(); |
1389 | int32_t start = pos; | |
1390 | UBool ambiguousYear[] = { FALSE }; | |
1391 | int32_t count = 0; | |
46f4442e A |
1392 | |
1393 | UBool lenient = isLenient(); | |
b75a7d8f | 1394 | |
46f4442e A |
1395 | // hack, reset tztype, cast away const |
1396 | ((SimpleDateFormat*)this)->tztype = TZTYPE_UNK; | |
73c04bcf | 1397 | |
b75a7d8f A |
1398 | // For parsing abutting numeric fields. 'abutPat' is the |
1399 | // offset into 'pattern' of the first of 2 or more abutting | |
1400 | // numeric fields. 'abutStart' is the offset into 'text' | |
1401 | // where parsing the fields begins. 'abutPass' starts off as 0 | |
1402 | // and increments each time we try to parse the fields. | |
1403 | int32_t abutPat = -1; // If >=0, we are in a run of abutting numeric fields | |
1404 | int32_t abutStart = 0; | |
1405 | int32_t abutPass = 0; | |
1406 | UBool inQuote = FALSE; | |
1407 | ||
1408 | const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS); | |
1409 | ||
1410 | for (int32_t i=0; i<fPattern.length(); ++i) { | |
1411 | UChar ch = fPattern.charAt(i); | |
1412 | ||
1413 | // Handle alphabetic field characters. | |
1414 | if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z] | |
1415 | int32_t fieldPat = i; | |
1416 | ||
1417 | // Count the length of this field specifier | |
1418 | count = 1; | |
1419 | while ((i+1)<fPattern.length() && | |
1420 | fPattern.charAt(i+1) == ch) { | |
1421 | ++count; | |
1422 | ++i; | |
1423 | } | |
1424 | ||
1425 | if (isNumeric(ch, count)) { | |
1426 | if (abutPat < 0) { | |
1427 | // Determine if there is an abutting numeric field. For | |
1428 | // most fields we can just look at the next characters, | |
1429 | // but the 'm' field is either numeric or text, | |
1430 | // depending on the count, so we have to look ahead for | |
1431 | // that field. | |
1432 | if ((i+1)<fPattern.length()) { | |
1433 | UBool abutting; | |
1434 | UChar nextCh = fPattern.charAt(i+1); | |
1435 | int32_t k = numericFormatChars.indexOf(nextCh); | |
1436 | if (k == 0) { | |
1437 | int32_t j = i+2; | |
1438 | while (j<fPattern.length() && | |
1439 | fPattern.charAt(j) == nextCh) { | |
1440 | ++j; | |
1441 | } | |
1442 | abutting = (j-i) < 4; // nextCount < 3 | |
1443 | } else { | |
1444 | abutting = k > 0; | |
1445 | } | |
1446 | ||
1447 | // Record the start of a set of abutting numeric | |
1448 | // fields. | |
1449 | if (abutting) { | |
1450 | abutPat = fieldPat; | |
1451 | abutStart = pos; | |
1452 | abutPass = 0; | |
1453 | } | |
1454 | } | |
1455 | } | |
1456 | } else { | |
1457 | abutPat = -1; // End of any abutting fields | |
1458 | } | |
1459 | ||
1460 | // Handle fields within a run of abutting numeric fields. Take | |
1461 | // the pattern "HHmmss" as an example. We will try to parse | |
1462 | // 2/2/2 characters of the input text, then if that fails, | |
1463 | // 1/2/2. We only adjust the width of the leftmost field; the | |
1464 | // others remain fixed. This allows "123456" => 12:34:56, but | |
1465 | // "12345" => 1:23:45. Likewise, for the pattern "yyyyMMdd" we | |
1466 | // try 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2. | |
1467 | if (abutPat >= 0) { | |
1468 | // If we are at the start of a run of abutting fields, then | |
1469 | // shorten this field in each pass. If we can't shorten | |
1470 | // this field any more, then the parse of this set of | |
1471 | // abutting numeric fields has failed. | |
1472 | if (fieldPat == abutPat) { | |
1473 | count -= abutPass++; | |
1474 | if (count == 0) { | |
1475 | parsePos.setIndex(start); | |
1476 | parsePos.setErrorIndex(pos); | |
1477 | return; | |
1478 | } | |
1479 | } | |
1480 | ||
1481 | pos = subParse(text, pos, ch, count, | |
1482 | TRUE, FALSE, ambiguousYear, cal); | |
1483 | ||
1484 | // If the parse fails anywhere in the run, back up to the | |
1485 | // start of the run and retry. | |
1486 | if (pos < 0) { | |
1487 | i = abutPat - 1; | |
1488 | pos = abutStart; | |
1489 | continue; | |
1490 | } | |
1491 | } | |
1492 | ||
1493 | // Handle non-numeric fields and non-abutting numeric | |
1494 | // fields. | |
1495 | else { | |
46f4442e A |
1496 | int32_t s = subParse(text, pos, ch, count, |
1497 | FALSE, TRUE, ambiguousYear, cal); | |
1498 | ||
1499 | if (s <= 0) { | |
1500 | status = U_PARSE_ERROR; | |
1501 | goto ExitParse; | |
b75a7d8f | 1502 | } |
46f4442e | 1503 | pos = s; |
b75a7d8f A |
1504 | } |
1505 | } | |
1506 | ||
1507 | // Handle literal pattern characters. These are any | |
1508 | // quoted characters and non-alphabetic unquoted | |
1509 | // characters. | |
1510 | else { | |
1511 | ||
1512 | abutPat = -1; // End of any abutting fields | |
46f4442e A |
1513 | |
1514 | if (! matchLiterals(fPattern, i, text, pos, lenient)) { | |
1515 | status = U_PARSE_ERROR; | |
1516 | goto ExitParse; | |
b75a7d8f | 1517 | } |
b75a7d8f A |
1518 | } |
1519 | } | |
1520 | ||
1521 | // At this point the fields of Calendar have been set. Calendar | |
1522 | // will fill in default values for missing fields when the time | |
1523 | // is computed. | |
1524 | ||
1525 | parsePos.setIndex(pos); | |
1526 | ||
1527 | // This part is a problem: When we call parsedDate.after, we compute the time. | |
1528 | // Take the date April 3 2004 at 2:30 am. When this is first set up, the year | |
1529 | // will be wrong if we're parsing a 2-digit year pattern. It will be 1904. | |
1530 | // April 3 1904 is a Sunday (unlike 2004) so it is the DST onset day. 2:30 am | |
1531 | // is therefore an "impossible" time, since the time goes from 1:59 to 3:00 am | |
1532 | // on that day. It is therefore parsed out to fields as 3:30 am. Then we | |
1533 | // add 100 years, and get April 3 2004 at 3:30 am. Note that April 3 2004 is | |
1534 | // a Saturday, so it can have a 2:30 am -- and it should. [LIU] | |
1535 | /* | |
1536 | UDate parsedDate = calendar.getTime(); | |
1537 | if( ambiguousYear[0] && !parsedDate.after(fDefaultCenturyStart) ) { | |
1538 | calendar.add(Calendar.YEAR, 100); | |
1539 | parsedDate = calendar.getTime(); | |
1540 | } | |
1541 | */ | |
1542 | // Because of the above condition, save off the fields in case we need to readjust. | |
1543 | // The procedure we use here is not particularly efficient, but there is no other | |
1544 | // way to do this given the API restrictions present in Calendar. We minimize | |
1545 | // inefficiency by only performing this computation when it might apply, that is, | |
1546 | // when the two-digit year is equal to the start year, and thus might fall at the | |
1547 | // front or the back of the default century. This only works because we adjust | |
1548 | // the year correctly to start with in other cases -- see subParse(). | |
46f4442e | 1549 | if (ambiguousYear[0] || tztype != TZTYPE_UNK) // If this is true then the two-digit year == the default start year |
b75a7d8f A |
1550 | { |
1551 | // We need a copy of the fields, and we need to avoid triggering a call to | |
1552 | // complete(), which will recalculate the fields. Since we can't access | |
1553 | // the fields[] array in Calendar, we clone the entire object. This will | |
1554 | // stop working if Calendar.clone() is ever rewritten to call complete(). | |
46f4442e | 1555 | Calendar *copy; |
73c04bcf | 1556 | if (ambiguousYear[0]) { |
46f4442e A |
1557 | copy = cal.clone(); |
1558 | // Check for failed cloning. | |
1559 | if (copy == NULL) { | |
1560 | status = U_MEMORY_ALLOCATION_ERROR; | |
1561 | goto ExitParse; | |
1562 | } | |
73c04bcf A |
1563 | UDate parsedDate = copy->getTime(status); |
1564 | // {sfb} check internalGetDefaultCenturyStart | |
1565 | if (fHaveDefaultCentury && (parsedDate < fDefaultCenturyStart)) { | |
1566 | // We can't use add here because that does a complete() first. | |
1567 | cal.set(UCAL_YEAR, fDefaultCenturyStartYear + 100); | |
1568 | } | |
46f4442e | 1569 | delete copy; |
b75a7d8f | 1570 | } |
73c04bcf | 1571 | |
46f4442e A |
1572 | if (tztype != TZTYPE_UNK) { |
1573 | copy = cal.clone(); | |
1574 | // Check for failed cloning. | |
1575 | if (copy == NULL) { | |
1576 | status = U_MEMORY_ALLOCATION_ERROR; | |
1577 | goto ExitParse; | |
1578 | } | |
1579 | const TimeZone & tz = cal.getTimeZone(); | |
1580 | BasicTimeZone *btz = NULL; | |
1581 | ||
1582 | if (tz.getDynamicClassID() == OlsonTimeZone::getStaticClassID() | |
1583 | || tz.getDynamicClassID() == SimpleTimeZone::getStaticClassID() | |
1584 | || tz.getDynamicClassID() == RuleBasedTimeZone::getStaticClassID() | |
1585 | || tz.getDynamicClassID() == VTimeZone::getStaticClassID()) { | |
1586 | btz = (BasicTimeZone*)&tz; | |
1587 | } | |
73c04bcf | 1588 | |
46f4442e A |
1589 | // Get local millis |
1590 | copy->set(UCAL_ZONE_OFFSET, 0); | |
1591 | copy->set(UCAL_DST_OFFSET, 0); | |
1592 | UDate localMillis = copy->getTime(status); | |
1593 | ||
1594 | // Make sure parsed time zone type (Standard or Daylight) | |
1595 | // matches the rule used by the parsed time zone. | |
1596 | int32_t raw, dst; | |
1597 | if (btz != NULL) { | |
1598 | if (tztype == TZTYPE_STD) { | |
1599 | btz->getOffsetFromLocal(localMillis, | |
1600 | BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status); | |
1601 | } else { | |
1602 | btz->getOffsetFromLocal(localMillis, | |
1603 | BasicTimeZone::kDaylight, BasicTimeZone::kDaylight, raw, dst, status); | |
1604 | } | |
1605 | } else { | |
1606 | // No good way to resolve ambiguous time at transition, | |
1607 | // but the following code works in most cases. | |
1608 | tz.getOffset(localMillis, TRUE, raw, dst, status); | |
73c04bcf | 1609 | } |
73c04bcf | 1610 | |
46f4442e A |
1611 | // Now, compare the results with parsed type, either standard or daylight saving time |
1612 | int32_t resolvedSavings = dst; | |
1613 | if (tztype == TZTYPE_STD) { | |
1614 | if (dst != 0) { | |
1615 | // Override DST_OFFSET = 0 in the result calendar | |
1616 | resolvedSavings = 0; | |
1617 | } | |
1618 | } else { // tztype == TZTYPE_DST | |
1619 | if (dst == 0) { | |
1620 | if (btz != NULL) { | |
1621 | UDate time = localMillis + raw; | |
1622 | // We use the nearest daylight saving time rule. | |
1623 | TimeZoneTransition beforeTrs, afterTrs; | |
1624 | UDate beforeT = time, afterT = time; | |
1625 | int32_t beforeSav = 0, afterSav = 0; | |
1626 | UBool beforeTrsAvail, afterTrsAvail; | |
1627 | ||
1628 | // Search for DST rule before or on the time | |
1629 | while (TRUE) { | |
1630 | beforeTrsAvail = btz->getPreviousTransition(beforeT, TRUE, beforeTrs); | |
1631 | if (!beforeTrsAvail) { | |
1632 | break; | |
1633 | } | |
1634 | beforeT = beforeTrs.getTime() - 1; | |
1635 | beforeSav = beforeTrs.getFrom()->getDSTSavings(); | |
1636 | if (beforeSav != 0) { | |
1637 | break; | |
1638 | } | |
1639 | } | |
b75a7d8f | 1640 | |
46f4442e A |
1641 | // Search for DST rule after the time |
1642 | while (TRUE) { | |
1643 | afterTrsAvail = btz->getNextTransition(afterT, FALSE, afterTrs); | |
1644 | if (!afterTrsAvail) { | |
1645 | break; | |
1646 | } | |
1647 | afterT = afterTrs.getTime(); | |
1648 | afterSav = afterTrs.getTo()->getDSTSavings(); | |
1649 | if (afterSav != 0) { | |
1650 | break; | |
1651 | } | |
1652 | } | |
1653 | ||
1654 | if (beforeTrsAvail && afterTrsAvail) { | |
1655 | if (time - beforeT > afterT - time) { | |
1656 | resolvedSavings = afterSav; | |
1657 | } else { | |
1658 | resolvedSavings = beforeSav; | |
1659 | } | |
1660 | } else if (beforeTrsAvail && beforeSav != 0) { | |
1661 | resolvedSavings = beforeSav; | |
1662 | } else if (afterTrsAvail && afterSav != 0) { | |
1663 | resolvedSavings = afterSav; | |
1664 | } else { | |
1665 | resolvedSavings = btz->getDSTSavings(); | |
1666 | } | |
1667 | } else { | |
1668 | resolvedSavings = tz.getDSTSavings(); | |
1669 | } | |
1670 | if (resolvedSavings == 0) { | |
1671 | // final fallback | |
1672 | resolvedSavings = U_MILLIS_PER_HOUR; | |
1673 | } | |
1674 | } | |
1675 | } | |
1676 | cal.set(UCAL_ZONE_OFFSET, raw); | |
1677 | cal.set(UCAL_DST_OFFSET, resolvedSavings); | |
1678 | delete copy; | |
1679 | } | |
1680 | } | |
1681 | ExitParse: | |
b75a7d8f A |
1682 | // If any Calendar calls failed, we pretend that we |
1683 | // couldn't parse the string, when in reality this isn't quite accurate-- | |
1684 | // we did parse it; the Calendar calls just failed. | |
1685 | if (U_FAILURE(status)) { | |
1686 | parsePos.setErrorIndex(pos); | |
1687 | parsePos.setIndex(start); | |
1688 | } | |
1689 | } | |
1690 | ||
1691 | UDate | |
1692 | SimpleDateFormat::parse( const UnicodeString& text, | |
1693 | ParsePosition& pos) const { | |
1694 | // redefined here because the other parse() function hides this function's | |
1695 | // cunterpart on DateFormat | |
1696 | return DateFormat::parse(text, pos); | |
1697 | } | |
1698 | ||
1699 | UDate | |
1700 | SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const | |
1701 | { | |
1702 | // redefined here because the other parse() function hides this function's | |
1703 | // counterpart on DateFormat | |
1704 | return DateFormat::parse(text, status); | |
1705 | } | |
1706 | //---------------------------------------------------------------------- | |
1707 | ||
73c04bcf A |
1708 | int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text, |
1709 | int32_t start, | |
1710 | UCalendarDateFields field, | |
1711 | const UnicodeString* data, | |
1712 | int32_t dataCount, | |
1713 | Calendar& cal) const | |
1714 | { | |
1715 | int32_t i = 0; | |
1716 | int32_t count = dataCount; | |
1717 | ||
1718 | // There may be multiple strings in the data[] array which begin with | |
1719 | // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech). | |
1720 | // We keep track of the longest match, and return that. Note that this | |
1721 | // unfortunately requires us to test all array elements. | |
1722 | int32_t bestMatchLength = 0, bestMatch = -1; | |
1723 | ||
1724 | // {sfb} kludge to support case-insensitive comparison | |
1725 | // {markus 2002oct11} do not just use caseCompareBetween because we do not know | |
1726 | // the length of the match after case folding | |
1727 | // {alan 20040607} don't case change the whole string, since the length | |
1728 | // can change | |
1729 | // TODO we need a case-insensitive startsWith function | |
1730 | UnicodeString lcase, lcaseText; | |
1731 | text.extract(start, INT32_MAX, lcaseText); | |
1732 | lcaseText.foldCase(); | |
1733 | ||
1734 | for (; i < count; ++i) | |
1735 | { | |
1736 | // Always compare if we have no match yet; otherwise only compare | |
1737 | // against potentially better matches (longer strings). | |
1738 | ||
1739 | lcase.fastCopyFrom(data[i]).foldCase(); | |
1740 | int32_t length = lcase.length(); | |
1741 | ||
1742 | if (length > bestMatchLength && | |
1743 | lcaseText.compareBetween(0, length, lcase, 0, length) == 0) | |
1744 | { | |
1745 | bestMatch = i; | |
1746 | bestMatchLength = length; | |
1747 | } | |
1748 | } | |
1749 | if (bestMatch >= 0) | |
1750 | { | |
1751 | cal.set(field, bestMatch * 3); | |
1752 | ||
1753 | // Once we have a match, we have to determine the length of the | |
1754 | // original source string. This will usually be == the length of | |
1755 | // the case folded string, but it may differ (e.g. sharp s). | |
1756 | lcase.fastCopyFrom(data[bestMatch]).foldCase(); | |
1757 | ||
1758 | // Most of the time, the length will be the same as the length | |
1759 | // of the string from the locale data. Sometimes it will be | |
1760 | // different, in which case we will have to figure it out by | |
1761 | // adding a character at a time, until we have a match. We do | |
1762 | // this all in one loop, where we try 'len' first (at index | |
1763 | // i==0). | |
1764 | int32_t len = data[bestMatch].length(); // 99+% of the time | |
1765 | int32_t n = text.length() - start; | |
1766 | for (i=0; i<=n; ++i) { | |
1767 | int32_t j=i; | |
1768 | if (i == 0) { | |
1769 | j = len; | |
1770 | } else if (i == len) { | |
1771 | continue; // already tried this when i was 0 | |
1772 | } | |
1773 | text.extract(start, j, lcaseText); | |
1774 | lcaseText.foldCase(); | |
1775 | if (lcase == lcaseText) { | |
1776 | return start + j; | |
1777 | } | |
1778 | } | |
1779 | } | |
1780 | ||
1781 | return -start; | |
1782 | } | |
1783 | ||
46f4442e A |
1784 | //---------------------------------------------------------------------- |
1785 | UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern, | |
1786 | int32_t &patternOffset, | |
1787 | const UnicodeString &text, | |
1788 | int32_t &textOffset, | |
1789 | UBool lenient) | |
1790 | { | |
1791 | UBool inQuote = FALSE; | |
1792 | UnicodeString literal; | |
1793 | int32_t i = patternOffset; | |
1794 | ||
1795 | // scan pattern looking for contiguous literal characters | |
1796 | for ( ; i < pattern.length(); i += 1) { | |
1797 | UChar ch = pattern.charAt(i); | |
1798 | ||
1799 | if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z] | |
1800 | break; | |
1801 | } | |
1802 | ||
1803 | if (ch == QUOTE) { | |
1804 | // Match a quote literal ('') inside OR outside of quotes | |
1805 | if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) { | |
1806 | i += 1; | |
1807 | } else { | |
1808 | inQuote = !inQuote; | |
1809 | continue; | |
1810 | } | |
1811 | } | |
1812 | ||
1813 | literal += ch; | |
1814 | } | |
1815 | ||
1816 | // at this point, literal contains the literal text | |
1817 | // and i is the index of the next non-literal pattern character. | |
1818 | int32_t p; | |
1819 | int32_t t = textOffset; | |
1820 | ||
1821 | if (lenient) { | |
1822 | // trim leading, trailing whitespace from | |
1823 | // the literal text | |
1824 | literal.trim(); | |
1825 | ||
1826 | // ignore any leading whitespace in the text | |
1827 | while (t < text.length() && u_isWhitespace(text.charAt(t))) { | |
1828 | t += 1; | |
1829 | } | |
1830 | } | |
1831 | ||
1832 | for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) { | |
1833 | UBool needWhitespace = FALSE; | |
1834 | ||
1835 | while (p < literal.length() && uprv_isRuleWhiteSpace(literal.charAt(p))) { | |
1836 | needWhitespace = TRUE; | |
1837 | p += 1; | |
1838 | } | |
1839 | ||
1840 | if (needWhitespace) { | |
1841 | int32_t tStart = t; | |
1842 | ||
1843 | while (t < text.length()) { | |
1844 | UChar tch = text.charAt(t); | |
1845 | ||
1846 | if (!u_isUWhiteSpace(tch) && !uprv_isRuleWhiteSpace(tch)) { | |
1847 | break; | |
1848 | } | |
1849 | ||
1850 | t += 1; | |
1851 | } | |
1852 | ||
1853 | // TODO: should we require internal spaces | |
1854 | // in lenient mode? (There won't be any | |
1855 | // leading or trailing spaces) | |
1856 | if (!lenient && t == tStart) { | |
1857 | // didn't find matching whitespace: | |
1858 | // an error in strict mode | |
1859 | return FALSE; | |
1860 | } | |
1861 | ||
1862 | // In strict mode, this run of whitespace | |
1863 | // may have been at the end. | |
1864 | if (p >= literal.length()) { | |
1865 | break; | |
1866 | } | |
1867 | } | |
1868 | ||
1869 | if (t >= text.length() || literal.charAt(p) != text.charAt(t)) { | |
1870 | // Ran out of text, or found a non-matching character: | |
1871 | // OK in lenient mode, an error in strict mode. | |
1872 | if (lenient) { | |
1873 | break; | |
1874 | } | |
1875 | ||
1876 | return FALSE; | |
1877 | } | |
1878 | } | |
1879 | ||
1880 | // At this point if we're in strict mode we have a complete match. | |
1881 | // If we're in lenient mode we may have a partial match, or no | |
1882 | // match at all. | |
1883 | if (p <= 0) { | |
1884 | // no match. Pretend it matched a run of whitespace | |
1885 | // and ignorables in the text. | |
1886 | const UnicodeSet *ignorables = NULL; | |
1887 | UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i)); | |
1888 | ||
1889 | if (patternCharPtr != NULL) { | |
1890 | UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars()); | |
1891 | ||
1892 | ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex); | |
1893 | } | |
1894 | ||
1895 | for (t = textOffset; t < text.length(); t += 1) { | |
1896 | UChar ch = text.charAt(t); | |
1897 | ||
1898 | if (ignorables == NULL || !ignorables->contains(ch)) { | |
1899 | break; | |
1900 | } | |
1901 | } | |
1902 | } | |
1903 | ||
1904 | // if we get here, we've got a complete match. | |
1905 | patternOffset = i - 1; | |
1906 | textOffset = t; | |
1907 | ||
1908 | return TRUE; | |
1909 | } | |
1910 | ||
73c04bcf A |
1911 | //---------------------------------------------------------------------- |
1912 | ||
b75a7d8f A |
1913 | int32_t SimpleDateFormat::matchString(const UnicodeString& text, |
1914 | int32_t start, | |
1915 | UCalendarDateFields field, | |
1916 | const UnicodeString* data, | |
1917 | int32_t dataCount, | |
1918 | Calendar& cal) const | |
1919 | { | |
1920 | int32_t i = 0; | |
1921 | int32_t count = dataCount; | |
1922 | ||
1923 | if (field == UCAL_DAY_OF_WEEK) i = 1; | |
1924 | ||
1925 | // There may be multiple strings in the data[] array which begin with | |
1926 | // the same prefix (e.g., Cerven and Cervenec (June and July) in Czech). | |
1927 | // We keep track of the longest match, and return that. Note that this | |
1928 | // unfortunately requires us to test all array elements. | |
1929 | int32_t bestMatchLength = 0, bestMatch = -1; | |
1930 | ||
1931 | // {sfb} kludge to support case-insensitive comparison | |
1932 | // {markus 2002oct11} do not just use caseCompareBetween because we do not know | |
1933 | // the length of the match after case folding | |
374ca955 A |
1934 | // {alan 20040607} don't case change the whole string, since the length |
1935 | // can change | |
1936 | // TODO we need a case-insensitive startsWith function | |
1937 | UnicodeString lcase, lcaseText; | |
1938 | text.extract(start, INT32_MAX, lcaseText); | |
1939 | lcaseText.foldCase(); | |
b75a7d8f A |
1940 | |
1941 | for (; i < count; ++i) | |
1942 | { | |
b75a7d8f A |
1943 | // Always compare if we have no match yet; otherwise only compare |
1944 | // against potentially better matches (longer strings). | |
1945 | ||
b75a7d8f | 1946 | lcase.fastCopyFrom(data[i]).foldCase(); |
374ca955 | 1947 | int32_t length = lcase.length(); |
b75a7d8f | 1948 | |
374ca955 A |
1949 | if (length > bestMatchLength && |
1950 | lcaseText.compareBetween(0, length, lcase, 0, length) == 0) | |
b75a7d8f A |
1951 | { |
1952 | bestMatch = i; | |
1953 | bestMatchLength = length; | |
1954 | } | |
1955 | } | |
1956 | if (bestMatch >= 0) | |
1957 | { | |
1958 | cal.set(field, bestMatch); | |
374ca955 A |
1959 | |
1960 | // Once we have a match, we have to determine the length of the | |
1961 | // original source string. This will usually be == the length of | |
1962 | // the case folded string, but it may differ (e.g. sharp s). | |
1963 | lcase.fastCopyFrom(data[bestMatch]).foldCase(); | |
1964 | ||
1965 | // Most of the time, the length will be the same as the length | |
1966 | // of the string from the locale data. Sometimes it will be | |
1967 | // different, in which case we will have to figure it out by | |
1968 | // adding a character at a time, until we have a match. We do | |
1969 | // this all in one loop, where we try 'len' first (at index | |
1970 | // i==0). | |
1971 | int32_t len = data[bestMatch].length(); // 99+% of the time | |
1972 | int32_t n = text.length() - start; | |
1973 | for (i=0; i<=n; ++i) { | |
1974 | int32_t j=i; | |
1975 | if (i == 0) { | |
1976 | j = len; | |
1977 | } else if (i == len) { | |
1978 | continue; // already tried this when i was 0 | |
1979 | } | |
1980 | text.extract(start, j, lcaseText); | |
1981 | lcaseText.foldCase(); | |
1982 | if (lcase == lcaseText) { | |
1983 | return start + j; | |
1984 | } | |
1985 | } | |
b75a7d8f A |
1986 | } |
1987 | ||
1988 | return -start; | |
1989 | } | |
1990 | ||
1991 | //---------------------------------------------------------------------- | |
1992 | ||
1993 | void | |
1994 | SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status) | |
1995 | { | |
1996 | parseAmbiguousDatesAsAfter(d, status); | |
1997 | } | |
1998 | ||
1999 | /** | |
2000 | * Private member function that converts the parsed date strings into | |
2001 | * timeFields. Returns -start (for ParsePosition) if failed. | |
2002 | * @param text the time text to be parsed. | |
2003 | * @param start where to start parsing. | |
2004 | * @param ch the pattern character for the date field text to be parsed. | |
2005 | * @param count the count of a pattern character. | |
2006 | * @return the new start position if matching succeeded; a negative number | |
2007 | * indicating matching failure, otherwise. | |
2008 | */ | |
2009 | int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, | |
2010 | UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const | |
2011 | { | |
2012 | Formattable number; | |
2013 | int32_t value = 0; | |
2014 | int32_t i; | |
2015 | ParsePosition pos(0); | |
2016 | int32_t patternCharIndex; | |
2017 | UnicodeString temp; | |
2018 | UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch); | |
46f4442e A |
2019 | UBool lenient = isLenient(); |
2020 | UBool gotNumber = FALSE; | |
b75a7d8f | 2021 | |
374ca955 A |
2022 | #if defined (U_DEBUG_CAL) |
2023 | //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start); | |
2024 | #endif | |
2025 | ||
b75a7d8f A |
2026 | if (patternCharPtr == NULL) { |
2027 | return -start; | |
2028 | } | |
2029 | ||
374ca955 | 2030 | patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars()); |
b75a7d8f A |
2031 | |
2032 | UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; | |
2033 | ||
2034 | // If there are any spaces here, skip over them. If we hit the end | |
2035 | // of the string, then fail. | |
2036 | for (;;) { | |
2037 | if (start >= text.length()) { | |
2038 | return -start; | |
2039 | } | |
46f4442e | 2040 | |
b75a7d8f | 2041 | UChar32 c = text.char32At(start); |
46f4442e | 2042 | |
b75a7d8f A |
2043 | if (!u_isUWhiteSpace(c)) { |
2044 | break; | |
2045 | } | |
46f4442e | 2046 | |
b75a7d8f A |
2047 | start += UTF_CHAR_LENGTH(c); |
2048 | } | |
46f4442e | 2049 | |
b75a7d8f A |
2050 | pos.setIndex(start); |
2051 | ||
2052 | // We handle a few special cases here where we need to parse | |
2053 | // a number value. We handle further, more generic cases below. We need | |
2054 | // to handle some of them here because some fields require extra processing on | |
2055 | // the parsed value. | |
374ca955 | 2056 | if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || |
46f4442e | 2057 | patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || |
374ca955 | 2058 | patternCharIndex == UDAT_HOUR1_FIELD || |
46f4442e A |
2059 | patternCharIndex == UDAT_HOUR0_FIELD || |
2060 | patternCharIndex == UDAT_DOW_LOCAL_FIELD || | |
2061 | patternCharIndex == UDAT_STANDALONE_DAY_FIELD || | |
2062 | patternCharIndex == UDAT_MONTH_FIELD || | |
2063 | patternCharIndex == UDAT_STANDALONE_MONTH_FIELD || | |
2064 | patternCharIndex == UDAT_QUARTER_FIELD || | |
2065 | patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD || | |
374ca955 A |
2066 | patternCharIndex == UDAT_YEAR_FIELD || |
2067 | patternCharIndex == UDAT_YEAR_WOY_FIELD || | |
2068 | patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) | |
b75a7d8f | 2069 | { |
374ca955 | 2070 | int32_t parseStart = pos.getIndex(); |
b75a7d8f A |
2071 | // It would be good to unify this with the obeyCount logic below, |
2072 | // but that's going to be difficult. | |
2073 | const UnicodeString* src; | |
73c04bcf | 2074 | |
b75a7d8f A |
2075 | if (obeyCount) { |
2076 | if ((start+count) > text.length()) { | |
2077 | return -start; | |
2078 | } | |
73c04bcf | 2079 | |
b75a7d8f A |
2080 | text.extractBetween(0, start + count, temp); |
2081 | src = &temp; | |
2082 | } else { | |
2083 | src = &text; | |
2084 | } | |
73c04bcf | 2085 | |
b75a7d8f | 2086 | parseInt(*src, number, pos, allowNegative); |
73c04bcf | 2087 | |
46f4442e A |
2088 | if (pos.getIndex() > parseStart) { |
2089 | value = number.getLong(); | |
2090 | gotNumber = TRUE; | |
2091 | ||
2092 | // Check the range of the value | |
2093 | int32_t bias = gFieldRangeBias[patternCharIndex]; | |
2094 | ||
2095 | if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { | |
2096 | return -start; | |
2097 | } | |
2098 | } | |
2099 | ||
b75a7d8f | 2100 | } |
46f4442e A |
2101 | |
2102 | // Make sure that we got a number if | |
2103 | // we want one, and didn't get one | |
2104 | // if we don't want one. | |
2105 | switch (patternCharIndex) { | |
2106 | case UDAT_HOUR_OF_DAY1_FIELD: | |
2107 | case UDAT_HOUR_OF_DAY0_FIELD: | |
2108 | case UDAT_HOUR1_FIELD: | |
2109 | case UDAT_HOUR0_FIELD: | |
2110 | // special range check for hours: | |
2111 | if (value < 0 || value > 24) { | |
2112 | return -start; | |
2113 | } | |
2114 | ||
2115 | // fall through to gotNumber check | |
2116 | ||
2117 | case UDAT_YEAR_FIELD: | |
2118 | case UDAT_YEAR_WOY_FIELD: | |
2119 | case UDAT_FRACTIONAL_SECOND_FIELD: | |
2120 | // these must be a number | |
2121 | if (! gotNumber) { | |
2122 | return -start; | |
2123 | } | |
2124 | ||
2125 | break; | |
2126 | ||
2127 | case UDAT_DOW_LOCAL_FIELD: | |
2128 | case UDAT_STANDALONE_DAY_FIELD: | |
2129 | case UDAT_MONTH_FIELD: | |
2130 | case UDAT_STANDALONE_MONTH_FIELD: | |
2131 | case UDAT_QUARTER_FIELD: | |
2132 | case UDAT_STANDALONE_QUARTER_FIELD: | |
2133 | // in strict mode, these can only | |
2134 | // be a number if count <= 2 | |
2135 | if (!lenient && gotNumber && count > 2) { | |
7393aa2f A |
2136 | // We have a string pattern in strict mode |
2137 | // but the input parsed as a number. Ignore | |
2138 | // the fact that the input parsed as a number | |
2139 | // and try to match it as a string. (Some | |
2140 | // locales have numbers for the month names.) | |
2141 | gotNumber = FALSE; | |
2142 | pos.setIndex(start); | |
46f4442e A |
2143 | } |
2144 | ||
2145 | break; | |
2146 | ||
2147 | default: | |
2148 | // we check the rest of the fields below. | |
2149 | break; | |
2150 | } | |
2151 | ||
b75a7d8f | 2152 | switch (patternCharIndex) { |
374ca955 | 2153 | case UDAT_ERA_FIELD: |
46f4442e A |
2154 | if (count == 5) { |
2155 | return matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal); | |
2156 | } | |
73c04bcf A |
2157 | if (count == 4) { |
2158 | return matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal); | |
2159 | } | |
2160 | ||
b75a7d8f | 2161 | return matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal); |
73c04bcf | 2162 | |
374ca955 | 2163 | case UDAT_YEAR_FIELD: |
b75a7d8f A |
2164 | // If there are 3 or more YEAR pattern characters, this indicates |
2165 | // that the year value is to be treated literally, without any | |
2166 | // two-digit year adjustments (e.g., from "01" to 2001). Otherwise | |
2167 | // we made adjustments to place the 2-digit year in the proper | |
2168 | // century, for parsed strings from "00" to "99". Any other string | |
2169 | // is treated literally: "2250", "-1", "1", "002". | |
46f4442e | 2170 | if ((pos.getIndex() - start) == 2 |
b75a7d8f A |
2171 | && u_isdigit(text.charAt(start)) |
2172 | && u_isdigit(text.charAt(start+1))) | |
2173 | { | |
2174 | // Assume for example that the defaultCenturyStart is 6/18/1903. | |
2175 | // This means that two-digit years will be forced into the range | |
2176 | // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02 | |
2177 | // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond | |
2178 | // to 1904, 1905, etc. If the year is 03, then it is 2003 if the | |
2179 | // other fields specify a date before 6/18, or 1903 if they specify a | |
2180 | // date afterwards. As a result, 03 is an ambiguous year. All other | |
2181 | // two-digit years are unambiguous. | |
2182 | if(fHaveDefaultCentury) { // check if this formatter even has a pivot year | |
2183 | int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; | |
2184 | ambiguousYear[0] = (value == ambiguousTwoDigitYear); | |
2185 | value += (fDefaultCenturyStartYear/100)*100 + | |
2186 | (value < ambiguousTwoDigitYear ? 100 : 0); | |
2187 | } | |
2188 | } | |
2189 | cal.set(UCAL_YEAR, value); | |
2190 | return pos.getIndex(); | |
73c04bcf | 2191 | |
374ca955 A |
2192 | case UDAT_YEAR_WOY_FIELD: |
2193 | // Same reasoning as for the UDAT_YEAR_FIELD case above. | |
46f4442e | 2194 | if ((pos.getIndex() - start) == 2 |
b75a7d8f A |
2195 | && u_isdigit(text.charAt(start)) |
2196 | && u_isdigit(text.charAt(start+1)) | |
2197 | && fHaveDefaultCentury ) | |
2198 | { | |
2199 | int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; | |
2200 | ambiguousYear[0] = (value == ambiguousTwoDigitYear); | |
2201 | value += (fDefaultCenturyStartYear/100)*100 + | |
2202 | (value < ambiguousTwoDigitYear ? 100 : 0); | |
2203 | } | |
2204 | cal.set(UCAL_YEAR_WOY, value); | |
2205 | return pos.getIndex(); | |
73c04bcf | 2206 | |
374ca955 | 2207 | case UDAT_MONTH_FIELD: |
46f4442e | 2208 | if (gotNumber) // i.e., M or MM. |
b75a7d8f A |
2209 | { |
2210 | // Don't want to parse the month if it is a string | |
2211 | // while pattern uses numeric style: M or MM. | |
2212 | // [We computed 'value' above.] | |
2213 | cal.set(UCAL_MONTH, value - 1); | |
2214 | return pos.getIndex(); | |
73c04bcf | 2215 | } else { |
b75a7d8f A |
2216 | // count >= 3 // i.e., MMM or MMMM |
2217 | // Want to be able to parse both short and long forms. | |
2218 | // Try count == 4 first: | |
2219 | int32_t newStart = 0; | |
73c04bcf | 2220 | |
b75a7d8f A |
2221 | if ((newStart = matchString(text, start, UCAL_MONTH, |
2222 | fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0) | |
2223 | return newStart; | |
2224 | else // count == 4 failed, now try count == 3 | |
2225 | return matchString(text, start, UCAL_MONTH, | |
2226 | fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal); | |
2227 | } | |
73c04bcf A |
2228 | |
2229 | case UDAT_STANDALONE_MONTH_FIELD: | |
46f4442e | 2230 | if (gotNumber) // i.e., L or LL. |
73c04bcf A |
2231 | { |
2232 | // Don't want to parse the month if it is a string | |
2233 | // while pattern uses numeric style: L or LL. | |
2234 | // [We computed 'value' above.] | |
2235 | cal.set(UCAL_MONTH, value - 1); | |
2236 | return pos.getIndex(); | |
2237 | } else { | |
2238 | // count >= 3 // i.e., LLL or LLLL | |
2239 | // Want to be able to parse both short and long forms. | |
2240 | // Try count == 4 first: | |
2241 | int32_t newStart = 0; | |
2242 | ||
2243 | if ((newStart = matchString(text, start, UCAL_MONTH, | |
2244 | fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0) | |
2245 | return newStart; | |
2246 | else // count == 4 failed, now try count == 3 | |
2247 | return matchString(text, start, UCAL_MONTH, | |
2248 | fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal); | |
2249 | } | |
2250 | ||
374ca955 | 2251 | case UDAT_HOUR_OF_DAY1_FIELD: |
b75a7d8f A |
2252 | // [We computed 'value' above.] |
2253 | if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1) | |
2254 | value = 0; | |
46f4442e A |
2255 | |
2256 | // fall through to set field | |
2257 | ||
2258 | case UDAT_HOUR_OF_DAY0_FIELD: | |
b75a7d8f A |
2259 | cal.set(UCAL_HOUR_OF_DAY, value); |
2260 | return pos.getIndex(); | |
73c04bcf | 2261 | |
374ca955 A |
2262 | case UDAT_FRACTIONAL_SECOND_FIELD: |
2263 | // Fractional seconds left-justify | |
2264 | i = pos.getIndex() - start; | |
2265 | if (i < 3) { | |
2266 | while (i < 3) { | |
2267 | value *= 10; | |
2268 | i++; | |
2269 | } | |
2270 | } else { | |
2271 | int32_t a = 1; | |
2272 | while (i > 3) { | |
2273 | a *= 10; | |
2274 | i--; | |
2275 | } | |
2276 | value = (value + (a>>1)) / a; | |
2277 | } | |
2278 | cal.set(UCAL_MILLISECOND, value); | |
2279 | return pos.getIndex(); | |
73c04bcf | 2280 | |
46f4442e A |
2281 | case UDAT_DOW_LOCAL_FIELD: |
2282 | if (gotNumber) // i.e., e or ee | |
2283 | { | |
2284 | // [We computed 'value' above.] | |
2285 | cal.set(UCAL_DOW_LOCAL, value); | |
2286 | return pos.getIndex(); | |
2287 | } | |
2288 | // else for eee-eeeee fall through to handling of EEE-EEEEE | |
2289 | // fall through, do not break here | |
374ca955 | 2290 | case UDAT_DAY_OF_WEEK_FIELD: |
b75a7d8f A |
2291 | { |
2292 | // Want to be able to parse both short and long forms. | |
46f4442e | 2293 | // Try count == 4 (EEEE) first: |
b75a7d8f A |
2294 | int32_t newStart = 0; |
2295 | if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, | |
2296 | fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0) | |
2297 | return newStart; | |
46f4442e A |
2298 | // EEEE failed, now try EEE |
2299 | else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, | |
2300 | fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0) | |
2301 | return newStart; | |
2302 | // EEE failed, now try EEEEE | |
2303 | else | |
b75a7d8f | 2304 | return matchString(text, start, UCAL_DAY_OF_WEEK, |
46f4442e | 2305 | fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal); |
b75a7d8f | 2306 | } |
73c04bcf A |
2307 | |
2308 | case UDAT_STANDALONE_DAY_FIELD: | |
2309 | { | |
46f4442e A |
2310 | if (gotNumber) // c or cc |
2311 | { | |
2312 | // [We computed 'value' above.] | |
2313 | cal.set(UCAL_DOW_LOCAL, value); | |
2314 | return pos.getIndex(); | |
2315 | } | |
73c04bcf | 2316 | // Want to be able to parse both short and long forms. |
46f4442e | 2317 | // Try count == 4 (cccc) first: |
73c04bcf A |
2318 | int32_t newStart = 0; |
2319 | if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, | |
2320 | fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0) | |
2321 | return newStart; | |
46f4442e | 2322 | else // cccc failed, now try ccc |
73c04bcf A |
2323 | return matchString(text, start, UCAL_DAY_OF_WEEK, |
2324 | fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal); | |
2325 | } | |
2326 | ||
374ca955 | 2327 | case UDAT_AM_PM_FIELD: |
b75a7d8f | 2328 | return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal); |
73c04bcf | 2329 | |
374ca955 | 2330 | case UDAT_HOUR1_FIELD: |
b75a7d8f A |
2331 | // [We computed 'value' above.] |
2332 | if (value == cal.getLeastMaximum(UCAL_HOUR)+1) | |
2333 | value = 0; | |
46f4442e A |
2334 | |
2335 | // fall through to set field | |
2336 | ||
2337 | case UDAT_HOUR0_FIELD: | |
b75a7d8f A |
2338 | cal.set(UCAL_HOUR, value); |
2339 | return pos.getIndex(); | |
73c04bcf A |
2340 | |
2341 | case UDAT_QUARTER_FIELD: | |
46f4442e | 2342 | if (gotNumber) // i.e., Q or QQ. |
73c04bcf A |
2343 | { |
2344 | // Don't want to parse the month if it is a string | |
2345 | // while pattern uses numeric style: Q or QQ. | |
2346 | // [We computed 'value' above.] | |
2347 | cal.set(UCAL_MONTH, (value - 1) * 3); | |
2348 | return pos.getIndex(); | |
2349 | } else { | |
2350 | // count >= 3 // i.e., QQQ or QQQQ | |
2351 | // Want to be able to parse both short and long forms. | |
2352 | // Try count == 4 first: | |
2353 | int32_t newStart = 0; | |
2354 | ||
2355 | if ((newStart = matchQuarterString(text, start, UCAL_MONTH, | |
2356 | fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0) | |
2357 | return newStart; | |
2358 | else // count == 4 failed, now try count == 3 | |
2359 | return matchQuarterString(text, start, UCAL_MONTH, | |
2360 | fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal); | |
2361 | } | |
2362 | ||
2363 | case UDAT_STANDALONE_QUARTER_FIELD: | |
46f4442e | 2364 | if (gotNumber) // i.e., q or qq. |
73c04bcf A |
2365 | { |
2366 | // Don't want to parse the month if it is a string | |
2367 | // while pattern uses numeric style: q or q. | |
2368 | // [We computed 'value' above.] | |
2369 | cal.set(UCAL_MONTH, (value - 1) * 3); | |
2370 | return pos.getIndex(); | |
2371 | } else { | |
2372 | // count >= 3 // i.e., qqq or qqqq | |
2373 | // Want to be able to parse both short and long forms. | |
2374 | // Try count == 4 first: | |
2375 | int32_t newStart = 0; | |
2376 | ||
2377 | if ((newStart = matchQuarterString(text, start, UCAL_MONTH, | |
2378 | fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0) | |
2379 | return newStart; | |
2380 | else // count == 4 failed, now try count == 3 | |
2381 | return matchQuarterString(text, start, UCAL_MONTH, | |
2382 | fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal); | |
2383 | } | |
2384 | ||
374ca955 A |
2385 | case UDAT_TIMEZONE_FIELD: |
2386 | case UDAT_TIMEZONE_RFC_FIELD: | |
73c04bcf | 2387 | case UDAT_TIMEZONE_GENERIC_FIELD: |
46f4442e | 2388 | case UDAT_TIMEZONE_SPECIAL_FIELD: |
b75a7d8f | 2389 | { |
46f4442e A |
2390 | int32_t offset = 0; |
2391 | UBool parsed = FALSE; | |
2392 | ||
2393 | // Step 1 | |
2394 | // Check if this is a long GMT offset string (either localized or default) | |
2395 | offset = parseGMT(text, pos); | |
2396 | if (pos.getIndex() - start > 0) { | |
2397 | parsed = TRUE; | |
2398 | } | |
2399 | if (!parsed) { | |
2400 | // Step 2 | |
2401 | // Check if this is an RFC822 time zone offset. | |
2402 | // ICU supports the standard RFC822 format [+|-]HHmm | |
2403 | // and its extended form [+|-]HHmmSS. | |
2404 | do { | |
2405 | int32_t sign = 0; | |
2406 | UChar signChar = text.charAt(start); | |
2407 | if (signChar == (UChar)0x002B /* '+' */) { | |
2408 | sign = 1; | |
2409 | } else if (signChar == (UChar)0x002D /* '-' */) { | |
2410 | sign = -1; | |
2411 | } else { | |
2412 | // Not an RFC822 offset string | |
2413 | break; | |
2414 | } | |
b75a7d8f | 2415 | |
46f4442e A |
2416 | // Parse digits |
2417 | int32_t orgPos = start + 1; | |
2418 | pos.setIndex(orgPos); | |
2419 | parseInt(text, number, 6, pos, FALSE); | |
2420 | int32_t numLen = pos.getIndex() - orgPos; | |
2421 | if (numLen <= 0) { | |
2422 | break; | |
2423 | } | |
b75a7d8f | 2424 | |
46f4442e A |
2425 | // Followings are possible format (excluding sign char) |
2426 | // HHmmSS | |
2427 | // HmmSS | |
2428 | // HHmm | |
2429 | // Hmm | |
2430 | // HH | |
2431 | // H | |
2432 | int32_t val = number.getLong(); | |
2433 | int32_t hour = 0, min = 0, sec = 0; | |
2434 | switch(numLen) { | |
2435 | case 1: // H | |
2436 | case 2: // HH | |
2437 | hour = val; | |
2438 | break; | |
2439 | case 3: // Hmm | |
2440 | case 4: // HHmm | |
2441 | hour = val / 100; | |
2442 | min = val % 100; | |
2443 | break; | |
2444 | case 5: // Hmmss | |
2445 | case 6: // HHmmss | |
2446 | hour = val / 10000; | |
2447 | min = (val % 10000) / 100; | |
2448 | sec = val % 100; | |
2449 | break; | |
2450 | } | |
2451 | if (hour > 23 || min > 59 || sec > 59) { | |
2452 | // Invalid value range | |
2453 | break; | |
2454 | } | |
2455 | offset = (((hour * 60) + min) * 60 + sec) * 1000 * sign; | |
2456 | parsed = TRUE; | |
2457 | } while (FALSE); | |
b75a7d8f | 2458 | |
46f4442e A |
2459 | if (!parsed) { |
2460 | // Failed to parse. Reset the position. | |
2461 | pos.setIndex(start); | |
2462 | } | |
b75a7d8f | 2463 | } |
46f4442e A |
2464 | |
2465 | if (parsed) { | |
2466 | // offset was successfully parsed as either a long GMT string or RFC822 zone offset | |
2467 | // string. Create normalized zone ID for the offset. | |
2468 | ||
2469 | UnicodeString tzID(gGmt); | |
2470 | formatRFC822TZ(tzID, offset); | |
2471 | //TimeZone *customTZ = TimeZone::createTimeZone(tzID); | |
2472 | TimeZone *customTZ = new SimpleTimeZone(offset, tzID); // faster than TimeZone::createTimeZone | |
2473 | cal.adoptTimeZone(customTZ); | |
2474 | ||
2475 | return pos.getIndex(); | |
b75a7d8f A |
2476 | } |
2477 | ||
46f4442e | 2478 | // Step 3 |
b75a7d8f A |
2479 | // At this point, check for named time zones by looking through |
2480 | // the locale data from the DateFormatZoneData strings. | |
2481 | // Want to be able to parse both short and long forms. | |
46f4442e A |
2482 | // optimize for calendar's current time zone |
2483 | const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat(); | |
2484 | if (zsf) { | |
2485 | UErrorCode status = U_ZERO_ERROR; | |
2486 | const ZoneStringInfo *zsinfo = NULL; | |
2487 | int32_t matchLen; | |
2488 | ||
2489 | switch (patternCharIndex) { | |
2490 | case UDAT_TIMEZONE_FIELD: // 'z' | |
2491 | if (count < 4) { | |
2492 | zsinfo = zsf->findSpecificShort(text, start, matchLen, status); | |
2493 | } else { | |
2494 | zsinfo = zsf->findSpecificLong(text, start, matchLen, status); | |
2495 | } | |
2496 | break; | |
2497 | case UDAT_TIMEZONE_GENERIC_FIELD: // 'v' | |
2498 | if (count == 1) { | |
2499 | zsinfo = zsf->findGenericShort(text, start, matchLen, status); | |
2500 | } else if (count == 4) { | |
2501 | zsinfo = zsf->findGenericLong(text, start, matchLen, status); | |
2502 | } | |
2503 | break; | |
2504 | case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V' | |
2505 | if (count == 1) { | |
2506 | zsinfo = zsf->findSpecificShort(text, start, matchLen, status); | |
2507 | } else if (count == 4) { | |
2508 | zsinfo = zsf->findGenericLocation(text, start, matchLen, status); | |
2509 | } | |
2510 | break; | |
2511 | } | |
b75a7d8f | 2512 | |
46f4442e A |
2513 | if (U_SUCCESS(status) && zsinfo != NULL) { |
2514 | if (zsinfo->isStandard()) { | |
2515 | ((SimpleDateFormat*)this)->tztype = TZTYPE_STD; | |
2516 | } else if (zsinfo->isDaylight()) { | |
2517 | ((SimpleDateFormat*)this)->tztype = TZTYPE_DST; | |
2518 | } | |
2519 | UnicodeString tzid; | |
2520 | zsinfo->getID(tzid); | |
2521 | ||
2522 | UnicodeString current; | |
2523 | cal.getTimeZone().getID(current); | |
2524 | if (tzid != current) { | |
2525 | TimeZone *tz = TimeZone::createTimeZone(tzid); | |
2526 | cal.adoptTimeZone(tz); | |
2527 | } | |
2528 | return start + matchLen; | |
2529 | } | |
b75a7d8f | 2530 | } |
46f4442e A |
2531 | // complete failure |
2532 | return -start; | |
b75a7d8f | 2533 | } |
73c04bcf | 2534 | |
b75a7d8f | 2535 | default: |
b75a7d8f | 2536 | // Handle "generic" fields |
374ca955 | 2537 | int32_t parseStart = pos.getIndex(); |
b75a7d8f A |
2538 | const UnicodeString* src; |
2539 | if (obeyCount) { | |
2540 | if ((start+count) > text.length()) { | |
2541 | return -start; | |
2542 | } | |
2543 | text.extractBetween(0, start + count, temp); | |
2544 | src = &temp; | |
2545 | } else { | |
2546 | src = &text; | |
2547 | } | |
2548 | parseInt(*src, number, pos, allowNegative); | |
2549 | if (pos.getIndex() != parseStart) { | |
46f4442e A |
2550 | int32_t value = number.getLong(); |
2551 | ||
2552 | // Check the range of the value | |
2553 | int32_t bias = gFieldRangeBias[patternCharIndex]; | |
2554 | ||
2555 | if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) { | |
2556 | cal.set(field, value); | |
2557 | return pos.getIndex(); | |
2558 | } | |
b75a7d8f | 2559 | } |
46f4442e | 2560 | |
b75a7d8f A |
2561 | return -start; |
2562 | } | |
2563 | } | |
2564 | ||
2565 | /** | |
2566 | * Parse an integer using fNumberFormat. This method is semantically | |
2567 | * const, but actually may modify fNumberFormat. | |
2568 | */ | |
2569 | void SimpleDateFormat::parseInt(const UnicodeString& text, | |
2570 | Formattable& number, | |
2571 | ParsePosition& pos, | |
2572 | UBool allowNegative) const { | |
46f4442e A |
2573 | parseInt(text, number, -1, pos, allowNegative); |
2574 | } | |
2575 | ||
2576 | /** | |
2577 | * Parse an integer using fNumberFormat up to maxDigits. | |
2578 | */ | |
2579 | void SimpleDateFormat::parseInt(const UnicodeString& text, | |
2580 | Formattable& number, | |
2581 | int32_t maxDigits, | |
2582 | ParsePosition& pos, | |
2583 | UBool allowNegative) const { | |
b75a7d8f A |
2584 | UnicodeString oldPrefix; |
2585 | DecimalFormat* df = NULL; | |
2586 | if (!allowNegative && | |
2587 | fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) { | |
2588 | df = (DecimalFormat*)fNumberFormat; | |
2589 | df->getNegativePrefix(oldPrefix); | |
2590 | df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX); | |
2591 | } | |
46f4442e | 2592 | int32_t oldPos = pos.getIndex(); |
b75a7d8f A |
2593 | fNumberFormat->parse(text, number, pos); |
2594 | if (df != NULL) { | |
2595 | df->setNegativePrefix(oldPrefix); | |
2596 | } | |
46f4442e A |
2597 | |
2598 | if (maxDigits > 0) { | |
2599 | // adjust the result to fit into | |
2600 | // the maxDigits and move the position back | |
2601 | int32_t nDigits = pos.getIndex() - oldPos; | |
2602 | if (nDigits > maxDigits) { | |
2603 | int32_t val = number.getLong(); | |
2604 | nDigits -= maxDigits; | |
2605 | while (nDigits > 0) { | |
2606 | val /= 10; | |
2607 | nDigits--; | |
2608 | } | |
2609 | pos.setIndex(oldPos + maxDigits); | |
2610 | number.setLong(val); | |
2611 | } | |
2612 | } | |
b75a7d8f A |
2613 | } |
2614 | ||
2615 | //---------------------------------------------------------------------- | |
2616 | ||
2617 | void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern, | |
2618 | UnicodeString& translatedPattern, | |
2619 | const UnicodeString& from, | |
2620 | const UnicodeString& to, | |
2621 | UErrorCode& status) | |
2622 | { | |
2623 | // run through the pattern and convert any pattern symbols from the version | |
2624 | // in "from" to the corresponding character ion "to". This code takes | |
2625 | // quoted strings into account (it doesn't try to translate them), and it signals | |
2626 | // an error if a particular "pattern character" doesn't appear in "from". | |
2627 | // Depending on the values of "from" and "to" this can convert from generic | |
2628 | // to localized patterns or localized to generic. | |
2629 | if (U_FAILURE(status)) | |
2630 | return; | |
2631 | ||
2632 | translatedPattern.remove(); | |
2633 | UBool inQuote = FALSE; | |
2634 | for (int32_t i = 0; i < originalPattern.length(); ++i) { | |
2635 | UChar c = originalPattern[i]; | |
2636 | if (inQuote) { | |
2637 | if (c == QUOTE) | |
2638 | inQuote = FALSE; | |
2639 | } | |
2640 | else { | |
2641 | if (c == QUOTE) | |
2642 | inQuote = TRUE; | |
2643 | else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/ | |
2644 | || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) { | |
2645 | int32_t ci = from.indexOf(c); | |
2646 | if (ci == -1) { | |
2647 | status = U_INVALID_FORMAT_ERROR; | |
2648 | return; | |
2649 | } | |
2650 | c = to[ci]; | |
2651 | } | |
2652 | } | |
2653 | translatedPattern += c; | |
2654 | } | |
2655 | if (inQuote) { | |
2656 | status = U_INVALID_FORMAT_ERROR; | |
2657 | return; | |
2658 | } | |
2659 | } | |
2660 | ||
2661 | //---------------------------------------------------------------------- | |
2662 | ||
2663 | UnicodeString& | |
2664 | SimpleDateFormat::toPattern(UnicodeString& result) const | |
2665 | { | |
2666 | result = fPattern; | |
2667 | return result; | |
2668 | } | |
2669 | ||
2670 | //---------------------------------------------------------------------- | |
2671 | ||
2672 | UnicodeString& | |
2673 | SimpleDateFormat::toLocalizedPattern(UnicodeString& result, | |
2674 | UErrorCode& status) const | |
2675 | { | |
2676 | translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status); | |
2677 | return result; | |
2678 | } | |
2679 | ||
2680 | //---------------------------------------------------------------------- | |
2681 | ||
2682 | void | |
2683 | SimpleDateFormat::applyPattern(const UnicodeString& pattern) | |
2684 | { | |
2685 | fPattern = pattern; | |
2686 | } | |
2687 | ||
2688 | //---------------------------------------------------------------------- | |
2689 | ||
2690 | void | |
2691 | SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern, | |
2692 | UErrorCode &status) | |
2693 | { | |
2694 | translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status); | |
2695 | } | |
2696 | ||
2697 | //---------------------------------------------------------------------- | |
2698 | ||
2699 | const DateFormatSymbols* | |
2700 | SimpleDateFormat::getDateFormatSymbols() const | |
2701 | { | |
2702 | return fSymbols; | |
2703 | } | |
2704 | ||
2705 | //---------------------------------------------------------------------- | |
2706 | ||
2707 | void | |
2708 | SimpleDateFormat::adoptDateFormatSymbols(DateFormatSymbols* newFormatSymbols) | |
2709 | { | |
2710 | delete fSymbols; | |
2711 | fSymbols = newFormatSymbols; | |
2712 | } | |
2713 | ||
2714 | //---------------------------------------------------------------------- | |
2715 | void | |
2716 | SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols) | |
2717 | { | |
2718 | delete fSymbols; | |
2719 | fSymbols = new DateFormatSymbols(newFormatSymbols); | |
2720 | } | |
2721 | ||
2722 | ||
2723 | //---------------------------------------------------------------------- | |
2724 | ||
2725 | ||
2726 | void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt) | |
2727 | { | |
2728 | UErrorCode status = U_ZERO_ERROR; | |
2729 | DateFormat::adoptCalendar(calendarToAdopt); | |
2730 | delete fSymbols; | |
2731 | fSymbols=NULL; | |
2732 | initializeSymbols(fLocale, fCalendar, status); // we need new symbols | |
2733 | initializeDefaultCentury(); // we need a new century (possibly) | |
2734 | } | |
2735 | ||
46f4442e A |
2736 | |
2737 | //---------------------------------------------------------------------- | |
2738 | ||
2739 | ||
2740 | UBool | |
2741 | SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const { | |
2742 | return isFieldUnitIgnored(fPattern, field); | |
2743 | } | |
2744 | ||
2745 | ||
2746 | UBool | |
2747 | SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern, | |
2748 | UCalendarDateFields field) { | |
2749 | int32_t fieldLevel = fgCalendarFieldToLevel[field]; | |
2750 | int32_t level; | |
2751 | UChar ch; | |
2752 | UBool inQuote = FALSE; | |
2753 | UChar prevCh = 0; | |
2754 | int32_t count = 0; | |
2755 | ||
2756 | for (int32_t i = 0; i < pattern.length(); ++i) { | |
2757 | ch = pattern[i]; | |
2758 | if (ch != prevCh && count > 0) { | |
2759 | level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE]; | |
2760 | // the larger the level, the smaller the field unit. | |
2761 | if ( fieldLevel <= level ) { | |
2762 | return FALSE; | |
2763 | } | |
2764 | count = 0; | |
2765 | } | |
2766 | if (ch == QUOTE) { | |
2767 | if ((i+1) < pattern.length() && pattern[i+1] == QUOTE) { | |
2768 | ++i; | |
2769 | } else { | |
2770 | inQuote = ! inQuote; | |
2771 | } | |
2772 | } | |
2773 | else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) | |
2774 | || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) { | |
2775 | prevCh = ch; | |
2776 | ++count; | |
2777 | } | |
2778 | } | |
2779 | if ( count > 0 ) { | |
2780 | // last item | |
2781 | level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE]; | |
2782 | if ( fieldLevel <= level ) { | |
2783 | return FALSE; | |
2784 | } | |
2785 | } | |
2786 | return TRUE; | |
2787 | } | |
2788 | ||
2789 | ||
2790 | ||
2791 | const Locale& | |
2792 | SimpleDateFormat::getSmpFmtLocale(void) const { | |
2793 | return fLocale; | |
2794 | } | |
2795 | ||
2796 | ||
2797 | ||
b75a7d8f A |
2798 | U_NAMESPACE_END |
2799 | ||
2800 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
2801 | ||
2802 | //eof |