X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..c5116b9f5a666b9d59f443b3770acd6ef64dc6c3:/icuSources/i18n/smpdtfmt.cpp diff --git a/icuSources/i18n/smpdtfmt.cpp b/icuSources/i18n/smpdtfmt.cpp index 7f98827f..56c32c25 100644 --- a/icuSources/i18n/smpdtfmt.cpp +++ b/icuSources/i18n/smpdtfmt.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* -* Copyright (C) 1997-2009, International Business Machines Corporation and * +* Copyright (C) 1997-2016, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* * @@ -17,7 +19,7 @@ * Removed getZoneIndex (added in DateFormatSymbols) * Removed subParseLong * Removed chk -* 02/22/99 stephen Removed character literals for EBCDIC safety +* 02/22/99 stephen Removed character literals for EBCDIC safety * 10/14/99 aliu Updated 2-digit year parsing so that only "00" thru * "99" are recognized. 
{j28 4182066} * 11/15/99 weiv Added support for week of year/day of week format @@ -29,7 +31,6 @@ #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING - #include "unicode/smpdtfmt.h" #include "unicode/dtfmtsym.h" #include "unicode/ures.h" @@ -43,21 +44,41 @@ #include "unicode/uniset.h" #include "unicode/ustring.h" #include "unicode/basictz.h" +#include "unicode/simpleformatter.h" #include "unicode/simpletz.h" #include "unicode/rbtz.h" +#include "unicode/tzfmt.h" +#include "unicode/ucasemap.h" +#include "unicode/utf16.h" #include "unicode/vtzone.h" +#include "unicode/udisplaycontext.h" +#include "unicode/brkiter.h" +#include "unicode/rbnf.h" +#include "unicode/dtptngen.h" +#include "uresimp.h" #include "olsontz.h" -#include "util.h" -#include "gregoimp.h" +#include "patternprops.h" +#include "fphdlimp.h" +#include "hebrwcal.h" #include "cstring.h" #include "uassert.h" -#include "zstrfmt.h" #include "cmemory.h" #include "umutex.h" -#include "smpdtfst.h" #include <float.h> - -#if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL) +#include "smpdtfst.h" +#include "sharednumberformat.h" +#include "ucasemap_imp.h" +#include "ustr_imp.h" +#include "charstr.h" +#include "uvector.h" +#include "cstr.h" +#include "dayperiodrules.h" +#include "tznames_impl.h" // ZONE_NAME_U16_MAX +#include "number_utypes.h" + +#define DEBUG_SYNTHETIC_TIMEFMTS 0 + +#if defined( U_DEBUG_CALSVC ) || defined (U_DEBUG_CAL) || DEBUG_SYNTHETIC_TIMEFMTS #include <stdio.h> #endif @@ -67,31 +88,76 @@ U_NAMESPACE_BEGIN -static const UChar PATTERN_CHAR_BASE = 0x40; - /** * Last-resort string to use for "GMT" when constructing time zone strings. */ // For time zones that have no names, use strings GMT+minutes and // GMT-minutes. For instance, in France the time zone is GMT+60. // Also accepted are GMT+H:MM or GMT-H:MM. 
-static const UChar gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT" -static const UChar gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+" -static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-" -static const UChar gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */ -static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */ -static const UChar gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */ -static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */ -static const UChar gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */ +// Currently not being used +//static const UChar gGmt[] = {0x0047, 0x004D, 0x0054, 0x0000}; // "GMT" +//static const UChar gGmtPlus[] = {0x0047, 0x004D, 0x0054, 0x002B, 0x0000}; // "GMT+" +//static const UChar gGmtMinus[] = {0x0047, 0x004D, 0x0054, 0x002D, 0x0000}; // "GMT-" +//static const UChar gDefGmtPat[] = {0x0047, 0x004D, 0x0054, 0x007B, 0x0030, 0x007D, 0x0000}; /* GMT{0} */ +//static const UChar gDefGmtNegHmsPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* -HH:mm:ss */ +//static const UChar gDefGmtNegHmPat[] = {0x002D, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* -HH:mm */ +//static const UChar gDefGmtPosHmsPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x003A, 0x0073, 0x0073, 0x0000}; /* +HH:mm:ss */ +//static const UChar gDefGmtPosHmPat[] = {0x002B, 0x0048, 0x0048, 0x003A, 0x006D, 0x006D, 0x0000}; /* +HH:mm */ +//static const UChar gUt[] = {0x0055, 0x0054, 0x0000}; // "UT" +//static const UChar gUtc[] = {0x0055, 0x0054, 0x0043, 0x0000}; // "UTC" + typedef enum GmtPatSize { kGmtLen = 3, kGmtPatLen = 6, kNegHmsLen = 9, kNegHmLen = 6, kPosHmsLen = 9, - kPosHmLen = 6
+ kPosHmLen = 6, + kUtLen = 2, + kUtcLen = 3 } GmtPatSize; +// Stuff needed for numbering system overrides + +typedef enum OvrStrType { + kOvrStrDate = 0, + kOvrStrTime = 1, + kOvrStrBoth = 2 +} OvrStrType; + +static const UDateFormatField kDateFields[] = { + UDAT_YEAR_FIELD, + UDAT_MONTH_FIELD, + UDAT_DATE_FIELD, + UDAT_DAY_OF_YEAR_FIELD, + UDAT_DAY_OF_WEEK_IN_MONTH_FIELD, + UDAT_WEEK_OF_YEAR_FIELD, + UDAT_WEEK_OF_MONTH_FIELD, + UDAT_YEAR_WOY_FIELD, + UDAT_EXTENDED_YEAR_FIELD, + UDAT_JULIAN_DAY_FIELD, + UDAT_STANDALONE_DAY_FIELD, + UDAT_STANDALONE_MONTH_FIELD, + UDAT_QUARTER_FIELD, + UDAT_STANDALONE_QUARTER_FIELD, + UDAT_YEAR_NAME_FIELD, + UDAT_RELATED_YEAR_FIELD }; +static const int8_t kDateFieldsCount = 16; + +static const UDateFormatField kTimeFields[] = { + UDAT_HOUR_OF_DAY1_FIELD, + UDAT_HOUR_OF_DAY0_FIELD, + UDAT_MINUTE_FIELD, + UDAT_SECOND_FIELD, + UDAT_FRACTIONAL_SECOND_FIELD, + UDAT_HOUR1_FIELD, + UDAT_HOUR0_FIELD, + UDAT_MILLISECONDS_IN_DAY_FIELD, + UDAT_TIMEZONE_RFC_FIELD, + UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD }; +static const int8_t kTimeFieldsCount = 10; + + // This is a pattern-of-last-resort used when we can't load a usable pattern out // of a resource. static const UChar gDefaultPattern[] = @@ -108,18 +174,7 @@ static const UChar SUPPRESS_NEGATIVE_PREFIX[] = {0xAB00, 0}; * These are the tags we expect to see in normal resource bundle files associated * with a locale. */ -static const char gDateTimePatternsTag[]="DateTimePatterns"; - -static const UChar gEtcUTC[] = {0x45, 0x74, 0x63, 0x2F, 0x55, 0x54, 0x43, 0x00}; // "Etc/UTC" static const UChar QUOTE = 0x27; // Single quote -enum { - kGMTNegativeHMS = 0, - kGMTNegativeHM, - kGMTPositiveHMS, - kGMTPositiveHM, - - kNumGMTFormatters -}; /* * The field range check bias for each UDateFormatField. 
@@ -138,7 +193,7 @@ static const int32_t gFieldRangeBias[] = { -1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD -1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD 0, // 'm' - UDAT_MINUTE_FIELD - 0, // 's' - UDAT_SEOND_FIELD + 0, // 's' - UDAT_SECOND_FIELD -1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?) -1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?) -1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?) @@ -160,26 +215,169 @@ static const int32_t gFieldRangeBias[] = { 1, // 'L' - UDAT_STANDALONE_MONTH_FIELD -1, // 'Q' - UDAT_QUARTER_FIELD (1-4?) -1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD - -1 // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD + -1, // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD + -1, // 'U' - UDAT_YEAR_NAME_FIELD + -1, // 'O' - UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD + -1, // 'X' - UDAT_TIMEZONE_ISO_FIELD + -1, // 'x' - UDAT_TIMEZONE_ISO_LOCAL_FIELD + -1, // 'r' - UDAT_RELATED_YEAR_FIELD +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + -1, // ':' - UDAT_TIME_SEPARATOR_FIELD +#else + -1, // (no pattern character currently) - UDAT_TIME_SEPARATOR_FIELD +#endif +}; +// A slightly looser range check for lenient parsing +static const int32_t gFieldRangeBiasLenient[] = { + -1, // 'G' - UDAT_ERA_FIELD + -1, // 'y' - UDAT_YEAR_FIELD + 8, // 'M' - UDAT_MONTH_FIELD (allow calendar max + 7, e.g. 19 for grego 1-based month) + 18, // 'd' - UDAT_DATE_FIELD (allow calendar max + 18, e.g. 49 for grego; tests require at least 40 for grego) + -1, // 'k' - UDAT_HOUR_OF_DAY1_FIELD + -1, // 'H' - UDAT_HOUR_OF_DAY0_FIELD + 40, // 'm' - UDAT_MINUTE_FIELD (allow calendar max + 40, e.g. 99) + 40, // 's' - UDAT_SECOND_FIELD (allow calendar max + 40, e.g. 99) + -1, // 'S' - UDAT_FRACTIONAL_SECOND_FIELD (0-999?) + -1, // 'E' - UDAT_DAY_OF_WEEK_FIELD (1-7?) + -1, // 'D' - UDAT_DAY_OF_YEAR_FIELD (1 - 366?) + -1, // 'F' - UDAT_DAY_OF_WEEK_IN_MONTH_FIELD (1-5?) + -1, // 'w' - UDAT_WEEK_OF_YEAR_FIELD (1-52?) + -1, // 'W' - UDAT_WEEK_OF_MONTH_FIELD (1-5?) 
+ -1, // 'a' - UDAT_AM_PM_FIELD + -1, // 'h' - UDAT_HOUR1_FIELD + -1, // 'K' - UDAT_HOUR0_FIELD + -1, // 'z' - UDAT_TIMEZONE_FIELD + -1, // 'Y' - UDAT_YEAR_WOY_FIELD + -1, // 'e' - UDAT_DOW_LOCAL_FIELD + -1, // 'u' - UDAT_EXTENDED_YEAR_FIELD + -1, // 'g' - UDAT_JULIAN_DAY_FIELD + -1, // 'A' - UDAT_MILLISECONDS_IN_DAY_FIELD + -1, // 'Z' - UDAT_TIMEZONE_RFC_FIELD + -1, // 'v' - UDAT_TIMEZONE_GENERIC_FIELD + 18, // 'c' - UDAT_STANDALONE_DAY_FIELD (allow calendar max + 18, e.g. 49 for grego) + 8, // 'L' - UDAT_STANDALONE_MONTH_FIELD (allow calendar max + 7, e.g. 19 for grego 1-based month) + -1, // 'Q' - UDAT_QUARTER_FIELD (1-4?) + -1, // 'q' - UDAT_STANDALONE_QUARTER_FIELD + -1, // 'V' - UDAT_TIMEZONE_SPECIAL_FIELD + -1, // 'U' - UDAT_YEAR_NAME_FIELD + -1, // 'O' - UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD + -1, // 'X' - UDAT_TIMEZONE_ISO_FIELD + -1, // 'x' - UDAT_TIMEZONE_ISO_LOCAL_FIELD + -1, // 'r' - UDAT_RELATED_YEAR_FIELD +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + -1, // ':' - UDAT_TIME_SEPARATOR_FIELD +#else + -1, // (no pattern character currently) - UDAT_TIME_SEPARATOR_FIELD +#endif }; -static UMTX LOCK; +// When calendar uses hebr numbering (i.e. 
he@calendar=hebrew), +// offset the years within the current millenium down to 1-999 +static const int32_t HEBREW_CAL_CUR_MILLENIUM_START_YEAR = 5000; +static const int32_t HEBREW_CAL_CUR_MILLENIUM_END_YEAR = 6000; + +static UMutex *LOCK() { + static UMutex *m = STATIC_NEW(UMutex); + return m; +} UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SimpleDateFormat) +SimpleDateFormat::NSOverride::~NSOverride() { + if (snf != NULL) { + snf->removeRef(); + } +} + + +void SimpleDateFormat::NSOverride::free() { + NSOverride *cur = this; + while (cur) { + NSOverride *next_temp = cur->next; + delete cur; + cur = next_temp; + } +} + +// no matter what the locale's default number format looked like, we want +// to modify it so that it doesn't use thousands separators, doesn't always +// show the decimal point, and recognizes integers only when parsing +static void fixNumberFormatForDates(NumberFormat &nf) { + // Use new group setter equivalent to + // setGroupingUsed(FALSE); + // setDecimalSeparatorAlwaysShown(FALSE); + // setParseIntegerOnly(TRUE); + // setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00" + nf.setDateSettings(); // Apple rdar://50064762 +} + +static const SharedNumberFormat *createSharedNumberFormat( + NumberFormat *nfToAdopt) { + fixNumberFormatForDates(*nfToAdopt); + const SharedNumberFormat *result = new SharedNumberFormat(nfToAdopt); + if (result == NULL) { + delete nfToAdopt; + } + return result; +} + +static const SharedNumberFormat *createSharedNumberFormat( + const Locale &loc, UErrorCode &status) { + NumberFormat *nf = NumberFormat::createInstance(loc, status); + if (U_FAILURE(status)) { + return NULL; + } + const SharedNumberFormat *result = createSharedNumberFormat(nf); + if (result == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +static const SharedNumberFormat **allocSharedNumberFormatters() { + const SharedNumberFormat **result = (const SharedNumberFormat**) + uprv_malloc(UDAT_FIELD_COUNT * sizeof(const 
SharedNumberFormat*)); + if (result == NULL) { + return NULL; + } + for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) { + result[i] = NULL; + } + return result; +} + +static void freeSharedNumberFormatters(const SharedNumberFormat ** list) { + for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) { + SharedObject::clearPtr(list[i]); + } + uprv_free(list); +} + +const NumberFormat *SimpleDateFormat::getNumberFormatByIndex( + UDateFormatField index) const { + if (fSharedNumberFormatters == NULL || + fSharedNumberFormatters[index] == NULL) { + return fNumberFormat; + } + return &(**fSharedNumberFormatters[index]); +} + //---------------------------------------------------------------------- SimpleDateFormat::~SimpleDateFormat() { delete fSymbols; - if (fGMTFormatters) { - for (int32_t i = 0; i < kNumGMTFormatters; i++) { - if (fGMTFormatters[i]) { - delete fGMTFormatters[i]; - } - } - uprv_free(fGMTFormatters); + if (fSharedNumberFormatters) { + freeSharedNumberFormatters(fSharedNumberFormatters); } + if (fTimeZoneFormat) { + delete fTimeZoneFormat; + } + freeFastNumberFormatters(); + +#if !UCONFIG_NO_BREAK_ITERATION + delete fCapitalizationBrkIter; +#endif } //---------------------------------------------------------------------- @@ -187,8 +385,11 @@ SimpleDateFormat::~SimpleDateFormat() SimpleDateFormat::SimpleDateFormat(UErrorCode& status) : fLocale(Locale::getDefault()), fSymbols(NULL), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { + initializeBooleanAttributes(); construct(kShort, (EStyle) (kShort + kDateOffset), fLocale, status); initializeDefaultCentury(); } @@ -200,9 +401,62 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, : fPattern(pattern), fLocale(Locale::getDefault()), fSymbols(NULL), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) +{ + fDateOverride.setToBogus(); + fTimeOverride.setToBogus(); + 
initializeBooleanAttributes(); + initializeCalendar(NULL,fLocale,status); + fSymbols = DateFormatSymbols::createForLocale(fLocale, status); + initialize(fLocale, status); + initializeDefaultCentury(); + +} + +//---------------------------------------------------------------------- + +SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, + const UnicodeString& override, + UErrorCode &status) +: fPattern(pattern), + fLocale(Locale::getDefault()), + fSymbols(NULL), + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) +{ + fDateOverride.setTo(override); + fTimeOverride.setToBogus(); + initializeBooleanAttributes(); + initializeCalendar(NULL,fLocale,status); + fSymbols = DateFormatSymbols::createForLocale(fLocale, status); + initialize(fLocale, status); + initializeDefaultCentury(); + + processOverrideString(fLocale,override,kOvrStrBoth,status); + +} + +//---------------------------------------------------------------------- + +SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, + const Locale& locale, + UErrorCode& status) +: fPattern(pattern), + fLocale(locale), + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { - initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); + + fDateOverride.setToBogus(); + fTimeOverride.setToBogus(); + initializeBooleanAttributes(); + + initializeCalendar(NULL,fLocale,status); + fSymbols = DateFormatSymbols::createForLocale(fLocale, status); initialize(fLocale, status); initializeDefaultCentury(); } @@ -210,15 +464,27 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, //---------------------------------------------------------------------- SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, + const UnicodeString& override, const Locale& locale, UErrorCode& status) : fPattern(pattern), fLocale(locale), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + 
fCapitalizationBrkIter(NULL) { - initializeSymbols(fLocale, initializeCalendar(NULL,fLocale,status), status); + + fDateOverride.setTo(override); + fTimeOverride.setToBogus(); + initializeBooleanAttributes(); + + initializeCalendar(NULL,fLocale,status); + fSymbols = DateFormatSymbols::createForLocale(fLocale, status); initialize(fLocale, status); initializeDefaultCentury(); + + processOverrideString(locale,override,kOvrStrBoth,status); + } //---------------------------------------------------------------------- @@ -229,8 +495,15 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, : fPattern(pattern), fLocale(Locale::getDefault()), fSymbols(symbolsToAdopt), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { + + fDateOverride.setToBogus(); + fTimeOverride.setToBogus(); + initializeBooleanAttributes(); + initializeCalendar(NULL,fLocale,status); initialize(fLocale, status); initializeDefaultCentury(); @@ -244,8 +517,15 @@ SimpleDateFormat::SimpleDateFormat(const UnicodeString& pattern, : fPattern(pattern), fLocale(Locale::getDefault()), fSymbols(new DateFormatSymbols(symbols)), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { + + fDateOverride.setToBogus(); + fTimeOverride.setToBogus(); + initializeBooleanAttributes(); + initializeCalendar(NULL, fLocale, status); initialize(fLocale, status); initializeDefaultCentury(); @@ -260,8 +540,11 @@ SimpleDateFormat::SimpleDateFormat(EStyle timeStyle, UErrorCode& status) : fLocale(locale), fSymbols(NULL), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { + initializeBooleanAttributes(); construct(timeStyle, dateStyle, fLocale, status); if(U_SUCCESS(status)) { initializeDefaultCentury(); @@ -280,10 +563,14 @@ SimpleDateFormat::SimpleDateFormat(const Locale& locale, : fPattern(gDefaultPattern), fLocale(locale), fSymbols(NULL), 
- fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { if (U_FAILURE(status)) return; - initializeSymbols(fLocale, initializeCalendar(NULL, fLocale, status),status); + initializeBooleanAttributes(); + initializeCalendar(NULL, fLocale, status); + fSymbols = DateFormatSymbols::createForLocale(fLocale, status); if (U_FAILURE(status)) { status = U_ZERO_ERROR; @@ -297,6 +584,9 @@ SimpleDateFormat::SimpleDateFormat(const Locale& locale, } } + fDateOverride.setToBogus(); + fTimeOverride.setToBogus(); + initialize(fLocale, status); if(U_SUCCESS(status)) { initializeDefaultCentury(); @@ -307,9 +597,13 @@ SimpleDateFormat::SimpleDateFormat(const Locale& locale, SimpleDateFormat::SimpleDateFormat(const SimpleDateFormat& other) : DateFormat(other), + fLocale(other.fLocale), fSymbols(NULL), - fGMTFormatters(NULL) + fTimeZoneFormat(NULL), + fSharedNumberFormatters(NULL), + fCapitalizationBrkIter(NULL) { + initializeBooleanAttributes(); *this = other; } @@ -320,7 +614,10 @@ SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other) if (this == &other) { return *this; } + freeFastNumberFormatters(); // deletes refs to fNumberFormat's symbols DateFormat::operator=(other); + fDateOverride = other.fDateOverride; + fTimeOverride = other.fTimeOverride; delete fSymbols; fSymbols = NULL; @@ -333,6 +630,41 @@ SimpleDateFormat& SimpleDateFormat::operator=(const SimpleDateFormat& other) fHaveDefaultCentury = other.fHaveDefaultCentury; fPattern = other.fPattern; + fHasMinute = other.fHasMinute; + fHasSecond = other.fHasSecond; + + fLocale = other.fLocale; + // TimeZoneFormat can now be set independently via setter. 
+ // If it is NULL, it will be lazily initialized from locale + delete fTimeZoneFormat; + fTimeZoneFormat = NULL; + if (other.fTimeZoneFormat) { + fTimeZoneFormat = new TimeZoneFormat(*other.fTimeZoneFormat); + } + +#if !UCONFIG_NO_BREAK_ITERATION + if (other.fCapitalizationBrkIter != NULL) { + fCapitalizationBrkIter = (other.fCapitalizationBrkIter)->clone(); + } +#endif + + if (fSharedNumberFormatters != NULL) { + freeSharedNumberFormatters(fSharedNumberFormatters); + fSharedNumberFormatters = NULL; + } + if (other.fSharedNumberFormatters != NULL) { + fSharedNumberFormatters = allocSharedNumberFormatters(); + if (fSharedNumberFormatters) { + for (int32_t i = 0; i < UDAT_FIELD_COUNT; ++i) { + SharedObject::copyPtr( + other.fSharedNumberFormatters[i], + fSharedNumberFormatters[i]); + } + } + } + + UErrorCode localStatus = U_ZERO_ERROR; + initFastNumberFormatters(localStatus); return *this; } @@ -351,6 +683,8 @@ UBool SimpleDateFormat::operator==(const Format& other) const { if (DateFormat::operator==(other)) { + // The DateFormat::operator== check for fCapitalizationContext equality above + // is sufficient to check equality of all derived context-related data. 
// DateFormat::operator== guarantees following cast is safe SimpleDateFormat* that = (SimpleDateFormat*)&other; return (fPattern == that->fPattern && @@ -358,12 +692,27 @@ SimpleDateFormat::operator==(const Format& other) const that->fSymbols != NULL && // Check for pathological object *fSymbols == *that->fSymbols && fHaveDefaultCentury == that->fHaveDefaultCentury && - fDefaultCenturyStart == that->fDefaultCenturyStart); + fDefaultCenturyStart == that->fDefaultCenturyStart && + // Check fTimeZoneFormat, it can be set independently via setter + ((fTimeZoneFormat == NULL && that->fTimeZoneFormat == NULL) || + (fTimeZoneFormat != NULL && that->fTimeZoneFormat != NULL && *fTimeZoneFormat == *that->fTimeZoneFormat)) && + // Check override strings (these also indicate any relevant + // differences in fNumberFormatters, fOverrideList) + fDateOverride == that->fDateOverride && + fTimeOverride == that->fTimeOverride); } return FALSE; } //---------------------------------------------------------------------- +static const UChar* timeSkeletons[4] = { + u"jmmsszzzz", // kFull + u"jmmssz", // kLong + u"jmmss", // kMedium + u"jmm", // kShort +}; + +enum { kBaseNameMax = ULOC_LANG_CAPACITY + ULOC_SCRIPT_CAPACITY + ULOC_COUNTRY_CAPACITY }; // includes separators and 0 term void SimpleDateFormat::construct(EStyle timeStyle, EStyle dateStyle, @@ -377,21 +726,45 @@ void SimpleDateFormat::construct(EStyle timeStyle, initializeCalendar(NULL, locale, status); if (U_FAILURE(status)) return; - CalendarData calData(locale, fCalendar?fCalendar->getType():NULL, status); - UResourceBundle *dateTimePatterns = calData.getByKey(gDateTimePatternsTag, status); + // Load date time patterns directly from resources. + const char* cType = fCalendar ? 
fCalendar->getType() : NULL; + LocalUResourceBundlePointer bundle(ures_open(NULL, locale.getBaseName(), &status)); + if (U_FAILURE(status)) return; + + UBool cTypeIsGregorian = TRUE; + LocalUResourceBundlePointer dateTimePatterns; + if (cType != NULL && uprv_strcmp(cType, "gregorian") != 0) { + CharString resourcePath("calendar/", status); + resourcePath.append(cType, status).append("/DateTimePatterns", status); + dateTimePatterns.adoptInstead( + ures_getByKeyWithFallback(bundle.getAlias(), resourcePath.data(), + (UResourceBundle*)NULL, &status)); + cTypeIsGregorian = FALSE; + } + + // Check for "gregorian" fallback. + if (cTypeIsGregorian || status == U_MISSING_RESOURCE_ERROR) { + status = U_ZERO_ERROR; + dateTimePatterns.adoptInstead( + ures_getByKeyWithFallback(bundle.getAlias(), + "calendar/gregorian/DateTimePatterns", + (UResourceBundle*)NULL, &status)); + } if (U_FAILURE(status)) return; - if (ures_getSize(dateTimePatterns) <= kDateTime) + LocalUResourceBundlePointer currentBundle; + + if (ures_getSize(dateTimePatterns.getAlias()) <= kDateTime) { status = U_INVALID_FORMAT_ERROR; return; } - setLocaleIDs(ures_getLocaleByType(dateTimePatterns, ULOC_VALID_LOCALE, &status), - ures_getLocaleByType(dateTimePatterns, ULOC_ACTUAL_LOCALE, &status)); + setLocaleIDs(ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_VALID_LOCALE, &status), + ures_getLocaleByType(dateTimePatterns.getAlias(), ULOC_ACTUAL_LOCALE, &status)); // create a symbols object from the locale - initializeSymbols(locale,fCalendar, status); + fSymbols = DateFormatSymbols::createForLocale(locale, status); if (U_FAILURE(status)) return; /* test for NULL */ if (fSymbols == 0) { @@ -399,53 +772,178 @@ void SimpleDateFormat::construct(EStyle timeStyle, return; } - const UChar *resStr; - int32_t resStrLen = 0; + const UChar *resStr,*ovrStr; + int32_t resStrLen,ovrStrLen = 0; + fDateOverride.setToBogus(); + fTimeOverride.setToBogus(); + + UnicodeString timePattern; + if (timeStyle >= kFull && timeStyle 
<= kShort) { + const char* baseLoc = locale.getBaseName(); + if (baseLoc!=NULL && baseLoc[0]!=0 && uprv_strcmp(baseLoc,"und")!=0) { + UErrorCode useStatus = U_ZERO_ERROR; + const char* validLoc = getLocaleID(ULOC_VALID_LOCALE, useStatus); + if (U_SUCCESS(useStatus) && uprv_strcmp(validLoc,baseLoc)!=0) { + char minLoc[kBaseNameMax]; + uloc_minimizeSubtags(baseLoc, minLoc, kBaseNameMax, &useStatus); + minLoc[kBaseNameMax-1] = 0; // ensure zero term + const char* actualLoc = getLocaleID(ULOC_ACTUAL_LOCALE, useStatus); + if (U_SUCCESS(useStatus) && uprv_strcmp(actualLoc,minLoc)!=0) { + // The standard time formats may have the wrong time cycle, because: + // * the valid locale is not the same as the base locale, or + // * the actual locale the patterns are coming from is not the same + // as the minimized locale. + // We could *also* check whether they do actually have a mismatch with + // the time cycle preferences for the region, but that is a lot more + // work for little or no additional benefit, since just going ahead + // and always synthesizing the time format as per the following should + // create a locale-appropriate pattern with cycle that matches the + // region preferences anyway (for completely unsupported languages, + // this will use root patterns for the appropriate cycle for the + // likely subtags region).
+ LocalPointer<DateTimePatternGenerator> dtpg(DateTimePatternGenerator::createInstance(locale, useStatus, TRUE)); + if (U_SUCCESS(useStatus)) { + UnicodeString timeSkeleton(TRUE, timeSkeletons[timeStyle], -1); + timePattern = dtpg->getBestPattern(timeSkeleton, useStatus); +#if DEBUG_SYNTHETIC_TIMEFMTS + if (timePattern.length() != 0) { + char bbuf[32]; + timePattern.extract(0,timePattern.length(),bbuf,32); + printf("\n## for locale %s, validLoc %s, minLoc %s, actualLoc %s, synth timePat %s\n", locale.getName(), validLoc, minLoc, actualLoc, bbuf); + } +#endif + } + } + } + } + } // if the pattern should include both date and time information, use the date/time // pattern string as a guide to tell use how to glue together the appropriate date - // and time pattern strings. The actual gluing-together is handled by a convenience - // method on MessageFormat. + // and time pattern strings. if ((timeStyle != kNone) && (dateStyle != kNone)) { - Formattable timeDateArray[2]; - - // use Formattable::adoptString() so that we can use fastCopyFrom() - // instead of Formattable::setString()'s unaware, safe, deep string clone - // see Jitterbug 2296 - resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status); - UnicodeString *tempus1 = new UnicodeString(TRUE, resStr, resStrLen); - // NULL pointer check - if (tempus1 == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; + UnicodeString tempus1(timePattern); + if (tempus1.length() == 0) { + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); + if (U_FAILURE(status)) { + status = U_INVALID_FORMAT_ERROR; + return; + } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 
1, &ovrStrLen, &status); + fTimeOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } + + tempus1.setTo(TRUE, resStr, resStrLen); } - timeDateArray[0].adoptString(tempus1); - - resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status); - UnicodeString *tempus2 = new UnicodeString(TRUE, resStr, resStrLen); - // Null pointer check - if (tempus2 == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; + + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, NULL, &status)); + if (U_FAILURE(status)) { + status = U_INVALID_FORMAT_ERROR; + return; + } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fDateOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } } - timeDateArray[1].adoptString(tempus2); - resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)kDateTime, &resStrLen, &status); - MessageFormat::format(UnicodeString(TRUE, resStr, resStrLen), timeDateArray, 2, fPattern, status); + UnicodeString tempus2(TRUE, resStr, resStrLen); + + int32_t glueIndex = kDateTime; + int32_t patternsSize = ures_getSize(dateTimePatterns.getAlias()); + if (patternsSize >= (kDateTimeOffset + kShort + 1)) { + // Get proper date time format + glueIndex = (int32_t)(kDateTimeOffset + (dateStyle - kDateOffset)); + } + + resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), glueIndex, &resStrLen, &status); + SimpleFormatter(UnicodeString(TRUE, resStr, resStrLen), 2, 2, status). 
+ format(tempus1, tempus2, fPattern, status); } // if the pattern includes just time data or just date date, load the appropriate // pattern string from the resources // setTo() - see DateFormatSymbols::assignArray comments else if (timeStyle != kNone) { - resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)timeStyle, &resStrLen, &status); - fPattern.setTo(TRUE, resStr, resStrLen); + fPattern.setTo(timePattern); + if (fPattern.length() == 0) { + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)timeStyle, NULL, &status)); + if (U_FAILURE(status)) { + status = U_INVALID_FORMAT_ERROR; + return; + } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fDateOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } + fPattern.setTo(TRUE, resStr, resStrLen); + } } else if (dateStyle != kNone) { - resStr = ures_getStringByIndex(dateTimePatterns, (int32_t)dateStyle, &resStrLen, &status); + currentBundle.adoptInstead( + ures_getByIndex(dateTimePatterns.getAlias(), (int32_t)dateStyle, NULL, &status)); + if (U_FAILURE(status)) { + status = U_INVALID_FORMAT_ERROR; + return; + } + switch (ures_getType(currentBundle.getAlias())) { + case URES_STRING: { + resStr = ures_getString(currentBundle.getAlias(), &resStrLen, &status); + break; + } + case URES_ARRAY: { + resStr = ures_getStringByIndex(currentBundle.getAlias(), 0, &resStrLen, &status); + ovrStr = ures_getStringByIndex(currentBundle.getAlias(), 1, &ovrStrLen, &status); + fDateOverride.setTo(TRUE, ovrStr, ovrStrLen); + break; + } + default: { + status = U_INVALID_FORMAT_ERROR; + return; + } + } fPattern.setTo(TRUE, resStr, 
resStrLen); } - + // and if it includes _neither_, that's an error else status = U_INVALID_FORMAT_ERROR; @@ -462,51 +960,51 @@ SimpleDateFormat::initializeCalendar(TimeZone* adoptZone, const Locale& locale, if(!U_FAILURE(status)) { fCalendar = Calendar::createInstance(adoptZone?adoptZone:TimeZone::createDefault(), locale, status); } - if (U_SUCCESS(status) && fCalendar == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } return fCalendar; } -void -SimpleDateFormat::initializeSymbols(const Locale& locale, Calendar* calendar, UErrorCode& status) -{ - if(U_FAILURE(status)) { - fSymbols = NULL; - } else { - // pass in calendar type - use NULL (default) if no calendar set (or err). - fSymbols = new DateFormatSymbols(locale, calendar?calendar->getType() :NULL , status); - // Null pointer check - if (fSymbols == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } -} - void SimpleDateFormat::initialize(const Locale& locale, UErrorCode& status) { if (U_FAILURE(status)) return; + parsePattern(); // Need this before initNumberFormatters(), to set fHasHanYearChar + + // If the locale has @[....]numbers=hanidays we want to *delete* that (so it + // it is not used for every field) and then set fDateOverride to "d=hanidays" + // (as with std formats for zh@calendar=chinese) to use hanidays for d field. 
+ static const UChar hanidaysOverride[] = {0x64,0x3D,0x68,0x61,0x6E,0x69,0x64,0x61,0x79,0x73,0}; // "d=hanidays" + char numbersValue[ULOC_KEYWORDS_CAPACITY]; + UErrorCode numbersStatus = U_ZERO_ERROR; + Locale localeNoHanidays(locale); + int32_t numbersLen = localeNoHanidays.getKeywordValue("numbers", numbersValue, ULOC_KEYWORDS_CAPACITY, numbersStatus); + if ( U_SUCCESS(numbersStatus) && numbersLen > 0 ) { + if ( uprv_strcmp(numbersValue, "hanidays") == 0 ) { + localeNoHanidays.setKeywordValue("numbers", NULL, numbersStatus); + fDateOverride.setTo(hanidaysOverride,-1); + } + } + // Simple-minded hack to force Gannen year numbering for ja@calendar=japanese + // if format is non-numeric (includes å¹´) and fDateOverride is not already specified. + // Now this does get updated if applyPattern subsequently changes the pattern type. + if (fDateOverride.isBogus() && fHasHanYearChar && + fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 && + uprv_strcmp(fLocale.getLanguage(),"ja") == 0) { + fDateOverride.setTo(u"y=jpanyear", -1); + } + // We don't need to check that the row count is >= 1, since all 2d arrays have at // least one row - fNumberFormat = NumberFormat::createInstance(locale, status); + fNumberFormat = NumberFormat::createInstance(localeNoHanidays, status); if (fNumberFormat != NULL && U_SUCCESS(status)) { - // no matter what the locale's default number format looked like, we want - // to modify it so that it doesn't use thousands separators, doesn't always - // show the decimal point, and recognizes integers only when parsing - - fNumberFormat->setGroupingUsed(FALSE); - if (fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) - ((DecimalFormat*)fNumberFormat)->setDecimalSeparatorAlwaysShown(FALSE); - fNumberFormat->setParseIntegerOnly(TRUE); - fNumberFormat->setMinimumFractionDigits(0); // To prevent "Jan 1.00, 1997.00" - - // TODO: Really, the default should be lenient... 
- fNumberFormat->setParseStrict(FALSE); + fixNumberFormatForDates(*fNumberFormat); + //fNumberFormat->setLenient(TRUE); // Java uses a custom DateNumberFormat to format/parse + + initNumberFormatters(locale, status); + initFastNumberFormatters(status); + } else if (U_SUCCESS(status)) { @@ -517,7 +1015,7 @@ SimpleDateFormat::initialize(const Locale& locale, /* Initialize the fields we use to disambiguate ambiguous years. Separate * so we can call it from readObject(). */ -void SimpleDateFormat::initializeDefaultCentury() +void SimpleDateFormat::initializeDefaultCentury() { if(fCalendar) { fHaveDefaultCentury = fCalendar->haveDefaultCentury(); @@ -531,10 +1029,23 @@ void SimpleDateFormat::initializeDefaultCentury() } } +/* + * Initialize the boolean attributes. Separate so we can call it from all constructors. + */ +void SimpleDateFormat::initializeBooleanAttributes() +{ + UErrorCode status = U_ZERO_ERROR; + + setBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, true, status); + setBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, true, status); + setBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, true, status); + setBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, true, status); +} + /* Define one-century window into which to disambiguate dates using * two-digit years. Make public in JDK 1.2. 
*/ -void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status) +void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& status) { if(U_FAILURE(status)) { return; @@ -543,7 +1054,7 @@ void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& s status = U_ILLEGAL_ARGUMENT_ERROR; return; } - + fCalendar->setTime(startDate, status); if(U_SUCCESS(status)) { fHaveDefaultCentury = TRUE; @@ -551,28 +1062,68 @@ void SimpleDateFormat::parseAmbiguousDatesAsAfter(UDate startDate, UErrorCode& s fDefaultCenturyStartYear = fCalendar->get(UCAL_YEAR, status); } } - + //---------------------------------------------------------------------- UnicodeString& SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& pos) const { - UErrorCode status = U_ZERO_ERROR; - pos.setBeginIndex(0); - pos.setEndIndex(0); + UErrorCode status = U_ZERO_ERROR; + FieldPositionOnlyHandler handler(pos); + return _format(cal, appendTo, handler, status); +} + +//---------------------------------------------------------------------- + +UnicodeString& +SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, + FieldPositionIterator* posIter, UErrorCode& status) const +{ + FieldPositionIteratorHandler handler(posIter, status); + return _format(cal, appendTo, handler, status); +} + +//---------------------------------------------------------------------- + +UnicodeString& +SimpleDateFormat::_format(Calendar& cal, UnicodeString& appendTo, + FieldPositionHandler& handler, UErrorCode& status) const +{ + if ( U_FAILURE(status) ) { + return appendTo; + } + Calendar* workCal = &cal; + Calendar* calClone = NULL; + if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) { + // Different calendar type + // We use the time and time zone from the input calendar, but + // do not use the input calendar for field calculation. 
+ calClone = fCalendar->clone(); + if (calClone != NULL) { + UDate t = cal.getTime(status); + calClone->setTime(t, status); + calClone->setTimeZone(cal.getTimeZone()); + workCal = calClone; + } else { + status = U_MEMORY_ALLOCATION_ERROR; + return appendTo; + } + } UBool inQuote = FALSE; UChar prevCh = 0; int32_t count = 0; - + int32_t fieldNum = 0; + UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status); + // loop through the pattern string character by character for (int32_t i = 0; i < fPattern.length() && U_SUCCESS(status); ++i) { UChar ch = fPattern[i]; - + // Use subFormat() to format a repeated pattern character // when a different pattern or non-pattern character is seen if (ch != prevCh && count > 0) { - subFormat(appendTo, prevCh, count, pos, cal, status); + subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++, handler, *workCal, status); count = 0; } if (ch == QUOTE) { @@ -584,9 +1135,8 @@ SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& } else { inQuote = ! inQuote; } - } - else if ( ! inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) - || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) { + } + else if (!inQuote && isSyntaxChar(ch)) { // ch is a date-time pattern character to be interpreted // by subFormat(); count the number of times it is repeated prevCh = ch; @@ -600,31 +1150,14 @@ SimpleDateFormat::format(Calendar& cal, UnicodeString& appendTo, FieldPosition& // Format the last item in the pattern, if any if (count > 0) { - subFormat(appendTo, prevCh, count, pos, cal, status); + subFormat(appendTo, prevCh, count, capitalizationContext, fieldNum++, handler, *workCal, status); } - // and if something failed (e.g., an invalid format character), reset our FieldPosition - // to (0, 0) to show that - // {sfb} look at this later- are these being set correctly? 
- if (U_FAILURE(status)) { - pos.setBeginIndex(0); - pos.setEndIndex(0); + if (calClone != NULL) { + delete calClone; } - - return appendTo; -} -UnicodeString& -SimpleDateFormat::format(const Formattable& obj, - UnicodeString& appendTo, - FieldPosition& pos, - UErrorCode& status) const -{ - // this is just here to get around the hiding problem - // (the previous format() override would hide the version of - // format() on DateFormat that this function correspond to, so we - // have to redefine it here) - return DateFormat::format(obj, appendTo, pos, status); + return appendTo; } //---------------------------------------------------------------------- @@ -642,29 +1175,85 @@ SimpleDateFormat::fgCalendarFieldToLevel[] = /*wW*/ 20, 30, /*dDEF*/ 30, 20, 30, 30, /*ahHm*/ 40, 50, 50, 60, - /*sS..*/ 70, 80, - /*z?Y*/ 0, 0, 10, + /*sS*/ 70, 80, + /*z?Y*/ 0, 0, 10, /*eug*/ 30, 10, 0, - /*A*/ 40 + /*A?.*/ 40, 0, 0 }; +int32_t SimpleDateFormat::getLevelFromChar(UChar ch) { + // Map date field LETTER into calendar field level. + // the larger the level, the smaller the field unit. + // NOTE: if new fields adds in, the table needs to update. + static const int32_t mapCharToLevel[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // ! " # $ % & ' ( ) * + , - . / + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, +#else + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, +#endif + // @ A B C D E F G H I J K L M N O + -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, 0, + // P Q R S T U V W X Y Z [ \ ] ^ _ + -1, 20, -1, 80, -1, 10, 0, 30, 0, 10, 0, -1, -1, -1, -1, -1, + // ` a b c d e f g h i j k l m n o + -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, 0, 60, -1, -1, + // p q r s t u v w x y z { | } ~ + -1, 20, 10, 70, -1, 10, 0, 20, 0, 10, 0, -1, -1, -1, -1, -1 + }; + + return ch < UPRV_LENGTHOF(mapCharToLevel) ? mapCharToLevel[ch] : -1; +} -/* Map calendar field LETTER into calendar field level. - * the larger the level, the smaller the field unit. - * NOTE: if new fields adds in, the table needs to update. - */ -const int32_t -SimpleDateFormat::fgPatternCharToLevel[] = { - // A B C D E F G H I J K L M N O - -1, 40, -1, -1, 20, 30, 30, 0, 50, -1, -1, 50, 20, 20, -1, -1, - // P Q R S T U V W X Y Z - -1, 20, -1, 80, -1, -1, 0, 30, -1, 10, 0, -1, -1, -1, -1, -1, - // a b c d e f g h i j k l m n o - -1, 40, -1, 30, 30, 30, -1, 0, 50, -1, -1, 50, -1, 60, -1, -1, - // p q r s t u v w x y z - -1, 20, -1, 70, -1, 10, 0, 20, -1, 10, 0, -1, -1, -1, -1, -1 -}; - +UBool SimpleDateFormat::isSyntaxChar(UChar ch) { + static const UBool mapCharToIsSyntax[] = { + // + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + // + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + // + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + // + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + // ! " # $ % & ' + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + // ( ) * + , - . / + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, + // 0 1 2 3 4 5 6 7 + FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + // 8 9 : ; < = > ? + FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, +#else + // 8 9 : ; < = > ? 
+ FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, +#endif + // @ A B C D E F G + FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + // H I J K L M N O + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + // P Q R S T U V W + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + // X Y Z [ \ ] ^ _ + TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, + // ` a b c d e f g + FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + // h i j k l m n o + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + // p q r s t u v w + TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, + // x y z { | } ~ + TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE + }; + + return ch < UPRV_LENGTHOF(mapCharToIsSyntax) ? mapCharToIsSyntax[ch] : FALSE; +} // Map index into pattern character string to Calendar field number. const UCalendarDateFields @@ -684,6 +1273,16 @@ SimpleDateFormat::fgPatternIndexToCalendarField[] = /*Q*/ UCAL_MONTH, /*q*/ UCAL_MONTH, /*V*/ UCAL_ZONE_OFFSET, + /*U*/ UCAL_YEAR, + /*O*/ UCAL_ZONE_OFFSET, + /*Xx*/ UCAL_ZONE_OFFSET, UCAL_ZONE_OFFSET, + /*r*/ UCAL_EXTENDED_YEAR, + /*bB*/ UCAL_FIELD_COUNT, UCAL_FIELD_COUNT, // no mappings to calendar fields +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + /*:*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */ +#else + /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UCAL_FIELD_COUNT, /* => no useful mapping to any calendar field */ +#endif }; // Map index into pattern character string to DateFormat field number @@ -703,6 +1302,16 @@ SimpleDateFormat::fgPatternIndexToDateFormatField[] = { /*Q*/ UDAT_QUARTER_FIELD, /*q*/ UDAT_STANDALONE_QUARTER_FIELD, /*V*/ UDAT_TIMEZONE_SPECIAL_FIELD, + /*U*/ UDAT_YEAR_NAME_FIELD, + /*O*/ UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD, + /*Xx*/ UDAT_TIMEZONE_ISO_FIELD, UDAT_TIMEZONE_ISO_LOCAL_FIELD, + /*r*/ UDAT_RELATED_YEAR_FIELD, + /*bB*/ UDAT_AM_PM_MIDNIGHT_NOON_FIELD, UDAT_FLEXIBLE_DAY_PERIOD_FIELD, +#if UDAT_HAS_PATTERN_CHAR_FOR_TIME_SEPARATOR + /*:*/ UDAT_TIME_SEPARATOR_FIELD, 
+#else + /*no pattern char for UDAT_TIME_SEPARATOR_FIELD*/ UDAT_TIME_SEPARATOR_FIELD, +#endif }; //---------------------------------------------------------------------- @@ -722,337 +1331,197 @@ _appendSymbol(UnicodeString& dst, } } -//--------------------------------------------------------------------- -void -SimpleDateFormat::appendGMT(UnicodeString &appendTo, Calendar& cal, UErrorCode& status) const{ - int32_t offset = cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status); - if (U_FAILURE(status)) { - return; - } - if (isDefaultGMTFormat()) { - formatGMTDefault(appendTo, offset); - } else { - ((SimpleDateFormat*)this)->initGMTFormatters(status); - if (U_SUCCESS(status)) { - int32_t type; - if (offset < 0) { - offset = -offset; - type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTNegativeHM : kGMTNegativeHMS; - } else { - type = (offset % U_MILLIS_PER_MINUTE) == 0 ? kGMTPositiveHM : kGMTPositiveHMS; - } - Formattable param(offset, Formattable::kIsDate); - FieldPosition fpos(0); - fGMTFormatters[type]->format(¶m, 1, appendTo, fpos, status); +static inline void +_appendSymbolWithMonthPattern(UnicodeString& dst, int32_t value, const UnicodeString* symbols, int32_t symbolsCount, + const UnicodeString* monthPattern, UErrorCode& status) { + U_ASSERT(0 <= value && value < symbolsCount); + if (0 <= value && value < symbolsCount) { + if (monthPattern == NULL) { + dst += symbols[value]; + } else { + SimpleFormatter(*monthPattern, 1, 1, status).format(symbols[value], dst, status); } } } -int32_t -SimpleDateFormat::parseGMT(const UnicodeString &text, ParsePosition &pos) const { - if (!isDefaultGMTFormat()) { - int32_t start = pos.getIndex(); - - // Quick check - UBool prefixMatch = FALSE; - int32_t prefixLen = fSymbols->fGmtFormat.indexOf((UChar)0x007B /* '{' */); - if (prefixLen > 0 && text.compare(start, prefixLen, fSymbols->fGmtFormat, 0, prefixLen) == 0) { - prefixMatch = TRUE; - } - if (prefixMatch) { - // Prefix matched - UErrorCode status = U_ZERO_ERROR; 
- ((SimpleDateFormat*)this)->initGMTFormatters(status); - if (U_SUCCESS(status)) { - Formattable parsed; - int32_t parsedCount; - - // Try negative Hms - fGMTFormatters[kGMTNegativeHMS]->parseObject(text, parsed, pos); - if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { - parsed.getArray(parsedCount); - if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { - return (int32_t)(-1 * (int64_t)parsed[0].getDate()); - } - } - - // Reset ParsePosition - pos.setIndex(start); - pos.setErrorIndex(-1); - - // Try positive Hms - fGMTFormatters[kGMTPositiveHMS]->parseObject(text, parsed, pos); - if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { - parsed.getArray(parsedCount); - if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { - return (int32_t)((int64_t)parsed[0].getDate()); - } - } - - // Reset ParsePosition - pos.setIndex(start); - pos.setErrorIndex(-1); - - // Try negative Hm - fGMTFormatters[kGMTNegativeHM]->parseObject(text, parsed, pos); - if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { - parsed.getArray(parsedCount); - if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { - return (int32_t)(-1 * (int64_t)parsed[0].getDate()); - } - } - - // Reset ParsePosition - pos.setIndex(start); - pos.setErrorIndex(-1); - - // Try positive Hm - fGMTFormatters[kGMTPositiveHM]->parseObject(text, parsed, pos); - if (pos.getErrorIndex() == -1 && pos.getIndex() > start) { - parsed.getArray(parsedCount); - if (parsedCount == 1 && parsed[0].getType() == Formattable::kDate) { - return (int32_t)((int64_t)parsed[0].getDate()); - } - } +//---------------------------------------------------------------------- - // Reset ParsePosition - pos.setIndex(start); - pos.setErrorIndex(-1); - } - // fall through to the default GMT parsing method - } - } - return parseGMTDefault(text, pos); +static number::LocalizedNumberFormatter* +createFastFormatter(const DecimalFormat* df, int32_t minInt, int32_t maxInt) { + return new 
number::LocalizedNumberFormatter( + df->toNumberFormatter() + .integerWidth(number::IntegerWidth::zeroFillTo(minInt).truncateAt(maxInt))); } -void -SimpleDateFormat::formatGMTDefault(UnicodeString &appendTo, int32_t offset) const { - if (offset < 0) { - appendTo += gGmtMinus; - offset = -offset; // suppress the '-' sign for text display. - }else{ - appendTo += gGmtPlus; +void SimpleDateFormat::initFastNumberFormatters(UErrorCode& status) { + if (U_FAILURE(status)) { + return; } - - offset /= U_MILLIS_PER_SECOND; // now in seconds - int32_t sec = offset % 60; - offset /= 60; - int32_t min = offset % 60; - int32_t hour = offset / 60; - - - zeroPaddingNumber(appendTo, hour, 2, 2); - appendTo += (UChar)0x003A /*':'*/; - zeroPaddingNumber(appendTo, min, 2, 2); - if (sec != 0) { - appendTo += (UChar)0x003A /*':'*/; - zeroPaddingNumber(appendTo, sec, 2, 2); + auto* df = dynamic_cast<DecimalFormat*>(fNumberFormat); + if (df == nullptr) { + return; } + df->setDFSShallowCopy(TRUE); + fFastNumberFormatters[SMPDTFMT_NF_1x10] = createFastFormatter(df, 1, 10); + fFastNumberFormatters[SMPDTFMT_NF_2x10] = createFastFormatter(df, 2, 10); + fFastNumberFormatters[SMPDTFMT_NF_3x10] = createFastFormatter(df, 3, 10); + fFastNumberFormatters[SMPDTFMT_NF_4x10] = createFastFormatter(df, 4, 10); + fFastNumberFormatters[SMPDTFMT_NF_2x2] = createFastFormatter(df, 2, 2); + df->setDFSShallowCopy(FALSE); } -int32_t -SimpleDateFormat::parseGMTDefault(const UnicodeString &text, ParsePosition &pos) const { - int32_t start = pos.getIndex(); +void SimpleDateFormat::freeFastNumberFormatters() { + delete fFastNumberFormatters[SMPDTFMT_NF_1x10]; + delete fFastNumberFormatters[SMPDTFMT_NF_2x10]; + delete fFastNumberFormatters[SMPDTFMT_NF_3x10]; + delete fFastNumberFormatters[SMPDTFMT_NF_4x10]; + delete fFastNumberFormatters[SMPDTFMT_NF_2x2]; + fFastNumberFormatters[SMPDTFMT_NF_1x10] = nullptr; + fFastNumberFormatters[SMPDTFMT_NF_2x10] = nullptr; + fFastNumberFormatters[SMPDTFMT_NF_3x10] = nullptr; + 
fFastNumberFormatters[SMPDTFMT_NF_4x10] = nullptr; + fFastNumberFormatters[SMPDTFMT_NF_2x2] = nullptr; +} - if (start + kGmtLen + 1 >= text.length()) { - pos.setErrorIndex(start); - return 0; - } - int32_t cur = start; - // "GMT" - if (text.compare(start, kGmtLen, gGmt) != 0) { - pos.setErrorIndex(start); - return 0; +void +SimpleDateFormat::initNumberFormatters(const Locale &locale,UErrorCode &status) { + if (U_FAILURE(status)) { + return; } - cur += kGmtLen; - // Sign - UBool negative = FALSE; - if (text.charAt(cur) == (UChar)0x002D /* minus */) { - negative = TRUE; - } else if (text.charAt(cur) != (UChar)0x002B /* plus */) { - pos.setErrorIndex(cur); - return 0; + if ( fDateOverride.isBogus() && fTimeOverride.isBogus() ) { + return; } - cur++; - - // Numbers - int32_t numLen; - pos.setIndex(cur); - - Formattable number; - parseInt(text, number, 6, pos, FALSE); - numLen = pos.getIndex() - cur; - - if (numLen <= 0) { - pos.setIndex(start); - pos.setErrorIndex(cur); - return 0; - } - - int32_t numVal = number.getLong(); - - int32_t hour = 0; - int32_t min = 0; - int32_t sec = 0; - - if (numLen <= 2) { - // H[H][:mm[:ss]] - hour = numVal; - cur += numLen; - if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) { - cur++; - pos.setIndex(cur); - parseInt(text, number, 2, pos, FALSE); - numLen = pos.getIndex() - cur; - if (numLen == 2) { - // got minute field - min = number.getLong(); - cur += numLen; - if (cur + 2 < text.length() && text.charAt(cur) == (UChar)0x003A /* colon */) { - cur++; - pos.setIndex(cur); - parseInt(text, number, 2, pos, FALSE); - numLen = pos.getIndex() - cur; - if (numLen == 2) { - // got second field - sec = number.getLong(); - } else { - // reset position - pos.setIndex(cur - 1); - pos.setErrorIndex(-1); - } - } - } else { - // reset postion - pos.setIndex(cur - 1); - pos.setErrorIndex(-1); - } - } - } else if (numLen == 3 || numLen == 4) { - // Hmm or HHmm - hour = numVal / 100; - min = numVal % 100; - } else if 
(numLen == 5 || numLen == 6) { - // Hmmss or HHmmss - hour = numVal / 10000; - min = (numVal % 10000) / 100; - sec = numVal % 100; - } else { - // HHmmss followed by bogus numbers - pos.setIndex(cur + 6); - - int32_t shift = numLen - 6; - while (shift > 0) { - numVal /= 10; - shift--; + umtx_lock(LOCK()); + if (fSharedNumberFormatters == NULL) { + fSharedNumberFormatters = allocSharedNumberFormatters(); + if (fSharedNumberFormatters == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; } - hour = numVal / 10000; - min = (numVal % 10000) / 100; - sec = numVal % 100; } + umtx_unlock(LOCK()); - int32_t offset = ((hour*60 + min)*60 + sec)*1000; - if (negative) { - offset = -offset; - } - return offset; -} - -UBool -SimpleDateFormat::isDefaultGMTFormat() const { - // GMT pattern - if (fSymbols->fGmtFormat.length() == 0) { - // No GMT pattern is set - return TRUE; - } else if (fSymbols->fGmtFormat.compare(gDefGmtPat, kGmtPatLen) != 0) { - return FALSE; - } - // Hour patterns - if (fSymbols->fGmtHourFormats == NULL || fSymbols->fGmtHourFormatsCount != DateFormatSymbols::GMT_HOUR_COUNT) { - // No Hour pattern is set - return TRUE; - } else if ((fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS].compare(gDefGmtNegHmsPat, kNegHmsLen) != 0) - || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM].compare(gDefGmtNegHmPat, kNegHmLen) != 0) - || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS].compare(gDefGmtPosHmsPat, kPosHmsLen) != 0) - || (fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM].compare(gDefGmtPosHmPat, kPosHmLen) != 0)) { - return FALSE; + if (U_FAILURE(status)) { + return; } - return TRUE; -} -void -SimpleDateFormat::formatRFC822TZ(UnicodeString &appendTo, int32_t offset) const { - UChar sign = 0x002B /* '+' */; - if (offset < 0) { - offset = -offset; - sign = 0x002D /* '-' */; - } - appendTo.append(sign); - - int32_t offsetH = offset / U_MILLIS_PER_HOUR; - offset = offset % U_MILLIS_PER_HOUR; - int32_t offsetM = offset 
/ U_MILLIS_PER_MINUTE; - offset = offset % U_MILLIS_PER_MINUTE; - int32_t offsetS = offset / U_MILLIS_PER_SECOND; - - int32_t num = 0, denom = 0; - if (offsetS == 0) { - offset = offsetH*100 + offsetM; // HHmm - num = offset % 10000; - denom = 1000; - } else { - offset = offsetH*10000 + offsetM*100 + offsetS; // HHmmss - num = offset % 1000000; - denom = 100000; - } - while (denom >= 1) { - UChar digit = (UChar)0x0030 + (num / denom); - appendTo.append(digit); - num = num % denom; - denom /= 10; - } + processOverrideString(locale,fDateOverride,kOvrStrDate,status); + processOverrideString(locale,fTimeOverride,kOvrStrTime,status); } void -SimpleDateFormat::initGMTFormatters(UErrorCode &status) { - if (U_FAILURE(status)) { +SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeString &str, int8_t type, UErrorCode &status) { + if (str.isBogus() || U_FAILURE(status)) { return; } - umtx_lock(&LOCK); - if (fGMTFormatters == NULL) { - fGMTFormatters = (MessageFormat**)uprv_malloc(kNumGMTFormatters * sizeof(MessageFormat*)); - if (fGMTFormatters) { - for (int32_t i = 0; i < kNumGMTFormatters; i++) { - const UnicodeString *hourPattern = NULL; //initialized it to avoid warning - switch (i) { - case kGMTNegativeHMS: - hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HMS]); - break; - case kGMTNegativeHM: - hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_NEGATIVE_HM]); - break; - case kGMTPositiveHMS: - hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HMS]); - break; - case kGMTPositiveHM: - hourPattern = &(fSymbols->fGmtHourFormats[DateFormatSymbols::GMT_POSITIVE_HM]); + + int32_t start = 0; + int32_t len; + UnicodeString nsName; + UnicodeString ovrField; + UBool moreToProcess = TRUE; + NSOverride *overrideList = NULL; + + while (moreToProcess) { + int32_t delimiterPosition = str.indexOf((UChar)ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE,start); + if (delimiterPosition == -1) { + moreToProcess = 
FALSE; + len = str.length() - start; + } else { + len = delimiterPosition - start; + } + UnicodeString currentString(str,start,len); + int32_t equalSignPosition = currentString.indexOf((UChar)ULOC_KEYWORD_ASSIGN_UNICODE,0); + if (equalSignPosition == -1) { // Simple override string such as "hebrew" + nsName.setTo(currentString); + ovrField.setToBogus(); + } else { // Field specific override string such as "y=hebrew" + nsName.setTo(currentString,equalSignPosition+1); + ovrField.setTo(currentString,0,1); // We just need the first character. + } + + int32_t nsNameHash = nsName.hashCode(); + // See if the numbering system is in the override list, if not, then add it. + NSOverride *curr = overrideList; + const SharedNumberFormat *snf = NULL; + UBool found = FALSE; + while ( curr && !found ) { + if ( curr->hash == nsNameHash ) { + snf = curr->snf; + found = TRUE; + } + curr = curr->next; + } + + if (!found) { + LocalPointer<NSOverride> cur(new NSOverride); + if (!cur.isNull()) { + char kw[ULOC_KEYWORD_AND_VALUES_CAPACITY]; + uprv_strcpy(kw,"numbers="); + nsName.extract(0,len,kw+8,ULOC_KEYWORD_AND_VALUES_CAPACITY-8,US_INV); + + Locale ovrLoc(locale.getLanguage(),locale.getCountry(),locale.getVariant(),kw); + cur->hash = nsNameHash; + cur->next = overrideList; + SharedObject::copyPtr( + createSharedNumberFormat(ovrLoc, status), cur->snf); + if (U_FAILURE(status)) { + if (overrideList) { + overrideList->free(); + } + return; + } + snf = cur->snf; + overrideList = cur.orphan(); + } else { + status = U_MEMORY_ALLOCATION_ERROR; + if (overrideList) { + overrideList->free(); + } + return; + } + } + + // Now that we have an appropriate number formatter, fill in the appropriate spaces in the + // number formatters table. 
+ if (ovrField.isBogus()) { + switch (type) { + case kOvrStrDate: + case kOvrStrBoth: { + for ( int8_t i=0 ; i<kDateFieldsCount; i++ ) { + SharedObject::copyPtr(snf, fSharedNumberFormatters[kDateFields[i]]); + } + if (type==kOvrStrDate) { break; + } + U_FALLTHROUGH; } - fGMTFormatters[i] = new MessageFormat(fSymbols->fGmtFormat, status); - if (U_FAILURE(status)) { + case kOvrStrTime : { + for ( int8_t i=0 ; i<kTimeFieldsCount; i++ ) { + SharedObject::copyPtr(snf, fSharedNumberFormatters[kTimeFields[i]]); + } break; } - SimpleDateFormat *sdf = (SimpleDateFormat*)this->clone(); - sdf->adoptTimeZone(TimeZone::createTimeZone(UnicodeString(gEtcUTC))); - sdf->applyPattern(*hourPattern); - fGMTFormatters[i]->adoptFormat(0, sdf); } } else { - status = U_MEMORY_ALLOCATION_ERROR; + // if the pattern character is unrecognized, signal an error and bail out + UDateFormatField patternCharIndex = + DateFormatSymbols::getPatternCharIndex(ovrField.charAt(0)); + if (patternCharIndex == UDAT_FIELD_COUNT) { + status = U_INVALID_FORMAT_ERROR; + if (overrideList) { + overrideList->free(); + } + return; + } + SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]); } + + start = delimiterPosition + 1; + } + if (overrideList) { + overrideList->free(); } - umtx_unlock(&LOCK); } //--------------------------------------------------------------------- @@ -1060,7 +1529,9 @@ void SimpleDateFormat::subFormat(UnicodeString &appendTo, UChar ch, int32_t count, - FieldPosition& pos, + UDisplayContext capitalizationContext, + int32_t fieldNum, + FieldPositionHandler& handler, Calendar& cal, UErrorCode& status) const { @@ -1071,39 +1542,72 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // this function gets called by format() to produce the appropriate substitution // text for an individual pattern symbol (e.g., "HH" or "yyyy") - UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch); - UDateFormatField patternCharIndex; + UDateFormatField patternCharIndex = 
DateFormatSymbols::getPatternCharIndex(ch); const int32_t maxIntCount = 10; int32_t beginOffset = appendTo.length(); + const NumberFormat *currentNumberFormat; + DateFormatSymbols::ECapitalizationContextUsageType capContextUsageType = DateFormatSymbols::kCapContextUsageOther; + + UBool isHebrewCalendar = (uprv_strcmp(cal.getType(),"hebrew") == 0); + UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0); // if the pattern character is unrecognized, signal an error and dump out - if (patternCharPtr == NULL) + if (patternCharIndex == UDAT_FIELD_COUNT) { - status = U_INVALID_FORMAT_ERROR; + if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored + status = U_INVALID_FORMAT_ERROR; + } return; } - patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars()); UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; - int32_t value = cal.get(field, status); + int32_t value = 0; + // Don't get value unless it is useful + if (field < UCAL_FIELD_COUNT) { + value = (patternCharIndex != UDAT_RELATED_YEAR_FIELD)? 
cal.get(field, status): cal.getRelatedYear(status); + } if (U_FAILURE(status)) { return; } + currentNumberFormat = getNumberFormatByIndex(patternCharIndex); + if (currentNumberFormat == NULL) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + UnicodeString hebr("hebr", 4, US_INV); + switch (patternCharIndex) { - + // for any "G" symbol, write out the appropriate era string // "GGGG" is wide era name, "GGGGG" is narrow era name, anything else is abbreviated name case UDAT_ERA_FIELD: - if (count == 5) - _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount); - else if (count == 4) - _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount); - else - _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount); + if (isChineseCalendar) { + zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, 9); // as in ICU4J + } else { + if (count == 5) { + _appendSymbol(appendTo, value, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageEraNarrow; + } else if (count == 4) { + _appendSymbol(appendTo, value, fSymbols->fEraNames, fSymbols->fEraNamesCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageEraWide; + } else { + _appendSymbol(appendTo, value, fSymbols->fEras, fSymbols->fErasCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageEraAbbrev; + } + } break; - // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits + case UDAT_YEAR_NAME_FIELD: + if (fSymbols->fShortYearNames != NULL && value <= fSymbols->fShortYearNamesCount) { + // the Calendar YEAR field runs 1 through 60 for cyclic years + _appendSymbol(appendTo, value - 1, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount); + break; + } + // else fall through to numeric year handling, do not break here + U_FALLTHROUGH; + + // OLD: for "yyyy", write out the whole year; for "yy", write out the last 2 digits // NEW: UTS#35: //Year y yy yyy yyyy 
yyyyy //AD 1 1 01 001 0001 00001 @@ -1111,84 +1615,104 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, //AD 123 123 23 123 0123 00123 //AD 1234 1234 34 1234 1234 01234 //AD 12345 12345 45 12345 12345 12345 - case UDAT_YEAR_FIELD: + case UDAT_YEAR_FIELD: case UDAT_YEAR_WOY_FIELD: + if (fDateOverride.compare(hebr)==0 && value>HEBREW_CAL_CUR_MILLENIUM_START_YEAR && value<HEBREW_CAL_CUR_MILLENIUM_END_YEAR) { + value-=HEBREW_CAL_CUR_MILLENIUM_START_YEAR; + } if(count == 2) - zeroPaddingNumber(appendTo, value, 2, 2); - else - zeroPaddingNumber(appendTo, value, count, maxIntCount); - break; - - // for "MMMM", write out the whole month name, for "MMM", write out the month - // abbreviation, for "M" or "MM", write out the month as a number with the - // appropriate number of digits - // for "MMMMM", use the narrow form - case UDAT_MONTH_FIELD: - if (count == 5) - _appendSymbol(appendTo, value, fSymbols->fNarrowMonths, - fSymbols->fNarrowMonthsCount); - else if (count == 4) - _appendSymbol(appendTo, value, fSymbols->fMonths, - fSymbols->fMonthsCount); - else if (count == 3) - _appendSymbol(appendTo, value, fSymbols->fShortMonths, - fSymbols->fShortMonthsCount); - else - zeroPaddingNumber(appendTo, value + 1, count, maxIntCount); + zeroPaddingNumber(currentNumberFormat, appendTo, value, 2, 2); + else + zeroPaddingNumber(currentNumberFormat, appendTo, value, count, maxIntCount); break; - // for "LLLL", write out the whole month name, for "LLL", write out the month - // abbreviation, for "L" or "LL", write out the month as a number with the + // for "MMMM"/"LLLL", write out the whole month name, for "MMM"/"LLL", write out the month + // abbreviation, for "M"/"L" or "MM"/"LL", write out the month as a number with the // appropriate number of digits - // for "LLLLL", use the narrow form + // for "MMMMM"/"LLLLL", use the narrow form + case UDAT_MONTH_FIELD: case UDAT_STANDALONE_MONTH_FIELD: - if (count == 5) - _appendSymbol(appendTo, value, 
fSymbols->fStandaloneNarrowMonths, - fSymbols->fStandaloneNarrowMonthsCount); - else if (count == 4) - _appendSymbol(appendTo, value, fSymbols->fStandaloneMonths, - fSymbols->fStandaloneMonthsCount); - else if (count == 3) - _appendSymbol(appendTo, value, fSymbols->fStandaloneShortMonths, - fSymbols->fStandaloneShortMonthsCount); - else - zeroPaddingNumber(appendTo, value + 1, count, maxIntCount); + if ( isHebrewCalendar ) { + HebrewCalendar *hc = (HebrewCalendar*)&cal; + if (hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value == 6 && count >= 3 ) + value = 13; // Show alternate form for Adar II in leap years in Hebrew calendar. + if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6 && count < 3 ) + value--; // Adjust the month number down 1 in Hebrew non-leap years, i.e. Adar is 6, not 7. + } + { + int32_t isLeapMonth = (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount)? + cal.get(UCAL_IS_LEAP_MONTH, status): 0; + // should consolidate the next section by using arrays of pointers & counts for the right symbols... + if (count == 5) { + if (patternCharIndex == UDAT_MONTH_FIELD) { + _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fNarrowMonths, fSymbols->fNarrowMonthsCount, + (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatNarrow]): NULL, status); + } else { + _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneNarrowMonths, fSymbols->fStandaloneNarrowMonthsCount, + (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneNarrow]): NULL, status); + } + capContextUsageType = DateFormatSymbols::kCapContextUsageMonthNarrow; + } else if (count == 4) { + if (patternCharIndex == UDAT_MONTH_FIELD) { + _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fMonths, fSymbols->fMonthsCount, + (isLeapMonth!=0)? 
&(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]): NULL, status); + capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat; + } else { + _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, + (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]): NULL, status); + capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone; + } + } else if (count == 3) { + if (patternCharIndex == UDAT_MONTH_FIELD) { + _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, + (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]): NULL, status); + capContextUsageType = DateFormatSymbols::kCapContextUsageMonthFormat; + } else { + _appendSymbolWithMonthPattern(appendTo, value, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, + (isLeapMonth!=0)? &(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]): NULL, status); + capContextUsageType = DateFormatSymbols::kCapContextUsageMonthStandalone; + } + } else { + UnicodeString monthNumber; + zeroPaddingNumber(currentNumberFormat,monthNumber, value + 1, count, maxIntCount); + _appendSymbolWithMonthPattern(appendTo, 0, &monthNumber, 1, + (isLeapMonth!=0)? 
&(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric]): NULL, status); + } + } break; // for "k" and "kk", write out the hour, adjusting midnight to appear as "24" case UDAT_HOUR_OF_DAY1_FIELD: - if (value == 0) - zeroPaddingNumber(appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount); - else - zeroPaddingNumber(appendTo, value, count, maxIntCount); + if (value == 0) + zeroPaddingNumber(currentNumberFormat,appendTo, cal.getMaximum(UCAL_HOUR_OF_DAY) + 1, count, maxIntCount); + else + zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); break; case UDAT_FRACTIONAL_SECOND_FIELD: // Fractional seconds left-justify { - fNumberFormat->setMinimumIntegerDigits((count > 3) ? 3 : count); - fNumberFormat->setMaximumIntegerDigits(maxIntCount); + int32_t minDigits = (count > 3) ? 3 : count; if (count == 1) { - value = (value + 50) / 100; + value /= 100; } else if (count == 2) { - value = (value + 5) / 10; + value /= 10; } - FieldPosition p(0); - fNumberFormat->format(value, appendTo, p); + zeroPaddingNumber(currentNumberFormat, appendTo, value, minDigits, maxIntCount); if (count > 3) { - fNumberFormat->setMinimumIntegerDigits(count - 3); - fNumberFormat->format((int32_t)0, appendTo, p); + zeroPaddingNumber(currentNumberFormat, appendTo, 0, count - 3, maxIntCount); } } break; // for "ee" or "e", use local numeric day-of-the-week + // for "EEEEEE" or "eeeeee", write out the short day-of-the-week name // for "EEEEE" or "eeeee", write out the narrow day-of-the-week name // for "EEEE" or "eeee", write out the wide day-of-the-week name // for "EEE" or "EE" or "E" or "eee", write out the abbreviated day-of-the-week name case UDAT_DOW_LOCAL_FIELD: if ( count < 3 ) { - zeroPaddingNumber(appendTo, value, count, maxIntCount); + zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); break; } // fall through to EEEEE-EEE handling, but for that we don't want local day-of-week, @@ -1198,24 +1722,34 @@ 
SimpleDateFormat::subFormat(UnicodeString &appendTo, return; } // fall through, do not break here + U_FALLTHROUGH; case UDAT_DAY_OF_WEEK_FIELD: - if (count == 5) + if (count == 5) { _appendSymbol(appendTo, value, fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount); - else if (count == 4) + capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow; + } else if (count == 4) { _appendSymbol(appendTo, value, fSymbols->fWeekdays, fSymbols->fWeekdaysCount); - else + capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat; + } else if (count == 6) { + _appendSymbol(appendTo, value, fSymbols->fShorterWeekdays, + fSymbols->fShorterWeekdaysCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat; + } else { _appendSymbol(appendTo, value, fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageDayFormat; + } break; // for "ccc", write out the abbreviated day-of-the-week name // for "cccc", write out the wide day-of-the-week name // for "ccccc", use the narrow day-of-the-week name + // for "ccccc", use the short day-of-the-week name case UDAT_STANDALONE_DAY_FIELD: if ( count < 3 ) { - zeroPaddingNumber(appendTo, value, 1, maxIntCount); + zeroPaddingNumber(currentNumberFormat,appendTo, value, 1, maxIntCount); break; } // fall through to alpha DOW handling, but for that we don't want local day-of-week, @@ -1224,161 +1758,530 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, if (U_FAILURE(status)) { return; } - if (count == 5) + if (count == 5) { _appendSymbol(appendTo, value, fSymbols->fStandaloneNarrowWeekdays, fSymbols->fStandaloneNarrowWeekdaysCount); - else if (count == 4) + capContextUsageType = DateFormatSymbols::kCapContextUsageDayNarrow; + } else if (count == 4) { _appendSymbol(appendTo, value, fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount); - else // count == 3 + capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone; + } 
else if (count == 6) { + _appendSymbol(appendTo, value, fSymbols->fStandaloneShorterWeekdays, + fSymbols->fStandaloneShorterWeekdaysCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone; + } else { // count == 3 _appendSymbol(appendTo, value, fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount); + capContextUsageType = DateFormatSymbols::kCapContextUsageDayStandalone; + } break; - // for and "a" symbol, write out the whole AM/PM string + // for "a" symbol, write out the whole AM/PM string case UDAT_AM_PM_FIELD: - _appendSymbol(appendTo, value, fSymbols->fAmPms, - fSymbols->fAmPmsCount); + if (count < 5) { + _appendSymbol(appendTo, value, fSymbols->fAmPms, + fSymbols->fAmPmsCount); + } else { + _appendSymbol(appendTo, value, fSymbols->fNarrowAmPms, + fSymbols->fNarrowAmPmsCount); + } + break; + + // if we see pattern character for UDAT_TIME_SEPARATOR_FIELD (none currently defined), + // write out the time separator string. Leave support in for future definition. + case UDAT_TIME_SEPARATOR_FIELD: + { + UnicodeString separator; + appendTo += fSymbols->getTimeSeparatorString(separator); + } break; // for "h" and "hh", write out the hour, adjusting noon and midnight to show up // as "12" case UDAT_HOUR1_FIELD: - if (value == 0) - zeroPaddingNumber(appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount); - else - zeroPaddingNumber(appendTo, value, count, maxIntCount); + if (value == 0) + zeroPaddingNumber(currentNumberFormat,appendTo, cal.getLeastMaximum(UCAL_HOUR) + 1, count, maxIntCount); + else + zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); break; - // for the "z" symbols, we have to check our time zone data first. If we have a - // localized name for the time zone, then "zzzz" / "zzz" indicate whether - // daylight time is in effect (long/short) and "zz" / "z" do not (long/short). 
- // If we don't have a localized time zone name, - // then the time zone shows up as "GMT+hh:mm" or "GMT-hh:mm" (where "hh:mm" is the - // offset from GMT) regardless of how many z's were in the pattern symbol - case UDAT_TIMEZONE_FIELD: - case UDAT_TIMEZONE_GENERIC_FIELD: - case UDAT_TIMEZONE_SPECIAL_FIELD: + case UDAT_TIMEZONE_FIELD: // 'z' + case UDAT_TIMEZONE_RFC_FIELD: // 'Z' + case UDAT_TIMEZONE_GENERIC_FIELD: // 'v' + case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V' + case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O' + case UDAT_TIMEZONE_ISO_FIELD: // 'X' + case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x' { - UnicodeString zoneString; - const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat(); - if (zsf) { + UChar zsbuf[ZONE_NAME_U16_MAX]; + UnicodeString zoneString(zsbuf, 0, UPRV_LENGTHOF(zsbuf)); + const TimeZone& tz = cal.getTimeZone(); + UDate date = cal.getTime(status); + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { if (patternCharIndex == UDAT_TIMEZONE_FIELD) { if (count < 4) { // "z", "zz", "zzz" - zsf->getSpecificShortString(cal, TRUE /*commonly used only*/, - zoneString, status); + tzfmt->format(UTZFMT_STYLE_SPECIFIC_SHORT, tz, date, zoneString); + capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort; + } else { + // "zzzz" or longer + tzfmt->format(UTZFMT_STYLE_SPECIFIC_LONG, tz, date, zoneString); + capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong; + } + } + else if (patternCharIndex == UDAT_TIMEZONE_RFC_FIELD) { + if (count < 4) { + // "Z" + tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString); + } else if (count == 5) { + // "ZZZZZ" + tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString); } else { - // "zzzz" - zsf->getSpecificLongString(cal, zoneString, status); + // "ZZ", "ZZZ", "ZZZZ" + tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString); } - } else if (patternCharIndex == UDAT_TIMEZONE_GENERIC_FIELD) { + } + else if (patternCharIndex 
== UDAT_TIMEZONE_GENERIC_FIELD) { if (count == 1) { // "v" - zsf->getGenericShortString(cal, TRUE /*commonly used only*/, - zoneString, status); + tzfmt->format(UTZFMT_STYLE_GENERIC_SHORT, tz, date, zoneString); + capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneShort; } else if (count == 4) { // "vvvv" - zsf->getGenericLongString(cal, zoneString, status); + tzfmt->format(UTZFMT_STYLE_GENERIC_LONG, tz, date, zoneString); + capContextUsageType = DateFormatSymbols::kCapContextUsageMetazoneLong; } - } else { // patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD + } + else if (patternCharIndex == UDAT_TIMEZONE_SPECIAL_FIELD) { if (count == 1) { // "V" - zsf->getSpecificShortString(cal, FALSE /*ignore commonly used*/, - zoneString, status); + tzfmt->format(UTZFMT_STYLE_ZONE_ID_SHORT, tz, date, zoneString); + } else if (count == 2) { + // "VV" + tzfmt->format(UTZFMT_STYLE_ZONE_ID, tz, date, zoneString); + } else if (count == 3) { + // "VVV" + tzfmt->format(UTZFMT_STYLE_EXEMPLAR_LOCATION, tz, date, zoneString); } else if (count == 4) { // "VVVV" - zsf->getGenericLocationString(cal, zoneString, status); + tzfmt->format(UTZFMT_STYLE_GENERIC_LOCATION, tz, date, zoneString); + capContextUsageType = DateFormatSymbols::kCapContextUsageZoneLong; } } + else if (patternCharIndex == UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD) { + if (count == 1) { + // "O" + tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT_SHORT, tz, date, zoneString); + } else if (count == 4) { + // "OOOO" + tzfmt->format(UTZFMT_STYLE_LOCALIZED_GMT, tz, date, zoneString); + } + } + else if (patternCharIndex == UDAT_TIMEZONE_ISO_FIELD) { + if (count == 1) { + // "X" + tzfmt->format(UTZFMT_STYLE_ISO_BASIC_SHORT, tz, date, zoneString); + } else if (count == 2) { + // "XX" + tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FIXED, tz, date, zoneString); + } else if (count == 3) { + // "XXX" + tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FIXED, tz, date, zoneString); + } else if (count == 4) { + // "XXXX" + 
tzfmt->format(UTZFMT_STYLE_ISO_BASIC_FULL, tz, date, zoneString); + } else if (count == 5) { + // "XXXXX" + tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_FULL, tz, date, zoneString); + } + } + else if (patternCharIndex == UDAT_TIMEZONE_ISO_LOCAL_FIELD) { + if (count == 1) { + // "x" + tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT, tz, date, zoneString); + } else if (count == 2) { + // "xx" + tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED, tz, date, zoneString); + } else if (count == 3) { + // "xxx" + tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED, tz, date, zoneString); + } else if (count == 4) { + // "xxxx" + tzfmt->format(UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL, tz, date, zoneString); + } else if (count == 5) { + // "xxxxx" + tzfmt->format(UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL, tz, date, zoneString); + } + } + else { + UPRV_UNREACHABLE; + } } - if (zoneString.isEmpty()) { - appendGMT(appendTo, cal, status); - } else { - appendTo += zoneString; - } - } - break; - - case UDAT_TIMEZONE_RFC_FIELD: // 'Z' - TIMEZONE_RFC - if (count < 4) { - // RFC822 format, must use ASCII digits - value = (cal.get(UCAL_ZONE_OFFSET, status) + cal.get(UCAL_DST_OFFSET, status)); - formatRFC822TZ(appendTo, value); - } else { - // long form, localized GMT pattern - appendGMT(appendTo, cal, status); + appendTo += zoneString; } break; case UDAT_QUARTER_FIELD: - if (count >= 4) + if (count >= 4) _appendSymbol(appendTo, value/3, fSymbols->fQuarters, fSymbols->fQuartersCount); - else if (count == 3) + else if (count == 3) _appendSymbol(appendTo, value/3, fSymbols->fShortQuarters, fSymbols->fShortQuartersCount); - else - zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount); + else + zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount); break; case UDAT_STANDALONE_QUARTER_FIELD: - if (count >= 4) + if (count >= 4) _appendSymbol(appendTo, value/3, fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount); - else if (count == 3) + else if (count == 3) 
_appendSymbol(appendTo, value/3, fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount); - else - zeroPaddingNumber(appendTo, (value/3) + 1, count, maxIntCount); + else + zeroPaddingNumber(currentNumberFormat,appendTo, (value/3) + 1, count, maxIntCount); + break; + + case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: + { + const UnicodeString *toAppend = NULL; + int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status); + + // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day. + // For ICU 57 output of "midnight" is temporarily suppressed. + + // For "midnight" and "noon": + // Time, as displayed, must be exactly noon or midnight. + // This means minutes and seconds, if present, must be zero. + if ((/*hour == 0 ||*/ hour == 12) && + (!fHasMinute || cal.get(UCAL_MINUTE, status) == 0) && + (!fHasSecond || cal.get(UCAL_SECOND, status) == 0)) { + // Stealing am/pm value to use as our array index. + // It works out: am/midnight are both 0, pm/noon are both 1, + // 12 am is 12 midnight, and 12 pm is 12 noon. + int32_t val = cal.get(UCAL_AM_PM, status); + + if (count <= 3) { + toAppend = &fSymbols->fAbbreviatedDayPeriods[val]; + } else if (count == 4 || count > 5) { + toAppend = &fSymbols->fWideDayPeriods[val]; + } else { // count == 5 + toAppend = &fSymbols->fNarrowDayPeriods[val]; + } + } + + // toAppend is NULL if time isn't exactly midnight or noon (as displayed). + // toAppend is bogus if time is midnight or noon, but no localized string exists. + // In either case, fall back to am/pm. + if (toAppend == NULL || toAppend->isBogus()) { + // Reformat with identical arguments except ch, now changed to 'a'. 
+ subFormat(appendTo, 0x61, count, capitalizationContext, fieldNum, + handler, cal, status); + } else { + appendTo += *toAppend; + } + break; + } + + case UDAT_FLEXIBLE_DAY_PERIOD_FIELD: + { + // TODO: Maybe fetch the DayperiodRules during initialization (instead of at the first + // loading of an instance) if a relevant pattern character (b or B) is used. + const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status); + if (U_FAILURE(status)) { + // Data doesn't conform to spec, therefore loading failed. + break; + } + if (ruleSet == NULL) { + // Data doesn't exist for the locale we're looking for. + // Falling back to am/pm. + subFormat(appendTo, 0x61, count, capitalizationContext, fieldNum, + handler, cal, status); + break; + } + + // Get current display time. + int32_t hour = cal.get(UCAL_HOUR_OF_DAY, status); + int32_t minute = 0; + if (fHasMinute) { + minute = cal.get(UCAL_MINUTE, status); + } + int32_t second = 0; + if (fHasSecond) { + second = cal.get(UCAL_SECOND, status); + } + + // Determine day period. + DayPeriodRules::DayPeriod periodType; + if (hour == 0 && minute == 0 && second == 0 && ruleSet->hasMidnight()) { + periodType = DayPeriodRules::DAYPERIOD_MIDNIGHT; + } else if (hour == 12 && minute == 0 && second == 0 && ruleSet->hasNoon()) { + periodType = DayPeriodRules::DAYPERIOD_NOON; + } else { + periodType = ruleSet->getDayPeriodForHour(hour); + } + + // Rule set exists, therefore periodType can't be UNKNOWN. + // Get localized string. + U_ASSERT(periodType != DayPeriodRules::DAYPERIOD_UNKNOWN); + UnicodeString *toAppend = NULL; + int32_t index; + + // Note: "midnight" can be ambiguous as to whether it refers to beginning of day or end of day. + // For ICU 57 output of "midnight" is temporarily suppressed. 
+ + if (periodType != DayPeriodRules::DAYPERIOD_AM && + periodType != DayPeriodRules::DAYPERIOD_PM && + periodType != DayPeriodRules::DAYPERIOD_MIDNIGHT) { + index = (int32_t)periodType; + if (count <= 3) { + toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short + } else if (count == 4 || count > 5) { + toAppend = &fSymbols->fWideDayPeriods[index]; + } else { // count == 5 + toAppend = &fSymbols->fNarrowDayPeriods[index]; + } + } + // Fallback schedule: + // Midnight/Noon -> General Periods -> AM/PM. + + // Midnight/Noon -> General Periods. + if ((toAppend == NULL || toAppend->isBogus()) && + (periodType == DayPeriodRules::DAYPERIOD_MIDNIGHT || + periodType == DayPeriodRules::DAYPERIOD_NOON)) { + periodType = ruleSet->getDayPeriodForHour(hour); + index = (int32_t)periodType; + + if (count <= 3) { + toAppend = &fSymbols->fAbbreviatedDayPeriods[index]; // i.e. short + } else if (count == 4 || count > 5) { + toAppend = &fSymbols->fWideDayPeriods[index]; + } else { // count == 5 + toAppend = &fSymbols->fNarrowDayPeriods[index]; + } + } + + // General Periods -> AM/PM. 
+ if (periodType == DayPeriodRules::DAYPERIOD_AM || + periodType == DayPeriodRules::DAYPERIOD_PM || + toAppend->isBogus()) { + subFormat(appendTo, 0x61, count, capitalizationContext, fieldNum, + handler, cal, status); + } + else { + appendTo += *toAppend; + } + + break; + } // all of the other pattern symbols can be formatted as simple numbers with // appropriate zero padding default: - zeroPaddingNumber(appendTo, value, count, maxIntCount); + zeroPaddingNumber(currentNumberFormat,appendTo, value, count, maxIntCount); break; } +#if !UCONFIG_NO_BREAK_ITERATION + // if first field, check to see whether we need to and are able to titlecase it + if (fieldNum == 0 && fCapitalizationBrkIter != NULL && appendTo.length() > beginOffset && + u_islower(appendTo.char32At(beginOffset))) { + UBool titlecase = FALSE; + switch (capitalizationContext) { + case UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE: + titlecase = TRUE; + break; + case UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU: + titlecase = fSymbols->fCapitalization[capContextUsageType][0]; + break; + case UDISPCTX_CAPITALIZATION_FOR_STANDALONE: + titlecase = fSymbols->fCapitalization[capContextUsageType][1]; + break; + default: + // titlecase = FALSE; + break; + } + if (titlecase) { + UnicodeString firstField(appendTo, beginOffset); + firstField.toTitle(fCapitalizationBrkIter, fLocale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT); + appendTo.replaceBetween(beginOffset, appendTo.length(), firstField); + } + } +#endif + + handler.addAttribute(fgPatternIndexToDateFormatField[patternCharIndex], beginOffset, appendTo.length()); +} + +//---------------------------------------------------------------------- + +void SimpleDateFormat::adoptNumberFormat(NumberFormat *formatToAdopt) { + fixNumberFormatForDates(*formatToAdopt); + freeFastNumberFormatters(); // deletes refs to fNumberFormat's symbols + delete fNumberFormat; + fNumberFormat = formatToAdopt; + + // We successfully set the default number format. 
Now delete the overrides + // (can't fail). + if (fSharedNumberFormatters) { + freeSharedNumberFormatters(fSharedNumberFormatters); + fSharedNumberFormatters = NULL; + } + + // Also re-compute the fast formatters. + UErrorCode localStatus = U_ZERO_ERROR; + initFastNumberFormatters(localStatus); +} + +void SimpleDateFormat::adoptNumberFormat(const UnicodeString& fields, NumberFormat *formatToAdopt, UErrorCode &status){ + fixNumberFormatForDates(*formatToAdopt); + LocalPointer<NumberFormat> fmt(formatToAdopt); + if (U_FAILURE(status)) { + return; + } + + // We must ensure fSharedNumberFormatters is allocated. + if (fSharedNumberFormatters == NULL) { + fSharedNumberFormatters = allocSharedNumberFormatters(); + if (fSharedNumberFormatters == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } + const SharedNumberFormat *newFormat = createSharedNumberFormat(fmt.orphan()); + if (newFormat == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + for (int i=0; i<fields.length(); i++) { + UChar field = fields.charAt(i); + // if the pattern character is unrecognized, signal an error and bail out + UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(field); + if (patternCharIndex == UDAT_FIELD_COUNT) { + status = U_INVALID_FORMAT_ERROR; + newFormat->deleteIfZeroRefCount(); + return; + } + + // Set the number formatter in the table + SharedObject::copyPtr( + newFormat, fSharedNumberFormatters[patternCharIndex]); + } + newFormat->deleteIfZeroRefCount(); +} - // if the field we're formatting is the one the FieldPosition says it's interested - // in, fill in the FieldPosition with this field's positions - if (pos.getBeginIndex() == pos.getEndIndex() && - pos.getField() == fgPatternIndexToDateFormatField[patternCharIndex]) { - pos.setBeginIndex(beginOffset); - pos.setEndIndex(appendTo.length()); +const NumberFormat * +SimpleDateFormat::getNumberFormatForField(UChar field) const { + UDateFormatField index = 
DateFormatSymbols::getPatternCharIndex(field); + if (index == UDAT_FIELD_COUNT) { + return NULL; } + return getNumberFormatByIndex(index); } //---------------------------------------------------------------------- void -SimpleDateFormat::zeroPaddingNumber(UnicodeString &appendTo, int32_t value, int32_t minDigits, int32_t maxDigits) const +SimpleDateFormat::zeroPaddingNumber( + const NumberFormat *currentNumberFormat, + UnicodeString &appendTo, + int32_t value, int32_t minDigits, int32_t maxDigits) const { - if (fNumberFormat!=NULL) { - FieldPosition pos(0); + const number::LocalizedNumberFormatter* fastFormatter = nullptr; + // NOTE: This uses the heuristic that these five min/max int settings account for the vast majority + // of SimpleDateFormat number formatting cases at the time of writing (ICU 62). + if (currentNumberFormat == fNumberFormat) { + if (maxDigits == 10) { + if (minDigits == 1) { + fastFormatter = fFastNumberFormatters[SMPDTFMT_NF_1x10]; + } else if (minDigits == 2) { + fastFormatter = fFastNumberFormatters[SMPDTFMT_NF_2x10]; + } else if (minDigits == 3) { + fastFormatter = fFastNumberFormatters[SMPDTFMT_NF_3x10]; + } else if (minDigits == 4) { + fastFormatter = fFastNumberFormatters[SMPDTFMT_NF_4x10]; + } + } else if (maxDigits == 2) { + if (minDigits == 2) { + fastFormatter = fFastNumberFormatters[SMPDTFMT_NF_2x2]; + } + } + } + if (fastFormatter != nullptr) { + // Can use fast path + number::impl::UFormattedNumberData result; + result.quantity.setToInt(value); + UErrorCode localStatus = U_ZERO_ERROR; + fastFormatter->formatImpl(&result, localStatus); + if (U_FAILURE(localStatus)) { + return; + } + appendTo.append(result.getStringRef().toTempUnicodeString()); + return; + } + + // Check for RBNF (no clone necessary) + auto* rbnf = dynamic_cast<const RuleBasedNumberFormat*>(currentNumberFormat); + if (rbnf != nullptr) { + FieldPosition pos(FieldPosition::DONT_CARE); + rbnf->format(value, appendTo, pos); // 3rd arg is there to speed up processing + 
return; + } - fNumberFormat->setMinimumIntegerDigits(minDigits); - fNumberFormat->setMaximumIntegerDigits(maxDigits); - fNumberFormat->format(value, appendTo, pos); // 3rd arg is there to speed up processing + // Fall back to slow path (clone and mutate the NumberFormat) + if (currentNumberFormat != nullptr) { + FieldPosition pos(FieldPosition::DONT_CARE); + LocalPointer<NumberFormat> nf(dynamic_cast<NumberFormat*>(currentNumberFormat->clone())); + nf->setMinimumIntegerDigits(minDigits); + nf->setMaximumIntegerDigits(maxDigits); + nf->format(value, appendTo, pos); // 3rd arg is there to speed up processing } } //---------------------------------------------------------------------- -/** - * Format characters that indicate numeric fields. The character - * at index 0 is treated specially. - */ -static const UChar NUMERIC_FORMAT_CHARS[] = {0x4D, 0x59, 0x79, 0x75, 0x64, 0x65, 0x68, 0x48, 0x6D, 0x73, 0x53, 0x44, 0x46, 0x77, 0x57, 0x6B, 0x4B, 0x00}; /* "MYyudehHmsSDFwWkK" */ - /** * Return true if the given format character, occuring count * times, represents a numeric field. 
*/ UBool SimpleDateFormat::isNumeric(UChar formatChar, int32_t count) { - UnicodeString s(NUMERIC_FORMAT_CHARS); - int32_t i = s.indexOf(formatChar); - return (i > 0 || (i == 0 && count < 3)); + return DateFormatSymbols::isNumericPatternChar(formatChar, count); +} + +UBool +SimpleDateFormat::isAtNumericField(const UnicodeString &pattern, int32_t patternOffset) { + if (patternOffset >= pattern.length()) { + // not at any field + return FALSE; + } + UChar ch = pattern.charAt(patternOffset); + UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch); + if (f == UDAT_FIELD_COUNT) { + // not at any field + return FALSE; + } + int32_t i = patternOffset; + while (pattern.charAt(++i) == ch) {} + return DateFormatSymbols::isNumericField(f, i - patternOffset); +} + +UBool +SimpleDateFormat::isAfterNonNumericField(const UnicodeString &pattern, int32_t patternOffset) { + if (patternOffset <= 0) { + // not after any field + return FALSE; + } + UChar ch = pattern.charAt(--patternOffset); + UDateFormatField f = DateFormatSymbols::getPatternCharIndex(ch); + if (f == UDAT_FIELD_COUNT) { + // not after any field + return FALSE; + } + int32_t i = patternOffset; + while (pattern.charAt(--i) == ch) {} + return !DateFormatSymbols::isNumericField(f, patternOffset - i); } void @@ -1386,14 +2289,20 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& { UErrorCode status = U_ZERO_ERROR; int32_t pos = parsePos.getIndex(); + if(parsePos.getIndex() < 0) { + parsePos.setErrorIndex(0); + return; + } int32_t start = pos; + + // Hold the day period until everything else is parsed, because we need + // the hour to interpret time correctly. 
+ int32_t dayPeriodInt = -1; + UBool ambiguousYear[] = { FALSE }; + int32_t saveHebrewMonth = -1; int32_t count = 0; - - UBool lenient = isLenient(); - - // hack, reset tztype, cast away const - ((SimpleDateFormat*)this)->tztype = TZTYPE_UNK; + UTimeZoneFormatTimeType tzTimeType = UTZFMT_TIME_TYPE_UNKNOWN; // For parsing abutting numeric fields. 'abutPat' is the // offset into 'pattern' of the first of 2 or more abutting @@ -1405,13 +2314,43 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& int32_t abutPass = 0; UBool inQuote = FALSE; - const UnicodeString numericFormatChars(NUMERIC_FORMAT_CHARS); + MessageFormat * numericLeapMonthFormatter = NULL; + + Calendar* calClone = NULL; + Calendar *workCal = &cal; + if (&cal != fCalendar && uprv_strcmp(cal.getType(), fCalendar->getType()) != 0) { + // Different calendar type + // We use the time/zone from the input calendar, but + // do not use the input calendar for field calculation. + calClone = fCalendar->clone(); + if (calClone != NULL) { + calClone->setTime(cal.getTime(status),status); + if (U_FAILURE(status)) { + goto ExitParse; + } + calClone->setTimeZone(cal.getTimeZone()); + workCal = calClone; + } else { + status = U_MEMORY_ALLOCATION_ERROR; + goto ExitParse; + } + } + + if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { + numericLeapMonthFormatter = new MessageFormat(fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternNumeric], fLocale, status); + if (numericLeapMonthFormatter == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + goto ExitParse; + } else if (U_FAILURE(status)) { + goto ExitParse; // this will delete numericLeapMonthFormatter + } + } for (int32_t i=0; i<fPattern.length(); ++i) { UChar ch = fPattern.charAt(i); // Handle alphabetic field characters. 
- if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // [A-Za-z] + if (!inQuote && isSyntaxChar(ch)) { int32_t fieldPat = i; // Count the length of this field specifier @@ -1424,33 +2363,12 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& if (isNumeric(ch, count)) { if (abutPat < 0) { - // Determine if there is an abutting numeric field. For - // most fields we can just look at the next characters, - // but the 'm' field is either numeric or text, - // depending on the count, so we have to look ahead for - // that field. - if ((i+1)<fPattern.length()) { - UBool abutting; - UChar nextCh = fPattern.charAt(i+1); - int32_t k = numericFormatChars.indexOf(nextCh); - if (k == 0) { - int32_t j = i+2; - while (j<fPattern.length() && - fPattern.charAt(j) == nextCh) { - ++j; - } - abutting = (j-i) < 4; // nextCount < 3 - } else { - abutting = k > 0; - } - - // Record the start of a set of abutting numeric - // fields. - if (abutting) { - abutPat = fieldPat; - abutStart = pos; - abutPass = 0; - } + // Determine if there is an abutting numeric field. + // Record the start of a set of abutting numeric fields. + if (isAtNumericField(fPattern, i + 1)) { + abutPat = fieldPat; + abutStart = pos; + abutPass = 0; } } } else { @@ -1472,14 +2390,13 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& if (fieldPat == abutPat) { count -= abutPass++; if (count == 0) { - parsePos.setIndex(start); - parsePos.setErrorIndex(pos); - return; + status = U_PARSE_ERROR; + goto ExitParse; } } pos = subParse(text, pos, ch, count, - TRUE, FALSE, ambiguousYear, cal); + TRUE, FALSE, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType); // If the parse fails anywhere in the run, back up to the // start of the run and retry. 
@@ -1492,11 +2409,31 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // Handle non-numeric fields and non-abutting numeric // fields. - else { + else if (ch != 0x6C) { // pattern char 'l' (SMALL LETTER L) just gets ignored int32_t s = subParse(text, pos, ch, count, - FALSE, TRUE, ambiguousYear, cal); - - if (s <= 0) { + FALSE, TRUE, ambiguousYear, saveHebrewMonth, *workCal, i, numericLeapMonthFormatter, &tzTimeType, &dayPeriodInt); + + if (s == -pos-1) { + // era not present, in special cases allow this to continue + // from the position where the era was expected + s = pos; + + if (i+1 < fPattern.length()) { + // move to next pattern character + UChar c = fPattern.charAt(i+1); + + // check for whitespace + if (PatternProps::isWhiteSpace(c)) { + i++; + // Advance over run in pattern + while ((i+1)<fPattern.length() && + PatternProps::isWhiteSpace(fPattern.charAt(i+1))) { + ++i; + } + } + } + } + else if (s <= 0) { status = U_PARSE_ERROR; goto ExitParse; } @@ -1508,16 +2445,94 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // quoted characters and non-alphabetic unquoted // characters. else { - + abutPat = -1; // End of any abutting fields - - if (! matchLiterals(fPattern, i, text, pos, lenient)) { + + if (! matchLiterals(fPattern, i, text, pos, getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status), getBooleanAttribute(UDAT_PARSE_PARTIAL_LITERAL_MATCH, status), isLenient())) { status = U_PARSE_ERROR; goto ExitParse; } } } + // Special hack for trailing "." after non-numeric field. + if (text.charAt(pos) == 0x2e && getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) { + // only do if the last field is not numeric + if (isAfterNonNumericField(fPattern, fPattern.length())) { + pos++; // skip the extra "." + } + } + + // If dayPeriod is set, use it in conjunction with hour-of-day to determine am/pm. 
+ if (dayPeriodInt >= 0) { + DayPeriodRules::DayPeriod dayPeriod = (DayPeriodRules::DayPeriod)dayPeriodInt; + const DayPeriodRules *ruleSet = DayPeriodRules::getInstance(this->getSmpFmtLocale(), status); + + if (!cal.isSet(UCAL_HOUR) && !cal.isSet(UCAL_HOUR_OF_DAY)) { + // If hour is not set, set time to the midpoint of current day period, overwriting + // minutes if it's set. + double midPoint = ruleSet->getMidPointForDayPeriod(dayPeriod, status); + + // If we can't get midPoint we do nothing. + if (U_SUCCESS(status)) { + // Truncate midPoint toward zero to get the hour. + // Any leftover means it was a half-hour. + int32_t midPointHour = (int32_t) midPoint; + int32_t midPointMinute = (midPoint - midPointHour) > 0 ? 30 : 0; + + // No need to set am/pm because hour-of-day is set last therefore takes precedence. + cal.set(UCAL_HOUR_OF_DAY, midPointHour); + cal.set(UCAL_MINUTE, midPointMinute); + } + } else { + int hourOfDay; + + if (cal.isSet(UCAL_HOUR_OF_DAY)) { // Hour is parsed in 24-hour format. + hourOfDay = cal.get(UCAL_HOUR_OF_DAY, status); + } else { // Hour is parsed in 12-hour format. + hourOfDay = cal.get(UCAL_HOUR, status); + // cal.get() turns 12 to 0 for 12-hour time; change 0 to 12 + // so 0 unambiguously means a 24-hour time from above. + if (hourOfDay == 0) { hourOfDay = 12; } + } + U_ASSERT(0 <= hourOfDay && hourOfDay <= 23); + + + // If hour-of-day is 0 or 13 thru 23 then input time in unambiguously in 24-hour format. + if (hourOfDay == 0 || (13 <= hourOfDay && hourOfDay <= 23)) { + // Make hour-of-day take precedence over (hour + am/pm) by setting it again. + cal.set(UCAL_HOUR_OF_DAY, hourOfDay); + } else { + // We have a 12-hour time and need to choose between am and pm. + // Behave as if dayPeriod spanned 6 hours each way from its center point. + // This will parse correctly for consistent time + period (e.g. 10 at night) as + // well as provide a reasonable recovery for inconsistent time + period (e.g. + // 9 in the afternoon). 
+ + // Assume current time is in the AM. + // - Change 12 back to 0 for easier handling of 12am. + // - Append minutes as fractional hours because e.g. 8:15 and 8:45 could be parsed + // into different half-days if center of dayPeriod is at 14:30. + // - cal.get(MINUTE) will return 0 if MINUTE is unset, which works. + if (hourOfDay == 12) { hourOfDay = 0; } + double currentHour = hourOfDay + (cal.get(UCAL_MINUTE, status)) / 60.0; + double midPointHour = ruleSet->getMidPointForDayPeriod(dayPeriod, status); + + if (U_SUCCESS(status)) { + double hoursAheadMidPoint = currentHour - midPointHour; + + // Assume current time is in the AM. + if (-6 <= hoursAheadMidPoint && hoursAheadMidPoint < 6) { + // Assumption holds; set time as such. + cal.set(UCAL_AM_PM, 0); + } else { + cal.set(UCAL_AM_PM, 1); + } + } + } + } + } + // At this point the fields of Calendar have been set. Calendar // will fill in default values for missing fields when the time // is computed. @@ -1546,7 +2561,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // when the two-digit year is equal to the start year, and thus might fall at the // front or the back of the default century. This only works because we adjust // the year correctly to start with in other cases -- see subParse(). - if (ambiguousYear[0] || tztype != TZTYPE_UNK) // If this is true then the two-digit year == the default start year + if (ambiguousYear[0] || tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) // If this is true then the two-digit year == the default start year { // We need a copy of the fields, and we need to avoid triggering a call to // complete(), which will recalculate the fields. Since we can't access @@ -1557,8 +2572,8 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& copy = cal.clone(); // Check for failed cloning. 
if (copy == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto ExitParse; + status = U_MEMORY_ALLOCATION_ERROR; + goto ExitParse; } UDate parsedDate = copy->getTime(status); // {sfb} check internalGetDefaultCenturyStart @@ -1569,20 +2584,20 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& delete copy; } - if (tztype != TZTYPE_UNK) { + if (tzTimeType != UTZFMT_TIME_TYPE_UNKNOWN) { copy = cal.clone(); // Check for failed cloning. if (copy == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - goto ExitParse; + status = U_MEMORY_ALLOCATION_ERROR; + goto ExitParse; } const TimeZone & tz = cal.getTimeZone(); BasicTimeZone *btz = NULL; - if (tz.getDynamicClassID() == OlsonTimeZone::getStaticClassID() - || tz.getDynamicClassID() == SimpleTimeZone::getStaticClassID() - || tz.getDynamicClassID() == RuleBasedTimeZone::getStaticClassID() - || tz.getDynamicClassID() == VTimeZone::getStaticClassID()) { + if (dynamic_cast<const OlsonTimeZone *>(&tz) != NULL + || dynamic_cast<const SimpleTimeZone *>(&tz) != NULL + || dynamic_cast<const RuleBasedTimeZone *>(&tz) != NULL + || dynamic_cast<const VTimeZone *>(&tz) != NULL) { btz = (BasicTimeZone*)&tz; } @@ -1595,7 +2610,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // matches the rule used by the parsed time zone. 
int32_t raw, dst; if (btz != NULL) { - if (tztype == TZTYPE_STD) { + if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) { btz->getOffsetFromLocal(localMillis, BasicTimeZone::kStandard, BasicTimeZone::kStandard, raw, dst, status); } else { @@ -1610,7 +2625,7 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& // Now, compare the results with parsed type, either standard or daylight saving time int32_t resolvedSavings = dst; - if (tztype == TZTYPE_STD) { + if (tzTimeType == UTZFMT_TIME_TYPE_STANDARD) { if (dst != 0) { // Override DST_OFFSET = 0 in the result calendar resolvedSavings = 0; @@ -1678,33 +2693,37 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& delete copy; } } -ExitParse: +ExitParse: + // Set the parsed result if local calendar is used + // instead of the input calendar + if (U_SUCCESS(status) && workCal != &cal) { + cal.setTimeZone(workCal->getTimeZone()); + cal.setTime(workCal->getTime(status), status); + } + + if (numericLeapMonthFormatter != NULL) { + delete numericLeapMonthFormatter; + } + if (calClone != NULL) { + delete calClone; + } + // If any Calendar calls failed, we pretend that we // couldn't parse the string, when in reality this isn't quite accurate-- // we did parse it; the Calendar calls just failed. 
- if (U_FAILURE(status)) { + if (U_FAILURE(status)) { parsePos.setErrorIndex(pos); - parsePos.setIndex(start); + parsePos.setIndex(start); } } -UDate -SimpleDateFormat::parse( const UnicodeString& text, - ParsePosition& pos) const { - // redefined here because the other parse() function hides this function's - // cunterpart on DateFormat - return DateFormat::parse(text, pos); -} - -UDate -SimpleDateFormat::parse(const UnicodeString& text, UErrorCode& status) const -{ - // redefined here because the other parse() function hides this function's - // counterpart on DateFormat - return DateFormat::parse(text, status); -} //---------------------------------------------------------------------- +static int32_t +matchStringWithOptionalDot(const UnicodeString &text, + int32_t index, + const UnicodeString &data); + int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text, int32_t start, UCalendarDateFields field, @@ -1720,86 +2739,70 @@ int32_t SimpleDateFormat::matchQuarterString(const UnicodeString& text, // We keep track of the longest match, and return that. Note that this // unfortunately requires us to test all array elements. int32_t bestMatchLength = 0, bestMatch = -1; + UnicodeString bestMatchName; - // {sfb} kludge to support case-insensitive comparison - // {markus 2002oct11} do not just use caseCompareBetween because we do not know - // the length of the match after case folding - // {alan 20040607} don't case change the whole string, since the length - // can change - // TODO we need a case-insensitive startsWith function - UnicodeString lcase, lcaseText; - text.extract(start, INT32_MAX, lcaseText); - lcaseText.foldCase(); - - for (; i < count; ++i) - { - // Always compare if we have no match yet; otherwise only compare - // against potentially better matches (longer strings). 
- - lcase.fastCopyFrom(data[i]).foldCase(); - int32_t length = lcase.length(); - - if (length > bestMatchLength && - lcaseText.compareBetween(0, length, lcase, 0, length) == 0) - { + for (; i < count; ++i) { + int32_t matchLength = 0; + if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { + bestMatchLength = matchLength; bestMatch = i; - bestMatchLength = length; } } - if (bestMatch >= 0) - { + + if (bestMatch >= 0) { cal.set(field, bestMatch * 3); + return start + bestMatchLength; + } + + return -start; +} + +int32_t SimpleDateFormat::matchDayPeriodStrings(const UnicodeString& text, int32_t start, + const UnicodeString* data, int32_t dataCount, + int32_t &dayPeriod) const +{ + + int32_t bestMatchLength = 0, bestMatch = -1; + + for (int32_t i = 0; i < dataCount; ++i) { + int32_t matchLength = 0; + if ((matchLength = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { + bestMatchLength = matchLength; + bestMatch = i; + } + } + + if (bestMatch >= 0) { + dayPeriod = bestMatch; + return start + bestMatchLength; + } - // Once we have a match, we have to determine the length of the - // original source string. This will usually be == the length of - // the case folded string, but it may differ (e.g. sharp s). - lcase.fastCopyFrom(data[bestMatch]).foldCase(); - - // Most of the time, the length will be the same as the length - // of the string from the locale data. Sometimes it will be - // different, in which case we will have to figure it out by - // adding a character at a time, until we have a match. We do - // this all in one loop, where we try 'len' first (at index - // i==0). 
- int32_t len = data[bestMatch].length(); // 99+% of the time - int32_t n = text.length() - start; - for (i=0; i<=n; ++i) { - int32_t j=i; - if (i == 0) { - j = len; - } else if (i == len) { - continue; // already tried this when i was 0 - } - text.extract(start, j, lcaseText); - lcaseText.foldCase(); - if (lcase == lcaseText) { - return start + j; - } - } - } - return -start; } //---------------------------------------------------------------------- +#define IS_BIDI_MARK(c) (c==0x200E || c==0x200F || c==0x061C) + UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern, int32_t &patternOffset, const UnicodeString &text, int32_t &textOffset, - UBool lenient) + UBool whitespaceLenient, + UBool partialMatchLenient, + UBool oldLeniency) { UBool inQuote = FALSE; UnicodeString literal; int32_t i = patternOffset; - + // scan pattern looking for contiguous literal characters for ( ; i < pattern.length(); i += 1) { UChar ch = pattern.charAt(i); - - if (!inQuote && ((ch >= 0x41 && ch <= 0x5A) || (ch >= 0x61 && ch <= 0x7A))) { // unquoted [A-Za-z] + + if (!inQuote && isSyntaxChar(ch)) { break; } - + if (ch == QUOTE) { // Match a quote literal ('') inside OR outside of quotes if ((i + 1) < pattern.length() && pattern.charAt(i + 1) == QUOTE) { @@ -1809,102 +2812,145 @@ UBool SimpleDateFormat::matchLiterals(const UnicodeString &pattern, continue; } } - - literal += ch; + + if (!IS_BIDI_MARK(ch)) { + literal += ch; + } } - - // at this point, literal contains the literal text + + // at this point, literal contains the pattern literal text (without bidi marks) // and i is the index of the next non-literal pattern character. 
int32_t p; int32_t t = textOffset; - - if (lenient) { - // trim leading, trailing whitespace from - // the literal text + + if (whitespaceLenient) { + // trim leading, trailing whitespace from the pattern literal literal.trim(); - - // ignore any leading whitespace in the text - while (t < text.length() && u_isWhitespace(text.charAt(t))) { + + // ignore any leading whitespace (or bidi marks) in the text + while (t < text.length()) { + UChar ch = text.charAt(t); + if (!u_isWhitespace(ch) && !IS_BIDI_MARK(ch)) { + break; + } t += 1; } } - - for (p = 0; p < literal.length() && t < text.length(); p += 1, t += 1) { + + // Get ignorables, move up here + const UnicodeSet *ignorables = NULL; + UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(pattern.charAt(i)); + if (patternCharIndex != UDAT_FIELD_COUNT) { + ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex); + } + + for (p = 0; p < literal.length() && t < text.length();) { UBool needWhitespace = FALSE; - - while (p < literal.length() && uprv_isRuleWhiteSpace(literal.charAt(p))) { + + // Skip any whitespace at current position in pattern, + // but remember whether we found whitespace in the pattern + // (we already deleted any bidi marks in the pattern). + while (p < literal.length() && PatternProps::isWhiteSpace(literal.charAt(p))) { needWhitespace = TRUE; p += 1; } - + + // If the pattern has whitespace at this point, skip it in text as well + // (if the text does not have any, that may be an error for strict parsing) if (needWhitespace) { - int32_t tStart = t; + UBool whitespaceInText = FALSE; + // Skip any whitespace (or bidi marks) at current position in text, + // but remember whether we found whitespace in the text at this point. 
while (t < text.length()) { UChar tch = text.charAt(t); - - if (!u_isUWhiteSpace(tch) && !uprv_isRuleWhiteSpace(tch)) { + if (u_isUWhiteSpace(tch) || PatternProps::isWhiteSpace(tch)) { + whitespaceInText = TRUE; + } else if (!IS_BIDI_MARK(tch)) { break; } - + t += 1; } - + // TODO: should we require internal spaces // in lenient mode? (There won't be any // leading or trailing spaces) - if (!lenient && t == tStart) { + if (!whitespaceLenient && !whitespaceInText) { // didn't find matching whitespace: // an error in strict mode return FALSE; } - + // In strict mode, this run of whitespace // may have been at the end. if (p >= literal.length()) { break; } + } else { + // Still need to skip any bidi marks in the text + while (t < text.length() && IS_BIDI_MARK(text.charAt(t))) { + ++t; + } } - if (t >= text.length() || literal.charAt(p) != text.charAt(t)) { // Ran out of text, or found a non-matching character: // OK in lenient mode, an error in strict mode. - if (lenient) { + if (whitespaceLenient) { + if (t == textOffset && text.charAt(t) == 0x2e && + isAfterNonNumericField(pattern, patternOffset)) { + // Lenient mode and the literal input text begins with a "." and + // we are after a non-numeric field: We skip the "." + ++t; + continue; // Do not update p. + } + // if it is actual whitespace and we're whitespace lenient it's OK + + UChar wsc = text.charAt(t); + if(PatternProps::isWhiteSpace(wsc)) { + // Lenient mode and it's just whitespace we skip it + ++t; + continue; // Do not update p. 
+ } + } + // hack around oldleniency being a bit of a catch-all bucket and we're just adding support specifically for paritial matches + // This fix is for http://bugs.icu-project.org/trac/ticket/10855 and adds "&& oldLeniency" + //if(partialMatchLenient && oldLeniency) { + // However this causes problems for Apple, see <rdar://problem/20692829> regressions in Chinese date parsing + // We don't want to go back to just "if(partialMatchLenient)" as in ICU 53, that is too lenient for strict mode. + // So if the pattern character is in the separator set, we allow the text character to be in that set or be an alpha char. + if( partialMatchLenient && ( oldLeniency || + ( ignorables != NULL && ignorables->contains(literal.charAt(p)) && (ignorables->contains(text.charAt(t)) || u_isalpha(text.charAt(t))) ) ) + ) { break; } - + return FALSE; } + ++p; + ++t; } - + // At this point if we're in strict mode we have a complete match. // If we're in lenient mode we may have a partial match, or no // match at all. if (p <= 0) { // no match. Pretend it matched a run of whitespace // and ignorables in the text. - const UnicodeSet *ignorables = NULL; - UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), pattern.charAt(i)); - - if (patternCharPtr != NULL) { - UDateFormatField patternCharIndex = (UDateFormatField) (patternCharPtr - DateFormatSymbols::getPatternUChars()); - - ignorables = SimpleDateFormatStaticSets::getIgnorables(patternCharIndex); - } - + for (t = textOffset; t < text.length(); t += 1) { UChar ch = text.charAt(t); - - if (ignorables == NULL || !ignorables->contains(ch)) { + + if (!IS_BIDI_MARK(ch) && (ignorables == NULL || !ignorables->contains(ch))) { break; } } } - + // if we get here, we've got a complete match. 
patternOffset = i - 1; textOffset = t; - + return TRUE; } @@ -1915,6 +2961,7 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, UCalendarDateFields field, const UnicodeString* data, int32_t dataCount, + const UnicodeString* monthPattern, Calendar& cal) const { int32_t i = 0; @@ -1927,67 +2974,76 @@ int32_t SimpleDateFormat::matchString(const UnicodeString& text, // We keep track of the longest match, and return that. Note that this // unfortunately requires us to test all array elements. int32_t bestMatchLength = 0, bestMatch = -1; + UnicodeString bestMatchName; + int32_t isLeapMonth = 0; - // {sfb} kludge to support case-insensitive comparison - // {markus 2002oct11} do not just use caseCompareBetween because we do not know - // the length of the match after case folding - // {alan 20040607} don't case change the whole string, since the length - // can change - // TODO we need a case-insensitive startsWith function - UnicodeString lcase, lcaseText; - text.extract(start, INT32_MAX, lcaseText); - lcaseText.foldCase(); - - for (; i < count; ++i) - { - // Always compare if we have no match yet; otherwise only compare - // against potentially better matches (longer strings). 
- - lcase.fastCopyFrom(data[i]).foldCase(); - int32_t length = lcase.length(); - - if (length > bestMatchLength && - lcaseText.compareBetween(0, length, lcase, 0, length) == 0) - { + for (; i < count; ++i) { + int32_t matchLen = 0; + if ((matchLen = matchStringWithOptionalDot(text, start, data[i])) > bestMatchLength) { bestMatch = i; - bestMatchLength = length; + bestMatchLength = matchLen; + } + + if (monthPattern != NULL) { + UErrorCode status = U_ZERO_ERROR; + UnicodeString leapMonthName; + SimpleFormatter(*monthPattern, 1, 1, status).format(data[i], leapMonthName, status); + if (U_SUCCESS(status)) { + if ((matchLen = matchStringWithOptionalDot(text, start, leapMonthName)) > bestMatchLength) { + bestMatch = i; + bestMatchLength = matchLen; + isLeapMonth = 1; + } + } } } - if (bestMatch >= 0) - { - cal.set(field, bestMatch); - - // Once we have a match, we have to determine the length of the - // original source string. This will usually be == the length of - // the case folded string, but it may differ (e.g. sharp s). - lcase.fastCopyFrom(data[bestMatch]).foldCase(); - - // Most of the time, the length will be the same as the length - // of the string from the locale data. Sometimes it will be - // different, in which case we will have to figure it out by - // adding a character at a time, until we have a match. We do - // this all in one loop, where we try 'len' first (at index - // i==0). 
- int32_t len = data[bestMatch].length(); // 99+% of the time - int32_t n = text.length() - start; - for (i=0; i<=n; ++i) { - int32_t j=i; - if (i == 0) { - j = len; - } else if (i == len) { - continue; // already tried this when i was 0 - } - text.extract(start, j, lcaseText); - lcaseText.foldCase(); - if (lcase == lcaseText) { - return start + j; - } - } - } - + + if (bestMatch >= 0) { + if (field < UCAL_FIELD_COUNT) { + // Adjustment for Hebrew Calendar month Adar II + if (!strcmp(cal.getType(),"hebrew") && field==UCAL_MONTH && bestMatch==13) { + cal.set(field,6); + } else { + if (field == UCAL_YEAR) { + bestMatch++; // only get here for cyclic year names, which match 1-based years 1-60 + } + cal.set(field, bestMatch); + } + if (monthPattern != NULL) { + cal.set(UCAL_IS_LEAP_MONTH, isLeapMonth); + } + } + + return start + bestMatchLength; + } + return -start; } +static int32_t +matchStringWithOptionalDot(const UnicodeString &text, + int32_t index, + const UnicodeString &data) { + UErrorCode sts = U_ZERO_ERROR; + int32_t matchLenText = 0; + int32_t matchLenData = 0; + + u_caseInsensitivePrefixMatch(text.getBuffer() + index, text.length() - index, + data.getBuffer(), data.length(), + 0 /* default case option */, + &matchLenText, &matchLenData, + &sts); + U_ASSERT (U_SUCCESS(sts)); + + if (matchLenData == data.length() /* normal match */ + || (data.charAt(data.length() - 1) == 0x2e + && matchLenData == data.length() - 1 /* match without trailing dot */)) { + return matchLenText; + } + + return 0; +} + //---------------------------------------------------------------------- void @@ -1999,37 +3055,43 @@ SimpleDateFormat::set2DigitYearStart(UDate d, UErrorCode& status) /** * Private member function that converts the parsed date strings into * timeFields. Returns -start (for ParsePosition) if failed. - * @param text the time text to be parsed. - * @param start where to start parsing. - * @param ch the pattern character for the date field text to be parsed. 
- * @param count the count of a pattern character. - * @return the new start position if matching succeeded; a negative number - * indicating matching failure, otherwise. */ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UChar ch, int32_t count, - UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], Calendar& cal) const + UBool obeyCount, UBool allowNegative, UBool ambiguousYear[], int32_t& saveHebrewMonth, Calendar& cal, + int32_t patLoc, MessageFormat * numericLeapMonthFormatter, UTimeZoneFormatTimeType *tzTimeType, + int32_t *dayPeriod) const { Formattable number; int32_t value = 0; int32_t i; + int32_t ps = 0; + UErrorCode status = U_ZERO_ERROR; ParsePosition pos(0); - int32_t patternCharIndex; + UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(ch); + const NumberFormat *currentNumberFormat; UnicodeString temp; - UChar *patternCharPtr = u_strchr(DateFormatSymbols::getPatternUChars(), ch); - UBool lenient = isLenient(); + int32_t tzParseOptions = (isLenient())? 
UTZFMT_PARSE_OPTION_ALL_STYLES: UTZFMT_PARSE_OPTION_NONE; UBool gotNumber = FALSE; #if defined (U_DEBUG_CAL) //fprintf(stderr, "%s:%d - [%c] st=%d \n", __FILE__, __LINE__, (char) ch, start); #endif - if (patternCharPtr == NULL) { + if (patternCharIndex == UDAT_FIELD_COUNT) { return -start; } - patternCharIndex = (UDateFormatField)(patternCharPtr - DateFormatSymbols::getPatternUChars()); + currentNumberFormat = getNumberFormatByIndex(patternCharIndex); + if (currentNumberFormat == NULL) { + return -start; + } + UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; // UCAL_FIELD_COUNT if irrelevant + UnicodeString hebr("hebr", 4, US_INV); - UCalendarDateFields field = fgPatternIndexToCalendarField[patternCharIndex]; + if (numericLeapMonthFormatter != NULL) { + numericLeapMonthFormatter->setFormats((const Format **)¤tNumberFormat, 1); + } + UBool isChineseCalendar = (uprv_strcmp(cal.getType(),"chinese") == 0 || uprv_strcmp(cal.getType(),"dangi") == 0); // If there are any spaces here, skip over them. If we hit the end // of the string, then fail. @@ -2037,68 +3099,103 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if (start >= text.length()) { return -start; } - UChar32 c = text.char32At(start); - - if (!u_isUWhiteSpace(c)) { + if (!u_isUWhiteSpace(c) /*||*/ && !PatternProps::isWhiteSpace(c)) { break; } - - start += UTF_CHAR_LENGTH(c); + start += U16_LENGTH(c); } - pos.setIndex(start); // We handle a few special cases here where we need to parse // a number value. We handle further, more generic cases below. We need // to handle some of them here because some fields require extra processing on // the parsed value. 
- if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || - patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || - patternCharIndex == UDAT_HOUR1_FIELD || - patternCharIndex == UDAT_HOUR0_FIELD || - patternCharIndex == UDAT_DOW_LOCAL_FIELD || - patternCharIndex == UDAT_STANDALONE_DAY_FIELD || - patternCharIndex == UDAT_MONTH_FIELD || - patternCharIndex == UDAT_STANDALONE_MONTH_FIELD || - patternCharIndex == UDAT_QUARTER_FIELD || - patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD || - patternCharIndex == UDAT_YEAR_FIELD || - patternCharIndex == UDAT_YEAR_WOY_FIELD || - patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) + if (patternCharIndex == UDAT_HOUR_OF_DAY1_FIELD || // k + patternCharIndex == UDAT_HOUR_OF_DAY0_FIELD || // H + patternCharIndex == UDAT_HOUR1_FIELD || // h + patternCharIndex == UDAT_HOUR0_FIELD || // K + (patternCharIndex == UDAT_DOW_LOCAL_FIELD && count <= 2) || // e + (patternCharIndex == UDAT_STANDALONE_DAY_FIELD && count <= 2) || // c + (patternCharIndex == UDAT_MONTH_FIELD && count <= 2) || // M + (patternCharIndex == UDAT_STANDALONE_MONTH_FIELD && count <= 2) || // L + (patternCharIndex == UDAT_QUARTER_FIELD && count <= 2) || // Q + (patternCharIndex == UDAT_STANDALONE_QUARTER_FIELD && count <= 2) || // q + patternCharIndex == UDAT_YEAR_FIELD || // y + patternCharIndex == UDAT_YEAR_WOY_FIELD || // Y + patternCharIndex == UDAT_YEAR_NAME_FIELD || // U (falls back to numeric) + (patternCharIndex == UDAT_ERA_FIELD && isChineseCalendar) || // G + patternCharIndex == UDAT_FRACTIONAL_SECOND_FIELD) // S { int32_t parseStart = pos.getIndex(); // It would be good to unify this with the obeyCount logic below, // but that's going to be difficult. 
const UnicodeString* src; - if (obeyCount) { - if ((start+count) > text.length()) { - return -start; + UBool parsedNumericLeapMonth = FALSE; + if (numericLeapMonthFormatter != NULL && (patternCharIndex == UDAT_MONTH_FIELD || patternCharIndex == UDAT_STANDALONE_MONTH_FIELD)) { + int32_t argCount; + Formattable * args = numericLeapMonthFormatter->parse(text, pos, argCount); + if (args != NULL && argCount == 1 && pos.getIndex() > parseStart && args[0].isNumeric()) { + parsedNumericLeapMonth = TRUE; + number.setLong(args[0].getLong()); + cal.set(UCAL_IS_LEAP_MONTH, 1); + delete[] args; + } else { + pos.setIndex(parseStart); + cal.set(UCAL_IS_LEAP_MONTH, 0); } + } - text.extractBetween(0, start + count, temp); - src = &temp; - } else { - src = &text; + if (!parsedNumericLeapMonth) { + if (obeyCount) { + if ((start+count) > text.length()) { + return -start; + } + + text.extractBetween(0, start + count, temp); + src = &temp; + } else { + src = &text; + } + + parseInt(*src, number, pos, allowNegative,currentNumberFormat); } - parseInt(*src, number, pos, allowNegative); + int32_t txtLoc = pos.getIndex(); - if (pos.getIndex() > parseStart) { + if (txtLoc > parseStart) { value = number.getLong(); gotNumber = TRUE; - + + // suffix processing + if (value < 0 ) { + txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, TRUE); + if (txtLoc != pos.getIndex()) { + value *= -1; + } + } + else { + txtLoc = checkIntSuffix(text, txtLoc, patLoc+1, FALSE); + } + // Check the range of the value - int32_t bias = gFieldRangeBias[patternCharIndex]; - - if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { - return -start; + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_WHITESPACE, status)) { + int32_t bias = gFieldRangeBias[patternCharIndex]; + if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { + return -start; + } + } else { + int32_t bias = gFieldRangeBiasLenient[patternCharIndex]; + if (bias >= 0 && (value > 
cal.getMaximum(field) + bias)) { + return -start; + } } + + pos.setIndex(txtLoc); } - } - + // Make sure that we got a number if // we want one, and didn't get one // if we don't want one. @@ -2111,9 +3208,9 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if (value < 0 || value > 24) { return -start; } - + // fall through to gotNumber check - + U_FALLTHROUGH; case UDAT_YEAR_FIELD: case UDAT_YEAR_WOY_FIELD: case UDAT_FRACTIONAL_SECOND_FIELD: @@ -2121,38 +3218,38 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if (! gotNumber) { return -start; } - - break; - - case UDAT_DOW_LOCAL_FIELD: - case UDAT_STANDALONE_DAY_FIELD: - case UDAT_MONTH_FIELD: - case UDAT_STANDALONE_MONTH_FIELD: - case UDAT_QUARTER_FIELD: - case UDAT_STANDALONE_QUARTER_FIELD: - // in strict mode, these can only - // be a number if count <= 2 - if (!lenient && gotNumber && count > 2) { - return -1; - } - + break; - + default: // we check the rest of the fields below. 
break; } - + switch (patternCharIndex) { case UDAT_ERA_FIELD: - if (count == 5) { - return matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, cal); + if (isChineseCalendar) { + if (!gotNumber) { + return -start; + } + cal.set(UCAL_ERA, value); + return pos.getIndex(); } - if (count == 4) { - return matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, cal); + if (count == 5) { + ps = matchString(text, start, UCAL_ERA, fSymbols->fNarrowEras, fSymbols->fNarrowErasCount, NULL, cal); + } else if (count == 4) { + ps = matchString(text, start, UCAL_ERA, fSymbols->fEraNames, fSymbols->fEraNamesCount, NULL, cal); + } else { + ps = matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, NULL, cal); } - return matchString(text, start, UCAL_ERA, fSymbols->fEras, fSymbols->fErasCount, cal); + // check return position, if it equals -start, then matchString error + // special case the return code so we don't necessarily fail out until we + // verify no year information also + if (ps == -start) + ps--; + + return ps; case UDAT_YEAR_FIELD: // If there are 3 or more YEAR pattern characters, this indicates @@ -2161,33 +3258,51 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // we made adjustments to place the 2-digit year in the proper // century, for parsed strings from "00" to "99". Any other string // is treated literally: "2250", "-1", "1", "002". - if ((pos.getIndex() - start) == 2 - && u_isdigit(text.charAt(start)) - && u_isdigit(text.charAt(start+1))) + if (fDateOverride.compare(hebr)==0 && value < 1000) { + value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR; + } else if (text.moveIndex32(start, 2) == pos.getIndex() && !isChineseCalendar + && u_isdigit(text.char32At(start)) + && u_isdigit(text.char32At(text.moveIndex32(start, 1)))) { - // Assume for example that the defaultCenturyStart is 6/18/1903. 
- // This means that two-digit years will be forced into the range - // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02 - // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond - // to 1904, 1905, etc. If the year is 03, then it is 2003 if the - // other fields specify a date before 6/18, or 1903 if they specify a - // date afterwards. As a result, 03 is an ambiguous year. All other - // two-digit years are unambiguous. - if(fHaveDefaultCentury) { // check if this formatter even has a pivot year - int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; - ambiguousYear[0] = (value == ambiguousTwoDigitYear); - value += (fDefaultCenturyStartYear/100)*100 + - (value < ambiguousTwoDigitYear ? 100 : 0); + // only adjust year for patterns less than 3. + if(count < 3) { + // Assume for example that the defaultCenturyStart is 6/18/1903. + // This means that two-digit years will be forced into the range + // 6/18/1903 to 6/17/2003. As a result, years 00, 01, and 02 + // correspond to 2000, 2001, and 2002. Years 04, 05, etc. correspond + // to 1904, 1905, etc. If the year is 03, then it is 2003 if the + // other fields specify a date before 6/18, or 1903 if they specify a + // date afterwards. As a result, 03 is an ambiguous year. All other + // two-digit years are unambiguous. + if(fHaveDefaultCentury) { // check if this formatter even has a pivot year + int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; + ambiguousYear[0] = (value == ambiguousTwoDigitYear); + value += (fDefaultCenturyStartYear/100)*100 + + (value < ambiguousTwoDigitYear ? 100 : 0); + } } } cal.set(UCAL_YEAR, value); + + // Delayed checking for adjustment of Hebrew month numbers in non-leap years. 
+ if (saveHebrewMonth >= 0) { + HebrewCalendar *hc = (HebrewCalendar*)&cal; + if (!hc->isLeapYear(value) && saveHebrewMonth >= 6) { + cal.set(UCAL_MONTH,saveHebrewMonth); + } else { + cal.set(UCAL_MONTH,saveHebrewMonth-1); + } + saveHebrewMonth = -1; + } return pos.getIndex(); case UDAT_YEAR_WOY_FIELD: // Comment is the same as for UDAT_Year_FIELDs - look above - if ((pos.getIndex() - start) == 2 - && u_isdigit(text.charAt(start)) - && u_isdigit(text.charAt(start+1)) + if (fDateOverride.compare(hebr)==0 && value < 1000) { + value += HEBREW_CAL_CUR_MILLENIUM_START_YEAR; + } else if (text.moveIndex32(start, 2) == pos.getIndex() + && u_isdigit(text.char32At(start)) + && u_isdigit(text.char32At(text.moveIndex32(start, 1))) && fHaveDefaultCentury ) { int32_t ambiguousTwoDigitYear = fDefaultCenturyStartYear % 100; @@ -2198,64 +3313,102 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC cal.set(UCAL_YEAR_WOY, value); return pos.getIndex(); - case UDAT_MONTH_FIELD: - if (gotNumber) // i.e., M or MM. - { - // Don't want to parse the month if it is a string - // while pattern uses numeric style: M or MM. - // [We computed 'value' above.] - cal.set(UCAL_MONTH, value - 1); - return pos.getIndex(); - } else { - // count >= 3 // i.e., MMM or MMMM - // Want to be able to parse both short and long forms. 
- // Try count == 4 first: - int32_t newStart = 0; - - if ((newStart = matchString(text, start, UCAL_MONTH, - fSymbols->fMonths, fSymbols->fMonthsCount, cal)) > 0) + case UDAT_YEAR_NAME_FIELD: + if (fSymbols->fShortYearNames != NULL) { + int32_t newStart = matchString(text, start, UCAL_YEAR, fSymbols->fShortYearNames, fSymbols->fShortYearNamesCount, NULL, cal); + if (newStart > 0) { return newStart; - else // count == 4 failed, now try count == 3 - return matchString(text, start, UCAL_MONTH, - fSymbols->fShortMonths, fSymbols->fShortMonthsCount, cal); + } } + if (gotNumber && (getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC,status) || value > fSymbols->fShortYearNamesCount)) { + cal.set(UCAL_YEAR, value); + return pos.getIndex(); + } + return -start; + case UDAT_MONTH_FIELD: case UDAT_STANDALONE_MONTH_FIELD: - if (gotNumber) // i.e., L or LL. + if (gotNumber) // i.e., M or MM. { - // Don't want to parse the month if it is a string - // while pattern uses numeric style: M or MM. - // [We computed 'value' above.] - cal.set(UCAL_MONTH, value - 1); + // When parsing month numbers from the Hebrew Calendar, we might need to adjust the month depending on whether + // or not it was a leap year. We may or may not yet know what year it is, so might have to delay checking until + // the year is parsed. + if (!strcmp(cal.getType(),"hebrew")) { + HebrewCalendar *hc = (HebrewCalendar*)&cal; + if (cal.isSet(UCAL_YEAR)) { + UErrorCode monthStatus = U_ZERO_ERROR; + if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) { + cal.set(UCAL_MONTH, value); + } else { + cal.set(UCAL_MONTH, value - 1); + } + } else { + saveHebrewMonth = value; + } + } else { + // Don't want to parse the month if it is a string + // while pattern uses numeric style: M/MM, L/LL + // [We computed 'value' above.] 
+ cal.set(UCAL_MONTH, value - 1); + } return pos.getIndex(); } else { - // count >= 3 // i.e., LLL or LLLL + // count >= 3 // i.e., MMM/MMMM, LLL/LLLL // Want to be able to parse both short and long forms. // Try count == 4 first: + UnicodeString * wideMonthPat = NULL; + UnicodeString * shortMonthPat = NULL; + if (fSymbols->fLeapMonthPatterns != NULL && fSymbols->fLeapMonthPatternsCount >= DateFormatSymbols::kMonthPatternsCount) { + if (patternCharIndex==UDAT_MONTH_FIELD) { + wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatWide]; + shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternFormatAbbrev]; + } else { + wideMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneWide]; + shortMonthPat = &fSymbols->fLeapMonthPatterns[DateFormatSymbols::kLeapMonthPatternStandaloneAbbrev]; + } + } int32_t newStart = 0; - - if ((newStart = matchString(text, start, UCAL_MONTH, - fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, cal)) > 0) + if (patternCharIndex==UDAT_MONTH_FIELD) { + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + newStart = matchString(text, start, UCAL_MONTH, fSymbols->fMonths, fSymbols->fMonthsCount, wideMonthPat, cal); // try MMMM + if (newStart > 0) { + return newStart; + } + } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + newStart = matchString(text, start, UCAL_MONTH, fSymbols->fShortMonths, fSymbols->fShortMonthsCount, shortMonthPat, cal); // try MMM + } + } else { + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + newStart = matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneMonths, fSymbols->fStandaloneMonthsCount, wideMonthPat, cal); // try LLLL + if (newStart > 0) { + return newStart; + } + } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + newStart = 
matchString(text, start, UCAL_MONTH, fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, shortMonthPat, cal); // try LLL + } + } + if (newStart > 0 || !getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) // currently we do not try to parse MMMMM/LLLLL: #8860 return newStart; - else // count == 4 failed, now try count == 3 - return matchString(text, start, UCAL_MONTH, - fSymbols->fStandaloneShortMonths, fSymbols->fStandaloneShortMonthsCount, cal); + // else we allowing parsing as number, below } + break; case UDAT_HOUR_OF_DAY1_FIELD: // [We computed 'value' above.] - if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1) + if (value == cal.getMaximum(UCAL_HOUR_OF_DAY) + 1) value = 0; - + // fall through to set field - + U_FALLTHROUGH; case UDAT_HOUR_OF_DAY0_FIELD: cal.set(UCAL_HOUR_OF_DAY, value); return pos.getIndex(); case UDAT_FRACTIONAL_SECOND_FIELD: // Fractional seconds left-justify - i = pos.getIndex() - start; + i = countDigits(text, start, pos.getIndex()); if (i < 3) { while (i < 3) { value *= 10; @@ -2267,7 +3420,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC a *= 10; i--; } - value = (value + (a>>1)) / a; + value /= a; } cal.set(UCAL_MILLISECOND, value); return pos.getIndex(); @@ -2281,23 +3434,40 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC } // else for eee-eeeee fall through to handling of EEE-EEEEE // fall through, do not break here + U_FALLTHROUGH; case UDAT_DAY_OF_WEEK_FIELD: { // Want to be able to parse both short and long forms. 
- // Try count == 4 (EEEE) first: + // Try count == 4 (EEEE) wide first: int32_t newStart = 0; - if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fWeekdays, fSymbols->fWeekdaysCount, cal)) > 0) - return newStart; - // EEEE failed, now try EEE - else if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, cal)) > 0) + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fWeekdays, fSymbols->fWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + // EEEE wide failed, now try EEE abbreviated + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fShortWeekdays, fSymbols->fShortWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + // EEE abbreviated failed, now try EEEEEE short + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fShorterWeekdays, fSymbols->fShorterWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + // EEEEEE short failed, now try EEEEE narrow + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status) || patternCharIndex == UDAT_DAY_OF_WEEK_FIELD) return newStart; - // EEE failed, now try EEEEE - else - return matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fNarrowWeekdays, fSymbols->fNarrowWeekdaysCount, cal); + // else we allowing parsing as number, below } + break; case UDAT_STANDALONE_DAY_FIELD: { @@ -2310,25 +3480,55 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& 
text, int32_t& start, UC // Want to be able to parse both short and long forms. // Try count == 4 (cccc) first: int32_t newStart = 0; - if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, cal)) > 0) + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fStandaloneWeekdays, fSymbols->fStandaloneWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 6) { + if ((newStart = matchString(text, start, UCAL_DAY_OF_WEEK, + fSymbols->fStandaloneShorterWeekdays, fSymbols->fStandaloneShorterWeekdaysCount, NULL, cal)) > 0) + return newStart; + } + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; - else // cccc failed, now try ccc - return matchString(text, start, UCAL_DAY_OF_WEEK, - fSymbols->fStandaloneShortWeekdays, fSymbols->fStandaloneShortWeekdaysCount, cal); + // else we allowing parsing as number, below } + break; case UDAT_AM_PM_FIELD: - return matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, cal); - - case UDAT_HOUR1_FIELD: - // [We computed 'value' above.] 
- if (value == cal.getLeastMaximum(UCAL_HOUR)+1) - value = 0; - - // fall through to set field - - case UDAT_HOUR0_FIELD: + { + // optionally try both wide/abbrev and narrow forms + int32_t newStart = 0; + // try wide/abbrev + if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count < 5 ) { + if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fAmPms, fSymbols->fAmPmsCount, NULL, cal)) > 0) { + return newStart; + } + } + // try narrow + if( getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count >= 5 ) { + if ((newStart = matchString(text, start, UCAL_AM_PM, fSymbols->fNarrowAmPms, fSymbols->fNarrowAmPmsCount, NULL, cal)) > 0) { + return newStart; + } + } + // no matches for given options + return -start; + } + + case UDAT_HOUR1_FIELD: + // [We computed 'value' above.] + if (value == cal.getLeastMaximum(UCAL_HOUR)+1) + value = 0; + + // fall through to set field + U_FALLTHROUGH; + case UDAT_HOUR0_FIELD: cal.set(UCAL_HOUR, value); return pos.getIndex(); @@ -2346,13 +3546,23 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Try count == 4 first: int32_t newStart = 0; - if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + if ((newStart = matchQuarterString(text, start, UCAL_MONTH, fSymbols->fQuarters, fSymbols->fQuartersCount, cal)) > 0) + return newStart; + } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal)) > 0) + return newStart; + } + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; - else // count == 4 failed, now try count == 3 - return matchQuarterString(text, start, UCAL_MONTH, - fSymbols->fShortQuarters, fSymbols->fShortQuartersCount, cal); + // else we allowing parsing as 
number, below + if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) + return -start; } + break; case UDAT_STANDALONE_QUARTER_FIELD: if (gotNumber) // i.e., q or qq. @@ -2368,192 +3578,337 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC // Try count == 4 first: int32_t newStart = 0; - if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + if ((newStart = matchQuarterString(text, start, UCAL_MONTH, fSymbols->fStandaloneQuarters, fSymbols->fStandaloneQuartersCount, cal)) > 0) + return newStart; + } + if(getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + if ((newStart = matchQuarterString(text, start, UCAL_MONTH, + fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal)) > 0) + return newStart; + } + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) return newStart; - else // count == 4 failed, now try count == 3 - return matchQuarterString(text, start, UCAL_MONTH, - fSymbols->fStandaloneShortQuarters, fSymbols->fStandaloneShortQuartersCount, cal); + // else we allowing parsing as number, below + if(!getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) + return -start; } + break; - case UDAT_TIMEZONE_FIELD: - case UDAT_TIMEZONE_RFC_FIELD: - case UDAT_TIMEZONE_GENERIC_FIELD: - case UDAT_TIMEZONE_SPECIAL_FIELD: + case UDAT_TIMEZONE_FIELD: // 'z' { - int32_t offset = 0; - UBool parsed = FALSE; - - // Step 1 - // Check if this is a long GMT offset string (either localized or default) - offset = parseGMT(text, pos); - if (pos.getIndex() - start > 0) { - parsed = TRUE; - } - if (!parsed) { - // Step 2 - // Check if this is an RFC822 time zone offset. - // ICU supports the standard RFC822 format [+|-]HHmm - // and its extended form [+|-]HHmmSS. 
- do { - int32_t sign = 0; - UChar signChar = text.charAt(start); - if (signChar == (UChar)0x002B /* '+' */) { - sign = 1; - } else if (signChar == (UChar)0x002D /* '-' */) { - sign = -1; - } else { - // Not an RFC822 offset string - break; - } + UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_SPECIFIC_SHORT : UTZFMT_STYLE_SPECIFIC_LONG; + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzParseOptions, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + break; + case UDAT_TIMEZONE_RFC_FIELD: // 'Z' + { + UTimeZoneFormatStyle style = (count < 4) ? + UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL : ((count == 5) ? UTZFMT_STYLE_ISO_EXTENDED_FULL: UTZFMT_STYLE_LOCALIZED_GMT); + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + case UDAT_TIMEZONE_GENERIC_FIELD: // 'v' + { + UTimeZoneFormatStyle style = (count < 4) ? 
UTZFMT_STYLE_GENERIC_SHORT : UTZFMT_STYLE_GENERIC_LONG; + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzParseOptions, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V' + { + UTimeZoneFormatStyle style; + switch (count) { + case 1: + style = UTZFMT_STYLE_ZONE_ID_SHORT; + break; + case 2: + style = UTZFMT_STYLE_ZONE_ID; + break; + case 3: + style = UTZFMT_STYLE_EXEMPLAR_LOCATION; + break; + default: + style = UTZFMT_STYLE_GENERIC_LOCATION; + break; + } + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + case UDAT_TIMEZONE_LOCALIZED_GMT_OFFSET_FIELD: // 'O' + { + UTimeZoneFormatStyle style = (count < 4) ? UTZFMT_STYLE_LOCALIZED_GMT_SHORT : UTZFMT_STYLE_LOCALIZED_GMT; + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + case UDAT_TIMEZONE_ISO_FIELD: // 'X' + { + UTimeZoneFormatStyle style; + switch (count) { + case 1: + style = UTZFMT_STYLE_ISO_BASIC_SHORT; + break; + case 2: + style = UTZFMT_STYLE_ISO_BASIC_FIXED; + break; + case 3: + style = UTZFMT_STYLE_ISO_EXTENDED_FIXED; + break; + case 4: + style = UTZFMT_STYLE_ISO_BASIC_FULL; + break; + default: + style = UTZFMT_STYLE_ISO_EXTENDED_FULL; + break; + } + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + case UDAT_TIMEZONE_ISO_LOCAL_FIELD: // 'x' + { + UTimeZoneFormatStyle style; + 
switch (count) { + case 1: + style = UTZFMT_STYLE_ISO_BASIC_LOCAL_SHORT; + break; + case 2: + style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FIXED; + break; + case 3: + style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FIXED; + break; + case 4: + style = UTZFMT_STYLE_ISO_BASIC_LOCAL_FULL; + break; + default: + style = UTZFMT_STYLE_ISO_EXTENDED_LOCAL_FULL; + break; + } + const TimeZoneFormat *tzfmt = tzFormat(status); + if (U_SUCCESS(status)) { + TimeZone *tz = tzfmt->parse(style, text, pos, tzTimeType); + if (tz != NULL) { + cal.adoptTimeZone(tz); + return pos.getIndex(); + } + } + return -start; + } + // currently no pattern character is defined for UDAT_TIME_SEPARATOR_FIELD + // so we should not get here. Leave support in for future definition. + case UDAT_TIME_SEPARATOR_FIELD: + { + static const UChar def_sep = DateFormatSymbols::DEFAULT_TIME_SEPARATOR; + static const UChar alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR; - // Parse digits - int32_t orgPos = start + 1; - pos.setIndex(orgPos); - parseInt(text, number, 6, pos, FALSE); - int32_t numLen = pos.getIndex() - orgPos; - if (numLen <= 0) { - break; - } + // Try matching a time separator. + int32_t count_sep = 1; + UnicodeString data[3]; + fSymbols->getTimeSeparatorString(data[0]); - // Followings are possible format (excluding sign char) - // HHmmSS - // HmmSS - // HHmm - // Hmm - // HH - // H - int32_t val = number.getLong(); - int32_t hour = 0, min = 0, sec = 0; - switch(numLen) { - case 1: // H - case 2: // HH - hour = val; - break; - case 3: // Hmm - case 4: // HHmm - hour = val / 100; - min = val % 100; - break; - case 5: // Hmmss - case 6: // HHmmss - hour = val / 10000; - min = (val % 10000) / 100; - sec = val % 100; - break; - } - if (hour > 23 || min > 59 || sec > 59) { - // Invalid value range - break; - } - offset = (((hour * 60) + min) * 60 + sec) * 1000 * sign; - parsed = TRUE; - } while (FALSE); + // Add the default, if different from the locale. 
+ if (data[0].compare(&def_sep, 1) != 0) { + data[count_sep++].setTo(def_sep); + } - if (!parsed) { - // Failed to parse. Reset the position. - pos.setIndex(start); - } + // If lenient, add also the alternate, if different from the locale. + if (isLenient() && data[0].compare(&alt_sep, 1) != 0) { + data[count_sep++].setTo(alt_sep); } - if (parsed) { - // offset was successfully parsed as either a long GMT string or RFC822 zone offset - // string. Create normalized zone ID for the offset. - - UnicodeString tzID(gGmt); - formatRFC822TZ(tzID, offset); - //TimeZone *customTZ = TimeZone::createTimeZone(tzID); - TimeZone *customTZ = new SimpleTimeZone(offset, tzID); // faster than TimeZone::createTimeZone - cal.adoptTimeZone(customTZ); + return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, NULL, cal); + } - return pos.getIndex(); - } + case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: + { + U_ASSERT(dayPeriod != NULL); + int32_t ampmStart = subParse(text, start, 0x61, count, + obeyCount, allowNegative, ambiguousYear, saveHebrewMonth, cal, + patLoc, numericLeapMonthFormatter, tzTimeType); - // Step 3 - // At this point, check for named time zones by looking through - // the locale data from the DateFormatZoneData strings. - // Want to be able to parse both short and long forms. 
- // optimize for calendar's current time zone - const ZoneStringFormat *zsf = fSymbols->getZoneStringFormat(); - if (zsf) { - UErrorCode status = U_ZERO_ERROR; - const ZoneStringInfo *zsinfo = NULL; - int32_t matchLen; + if (ampmStart > 0) { + return ampmStart; + } else { + int32_t newStart = 0; - switch (patternCharIndex) { - case UDAT_TIMEZONE_FIELD: // 'z' - if (count < 4) { - zsinfo = zsf->findSpecificShort(text, start, matchLen, status); - } else { - zsinfo = zsf->findSpecificLong(text, start, matchLen, status); - } - break; - case UDAT_TIMEZONE_GENERIC_FIELD: // 'v' - if (count == 1) { - zsinfo = zsf->findGenericShort(text, start, matchLen, status); - } else if (count == 4) { - zsinfo = zsf->findGenericLong(text, start, matchLen, status); - } - break; - case UDAT_TIMEZONE_SPECIAL_FIELD: // 'V' - if (count == 1) { - zsinfo = zsf->findSpecificShort(text, start, matchLen, status); - } else if (count == 4) { - zsinfo = zsf->findGenericLocation(text, start, matchLen, status); - } - break; + // Only match the first two strings from the day period strings array. 
+ if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods, + 2, *dayPeriod)) > 0) { + return newStart; } - - if (U_SUCCESS(status) && zsinfo != NULL) { - if (zsinfo->isStandard()) { - ((SimpleDateFormat*)this)->tztype = TZTYPE_STD; - } else if (zsinfo->isDaylight()) { - ((SimpleDateFormat*)this)->tztype = TZTYPE_DST; - } - UnicodeString tzid; - zsinfo->getID(tzid); - - UnicodeString current; - cal.getTimeZone().getID(current); - if (tzid != current) { - TimeZone *tz = TimeZone::createTimeZone(tzid); - cal.adoptTimeZone(tz); - } - return start + matchLen; + } + if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { + if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods, + 2, *dayPeriod)) > 0) { + return newStart; } } - // complete failure + // count == 4, but allow other counts + if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status)) { + if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods, + 2, *dayPeriod)) > 0) { + return newStart; + } + } + return -start; } + } + + case UDAT_FLEXIBLE_DAY_PERIOD_FIELD: + { + U_ASSERT(dayPeriod != NULL); + int32_t newStart = 0; + + if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 3) { + if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fAbbreviatedDayPeriods, + fSymbols->fAbbreviatedDayPeriodsCount, *dayPeriod)) > 0) { + return newStart; + } + } + if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 5) { + if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fNarrowDayPeriods, + fSymbols->fNarrowDayPeriodsCount, *dayPeriod)) > 0) { + return newStart; + } + } + if (getBooleanAttribute(UDAT_PARSE_MULTIPLE_PATTERNS_FOR_MATCH, status) || count == 4) { + if ((newStart = matchDayPeriodStrings(text, start, fSymbols->fWideDayPeriods, + 
fSymbols->fWideDayPeriodsCount, *dayPeriod)) > 0) { + return newStart; + } + } + + return -start; + } default: // Handle "generic" fields - int32_t parseStart = pos.getIndex(); - const UnicodeString* src; - if (obeyCount) { - if ((start+count) > text.length()) { + // this is now handled below, outside the switch block + break; + } + // Handle "generic" fields: + // switch default case now handled here (outside switch block) to allow + // parsing of some string fields as digits for lenient case + + int32_t parseStart = pos.getIndex(); + const UnicodeString* src; + if (obeyCount) { + if ((start+count) > text.length()) { + return -start; + } + text.extractBetween(0, start + count, temp); + src = &temp; + } else { + src = &text; + } + parseInt(*src, number, pos, allowNegative,currentNumberFormat); + if (pos.getIndex() != parseStart) { + int32_t val = number.getLong(); + + // Don't need suffix processing here (as in number processing at the beginning of the function); + // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes. 
+ + // Check the range of the value + if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) { + int32_t bias = gFieldRangeBias[patternCharIndex]; + if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) { return -start; } - text.extractBetween(0, start + count, temp); - src = &temp; } else { - src = &text; + int32_t bias = gFieldRangeBiasLenient[patternCharIndex]; + if (bias >= 0 && (value > cal.getMaximum(field) + bias)) { + return -start; + } } - parseInt(*src, number, pos, allowNegative); - if (pos.getIndex() != parseStart) { - int32_t value = number.getLong(); - - // Check the range of the value - int32_t bias = gFieldRangeBias[patternCharIndex]; - - if (bias < 0 || (value >= cal.getMinimum(field) + bias && value <= cal.getMaximum(field) + bias)) { - cal.set(field, value); - return pos.getIndex(); + + // For the following, need to repeat some of the "if (gotNumber)" code above: + // UDAT_[STANDALONE_]MONTH_FIELD, UDAT_DOW_LOCAL_FIELD, UDAT_STANDALONE_DAY_FIELD, + // UDAT_[STANDALONE_]QUARTER_FIELD + switch (patternCharIndex) { + case UDAT_MONTH_FIELD: + // See notes under UDAT_MONTH_FIELD case above + if (!strcmp(cal.getType(),"hebrew")) { + HebrewCalendar *hc = (HebrewCalendar*)&cal; + if (cal.isSet(UCAL_YEAR)) { + UErrorCode monthStatus = U_ZERO_ERROR; + if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) { + cal.set(UCAL_MONTH, val); + } else { + cal.set(UCAL_MONTH, val - 1); + } + } else { + saveHebrewMonth = val; + } + } else { + cal.set(UCAL_MONTH, val - 1); } + break; + case UDAT_STANDALONE_MONTH_FIELD: + cal.set(UCAL_MONTH, val - 1); + break; + case UDAT_DOW_LOCAL_FIELD: + case UDAT_STANDALONE_DAY_FIELD: + cal.set(UCAL_DOW_LOCAL, val); + break; + case UDAT_QUARTER_FIELD: + case UDAT_STANDALONE_QUARTER_FIELD: + cal.set(UCAL_MONTH, (val - 1) * 3); + break; + case UDAT_RELATED_YEAR_FIELD: + cal.setRelatedYear(val); + break; + default: + cal.set(field, val); + break; } - - return -start; + return 
pos.getIndex(); } + return -start; } /** @@ -2563,8 +3918,9 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC void SimpleDateFormat::parseInt(const UnicodeString& text, Formattable& number, ParsePosition& pos, - UBool allowNegative) const { - parseInt(text, number, -1, pos, allowNegative); + UBool allowNegative, + const NumberFormat *fmt) const { + parseInt(text, number, -1, pos, allowNegative,fmt); } /** @@ -2574,20 +3930,22 @@ void SimpleDateFormat::parseInt(const UnicodeString& text, Formattable& number, int32_t maxDigits, ParsePosition& pos, - UBool allowNegative) const { + UBool allowNegative, + const NumberFormat *fmt) const { UnicodeString oldPrefix; - DecimalFormat* df = NULL; - if (!allowNegative && - fNumberFormat->getDynamicClassID() == DecimalFormat::getStaticClassID()) { - df = (DecimalFormat*)fNumberFormat; - df->getNegativePrefix(oldPrefix); - df->setNegativePrefix(SUPPRESS_NEGATIVE_PREFIX); + auto* fmtAsDF = dynamic_cast<const DecimalFormat*>(fmt); + LocalPointer<DecimalFormat> df; + if (!allowNegative && fmtAsDF != nullptr) { + df.adoptInstead(dynamic_cast<DecimalFormat*>(fmtAsDF->clone())); + if (df.isNull()) { + // Memory allocation error + return; + } + df->setNegativePrefix(UnicodeString(TRUE, SUPPRESS_NEGATIVE_PREFIX, -1)); + fmt = df.getAlias(); } int32_t oldPos = pos.getIndex(); - fNumberFormat->parse(text, number, pos); - if (df != NULL) { - df->setNegativePrefix(oldPrefix); - } + fmt->parse(text, number, pos); if (maxDigits > 0) { // adjust the result to fit into @@ -2606,6 +3964,19 @@ void SimpleDateFormat::parseInt(const UnicodeString& text, } } +int32_t SimpleDateFormat::countDigits(const UnicodeString& text, int32_t start, int32_t end) const { + int32_t numDigits = 0; + int32_t idx = start; + while (idx < end) { + UChar32 cp = text.char32At(idx); + if (u_isdigit(cp)) { + numDigits++; + } + idx += U16_LENGTH(cp); + } + return numDigits; +} + 
//---------------------------------------------------------------------- void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern, @@ -2614,42 +3985,42 @@ void SimpleDateFormat::translatePattern(const UnicodeString& originalPattern, const UnicodeString& to, UErrorCode& status) { - // run through the pattern and convert any pattern symbols from the version - // in "from" to the corresponding character ion "to". This code takes - // quoted strings into account (it doesn't try to translate them), and it signals - // an error if a particular "pattern character" doesn't appear in "from". - // Depending on the values of "from" and "to" this can convert from generic - // to localized patterns or localized to generic. - if (U_FAILURE(status)) - return; - - translatedPattern.remove(); - UBool inQuote = FALSE; - for (int32_t i = 0; i < originalPattern.length(); ++i) { - UChar c = originalPattern[i]; - if (inQuote) { - if (c == QUOTE) - inQuote = FALSE; - } - else { - if (c == QUOTE) - inQuote = TRUE; - else if ((c >= 0x0061 /*'a'*/ && c <= 0x007A) /*'z'*/ - || (c >= 0x0041 /*'A'*/ && c <= 0x005A /*'Z'*/)) { - int32_t ci = from.indexOf(c); - if (ci == -1) { - status = U_INVALID_FORMAT_ERROR; - return; + // run through the pattern and convert any pattern symbols from the version + // in "from" to the corresponding character in "to". This code takes + // quoted strings into account (it doesn't try to translate them), and it signals + // an error if a particular "pattern character" doesn't appear in "from". + // Depending on the values of "from" and "to" this can convert from generic + // to localized patterns or localized to generic. 
+ if (U_FAILURE(status)) { + return; + } + + translatedPattern.remove(); + UBool inQuote = FALSE; + for (int32_t i = 0; i < originalPattern.length(); ++i) { + UChar c = originalPattern[i]; + if (inQuote) { + if (c == QUOTE) { + inQuote = FALSE; + } + } else { + if (c == QUOTE) { + inQuote = TRUE; + } else if (isSyntaxChar(c)) { + int32_t ci = from.indexOf(c); + if (ci == -1) { + status = U_INVALID_FORMAT_ERROR; + return; + } + c = to[ci]; + } + } + translatedPattern += c; } - c = to[ci]; - } + if (inQuote) { + status = U_INVALID_FORMAT_ERROR; + return; } - translatedPattern += c; - } - if (inQuote) { - status = U_INVALID_FORMAT_ERROR; - return; - } } //---------------------------------------------------------------------- @@ -2667,7 +4038,9 @@ UnicodeString& SimpleDateFormat::toLocalizedPattern(UnicodeString& result, UErrorCode& status) const { - translatePattern(fPattern, result, DateFormatSymbols::getPatternUChars(), fSymbols->fLocalPatternChars, status); + translatePattern(fPattern, result, + UnicodeString(DateFormatSymbols::getPatternUChars()), + fSymbols->fLocalPatternChars, status); return result; } @@ -2677,6 +4050,43 @@ void SimpleDateFormat::applyPattern(const UnicodeString& pattern) { fPattern = pattern; + parsePattern(); + + // Hack to update use of Gannen year numbering for ja@calendar=japanese - + // use only if format is non-numeric (includes 年) and no other fDateOverride. 
+ if (fCalendar != nullptr && uprv_strcmp(fCalendar->getType(),"japanese") == 0 && + uprv_strcmp(fLocale.getLanguage(),"ja") == 0) { + if (fDateOverride==UnicodeString(u"y=jpanyear") && !fHasHanYearChar) { + // Gannen numbering is set but new pattern should not use it, unset; + // use procedure from adoptNumberFormat to clear overrides + if (fSharedNumberFormatters) { + freeSharedNumberFormatters(fSharedNumberFormatters); + fSharedNumberFormatters = NULL; + } + fDateOverride.setToBogus(); // record status + } else if (fDateOverride.isBogus() && fHasHanYearChar) { + // No current override (=> no Gannen numbering) but new pattern needs it; + // use procedures from initNumberFormatters / adoptNumberFormat + umtx_lock(LOCK()); + if (fSharedNumberFormatters == NULL) { + fSharedNumberFormatters = allocSharedNumberFormatters(); + } + umtx_unlock(LOCK()); + if (fSharedNumberFormatters != NULL) { + Locale ovrLoc(fLocale.getLanguage(),fLocale.getCountry(),fLocale.getVariant(),"numbers=jpanyear"); + UErrorCode status = U_ZERO_ERROR; + const SharedNumberFormat *snf = createSharedNumberFormat(ovrLoc, status); + if (U_SUCCESS(status)) { + // Now that we have an appropriate number formatter, fill in the + // appropriate slot in the number formatters table. 
+ UDateFormatField patternCharIndex = DateFormatSymbols::getPatternCharIndex(u'y'); + SharedObject::copyPtr(snf, fSharedNumberFormatters[patternCharIndex]); + snf->deleteIfZeroRefCount(); + fDateOverride.setTo(u"y=jpanyear", -1); // record status + } + } + } + } } //---------------------------------------------------------------------- @@ -2685,7 +4095,9 @@ void SimpleDateFormat::applyLocalizedPattern(const UnicodeString& pattern, UErrorCode &status) { - translatePattern(pattern, fPattern, fSymbols->fLocalPatternChars, DateFormatSymbols::getPatternUChars(), status); + translatePattern(pattern, fPattern, + fSymbols->fLocalPatternChars, + UnicodeString(DateFormatSymbols::getPatternUChars()), status); } //---------------------------------------------------------------------- @@ -2713,6 +4125,33 @@ SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols fSymbols = new DateFormatSymbols(newFormatSymbols); } +//---------------------------------------------------------------------- +const TimeZoneFormat* +SimpleDateFormat::getTimeZoneFormat(void) const { + // TimeZoneFormat initialization might fail when out of memory. + // If we always initialize TimeZoneFormat instance, we can return + // such status there. For now, this implementation lazily instantiates + // a TimeZoneFormat for performance optimization reasons, but cannot + // propagate such error (probably just out of memory case) to the caller. 
+ UErrorCode status = U_ZERO_ERROR; + return (const TimeZoneFormat*)tzFormat(status); +} + +//---------------------------------------------------------------------- +void +SimpleDateFormat::adoptTimeZoneFormat(TimeZoneFormat* timeZoneFormatToAdopt) +{ + delete fTimeZoneFormat; + fTimeZoneFormat = timeZoneFormatToAdopt; +} + +//---------------------------------------------------------------------- +void +SimpleDateFormat::setTimeZoneFormat(const TimeZoneFormat& newTimeZoneFormat) +{ + delete fTimeZoneFormat; + fTimeZoneFormat = new TimeZoneFormat(newTimeZoneFormat); +} //---------------------------------------------------------------------- @@ -2720,10 +4159,20 @@ SimpleDateFormat::setDateFormatSymbols(const DateFormatSymbols& newFormatSymbols void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt) { UErrorCode status = U_ZERO_ERROR; + Locale calLocale(fLocale); + DateFormatSymbols *newSymbols = fSymbols; + if (!newSymbols || fCalendar->getType() != calendarToAdopt->getType()) { + calLocale.setKeywordValue("calendar", calendarToAdopt->getType(), status); + newSymbols = DateFormatSymbols::createForLocale(calLocale, status); + if (U_FAILURE(status)) { + return; + } + } DateFormat::adoptCalendar(calendarToAdopt); - delete fSymbols; - fSymbols=NULL; - initializeSymbols(fLocale, fCalendar, status); // we need new symbols + if (fSymbols != newSymbols) { + delete fSymbols; + fSymbols = newSymbols; + } initializeDefaultCentury(); // we need a new century (possibly) } @@ -2731,6 +4180,31 @@ void SimpleDateFormat::adoptCalendar(Calendar* calendarToAdopt) //---------------------------------------------------------------------- +// override the DateFormat implementation in order to +// lazily initialize fCapitalizationBrkIter +void +SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status) +{ + DateFormat::setContext(value, status); +#if !UCONFIG_NO_BREAK_ITERATION + if (U_SUCCESS(status)) { + if ( fCapitalizationBrkIter == NULL && 
(value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || + value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) { + status = U_ZERO_ERROR; + fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status); + if (U_FAILURE(status)) { + delete fCapitalizationBrkIter; + fCapitalizationBrkIter = NULL; + } + } + } +#endif +} + + +//---------------------------------------------------------------------- + + UBool SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const { return isFieldUnitIgnored(fPattern, field); @@ -2738,7 +4212,7 @@ SimpleDateFormat::isFieldUnitIgnored(UCalendarDateFields field) const { UBool -SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern, +SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern, UCalendarDateFields field) { int32_t fieldLevel = fgCalendarFieldToLevel[field]; int32_t level; @@ -2750,9 +4224,9 @@ SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern, for (int32_t i = 0; i < pattern.length(); ++i) { ch = pattern[i]; if (ch != prevCh && count > 0) { - level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE]; + level = getLevelFromChar(prevCh); // the larger the level, the smaller the field unit. - if ( fieldLevel <= level ) { + if (fieldLevel <= level) { return FALSE; } count = 0; @@ -2763,31 +4237,217 @@ SimpleDateFormat::isFieldUnitIgnored(const UnicodeString& pattern, } else { inQuote = ! inQuote; } - } - else if ( ! 
inQuote && ((ch >= 0x0061 /*'a'*/ && ch <= 0x007A /*'z'*/) - || (ch >= 0x0041 /*'A'*/ && ch <= 0x005A /*'Z'*/))) { + } + else if (!inQuote && isSyntaxChar(ch)) { prevCh = ch; ++count; } } - if ( count > 0 ) { + if (count > 0) { // last item - level = fgPatternCharToLevel[prevCh - PATTERN_CHAR_BASE]; - if ( fieldLevel <= level ) { - return FALSE; - } + level = getLevelFromChar(prevCh); + if (fieldLevel <= level) { + return FALSE; + } } return TRUE; } - +//---------------------------------------------------------------------- const Locale& SimpleDateFormat::getSmpFmtLocale(void) const { return fLocale; } +//---------------------------------------------------------------------- + +int32_t +SimpleDateFormat::checkIntSuffix(const UnicodeString& text, int32_t start, + int32_t patLoc, UBool isNegative) const { + // local variables + UnicodeString suf; + int32_t patternMatch; + int32_t textPreMatch; + int32_t textPostMatch; + + // check that we are still in range + if ( (start > text.length()) || + (start < 0) || + (patLoc < 0) || + (patLoc > fPattern.length())) { + // out of range, don't advance location in text + return start; + } + + // get the suffix + DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fNumberFormat); + if (decfmt != NULL) { + if (isNegative) { + suf = decfmt->getNegativeSuffix(suf); + } + else { + suf = decfmt->getPositiveSuffix(suf); + } + } + + // check for suffix + if (suf.length() <= 0) { + return start; + } + + // check suffix will be encountered in the pattern + patternMatch = compareSimpleAffix(suf,fPattern,patLoc); + + // check if a suffix will be encountered in the text + textPreMatch = compareSimpleAffix(suf,text,start); + + // check if a suffix was encountered in the text + textPostMatch = compareSimpleAffix(suf,text,start-suf.length()); + + // check for suffix match + if ((textPreMatch >= 0) && (patternMatch >= 0) && (textPreMatch == patternMatch)) { + return start; + } + else if ((textPostMatch >= 0) && (patternMatch >= 0) && 
(textPostMatch == patternMatch)) { + return start - suf.length(); + } + + // should not get here + return start; +} + +//---------------------------------------------------------------------- + +int32_t +SimpleDateFormat::compareSimpleAffix(const UnicodeString& affix, + const UnicodeString& input, + int32_t pos) const { + int32_t start = pos; + for (int32_t i=0; i<affix.length(); ) { + UChar32 c = affix.char32At(i); + int32_t len = U16_LENGTH(c); + if (PatternProps::isWhiteSpace(c)) { + // We may have a pattern like: \u200F \u0020 + // and input text like: \u200F \u0020 + // Note that U+200F and U+0020 are Pattern_White_Space but only + // U+0020 is UWhiteSpace. So we have to first do a direct + // match of the run of Pattern_White_Space in the pattern, + // then match any extra characters. + UBool literalMatch = FALSE; + while (pos < input.length() && + input.char32At(pos) == c) { + literalMatch = TRUE; + i += len; + pos += len; + if (i == affix.length()) { + break; + } + c = affix.char32At(i); + len = U16_LENGTH(c); + if (!PatternProps::isWhiteSpace(c)) { + break; + } + } + + // Advance over run in pattern + i = skipPatternWhiteSpace(affix, i); + + // Advance over run in input text + // Must see at least one white space char in input, + // unless we've already matched some characters literally. + int32_t s = pos; + pos = skipUWhiteSpace(input, pos); + if (pos == s && !literalMatch) { + return -1; + } + // If we skip UWhiteSpace in the input text, we need to skip it in the pattern. + // Otherwise, the previous lines may have skipped over text (such as U+00A0) that + // is also in the affix. 
+ i = skipUWhiteSpace(affix, i); + } else { + if (pos < input.length() && + input.char32At(pos) == c) { + i += len; + pos += len; + } else { + return -1; + } + } + } + return pos - start; +} + +//---------------------------------------------------------------------- + +int32_t +SimpleDateFormat::skipPatternWhiteSpace(const UnicodeString& text, int32_t pos) const { + const UChar* s = text.getBuffer(); + return (int32_t)(PatternProps::skipWhiteSpace(s + pos, text.length() - pos) - s); +} + +//---------------------------------------------------------------------- + +int32_t +SimpleDateFormat::skipUWhiteSpace(const UnicodeString& text, int32_t pos) const { + while (pos < text.length()) { + UChar32 c = text.char32At(pos); + if (!u_isUWhiteSpace(c)) { + break; + } + pos += U16_LENGTH(c); + } + return pos; +} + +//---------------------------------------------------------------------- + +// Lazy TimeZoneFormat instantiation, semantically const. +TimeZoneFormat * +SimpleDateFormat::tzFormat(UErrorCode &status) const { + if (fTimeZoneFormat == NULL) { + umtx_lock(LOCK()); + { + if (fTimeZoneFormat == NULL) { + TimeZoneFormat *tzfmt = TimeZoneFormat::createInstance(fLocale, status); + if (U_FAILURE(status)) { + return NULL; + } + + const_cast<SimpleDateFormat *>(this)->fTimeZoneFormat = tzfmt; + } + } + umtx_unlock(LOCK()); + } + return fTimeZoneFormat; +} + +void SimpleDateFormat::parsePattern() { + fHasMinute = FALSE; + fHasSecond = FALSE; + fHasHanYearChar = FALSE; + + int len = fPattern.length(); + UBool inQuote = FALSE; + for (int32_t i = 0; i < len; ++i) { + UChar ch = fPattern[i]; + if (ch == QUOTE) { + inQuote = !inQuote; + } + if (ch == 0x5E74) { // don't care whether this is inside quotes + fHasHanYearChar = TRUE; + } + if (!inQuote) { + if (ch == 0x6D) { // 0x6D == 'm' + fHasMinute = TRUE; + } + if (ch == 0x73) { // 0x73 == 's' + fHasSecond = TRUE; + } + } + } +} U_NAMESPACE_END