X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..c5116b9f5a666b9d59f443b3770acd6ef64dc6c3:/icuSources/i18n/msgfmt.cpp diff --git a/icuSources/i18n/msgfmt.cpp b/icuSources/i18n/msgfmt.cpp index 359c8ced..e39b26b9 100644 --- a/icuSources/i18n/msgfmt.cpp +++ b/icuSources/i18n/msgfmt.cpp @@ -1,62 +1,68 @@ -/* -******************************************************************************* -* Copyright (C) 2007-2008, International Business Machines Corporation and * -* others. All Rights Reserved. * -******************************************************************************* -* -* File MSGFMT.CPP -* -* Modification History: -* -* Date Name Description -* 02/19/97 aliu Converted from java. -* 03/20/97 helena Finished first cut of implementation. -* 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. -* 06/11/97 helena Fixed addPattern to take the pattern correctly. -* 06/17/97 helena Fixed the getPattern to return the correct pattern. -* 07/09/97 helena Made ParsePosition into a class. -* 02/22/99 stephen Removed character literals for EBCDIC safety -******************************************************************************** -*/ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/******************************************************************** + * COPYRIGHT: + * Copyright (c) 1997-2015, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************** + * + * File MSGFMT.CPP + * + * Modification History: + * + * Date Name Description + * 02/19/97 aliu Converted from java. + * 03/20/97 helena Finished first cut of implementation. + * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi. + * 06/11/97 helena Fixed addPattern to take the pattern correctly. + * 06/17/97 helena Fixed the getPattern to return the correct pattern. + * 07/09/97 helena Made ParsePosition into a class. + * 02/22/99 stephen Removed character literals for EBCDIC safety + * 11/01/09 kirtig Added SelectFormat + ********************************************************************/ #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING -#include "unicode/msgfmt.h" -#include "unicode/decimfmt.h" -#include "unicode/datefmt.h" -#include "unicode/smpdtfmt.h" +#include "unicode/appendable.h" #include "unicode/choicfmt.h" +#include "unicode/datefmt.h" +#include "unicode/decimfmt.h" +#include "unicode/localpointer.h" +#include "unicode/msgfmt.h" +#include "unicode/numberformatter.h" #include "unicode/plurfmt.h" -#include "unicode/ustring.h" -#include "unicode/ucnv_err.h" -#include "unicode/uchar.h" -#include "unicode/umsg.h" #include "unicode/rbnf.h" +#include "unicode/selfmt.h" +#include "unicode/smpdtfmt.h" +#include "unicode/umsg.h" +#include "unicode/ustring.h" #include "cmemory.h" +#include "patternprops.h" +#include "messageimpl.h" #include "msgfmt_impl.h" -#include "util.h" +#include "plurrule_impl.h" #include "uassert.h" +#include "uelement.h" +#include "uhash.h" #include "ustrfmt.h" +#include "util.h" #include "uvector.h" +#include "number_decimalquantity.h" // ***************************************************************************** // class MessageFormat // ***************************************************************************** -#define COMMA ((UChar)0x002C) #define SINGLE_QUOTE ((UChar)0x0027) +#define COMMA ((UChar)0x002C) #define LEFT_CURLY_BRACE ((UChar)0x007B) #define RIGHT_CURLY_BRACE ((UChar)0x007D) //--------------------------------------- // static data -static const UChar ID_EMPTY[] = { - 0 /* empty string, used for default so that null can mark end of list */ -}; - static const UChar ID_NUMBER[] = { 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */ }; @@ -66,9 +72,6 @@ static const UChar ID_DATE[] = { static const UChar ID_TIME[] = { 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */ }; -static const UChar ID_CHOICE[] = { - 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */ -}; static const UChar ID_SPELLOUT[] = { 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */ }; @@ -78,24 +81,21 @@ static const UChar ID_ORDINAL[] = { static const UChar ID_DURATION[] = { 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */ }; -static const UChar ID_PLURAL[] = { - 0x70, 0x6c, 0x75, 0x72, 0x61, 0x6c, 0 /* "plural" */ -}; // MessageFormat Type List Number, Date, Time or Choice static const UChar * const TYPE_IDS[] = { - ID_EMPTY, ID_NUMBER, - ID_DATE, + ID_DATE, ID_TIME, - ID_CHOICE, ID_SPELLOUT, ID_ORDINAL, ID_DURATION, - ID_PLURAL, NULL, }; - + +static const UChar ID_EMPTY[] = { + 0 /* empty string, used for default so that null can mark end of list */ +}; static const UChar ID_CURRENCY[] = { 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */ }; @@ -137,17 +137,33 @@ static const UChar * const DATE_STYLE_IDS[] = { ID_FULL, NULL, }; - -static const U_NAMESPACE_QUALIFIER DateFormat::EStyle DATE_STYLES[] = { - U_NAMESPACE_QUALIFIER DateFormat::kDefault, - U_NAMESPACE_QUALIFIER DateFormat::kShort, - U_NAMESPACE_QUALIFIER DateFormat::kMedium, - U_NAMESPACE_QUALIFIER DateFormat::kLong, - U_NAMESPACE_QUALIFIER DateFormat::kFull, + +static const icu::DateFormat::EStyle DATE_STYLES[] = { + icu::DateFormat::kDefault, + icu::DateFormat::kShort, + icu::DateFormat::kMedium, + icu::DateFormat::kLong, + icu::DateFormat::kFull, }; static const int32_t DEFAULT_INITIAL_CAPACITY = 10; +static const UChar NULL_STRING[] = { + 0x6E, 0x75, 0x6C, 0x6C, 0 // "null" +}; + +static const UChar OTHER_STRING[] = { + 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" +}; + +U_CDECL_BEGIN +static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1, + const UHashTok key2) { + return icu::MessageFormat::equalFormats(key1.pointer, key2.pointer); +} + +U_CDECL_END + U_NAMESPACE_BEGIN // ------------------------------------- @@ -156,29 +172,6 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration) //-------------------------------------------------------------------- -/** - * Convert a string to an unsigned decimal, ignoring rule whitespace. - * @return a non-negative number if successful, or a negative number - * upon failure. - */ -static int32_t stou(const UnicodeString& string) { - int32_t n = 0; - int32_t count = 0; - UChar32 c; - for (int32_t i=0; i 10) { - return -1; - } - n = 10*n + d; - } - return n; -} - /** * Convert an integer value to a string and append the result to * the given UnicodeString. @@ -186,127 +179,98 @@ static int32_t stou(const UnicodeString& string) { static UnicodeString& itos(int32_t i, UnicodeString& appendTo) { UChar temp[16]; uprv_itou(temp,16,i,10,0); // 10 == radix - appendTo.append(temp); + appendTo.append(temp, -1); return appendTo; } -/* - * A structure representing one subformat of this MessageFormat. - * Each subformat has a Format object, an offset into the plain - * pattern text fPattern, and an argument number. The argument - * number corresponds to the array of arguments to be formatted. - * @internal - */ -class MessageFormat::Subformat : public UMemory { + +// AppendableWrapper: encapsulates the result of formatting, keeping track +// of the string and its length. +class AppendableWrapper : public UMemory { public: - /** - * @internal - */ - Format* format; // formatter - /** - * @internal - */ - int32_t offset; // offset into fPattern - /** - * @internal - */ - // TODO (claireho) or save the number to argName and use itos to convert to number.=> we need this number - int32_t argNum; // 0-based argument number - /** - * @internal - */ - UnicodeString* argName; // argument name or number - - /** - * Clone that.format and assign it to this.format - * Do NOT delete this.format - * @internal - */ - Subformat& operator=(const Subformat& that) { - if (this != &that) { - format = that.format ? that.format->clone() : NULL; - offset = that.offset; - argNum = that.argNum; - argName = (that.argNum==-1) ? new UnicodeString(*that.argName): NULL; + AppendableWrapper(Appendable& appendable) : app(appendable), len(0) { + } + void append(const UnicodeString& s) { + app.appendString(s.getBuffer(), s.length()); + len += s.length(); + } + void append(const UChar* s, const int32_t sLength) { + app.appendString(s, sLength); + len += sLength; + } + void append(const UnicodeString& s, int32_t start, int32_t length) { + append(s.tempSubString(start, length)); + } + void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) { + UnicodeString s; + formatter->format(arg, s, ec); + if (U_SUCCESS(ec)) { + append(s); } - return *this; } - - /** - * @internal - */ - UBool operator==(const Subformat& that) const { - // Do cheap comparisons first - return offset == that.offset && - argNum == that.argNum && - ((argName == that.argName) || - (*argName == *that.argName)) && - ((format == that.format) || // handles NULL - (*format == *that.format)); + void formatAndAppend(const Format* formatter, const Formattable& arg, + const UnicodeString &argString, UErrorCode& ec) { + if (!argString.isEmpty()) { + if (U_SUCCESS(ec)) { + append(argString); + } + } else { + formatAndAppend(formatter, arg, ec); + } } - - /** - * @internal - */ - UBool operator!=(const Subformat& that) const { - return !operator==(that); + int32_t length() { + return len; } +private: + Appendable& app; + int32_t len; }; + // ------------------------------------- // Creates a MessageFormat instance based on the pattern. MessageFormat::MessageFormat(const UnicodeString& pattern, UErrorCode& success) : fLocale(Locale::getDefault()), // Uses the default locale + msgPattern(success), formatAliases(NULL), formatAliasesCapacity(0), - idStart(UCHAR_ID_START), - idContinue(UCHAR_ID_CONTINUE), - subformats(NULL), - subformatCount(0), - subformatCapacity(0), argTypes(NULL), argTypeCount(0), argTypeCapacity(0), - isArgNumeric(TRUE), + hasArgTypeConflicts(FALSE), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + cachedFormatters(NULL), + customFormatArgStarts(NULL), + pluralProvider(*this, UPLURAL_TYPE_CARDINAL), + ordinalProvider(*this, UPLURAL_TYPE_ORDINAL) { - if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || - !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { - success = U_MEMORY_ALLOCATION_ERROR; - return; - } - applyPattern(pattern, success); setLocaleIDs(fLocale.getName(), fLocale.getName()); + applyPattern(pattern, success); } - + MessageFormat::MessageFormat(const UnicodeString& pattern, const Locale& newLocale, UErrorCode& success) : fLocale(newLocale), + msgPattern(success), formatAliases(NULL), formatAliasesCapacity(0), - idStart(UCHAR_ID_START), - idContinue(UCHAR_ID_CONTINUE), - subformats(NULL), - subformatCount(0), - subformatCapacity(0), argTypes(NULL), argTypeCount(0), argTypeCapacity(0), - isArgNumeric(TRUE), + hasArgTypeConflicts(FALSE), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + cachedFormatters(NULL), + customFormatArgStarts(NULL), + pluralProvider(*this, UPLURAL_TYPE_CARDINAL), + ordinalProvider(*this, UPLURAL_TYPE_ORDINAL) { - if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || - !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { - success = U_MEMORY_ALLOCATION_ERROR; - return; - } - applyPattern(pattern, success); setLocaleIDs(fLocale.getName(), fLocale.getName()); + applyPattern(pattern, success); } MessageFormat::MessageFormat(const UnicodeString& pattern, @@ -314,65 +278,57 @@ MessageFormat::MessageFormat(const UnicodeString& pattern, UParseError& parseError, UErrorCode& success) : fLocale(newLocale), + msgPattern(success), formatAliases(NULL), formatAliasesCapacity(0), - idStart(UCHAR_ID_START), - idContinue(UCHAR_ID_CONTINUE), - subformats(NULL), - subformatCount(0), - subformatCapacity(0), argTypes(NULL), argTypeCount(0), argTypeCapacity(0), - isArgNumeric(TRUE), + hasArgTypeConflicts(FALSE), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + cachedFormatters(NULL), + customFormatArgStarts(NULL), + pluralProvider(*this, UPLURAL_TYPE_CARDINAL), + ordinalProvider(*this, UPLURAL_TYPE_ORDINAL) { - if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY) || - !allocateArgTypes(DEFAULT_INITIAL_CAPACITY)) { - success = U_MEMORY_ALLOCATION_ERROR; - return; - } - applyPattern(pattern, parseError, success); setLocaleIDs(fLocale.getName(), fLocale.getName()); + applyPattern(pattern, parseError, success); } MessageFormat::MessageFormat(const MessageFormat& that) -: Format(that), +: + Format(that), + fLocale(that.fLocale), + msgPattern(that.msgPattern), formatAliases(NULL), formatAliasesCapacity(0), - idStart(UCHAR_ID_START), - idContinue(UCHAR_ID_CONTINUE), - subformats(NULL), - subformatCount(0), - subformatCapacity(0), argTypes(NULL), argTypeCount(0), argTypeCapacity(0), - isArgNumeric(TRUE), + hasArgTypeConflicts(that.hasArgTypeConflicts), defaultNumberFormat(NULL), - defaultDateFormat(NULL) + defaultDateFormat(NULL), + cachedFormatters(NULL), + customFormatArgStarts(NULL), + pluralProvider(*this, UPLURAL_TYPE_CARDINAL), + ordinalProvider(*this, UPLURAL_TYPE_ORDINAL) { - *this = that; + // This will take care of creating the hash tables (since they are NULL). + UErrorCode ec = U_ZERO_ERROR; + copyObjects(that, ec); + if (U_FAILURE(ec)) { + resetPattern(); + } } MessageFormat::~MessageFormat() { - int32_t idx; - for (idx = 0; idx < subformatCount; idx++) { - delete subformats[idx].format; - delete subformats[idx].argName; - } - uprv_free(subformats); - subformats = NULL; - subformatCount = subformatCapacity = 0; + uhash_close(cachedFormatters); + uhash_close(customFormatArgStarts); uprv_free(argTypes); - argTypes = NULL; - argTypeCount = argTypeCapacity = 0; - uprv_free(formatAliases); - delete defaultNumberFormat; delete defaultDateFormat; } @@ -380,37 +336,6 @@ MessageFormat::~MessageFormat() //-------------------------------------------------------------------- // Variable-size array management -/** - * Allocate subformats[] to at least the given capacity and return - * TRUE if successful. If not, leave subformats[] unchanged. - * - * If subformats is NULL, allocate it. If it is not NULL, enlarge it - * if necessary to be at least as large as specified. - */ -UBool MessageFormat::allocateSubformats(int32_t capacity) { - if (subformats == NULL) { - subformats = (Subformat*) uprv_malloc(sizeof(*subformats) * capacity); - subformatCapacity = capacity; - subformatCount = 0; - if (subformats == NULL) { - subformatCapacity = 0; - return FALSE; - } - } else if (subformatCapacity < capacity) { - if (capacity < 2*subformatCapacity) { - capacity = 2*subformatCapacity; - } - Subformat* a = (Subformat*) - uprv_realloc(subformats, sizeof(*subformats) * capacity); - if (a == NULL) { - return FALSE; // request failed - } - subformats = a; - subformatCapacity = capacity; - } - return TRUE; -} - /** * Allocate argTypes[] to at least the given capacity and return * TRUE if successful. If not, leave argTypes[] unchanged. @@ -418,33 +343,26 @@ UBool MessageFormat::allocateSubformats(int32_t capacity) { * If argTypes is NULL, allocate it. If it is not NULL, enlarge it * if necessary to be at least as large as specified. */ -UBool MessageFormat::allocateArgTypes(int32_t capacity) { - if (argTypes == NULL) { - argTypes = (Formattable::Type*) uprv_malloc(sizeof(*argTypes) * capacity); - argTypeCount = 0; - argTypeCapacity = capacity; - if (argTypes == NULL) { - argTypeCapacity = 0; - return FALSE; - } - for (int32_t i=0; i= capacity) { + return TRUE; + } + if (capacity < DEFAULT_INITIAL_CAPACITY) { + capacity = DEFAULT_INITIAL_CAPACITY; + } else if (capacity < 2*argTypeCapacity) { + capacity = 2*argTypeCapacity; + } + Formattable::Type* a = (Formattable::Type*) uprv_realloc(argTypes, sizeof(*argTypes) * capacity); - if (a == NULL) { - return FALSE; // request failed - } - for (int32_t i=argTypeCapacity; ikey.integer != rhs_cur->key.integer) { + return FALSE; + } + const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer); + const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer); + if (*format != *rhs_format) { return FALSE; } } - return TRUE; } // ------------------------------------- // Creates a copy of this MessageFormat, the caller owns the copy. - + Format* MessageFormat::clone() const { return new MessageFormat(*this); } - + // ------------------------------------- // Sets the locale of this MessageFormat object to theLocale. - + void MessageFormat::setLocale(const Locale& theLocale) { @@ -530,25 +453,24 @@ MessageFormat::setLocale(const Locale& theLocale) defaultNumberFormat = NULL; delete defaultDateFormat; defaultDateFormat = NULL; + fLocale = theLocale; + setLocaleIDs(fLocale.getName(), fLocale.getName()); + pluralProvider.reset(); + ordinalProvider.reset(); } - fLocale = theLocale; - setLocaleIDs(fLocale.getName(), fLocale.getName()); } - + // ------------------------------------- // Gets the locale of this MessageFormat object. - + const Locale& MessageFormat::getLocale() const { return fLocale; } - - - void -MessageFormat::applyPattern(const UnicodeString& newPattern, +MessageFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { UParseError parseError; @@ -560,362 +482,213 @@ MessageFormat::applyPattern(const UnicodeString& newPattern, // Applies the new pattern and returns an error if the pattern // is not correct. void -MessageFormat::applyPattern(const UnicodeString& pattern, +MessageFormat::applyPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& ec) -{ +{ if(U_FAILURE(ec)) { return; } - // The pattern is broken up into segments. Each time a subformat - // is encountered, 4 segments are recorded. For example, consider - // the pattern: - // "There {0,choice,0.0#are no files|1.0#is one file|1.0getDynamicClassID() == DecimalFormat::getStaticClassID()) { - - UErrorCode ec = U_ZERO_ERROR; - NumberFormat& formatAlias = *(NumberFormat*)fmt; - NumberFormat *defaultTemplate = NumberFormat::createInstance(fLocale, ec); - NumberFormat *currencyTemplate = NumberFormat::createCurrencyInstance(fLocale, ec); - NumberFormat *percentTemplate = NumberFormat::createPercentInstance(fLocale, ec); - NumberFormat *integerTemplate = createIntegerFormat(fLocale, ec); - - appendTo += COMMA; - appendTo += ID_NUMBER; - if (formatAlias != *defaultTemplate) { - appendTo += COMMA; - if (formatAlias == *currencyTemplate) { - appendTo += ID_CURRENCY; - } - else if (formatAlias == *percentTemplate) { - appendTo += ID_PERCENT; - } - else if (formatAlias == *integerTemplate) { - appendTo += ID_INTEGER; - } - else { - UnicodeString buffer; - appendTo += ((DecimalFormat*)fmt)->toPattern(buffer); - } - } - - delete defaultTemplate; - delete currencyTemplate; - delete percentTemplate; - delete integerTemplate; - } - else if (fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { - DateFormat& formatAlias = *(DateFormat*)fmt; - DateFormat *defaultDateTemplate = DateFormat::createDateInstance(DateFormat::kDefault, fLocale); - DateFormat *shortDateTemplate = DateFormat::createDateInstance(DateFormat::kShort, fLocale); - DateFormat *longDateTemplate = DateFormat::createDateInstance(DateFormat::kLong, fLocale); - DateFormat *fullDateTemplate = DateFormat::createDateInstance(DateFormat::kFull, fLocale); - DateFormat *defaultTimeTemplate = DateFormat::createTimeInstance(DateFormat::kDefault, fLocale); - DateFormat *shortTimeTemplate = DateFormat::createTimeInstance(DateFormat::kShort, fLocale); - DateFormat *longTimeTemplate = DateFormat::createTimeInstance(DateFormat::kLong, fLocale); - DateFormat *fullTimeTemplate = DateFormat::createTimeInstance(DateFormat::kFull, fLocale); - - - appendTo += COMMA; - if (formatAlias == *defaultDateTemplate) { - appendTo += ID_DATE; - } - else if (formatAlias == *shortDateTemplate) { - appendTo += ID_DATE; - appendTo += COMMA; - appendTo += ID_SHORT; - } - else if (formatAlias == *defaultDateTemplate) { - appendTo += ID_DATE; - appendTo += COMMA; - appendTo += ID_MEDIUM; - } - else if (formatAlias == *longDateTemplate) { - appendTo += ID_DATE; - appendTo += COMMA; - appendTo += ID_LONG; - } - else if (formatAlias == *fullDateTemplate) { - appendTo += ID_DATE; - appendTo += COMMA; - appendTo += ID_FULL; - } - else if (formatAlias == *defaultTimeTemplate) { - appendTo += ID_TIME; - } - else if (formatAlias == *shortTimeTemplate) { - appendTo += ID_TIME; - appendTo += COMMA; - appendTo += ID_SHORT; - } - else if (formatAlias == *defaultTimeTemplate) { - appendTo += ID_TIME; - appendTo += COMMA; - appendTo += ID_MEDIUM; - } - else if (formatAlias == *longTimeTemplate) { - appendTo += ID_TIME; - appendTo += COMMA; - appendTo += ID_LONG; - } - else if (formatAlias == *fullTimeTemplate) { - appendTo += ID_TIME; - appendTo += COMMA; - appendTo += ID_FULL; - } - else { - UnicodeString buffer; - appendTo += ID_DATE; - appendTo += COMMA; - appendTo += ((SimpleDateFormat*)fmt)->toPattern(buffer); - } - - delete defaultDateTemplate; - delete shortDateTemplate; - delete longDateTemplate; - delete fullDateTemplate; - delete defaultTimeTemplate; - delete shortTimeTemplate; - delete longTimeTemplate; - delete fullTimeTemplate; - // {sfb} there should be a more efficient way to do this! - } - else if (fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID()) { - UnicodeString buffer; - appendTo += COMMA; - appendTo += ID_CHOICE; - appendTo += COMMA; - appendTo += ((ChoiceFormat*)fmt)->toPattern(buffer); + if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { + return -1; } - else if (fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) { - UnicodeString buffer; - appendTo += ((PluralFormat*)fmt)->toPattern(buffer); - } - else { - //appendTo += ", unknown"; + } +} + +void MessageFormat::setArgStartFormat(int32_t argStart, + Format* formatter, + UErrorCode& status) { + if (U_FAILURE(status)) { + delete formatter; + return; + } + if (cachedFormatters == NULL) { + cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, + equalFormatsForHash, &status); + if (U_FAILURE(status)) { + delete formatter; + return; } - appendTo += RIGHT_CURLY_BRACE; + uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); + } + if (formatter == NULL) { + formatter = new DummyFormat(); + } + uhash_iput(cachedFormatters, argStart, formatter, &status); +} + + +UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) { + const MessagePattern::Part& part = msgPattern.getPart(partIndex); + return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ? + msgPattern.partSubstringMatches(part, argName) : + part.getValue() == argNumber; // ARG_NUMBER +} + +// Sets a custom formatter for a MessagePattern ARG_START part index. +// "Custom" formatters are provided by the user via setFormat() or similar APIs. +void MessageFormat::setCustomArgStartFormat(int32_t argStart, + Format* formatter, + UErrorCode& status) { + setArgStartFormat(argStart, formatter, status); + if (customFormatArgStarts == NULL) { + customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, + NULL, &status); + } + uhash_iputi(customFormatArgStarts, argStart, 1, &status); +} + +Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const { + if (cachedFormatters == NULL) { + return NULL; + } + void* ptr = uhash_iget(cachedFormatters, argumentNumber); + if (ptr != NULL && dynamic_cast((Format*)ptr) == NULL) { + return (Format*) ptr; + } else { + // Not cached, or a DummyFormat representing setFormat(NULL). + return NULL; } - copyAndFixQuotes(fPattern, lastOffset, fPattern.length(), appendTo); - return appendTo; } - + // ------------------------------------- // Adopts the new formats array and updates the array count. // This MessageFormat instance owns the new formats. - void MessageFormat::adoptFormats(Format** newFormats, int32_t count) { if (newFormats == NULL || count < 0) { return; } - - int32_t i; - if (allocateSubformats(count)) { - for (i=0; i= 0;) { + setCustomArgStartFormat(partIndex, newFormats[formatNumber], status); + ++formatNumber; + } + // Delete those that didn't get used (if any). + for (; formatNumber < count; ++formatNumber) { + delete newFormats[formatNumber]; } - // TODO: What about the .offset and .argNum fields? -} +} // ------------------------------------- // Sets the new formats array and updates the array count. // This MessageFormat instance maks a copy of the new formats. - + void MessageFormat::setFormats(const Format** newFormats, int32_t count) { if (newFormats == NULL || count < 0) { return; } + // Throw away any cached formatters. + if (cachedFormatters != NULL) { + uhash_removeAll(cachedFormatters); + } + if (customFormatArgStarts != NULL) { + uhash_removeAll(customFormatArgStarts); + } - if (allocateSubformats(count)) { - int32_t i; - for (i=0; iclone() : NULL; - } - subformatCount = count; + UErrorCode status = U_ZERO_ERROR; + int32_t formatNumber = 0; + for (int32_t partIndex = 0; + formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { + Format* newFormat = NULL; + if (newFormats[formatNumber] != NULL) { + newFormat = newFormats[formatNumber]->clone(); + if (newFormat == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + setCustomArgStartFormat(partIndex, newFormat, status); + ++formatNumber; + } + if (U_FAILURE(status)) { + resetPattern(); } +} - // TODO: What about the .offset and .arg fields? -} - // ------------------------------------- // Adopt a single format by format number. // Do nothing if the format number is not less than the array count. - + void MessageFormat::adoptFormat(int32_t n, Format *newFormat) { - if (n < 0 || n >= subformatCount) { - delete newFormat; - } else { - delete subformats[n].format; - subformats[n].format = newFormat; + LocalPointer p(newFormat); + if (n >= 0) { + int32_t formatNumber = 0; + for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { + if (n == formatNumber) { + UErrorCode status = U_ZERO_ERROR; + setCustomArgStartFormat(partIndex, p.orphan(), status); + return; + } + ++formatNumber; + } } } @@ -923,27 +696,35 @@ MessageFormat::adoptFormat(int32_t n, Format *newFormat) { // Adopt a single format by format name. // Do nothing if there is no match of formatName. void -MessageFormat::adoptFormat(const UnicodeString& formatName, +MessageFormat::adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status) { - if (isArgNumeric ) { - int32_t argumentNumber = stou(formatName); - if (argumentNumber<0) { - status = U_ARGUMENT_TYPE_MISMATCH; - return; - } - adoptFormat(argumentNumber, formatToAdopt); + LocalPointer p(formatToAdopt); + if (U_FAILURE(status)) { + return; + } + int32_t argNumber = MessagePattern::validateArgumentName(formatName); + if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { + status = U_ILLEGAL_ARGUMENT_ERROR; return; } - for (int32_t i=0; i= 0 && U_SUCCESS(status); + ) { + if (argNameMatches(partIndex + 1, formatName, argNumber)) { + Format* f; + if (p.isValid()) { + f = p.orphan(); + } else if (formatToAdopt == NULL) { + f = NULL; } else { - subformats[i].format = formatToAdopt; + f = formatToAdopt->clone(); + if (f == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } } + setCustomArgStartFormat(partIndex, f, status); } } } @@ -951,16 +732,22 @@ MessageFormat::adoptFormat(const UnicodeString& formatName, // ------------------------------------- // Set a single format. // Do nothing if the variable is not less than the array count. - void MessageFormat::setFormat(int32_t n, const Format& newFormat) { - if (n >= 0 && n < subformatCount) { - delete subformats[n].format; - if (&newFormat == NULL) { - // This should never happen -- but we'll be nice if it does - subformats[n].format = NULL; - } else { - subformats[n].format = newFormat.clone(); + + if (n >= 0) { + int32_t formatNumber = 0; + for (int32_t partIndex = 0; + (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { + if (n == formatNumber) { + Format* new_format = newFormat.clone(); + if (new_format) { + UErrorCode status = U_ZERO_ERROR; + setCustomArgStartFormat(partIndex, new_format, status); + } + return; + } + ++formatNumber; } } } @@ -970,27 +757,16 @@ MessageFormat::setFormat(int32_t n, const Format& newFormat) { // Do nothing if the variable is not less than the array count. Format * MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) { + if (U_FAILURE(status) || cachedFormatters == NULL) return NULL; - if (U_FAILURE(status)) return NULL; - - if (isArgNumeric ) { - int32_t argumentNumber = stou(formatName); - if (argumentNumber<0) { - status = U_ARGUMENT_TYPE_MISMATCH; - return NULL; - } - if (argumentNumber < 0 || argumentNumber >= subformatCount) { - return subformats[argumentNumber].format; - } - else { - return NULL; - } + int32_t argNumber = MessagePattern::validateArgumentName(formatName); + if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; } - - for (int32_t i=0; i= 0;) { + if (argNameMatches(partIndex + 1, formatName, argNumber)) { + return getCachedFormatter(partIndex); } } return NULL; @@ -1003,28 +779,29 @@ void MessageFormat::setFormat(const UnicodeString& formatName, const Format& newFormat, UErrorCode& status) { - if (isArgNumeric) { - status = U_ARGUMENT_TYPE_MISMATCH; + if (U_FAILURE(status)) return; + + int32_t argNumber = MessagePattern::validateArgumentName(formatName); + if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) { + status = U_ILLEGAL_ARGUMENT_ERROR; return; } - for (int32_t i=0; i= 0 && U_SUCCESS(status); + ) { + if (argNameMatches(partIndex + 1, formatName, argNumber)) { + Format* new_format = newFormat.clone(); + if (new_format == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; } - break; + setCustomArgStartFormat(partIndex, new_format, status); } } } // ------------------------------------- // Gets the format array. - const Format** MessageFormat::getFormats(int32_t& cnt) const { @@ -1033,48 +810,59 @@ MessageFormat::getFormats(int32_t& cnt) const // method on this object. We construct and resize an array // on demand that contains aliases to the subformats[i].format // pointers. - MessageFormat* t = (MessageFormat*) this; + + // Get total required capacity first (it's refreshed on each call). + int32_t totalCapacity = 0; + for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0; ++totalCapacity) {}; + + MessageFormat* t = const_cast (this); cnt = 0; - if (formatAliases == NULL) { - t->formatAliasesCapacity = (subformatCount<10) ? 10 : subformatCount; + if (formatAliases == nullptr) { + t->formatAliasesCapacity = totalCapacity; Format** a = (Format**) uprv_malloc(sizeof(Format*) * formatAliasesCapacity); - if (a == NULL) { - return NULL; + if (a == nullptr) { + t->formatAliasesCapacity = 0; + return nullptr; } - t->formatAliases = a; - } else if (subformatCount > formatAliasesCapacity) { + t->formatAliases = a; + } else if (totalCapacity > formatAliasesCapacity) { Format** a = (Format**) - uprv_realloc(formatAliases, sizeof(Format*) * subformatCount); - if (a == NULL) { - return NULL; + uprv_realloc(formatAliases, sizeof(Format*) * totalCapacity); + if (a == nullptr) { + t->formatAliasesCapacity = 0; + return nullptr; } t->formatAliases = a; - t->formatAliasesCapacity = subformatCount; + t->formatAliasesCapacity = totalCapacity; } - for (int32_t i=0; iformatAliases[i] = subformats[i].format; + + for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { + t->formatAliases[cnt++] = getCachedFormatter(partIndex); } - cnt = subformatCount; + return (const Format**)formatAliases; } - + + +UnicodeString MessageFormat::getArgName(int32_t partIndex) { + const MessagePattern::Part& part = msgPattern.getPart(partIndex); + return msgPattern.getSubstring(part); +} StringEnumeration* MessageFormat::getFormatNames(UErrorCode& status) { if (U_FAILURE(status)) return NULL; - - if (isArgNumeric) { - status = U_ARGUMENT_TYPE_MISMATCH; - return NULL; - } + UVector *fFormatNames = new UVector(status); if (U_FAILURE(status)) { status = U_MEMORY_ALLOCATION_ERROR; return NULL; } - for (int32_t i=0; iaddElement(new UnicodeString(*subformats[i].argName), status); + fFormatNames->setDeleter(uprv_deleteUObject); + + for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) { + fFormatNames->addElement(new UnicodeString(getArgName(partIndex + 1)), status); } StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status); @@ -1084,334 +872,654 @@ MessageFormat::getFormatNames(UErrorCode& status) { // ------------------------------------- // Formats the source Formattable array and copy into the result buffer. // Ignore the FieldPosition result for error checking. - + UnicodeString& MessageFormat::format(const Formattable* source, - int32_t cnt, - UnicodeString& appendTo, - FieldPosition& ignore, + int32_t cnt, + UnicodeString& appendTo, + FieldPosition& ignore, UErrorCode& success) const { - if (U_FAILURE(success)) - return appendTo; - - return format(source, cnt, appendTo, ignore, 0, success); + return format(source, NULL, cnt, appendTo, &ignore, success); } - + // ------------------------------------- // Internally creates a MessageFormat instance based on the -// pattern and formats the arguments Formattable array and +// pattern and formats the arguments Formattable array and // copy into the appendTo buffer. - + UnicodeString& MessageFormat::format( const UnicodeString& pattern, const Formattable* arguments, int32_t cnt, - UnicodeString& appendTo, + UnicodeString& appendTo, UErrorCode& success) { MessageFormat temp(pattern, success); - FieldPosition ignore(0); - temp.format(arguments, cnt, appendTo, ignore, success); - return appendTo; + return temp.format(arguments, NULL, cnt, appendTo, NULL, success); } - + // ------------------------------------- -// Formats the source Formattable object and copy into the +// Formats the source Formattable object and copy into the // appendTo buffer. The Formattable object must be an array // of Formattable instances, returns error otherwise. - + UnicodeString& -MessageFormat::format(const Formattable& source, - UnicodeString& appendTo, - FieldPosition& ignore, +MessageFormat::format(const Formattable& source, + UnicodeString& appendTo, + FieldPosition& ignore, UErrorCode& success) const { - int32_t cnt; - - if (U_FAILURE(success)) + if (U_FAILURE(success)) return appendTo; if (source.getType() != Formattable::kArray) { success = U_ILLEGAL_ARGUMENT_ERROR; return appendTo; } + int32_t cnt; const Formattable* tmpPtr = source.getArray(cnt); - - return format(tmpPtr, cnt, appendTo, ignore, 0, success); + return format(tmpPtr, NULL, cnt, appendTo, &ignore, success); } - UnicodeString& MessageFormat::format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo, UErrorCode& success) const { - FieldPosition ignore(0); - return format(arguments, argumentNames, count, appendTo, ignore, 0, success); + return format(arguments, argumentNames, count, appendTo, NULL, success); } -UnicodeString& -MessageFormat::format(const Formattable* arguments, - int32_t cnt, - UnicodeString& appendTo, - FieldPosition& status, - int32_t recursionProtection, - UErrorCode& success) const -{ - return format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); +// Does linear search to find the match for an ArgName. +const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, UnicodeString& name) const { + for (int32_t i = 0; i < cnt; ++i) { + if (0 == argumentNames[i].compare(name)) { + return arguments + i; + } + } + return NULL; } -// ------------------------------------- -// Formats the arguments Formattable array and copy into the appendTo buffer. -// Ignore the FieldPosition result for error checking. UnicodeString& MessageFormat::format(const Formattable* arguments, const UnicodeString *argumentNames, - int32_t cnt, - UnicodeString& appendTo, - FieldPosition& status, - int32_t recursionProtection, - UErrorCode& success) const -{ - int32_t lastOffset = 0; - int32_t argumentNumber=0; - if (cnt < 0 || (cnt && arguments == NULL)) { - success = U_ILLEGAL_ARGUMENT_ERROR; + int32_t cnt, + UnicodeString& appendTo, + FieldPosition* pos, + UErrorCode& status) const { + if (U_FAILURE(status)) { return appendTo; } - - if ( !isArgNumeric && argumentNames== NULL ) { - success = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; + + UnicodeStringAppendable usapp(appendTo); + AppendableWrapper app(usapp); + format(0, NULL, arguments, argumentNames, cnt, app, pos, status); + return appendTo; +} + +namespace { + +/** + * Mutable input/output values for the PluralSelectorProvider. + * Separate so that it is possible to make MessageFormat Freezable. + */ +class PluralSelectorContext { +public: + PluralSelectorContext(int32_t start, const UnicodeString &name, + const Formattable &num, double off, UErrorCode &errorCode) + : startIndex(start), argName(name), offset(off), + numberArgIndex(-1), formatter(NULL), forReplaceNumber(FALSE) { + // number needs to be set even when select() is not called. + // Keep it as a Number/Formattable: + // For format() methods, and to preserve information (e.g., BigDecimal). + if(off == 0) { + number = num; + } else { + number = num.getDouble(errorCode) - off; + } + } + + // Input values for plural selection with decimals. + int32_t startIndex; + const UnicodeString &argName; + /** argument number - plural offset */ + Formattable number; + double offset; + // Output values for plural selection with decimals. + /** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */ + int32_t numberArgIndex; + const Format *formatter; + /** formatted argument number - plural offset */ + UnicodeString numberString; + /** TRUE if number-offset was formatted with the stock number formatter */ + UBool forReplaceNumber; +}; + +} // namespace + +// if argumentNames is NULL, this means arguments is a numeric array. +// arguments can not be NULL. +// We use const void *plNumber rather than const PluralSelectorContext *pluralNumber +// so that we need not declare the PluralSelectorContext in the public header file. +void MessageFormat::format(int32_t msgStart, const void *plNumber, + const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + AppendableWrapper& appendTo, + FieldPosition* ignore, + UErrorCode& success) const { + if (U_FAILURE(success)) { + return; } - - const Formattable *obj=NULL; - for (int32_t i=0; i= cnt) { - appendTo += LEFT_CURLY_BRACE; - itos(argumentNumber, appendTo); - appendTo += RIGHT_CURLY_BRACE; - continue; + + const UnicodeString& msgString = msgPattern.getPatternString(); + int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); + for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) { + const MessagePattern::Part* part = &msgPattern.getPart(i); + const UMessagePatternPartType type = part->getType(); + int32_t index = part->getIndex(); + appendTo.append(msgString, prevIndex, index - prevIndex); + if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { + return; + } + prevIndex = part->getLimit(); + if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { + const PluralSelectorContext &pluralNumber = + *static_cast(plNumber); + if(pluralNumber.forReplaceNumber) { + // number-offset was already formatted. + appendTo.formatAndAppend(pluralNumber.formatter, + pluralNumber.number, pluralNumber.numberString, success); + } else { + const NumberFormat* nf = getDefaultNumberFormat(success); + appendTo.formatAndAppend(nf, pluralNumber.number, success); } - obj = arguments+argumentNumber; + continue; } - else { - for (int32_t j=0; jgetArgType(); + part = &msgPattern.getPart(++i); + const Formattable* arg; + UBool noArg = FALSE; + UnicodeString argName = msgPattern.getSubstring(*part); + if (argumentNames == NULL) { + int32_t argNumber = part->getValue(); // ARG_NUMBER + if (0 <= argNumber && argNumber < cnt) { + arg = arguments + argNumber; + } else { + arg = NULL; + noArg = TRUE; } - if (obj == NULL ) { - appendTo += LEFT_CURLY_BRACE; - appendTo += *subformats[i].argName; - appendTo += RIGHT_CURLY_BRACE; - continue; - + } else { + arg = getArgFromListByName(arguments, argumentNames, cnt, argName); + if (arg == NULL) { + noArg = TRUE; } } - Formattable::Type type = obj->getType(); - - // Recursively calling the format process only if the current - // format argument refers to a ChoiceFormat object. - Format* fmt = subformats[i].format; - if (fmt != NULL) { - UnicodeString argNum; - fmt->format(*obj, argNum, success); - - // Needs to reprocess the ChoiceFormat option by using the - // MessageFormat pattern application. - if ((fmt->getDynamicClassID() == ChoiceFormat::getStaticClassID() || - fmt->getDynamicClassID() == PluralFormat::getStaticClassID()) && - argNum.indexOf(LEFT_CURLY_BRACE) >= 0) { - MessageFormat temp(argNum, fLocale, success); - // TODO: Implement recursion protection - if ( isArgNumeric ) { - temp.format(arguments, NULL, cnt, appendTo, status, recursionProtection, success); - } - else { - temp.format(arguments, argumentNames, cnt, appendTo, status, recursionProtection, success); - } - if (U_FAILURE(success)) { - return appendTo; + ++i; + int32_t prevDestLength = appendTo.length(); + const Format* formatter = NULL; + if (noArg) { + appendTo.append( + UnicodeString(LEFT_CURLY_BRACE).append(argName).append(RIGHT_CURLY_BRACE)); + } else if (arg == NULL) { + appendTo.append(NULL_STRING, 4); + } else if(plNumber!=NULL && + static_cast(plNumber)->numberArgIndex==(i-2)) { + const PluralSelectorContext &pluralNumber = + *static_cast(plNumber); + if(pluralNumber.offset == 0) { + // The number was already formatted with this formatter. + appendTo.formatAndAppend(pluralNumber.formatter, pluralNumber.number, + pluralNumber.numberString, success); + } else { + // Do not use the formatted (number-offset) string for a named argument + // that formats the number without subtracting the offset. + appendTo.formatAndAppend(pluralNumber.formatter, *arg, success); + } + } else if ((formatter = getCachedFormatter(i -2)) != 0) { + // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings. + if (dynamic_cast(formatter) || + dynamic_cast(formatter) || + dynamic_cast(formatter)) { + // We only handle nested formats here if they were provided via + // setFormat() or its siblings. Otherwise they are not cached and instead + // handled below according to argType. + UnicodeString subMsgString; + formatter->format(*arg, subMsgString, success); + if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 || + (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern)) + ) { + MessageFormat subMsgFormat(subMsgString, fLocale, success); + subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, ignore, success); + } else { + appendTo.append(subMsgString); } + } else { + appendTo.formatAndAppend(formatter, *arg, success); + } + } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) { + // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. + // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check + // for the hash table containind DummyFormat. + if (arg->isNumeric()) { + const NumberFormat* nf = getDefaultNumberFormat(success); + appendTo.formatAndAppend(nf, *arg, success); + } else if (arg->getType() == Formattable::kDate) { + const DateFormat* df = getDefaultDateFormat(success); + appendTo.formatAndAppend(df, *arg, success); + } else { + appendTo.append(arg->getString(success)); } - else { - appendTo += argNum; + } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { + if (!arg->isNumeric()) { + success = U_ILLEGAL_ARGUMENT_ERROR; + return; } + // We must use the Formattable::getDouble() variant with the UErrorCode parameter + // because only this one converts non-double numeric types to double. + const double number = arg->getDouble(success); + int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number); + formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames, + cnt, appendTo, success); + } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) { + if (!arg->isNumeric()) { + success = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + const PluralSelectorProvider &selector = + argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider; + // We must use the Formattable::getDouble() variant with the UErrorCode parameter + // because only this one converts non-double numeric types to double. + double offset = msgPattern.getPluralOffset(i); + PluralSelectorContext context(i, argName, *arg, offset, success); + int32_t subMsgStart = PluralFormat::findSubMessage( + msgPattern, i, selector, &context, arg->getDouble(success), success); + formatComplexSubMessage(subMsgStart, &context, arguments, argumentNames, + cnt, appendTo, success); + } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { + int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success); + formatComplexSubMessage(subMsgStart, NULL, arguments, argumentNames, + cnt, appendTo, success); + } else { + // This should never happen. + success = U_INTERNAL_PROGRAM_ERROR; + return; } - // If the obj data type is a number, use a NumberFormat instance. - else if ((type == Formattable::kDouble) || - (type == Formattable::kLong) || - (type == Formattable::kInt64)) { - - const NumberFormat* nf = getDefaultNumberFormat(success); - if (nf == NULL) { - return appendTo; + ignore = updateMetaData(appendTo, prevDestLength, ignore, arg); + prevIndex = msgPattern.getPart(argLimit).getLimit(); + i = argLimit; + } +} + + +void MessageFormat::formatComplexSubMessage(int32_t msgStart, + const void *plNumber, + const Formattable* arguments, + const UnicodeString *argumentNames, + int32_t cnt, + AppendableWrapper& appendTo, + UErrorCode& success) const { + if (U_FAILURE(success)) { + return; + } + + if (!MessageImpl::jdkAposMode(msgPattern)) { + format(msgStart, plNumber, arguments, argumentNames, cnt, appendTo, NULL, success); + return; + } + + // JDK compatibility mode: (see JDK MessageFormat.format() API docs) + // - remove SKIP_SYNTAX; that is, remove half of the apostrophes + // - if the result string contains an open curly brace '{' then + // instantiate a temporary MessageFormat object and format again; + // otherwise just append the result string + const UnicodeString& msgString = msgPattern.getPatternString(); + UnicodeString sb; + int32_t prevIndex = msgPattern.getPart(msgStart).getLimit(); + for (int32_t i = msgStart;;) { + const MessagePattern::Part& part = msgPattern.getPart(++i); + const UMessagePatternPartType type = part.getType(); + int32_t index = part.getIndex(); + if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { + sb.append(msgString, prevIndex, index - prevIndex); + break; + } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) { + sb.append(msgString, prevIndex, index - prevIndex); + if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { + const PluralSelectorContext &pluralNumber = + *static_cast(plNumber); + if(pluralNumber.forReplaceNumber) { + // number-offset was already formatted. + sb.append(pluralNumber.numberString); + } else { + const NumberFormat* nf = getDefaultNumberFormat(success); + sb.append(nf->format(pluralNumber.number, sb, success)); + } } - if (type == Formattable::kDouble) { - nf->format(obj->getDouble(), appendTo); - } else if (type == Formattable::kLong) { - nf->format(obj->getLong(), appendTo); - } else { - nf->format(obj->getInt64(), appendTo); + prevIndex = part.getLimit(); + } else if (type == UMSGPAT_PART_TYPE_ARG_START) { + sb.append(msgString, prevIndex, index - prevIndex); + prevIndex = index; + i = msgPattern.getLimitPartIndex(i); + index = msgPattern.getPart(i).getLimit(); + MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb); + prevIndex = index; + } + } + if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) { + UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter. + MessageFormat subMsgFormat(emptyPattern, fLocale, success); + subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success); + subMsgFormat.format(0, NULL, arguments, argumentNames, cnt, appendTo, NULL, success); + } else { + appendTo.append(sb); + } +} + + +UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const { + const UnicodeString& msgString=msgPattern.getPatternString(); + int32_t prevIndex=msgPattern.getPart(from).getLimit(); + UnicodeString b; + for (int32_t i = from + 1; ; ++i) { + const MessagePattern::Part& part = msgPattern.getPart(i); + const UMessagePatternPartType type=part.getType(); + int32_t index=part.getIndex(); + b.append(msgString, prevIndex, index - prevIndex); + if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) { + return b; + } + // Unexpected Part "part" in parsed message. + U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR); + prevIndex=part.getLimit(); + } +} + + +FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/, + FieldPosition* /*fp*/, const Formattable* /*argId*/) const { + // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing. + return NULL; + /* + if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) { + fp->setBeginIndex(prevLength); + fp->setEndIndex(dest.get_length()); + return NULL; + } + return fp; + */ +} + +int32_t +MessageFormat::findOtherSubMessage(int32_t partIndex) const { + int32_t count=msgPattern.countParts(); + const MessagePattern::Part *part = &msgPattern.getPart(partIndex); + if(MessagePattern::Part::hasNumericValue(part->getType())) { + ++partIndex; + } + // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples + // until ARG_LIMIT or end of plural-only pattern. + UnicodeString other(FALSE, OTHER_STRING, 5); + do { + part=&msgPattern.getPart(partIndex++); + UMessagePatternPartType type=part->getType(); + if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { + break; + } + U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_SELECTOR); + // part is an ARG_SELECTOR followed by an optional explicit value, and then a message + if(msgPattern.partSubstringMatches(*part, other)) { + return partIndex; + } + if(MessagePattern::Part::hasNumericValue(msgPattern.getPartType(partIndex))) { + ++partIndex; // skip the numeric-value part of "=1" etc. + } + partIndex=msgPattern.getLimitPartIndex(partIndex); + } while(++partIndex 0) { + if (!allocateArgTypes(argTypeCount, ec)) { + return; + } + uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0])); + } + if (cachedFormatters != NULL) { + uhash_removeAll(cachedFormatters); + } + if (customFormatArgStarts != NULL) { + uhash_removeAll(customFormatArgStarts); + } + if (that.cachedFormatters) { + if (cachedFormatters == NULL) { + cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong, + equalFormatsForHash, &ec); + if (U_FAILURE(ec)) { + return; } + uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject); } - // If the obj data type is a Date instance, use a DateFormat instance. - else if (type == Formattable::kDate) { - const DateFormat* df = getDefaultDateFormat(success); - if (df == NULL) { - return appendTo; + + const int32_t count = uhash_count(that.cachedFormatters); + int32_t pos, idx; + for (idx = 0, pos = UHASH_FIRST; idx < count && U_SUCCESS(ec); ++idx) { + const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos); + Format* newFormat = ((Format*)(cur->value.pointer))->clone(); + if (newFormat) { + uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec); + } else { + ec = U_MEMORY_ALLOCATION_ERROR; + return; } - df->format(obj->getDate(), appendTo); } - else if (type == Formattable::kString) { - appendTo += obj->getString(); + } + if (that.customFormatArgStarts) { + if (customFormatArgStarts == NULL) { + customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong, + NULL, &ec); } - else { - success = U_ILLEGAL_ARGUMENT_ERROR; - return appendTo; + const int32_t count = uhash_count(that.customFormatArgStarts); + int32_t pos, idx; + for (idx = 0, pos = UHASH_FIRST; idx < count && U_SUCCESS(ec); ++idx) { + const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos); + uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec); } } - // Appends the rest of the pattern characters after the real last offset. - appendTo.append(fPattern, lastOffset, 0x7fffffff); - return appendTo; } -// ------------------------------------- -// Parses the source pattern and returns the Formattable objects array, -// the array count and the ending parse position. The caller of this method -// owns the array. - Formattable* -MessageFormat::parse(const UnicodeString& source, +MessageFormat::parse(int32_t msgStart, + const UnicodeString& source, ParsePosition& pos, - int32_t& count) const -{ - // Allocate at least one element. Allocating an array of length - // zero causes problems on some platforms (e.g. Win32). - Formattable *resultArray = new Formattable[argTypeCount ? argTypeCount : 1]; - int32_t patternOffset = 0; + int32_t& count, + UErrorCode& ec) const { + count = 0; + if (U_FAILURE(ec)) { + pos.setErrorIndex(pos.getIndex()); + return NULL; + } + // parse() does not work with named arguments. + if (msgPattern.hasNamedArguments()) { + ec = U_ARGUMENT_TYPE_MISMATCH; + pos.setErrorIndex(pos.getIndex()); + return NULL; + } + LocalArray resultArray(new Formattable[argTypeCount ? argTypeCount : 1]); + const UnicodeString& msgString=msgPattern.getPatternString(); + int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); int32_t sourceOffset = pos.getIndex(); - ParsePosition tempPos(0); - count = 0; // {sfb} reset to zero - int32_t len; - // If resultArray could not be created, exit out. - // Avoid crossing initialization of variables above. - if (resultArray == NULL) { - goto PARSE_ERROR; - } - for (int32_t i = 0; i < subformatCount; ++i) { - // match up to format - len = subformats[i].offset - patternOffset; - if (len == 0 || - fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { + ParsePosition tempStatus(0); + + for(int32_t i=msgStart+1; ; ++i) { + UBool haveArgResult = FALSE; + const MessagePattern::Part* part=&msgPattern.getPart(i); + const UMessagePatternPartType type=part->getType(); + int32_t index=part->getIndex(); + // Make sure the literal string matches. + int32_t len = index - prevIndex; + if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) { sourceOffset += len; - patternOffset += len; - } - else { - goto PARSE_ERROR; + prevIndex += len; + } else { + pos.setErrorIndex(sourceOffset); + return NULL; // leave index as is to signal error + } + if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { + // Things went well! Done. + pos.setIndex(sourceOffset); + return resultArray.orphan(); + } + if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) { + prevIndex=part->getLimit(); + continue; } - - // now use format - Format* fmt = subformats[i].format; - int32_t argNum = subformats[i].argNum; - if (fmt == NULL) { // string format + // We do not support parsing Plural formats. (No REPLACE_NUMBER here.) + // Unexpected Part "part" in parsed message. + U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START); + int32_t argLimit=msgPattern.getLimitPartIndex(i); + + UMessagePatternArgType argType=part->getArgType(); + part=&msgPattern.getPart(++i); + int32_t argNumber = part->getValue(); // ARG_NUMBER + UnicodeString key; + ++i; + const Format* formatter = NULL; + Formattable& argResult = resultArray[argNumber]; + + if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) { + // Just parse using the formatter. + tempStatus.setIndex(sourceOffset); + formatter->parseObject(source, argResult, tempStatus); + if (tempStatus.getIndex() == sourceOffset) { + pos.setErrorIndex(sourceOffset); + return NULL; // leave index as is to signal error + } + sourceOffset = tempStatus.getIndex(); + haveArgResult = TRUE; + } else if( + argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) { + // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table. + // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check + // for the hash table containind DummyFormat. + + // Match as a string. // if at end, use longest possible match // otherwise uses first match to intervening string // does NOT recursively try all possibilities - int32_t tempLength = (i+1= tempLength) { + if (!stringAfterArgument.isEmpty()) { + next = source.indexOf(stringAfterArgument, sourceOffset); + } else { next = source.length(); } - else { - UnicodeString buffer; - fPattern.extract(patternOffset,tempLength - patternOffset, buffer); - next = source.indexOf(buffer, sourceOffset); - } - if (next < 0) { - goto PARSE_ERROR; - } - else { - UnicodeString buffer; - source.extract(sourceOffset,next - sourceOffset, buffer); - UnicodeString strValue = buffer; - UnicodeString temp(LEFT_CURLY_BRACE); - // {sfb} check this later - if (isArgNumeric) { - itos(argNum, temp); - } - else { - temp+=(*subformats[i].argName); - } - temp += RIGHT_CURLY_BRACE; - if (strValue != temp) { - source.extract(sourceOffset,next - sourceOffset, buffer); - resultArray[argNum].setString(buffer); - // {sfb} not sure about this - if ((argNum + 1) > count) { - count = argNum + 1; - } + pos.setErrorIndex(sourceOffset); + return NULL; // leave index as is to signal error + } else { + UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset)); + UnicodeString compValue; + compValue.append(LEFT_CURLY_BRACE); + itos(argNumber, compValue); + compValue.append(RIGHT_CURLY_BRACE); + if (0 != strValue.compare(compValue)) { + argResult.setString(strValue); + haveArgResult = TRUE; } sourceOffset = next; } - } - else { - tempPos.setIndex(sourceOffset); - fmt->parseObject(source, resultArray[argNum], tempPos); - if (tempPos.getIndex() == sourceOffset) { - goto PARSE_ERROR; - } - - if ((argNum + 1) > count) { - count = argNum + 1; + } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { + tempStatus.setIndex(sourceOffset); + double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus); + if (tempStatus.getIndex() == sourceOffset) { + pos.setErrorIndex(sourceOffset); + return NULL; // leave index as is to signal error } - sourceOffset = tempPos.getIndex(); // update + argResult.setDouble(choiceResult); + haveArgResult = TRUE; + sourceOffset = tempStatus.getIndex(); + } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) { + // Parsing not supported. + ec = U_UNSUPPORTED_ERROR; + return NULL; + } else { + // This should never happen. + ec = U_INTERNAL_PROGRAM_ERROR; + return NULL; } + if (haveArgResult && count <= argNumber) { + count = argNumber + 1; + } + prevIndex=msgPattern.getPart(argLimit).getLimit(); + i=argLimit; } - len = fPattern.length() - patternOffset; - if (len == 0 || - fPattern.compare(patternOffset, len, source, sourceOffset, len) == 0) { - pos.setIndex(sourceOffset + len); - return resultArray; - } - // else fall through... +} +// ------------------------------------- +// Parses the source pattern and returns the Formattable objects array, +// the array count and the ending parse position. The caller of this method +// owns the array. - PARSE_ERROR: - pos.setErrorIndex(sourceOffset); - delete [] resultArray; - count = 0; - return NULL; // leave index as is to signal error +Formattable* +MessageFormat::parse(const UnicodeString& source, + ParsePosition& pos, + int32_t& count) const { + UErrorCode ec = U_ZERO_ERROR; + return parse(0, source, pos, count, ec); } - + // ------------------------------------- -// Parses the source string and returns the array of -// Formattable objects and the array count. The caller +// Parses the source string and returns the array of +// Formattable objects and the array count. The caller // owns the returned array. - + Formattable* -MessageFormat::parse(const UnicodeString& source, +MessageFormat::parse(const UnicodeString& source, int32_t& cnt, UErrorCode& success) const { - if (!isArgNumeric ) { + if (msgPattern.hasNamedArguments()) { success = U_ARGUMENT_TYPE_MISMATCH; - return NULL; + return NULL; } ParsePosition status(0); // Calls the actual implementation method and starts @@ -1424,10 +1532,10 @@ MessageFormat::parse(const UnicodeString& source, } return result; } - + // ------------------------------------- // Parses the source text and copy into the result buffer. - + void MessageFormat::parseObject( const UnicodeString& source, Formattable& result, @@ -1435,29 +1543,29 @@ MessageFormat::parseObject( const UnicodeString& source, { int32_t cnt = 0; Formattable* tmpResult = parse(source, status, cnt); - if (tmpResult != NULL) + if (tmpResult != NULL) result.adoptArray(tmpResult, cnt); } - -UnicodeString + +UnicodeString MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) { - UnicodeString result; - if (U_SUCCESS(status)) { - int32_t plen = pattern.length(); - const UChar* pat = pattern.getBuffer(); - int32_t blen = plen * 2 + 1; // space for null termination, convenience - UChar* buf = result.getBuffer(blen); - if (buf == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } else { - int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); - result.releaseBuffer(U_SUCCESS(status) ? len : 0); + UnicodeString result; + if (U_SUCCESS(status)) { + int32_t plen = pattern.length(); + const UChar* pat = pattern.getBuffer(); + int32_t blen = plen * 2 + 1; // space for null termination, convenience + UChar* buf = result.getBuffer(blen); + if (buf == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status); + result.releaseBuffer(U_SUCCESS(status) ? len : 0); + } } - } - if (U_FAILURE(status)) { - result.setToBogus(); - } - return result; + if (U_FAILURE(status)) { + result.setToBogus(); + } + return result; } // ------------------------------------- @@ -1472,65 +1580,119 @@ static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const Unicode } return fmt; } - -/** - * Reads the segments[] array (see applyPattern()) and parses the - * segments[1..3] into a Format* object. Stores the format object in - * the subformats[] array. Updates the argTypes[] array type - * information for the corresponding argument. - * - * @param formatNumber index into subformats[] for this format - * @param segments array of strings with the parsed pattern segments - * @param parseError parse error data (output param) - * @param ec error code - */ -void -MessageFormat::makeFormat(int32_t formatNumber, - UnicodeString* segments, - UParseError& parseError, - UErrorCode& ec) { - if (U_FAILURE(ec)) { + +void MessageFormat::cacheExplicitFormats(UErrorCode& status) { + if (U_FAILURE(status)) { return; } - // Parse the argument number - int32_t argumentNumber = stou(segments[1]); // always unlocalized! - UnicodeString argumentName; - if (argumentNumber < 0) { - if ( (isArgNumeric==TRUE) && (formatNumber !=0) ) { - ec = U_INVALID_FORMAT_ERROR; - return; - } - isArgNumeric = FALSE; - argumentNumber=formatNumber; + if (cachedFormatters != NULL) { + uhash_removeAll(cachedFormatters); } - if (!isArgNumeric) { - if ( !isLegalArgName(segments[1]) ) { - ec = U_INVALID_FORMAT_ERROR; - return; + if (customFormatArgStarts != NULL) { + uhash_removeAll(customFormatArgStarts); + } + + // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT + // which we need not examine. + int32_t limit = msgPattern.countParts() - 2; + argTypeCount = 0; + // We also need not look at the first two "parts" + // (at most MSG_START and ARG_START) in this loop. + // We determine the argTypeCount first so that we can allocateArgTypes + // so that the next loop can set argTypes[argNumber]. + // (This is for the C API which needs the argTypes to read its va_arg list.) + for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) { + const MessagePattern::Part& part = msgPattern.getPart(i); + if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { + const int argNumber = part.getValue(); + if (argNumber >= argTypeCount) { + argTypeCount = argNumber + 1; + } } - argumentName = segments[1]; } + if (!allocateArgTypes(argTypeCount, status)) { + return; + } + // Set all argTypes to kObject, as a "none" value, for lack of any better value. + // We never use kObject for real arguments. + // We use it as "no argument yet" for the check for hasArgTypeConflicts. + for (int32_t i = 0; i < argTypeCount; ++i) { + argTypes[i] = Formattable::kObject; + } + hasArgTypeConflicts = FALSE; - // Parse the format, recording the argument type and creating a - // new Format object (except for string arguments). - Formattable::Type argType; - Format *fmt = NULL; - int32_t typeID, styleID; - DateFormat::EStyle style; - UnicodeString unquotedPattern, quotedPattern; - UBool inQuote = FALSE; + // This loop starts at part index 1 because we do need to examine + // ARG_START parts. (But we can ignore the MSG_START.) + for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) { + const MessagePattern::Part* part = &msgPattern.getPart(i); + if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) { + continue; + } + UMessagePatternArgType argType = part->getArgType(); - switch (typeID = findKeyword(segments[2], TYPE_IDS)) { + int32_t argNumber = -1; + part = &msgPattern.getPart(i + 1); + if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { + argNumber = part->getValue(); + } + Formattable::Type formattableType; - case 0: // string - argType = Formattable::kString; - break; + switch (argType) { + case UMSGPAT_ARG_TYPE_NONE: + formattableType = Formattable::kString; + break; + case UMSGPAT_ARG_TYPE_SIMPLE: { + int32_t index = i; + i += 2; + UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++)); + UnicodeString style; + if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { + style = msgPattern.getSubstring(*part); + ++i; + } + UParseError parseError; + Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status); + setArgStartFormat(index, formatter, status); + break; + } + case UMSGPAT_ARG_TYPE_CHOICE: + case UMSGPAT_ARG_TYPE_PLURAL: + case UMSGPAT_ARG_TYPE_SELECTORDINAL: + formattableType = Formattable::kDouble; + break; + case UMSGPAT_ARG_TYPE_SELECT: + formattableType = Formattable::kString; + break; + default: + status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable. + formattableType = Formattable::kString; + break; + } + if (argNumber != -1) { + if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) { + hasArgTypeConflicts = TRUE; + } + argTypes[argNumber] = formattableType; + } + } +} - case 1: // number - argType = Formattable::kDouble; +Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style, + Formattable::Type& formattableType, UParseError& parseError, + UErrorCode& ec) { + if (U_FAILURE(ec)) { + return NULL; + } + Format* fmt = NULL; + int32_t typeID, styleID; + DateFormat::EStyle date_style; + int32_t firstNonSpace; - switch (findKeyword(segments[3], NUMBER_STYLE_IDS)) { + switch (typeID = findKeyword(type, TYPE_IDS)) { + case 0: // number + formattableType = Formattable::kDouble; + switch (findKeyword(style, NUMBER_STYLE_IDS)) { case 0: // default fmt = NumberFormat::createInstance(fLocale, ec); break; @@ -1541,129 +1703,94 @@ MessageFormat::makeFormat(int32_t formatNumber, fmt = NumberFormat::createPercentInstance(fLocale, ec); break; case 3: // integer - argType = Formattable::kLong; + formattableType = Formattable::kLong; fmt = createIntegerFormat(fLocale, ec); break; - default: // pattern - fmt = NumberFormat::createInstance(fLocale, ec); - if (fmt && - fmt->getDynamicClassID() == DecimalFormat::getStaticClassID()) { - ((DecimalFormat*)fmt)->applyPattern(segments[3],parseError,ec); + default: // pattern or skeleton + firstNonSpace = PatternProps::skipWhiteSpace(style, 0); + if (style.compare(firstNonSpace, 2, u"::", 0, 2) == 0) { + // Skeleton + UnicodeString skeleton = style.tempSubString(firstNonSpace + 2); + fmt = number::NumberFormatter::forSkeleton(skeleton, ec).locale(fLocale).toFormat(ec); + } else { + // Pattern + fmt = NumberFormat::createInstance(fLocale, ec); + if (fmt) { + auto* decfmt = dynamic_cast(fmt); + if (decfmt != nullptr) { + decfmt->applyPattern(style, parseError, ec); + } + } } break; } break; - case 2: // date - case 3: // time - argType = Formattable::kDate; - styleID = findKeyword(segments[3], DATE_STYLE_IDS); - style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; - - if (typeID == 2) { - fmt = DateFormat::createDateInstance(style, fLocale); + case 1: // date + case 2: // time + formattableType = Formattable::kDate; + firstNonSpace = PatternProps::skipWhiteSpace(style, 0); + if (style.compare(firstNonSpace, 2, u"::", 0, 2) == 0) { + // Skeleton + UnicodeString skeleton = style.tempSubString(firstNonSpace + 2); + fmt = DateFormat::createInstanceForSkeleton(skeleton, fLocale, ec); } else { - fmt = DateFormat::createTimeInstance(style, fLocale); - } - - if (styleID < 0 && - fmt != NULL && - fmt->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) { - ((SimpleDateFormat*)fmt)->applyPattern(segments[3]); - } - break; + // Pattern + styleID = findKeyword(style, DATE_STYLE_IDS); + date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault; - case 4: // choice - argType = Formattable::kDouble; + if (typeID == 1) { + fmt = DateFormat::createDateInstance(date_style, fLocale); + } else { + fmt = DateFormat::createTimeInstance(date_style, fLocale); + } - fmt = new ChoiceFormat(segments[3], parseError, ec); + if (styleID < 0 && fmt != NULL) { + SimpleDateFormat* sdtfmt = dynamic_cast(fmt); + if (sdtfmt != NULL) { + sdtfmt->applyPattern(style); + } + } + } break; - case 5: // spellout - argType = Formattable::kDouble; - fmt = makeRBNF(URBNF_SPELLOUT, fLocale, segments[3], ec); - break; - case 6: // ordinal - argType = Formattable::kDouble; - fmt = makeRBNF(URBNF_ORDINAL, fLocale, segments[3], ec); + case 3: // spellout + formattableType = Formattable::kDouble; + fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec); break; - case 7: // duration - argType = Formattable::kDouble; - fmt = makeRBNF(URBNF_DURATION, fLocale, segments[3], ec); + case 4: // ordinal + formattableType = Formattable::kDouble; + fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec); break; - case 8: // plural - argType = Formattable::kDouble; - quotedPattern = segments[3]; - for (int32_t i = 0; i < quotedPattern.length(); ++i) { - UChar ch = quotedPattern.charAt(i); - if (ch == SINGLE_QUOTE) { - if (i+1 < quotedPattern.length() && quotedPattern.charAt(i+1)==SINGLE_QUOTE) { - unquotedPattern+=ch; - ++i; - } - else { - inQuote = !inQuote; - } - } - else { - unquotedPattern += ch; - } - } - fmt = new PluralFormat(fLocale, unquotedPattern, ec); + case 5: // duration + formattableType = Formattable::kDouble; + fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec); break; default: - argType = Formattable::kString; + formattableType = Formattable::kString; ec = U_ILLEGAL_ARGUMENT_ERROR; break; } - if (fmt==NULL && argType!=Formattable::kString && U_SUCCESS(ec)) { - ec = U_MEMORY_ALLOCATION_ERROR; - } - - if (!allocateSubformats(formatNumber+1) || - !allocateArgTypes(argumentNumber+1)) { - ec = U_MEMORY_ALLOCATION_ERROR; - } + return fmt; +} - if (U_FAILURE(ec)) { - delete fmt; - return; - } - // Parse succeeded; record results in our arrays - subformats[formatNumber].format = fmt; - subformats[formatNumber].offset = segments[0].length(); - if (isArgNumeric) { - subformats[formatNumber].argName = NULL; - subformats[formatNumber].argNum = argumentNumber; - } - else { - subformats[formatNumber].argName = new UnicodeString(argumentName); - subformats[formatNumber].argNum = -1; - } - subformatCount = formatNumber+1; - - // Careful here: argumentNumber may in general arrive out of - // sequence, e.g., "There was {2} on {0,date} (see {1,number})." - argTypes[argumentNumber] = argType; - if (argumentNumber+1 > argTypeCount) { - argTypeCount = argumentNumber+1; - } -} - -// ------------------------------------- -// Finds the string, s, in the string array, list. -int32_t MessageFormat::findKeyword(const UnicodeString& s, +//------------------------------------- +// Finds the string, s, in the string array, list. +int32_t MessageFormat::findKeyword(const UnicodeString& s, const UChar * const *list) { - if (s.length() == 0) + if (s.isEmpty()) { return 0; // default + } - UnicodeString buffer = s; + int32_t length = s.length(); + const UChar *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length); + UnicodeString buffer(FALSE, ps, length); // Trims the space characters and turns all characters // in s to lower case. - buffer.trim().toLower(""); + buffer.toLower(""); for (int32_t i = 0; list[i]; ++i) { if (!buffer.compare(list[i], u_strlen(list[i]))) { return i; @@ -1671,57 +1798,15 @@ int32_t MessageFormat::findKeyword(const UnicodeString& s, } return -1; } - -// ------------------------------------- -// Checks the range of the source text to quote the special -// characters, { and ' and copy to target buffer. - -void -MessageFormat::copyAndFixQuotes(const UnicodeString& source, - int32_t start, - int32_t end, - UnicodeString& appendTo) -{ - UBool gotLB = FALSE; - - for (int32_t i = start; i < end; ++i) { - UChar ch = source[i]; - if (ch == LEFT_CURLY_BRACE) { - appendTo += SINGLE_QUOTE; - appendTo += LEFT_CURLY_BRACE; - appendTo += SINGLE_QUOTE; - gotLB = TRUE; - } - else if (ch == RIGHT_CURLY_BRACE) { - if(gotLB) { - appendTo += RIGHT_CURLY_BRACE; - gotLB = FALSE; - } - else { - // orig code. - appendTo += SINGLE_QUOTE; - appendTo += RIGHT_CURLY_BRACE; - appendTo += SINGLE_QUOTE; - } - } - else if (ch == SINGLE_QUOTE) { - appendTo += SINGLE_QUOTE; - appendTo += SINGLE_QUOTE; - } - else { - appendTo += ch; - } - } -} /** * Convenience method that ought to be in NumberFormat */ -NumberFormat* +NumberFormat* MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const { NumberFormat *temp = NumberFormat::createInstance(locale, status); - if (temp != NULL && temp->getDynamicClassID() == DecimalFormat::getStaticClassID()) { - DecimalFormat *temp2 = (DecimalFormat*) temp; + DecimalFormat *temp2; + if (temp != NULL && (temp2 = dynamic_cast(temp)) != NULL) { temp2->setMaximumFractionDigits(0); temp2->setDecimalSeparatorAlwaysShown(FALSE); temp2->setParseIntegerOnly(TRUE); @@ -1741,7 +1826,7 @@ const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const if (defaultNumberFormat == NULL) { MessageFormat* t = (MessageFormat*) this; t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec); - if (U_FAILURE(ec)) { + if (U_FAILURE(ec)) { delete t->defaultNumberFormat; t->defaultNumberFormat = NULL; } else if (t->defaultNumberFormat == NULL) { @@ -1771,22 +1856,62 @@ const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const { UBool MessageFormat::usesNamedArguments() const { - return !isArgNumeric; + return msgPattern.hasNamedArguments(); } -UBool -MessageFormat::isLegalArgName(const UnicodeString& argName) const { - if(!u_hasBinaryProperty(argName.charAt(0), idStart)) { - return FALSE; +int32_t +MessageFormat::getArgTypeCount() const { + return argTypeCount; +} + +UBool MessageFormat::equalFormats(const void* left, const void* right) { + return *(const Format*)left==*(const Format*)right; +} + + +UBool MessageFormat::DummyFormat::operator==(const Format&) const { + return TRUE; +} + +Format* MessageFormat::DummyFormat::clone() const { + return new DummyFormat(); +} + +UnicodeString& MessageFormat::DummyFormat::format(const Formattable&, + UnicodeString& appendTo, + UErrorCode& status) const { + if (U_SUCCESS(status)) { + status = U_UNSUPPORTED_ERROR; } - for (int32_t i=1; isize(); + return (fFormatNames==NULL) ? 0 : fFormatNames->size(); } FormatNameEnumeration::~FormatNameEnumeration() { - UnicodeString *s; - for (int32_t i=0; isize(); ++i) { - if ((s=(UnicodeString *)fFormatNames->elementAt(i))!=NULL) { - delete s; + delete fFormatNames; +} + +MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat &mf, UPluralType t) + : msgFormat(mf), rules(NULL), type(t) { +} + +MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() { + delete rules; +} + +UnicodeString MessageFormat::PluralSelectorProvider::select(void *ctx, double number, + UErrorCode& ec) const { + if (U_FAILURE(ec)) { + return UnicodeString(FALSE, OTHER_STRING, 5); + } + MessageFormat::PluralSelectorProvider* t = const_cast(this); + if(rules == NULL) { + t->rules = PluralRules::forLocale(msgFormat.fLocale, type, ec); + if (U_FAILURE(ec)) { + return UnicodeString(FALSE, OTHER_STRING, 5); } } - delete fFormatNames; + // Select a sub-message according to how the number is formatted, + // which is specified in the selected sub-message. + // We avoid this circle by looking at how + // the number is formatted in the "other" sub-message + // which must always be present and usually contains the number. + // Message authors should be consistent across sub-messages. + PluralSelectorContext &context = *static_cast(ctx); + int32_t otherIndex = msgFormat.findOtherSubMessage(context.startIndex); + context.numberArgIndex = msgFormat.findFirstPluralNumberArg(otherIndex, context.argName); + if(context.numberArgIndex > 0 && msgFormat.cachedFormatters != NULL) { + context.formatter = + (const Format*)uhash_iget(msgFormat.cachedFormatters, context.numberArgIndex); + } + if(context.formatter == NULL) { + context.formatter = msgFormat.getDefaultNumberFormat(ec); + context.forReplaceNumber = TRUE; + } + if (context.number.getDouble(ec) != number) { + ec = U_INTERNAL_PROGRAM_ERROR; + return UnicodeString(FALSE, OTHER_STRING, 5); + } + context.formatter->format(context.number, context.numberString, ec); + auto* decFmt = dynamic_cast(context.formatter); + if(decFmt != NULL) { + number::impl::DecimalQuantity dq; + decFmt->formatToDecimalQuantity(context.number, dq, ec); + if (U_FAILURE(ec)) { + return UnicodeString(FALSE, OTHER_STRING, 5); + } + return rules->select(dq); + } else { + return rules->select(number); + } } + +void MessageFormat::PluralSelectorProvider::reset() { + delete rules; + rules = NULL; +} + + U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */