1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************
11 * Modification History:
13 * Date Name Description
14 * 02/19/97 aliu Converted from java.
15 * 03/20/97 helena Finished first cut of implementation.
16 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
17 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
18 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
19 * 07/09/97 helena Made ParsePosition into a class.
20 * 02/22/99 stephen Removed character literals for EBCDIC safety
21 * 11/01/09 kirtig Added SelectFormat
22 ********************************************************************/
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_FORMATTING
28 #include "unicode/appendable.h"
29 #include "unicode/choicfmt.h"
30 #include "unicode/datefmt.h"
31 #include "unicode/decimfmt.h"
32 #include "unicode/localpointer.h"
33 #include "unicode/msgfmt.h"
34 #include "unicode/numberformatter.h"
35 #include "unicode/plurfmt.h"
36 #include "unicode/rbnf.h"
37 #include "unicode/selfmt.h"
38 #include "unicode/smpdtfmt.h"
39 #include "unicode/umsg.h"
40 #include "unicode/ustring.h"
42 #include "patternprops.h"
43 #include "messageimpl.h"
44 #include "msgfmt_impl.h"
45 #include "plurrule_impl.h"
52 #include "number_decimalquantity.h"
54 // *****************************************************************************
55 // class MessageFormat
56 // *****************************************************************************
// Pattern punctuation, spelled as numeric UTF-16 code units rather than
// character literals (the modification history cites EBCDIC safety).
58 #define SINGLE_QUOTE ((UChar)0x0027)
59 #define COMMA ((UChar)0x002C)
60 #define LEFT_CURLY_BRACE ((UChar)0x007B)
61 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
63 //---------------------------------------
// Zero-terminated UTF-16 spellings of the argument type keywords.
// Each array is written as code units; the trailing comment shows the text.
66 static const UChar ID_NUMBER
[] = {
67 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
69 static const UChar ID_DATE
[] = {
70 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
72 static const UChar ID_TIME
[] = {
73 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
75 static const UChar ID_SPELLOUT
[] = {
76 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
78 static const UChar ID_ORDINAL
[] = {
79 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
81 static const UChar ID_DURATION
[] = {
82 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
85 // MessageFormat Type List Number, Date, Time or Choice
// NOTE(review): the entries of this table are not visible in this listing;
// presumably it lists the ID_* arrays above — confirm against the full file.
86 static const UChar
* const TYPE_IDS
[] = {
// Zero-terminated UTF-16 spellings of the number style keywords.
// ID_EMPTY stands for the default style so that a null pointer remains free
// to mark the end of a keyword list (see its trailing comment).
96 static const UChar ID_EMPTY
[] = {
97 0 /* empty string, used for default so that null can mark end of list */
99 static const UChar ID_CURRENCY
[] = {
100 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
102 static const UChar ID_PERCENT
[] = {
103 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
105 static const UChar ID_INTEGER
[] = {
106 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
109 // NumberFormat modifier list, default, currency, percent or integer
// NOTE(review): the table entries are not visible in this listing.
110 static const UChar
* const NUMBER_STYLE_IDS
[] = {
// Zero-terminated UTF-16 spellings of the date/time style keywords.
118 static const UChar ID_SHORT
[] = {
119 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
121 static const UChar ID_MEDIUM
[] = {
122 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
124 static const UChar ID_LONG
[] = {
125 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
127 static const UChar ID_FULL
[] = {
128 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
131 // DateFormat modifier list, default, short, medium, long or full
// NOTE(review): the table entries are not visible in this listing.
132 static const UChar
* const DATE_STYLE_IDS
[] = {
// DateFormat styles in the order default, short, medium, long, full.
// NOTE(review): presumably indexed in parallel with DATE_STYLE_IDS — confirm.
141 static const icu::DateFormat::EStyle DATE_STYLES
[] = {
142 icu::DateFormat::kDefault
,
143 icu::DateFormat::kShort
,
144 icu::DateFormat::kMedium
,
145 icu::DateFormat::kLong
,
146 icu::DateFormat::kFull
,
// Smallest capacity allocateArgTypes() will allocate for argTypes[].
149 static const int32_t DEFAULT_INITIAL_CAPACITY
= 10;
// Text appended by format() when an argument pointer is NULL.
151 static const UChar NULL_STRING
[] = {
152 0x6E, 0x75, 0x6C, 0x6C, 0 // "null"
// The keyword "other" as zero-terminated UTF-16.
155 static const UChar OTHER_STRING
[] = {
156 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
160 static UBool U_CALLCONV
equalFormatsForHash(const UHashTok key1
,
161 const UHashTok key2
) {
162 return icu::MessageFormat::equalFormats(key1
.pointer
, key2
.pointer
);
169 // -------------------------------------
// RTTI macro expansions for the two classes named below.
170 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat
)
171 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration
)
173 //--------------------------------------------------------------------
176 * Convert an integer value to a string and append the result to
177 * the given UnicodeString.
// Converts i to radix-10 text in the scratch buffer temp and appends that
// text to appendTo (length -1: presumably "NUL-terminated" — confirm).
// NOTE(review): the declaration of temp and the return statement are not
// visible in this listing; confirm against the full file.
179 static UnicodeString
& itos(int32_t i
, UnicodeString
& appendTo
) {
181 uprv_itou(temp
,16,i
,10,0); // 10 == radix
182 appendTo
.append(temp
, -1);
187 // AppendableWrapper: encapsulates the result of formatting, keeping track
188 // of the string and its length.
// NOTE(review): several lines of this class (member declarations, and
// presumably the updates of len) are not visible in this listing.
189 class AppendableWrapper
: public UMemory
{
// Binds the wrapper to the destination Appendable; len starts at 0.
191 AppendableWrapper(Appendable
& appendable
) : app(appendable
), len(0) {
// Appends all of s to the destination.
193 void append(const UnicodeString
& s
) {
194 app
.appendString(s
.getBuffer(), s
.length());
// Appends sLength code units starting at s.
197 void append(const UChar
* s
, const int32_t sLength
) {
198 app
.appendString(s
, sLength
);
// Appends the [start, start+length) slice of s via a temporary substring.
201 void append(const UnicodeString
& s
, int32_t start
, int32_t length
) {
202 append(s
.tempSubString(start
, length
));
// Formats arg with formatter into a scratch string s.
// NOTE(review): the declaration of s and the append of the result are not
// visible here.
204 void formatAndAppend(const Format
* formatter
, const Formattable
& arg
, UErrorCode
& ec
) {
206 formatter
->format(arg
, s
, ec
);
// Variant that takes an already formatted argString: when argString is empty
// it falls back to formatting arg with formatter.
211 void formatAndAppend(const Format
* formatter
, const Formattable
& arg
,
212 const UnicodeString
&argString
, UErrorCode
& ec
) {
213 if (!argString
.isEmpty()) {
218 formatAndAppend(formatter
, arg
, ec
);
230 // -------------------------------------
231 // Creates a MessageFormat instance based on the pattern.
// Constructs a MessageFormat for the default locale and applies pattern.
// All caches and default formatters start out NULL.
// NOTE(review): some parameter and member-initializer lines are not visible
// in this listing.
233 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
235 : fLocale(Locale::getDefault()), // Uses the default locale
238 formatAliasesCapacity(0),
242 hasArgTypeConflicts(FALSE
),
243 defaultNumberFormat(NULL
),
244 defaultDateFormat(NULL
),
245 cachedFormatters(NULL
),
246 customFormatArgStarts(NULL
),
247 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
248 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
// The same locale name is passed for both locale-ID arguments.
250 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
251 applyPattern(pattern
, success
);
// Constructs a MessageFormat for newLocale and applies pattern.
// NOTE(review): some parameter and member-initializer lines are not visible
// in this listing.
254 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
255 const Locale
& newLocale
,
257 : fLocale(newLocale
),
260 formatAliasesCapacity(0),
264 hasArgTypeConflicts(FALSE
),
265 defaultNumberFormat(NULL
),
266 defaultDateFormat(NULL
),
267 cachedFormatters(NULL
),
268 customFormatArgStarts(NULL
),
269 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
270 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
// The same locale name is passed for both locale-ID arguments.
272 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
273 applyPattern(pattern
, success
);
// Constructs a MessageFormat for newLocale and applies pattern, forwarding
// parseError to applyPattern().
// NOTE(review): some parameter and member-initializer lines are not visible
// in this listing.
276 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
277 const Locale
& newLocale
,
278 UParseError
& parseError
,
280 : fLocale(newLocale
),
283 formatAliasesCapacity(0),
287 hasArgTypeConflicts(FALSE
),
288 defaultNumberFormat(NULL
),
289 defaultDateFormat(NULL
),
290 cachedFormatters(NULL
),
291 customFormatArgStarts(NULL
),
292 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
293 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
// The same locale name is passed for both locale-ID arguments.
295 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
296 applyPattern(pattern
, parseError
, success
);
// Copy constructor. Copies the locale, the parsed pattern and the conflict
// flag directly; caches and default formatters start out NULL and the
// per-argument objects are then copied by copyObjects().
// NOTE(review): some member-initializer lines, and whatever follows the
// copyObjects() call, are not visible in this listing.
299 MessageFormat::MessageFormat(const MessageFormat
& that
)
302 fLocale(that
.fLocale
),
303 msgPattern(that
.msgPattern
),
305 formatAliasesCapacity(0),
309 hasArgTypeConflicts(that
.hasArgTypeConflicts
),
310 defaultNumberFormat(NULL
),
311 defaultDateFormat(NULL
),
312 cachedFormatters(NULL
),
313 customFormatArgStarts(NULL
),
314 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
315 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
317 // This will take care of creating the hash tables (since they are NULL).
// A local error code is used because a constructor has no status parameter.
318 UErrorCode ec
= U_ZERO_ERROR
;
319 copyObjects(that
, ec
);
// Destructor: closes both hash tables, frees the formatAliases array and
// deletes the lazily created default formatters.
// NOTE(review): one line between the uhash_close() calls and the
// uprv_free() call is not visible in this listing.
325 MessageFormat::~MessageFormat()
327 uhash_close(cachedFormatters
);
328 uhash_close(customFormatArgStarts
);
331 uprv_free(formatAliases
);
332 delete defaultNumberFormat
;
333 delete defaultDateFormat
;
336 //--------------------------------------------------------------------
337 // Variable-size array management
340 * Allocate argTypes[] to at least the given capacity and return
341 * TRUE if successful. If not, leave argTypes[] unchanged.
343 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
344 * if necessary to be at least as large as specified.
// Grows argTypes[] so that it can hold at least `capacity` entries.
// Growth policy: never below DEFAULT_INITIAL_CAPACITY, and at least double
// the current capacity.
// NOTE(review): the return statements, the NULL check on the realloc result
// and the assignment back to argTypes are not visible in this listing.
346 UBool
MessageFormat::allocateArgTypes(int32_t capacity
, UErrorCode
& status
) {
347 if (U_FAILURE(status
)) {
// Already large enough: nothing to allocate.
350 if (argTypeCapacity
>= capacity
) {
353 if (capacity
< DEFAULT_INITIAL_CAPACITY
) {
354 capacity
= DEFAULT_INITIAL_CAPACITY
;
355 } else if (capacity
< 2*argTypeCapacity
) {
356 capacity
= 2*argTypeCapacity
;
// The result is held in a temporary first, not assigned straight to argTypes.
358 Formattable::Type
* a
= (Formattable::Type
*)
359 uprv_realloc(argTypes
, sizeof(*argTypes
) * capacity
);
361 status
= U_MEMORY_ALLOCATION_ERROR
;
365 argTypeCapacity
= capacity
;
369 // -------------------------------------
370 // assignment operator
// Assignment: copies the base Format state, the locale (through setLocale()),
// the parsed pattern and the conflict flag, then copies the per-argument
// objects with copyObjects() under a local error code.
// NOTE(review): the return type, any self-assignment guard, the handling of
// a copyObjects() failure and the return statement are not visible here.
373 MessageFormat::operator=(const MessageFormat
& that
)
376 // Calls the super class for assignment first.
377 Format::operator=(that
);
379 setLocale(that
.fLocale
);
380 msgPattern
= that
.msgPattern
;
381 hasArgTypeConflicts
= that
.hasArgTypeConflicts
;
383 UErrorCode ec
= U_ZERO_ERROR
;
384 copyObjects(that
, ec
);
// Equality: same object, or equal base Format state, pattern and locale plus
// matching custom formatters.
// NOTE(review): the return statements of the individual checks are not
// visible in this listing.
393 MessageFormat::operator==(const Format
& rhs
) const
395 if (this == &rhs
) return TRUE
;
// The cast happens before the base-class check below; `that` is only read
// after that check.
397 MessageFormat
& that
= (MessageFormat
&)rhs
;
399 // Check class ID before checking MessageFormat members
400 if (!Format::operator==(rhs
) ||
401 msgPattern
!= that
.msgPattern
||
402 fLocale
!= that
.fLocale
) {
406 // Compare hashtables.
// One side has custom formatters and the other has none.
407 if ((customFormatArgStarts
== NULL
) != (that
.customFormatArgStarts
== NULL
)) {
// Neither side has custom formatters.
410 if (customFormatArgStarts
== NULL
) {
414 UErrorCode ec
= U_ZERO_ERROR
;
415 const int32_t count
= uhash_count(customFormatArgStarts
);
416 const int32_t rhs_count
= uhash_count(that
.customFormatArgStarts
);
417 if (count
!= rhs_count
) {
// Walk both tables in lock step with independent iteration cursors.
// NOTE(review): this relies on both tables enumerating their keys in the
// same order — confirm that uhash guarantees this.
420 int32_t idx
= 0, rhs_idx
= 0, pos
= UHASH_FIRST
, rhs_pos
= UHASH_FIRST
;
421 for (; idx
< count
&& rhs_idx
< rhs_count
&& U_SUCCESS(ec
); ++idx
, ++rhs_idx
) {
422 const UHashElement
* cur
= uhash_nextElement(customFormatArgStarts
, &pos
);
423 const UHashElement
* rhs_cur
= uhash_nextElement(that
.customFormatArgStarts
, &rhs_pos
);
// Keys are the ARG_START part indexes stored by setCustomArgStartFormat().
424 if (cur
->key
.integer
!= rhs_cur
->key
.integer
) {
// Compare the formatter objects cached under those keys.
427 const Format
* format
= (const Format
*)uhash_iget(cachedFormatters
, cur
->key
.integer
);
428 const Format
* rhs_format
= (const Format
*)uhash_iget(that
.cachedFormatters
, rhs_cur
->key
.integer
);
429 if (*format
!= *rhs_format
) {
436 // -------------------------------------
437 // Creates a copy of this MessageFormat, the caller owns the copy.
// Returns a heap-allocated copy made with the copy constructor.
440 MessageFormat::clone() const
442 return new MessageFormat(*this);
445 // -------------------------------------
446 // Sets the locale of this MessageFormat object to theLocale.
// Switches to theLocale. Only acts when the locale actually changes: the
// default number/date formatters are deleted and nulled, the locale IDs are
// refreshed and both plural providers are reset.
// NOTE(review): the assignment of theLocale to fLocale is not visible in this
// listing; presumably it sits just before the setLocaleIDs() call.
449 MessageFormat::setLocale(const Locale
& theLocale
)
451 if (fLocale
!= theLocale
) {
452 delete defaultNumberFormat
;
453 defaultNumberFormat
= NULL
;
454 delete defaultDateFormat
;
455 defaultDateFormat
= NULL
;
457 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
458 pluralProvider
.reset();
459 ordinalProvider
.reset();
463 // -------------------------------------
464 // Gets the locale of this MessageFormat object.
// Const accessor for the locale.
// NOTE(review): the return type and body are not visible in this listing.
467 MessageFormat::getLocale() const
// Convenience overload: applies newPattern using a throwaway UParseError, so
// parse position details are not reported to the caller.
// NOTE(review): the status parameter line is not visible in this listing.
473 MessageFormat::applyPattern(const UnicodeString
& newPattern
,
476 UParseError parseError
;
477 applyPattern(newPattern
,parseError
,status
);
481 // -------------------------------------
482 // Applies the new pattern and returns an error if the pattern
// Parses pattern into msgPattern, passing the address of parseError to the
// parser, then caches formatters with cacheExplicitFormats().
// NOTE(review): the error-code parameter line, any early-out guard and the
// handling of a failed ec are not visible in this listing.
485 MessageFormat::applyPattern(const UnicodeString
& pattern
,
486 UParseError
& parseError
,
492 msgPattern
.parse(pattern
, &parseError
, ec
);
493 cacheExplicitFormats(ec
);
// Closes both hash tables, nulls their pointers and clears the
// argument-type-conflict flag.
// NOTE(review): two further lines of this function are not visible in this
// listing.
500 void MessageFormat::resetPattern() {
502 uhash_close(cachedFormatters
);
503 cachedFormatters
= NULL
;
504 uhash_close(customFormatArgStarts
);
505 customFormatArgStarts
= NULL
;
507 hasArgTypeConflicts
= FALSE
;
511 MessageFormat::applyPattern(const UnicodeString
& pattern
,
512 UMessagePatternApostropheMode aposMode
,
513 UParseError
* parseError
,
514 UErrorCode
& status
) {
515 if (aposMode
!= msgPattern
.getApostropheMode()) {
516 msgPattern
.clearPatternAndSetApostropheMode(aposMode
);
518 applyPattern(pattern
, *parseError
, status
);
521 // -------------------------------------
522 // Converts this MessageFormat instance to a pattern.
// Appends the original pattern string to appendTo.
// When custom formatters have been set, or msgPattern has no parts, appendTo
// is set to bogus instead.
// NOTE(review): the return inside the bogus branch is not visible in this
// listing.
525 MessageFormat::toPattern(UnicodeString
& appendTo
) const {
526 if ((customFormatArgStarts
!= NULL
&& 0 != uhash_count(customFormatArgStarts
)) ||
527 0 == msgPattern
.countParts()
529 appendTo
.setToBogus();
532 return appendTo
.append(msgPattern
.getPatternString());
// Iterator step over top-level arguments. Callers in this file start with
// partIndex 0, feed the previous result back in, and stop on a negative
// result.
// NOTE(review): the enclosing loop and both return statements are not visible
// in this listing.
535 int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex
) const {
// A non-zero index is a previous ARG_START: jump to its limit part so that
// anything nested inside that argument is skipped.
536 if (partIndex
!= 0) {
537 partIndex
= msgPattern
.getLimitPartIndex(partIndex
);
// Examine the next part (pre-increment).
540 UMessagePatternPartType type
= msgPattern
.getPartType(++partIndex
);
541 if (type
== UMSGPAT_PART_TYPE_ARG_START
) {
544 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
// Stores formatter in cachedFormatters under the key argStart, creating the
// table on first use. The table is given uprv_deleteUObject as its value
// deleter.
// NOTE(review): the formatter parameter line and the bodies of the failure
// branches are not visible in this listing.
550 void MessageFormat::setArgStartFormat(int32_t argStart
,
552 UErrorCode
& status
) {
553 if (U_FAILURE(status
)) {
557 if (cachedFormatters
== NULL
) {
558 cachedFormatters
=uhash_open(uhash_hashLong
, uhash_compareLong
,
559 equalFormatsForHash
, &status
);
560 if (U_FAILURE(status
)) {
564 uhash_setValueDeleter(cachedFormatters
, uprv_deleteUObject
);
// A NULL formatter is stored as a DummyFormat placeholder; getCachedFormatter()
// reports such a placeholder as NULL.
// NOTE(review): the result of `new DummyFormat()` is not checked here.
566 if (formatter
== NULL
) {
567 formatter
= new DummyFormat();
569 uhash_iput(cachedFormatters
, argStart
, formatter
, &status
);
573 UBool
MessageFormat::argNameMatches(int32_t partIndex
, const UnicodeString
& argName
, int32_t argNumber
) {
574 const MessagePattern::Part
& part
= msgPattern
.getPart(partIndex
);
575 return part
.getType() == UMSGPAT_PART_TYPE_ARG_NAME
?
576 msgPattern
.partSubstringMatches(part
, argName
) :
577 part
.getValue() == argNumber
; // ARG_NUMBER
580 // Sets a custom formatter for a MessagePattern ARG_START part index.
581 // "Custom" formatters are provided by the user via setFormat() or similar APIs.
// Caches formatter through setArgStartFormat() and records argStart in
// customFormatArgStarts (created on first use) with the value 1.
// NOTE(review): the formatter parameter line and the remaining uhash_open()
// arguments are not visible in this listing.
582 void MessageFormat::setCustomArgStartFormat(int32_t argStart
,
584 UErrorCode
& status
) {
585 setArgStartFormat(argStart
, formatter
, status
);
586 if (customFormatArgStarts
== NULL
) {
587 customFormatArgStarts
=uhash_open(uhash_hashLong
, uhash_compareLong
,
590 uhash_iputi(customFormatArgStarts
, argStart
, 1, &status
);
// Looks up the formatter cached under the key argumentNumber. Callers in
// this file pass an ARG_START part index.
// A cached DummyFormat is not returned to the caller.
// NOTE(review): the return statements of the NULL paths are not visible in
// this listing.
593 Format
* MessageFormat::getCachedFormatter(int32_t argumentNumber
) const {
594 if (cachedFormatters
== NULL
) {
597 void* ptr
= uhash_iget(cachedFormatters
, argumentNumber
);
// Only a real formatter is returned; the dynamic_cast filters out DummyFormat.
598 if (ptr
!= NULL
&& dynamic_cast<DummyFormat
*>((Format
*)ptr
) == NULL
) {
599 return (Format
*) ptr
;
601 // Not cached, or a DummyFormat representing setFormat(NULL).
606 // -------------------------------------
607 // Adopts the new formats array and updates the array count.
608 // This MessageFormat instance owns the new formats.
// Replaces all cached and custom formatters with the adopted newFormats,
// assigning them to top-level arguments in pattern order.
// Does nothing when newFormats is NULL or count is negative.
// NOTE(review): the count parameter line, the early return and the increment
// of formatNumber inside the first loop are not visible in this listing.
610 MessageFormat::adoptFormats(Format
** newFormats
,
612 if (newFormats
== NULL
|| count
< 0) {
615 // Throw away any cached formatters.
616 if (cachedFormatters
!= NULL
) {
617 uhash_removeAll(cachedFormatters
);
619 if (customFormatArgStarts
!= NULL
) {
620 uhash_removeAll(customFormatArgStarts
);
623 int32_t formatNumber
= 0;
624 UErrorCode status
= U_ZERO_ERROR
;
// Stops at the first of: all formats consumed, an error, or no more
// top-level arguments.
625 for (int32_t partIndex
= 0;
626 formatNumber
< count
&& U_SUCCESS(status
) &&
627 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
628 setCustomArgStartFormat(partIndex
, newFormats
[formatNumber
], status
);
631 // Delete those that didn't get used (if any).
632 for (; formatNumber
< count
; ++formatNumber
) {
633 delete newFormats
[formatNumber
];
638 // -------------------------------------
639 // Sets the new formats array and updates the array count.
640 // This MessageFormat instance makes a copy of the new formats.
// Replaces all cached and custom formatters with clones of newFormats,
// assigning them to top-level arguments in pattern order. A NULL entry is
// passed on as a NULL formatter.
// Does nothing when newFormats is NULL or count is negative.
// NOTE(review): the count parameter line, the early return, the increment of
// formatNumber and the body of the final failure branch are not visible in
// this listing.
643 MessageFormat::setFormats(const Format
** newFormats
,
645 if (newFormats
== NULL
|| count
< 0) {
648 // Throw away any cached formatters.
649 if (cachedFormatters
!= NULL
) {
650 uhash_removeAll(cachedFormatters
);
652 if (customFormatArgStarts
!= NULL
) {
653 uhash_removeAll(customFormatArgStarts
);
656 UErrorCode status
= U_ZERO_ERROR
;
657 int32_t formatNumber
= 0;
658 for (int32_t partIndex
= 0;
659 formatNumber
< count
&& U_SUCCESS(status
) && (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
660 Format
* newFormat
= NULL
;
661 if (newFormats
[formatNumber
] != NULL
) {
662 newFormat
= newFormats
[formatNumber
]->clone();
// A failed clone is reported through status.
663 if (newFormat
== NULL
) {
664 status
= U_MEMORY_ALLOCATION_ERROR
;
667 setCustomArgStartFormat(partIndex
, newFormat
, status
);
670 if (U_FAILURE(status
)) {
675 // -------------------------------------
676 // Adopt a single format by format number.
677 // Do nothing if the format number is not less than the array count.
// Adopts newFormat as the formatter of the n-th top-level argument.
// newFormat is held in a LocalPointer and released to the cache only when
// argument n is found.
// NOTE(review): any range check on n, the increment of formatNumber and the
// return after the match are not visible in this listing.
680 MessageFormat::adoptFormat(int32_t n
, Format
*newFormat
) {
681 LocalPointer
<Format
> p(newFormat
);
683 int32_t formatNumber
= 0;
684 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
685 if (n
== formatNumber
) {
// The error code is local and not reported to the caller.
686 UErrorCode status
= U_ZERO_ERROR
;
687 setCustomArgStartFormat(partIndex
, p
.orphan(), status
);
695 // -------------------------------------
696 // Adopt a single format by format name.
697 // Do nothing if there is no match of formatName.
// Adopts formatToAdopt as the formatter of every top-level argument whose
// name or number matches formatName.
// An invalid formatName sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): several lines of the matching branch are not visible in this
// listing (the declaration of f and the first alternative of the if/else
// chain); the visible part clones formatToAdopt in one branch.
699 MessageFormat::adoptFormat(const UnicodeString
& formatName
,
700 Format
* formatToAdopt
,
701 UErrorCode
& status
) {
// Held in a LocalPointer from the start, before the status check.
702 LocalPointer
<Format
> p(formatToAdopt
);
703 if (U_FAILURE(status
)) {
706 int32_t argNumber
= MessagePattern::validateArgumentName(formatName
);
707 if (argNumber
< UMSGPAT_ARG_NAME_NOT_NUMBER
) {
708 status
= U_ILLEGAL_ARGUMENT_ERROR
;
711 for (int32_t partIndex
= 0;
712 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0 && U_SUCCESS(status
);
// partIndex + 1 is the part that follows ARG_START, which holds the argument
// name or number.
714 if (argNameMatches(partIndex
+ 1, formatName
, argNumber
)) {
718 } else if (formatToAdopt
== NULL
) {
721 f
= formatToAdopt
->clone();
723 status
= U_MEMORY_ALLOCATION_ERROR
;
727 setCustomArgStartFormat(partIndex
, f
, status
);
732 // -------------------------------------
733 // Set a single format.
734 // Do nothing if the variable is not less than the array count.
// Sets a clone of newFormat as the formatter of the n-th top-level argument.
// NOTE(review): any range check on n, the check of the clone result, the
// increment of formatNumber and the return after the match are not visible
// in this listing.
736 MessageFormat::setFormat(int32_t n
, const Format
& newFormat
) {
739 int32_t formatNumber
= 0;
740 for (int32_t partIndex
= 0;
741 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
742 if (n
== formatNumber
) {
743 Format
* new_format
= newFormat
.clone();
// The error code is local and not reported to the caller.
745 UErrorCode status
= U_ZERO_ERROR
;
746 setCustomArgStartFormat(partIndex
, new_format
, status
);
755 // -------------------------------------
756 // Get a single format by format name.
757 // Returns NULL on a failure status or when no formatters have been cached.
// Returns the cached formatter of the first top-level argument whose name or
// number matches formatName.
// An invalid formatName sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the returns after the invalid-name check and after the loop
// are not visible in this listing.
759 MessageFormat::getFormat(const UnicodeString
& formatName
, UErrorCode
& status
) {
760 if (U_FAILURE(status
) || cachedFormatters
== NULL
) return NULL
;
762 int32_t argNumber
= MessagePattern::validateArgumentName(formatName
);
763 if (argNumber
< UMSGPAT_ARG_NAME_NOT_NUMBER
) {
764 status
= U_ILLEGAL_ARGUMENT_ERROR
;
767 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
// partIndex + 1 is the name/number part that follows ARG_START.
768 if (argNameMatches(partIndex
+ 1, formatName
, argNumber
)) {
769 return getCachedFormatter(partIndex
);
775 // -------------------------------------
776 // Set a single format by format name
777 // Do nothing if there is no match of formatName.
// Sets a separate clone of newFormat as the formatter of every top-level
// argument whose name or number matches formatName.
// An invalid formatName sets U_ILLEGAL_ARGUMENT_ERROR; a failed clone sets
// U_MEMORY_ALLOCATION_ERROR.
// NOTE(review): the return statements after the error assignments are not
// visible in this listing.
779 MessageFormat::setFormat(const UnicodeString
& formatName
,
780 const Format
& newFormat
,
781 UErrorCode
& status
) {
782 if (U_FAILURE(status
)) return;
784 int32_t argNumber
= MessagePattern::validateArgumentName(formatName
);
785 if (argNumber
< UMSGPAT_ARG_NAME_NOT_NUMBER
) {
786 status
= U_ILLEGAL_ARGUMENT_ERROR
;
789 for (int32_t partIndex
= 0;
790 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0 && U_SUCCESS(status
);
// partIndex + 1 is the name/number part that follows ARG_START.
792 if (argNameMatches(partIndex
+ 1, formatName
, argNumber
)) {
793 Format
* new_format
= newFormat
.clone();
794 if (new_format
== NULL
) {
795 status
= U_MEMORY_ALLOCATION_ERROR
;
798 setCustomArgStartFormat(partIndex
, new_format
, status
);
803 // -------------------------------------
804 // Gets the format array.
// Returns an array of aliases to the formatters of the top-level arguments,
// one slot per argument, and counts them in cnt. The array is held by this
// object in formatAliases.
// NOTE(review): the initialization of cnt, the NULL checks on the allocation
// results and their returns are not visible in this listing.
806 MessageFormat::getFormats(int32_t& cnt
) const
808 // This old API returns an array (which we hold) of Format*
809 // pointers. The array is valid up to the next call to any
810 // method on this object. We construct and resize an array
811 // on demand that contains aliases to the subformats[i].format
814 // Get total required capacity first (it's refreshed on each call).
815 int32_t totalCapacity
= 0;
816 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0; ++totalCapacity
) {}
// This is a const method, so the alias array is updated through a
// non-const pointer to this object.
818 MessageFormat
* t
= const_cast<MessageFormat
*> (this);
// First call: allocate the array at exactly the required capacity.
820 if (formatAliases
== nullptr) {
821 t
->formatAliasesCapacity
= totalCapacity
;
822 Format
** a
= (Format
**)
823 uprv_malloc(sizeof(Format
*) * formatAliasesCapacity
);
825 t
->formatAliasesCapacity
= 0;
828 t
->formatAliases
= a
;
// Later calls: grow the existing array only when more slots are needed.
829 } else if (totalCapacity
> formatAliasesCapacity
) {
830 Format
** a
= (Format
**)
831 uprv_realloc(formatAliases
, sizeof(Format
*) * totalCapacity
);
833 t
->formatAliasesCapacity
= 0;
836 t
->formatAliases
= a
;
837 t
->formatAliasesCapacity
= totalCapacity
;
// Fill one slot per top-level argument; a slot is NULL when
// getCachedFormatter() returns NULL for that argument.
840 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
841 t
->formatAliases
[cnt
++] = getCachedFormatter(partIndex
);
844 return (const Format
**)formatAliases
;
848 UnicodeString
MessageFormat::getArgName(int32_t partIndex
) {
849 const MessagePattern::Part
& part
= msgPattern
.getPart(partIndex
);
850 return msgPattern
.getSubstring(part
);
854 MessageFormat::getFormatNames(UErrorCode
& status
) {
855 if (U_FAILURE(status
)) return NULL
;
857 UVector
*fFormatNames
= new UVector(status
);
858 if (U_FAILURE(status
)) {
859 status
= U_MEMORY_ALLOCATION_ERROR
;
862 fFormatNames
->setDeleter(uprv_deleteUObject
);
864 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
865 fFormatNames
->addElement(new UnicodeString(getArgName(partIndex
+ 1)), status
);
868 StringEnumeration
* nameEnumerator
= new FormatNameEnumeration(fFormatNames
, status
);
869 return nameEnumerator
;
872 // -------------------------------------
873 // Formats the source Formattable array and copy into the result buffer.
874 // Ignore the FieldPosition result for error checking.
// Formats a positional argument array into appendTo. Forwards to the
// overload that also takes argument names, passing NULL for the names and
// the address of `ignore` as the field position.
// NOTE(review): the cnt parameter line is not visible in this listing.
877 MessageFormat::format(const Formattable
* source
,
879 UnicodeString
& appendTo
,
880 FieldPosition
& ignore
,
881 UErrorCode
& success
) const
883 return format(source
, NULL
, cnt
, appendTo
, &ignore
, success
);
886 // -------------------------------------
887 // Internally creates a MessageFormat instance based on the
888 // pattern and formats the arguments Formattable array and
889 // copy into the appendTo buffer.
// One-shot helper: builds a temporary MessageFormat from pattern and formats
// the positional arguments into appendTo (no names, no field position).
// NOTE(review): the cnt and success parameter lines are not visible in this
// listing.
892 MessageFormat::format( const UnicodeString
& pattern
,
893 const Formattable
* arguments
,
895 UnicodeString
& appendTo
,
898 MessageFormat
temp(pattern
, success
);
899 return temp
.format(arguments
, NULL
, cnt
, appendTo
, NULL
, success
);
902 // -------------------------------------
903 // Formats the source Formattable object and copy into the
904 // appendTo buffer. The Formattable object must be an array
905 // of Formattable instances, returns error otherwise.
// Formats a Formattable that must wrap an array of arguments; any other type
// sets U_ILLEGAL_ARGUMENT_ERROR.
// NOTE(review): the declaration of cnt and the early returns are not visible
// in this listing.
908 MessageFormat::format(const Formattable
& source
,
909 UnicodeString
& appendTo
,
910 FieldPosition
& ignore
,
911 UErrorCode
& success
) const
913 if (U_FAILURE(success
))
915 if (source
.getType() != Formattable::kArray
) {
916 success
= U_ILLEGAL_ARGUMENT_ERROR
;
// getArray() reports the element count through cnt.
920 const Formattable
* tmpPtr
= source
.getArray(cnt
);
921 return format(tmpPtr
, NULL
, cnt
, appendTo
, &ignore
, success
);
// Formats named arguments into appendTo. Forwards to the overload that takes
// (arguments, argumentNames, ...), passing NULL as the field position.
// NOTE(review): the count parameter line is not visible in this listing.
925 MessageFormat::format(const UnicodeString
* argumentNames
,
926 const Formattable
* arguments
,
928 UnicodeString
& appendTo
,
929 UErrorCode
& success
) const {
930 return format(arguments
, argumentNames
, count
, appendTo
, NULL
, success
);
933 // Does linear search to find the match for an ArgName.
// Returns a pointer to the argument whose entry in argumentNames equals
// name; the first match wins.
// NOTE(review): the return for the no-match case is not visible in this
// listing.
934 const Formattable
* MessageFormat::getArgFromListByName(const Formattable
* arguments
,
935 const UnicodeString
*argumentNames
,
936 int32_t cnt
, UnicodeString
& name
) const {
937 for (int32_t i
= 0; i
< cnt
; ++i
) {
938 if (0 == argumentNames
[i
].compare(name
)) {
939 return arguments
+ i
;
// Common entry point of the public format() overloads: wraps appendTo in an
// AppendableWrapper and formats the whole message (msgStart 0, no plural
// context).
// NOTE(review): the cnt and pos parameter lines and the return statements
// are not visible in this listing.
947 MessageFormat::format(const Formattable
* arguments
,
948 const UnicodeString
*argumentNames
,
950 UnicodeString
& appendTo
,
952 UErrorCode
& status
) const {
953 if (U_FAILURE(status
)) {
957 UnicodeStringAppendable
usapp(appendTo
);
958 AppendableWrapper
app(usapp
);
959 format(0, NULL
, arguments
, argumentNames
, cnt
, app
, pos
, status
);
966 * Mutable input/output values for the PluralSelectorProvider.
967 * Separate so that it is possible to make MessageFormat Freezable.
// Carries the per-call inputs and outputs of plural selection.
// NOTE(review): some member declarations (startIndex, number, offset) and
// part of the constructor body are not visible in this listing.
969 class PluralSelectorContext
{
// The output fields start out as numberArgIndex -1, formatter NULL and
// forReplaceNumber FALSE.
971 PluralSelectorContext(int32_t start
, const UnicodeString
&name
,
972 const Formattable
&num
, double off
, UErrorCode
&errorCode
)
973 : startIndex(start
), argName(name
), offset(off
),
974 numberArgIndex(-1), formatter(NULL
), forReplaceNumber(FALSE
) {
975 // number needs to be set even when select() is not called.
976 // Keep it as a Number/Formattable:
977 // For format() methods, and to preserve information (e.g., BigDecimal).
// Visible branch: the offset is subtracted from the value as a double.
981 number
= num
.getDouble(errorCode
) - off
;
985 // Input values for plural selection with decimals.
// Stored as a reference to the name passed to the constructor.
987 const UnicodeString
&argName
;
988 /** argument number - plural offset */
991 // Output values for plural selection with decimals.
992 /** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */
993 int32_t numberArgIndex
;
994 const Format
*formatter
;
995 /** formatted argument number - plural offset */
996 UnicodeString numberString
;
997 /** TRUE if number-offset was formatted with the stock number formatter */
998 UBool forReplaceNumber
;
1003 // if argumentNames is NULL, this means arguments is a numeric array.
1004 // arguments can not be NULL.
1005 // We use const void *plNumber rather than const PluralSelectorContext *pluralNumber
1006 // so that we need not declare the PluralSelectorContext in the public header file.
// Core formatting loop. Walks the pattern parts that follow msgStart, copies
// the literal text between parts to appendTo and formats each argument it
// meets. plNumber, when non-NULL, points to the PluralSelectorContext of the
// enclosing plural argument.
// NOTE(review): a number of lines (the cnt parameter, returns, `continue`
// statements, some `else` lines and closing braces) are not visible in this
// listing; the comments below describe only what is visible.
1007 void MessageFormat::format(int32_t msgStart
, const void *plNumber
,
1008 const Formattable
* arguments
,
1009 const UnicodeString
*argumentNames
,
1011 AppendableWrapper
& appendTo
,
1012 FieldPosition
* ignore
,
1013 UErrorCode
& success
) const {
1014 if (U_FAILURE(success
)) {
1018 const UnicodeString
& msgString
= msgPattern
.getPatternString();
// prevIndex: pattern-string offset up to which text has been handled.
1019 int32_t prevIndex
= msgPattern
.getPart(msgStart
).getLimit();
1020 for (int32_t i
= msgStart
+ 1; U_SUCCESS(success
) ; ++i
) {
1021 const MessagePattern::Part
* part
= &msgPattern
.getPart(i
);
1022 const UMessagePatternPartType type
= part
->getType();
1023 int32_t index
= part
->getIndex();
// Copy the literal text between the previous part and this one.
1024 appendTo
.append(msgString
, prevIndex
, index
- prevIndex
);
1025 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1028 prevIndex
= part
->getLimit();
// REPLACE_NUMBER: output the plural number held in the context.
1029 if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
1030 const PluralSelectorContext
&pluralNumber
=
1031 *static_cast<const PluralSelectorContext
*>(plNumber
);
1032 if(pluralNumber
.forReplaceNumber
) {
1033 // number-offset was already formatted.
1034 appendTo
.formatAndAppend(pluralNumber
.formatter
,
1035 pluralNumber
.number
, pluralNumber
.numberString
, success
);
1037 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1038 appendTo
.formatAndAppend(nf
, pluralNumber
.number
, success
);
1042 if (type
!= UMSGPAT_PART_TYPE_ARG_START
) {
// From here on the part is an ARG_START. Record where the argument ends and
// its type, then step to the next part, which holds the name or number.
1045 int32_t argLimit
= msgPattern
.getLimitPartIndex(i
);
1046 UMessagePatternArgType argType
= part
->getArgType();
1047 part
= &msgPattern
.getPart(++i
);
1048 const Formattable
* arg
;
1049 UBool noArg
= FALSE
;
1050 UnicodeString argName
= msgPattern
.getSubstring(*part
);
// Positional lookup when no names were supplied, otherwise lookup by name.
1051 if (argumentNames
== NULL
) {
1052 int32_t argNumber
= part
->getValue(); // ARG_NUMBER
1053 if (0 <= argNumber
&& argNumber
< cnt
) {
1054 arg
= arguments
+ argNumber
;
1060 arg
= getArgFromListByName(arguments
, argumentNames
, cnt
, argName
);
// Output length before this argument; handed to updateMetaData() below.
1066 int32_t prevDestLength
= appendTo
.length();
1067 const Format
* formatter
= NULL
;
// Builds "{" + argName + "}".
// NOTE(review): the condition and the call that consume this string are not
// visible in this listing.
1070 UnicodeString(LEFT_CURLY_BRACE
).append(argName
).append(RIGHT_CURLY_BRACE
));
1071 } else if (arg
== NULL
) {
1072 appendTo
.append(NULL_STRING
, 4);
// i - 2 is this argument's ARG_START index again (i was advanced twice).
// This branch applies when the plural context refers to this argument.
1073 } else if(plNumber
!=NULL
&&
1074 static_cast<const PluralSelectorContext
*>(plNumber
)->numberArgIndex
==(i
-2)) {
1075 const PluralSelectorContext
&pluralNumber
=
1076 *static_cast<const PluralSelectorContext
*>(plNumber
);
1077 if(pluralNumber
.offset
== 0) {
1078 // The number was already formatted with this formatter.
1079 appendTo
.formatAndAppend(pluralNumber
.formatter
, pluralNumber
.number
,
1080 pluralNumber
.numberString
, success
);
1082 // Do not use the formatted (number-offset) string for a named argument
1083 // that formats the number without subtracting the offset.
1084 appendTo
.formatAndAppend(pluralNumber
.formatter
, *arg
, success
);
1086 } else if ((formatter
= getCachedFormatter(i
-2)) != 0) {
1087 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
1088 if (dynamic_cast<const ChoiceFormat
*>(formatter
) ||
1089 dynamic_cast<const PluralFormat
*>(formatter
) ||
1090 dynamic_cast<const SelectFormat
*>(formatter
)) {
1091 // We only handle nested formats here if they were provided via
1092 // setFormat() or its siblings. Otherwise they are not cached and instead
1093 // handled below according to argType.
1094 UnicodeString subMsgString
;
1095 formatter
->format(*arg
, subMsgString
, success
);
// If the result contains '{' (or an apostrophe outside JDK apostrophe mode),
// format it again as a message; otherwise append it unchanged.
1096 if (subMsgString
.indexOf(LEFT_CURLY_BRACE
) >= 0 ||
1097 (subMsgString
.indexOf(SINGLE_QUOTE
) >= 0 && !MessageImpl::jdkAposMode(msgPattern
))
1099 MessageFormat
subMsgFormat(subMsgString
, fLocale
, success
);
1100 subMsgFormat
.format(0, NULL
, arguments
, argumentNames
, cnt
, appendTo
, ignore
, success
);
1102 appendTo
.append(subMsgString
);
1105 appendTo
.formatAndAppend(formatter
, *arg
, success
);
1107 } else if (argType
== UMSGPAT_ARG_TYPE_NONE
|| (cachedFormatters
&& uhash_iget(cachedFormatters
, i
- 2))) {
1108 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1109 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1110 // for the hash table containing a DummyFormat.
// Default formatting chosen by the runtime type of the argument.
1111 if (arg
->isNumeric()) {
1112 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1113 appendTo
.formatAndAppend(nf
, *arg
, success
);
1114 } else if (arg
->getType() == Formattable::kDate
) {
1115 const DateFormat
* df
= getDefaultDateFormat(success
);
1116 appendTo
.formatAndAppend(df
, *arg
, success
);
1118 appendTo
.append(arg
->getString(success
));
// Choice argument: must be numeric; its value selects the sub-message.
1120 } else if (argType
== UMSGPAT_ARG_TYPE_CHOICE
) {
1121 if (!arg
->isNumeric()) {
1122 success
= U_ILLEGAL_ARGUMENT_ERROR
;
1125 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1126 // because only this one converts non-double numeric types to double.
1127 const double number
= arg
->getDouble(success
);
1128 int32_t subMsgStart
= ChoiceFormat::findSubMessage(msgPattern
, i
, number
);
1129 formatComplexSubMessage(subMsgStart
, NULL
, arguments
, argumentNames
,
1130 cnt
, appendTo
, success
);
// Plural-style argument: must be numeric; a fresh context is passed down to
// the selected sub-message.
1131 } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType
)) {
1132 if (!arg
->isNumeric()) {
1133 success
= U_ILLEGAL_ARGUMENT_ERROR
;
1136 const PluralSelectorProvider
&selector
=
1137 argType
== UMSGPAT_ARG_TYPE_PLURAL
? pluralProvider
: ordinalProvider
;
1138 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1139 // because only this one converts non-double numeric types to double.
1140 double offset
= msgPattern
.getPluralOffset(i
);
1141 PluralSelectorContext
context(i
, argName
, *arg
, offset
, success
);
1142 int32_t subMsgStart
= PluralFormat::findSubMessage(
1143 msgPattern
, i
, selector
, &context
, arg
->getDouble(success
), success
);
1144 formatComplexSubMessage(subMsgStart
, &context
, arguments
, argumentNames
,
1145 cnt
, appendTo
, success
);
// Select argument: the argument's string value selects the sub-message.
1146 } else if (argType
== UMSGPAT_ARG_TYPE_SELECT
) {
1147 int32_t subMsgStart
= SelectFormat::findSubMessage(msgPattern
, i
, arg
->getString(success
), success
);
1148 formatComplexSubMessage(subMsgStart
, NULL
, arguments
, argumentNames
,
1149 cnt
, appendTo
, success
);
1151 // This should never happen.
1152 success
= U_INTERNAL_PROGRAM_ERROR
;
// updateMetaData() returns the field position to use from here on; then
// resume after the end of this argument.
1155 ignore
= updateMetaData(appendTo
, prevDestLength
, ignore
, arg
);
1156 prevIndex
= msgPattern
.getPart(argLimit
).getLimit();
1162 void MessageFormat::formatComplexSubMessage(int32_t msgStart
,
1163 const void *plNumber
,
1164 const Formattable
* arguments
,
1165 const UnicodeString
*argumentNames
,
1167 AppendableWrapper
& appendTo
,
1168 UErrorCode
& success
) const {
1169 if (U_FAILURE(success
)) {
1173 if (!MessageImpl::jdkAposMode(msgPattern
)) {
1174 format(msgStart
, plNumber
, arguments
, argumentNames
, cnt
, appendTo
, NULL
, success
);
1178 // JDK compatibility mode: (see JDK MessageFormat.format() API docs)
1179 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes
1180 // - if the result string contains an open curly brace '{' then
1181 // instantiate a temporary MessageFormat object and format again;
1182 // otherwise just append the result string
1183 const UnicodeString
& msgString
= msgPattern
.getPatternString();
1185 int32_t prevIndex
= msgPattern
.getPart(msgStart
).getLimit();
1186 for (int32_t i
= msgStart
;;) {
1187 const MessagePattern::Part
& part
= msgPattern
.getPart(++i
);
1188 const UMessagePatternPartType type
= part
.getType();
1189 int32_t index
= part
.getIndex();
1190 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1191 sb
.append(msgString
, prevIndex
, index
- prevIndex
);
1193 } else if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
|| type
== UMSGPAT_PART_TYPE_SKIP_SYNTAX
) {
1194 sb
.append(msgString
, prevIndex
, index
- prevIndex
);
1195 if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
1196 const PluralSelectorContext
&pluralNumber
=
1197 *static_cast<const PluralSelectorContext
*>(plNumber
);
1198 if(pluralNumber
.forReplaceNumber
) {
1199 // number-offset was already formatted.
1200 sb
.append(pluralNumber
.numberString
);
1202 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1203 sb
.append(nf
->format(pluralNumber
.number
, sb
, success
));
1206 prevIndex
= part
.getLimit();
1207 } else if (type
== UMSGPAT_PART_TYPE_ARG_START
) {
1208 sb
.append(msgString
, prevIndex
, index
- prevIndex
);
1210 i
= msgPattern
.getLimitPartIndex(i
);
1211 index
= msgPattern
.getPart(i
).getLimit();
1212 MessageImpl::appendReducedApostrophes(msgString
, prevIndex
, index
, sb
);
1216 if (sb
.indexOf(LEFT_CURLY_BRACE
) >= 0) {
1217 UnicodeString emptyPattern
; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
1218 MessageFormat
subMsgFormat(emptyPattern
, fLocale
, success
);
1219 subMsgFormat
.applyPattern(sb
, UMSGPAT_APOS_DOUBLE_REQUIRED
, NULL
, success
);
1220 subMsgFormat
.format(0, NULL
, arguments
, argumentNames
, cnt
, appendTo
, NULL
, success
);
1222 appendTo
.append(sb
);
1227 UnicodeString
MessageFormat::getLiteralStringUntilNextArgument(int32_t from
) const {
1228 const UnicodeString
& msgString
=msgPattern
.getPatternString();
1229 int32_t prevIndex
=msgPattern
.getPart(from
).getLimit();
1231 for (int32_t i
= from
+ 1; ; ++i
) {
1232 const MessagePattern::Part
& part
= msgPattern
.getPart(i
);
1233 const UMessagePatternPartType type
=part
.getType();
1234 int32_t index
=part
.getIndex();
1235 b
.append(msgString
, prevIndex
, index
- prevIndex
);
1236 if(type
==UMSGPAT_PART_TYPE_ARG_START
|| type
==UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1239 // Unexpected Part "part" in parsed message.
1240 U_ASSERT(type
==UMSGPAT_PART_TYPE_SKIP_SYNTAX
|| type
==UMSGPAT_PART_TYPE_INSERT_CHAR
);
1241 prevIndex
=part
.getLimit();
1246 FieldPosition
* MessageFormat::updateMetaData(AppendableWrapper
& /*dest*/, int32_t /*prevLength*/,
1247 FieldPosition
* /*fp*/, const Formattable
* /*argId*/) const {
1248 // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing.
1251 if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) {
1252 fp->setBeginIndex(prevLength);
1253 fp->setEndIndex(dest.get_length());
1261 MessageFormat::findOtherSubMessage(int32_t partIndex
) const {
1262 int32_t count
=msgPattern
.countParts();
1263 const MessagePattern::Part
*part
= &msgPattern
.getPart(partIndex
);
1264 if(MessagePattern::Part::hasNumericValue(part
->getType())) {
1267 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
1268 // until ARG_LIMIT or end of plural-only pattern.
1269 UnicodeString
other(FALSE
, OTHER_STRING
, 5);
1271 part
=&msgPattern
.getPart(partIndex
++);
1272 UMessagePatternPartType type
=part
->getType();
1273 if(type
==UMSGPAT_PART_TYPE_ARG_LIMIT
) {
1276 U_ASSERT(type
==UMSGPAT_PART_TYPE_ARG_SELECTOR
);
1277 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
1278 if(msgPattern
.partSubstringMatches(*part
, other
)) {
1281 if(MessagePattern::Part::hasNumericValue(msgPattern
.getPartType(partIndex
))) {
1282 ++partIndex
; // skip the numeric-value part of "=1" etc.
1284 partIndex
=msgPattern
.getLimitPartIndex(partIndex
);
1285 } while(++partIndex
<count
);
1290 MessageFormat::findFirstPluralNumberArg(int32_t msgStart
, const UnicodeString
&argName
) const {
1291 for(int32_t i
=msgStart
+1;; ++i
) {
1292 const MessagePattern::Part
&part
=msgPattern
.getPart(i
);
1293 UMessagePatternPartType type
=part
.getType();
1294 if(type
==UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1297 if(type
==UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
1300 if(type
==UMSGPAT_PART_TYPE_ARG_START
) {
1301 UMessagePatternArgType argType
=part
.getArgType();
1302 if(!argName
.isEmpty() && (argType
==UMSGPAT_ARG_TYPE_NONE
|| argType
==UMSGPAT_ARG_TYPE_SIMPLE
)) {
1303 // ARG_NUMBER or ARG_NAME
1304 if(msgPattern
.partSubstringMatches(msgPattern
.getPart(i
+1), argName
)) {
1308 i
=msgPattern
.getLimitPartIndex(i
);
1313 void MessageFormat::copyObjects(const MessageFormat
& that
, UErrorCode
& ec
) {
1314 // Deep copy pointer fields.
1315 // We need not copy the formatAliases because they are re-filled
1316 // in each getFormats() call.
1317 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules
1318 // also get created on demand.
1319 argTypeCount
= that
.argTypeCount
;
1320 if (argTypeCount
> 0) {
1321 if (!allocateArgTypes(argTypeCount
, ec
)) {
1324 uprv_memcpy(argTypes
, that
.argTypes
, argTypeCount
* sizeof(argTypes
[0]));
1326 if (cachedFormatters
!= NULL
) {
1327 uhash_removeAll(cachedFormatters
);
1329 if (customFormatArgStarts
!= NULL
) {
1330 uhash_removeAll(customFormatArgStarts
);
1332 if (that
.cachedFormatters
) {
1333 if (cachedFormatters
== NULL
) {
1334 cachedFormatters
=uhash_open(uhash_hashLong
, uhash_compareLong
,
1335 equalFormatsForHash
, &ec
);
1336 if (U_FAILURE(ec
)) {
1339 uhash_setValueDeleter(cachedFormatters
, uprv_deleteUObject
);
1342 const int32_t count
= uhash_count(that
.cachedFormatters
);
1344 for (idx
= 0, pos
= UHASH_FIRST
; idx
< count
&& U_SUCCESS(ec
); ++idx
) {
1345 const UHashElement
* cur
= uhash_nextElement(that
.cachedFormatters
, &pos
);
1346 Format
* newFormat
= ((Format
*)(cur
->value
.pointer
))->clone();
1348 uhash_iput(cachedFormatters
, cur
->key
.integer
, newFormat
, &ec
);
1350 ec
= U_MEMORY_ALLOCATION_ERROR
;
1355 if (that
.customFormatArgStarts
) {
1356 if (customFormatArgStarts
== NULL
) {
1357 customFormatArgStarts
=uhash_open(uhash_hashLong
, uhash_compareLong
,
1360 const int32_t count
= uhash_count(that
.customFormatArgStarts
);
1362 for (idx
= 0, pos
= UHASH_FIRST
; idx
< count
&& U_SUCCESS(ec
); ++idx
) {
1363 const UHashElement
* cur
= uhash_nextElement(that
.customFormatArgStarts
, &pos
);
1364 uhash_iputi(customFormatArgStarts
, cur
->key
.integer
, cur
->value
.integer
, &ec
);
1371 MessageFormat::parse(int32_t msgStart
,
1372 const UnicodeString
& source
,
1375 UErrorCode
& ec
) const {
1377 if (U_FAILURE(ec
)) {
1378 pos
.setErrorIndex(pos
.getIndex());
1381 // parse() does not work with named arguments.
1382 if (msgPattern
.hasNamedArguments()) {
1383 ec
= U_ARGUMENT_TYPE_MISMATCH
;
1384 pos
.setErrorIndex(pos
.getIndex());
1387 LocalArray
<Formattable
> resultArray(new Formattable
[argTypeCount
? argTypeCount
: 1]);
1388 const UnicodeString
& msgString
=msgPattern
.getPatternString();
1389 int32_t prevIndex
=msgPattern
.getPart(msgStart
).getLimit();
1390 int32_t sourceOffset
= pos
.getIndex();
1391 ParsePosition
tempStatus(0);
1393 for(int32_t i
=msgStart
+1; ; ++i
) {
1394 UBool haveArgResult
= FALSE
;
1395 const MessagePattern::Part
* part
=&msgPattern
.getPart(i
);
1396 const UMessagePatternPartType type
=part
->getType();
1397 int32_t index
=part
->getIndex();
1398 // Make sure the literal string matches.
1399 int32_t len
= index
- prevIndex
;
1400 if (len
== 0 || (0 == msgString
.compare(prevIndex
, len
, source
, sourceOffset
, len
))) {
1401 sourceOffset
+= len
;
1404 pos
.setErrorIndex(sourceOffset
);
1405 return NULL
; // leave index as is to signal error
1407 if(type
==UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1408 // Things went well! Done.
1409 pos
.setIndex(sourceOffset
);
1410 return resultArray
.orphan();
1412 if(type
==UMSGPAT_PART_TYPE_SKIP_SYNTAX
|| type
==UMSGPAT_PART_TYPE_INSERT_CHAR
) {
1413 prevIndex
=part
->getLimit();
1416 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
1417 // Unexpected Part "part" in parsed message.
1418 U_ASSERT(type
==UMSGPAT_PART_TYPE_ARG_START
);
1419 int32_t argLimit
=msgPattern
.getLimitPartIndex(i
);
1421 UMessagePatternArgType argType
=part
->getArgType();
1422 part
=&msgPattern
.getPart(++i
);
1423 int32_t argNumber
= part
->getValue(); // ARG_NUMBER
1426 const Format
* formatter
= NULL
;
1427 Formattable
& argResult
= resultArray
[argNumber
];
1429 if(cachedFormatters
!=NULL
&& (formatter
= getCachedFormatter(i
- 2))!=NULL
) {
1430 // Just parse using the formatter.
1431 tempStatus
.setIndex(sourceOffset
);
1432 formatter
->parseObject(source
, argResult
, tempStatus
);
1433 if (tempStatus
.getIndex() == sourceOffset
) {
1434 pos
.setErrorIndex(sourceOffset
);
1435 return NULL
; // leave index as is to signal error
1437 sourceOffset
= tempStatus
.getIndex();
1438 haveArgResult
= TRUE
;
1440 argType
==UMSGPAT_ARG_TYPE_NONE
|| (cachedFormatters
&& uhash_iget(cachedFormatters
, i
-2))) {
1441 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1442 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1443 // for the hash table containind DummyFormat.
1445 // Match as a string.
1446 // if at end, use longest possible match
1447 // otherwise uses first match to intervening string
1448 // does NOT recursively try all possibilities
1449 UnicodeString stringAfterArgument
= getLiteralStringUntilNextArgument(argLimit
);
1451 if (!stringAfterArgument
.isEmpty()) {
1452 next
= source
.indexOf(stringAfterArgument
, sourceOffset
);
1454 next
= source
.length();
1457 pos
.setErrorIndex(sourceOffset
);
1458 return NULL
; // leave index as is to signal error
1460 UnicodeString
strValue(source
.tempSubString(sourceOffset
, next
- sourceOffset
));
1461 UnicodeString compValue
;
1462 compValue
.append(LEFT_CURLY_BRACE
);
1463 itos(argNumber
, compValue
);
1464 compValue
.append(RIGHT_CURLY_BRACE
);
1465 if (0 != strValue
.compare(compValue
)) {
1466 argResult
.setString(strValue
);
1467 haveArgResult
= TRUE
;
1469 sourceOffset
= next
;
1471 } else if(argType
==UMSGPAT_ARG_TYPE_CHOICE
) {
1472 tempStatus
.setIndex(sourceOffset
);
1473 double choiceResult
= ChoiceFormat::parseArgument(msgPattern
, i
, source
, tempStatus
);
1474 if (tempStatus
.getIndex() == sourceOffset
) {
1475 pos
.setErrorIndex(sourceOffset
);
1476 return NULL
; // leave index as is to signal error
1478 argResult
.setDouble(choiceResult
);
1479 haveArgResult
= TRUE
;
1480 sourceOffset
= tempStatus
.getIndex();
1481 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType
) || argType
==UMSGPAT_ARG_TYPE_SELECT
) {
1482 // Parsing not supported.
1483 ec
= U_UNSUPPORTED_ERROR
;
1486 // This should never happen.
1487 ec
= U_INTERNAL_PROGRAM_ERROR
;
1490 if (haveArgResult
&& count
<= argNumber
) {
1491 count
= argNumber
+ 1;
1493 prevIndex
=msgPattern
.getPart(argLimit
).getLimit();
1497 // -------------------------------------
1498 // Parses the source pattern and returns the Formattable objects array,
1499 // the array count and the ending parse position. The caller of this method
1503 MessageFormat::parse(const UnicodeString
& source
,
1505 int32_t& count
) const {
1506 UErrorCode ec
= U_ZERO_ERROR
;
1507 return parse(0, source
, pos
, count
, ec
);
1510 // -------------------------------------
1511 // Parses the source string and returns the array of
1512 // Formattable objects and the array count. The caller
1513 // owns the returned array.
1516 MessageFormat::parse(const UnicodeString
& source
,
1518 UErrorCode
& success
) const
1520 if (msgPattern
.hasNamedArguments()) {
1521 success
= U_ARGUMENT_TYPE_MISMATCH
;
1524 ParsePosition
status(0);
1525 // Calls the actual implementation method and starts
1526 // from zero offset of the source text.
1527 Formattable
* result
= parse(source
, status
, cnt
);
1528 if (status
.getIndex() == 0) {
1529 success
= U_MESSAGE_PARSE_ERROR
;
1536 // -------------------------------------
1537 // Parses the source text and copy into the result buffer.
1540 MessageFormat::parseObject( const UnicodeString
& source
,
1541 Formattable
& result
,
1542 ParsePosition
& status
) const
1545 Formattable
* tmpResult
= parse(source
, status
, cnt
);
1546 if (tmpResult
!= NULL
)
1547 result
.adoptArray(tmpResult
, cnt
);
1551 MessageFormat::autoQuoteApostrophe(const UnicodeString
& pattern
, UErrorCode
& status
) {
1552 UnicodeString result
;
1553 if (U_SUCCESS(status
)) {
1554 int32_t plen
= pattern
.length();
1555 const UChar
* pat
= pattern
.getBuffer();
1556 int32_t blen
= plen
* 2 + 1; // space for null termination, convenience
1557 UChar
* buf
= result
.getBuffer(blen
);
1559 status
= U_MEMORY_ALLOCATION_ERROR
;
1561 int32_t len
= umsg_autoQuoteApostrophe(pat
, plen
, buf
, blen
, &status
);
1562 result
.releaseBuffer(U_SUCCESS(status
) ? len
: 0);
1565 if (U_FAILURE(status
)) {
1566 result
.setToBogus();
1571 // -------------------------------------
1573 static Format
* makeRBNF(URBNFRuleSetTag tag
, const Locale
& locale
, const UnicodeString
& defaultRuleSet
, UErrorCode
& ec
) {
1574 RuleBasedNumberFormat
* fmt
= new RuleBasedNumberFormat(tag
, locale
, ec
);
1576 ec
= U_MEMORY_ALLOCATION_ERROR
;
1577 } else if (U_SUCCESS(ec
) && defaultRuleSet
.length() > 0) {
1578 UErrorCode localStatus
= U_ZERO_ERROR
; // ignore unrecognized default rule set
1579 fmt
->setDefaultRuleSet(defaultRuleSet
, localStatus
);
1584 void MessageFormat::cacheExplicitFormats(UErrorCode
& status
) {
1585 if (U_FAILURE(status
)) {
1589 if (cachedFormatters
!= NULL
) {
1590 uhash_removeAll(cachedFormatters
);
1592 if (customFormatArgStarts
!= NULL
) {
1593 uhash_removeAll(customFormatArgStarts
);
1596 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
1597 // which we need not examine.
1598 int32_t limit
= msgPattern
.countParts() - 2;
1600 // We also need not look at the first two "parts"
1601 // (at most MSG_START and ARG_START) in this loop.
1602 // We determine the argTypeCount first so that we can allocateArgTypes
1603 // so that the next loop can set argTypes[argNumber].
1604 // (This is for the C API which needs the argTypes to read its va_arg list.)
1605 for (int32_t i
= 2; i
< limit
&& U_SUCCESS(status
); ++i
) {
1606 const MessagePattern::Part
& part
= msgPattern
.getPart(i
);
1607 if (part
.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER
) {
1608 const int argNumber
= part
.getValue();
1609 if (argNumber
>= argTypeCount
) {
1610 argTypeCount
= argNumber
+ 1;
1614 if (!allocateArgTypes(argTypeCount
, status
)) {
1617 // Set all argTypes to kObject, as a "none" value, for lack of any better value.
1618 // We never use kObject for real arguments.
1619 // We use it as "no argument yet" for the check for hasArgTypeConflicts.
1620 for (int32_t i
= 0; i
< argTypeCount
; ++i
) {
1621 argTypes
[i
] = Formattable::kObject
;
1623 hasArgTypeConflicts
= FALSE
;
1625 // This loop starts at part index 1 because we do need to examine
1626 // ARG_START parts. (But we can ignore the MSG_START.)
1627 for (int32_t i
= 1; i
< limit
&& U_SUCCESS(status
); ++i
) {
1628 const MessagePattern::Part
* part
= &msgPattern
.getPart(i
);
1629 if (part
->getType() != UMSGPAT_PART_TYPE_ARG_START
) {
1632 UMessagePatternArgType argType
= part
->getArgType();
1634 int32_t argNumber
= -1;
1635 part
= &msgPattern
.getPart(i
+ 1);
1636 if (part
->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER
) {
1637 argNumber
= part
->getValue();
1639 Formattable::Type formattableType
;
1642 case UMSGPAT_ARG_TYPE_NONE
:
1643 formattableType
= Formattable::kString
;
1645 case UMSGPAT_ARG_TYPE_SIMPLE
: {
1648 UnicodeString explicitType
= msgPattern
.getSubstring(msgPattern
.getPart(i
++));
1649 UnicodeString style
;
1650 if ((part
= &msgPattern
.getPart(i
))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE
) {
1651 style
= msgPattern
.getSubstring(*part
);
1654 UParseError parseError
;
1655 Format
* formatter
= createAppropriateFormat(explicitType
, style
, formattableType
, parseError
, status
);
1656 setArgStartFormat(index
, formatter
, status
);
1659 case UMSGPAT_ARG_TYPE_CHOICE
:
1660 case UMSGPAT_ARG_TYPE_PLURAL
:
1661 case UMSGPAT_ARG_TYPE_SELECTORDINAL
:
1662 formattableType
= Formattable::kDouble
;
1664 case UMSGPAT_ARG_TYPE_SELECT
:
1665 formattableType
= Formattable::kString
;
1668 status
= U_INTERNAL_PROGRAM_ERROR
; // Should be unreachable.
1669 formattableType
= Formattable::kString
;
1672 if (argNumber
!= -1) {
1673 if (argTypes
[argNumber
] != Formattable::kObject
&& argTypes
[argNumber
] != formattableType
) {
1674 hasArgTypeConflicts
= TRUE
;
1676 argTypes
[argNumber
] = formattableType
;
1681 Format
* MessageFormat::createAppropriateFormat(UnicodeString
& type
, UnicodeString
& style
,
1682 Formattable::Type
& formattableType
, UParseError
& parseError
,
1684 if (U_FAILURE(ec
)) {
1688 int32_t typeID
, styleID
;
1689 DateFormat::EStyle date_style
;
1690 int32_t firstNonSpace
;
1692 switch (typeID
= findKeyword(type
, TYPE_IDS
)) {
1694 formattableType
= Formattable::kDouble
;
1695 switch (findKeyword(style
, NUMBER_STYLE_IDS
)) {
1697 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1700 fmt
= NumberFormat::createCurrencyInstance(fLocale
, ec
);
1703 fmt
= NumberFormat::createPercentInstance(fLocale
, ec
);
1706 formattableType
= Formattable::kLong
;
1707 fmt
= createIntegerFormat(fLocale
, ec
);
1709 default: // pattern or skeleton
1710 firstNonSpace
= PatternProps::skipWhiteSpace(style
, 0);
1711 if (style
.compare(firstNonSpace
, 2, u
"::", 0, 2) == 0) {
1713 UnicodeString skeleton
= style
.tempSubString(firstNonSpace
+ 2);
1714 fmt
= number::NumberFormatter::forSkeleton(skeleton
, ec
).locale(fLocale
).toFormat(ec
);
1717 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1719 auto* decfmt
= dynamic_cast<DecimalFormat
*>(fmt
);
1720 if (decfmt
!= nullptr) {
1721 decfmt
->applyPattern(style
, parseError
, ec
);
1731 formattableType
= Formattable::kDate
;
1732 firstNonSpace
= PatternProps::skipWhiteSpace(style
, 0);
1733 if (style
.compare(firstNonSpace
, 2, u
"::", 0, 2) == 0) {
1735 UnicodeString skeleton
= style
.tempSubString(firstNonSpace
+ 2);
1736 fmt
= DateFormat::createInstanceForSkeleton(skeleton
, fLocale
, ec
);
1739 styleID
= findKeyword(style
, DATE_STYLE_IDS
);
1740 date_style
= (styleID
>= 0) ? DATE_STYLES
[styleID
] : DateFormat::kDefault
;
1743 fmt
= DateFormat::createDateInstance(date_style
, fLocale
);
1745 fmt
= DateFormat::createTimeInstance(date_style
, fLocale
);
1748 if (styleID
< 0 && fmt
!= NULL
) {
1749 SimpleDateFormat
* sdtfmt
= dynamic_cast<SimpleDateFormat
*>(fmt
);
1750 if (sdtfmt
!= NULL
) {
1751 sdtfmt
->applyPattern(style
);
1758 formattableType
= Formattable::kDouble
;
1759 fmt
= makeRBNF(URBNF_SPELLOUT
, fLocale
, style
, ec
);
1762 formattableType
= Formattable::kDouble
;
1763 fmt
= makeRBNF(URBNF_ORDINAL
, fLocale
, style
, ec
);
1766 formattableType
= Formattable::kDouble
;
1767 fmt
= makeRBNF(URBNF_DURATION
, fLocale
, style
, ec
);
1770 formattableType
= Formattable::kString
;
1771 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
1779 //-------------------------------------
1780 // Finds the string, s, in the string array, list.
1781 int32_t MessageFormat::findKeyword(const UnicodeString
& s
,
1782 const UChar
* const *list
)
1785 return 0; // default
1788 int32_t length
= s
.length();
1789 const UChar
*ps
= PatternProps::trimWhiteSpace(s
.getBuffer(), length
);
1790 UnicodeString
buffer(FALSE
, ps
, length
);
1791 // Trims the space characters and turns all characters
1792 // in s to lower case.
1794 for (int32_t i
= 0; list
[i
]; ++i
) {
1795 if (!buffer
.compare(list
[i
], u_strlen(list
[i
]))) {
1803 * Convenience method that ought to be in NumberFormat
1806 MessageFormat::createIntegerFormat(const Locale
& locale
, UErrorCode
& status
) const {
1807 NumberFormat
*temp
= NumberFormat::createInstance(locale
, status
);
1808 DecimalFormat
*temp2
;
1809 if (temp
!= NULL
&& (temp2
= dynamic_cast<DecimalFormat
*>(temp
)) != NULL
) {
1810 temp2
->setMaximumFractionDigits(0);
1811 temp2
->setDecimalSeparatorAlwaysShown(FALSE
);
1812 temp2
->setParseIntegerOnly(TRUE
);
1819 * Return the default number format. Used to format a numeric
1820 * argument when subformats[i].format is NULL. Returns NULL
1823 * Semantically const but may modify *this.
1825 const NumberFormat
* MessageFormat::getDefaultNumberFormat(UErrorCode
& ec
) const {
1826 if (defaultNumberFormat
== NULL
) {
1827 MessageFormat
* t
= (MessageFormat
*) this;
1828 t
->defaultNumberFormat
= NumberFormat::createInstance(fLocale
, ec
);
1829 if (U_FAILURE(ec
)) {
1830 delete t
->defaultNumberFormat
;
1831 t
->defaultNumberFormat
= NULL
;
1832 } else if (t
->defaultNumberFormat
== NULL
) {
1833 ec
= U_MEMORY_ALLOCATION_ERROR
;
1836 return defaultNumberFormat
;
1840 * Return the default date format. Used to format a date
1841 * argument when subformats[i].format is NULL. Returns NULL
1844 * Semantically const but may modify *this.
1846 const DateFormat
* MessageFormat::getDefaultDateFormat(UErrorCode
& ec
) const {
1847 if (defaultDateFormat
== NULL
) {
1848 MessageFormat
* t
= (MessageFormat
*) this;
1849 t
->defaultDateFormat
= DateFormat::createDateTimeInstance(DateFormat::kShort
, DateFormat::kShort
, fLocale
);
1850 if (t
->defaultDateFormat
== NULL
) {
1851 ec
= U_MEMORY_ALLOCATION_ERROR
;
1854 return defaultDateFormat
;
1858 MessageFormat::usesNamedArguments() const {
1859 return msgPattern
.hasNamedArguments();
1863 MessageFormat::getArgTypeCount() const {
1864 return argTypeCount
;
1867 UBool
MessageFormat::equalFormats(const void* left
, const void* right
) {
1868 return *(const Format
*)left
==*(const Format
*)right
;
1872 UBool
MessageFormat::DummyFormat::operator==(const Format
&) const {
1876 MessageFormat::DummyFormat
* MessageFormat::DummyFormat::clone() const {
1877 return new DummyFormat();
1880 UnicodeString
& MessageFormat::DummyFormat::format(const Formattable
&,
1881 UnicodeString
& appendTo
,
1882 UErrorCode
& status
) const {
1883 if (U_SUCCESS(status
)) {
1884 status
= U_UNSUPPORTED_ERROR
;
1889 UnicodeString
& MessageFormat::DummyFormat::format(const Formattable
&,
1890 UnicodeString
& appendTo
,
1892 UErrorCode
& status
) const {
1893 if (U_SUCCESS(status
)) {
1894 status
= U_UNSUPPORTED_ERROR
;
1899 UnicodeString
& MessageFormat::DummyFormat::format(const Formattable
&,
1900 UnicodeString
& appendTo
,
1901 FieldPositionIterator
*,
1902 UErrorCode
& status
) const {
1903 if (U_SUCCESS(status
)) {
1904 status
= U_UNSUPPORTED_ERROR
;
1909 void MessageFormat::DummyFormat::parseObject(const UnicodeString
&,
1911 ParsePosition
& ) const {
1915 FormatNameEnumeration::FormatNameEnumeration(UVector
*fNameList
, UErrorCode
& /*status*/) {
1917 fFormatNames
= fNameList
;
1920 const UnicodeString
*
1921 FormatNameEnumeration::snext(UErrorCode
& status
) {
1922 if (U_SUCCESS(status
) && pos
< fFormatNames
->size()) {
1923 return (const UnicodeString
*)fFormatNames
->elementAt(pos
++);
1929 FormatNameEnumeration::reset(UErrorCode
& /*status*/) {
1934 FormatNameEnumeration::count(UErrorCode
& /*status*/) const {
1935 return (fFormatNames
==NULL
) ? 0 : fFormatNames
->size();
1938 FormatNameEnumeration::~FormatNameEnumeration() {
1939 delete fFormatNames
;
1942 MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat
&mf
, UPluralType t
)
1943 : msgFormat(mf
), rules(NULL
), type(t
) {
1946 MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
1950 UnicodeString
MessageFormat::PluralSelectorProvider::select(void *ctx
, double number
,
1951 UErrorCode
& ec
) const {
1952 if (U_FAILURE(ec
)) {
1953 return UnicodeString(FALSE
, OTHER_STRING
, 5);
1955 MessageFormat::PluralSelectorProvider
* t
= const_cast<MessageFormat::PluralSelectorProvider
*>(this);
1957 t
->rules
= PluralRules::forLocale(msgFormat
.fLocale
, type
, ec
);
1958 if (U_FAILURE(ec
)) {
1959 return UnicodeString(FALSE
, OTHER_STRING
, 5);
1962 // Select a sub-message according to how the number is formatted,
1963 // which is specified in the selected sub-message.
1964 // We avoid this circle by looking at how
1965 // the number is formatted in the "other" sub-message
1966 // which must always be present and usually contains the number.
1967 // Message authors should be consistent across sub-messages.
1968 PluralSelectorContext
&context
= *static_cast<PluralSelectorContext
*>(ctx
);
1969 int32_t otherIndex
= msgFormat
.findOtherSubMessage(context
.startIndex
);
1970 context
.numberArgIndex
= msgFormat
.findFirstPluralNumberArg(otherIndex
, context
.argName
);
1971 if(context
.numberArgIndex
> 0 && msgFormat
.cachedFormatters
!= NULL
) {
1973 (const Format
*)uhash_iget(msgFormat
.cachedFormatters
, context
.numberArgIndex
);
1975 if(context
.formatter
== NULL
) {
1976 context
.formatter
= msgFormat
.getDefaultNumberFormat(ec
);
1977 context
.forReplaceNumber
= TRUE
;
1979 if (context
.number
.getDouble(ec
) != number
) {
1980 ec
= U_INTERNAL_PROGRAM_ERROR
;
1981 return UnicodeString(FALSE
, OTHER_STRING
, 5);
1983 context
.formatter
->format(context
.number
, context
.numberString
, ec
);
1984 auto* decFmt
= dynamic_cast<const DecimalFormat
*>(context
.formatter
);
1985 if(decFmt
!= NULL
) {
1986 number::impl::DecimalQuantity dq
;
1987 decFmt
->formatToDecimalQuantity(context
.number
, dq
, ec
);
1988 if (U_FAILURE(ec
)) {
1989 return UnicodeString(FALSE
, OTHER_STRING
, 5);
1991 return rules
->select(dq
);
1993 return rules
->select(number
);
1997 void MessageFormat::PluralSelectorProvider::reset() {
2005 #endif /* #if !UCONFIG_NO_FORMATTING */