1 /********************************************************************
3 * Copyright (c) 1997-2014, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation.
14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
15 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
16 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 02/22/99 stephen Removed character literals for EBCDIC safety
19 * 11/01/09 kirtig Added SelectFormat
20 ********************************************************************/
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_FORMATTING
26 #include "unicode/appendable.h"
27 #include "unicode/choicfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/decimfmt.h"
30 #include "unicode/localpointer.h"
31 #include "unicode/msgfmt.h"
32 #include "unicode/plurfmt.h"
33 #include "unicode/rbnf.h"
34 #include "unicode/selfmt.h"
35 #include "unicode/smpdtfmt.h"
36 #include "unicode/umsg.h"
37 #include "unicode/ustring.h"
39 #include "patternprops.h"
40 #include "messageimpl.h"
41 #include "msgfmt_impl.h"
42 #include "plurrule_impl.h"
50 // *****************************************************************************
51 // class MessageFormat
52 // *****************************************************************************
54 #define SINGLE_QUOTE ((UChar)0x0027)
55 #define COMMA ((UChar)0x002C)
56 #define LEFT_CURLY_BRACE ((UChar)0x007B)
57 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
59 //---------------------------------------
62 static const UChar ID_NUMBER
[] = {
63 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
65 static const UChar ID_DATE
[] = {
66 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
68 static const UChar ID_TIME
[] = {
69 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
71 static const UChar ID_SPELLOUT
[] = {
72 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
74 static const UChar ID_ORDINAL
[] = {
75 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
77 static const UChar ID_DURATION
[] = {
78 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
81 // MessageFormat Type List Number, Date, Time or Choice
82 static const UChar
* const TYPE_IDS
[] = {
92 static const UChar ID_EMPTY
[] = {
93 0 /* empty string, used for default so that null can mark end of list */
95 static const UChar ID_CURRENCY
[] = {
96 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
98 static const UChar ID_PERCENT
[] = {
99 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
101 static const UChar ID_INTEGER
[] = {
102 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
105 // NumberFormat modifier list, default, currency, percent or integer
106 static const UChar
* const NUMBER_STYLE_IDS
[] = {
114 static const UChar ID_SHORT
[] = {
115 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
117 static const UChar ID_MEDIUM
[] = {
118 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
120 static const UChar ID_LONG
[] = {
121 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
123 static const UChar ID_FULL
[] = {
124 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
127 // DateFormat modifier list, default, short, medium, long or full
128 static const UChar
* const DATE_STYLE_IDS
[] = {
137 static const icu::DateFormat::EStyle DATE_STYLES
[] = {
138 icu::DateFormat::kDefault
,
139 icu::DateFormat::kShort
,
140 icu::DateFormat::kMedium
,
141 icu::DateFormat::kLong
,
142 icu::DateFormat::kFull
,
145 static const int32_t DEFAULT_INITIAL_CAPACITY
= 10;
147 static const UChar NULL_STRING
[] = {
148 0x6E, 0x75, 0x6C, 0x6C, 0 // "null"
151 static const UChar OTHER_STRING
[] = {
152 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
156 static UBool U_CALLCONV
equalFormatsForHash(const UHashTok key1
,
157 const UHashTok key2
) {
158 return icu::MessageFormat::equalFormats(key1
.pointer
, key2
.pointer
);
165 // -------------------------------------
166 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat
)
167 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration
)
169 //--------------------------------------------------------------------
172 * Convert an integer value to a string and append the result to
173 * the given UnicodeString.
175 static UnicodeString
& itos(int32_t i
, UnicodeString
& appendTo
) {
177 uprv_itou(temp
,16,i
,10,0); // 10 == radix
178 appendTo
.append(temp
, -1);
183 // AppendableWrapper: encapsulates the result of formatting, keeping track
184 // of the string and its length.
185 class AppendableWrapper
: public UMemory
{
187 AppendableWrapper(Appendable
& appendable
) : app(appendable
), len(0) {
189 void append(const UnicodeString
& s
) {
190 app
.appendString(s
.getBuffer(), s
.length());
193 void append(const UChar
* s
, const int32_t sLength
) {
194 app
.appendString(s
, sLength
);
197 void append(const UnicodeString
& s
, int32_t start
, int32_t length
) {
198 append(s
.tempSubString(start
, length
));
200 void formatAndAppend(const Format
* formatter
, const Formattable
& arg
, UErrorCode
& ec
) {
202 formatter
->format(arg
, s
, ec
);
207 void formatAndAppend(const Format
* formatter
, const Formattable
& arg
,
208 const UnicodeString
&argString
, UErrorCode
& ec
) {
209 if (!argString
.isEmpty()) {
214 formatAndAppend(formatter
, arg
, ec
);
226 // -------------------------------------
227 // Creates a MessageFormat instance based on the pattern.
229 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
231 : fLocale(Locale::getDefault()), // Uses the default locale
234 formatAliasesCapacity(0),
238 hasArgTypeConflicts(FALSE
),
239 defaultNumberFormat(NULL
),
240 defaultDateFormat(NULL
),
241 cachedFormatters(NULL
),
242 customFormatArgStarts(NULL
),
243 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
244 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
246 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
247 applyPattern(pattern
, success
);
250 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
251 const Locale
& newLocale
,
253 : fLocale(newLocale
),
256 formatAliasesCapacity(0),
260 hasArgTypeConflicts(FALSE
),
261 defaultNumberFormat(NULL
),
262 defaultDateFormat(NULL
),
263 cachedFormatters(NULL
),
264 customFormatArgStarts(NULL
),
265 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
266 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
268 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
269 applyPattern(pattern
, success
);
272 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
273 const Locale
& newLocale
,
274 UParseError
& parseError
,
276 : fLocale(newLocale
),
279 formatAliasesCapacity(0),
283 hasArgTypeConflicts(FALSE
),
284 defaultNumberFormat(NULL
),
285 defaultDateFormat(NULL
),
286 cachedFormatters(NULL
),
287 customFormatArgStarts(NULL
),
288 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
289 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
291 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
292 applyPattern(pattern
, parseError
, success
);
295 MessageFormat::MessageFormat(const MessageFormat
& that
)
298 fLocale(that
.fLocale
),
299 msgPattern(that
.msgPattern
),
301 formatAliasesCapacity(0),
305 hasArgTypeConflicts(that
.hasArgTypeConflicts
),
306 defaultNumberFormat(NULL
),
307 defaultDateFormat(NULL
),
308 cachedFormatters(NULL
),
309 customFormatArgStarts(NULL
),
310 pluralProvider(*this, UPLURAL_TYPE_CARDINAL
),
311 ordinalProvider(*this, UPLURAL_TYPE_ORDINAL
)
313 // This will take care of creating the hash tables (since they are NULL).
314 UErrorCode ec
= U_ZERO_ERROR
;
315 copyObjects(that
, ec
);
321 MessageFormat::~MessageFormat()
323 uhash_close(cachedFormatters
);
324 uhash_close(customFormatArgStarts
);
327 uprv_free(formatAliases
);
328 delete defaultNumberFormat
;
329 delete defaultDateFormat
;
332 //--------------------------------------------------------------------
333 // Variable-size array management
336 * Allocate argTypes[] to at least the given capacity and return
337 * TRUE if successful. If not, leave argTypes[] unchanged.
339 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
340 * if necessary to be at least as large as specified.
342 UBool
MessageFormat::allocateArgTypes(int32_t capacity
, UErrorCode
& status
) {
343 if (U_FAILURE(status
)) {
346 if (argTypeCapacity
>= capacity
) {
349 if (capacity
< DEFAULT_INITIAL_CAPACITY
) {
350 capacity
= DEFAULT_INITIAL_CAPACITY
;
351 } else if (capacity
< 2*argTypeCapacity
) {
352 capacity
= 2*argTypeCapacity
;
354 Formattable::Type
* a
= (Formattable::Type
*)
355 uprv_realloc(argTypes
, sizeof(*argTypes
) * capacity
);
357 status
= U_MEMORY_ALLOCATION_ERROR
;
361 argTypeCapacity
= capacity
;
365 // -------------------------------------
366 // assignment operator
369 MessageFormat::operator=(const MessageFormat
& that
)
372 // Calls the super class for assignment first.
373 Format::operator=(that
);
375 setLocale(that
.fLocale
);
376 msgPattern
= that
.msgPattern
;
377 hasArgTypeConflicts
= that
.hasArgTypeConflicts
;
379 UErrorCode ec
= U_ZERO_ERROR
;
380 copyObjects(that
, ec
);
389 MessageFormat::operator==(const Format
& rhs
) const
391 if (this == &rhs
) return TRUE
;
393 MessageFormat
& that
= (MessageFormat
&)rhs
;
395 // Check class ID before checking MessageFormat members
396 if (!Format::operator==(rhs
) ||
397 msgPattern
!= that
.msgPattern
||
398 fLocale
!= that
.fLocale
) {
402 // Compare hashtables.
403 if ((customFormatArgStarts
== NULL
) != (that
.customFormatArgStarts
== NULL
)) {
406 if (customFormatArgStarts
== NULL
) {
410 UErrorCode ec
= U_ZERO_ERROR
;
411 const int32_t count
= uhash_count(customFormatArgStarts
);
412 const int32_t rhs_count
= uhash_count(that
.customFormatArgStarts
);
413 if (count
!= rhs_count
) {
416 int32_t idx
= 0, rhs_idx
= 0, pos
= UHASH_FIRST
, rhs_pos
= UHASH_FIRST
;
417 for (; idx
< count
&& rhs_idx
< rhs_count
&& U_SUCCESS(ec
); ++idx
, ++rhs_idx
) {
418 const UHashElement
* cur
= uhash_nextElement(customFormatArgStarts
, &pos
);
419 const UHashElement
* rhs_cur
= uhash_nextElement(that
.customFormatArgStarts
, &rhs_pos
);
420 if (cur
->key
.integer
!= rhs_cur
->key
.integer
) {
423 const Format
* format
= (const Format
*)uhash_iget(cachedFormatters
, cur
->key
.integer
);
424 const Format
* rhs_format
= (const Format
*)uhash_iget(that
.cachedFormatters
, rhs_cur
->key
.integer
);
425 if (*format
!= *rhs_format
) {
432 // -------------------------------------
433 // Creates a copy of this MessageFormat, the caller owns the copy.
436 MessageFormat::clone() const
438 return new MessageFormat(*this);
441 // -------------------------------------
442 // Sets the locale of this MessageFormat object to theLocale.
445 MessageFormat::setLocale(const Locale
& theLocale
)
447 if (fLocale
!= theLocale
) {
448 delete defaultNumberFormat
;
449 defaultNumberFormat
= NULL
;
450 delete defaultDateFormat
;
451 defaultDateFormat
= NULL
;
453 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
454 pluralProvider
.reset();
455 ordinalProvider
.reset();
459 // -------------------------------------
460 // Gets the locale of this MessageFormat object.
463 MessageFormat::getLocale() const
469 MessageFormat::applyPattern(const UnicodeString
& newPattern
,
472 UParseError parseError
;
473 applyPattern(newPattern
,parseError
,status
);
477 // -------------------------------------
478 // Applies the new pattern and returns an error if the pattern
481 MessageFormat::applyPattern(const UnicodeString
& pattern
,
482 UParseError
& parseError
,
488 msgPattern
.parse(pattern
, &parseError
, ec
);
489 cacheExplicitFormats(ec
);
496 void MessageFormat::resetPattern() {
498 uhash_close(cachedFormatters
);
499 cachedFormatters
= NULL
;
500 uhash_close(customFormatArgStarts
);
501 customFormatArgStarts
= NULL
;
503 hasArgTypeConflicts
= FALSE
;
507 MessageFormat::applyPattern(const UnicodeString
& pattern
,
508 UMessagePatternApostropheMode aposMode
,
509 UParseError
* parseError
,
510 UErrorCode
& status
) {
511 if (aposMode
!= msgPattern
.getApostropheMode()) {
512 msgPattern
.clearPatternAndSetApostropheMode(aposMode
);
514 applyPattern(pattern
, *parseError
, status
);
517 // -------------------------------------
518 // Converts this MessageFormat instance to a pattern.
521 MessageFormat::toPattern(UnicodeString
& appendTo
) const {
522 if ((customFormatArgStarts
!= NULL
&& 0 != uhash_count(customFormatArgStarts
)) ||
523 0 == msgPattern
.countParts()
525 appendTo
.setToBogus();
528 return appendTo
.append(msgPattern
.getPatternString());
531 int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex
) const {
532 if (partIndex
!= 0) {
533 partIndex
= msgPattern
.getLimitPartIndex(partIndex
);
536 UMessagePatternPartType type
= msgPattern
.getPartType(++partIndex
);
537 if (type
== UMSGPAT_PART_TYPE_ARG_START
) {
540 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
546 void MessageFormat::setArgStartFormat(int32_t argStart
,
548 UErrorCode
& status
) {
549 if (U_FAILURE(status
)) {
553 if (cachedFormatters
== NULL
) {
554 cachedFormatters
=uhash_open(uhash_hashLong
, uhash_compareLong
,
555 equalFormatsForHash
, &status
);
556 if (U_FAILURE(status
)) {
560 uhash_setValueDeleter(cachedFormatters
, uprv_deleteUObject
);
562 if (formatter
== NULL
) {
563 formatter
= new DummyFormat();
565 uhash_iput(cachedFormatters
, argStart
, formatter
, &status
);
569 UBool
MessageFormat::argNameMatches(int32_t partIndex
, const UnicodeString
& argName
, int32_t argNumber
) {
570 const MessagePattern::Part
& part
= msgPattern
.getPart(partIndex
);
571 return part
.getType() == UMSGPAT_PART_TYPE_ARG_NAME
?
572 msgPattern
.partSubstringMatches(part
, argName
) :
573 part
.getValue() == argNumber
; // ARG_NUMBER
576 // Sets a custom formatter for a MessagePattern ARG_START part index.
577 // "Custom" formatters are provided by the user via setFormat() or similar APIs.
578 void MessageFormat::setCustomArgStartFormat(int32_t argStart
,
580 UErrorCode
& status
) {
581 setArgStartFormat(argStart
, formatter
, status
);
582 if (customFormatArgStarts
== NULL
) {
583 customFormatArgStarts
=uhash_open(uhash_hashLong
, uhash_compareLong
,
586 uhash_iputi(customFormatArgStarts
, argStart
, 1, &status
);
589 Format
* MessageFormat::getCachedFormatter(int32_t argumentNumber
) const {
590 if (cachedFormatters
== NULL
) {
593 void* ptr
= uhash_iget(cachedFormatters
, argumentNumber
);
594 if (ptr
!= NULL
&& dynamic_cast<DummyFormat
*>((Format
*)ptr
) == NULL
) {
595 return (Format
*) ptr
;
597 // Not cached, or a DummyFormat representing setFormat(NULL).
602 // -------------------------------------
603 // Adopts the new formats array and updates the array count.
604 // This MessageFormat instance owns the new formats.
606 MessageFormat::adoptFormats(Format
** newFormats
,
608 if (newFormats
== NULL
|| count
< 0) {
611 // Throw away any cached formatters.
612 if (cachedFormatters
!= NULL
) {
613 uhash_removeAll(cachedFormatters
);
615 if (customFormatArgStarts
!= NULL
) {
616 uhash_removeAll(customFormatArgStarts
);
619 int32_t formatNumber
= 0;
620 UErrorCode status
= U_ZERO_ERROR
;
621 for (int32_t partIndex
= 0;
622 formatNumber
< count
&& U_SUCCESS(status
) &&
623 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
624 setCustomArgStartFormat(partIndex
, newFormats
[formatNumber
], status
);
627 // Delete those that didn't get used (if any).
628 for (; formatNumber
< count
; ++formatNumber
) {
629 delete newFormats
[formatNumber
];
634 // -------------------------------------
635 // Sets the new formats array and updates the array count.
636 // This MessageFormat instance makes a copy of the new formats.
639 MessageFormat::setFormats(const Format
** newFormats
,
641 if (newFormats
== NULL
|| count
< 0) {
644 // Throw away any cached formatters.
645 if (cachedFormatters
!= NULL
) {
646 uhash_removeAll(cachedFormatters
);
648 if (customFormatArgStarts
!= NULL
) {
649 uhash_removeAll(customFormatArgStarts
);
652 UErrorCode status
= U_ZERO_ERROR
;
653 int32_t formatNumber
= 0;
654 for (int32_t partIndex
= 0;
655 formatNumber
< count
&& U_SUCCESS(status
) && (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
656 Format
* newFormat
= NULL
;
657 if (newFormats
[formatNumber
] != NULL
) {
658 newFormat
= newFormats
[formatNumber
]->clone();
659 if (newFormat
== NULL
) {
660 status
= U_MEMORY_ALLOCATION_ERROR
;
663 setCustomArgStartFormat(partIndex
, newFormat
, status
);
666 if (U_FAILURE(status
)) {
671 // -------------------------------------
672 // Adopt a single format by format number.
673 // Do nothing if the format number is not less than the array count.
676 MessageFormat::adoptFormat(int32_t n
, Format
*newFormat
) {
677 LocalPointer
<Format
> p(newFormat
);
679 int32_t formatNumber
= 0;
680 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
681 if (n
== formatNumber
) {
682 UErrorCode status
= U_ZERO_ERROR
;
683 setCustomArgStartFormat(partIndex
, p
.orphan(), status
);
691 // -------------------------------------
692 // Adopt a single format by format name.
693 // Do nothing if there is no match of formatName.
695 MessageFormat::adoptFormat(const UnicodeString
& formatName
,
696 Format
* formatToAdopt
,
697 UErrorCode
& status
) {
698 LocalPointer
<Format
> p(formatToAdopt
);
699 if (U_FAILURE(status
)) {
702 int32_t argNumber
= MessagePattern::validateArgumentName(formatName
);
703 if (argNumber
< UMSGPAT_ARG_NAME_NOT_NUMBER
) {
704 status
= U_ILLEGAL_ARGUMENT_ERROR
;
707 for (int32_t partIndex
= 0;
708 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0 && U_SUCCESS(status
);
710 if (argNameMatches(partIndex
+ 1, formatName
, argNumber
)) {
714 } else if (formatToAdopt
== NULL
) {
717 f
= formatToAdopt
->clone();
719 status
= U_MEMORY_ALLOCATION_ERROR
;
723 setCustomArgStartFormat(partIndex
, f
, status
);
728 // -------------------------------------
729 // Set a single format.
730 // Do nothing if the variable is not less than the array count.
732 MessageFormat::setFormat(int32_t n
, const Format
& newFormat
) {
735 int32_t formatNumber
= 0;
736 for (int32_t partIndex
= 0;
737 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
738 if (n
== formatNumber
) {
739 Format
* new_format
= newFormat
.clone();
741 UErrorCode status
= U_ZERO_ERROR
;
742 setCustomArgStartFormat(partIndex
, new_format
, status
);
751 // -------------------------------------
752 // Get a single format by format name.
753 // Returns the cached formatter of the first top-level argument that matches formatName.
755 MessageFormat::getFormat(const UnicodeString
& formatName
, UErrorCode
& status
) {
756 if (U_FAILURE(status
) || cachedFormatters
== NULL
) return NULL
;
758 int32_t argNumber
= MessagePattern::validateArgumentName(formatName
);
759 if (argNumber
< UMSGPAT_ARG_NAME_NOT_NUMBER
) {
760 status
= U_ILLEGAL_ARGUMENT_ERROR
;
763 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
764 if (argNameMatches(partIndex
+ 1, formatName
, argNumber
)) {
765 return getCachedFormatter(partIndex
);
771 // -------------------------------------
772 // Set a single format by format name
773 // Do nothing if there is no match of formatName.
775 MessageFormat::setFormat(const UnicodeString
& formatName
,
776 const Format
& newFormat
,
777 UErrorCode
& status
) {
778 if (U_FAILURE(status
)) return;
780 int32_t argNumber
= MessagePattern::validateArgumentName(formatName
);
781 if (argNumber
< UMSGPAT_ARG_NAME_NOT_NUMBER
) {
782 status
= U_ILLEGAL_ARGUMENT_ERROR
;
785 for (int32_t partIndex
= 0;
786 (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0 && U_SUCCESS(status
);
788 if (argNameMatches(partIndex
+ 1, formatName
, argNumber
)) {
789 Format
* new_format
= newFormat
.clone();
790 if (new_format
== NULL
) {
791 status
= U_MEMORY_ALLOCATION_ERROR
;
794 setCustomArgStartFormat(partIndex
, new_format
, status
);
799 // -------------------------------------
800 // Gets the format array.
802 MessageFormat::getFormats(int32_t& cnt
) const
804 // This old API returns an array (which we hold) of Format*
805 // pointers. The array is valid up to the next call to any
806 // method on this object. We construct and resize an array
807 // on demand that contains aliases to the subformats[i].format
809 MessageFormat
* t
= const_cast<MessageFormat
*> (this);
811 if (formatAliases
== NULL
) {
812 t
->formatAliasesCapacity
= (argTypeCount
<10) ? 10 : argTypeCount
;
813 Format
** a
= (Format
**)
814 uprv_malloc(sizeof(Format
*) * formatAliasesCapacity
);
816 t
->formatAliasesCapacity
= 0;
819 t
->formatAliases
= a
;
820 } else if (argTypeCount
> formatAliasesCapacity
) {
821 Format
** a
= (Format
**)
822 uprv_realloc(formatAliases
, sizeof(Format
*) * argTypeCount
);
824 t
->formatAliasesCapacity
= 0;
827 t
->formatAliases
= a
;
828 t
->formatAliasesCapacity
= argTypeCount
;
831 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
832 t
->formatAliases
[cnt
++] = getCachedFormatter(partIndex
);
835 return (const Format
**)formatAliases
;
839 UnicodeString
MessageFormat::getArgName(int32_t partIndex
) {
840 const MessagePattern::Part
& part
= msgPattern
.getPart(partIndex
);
841 return msgPattern
.getSubstring(part
);
845 MessageFormat::getFormatNames(UErrorCode
& status
) {
846 if (U_FAILURE(status
)) return NULL
;
848 UVector
*fFormatNames
= new UVector(status
);
849 if (U_FAILURE(status
)) {
850 status
= U_MEMORY_ALLOCATION_ERROR
;
853 fFormatNames
->setDeleter(uprv_deleteUObject
);
855 for (int32_t partIndex
= 0; (partIndex
= nextTopLevelArgStart(partIndex
)) >= 0;) {
856 fFormatNames
->addElement(new UnicodeString(getArgName(partIndex
+ 1)), status
);
859 StringEnumeration
* nameEnumerator
= new FormatNameEnumeration(fFormatNames
, status
);
860 return nameEnumerator
;
863 // -------------------------------------
864 // Formats the source Formattable array and copy into the result buffer.
865 // Ignore the FieldPosition result for error checking.
868 MessageFormat::format(const Formattable
* source
,
870 UnicodeString
& appendTo
,
871 FieldPosition
& ignore
,
872 UErrorCode
& success
) const
874 return format(source
, NULL
, cnt
, appendTo
, &ignore
, success
);
877 // -------------------------------------
878 // Internally creates a MessageFormat instance based on the
879 // pattern and formats the arguments Formattable array and
880 // copy into the appendTo buffer.
883 MessageFormat::format( const UnicodeString
& pattern
,
884 const Formattable
* arguments
,
886 UnicodeString
& appendTo
,
889 MessageFormat
temp(pattern
, success
);
890 return temp
.format(arguments
, NULL
, cnt
, appendTo
, NULL
, success
);
893 // -------------------------------------
894 // Formats the source Formattable object and copy into the
895 // appendTo buffer. The Formattable object must be an array
896 // of Formattable instances, returns error otherwise.
899 MessageFormat::format(const Formattable
& source
,
900 UnicodeString
& appendTo
,
901 FieldPosition
& ignore
,
902 UErrorCode
& success
) const
904 if (U_FAILURE(success
))
906 if (source
.getType() != Formattable::kArray
) {
907 success
= U_ILLEGAL_ARGUMENT_ERROR
;
911 const Formattable
* tmpPtr
= source
.getArray(cnt
);
912 return format(tmpPtr
, NULL
, cnt
, appendTo
, &ignore
, success
);
916 MessageFormat::format(const UnicodeString
* argumentNames
,
917 const Formattable
* arguments
,
919 UnicodeString
& appendTo
,
920 UErrorCode
& success
) const {
921 return format(arguments
, argumentNames
, count
, appendTo
, NULL
, success
);
924 // Does linear search to find the match for an ArgName.
925 const Formattable
* MessageFormat::getArgFromListByName(const Formattable
* arguments
,
926 const UnicodeString
*argumentNames
,
927 int32_t cnt
, UnicodeString
& name
) const {
928 for (int32_t i
= 0; i
< cnt
; ++i
) {
929 if (0 == argumentNames
[i
].compare(name
)) {
930 return arguments
+ i
;
938 MessageFormat::format(const Formattable
* arguments
,
939 const UnicodeString
*argumentNames
,
941 UnicodeString
& appendTo
,
943 UErrorCode
& status
) const {
944 if (U_FAILURE(status
)) {
948 UnicodeStringAppendable
usapp(appendTo
);
949 AppendableWrapper
app(usapp
);
950 format(0, NULL
, arguments
, argumentNames
, cnt
, app
, pos
, status
);
957 * Mutable input/output values for the PluralSelectorProvider.
958 * Separate so that it is possible to make MessageFormat Freezable.
960 class PluralSelectorContext
{
962 PluralSelectorContext(int32_t start
, const UnicodeString
&name
,
963 const Formattable
&num
, double off
, UErrorCode
&errorCode
)
964 : startIndex(start
), argName(name
), offset(off
),
965 numberArgIndex(-1), formatter(NULL
), forReplaceNumber(FALSE
) {
966 // number needs to be set even when select() is not called.
967 // Keep it as a Number/Formattable:
968 // For format() methods, and to preserve information (e.g., BigDecimal).
972 number
= num
.getDouble(errorCode
) - off
;
976 // Input values for plural selection with decimals.
978 const UnicodeString
&argName
;
979 /** argument number - plural offset */
982 // Output values for plural selection with decimals.
983 /** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */
984 int32_t numberArgIndex
;
985 const Format
*formatter
;
986 /** formatted argument number - plural offset */
987 UnicodeString numberString
;
988 /** TRUE if number-offset was formatted with the stock number formatter */
989 UBool forReplaceNumber
;
994 // if argumentNames is NULL, this means arguments is a numeric array.
995 // arguments can not be NULL.
996 // We use const void *plNumber rather than const PluralSelectorContext *pluralNumber
997 // so that we need not declare the PluralSelectorContext in the public header file.
998 void MessageFormat::format(int32_t msgStart
, const void *plNumber
,
999 const Formattable
* arguments
,
1000 const UnicodeString
*argumentNames
,
1002 AppendableWrapper
& appendTo
,
1003 FieldPosition
* ignore
,
1004 UErrorCode
& success
) const {
1005 if (U_FAILURE(success
)) {
1009 const UnicodeString
& msgString
= msgPattern
.getPatternString();
1010 int32_t prevIndex
= msgPattern
.getPart(msgStart
).getLimit();
1011 for (int32_t i
= msgStart
+ 1; U_SUCCESS(success
) ; ++i
) {
1012 const MessagePattern::Part
* part
= &msgPattern
.getPart(i
);
1013 const UMessagePatternPartType type
= part
->getType();
1014 int32_t index
= part
->getIndex();
1015 appendTo
.append(msgString
, prevIndex
, index
- prevIndex
);
1016 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1019 prevIndex
= part
->getLimit();
1020 if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
1021 const PluralSelectorContext
&pluralNumber
=
1022 *static_cast<const PluralSelectorContext
*>(plNumber
);
1023 if(pluralNumber
.forReplaceNumber
) {
1024 // number-offset was already formatted.
1025 appendTo
.formatAndAppend(pluralNumber
.formatter
,
1026 pluralNumber
.number
, pluralNumber
.numberString
, success
);
1028 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1029 appendTo
.formatAndAppend(nf
, pluralNumber
.number
, success
);
1033 if (type
!= UMSGPAT_PART_TYPE_ARG_START
) {
1036 int32_t argLimit
= msgPattern
.getLimitPartIndex(i
);
1037 UMessagePatternArgType argType
= part
->getArgType();
1038 part
= &msgPattern
.getPart(++i
);
1039 const Formattable
* arg
;
1040 UBool noArg
= FALSE
;
1041 UnicodeString argName
= msgPattern
.getSubstring(*part
);
1042 if (argumentNames
== NULL
) {
1043 int32_t argNumber
= part
->getValue(); // ARG_NUMBER
1044 if (0 <= argNumber
&& argNumber
< cnt
) {
1045 arg
= arguments
+ argNumber
;
1051 arg
= getArgFromListByName(arguments
, argumentNames
, cnt
, argName
);
1057 int32_t prevDestLength
= appendTo
.length();
1058 const Format
* formatter
= NULL
;
1061 UnicodeString(LEFT_CURLY_BRACE
).append(argName
).append(RIGHT_CURLY_BRACE
));
1062 } else if (arg
== NULL
) {
1063 appendTo
.append(NULL_STRING
, 4);
1064 } else if(plNumber
!=NULL
&&
1065 static_cast<const PluralSelectorContext
*>(plNumber
)->numberArgIndex
==(i
-2)) {
1066 const PluralSelectorContext
&pluralNumber
=
1067 *static_cast<const PluralSelectorContext
*>(plNumber
);
1068 if(pluralNumber
.offset
== 0) {
1069 // The number was already formatted with this formatter.
1070 appendTo
.formatAndAppend(pluralNumber
.formatter
, pluralNumber
.number
,
1071 pluralNumber
.numberString
, success
);
1073 // Do not use the formatted (number-offset) string for a named argument
1074 // that formats the number without subtracting the offset.
1075 appendTo
.formatAndAppend(pluralNumber
.formatter
, *arg
, success
);
1077 } else if ((formatter
= getCachedFormatter(i
-2))) {
1078 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
1079 if (dynamic_cast<const ChoiceFormat
*>(formatter
) ||
1080 dynamic_cast<const PluralFormat
*>(formatter
) ||
1081 dynamic_cast<const SelectFormat
*>(formatter
)) {
1082 // We only handle nested formats here if they were provided via
1083 // setFormat() or its siblings. Otherwise they are not cached and instead
1084 // handled below according to argType.
1085 UnicodeString subMsgString
;
1086 formatter
->format(*arg
, subMsgString
, success
);
1087 if (subMsgString
.indexOf(LEFT_CURLY_BRACE
) >= 0 ||
1088 (subMsgString
.indexOf(SINGLE_QUOTE
) >= 0 && !MessageImpl::jdkAposMode(msgPattern
))
1090 MessageFormat
subMsgFormat(subMsgString
, fLocale
, success
);
1091 subMsgFormat
.format(0, NULL
, arguments
, argumentNames
, cnt
, appendTo
, ignore
, success
);
1093 appendTo
.append(subMsgString
);
1096 appendTo
.formatAndAppend(formatter
, *arg
, success
);
1098 } else if (argType
== UMSGPAT_ARG_TYPE_NONE
|| (cachedFormatters
&& uhash_iget(cachedFormatters
, i
- 2))) {
1099 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1100 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1101 // for the hash table containing a DummyFormat.
1102 if (arg
->isNumeric()) {
1103 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1104 appendTo
.formatAndAppend(nf
, *arg
, success
);
1105 } else if (arg
->getType() == Formattable::kDate
) {
1106 const DateFormat
* df
= getDefaultDateFormat(success
);
1107 appendTo
.formatAndAppend(df
, *arg
, success
);
1109 appendTo
.append(arg
->getString(success
));
1111 } else if (argType
== UMSGPAT_ARG_TYPE_CHOICE
) {
1112 if (!arg
->isNumeric()) {
1113 success
= U_ILLEGAL_ARGUMENT_ERROR
;
1116 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1117 // because only this one converts non-double numeric types to double.
1118 const double number
= arg
->getDouble(success
);
1119 int32_t subMsgStart
= ChoiceFormat::findSubMessage(msgPattern
, i
, number
);
1120 formatComplexSubMessage(subMsgStart
, NULL
, arguments
, argumentNames
,
1121 cnt
, appendTo
, success
);
1122 } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType
)) {
1123 if (!arg
->isNumeric()) {
1124 success
= U_ILLEGAL_ARGUMENT_ERROR
;
1127 const PluralSelectorProvider
&selector
=
1128 argType
== UMSGPAT_ARG_TYPE_PLURAL
? pluralProvider
: ordinalProvider
;
1129 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1130 // because only this one converts non-double numeric types to double.
1131 double offset
= msgPattern
.getPluralOffset(i
);
1132 PluralSelectorContext
context(i
, argName
, *arg
, offset
, success
);
1133 int32_t subMsgStart
= PluralFormat::findSubMessage(
1134 msgPattern
, i
, selector
, &context
, arg
->getDouble(success
), success
);
1135 formatComplexSubMessage(subMsgStart
, &context
, arguments
, argumentNames
,
1136 cnt
, appendTo
, success
);
1137 } else if (argType
== UMSGPAT_ARG_TYPE_SELECT
) {
1138 int32_t subMsgStart
= SelectFormat::findSubMessage(msgPattern
, i
, arg
->getString(success
), success
);
1139 formatComplexSubMessage(subMsgStart
, NULL
, arguments
, argumentNames
,
1140 cnt
, appendTo
, success
);
1142 // This should never happen.
1143 success
= U_INTERNAL_PROGRAM_ERROR
;
1146 ignore
= updateMetaData(appendTo
, prevDestLength
, ignore
, arg
);
1147 prevIndex
= msgPattern
.getPart(argLimit
).getLimit();
1153 void MessageFormat::formatComplexSubMessage(int32_t msgStart
,
1154 const void *plNumber
,
1155 const Formattable
* arguments
,
1156 const UnicodeString
*argumentNames
,
1158 AppendableWrapper
& appendTo
,
1159 UErrorCode
& success
) const {
1160 if (U_FAILURE(success
)) {
1164 if (!MessageImpl::jdkAposMode(msgPattern
)) {
1165 format(msgStart
, plNumber
, arguments
, argumentNames
, cnt
, appendTo
, NULL
, success
);
1169 // JDK compatibility mode: (see JDK MessageFormat.format() API docs)
1170 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes
1171 // - if the result string contains an open curly brace '{' then
1172 // instantiate a temporary MessageFormat object and format again;
1173 // otherwise just append the result string
1174 const UnicodeString
& msgString
= msgPattern
.getPatternString();
1176 int32_t prevIndex
= msgPattern
.getPart(msgStart
).getLimit();
1177 for (int32_t i
= msgStart
;;) {
1178 const MessagePattern::Part
& part
= msgPattern
.getPart(++i
);
1179 const UMessagePatternPartType type
= part
.getType();
1180 int32_t index
= part
.getIndex();
1181 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1182 sb
.append(msgString
, prevIndex
, index
- prevIndex
);
1184 } else if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
|| type
== UMSGPAT_PART_TYPE_SKIP_SYNTAX
) {
1185 sb
.append(msgString
, prevIndex
, index
- prevIndex
);
1186 if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
1187 const PluralSelectorContext
&pluralNumber
=
1188 *static_cast<const PluralSelectorContext
*>(plNumber
);
1189 if(pluralNumber
.forReplaceNumber
) {
1190 // number-offset was already formatted.
1191 sb
.append(pluralNumber
.numberString
);
1193 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1194 sb
.append(nf
->format(pluralNumber
.number
, sb
, success
));
1197 prevIndex
= part
.getLimit();
1198 } else if (type
== UMSGPAT_PART_TYPE_ARG_START
) {
1199 sb
.append(msgString
, prevIndex
, index
- prevIndex
);
1201 i
= msgPattern
.getLimitPartIndex(i
);
1202 index
= msgPattern
.getPart(i
).getLimit();
1203 MessageImpl::appendReducedApostrophes(msgString
, prevIndex
, index
, sb
);
1207 if (sb
.indexOf(LEFT_CURLY_BRACE
) >= 0) {
1208 UnicodeString emptyPattern
; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
1209 MessageFormat
subMsgFormat(emptyPattern
, fLocale
, success
);
1210 subMsgFormat
.applyPattern(sb
, UMSGPAT_APOS_DOUBLE_REQUIRED
, NULL
, success
);
1211 subMsgFormat
.format(0, NULL
, arguments
, argumentNames
, cnt
, appendTo
, NULL
, success
);
1213 appendTo
.append(sb
);
1218 UnicodeString
MessageFormat::getLiteralStringUntilNextArgument(int32_t from
) const {
1219 const UnicodeString
& msgString
=msgPattern
.getPatternString();
1220 int32_t prevIndex
=msgPattern
.getPart(from
).getLimit();
1222 for (int32_t i
= from
+ 1; ; ++i
) {
1223 const MessagePattern::Part
& part
= msgPattern
.getPart(i
);
1224 const UMessagePatternPartType type
=part
.getType();
1225 int32_t index
=part
.getIndex();
1226 b
.append(msgString
, prevIndex
, index
- prevIndex
);
1227 if(type
==UMSGPAT_PART_TYPE_ARG_START
|| type
==UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1230 // Unexpected Part "part" in parsed message.
1231 U_ASSERT(type
==UMSGPAT_PART_TYPE_SKIP_SYNTAX
|| type
==UMSGPAT_PART_TYPE_INSERT_CHAR
);
1232 prevIndex
=part
.getLimit();
1237 FieldPosition
* MessageFormat::updateMetaData(AppendableWrapper
& /*dest*/, int32_t /*prevLength*/,
1238 FieldPosition
* /*fp*/, const Formattable
* /*argId*/) const {
1239 // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing.
1242 if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) {
1243 fp->setBeginIndex(prevLength);
1244 fp->setEndIndex(dest.get_length());
1252 MessageFormat::findOtherSubMessage(int32_t partIndex
) const {
1253 int32_t count
=msgPattern
.countParts();
1254 const MessagePattern::Part
*part
= &msgPattern
.getPart(partIndex
);
1255 if(MessagePattern::Part::hasNumericValue(part
->getType())) {
1258 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
1259 // until ARG_LIMIT or end of plural-only pattern.
1260 UnicodeString
other(FALSE
, OTHER_STRING
, 5);
1262 part
=&msgPattern
.getPart(partIndex
++);
1263 UMessagePatternPartType type
=part
->getType();
1264 if(type
==UMSGPAT_PART_TYPE_ARG_LIMIT
) {
1267 U_ASSERT(type
==UMSGPAT_PART_TYPE_ARG_SELECTOR
);
1268 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
1269 if(msgPattern
.partSubstringMatches(*part
, other
)) {
1272 if(MessagePattern::Part::hasNumericValue(msgPattern
.getPartType(partIndex
))) {
1273 ++partIndex
; // skip the numeric-value part of "=1" etc.
1275 partIndex
=msgPattern
.getLimitPartIndex(partIndex
);
1276 } while(++partIndex
<count
);
1281 MessageFormat::findFirstPluralNumberArg(int32_t msgStart
, const UnicodeString
&argName
) const {
1282 for(int32_t i
=msgStart
+1;; ++i
) {
1283 const MessagePattern::Part
&part
=msgPattern
.getPart(i
);
1284 UMessagePatternPartType type
=part
.getType();
1285 if(type
==UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1288 if(type
==UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
1291 if(type
==UMSGPAT_PART_TYPE_ARG_START
) {
1292 UMessagePatternArgType argType
=part
.getArgType();
1293 if(!argName
.isEmpty() && (argType
==UMSGPAT_ARG_TYPE_NONE
|| argType
==UMSGPAT_ARG_TYPE_SIMPLE
)) {
1294 // ARG_NUMBER or ARG_NAME
1295 if(msgPattern
.partSubstringMatches(msgPattern
.getPart(i
+1), argName
)) {
1299 i
=msgPattern
.getLimitPartIndex(i
);
1304 void MessageFormat::copyObjects(const MessageFormat
& that
, UErrorCode
& ec
) {
1305 // Deep copy pointer fields.
1306 // We need not copy the formatAliases because they are re-filled
1307 // in each getFormats() call.
1308 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules
1309 // also get created on demand.
1310 argTypeCount
= that
.argTypeCount
;
1311 if (argTypeCount
> 0) {
1312 if (!allocateArgTypes(argTypeCount
, ec
)) {
1315 uprv_memcpy(argTypes
, that
.argTypes
, argTypeCount
* sizeof(argTypes
[0]));
1317 if (cachedFormatters
!= NULL
) {
1318 uhash_removeAll(cachedFormatters
);
1320 if (customFormatArgStarts
!= NULL
) {
1321 uhash_removeAll(customFormatArgStarts
);
1323 if (that
.cachedFormatters
) {
1324 if (cachedFormatters
== NULL
) {
1325 cachedFormatters
=uhash_open(uhash_hashLong
, uhash_compareLong
,
1326 equalFormatsForHash
, &ec
);
1327 if (U_FAILURE(ec
)) {
1330 uhash_setValueDeleter(cachedFormatters
, uprv_deleteUObject
);
1333 const int32_t count
= uhash_count(that
.cachedFormatters
);
1335 for (idx
= 0, pos
= UHASH_FIRST
; idx
< count
&& U_SUCCESS(ec
); ++idx
) {
1336 const UHashElement
* cur
= uhash_nextElement(that
.cachedFormatters
, &pos
);
1337 Format
* newFormat
= ((Format
*)(cur
->value
.pointer
))->clone();
1339 uhash_iput(cachedFormatters
, cur
->key
.integer
, newFormat
, &ec
);
1341 ec
= U_MEMORY_ALLOCATION_ERROR
;
1346 if (that
.customFormatArgStarts
) {
1347 if (customFormatArgStarts
== NULL
) {
1348 customFormatArgStarts
=uhash_open(uhash_hashLong
, uhash_compareLong
,
1351 const int32_t count
= uhash_count(that
.customFormatArgStarts
);
1353 for (idx
= 0, pos
= UHASH_FIRST
; idx
< count
&& U_SUCCESS(ec
); ++idx
) {
1354 const UHashElement
* cur
= uhash_nextElement(that
.customFormatArgStarts
, &pos
);
1355 uhash_iputi(customFormatArgStarts
, cur
->key
.integer
, cur
->value
.integer
, &ec
);
1362 MessageFormat::parse(int32_t msgStart
,
1363 const UnicodeString
& source
,
1366 UErrorCode
& ec
) const {
1368 if (U_FAILURE(ec
)) {
1369 pos
.setErrorIndex(pos
.getIndex());
1372 // parse() does not work with named arguments.
1373 if (msgPattern
.hasNamedArguments()) {
1374 ec
= U_ARGUMENT_TYPE_MISMATCH
;
1375 pos
.setErrorIndex(pos
.getIndex());
1378 LocalArray
<Formattable
> resultArray(new Formattable
[argTypeCount
? argTypeCount
: 1]);
1379 const UnicodeString
& msgString
=msgPattern
.getPatternString();
1380 int32_t prevIndex
=msgPattern
.getPart(msgStart
).getLimit();
1381 int32_t sourceOffset
= pos
.getIndex();
1382 ParsePosition
tempStatus(0);
1384 for(int32_t i
=msgStart
+1; ; ++i
) {
1385 UBool haveArgResult
= FALSE
;
1386 const MessagePattern::Part
* part
=&msgPattern
.getPart(i
);
1387 const UMessagePatternPartType type
=part
->getType();
1388 int32_t index
=part
->getIndex();
1389 // Make sure the literal string matches.
1390 int32_t len
= index
- prevIndex
;
1391 if (len
== 0 || (0 == msgString
.compare(prevIndex
, len
, source
, sourceOffset
, len
))) {
1392 sourceOffset
+= len
;
1395 pos
.setErrorIndex(sourceOffset
);
1396 return NULL
; // leave index as is to signal error
1398 if(type
==UMSGPAT_PART_TYPE_MSG_LIMIT
) {
1399 // Things went well! Done.
1400 pos
.setIndex(sourceOffset
);
1401 return resultArray
.orphan();
1403 if(type
==UMSGPAT_PART_TYPE_SKIP_SYNTAX
|| type
==UMSGPAT_PART_TYPE_INSERT_CHAR
) {
1404 prevIndex
=part
->getLimit();
1407 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
1408 // Unexpected Part "part" in parsed message.
1409 U_ASSERT(type
==UMSGPAT_PART_TYPE_ARG_START
);
1410 int32_t argLimit
=msgPattern
.getLimitPartIndex(i
);
1412 UMessagePatternArgType argType
=part
->getArgType();
1413 part
=&msgPattern
.getPart(++i
);
1414 int32_t argNumber
= part
->getValue(); // ARG_NUMBER
1417 const Format
* formatter
= NULL
;
1418 Formattable
& argResult
= resultArray
[argNumber
];
1420 if(cachedFormatters
!=NULL
&& (formatter
= getCachedFormatter(i
- 2))!=NULL
) {
1421 // Just parse using the formatter.
1422 tempStatus
.setIndex(sourceOffset
);
1423 formatter
->parseObject(source
, argResult
, tempStatus
);
1424 if (tempStatus
.getIndex() == sourceOffset
) {
1425 pos
.setErrorIndex(sourceOffset
);
1426 return NULL
; // leave index as is to signal error
1428 sourceOffset
= tempStatus
.getIndex();
1429 haveArgResult
= TRUE
;
1431 argType
==UMSGPAT_ARG_TYPE_NONE
|| (cachedFormatters
&& uhash_iget(cachedFormatters
, i
-2))) {
1432 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1433 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1434 // for the hash table containind DummyFormat.
1436 // Match as a string.
1437 // if at end, use longest possible match
1438 // otherwise uses first match to intervening string
1439 // does NOT recursively try all possibilities
1440 UnicodeString stringAfterArgument
= getLiteralStringUntilNextArgument(argLimit
);
1442 if (!stringAfterArgument
.isEmpty()) {
1443 next
= source
.indexOf(stringAfterArgument
, sourceOffset
);
1445 next
= source
.length();
1448 pos
.setErrorIndex(sourceOffset
);
1449 return NULL
; // leave index as is to signal error
1451 UnicodeString
strValue(source
.tempSubString(sourceOffset
, next
- sourceOffset
));
1452 UnicodeString compValue
;
1453 compValue
.append(LEFT_CURLY_BRACE
);
1454 itos(argNumber
, compValue
);
1455 compValue
.append(RIGHT_CURLY_BRACE
);
1456 if (0 != strValue
.compare(compValue
)) {
1457 argResult
.setString(strValue
);
1458 haveArgResult
= TRUE
;
1460 sourceOffset
= next
;
1462 } else if(argType
==UMSGPAT_ARG_TYPE_CHOICE
) {
1463 tempStatus
.setIndex(sourceOffset
);
1464 double choiceResult
= ChoiceFormat::parseArgument(msgPattern
, i
, source
, tempStatus
);
1465 if (tempStatus
.getIndex() == sourceOffset
) {
1466 pos
.setErrorIndex(sourceOffset
);
1467 return NULL
; // leave index as is to signal error
1469 argResult
.setDouble(choiceResult
);
1470 haveArgResult
= TRUE
;
1471 sourceOffset
= tempStatus
.getIndex();
1472 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType
) || argType
==UMSGPAT_ARG_TYPE_SELECT
) {
1473 // Parsing not supported.
1474 ec
= U_UNSUPPORTED_ERROR
;
1477 // This should never happen.
1478 ec
= U_INTERNAL_PROGRAM_ERROR
;
1481 if (haveArgResult
&& count
<= argNumber
) {
1482 count
= argNumber
+ 1;
1484 prevIndex
=msgPattern
.getPart(argLimit
).getLimit();
1488 // -------------------------------------
1489 // Parses the source pattern and returns the Formattable objects array,
1490 // the array count and the ending parse position. The caller of this method
1494 MessageFormat::parse(const UnicodeString
& source
,
1496 int32_t& count
) const {
1497 UErrorCode ec
= U_ZERO_ERROR
;
1498 return parse(0, source
, pos
, count
, ec
);
1501 // -------------------------------------
1502 // Parses the source string and returns the array of
1503 // Formattable objects and the array count. The caller
1504 // owns the returned array.
1507 MessageFormat::parse(const UnicodeString
& source
,
1509 UErrorCode
& success
) const
1511 if (msgPattern
.hasNamedArguments()) {
1512 success
= U_ARGUMENT_TYPE_MISMATCH
;
1515 ParsePosition
status(0);
1516 // Calls the actual implementation method and starts
1517 // from zero offset of the source text.
1518 Formattable
* result
= parse(source
, status
, cnt
);
1519 if (status
.getIndex() == 0) {
1520 success
= U_MESSAGE_PARSE_ERROR
;
1527 // -------------------------------------
1528 // Parses the source text and copy into the result buffer.
1531 MessageFormat::parseObject( const UnicodeString
& source
,
1532 Formattable
& result
,
1533 ParsePosition
& status
) const
1536 Formattable
* tmpResult
= parse(source
, status
, cnt
);
1537 if (tmpResult
!= NULL
)
1538 result
.adoptArray(tmpResult
, cnt
);
1542 MessageFormat::autoQuoteApostrophe(const UnicodeString
& pattern
, UErrorCode
& status
) {
1543 UnicodeString result
;
1544 if (U_SUCCESS(status
)) {
1545 int32_t plen
= pattern
.length();
1546 const UChar
* pat
= pattern
.getBuffer();
1547 int32_t blen
= plen
* 2 + 1; // space for null termination, convenience
1548 UChar
* buf
= result
.getBuffer(blen
);
1550 status
= U_MEMORY_ALLOCATION_ERROR
;
1552 int32_t len
= umsg_autoQuoteApostrophe(pat
, plen
, buf
, blen
, &status
);
1553 result
.releaseBuffer(U_SUCCESS(status
) ? len
: 0);
1556 if (U_FAILURE(status
)) {
1557 result
.setToBogus();
1562 // -------------------------------------
1564 static Format
* makeRBNF(URBNFRuleSetTag tag
, const Locale
& locale
, const UnicodeString
& defaultRuleSet
, UErrorCode
& ec
) {
1565 RuleBasedNumberFormat
* fmt
= new RuleBasedNumberFormat(tag
, locale
, ec
);
1567 ec
= U_MEMORY_ALLOCATION_ERROR
;
1568 } else if (U_SUCCESS(ec
) && defaultRuleSet
.length() > 0) {
1569 UErrorCode localStatus
= U_ZERO_ERROR
; // ignore unrecognized default rule set
1570 fmt
->setDefaultRuleSet(defaultRuleSet
, localStatus
);
1575 void MessageFormat::cacheExplicitFormats(UErrorCode
& status
) {
1576 if (U_FAILURE(status
)) {
1580 if (cachedFormatters
!= NULL
) {
1581 uhash_removeAll(cachedFormatters
);
1583 if (customFormatArgStarts
!= NULL
) {
1584 uhash_removeAll(customFormatArgStarts
);
1587 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
1588 // which we need not examine.
1589 int32_t limit
= msgPattern
.countParts() - 2;
1591 // We also need not look at the first two "parts"
1592 // (at most MSG_START and ARG_START) in this loop.
1593 // We determine the argTypeCount first so that we can allocateArgTypes
1594 // so that the next loop can set argTypes[argNumber].
1595 // (This is for the C API which needs the argTypes to read its va_arg list.)
1596 for (int32_t i
= 2; i
< limit
&& U_SUCCESS(status
); ++i
) {
1597 const MessagePattern::Part
& part
= msgPattern
.getPart(i
);
1598 if (part
.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER
) {
1599 const int argNumber
= part
.getValue();
1600 if (argNumber
>= argTypeCount
) {
1601 argTypeCount
= argNumber
+ 1;
1605 if (!allocateArgTypes(argTypeCount
, status
)) {
1608 // Set all argTypes to kObject, as a "none" value, for lack of any better value.
1609 // We never use kObject for real arguments.
1610 // We use it as "no argument yet" for the check for hasArgTypeConflicts.
1611 for (int32_t i
= 0; i
< argTypeCount
; ++i
) {
1612 argTypes
[i
] = Formattable::kObject
;
1614 hasArgTypeConflicts
= FALSE
;
1616 // This loop starts at part index 1 because we do need to examine
1617 // ARG_START parts. (But we can ignore the MSG_START.)
1618 for (int32_t i
= 1; i
< limit
&& U_SUCCESS(status
); ++i
) {
1619 const MessagePattern::Part
* part
= &msgPattern
.getPart(i
);
1620 if (part
->getType() != UMSGPAT_PART_TYPE_ARG_START
) {
1623 UMessagePatternArgType argType
= part
->getArgType();
1625 int32_t argNumber
= -1;
1626 part
= &msgPattern
.getPart(i
+ 1);
1627 if (part
->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER
) {
1628 argNumber
= part
->getValue();
1630 Formattable::Type formattableType
;
1633 case UMSGPAT_ARG_TYPE_NONE
:
1634 formattableType
= Formattable::kString
;
1636 case UMSGPAT_ARG_TYPE_SIMPLE
: {
1639 UnicodeString explicitType
= msgPattern
.getSubstring(msgPattern
.getPart(i
++));
1640 UnicodeString style
;
1641 if ((part
= &msgPattern
.getPart(i
))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE
) {
1642 style
= msgPattern
.getSubstring(*part
);
1645 UParseError parseError
;
1646 Format
* formatter
= createAppropriateFormat(explicitType
, style
, formattableType
, parseError
, status
);
1647 setArgStartFormat(index
, formatter
, status
);
1650 case UMSGPAT_ARG_TYPE_CHOICE
:
1651 case UMSGPAT_ARG_TYPE_PLURAL
:
1652 case UMSGPAT_ARG_TYPE_SELECTORDINAL
:
1653 formattableType
= Formattable::kDouble
;
1655 case UMSGPAT_ARG_TYPE_SELECT
:
1656 formattableType
= Formattable::kString
;
1659 status
= U_INTERNAL_PROGRAM_ERROR
; // Should be unreachable.
1660 formattableType
= Formattable::kString
;
1663 if (argNumber
!= -1) {
1664 if (argTypes
[argNumber
] != Formattable::kObject
&& argTypes
[argNumber
] != formattableType
) {
1665 hasArgTypeConflicts
= TRUE
;
1667 argTypes
[argNumber
] = formattableType
;
1673 Format
* MessageFormat::createAppropriateFormat(UnicodeString
& type
, UnicodeString
& style
,
1674 Formattable::Type
& formattableType
, UParseError
& parseError
,
1676 if (U_FAILURE(ec
)) {
1680 int32_t typeID
, styleID
;
1681 DateFormat::EStyle date_style
;
1683 switch (typeID
= findKeyword(type
, TYPE_IDS
)) {
1685 formattableType
= Formattable::kDouble
;
1686 switch (findKeyword(style
, NUMBER_STYLE_IDS
)) {
1688 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1691 fmt
= NumberFormat::createCurrencyInstance(fLocale
, ec
);
1694 fmt
= NumberFormat::createPercentInstance(fLocale
, ec
);
1697 formattableType
= Formattable::kLong
;
1698 fmt
= createIntegerFormat(fLocale
, ec
);
1701 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1703 DecimalFormat
* decfmt
= dynamic_cast<DecimalFormat
*>(fmt
);
1704 if (decfmt
!= NULL
) {
1705 decfmt
->applyPattern(style
,parseError
,ec
);
1714 formattableType
= Formattable::kDate
;
1715 styleID
= findKeyword(style
, DATE_STYLE_IDS
);
1716 date_style
= (styleID
>= 0) ? DATE_STYLES
[styleID
] : DateFormat::kDefault
;
1719 fmt
= DateFormat::createDateInstance(date_style
, fLocale
);
1721 fmt
= DateFormat::createTimeInstance(date_style
, fLocale
);
1724 if (styleID
< 0 && fmt
!= NULL
) {
1725 SimpleDateFormat
* sdtfmt
= dynamic_cast<SimpleDateFormat
*>(fmt
);
1726 if (sdtfmt
!= NULL
) {
1727 sdtfmt
->applyPattern(style
);
1733 formattableType
= Formattable::kDouble
;
1734 fmt
= makeRBNF(URBNF_SPELLOUT
, fLocale
, style
, ec
);
1737 formattableType
= Formattable::kDouble
;
1738 fmt
= makeRBNF(URBNF_ORDINAL
, fLocale
, style
, ec
);
1741 formattableType
= Formattable::kDouble
;
1742 fmt
= makeRBNF(URBNF_DURATION
, fLocale
, style
, ec
);
1745 formattableType
= Formattable::kString
;
1746 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
1754 //-------------------------------------
1755 // Finds the string, s, in the string array, list.
1756 int32_t MessageFormat::findKeyword(const UnicodeString
& s
,
1757 const UChar
* const *list
)
1760 return 0; // default
1763 int32_t length
= s
.length();
1764 const UChar
*ps
= PatternProps::trimWhiteSpace(s
.getBuffer(), length
);
1765 UnicodeString
buffer(FALSE
, ps
, length
);
1766 // Trims the space characters and turns all characters
1767 // in s to lower case.
1769 for (int32_t i
= 0; list
[i
]; ++i
) {
1770 if (!buffer
.compare(list
[i
], u_strlen(list
[i
]))) {
1778 * Convenience method that ought to be in NumberFormat
1781 MessageFormat::createIntegerFormat(const Locale
& locale
, UErrorCode
& status
) const {
1782 NumberFormat
*temp
= NumberFormat::createInstance(locale
, status
);
1783 DecimalFormat
*temp2
;
1784 if (temp
!= NULL
&& (temp2
= dynamic_cast<DecimalFormat
*>(temp
)) != NULL
) {
1785 temp2
->setMaximumFractionDigits(0);
1786 temp2
->setDecimalSeparatorAlwaysShown(FALSE
);
1787 temp2
->setParseIntegerOnly(TRUE
);
1794 * Return the default number format. Used to format a numeric
1795 * argument when subformats[i].format is NULL. Returns NULL
1798 * Semantically const but may modify *this.
1800 const NumberFormat
* MessageFormat::getDefaultNumberFormat(UErrorCode
& ec
) const {
1801 if (defaultNumberFormat
== NULL
) {
1802 MessageFormat
* t
= (MessageFormat
*) this;
1803 t
->defaultNumberFormat
= NumberFormat::createInstance(fLocale
, ec
);
1804 if (U_FAILURE(ec
)) {
1805 delete t
->defaultNumberFormat
;
1806 t
->defaultNumberFormat
= NULL
;
1807 } else if (t
->defaultNumberFormat
== NULL
) {
1808 ec
= U_MEMORY_ALLOCATION_ERROR
;
1811 return defaultNumberFormat
;
1815 * Return the default date format. Used to format a date
1816 * argument when subformats[i].format is NULL. Returns NULL
1819 * Semantically const but may modify *this.
1821 const DateFormat
* MessageFormat::getDefaultDateFormat(UErrorCode
& ec
) const {
1822 if (defaultDateFormat
== NULL
) {
1823 MessageFormat
* t
= (MessageFormat
*) this;
1824 t
->defaultDateFormat
= DateFormat::createDateTimeInstance(DateFormat::kShort
, DateFormat::kShort
, fLocale
);
1825 if (t
->defaultDateFormat
== NULL
) {
1826 ec
= U_MEMORY_ALLOCATION_ERROR
;
1829 return defaultDateFormat
;
1833 MessageFormat::usesNamedArguments() const {
1834 return msgPattern
.hasNamedArguments();
1838 MessageFormat::getArgTypeCount() const {
1839 return argTypeCount
;
1842 UBool
MessageFormat::equalFormats(const void* left
, const void* right
) {
1843 return *(const Format
*)left
==*(const Format
*)right
;
1847 UBool
MessageFormat::DummyFormat::operator==(const Format
&) const {
1851 Format
* MessageFormat::DummyFormat::clone() const {
1852 return new DummyFormat();
1855 UnicodeString
& MessageFormat::DummyFormat::format(const Formattable
&,
1856 UnicodeString
& appendTo
,
1857 UErrorCode
& status
) const {
1858 if (U_SUCCESS(status
)) {
1859 status
= U_UNSUPPORTED_ERROR
;
1864 UnicodeString
& MessageFormat::DummyFormat::format(const Formattable
&,
1865 UnicodeString
& appendTo
,
1867 UErrorCode
& status
) const {
1868 if (U_SUCCESS(status
)) {
1869 status
= U_UNSUPPORTED_ERROR
;
1874 UnicodeString
& MessageFormat::DummyFormat::format(const Formattable
&,
1875 UnicodeString
& appendTo
,
1876 FieldPositionIterator
*,
1877 UErrorCode
& status
) const {
1878 if (U_SUCCESS(status
)) {
1879 status
= U_UNSUPPORTED_ERROR
;
1884 void MessageFormat::DummyFormat::parseObject(const UnicodeString
&,
1886 ParsePosition
& ) const {
1890 FormatNameEnumeration::FormatNameEnumeration(UVector
*fNameList
, UErrorCode
& /*status*/) {
1892 fFormatNames
= fNameList
;
1895 const UnicodeString
*
1896 FormatNameEnumeration::snext(UErrorCode
& status
) {
1897 if (U_SUCCESS(status
) && pos
< fFormatNames
->size()) {
1898 return (const UnicodeString
*)fFormatNames
->elementAt(pos
++);
1904 FormatNameEnumeration::reset(UErrorCode
& /*status*/) {
1909 FormatNameEnumeration::count(UErrorCode
& /*status*/) const {
1910 return (fFormatNames
==NULL
) ? 0 : fFormatNames
->size();
1913 FormatNameEnumeration::~FormatNameEnumeration() {
1914 delete fFormatNames
;
1917 MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat
&mf
, UPluralType t
)
1918 : msgFormat(mf
), rules(NULL
), type(t
) {
1921 MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
1925 UnicodeString
MessageFormat::PluralSelectorProvider::select(void *ctx
, double number
,
1926 UErrorCode
& ec
) const {
1927 if (U_FAILURE(ec
)) {
1928 return UnicodeString(FALSE
, OTHER_STRING
, 5);
1930 MessageFormat::PluralSelectorProvider
* t
= const_cast<MessageFormat::PluralSelectorProvider
*>(this);
1932 t
->rules
= PluralRules::forLocale(msgFormat
.fLocale
, type
, ec
);
1933 if (U_FAILURE(ec
)) {
1934 return UnicodeString(FALSE
, OTHER_STRING
, 5);
1937 // Select a sub-message according to how the number is formatted,
1938 // which is specified in the selected sub-message.
1939 // We avoid this circle by looking at how
1940 // the number is formatted in the "other" sub-message
1941 // which must always be present and usually contains the number.
1942 // Message authors should be consistent across sub-messages.
1943 PluralSelectorContext
&context
= *static_cast<PluralSelectorContext
*>(ctx
);
1944 int32_t otherIndex
= msgFormat
.findOtherSubMessage(context
.startIndex
);
1945 context
.numberArgIndex
= msgFormat
.findFirstPluralNumberArg(otherIndex
, context
.argName
);
1946 if(context
.numberArgIndex
> 0 && msgFormat
.cachedFormatters
!= NULL
) {
1948 (const Format
*)uhash_iget(msgFormat
.cachedFormatters
, context
.numberArgIndex
);
1950 if(context
.formatter
== NULL
) {
1951 context
.formatter
= msgFormat
.getDefaultNumberFormat(ec
);
1952 context
.forReplaceNumber
= TRUE
;
1954 U_ASSERT(context
.number
.getDouble(ec
) == number
); // argument number minus the offset
1955 context
.formatter
->format(context
.number
, context
.numberString
, ec
);
1956 const DecimalFormat
*decFmt
= dynamic_cast<const DecimalFormat
*>(context
.formatter
);
1957 if(decFmt
!= NULL
) {
1958 FixedDecimal dec
= decFmt
->getFixedDecimal(context
.number
, ec
);
1959 return rules
->select(dec
);
1961 return rules
->select(number
);
1965 void MessageFormat::PluralSelectorProvider::reset() {
1973 #endif /* #if !UCONFIG_NO_FORMATTING */