2 *******************************************************************************
3 * Copyright (C) 1997-2005, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation.
14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
15 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
16 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 02/22/99 stephen Removed character literals for EBCDIC safety
19 ********************************************************************************
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_FORMATTING
26 #include "unicode/msgfmt.h"
27 #include "unicode/decimfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/smpdtfmt.h"
30 #include "unicode/choicfmt.h"
31 #include "unicode/ustring.h"
32 #include "unicode/ucnv_err.h"
33 #include "unicode/uchar.h"
34 #include "unicode/umsg.h"
35 #include "unicode/rbnf.h"
41 // *****************************************************************************
42 // class MessageFormat
43 // *****************************************************************************
// Pattern metacharacters, spelled as numeric UChar values rather than
// character literals.
45 #define COMMA ((UChar)0x002C)
46 #define SINGLE_QUOTE ((UChar)0x0027)
47 #define LEFT_CURLY_BRACE ((UChar)0x007B)
48 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
50 //---------------------------------------
// Keyword strings for the format type, spelled out as code units.
// The empty string stands for "default", so that a NULL entry can mark
// the end of each keyword list (see the comment on ID_EMPTY).
53 static const UChar ID_EMPTY
[] = {
54 0 /* empty string, used for default so that null can mark end of list */
57 static const UChar ID_NUMBER
[] = {
58 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
60 static const UChar ID_DATE
[] = {
61 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
63 static const UChar ID_TIME
[] = {
64 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
66 static const UChar ID_CHOICE
[] = {
67 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */
69 static const UChar ID_SPELLOUT
[] = {
70 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
72 static const UChar ID_ORDINAL
[] = {
73 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
75 static const UChar ID_DURATION
[] = {
76 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
79 // MessageFormat Type List Number, Date, Time or Choice
// Keyword list searched by findKeyword() for the format type (segment 2).
// NOTE(review): the list entries are not visible in this excerpt.
80 static const UChar
* const TYPE_IDS
[] = {
// Keyword strings for the number style modifiers.
92 static const UChar ID_CURRENCY
[] = {
93 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
95 static const UChar ID_PERCENT
[] = {
96 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
98 static const UChar ID_INTEGER
[] = {
99 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
102 // NumberFormat modifier list, default, currency, percent or integer
// Keyword list searched by findKeyword() for the number style (segment 3).
103 static const UChar
* const NUMBER_STYLE_IDS
[] = {
// Keyword strings for the date/time style modifiers.
111 static const UChar ID_SHORT
[] = {
112 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
114 static const UChar ID_MEDIUM
[] = {
115 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
117 static const UChar ID_LONG
[] = {
118 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
120 static const UChar ID_FULL
[] = {
121 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
124 // DateFormat modifier list, default, short, medium, long or full
// Keyword list searched by findKeyword() for the date/time style (segment 3).
125 static const UChar
* const DATE_STYLE_IDS
[] = {
// Style values indexed by the position findKeyword() returns for
// DATE_STYLE_IDS (see makeFormat()).
134 static const DateFormat::EStyle DATE_STYLES
[] = {
135 DateFormat::kDefault
,
// Capacity the constructors request for subformats[] and argTypes[].
142 static const int32_t DEFAULT_INITIAL_CAPACITY
= 10;
146 // -------------------------------------
// Provides the class ID support used by the getDynamicClassID() /
// getStaticClassID() comparisons elsewhere in this file.
149 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat
)
149 //--------------------------------------------------------------------
152 * Convert a string to an unsigned decimal, ignoring rule whitespace.
153 * @return a non-negative number if successful, or a negative number
156 static int32_t stou(const UnicodeString
& string
) {
160 for (int32_t i
=0; i
<string
.length(); i
+=U16_LENGTH(c
)) {
161 c
= string
.char32At(i
);
162 if (uprv_isRuleWhiteSpace(c
)) {
165 int32_t d
= u_digit(c
, 10);
166 if (d
< 0 || ++count
> 10) {
175 * Convert an integer value to a string and append the result to
176 * the given UnicodeString.
// Appends the decimal (radix 10) text of `i` to `appendTo`, converting
// through a scratch buffer `temp` with uprv_itou().
// NOTE(review): the declaration of `temp` and the return statement are
// not visible in this excerpt; uprv_itou() is told the buffer holds 16.
178 static UnicodeString
& itos(int32_t i
, UnicodeString
& appendTo
) {
180 uprv_itou(temp
,16,i
,10,0); // 10 == radix
181 appendTo
.append(temp
);
185 // -------------------------------------
186 // Creates a MessageFormat instance based on the pattern.
// Constructs a MessageFormat from `pattern` using the default locale
// (Locale::getDefault()).
188 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
190 : fLocale(Locale::getDefault()), // Uses the default locale
192 formatAliasesCapacity(0),
195 subformatCapacity(0),
199 defaultNumberFormat(NULL
),
200 defaultDateFormat(NULL
)
// Pre-size subformats[] and argTypes[]; report an allocation error if
// either request fails.
202 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY
) ||
203 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY
)) {
204 success
= U_MEMORY_ALLOCATION_ERROR
;
// Parse the pattern, then record the locale name as both locale IDs.
207 applyPattern(pattern
, success
);
208 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
// Constructs a MessageFormat from `pattern` for the caller-supplied
// locale `newLocale`.
211 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
212 const Locale
& newLocale
,
214 : fLocale(newLocale
),
216 formatAliasesCapacity(0),
219 subformatCapacity(0),
223 defaultNumberFormat(NULL
),
224 defaultDateFormat(NULL
)
// Pre-size subformats[] and argTypes[]; report an allocation error if
// either request fails.
226 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY
) ||
227 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY
)) {
228 success
= U_MEMORY_ALLOCATION_ERROR
;
// Parse the pattern, then record the locale name as both locale IDs.
231 applyPattern(pattern
, success
);
232 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
// Constructs a MessageFormat from `pattern` for `newLocale`; pattern
// parse diagnostics are written to `parseError`.
235 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
236 const Locale
& newLocale
,
237 UParseError
& parseError
,
239 : fLocale(newLocale
),
241 formatAliasesCapacity(0),
244 subformatCapacity(0),
248 defaultNumberFormat(NULL
),
249 defaultDateFormat(NULL
)
// Pre-size subformats[] and argTypes[]; report an allocation error if
// either request fails.
251 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY
) ||
252 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY
)) {
253 success
= U_MEMORY_ALLOCATION_ERROR
;
// Parse the pattern (with error detail), then record the locale name as
// both locale IDs.
256 applyPattern(pattern
, parseError
, success
);
257 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
// Copy constructor. The visible initializers start the new object with
// zero capacities and no cached default formats.
// NOTE(review): the statement that copies state from `that` is not
// visible in this excerpt.
260 MessageFormat::MessageFormat(const MessageFormat
& that
)
263 formatAliasesCapacity(0),
266 subformatCapacity(0),
270 defaultNumberFormat(NULL
),
271 defaultDateFormat(NULL
)
// Destructor: releases every owned subformat object, the subformats[]
// and formatAliases storage, and the cached default formats.
276 MessageFormat::~MessageFormat()
// Each Subformat owns its Format object.
279 for (idx
= 0; idx
< subformatCount
; idx
++) {
280 delete subformats
[idx
].format
;
282 uprv_free(subformats
);
284 subformatCount
= subformatCapacity
= 0;
288 argTypeCount
= argTypeCapacity
= 0;
// formatAliases holds only aliases to the subformat objects (see
// getFormats()), so only the array itself is freed.
290 uprv_free(formatAliases
);
292 delete defaultNumberFormat
;
293 delete defaultDateFormat
;
296 //--------------------------------------------------------------------
297 // Variable-size array management
300 * Allocate subformats[] to at least the given capacity and return
301 * TRUE if successful. If not, leave subformats[] unchanged.
303 * If subformats is NULL, allocate it. If it is not NULL, enlarge it
304 * if necessary to be at least as large as specified.
// Ensures subformats[] can hold at least `capacity` entries.
// Allocates the array when it is NULL, otherwise enlarges it when the
// current capacity is too small; a failed enlargement returns FALSE.
306 UBool
MessageFormat::allocateSubformats(int32_t capacity
) {
307 if (subformats
== NULL
) {
308 subformats
= (Subformat
*) uprv_malloc(sizeof(*subformats
) * capacity
);
309 subformatCapacity
= capacity
;
// Allocation failed: record that no capacity is available.
311 if (subformats
== NULL
) {
312 subformatCapacity
= 0;
315 } else if (subformatCapacity
< capacity
) {
// Grow to at least double the current capacity.
316 if (capacity
< 2*subformatCapacity
) {
317 capacity
= 2*subformatCapacity
;
319 Subformat
* a
= (Subformat
*)
320 uprv_realloc(subformats
, sizeof(*subformats
) * capacity
);
322 return FALSE
; // request failed
325 subformatCapacity
= capacity
;
331 * Allocate argTypes[] to at least the given capacity and return
332 * TRUE if successful. If not, leave argTypes[] unchanged.
334 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
335 * if necessary to be at least as large as specified.
// Ensures argTypes[] can hold at least `capacity` entries.
// Allocates the array when it is NULL, otherwise enlarges it when the
// current capacity is too small; a failed enlargement returns FALSE.
// New slots are initialized to Formattable::kString.
337 UBool
MessageFormat::allocateArgTypes(int32_t capacity
) {
338 if (argTypes
== NULL
) {
339 argTypes
= (Formattable::Type
*) uprv_malloc(sizeof(*argTypes
) * capacity
);
341 argTypeCapacity
= capacity
;
342 if (argTypes
== NULL
) {
// Every slot of a fresh array defaults to the string type.
346 for (int32_t i
=0; i
<capacity
; ++i
) {
347 argTypes
[i
] = Formattable::kString
;
349 } else if (argTypeCapacity
< capacity
) {
// Grow to at least double the current capacity.
350 if (capacity
< 2*argTypeCapacity
) {
351 capacity
= 2*argTypeCapacity
;
353 Formattable::Type
* a
= (Formattable::Type
*)
354 uprv_realloc(argTypes
, sizeof(*argTypes
) * capacity
);
356 return FALSE
; // request failed
// Only the newly added tail needs the default type.
358 for (int32_t i
=argTypeCapacity
; i
<capacity
; ++i
) {
359 a
[i
] = Formattable::kString
;
362 argTypeCapacity
= capacity
;
367 // -------------------------------------
368 // assignment operator
// Assignment operator: copies the pattern, locale, subformats and
// argument types of `that` into this object. Nothing is changed unless
// both arrays can first be sized for `that`.
371 MessageFormat::operator=(const MessageFormat
& that
)
373 // Reallocate the arrays BEFORE changing this object
375 allocateSubformats(that
.subformatCount
) &&
376 allocateArgTypes(that
.argTypeCount
)) {
378 // Calls the super class for assignment first.
379 Format::operator=(that
);
381 fPattern
= that
.fPattern
;
382 setLocale(that
.fLocale
);
// Release the format objects currently owned by this object.
385 for (j
=0; j
<subformatCount
; ++j
) {
386 delete subformats
[j
].format
;
390 for (j
=0; j
<that
.subformatCount
; ++j
) {
391 // Subformat::operator= does NOT delete this.format
392 subformats
[j
] = that
.subformats
[j
];
394 subformatCount
= that
.subformatCount
;
396 for (j
=0; j
<that
.argTypeCount
; ++j
) {
397 argTypes
[j
] = that
.argTypes
[j
];
399 argTypeCount
= that
.argTypeCount
;
405 MessageFormat::operator==(const Format
& rhs
) const
407 if (this == &rhs
) return TRUE
;
409 MessageFormat
& that
= (MessageFormat
&)rhs
;
411 // Check class ID before checking MessageFormat members
412 if (!Format::operator==(rhs
) ||
413 fPattern
!= that
.fPattern
||
414 fLocale
!= that
.fLocale
) {
419 for (j
=0; j
<subformatCount
; ++j
) {
420 if (subformats
[j
] != that
.subformats
[j
]) {
428 // -------------------------------------
429 // Creates a copy of this MessageFormat, the caller owns the copy.
// Returns a heap-allocated copy of this object made with the copy
// constructor; the caller owns the copy.
432 MessageFormat::clone() const
434 return new MessageFormat(*this);
437 // -------------------------------------
438 // Sets the locale of this MessageFormat object to theLocale.
// Sets the locale. When it differs from the current one, the cached
// default number and date formats are discarded.
// NOTE(review): the assignment to fLocale is not visible in this excerpt.
441 MessageFormat::setLocale(const Locale
& theLocale
)
443 if (fLocale
!= theLocale
) {
444 delete defaultNumberFormat
;
445 defaultNumberFormat
= NULL
;
446 delete defaultDateFormat
;
447 defaultDateFormat
= NULL
;
// Record the locale name as both locale IDs.
450 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
453 // -------------------------------------
454 // Gets the locale of this MessageFormat object.
// Locale accessor (const).
// NOTE(review): the body is not visible in this excerpt.
457 MessageFormat::getLocale() const
// Convenience overload: applies `newPattern` and discards the parse
// error details by passing a local UParseError to the full overload.
466 MessageFormat::applyPattern(const UnicodeString
& newPattern
,
469 UParseError parseError
;
470 applyPattern(newPattern
,parseError
,status
);
474 // -------------------------------------
475 // Applies the new pattern and returns an error if the pattern
// Parses `pattern` into the literal text (stored in fPattern) and one
// subformat per {...} element (built by makeFormat()). On a syntax error
// the error location is recorded through syntaxError() and the subformat
// and argument type counts are reset to zero.
478 MessageFormat::applyPattern(const UnicodeString
& pattern
,
479 UParseError
& parseError
,
485 // The pattern is broken up into segments. Each time a subformat
486 // is encountered, 4 segments are recorded. For example, consider
488 // "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
489 // The first set of segments is:
490 // segments[0] = "There "
492 // segments[2] = "choice"
493 // segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
495 // During parsing, the plain text is accumulated into segments[0].
496 // Segments 1..3 are used to parse each subpattern. Each time a
497 // subpattern is parsed, it creates a format object that is stored
498 // in the subformats array, together with an offset and argument
499 // number. The offset into the plain text stored in
502 // Quotes in segment 0 are handled normally. They are removed.
503 // Quotes may not occur in segments 1 or 2.
504 // Quotes in segment 3 are parsed and _copied_. This makes
505 // subformat patterns work, e.g., {1,number,'#'.##} passes
506 // the pattern "'#'.##" to DecimalFormat.
508 UnicodeString segments
[4];
509 int32_t part
= 0; // segment we are in, 0..3
510 // Record the highest argument number in the pattern. (In the
511 // subpattern {3,number} the argument number is 3.)
512 int32_t formatNumber
= 0;
513 UBool inQuote
= FALSE
;
// Nesting depth of '{' seen inside segment 3.
514 int32_t braceStack
= 0;
515 // Clear error struct
516 parseError
.offset
= -1;
517 parseError
.preContext
[0] = parseError
.postContext
[0] = (UChar
)0;
518 int32_t patLen
= pattern
.length();
// Drop the format objects left over from the previous pattern.
521 for (i
=0; i
<subformatCount
; ++i
) {
522 delete subformats
[i
].format
;
// Main scan: one UTF-16 code unit at a time.
527 for (i
=0; i
<patLen
; ++i
) {
528 UChar ch
= pattern
[i
];
530 // In segment 0, recognize and remove quotes
531 if (ch
== SINGLE_QUOTE
) {
// A doubled quote is tested for by looking one unit ahead.
532 if (i
+1 < patLen
&& pattern
[i
+1] == SINGLE_QUOTE
) {
538 } else if (ch
== LEFT_CURLY_BRACE
&& !inQuote
) {
539 // The only way we get from segment 0 to 1 is via an
545 } else if (inQuote
) {
546 // In segments 1..3, recognize quoted matter, and copy it
547 // into the segment, together with the quotes. This takes
548 // care of '' as well.
549 segments
[part
] += ch
;
550 if (ch
== SINGLE_QUOTE
) {
554 // We have an unquoted character in segment 1..3
557 // Commas bump us to the next segment, except for segment 3,
558 // which can contain commas. See example above.
564 case LEFT_CURLY_BRACE
:
565 // Handle '{' within segment 3. The initial '{'
566 // before segment 1 is handled above.
568 ec
= U_PATTERN_SYNTAX_ERROR
;
572 segments
[part
] += ch
;
574 case RIGHT_CURLY_BRACE
:
// An unnested '}' closes the current element: build its format and
// clear segments 1..3 for the next element.
575 if (braceStack
== 0) {
576 makeFormat(formatNumber
, segments
, parseError
,ec
);
581 segments
[1].remove();
582 segments
[2].remove();
583 segments
[3].remove();
587 segments
[part
] += ch
;
592 // fall through (copy quote chars in segments 1..3)
594 segments
[part
] += ch
;
// After the scan every brace must be closed and we must be back in
// segment 0.
599 if (braceStack
!= 0 || part
!= 0) {
600 // Unmatched braces in the pattern
601 ec
= U_UNMATCHED_BRACES
;
// Success: the accumulated plain text becomes the stored pattern.
604 fPattern
= segments
[0];
// Error path: record where parsing stopped and discard all subformats.
608 syntaxError(pattern
, i
, parseError
);
609 for (i
=0; i
<subformatCount
; ++i
) {
610 delete subformats
[i
].format
;
612 argTypeCount
= subformatCount
= 0;
614 // -------------------------------------
615 // Converts this MessageFormat instance to a pattern.
// Rebuilds a pattern string into `appendTo`. For each subformat it
// copies the literal text up to the subformat's offset (re-quoting it
// via copyAndFixQuotes), then emits '{', the argument number, an
// optional type/style description chosen by comparing the format object
// with freshly created locale templates, and '}'.
618 MessageFormat::toPattern(UnicodeString
& appendTo
) const {
619 // later, make this more extensible
620 int32_t lastOffset
= 0;
622 for (i
=0; i
<subformatCount
; ++i
) {
623 copyAndFixQuotes(fPattern
, lastOffset
, subformats
[i
].offset
, appendTo
);
624 lastOffset
= subformats
[i
].offset
;
625 appendTo
+= LEFT_CURLY_BRACE
;
626 itos(subformats
[i
].arg
, appendTo
);
627 Format
* fmt
= subformats
[i
].format
;
629 // do nothing, string format
631 else if (fmt
->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
633 UErrorCode ec
= U_ZERO_ERROR
;
634 NumberFormat
& formatAlias
= *(NumberFormat
*)fmt
;
// NOTE(review): the four templates below are dereferenced in the
// comparisons that follow; no NULL or error-code check is visible in
// this excerpt.
635 NumberFormat
*defaultTemplate
= NumberFormat::createInstance(fLocale
, ec
);
636 NumberFormat
*currencyTemplate
= NumberFormat::createCurrencyInstance(fLocale
, ec
);
637 NumberFormat
*percentTemplate
= NumberFormat::createPercentInstance(fLocale
, ec
);
638 NumberFormat
*integerTemplate
= createIntegerFormat(fLocale
, ec
);
641 appendTo
+= ID_NUMBER
;
// Only a non-default number format needs a style keyword or pattern.
642 if (formatAlias
!= *defaultTemplate
) {
644 if (formatAlias
== *currencyTemplate
) {
645 appendTo
+= ID_CURRENCY
;
647 else if (formatAlias
== *percentTemplate
) {
648 appendTo
+= ID_PERCENT
;
650 else if (formatAlias
== *integerTemplate
) {
651 appendTo
+= ID_INTEGER
;
// No keyword matched: fall back to the explicit DecimalFormat pattern.
654 UnicodeString buffer
;
655 appendTo
+= ((DecimalFormat
*)fmt
)->toPattern(buffer
);
659 delete defaultTemplate
;
660 delete currencyTemplate
;
661 delete percentTemplate
;
662 delete integerTemplate
;
664 else if (fmt
->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
665 DateFormat
& formatAlias
= *(DateFormat
*)fmt
;
// NOTE(review): as above, the date/time templates are dereferenced
// without a visible NULL check.
666 DateFormat
*defaultDateTemplate
= DateFormat::createDateInstance(DateFormat::kDefault
, fLocale
);
667 DateFormat
*shortDateTemplate
= DateFormat::createDateInstance(DateFormat::kShort
, fLocale
);
668 DateFormat
*longDateTemplate
= DateFormat::createDateInstance(DateFormat::kLong
, fLocale
);
669 DateFormat
*fullDateTemplate
= DateFormat::createDateInstance(DateFormat::kFull
, fLocale
);
670 DateFormat
*defaultTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kDefault
, fLocale
);
671 DateFormat
*shortTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kShort
, fLocale
);
672 DateFormat
*longTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kLong
, fLocale
);
673 DateFormat
*fullTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kFull
, fLocale
);
677 if (formatAlias
== *defaultDateTemplate
) {
680 else if (formatAlias
== *shortDateTemplate
) {
683 appendTo
+= ID_SHORT
;
// NOTE(review): this repeats the comparison made by the first test of
// this chain, so this "medium" branch looks unreachable.
685 else if (formatAlias
== *defaultDateTemplate
) {
688 appendTo
+= ID_MEDIUM
;
690 else if (formatAlias
== *longDateTemplate
) {
695 else if (formatAlias
== *fullDateTemplate
) {
700 else if (formatAlias
== *defaultTimeTemplate
) {
703 else if (formatAlias
== *shortTimeTemplate
) {
706 appendTo
+= ID_SHORT
;
// NOTE(review): repeats the defaultTimeTemplate comparison made two
// tests earlier, so this branch also looks unreachable.
708 else if (formatAlias
== *defaultTimeTemplate
) {
711 appendTo
+= ID_MEDIUM
;
713 else if (formatAlias
== *longTimeTemplate
) {
718 else if (formatAlias
== *fullTimeTemplate
) {
// No template matched: fall back to the explicit SimpleDateFormat pattern.
724 UnicodeString buffer
;
727 appendTo
+= ((SimpleDateFormat
*)fmt
)->toPattern(buffer
);
730 delete defaultDateTemplate
;
731 delete shortDateTemplate
;
732 delete longDateTemplate
;
733 delete fullDateTemplate
;
734 delete defaultTimeTemplate
;
735 delete shortTimeTemplate
;
736 delete longTimeTemplate
;
737 delete fullTimeTemplate
;
738 // {sfb} there should be a more efficient way to do this!
740 else if (fmt
->getDynamicClassID() == ChoiceFormat::getStaticClassID()) {
741 UnicodeString buffer
;
743 appendTo
+= ID_CHOICE
;
745 appendTo
+= ((ChoiceFormat
*)fmt
)->toPattern(buffer
);
748 //appendTo += ", unknown";
750 appendTo
+= RIGHT_CURLY_BRACE
;
// Copy the literal text that follows the last subformat.
752 copyAndFixQuotes(fPattern
, lastOffset
, fPattern
.length(), appendTo
);
756 // -------------------------------------
757 // Adopts the new formats array and updates the array count.
758 // This MessageFormat instance owns the new formats.
// Replaces all subformat objects with the `count` objects in
// `newFormats`, taking ownership of them. A NULL array or a negative
// count is rejected by the first test.
761 MessageFormat::adoptFormats(Format
** newFormats
,
763 if (newFormats
== NULL
|| count
< 0) {
768 if (allocateSubformats(count
)) {
// Release the formats being replaced, then take over the new pointers.
769 for (i
=0; i
<subformatCount
; ++i
) {
770 delete subformats
[i
].format
;
772 for (i
=0; i
<count
; ++i
) {
773 subformats
[i
].format
= newFormats
[i
];
775 subformatCount
= count
;
777 // An adopt method must always take ownership. Delete
778 // the incoming format objects and return unchanged.
779 for (i
=0; i
<count
; ++i
) {
780 delete newFormats
[i
];
784 // TODO: What about the .offset and .arg fields?
787 // -------------------------------------
788 // Sets the new formats array and updates the array count.
789 // This MessageFormat instance makes a copy of the new formats.
// Replaces all subformat objects with clones of the `count` objects in
// `newFormats`; the caller keeps ownership of the originals. A NULL
// array or a negative count is rejected by the first test.
792 MessageFormat::setFormats(const Format
** newFormats
,
794 if (newFormats
== NULL
|| count
< 0) {
798 if (allocateSubformats(count
)) {
// Release the formats being replaced.
800 for (i
=0; i
<subformatCount
; ++i
) {
801 delete subformats
[i
].format
;
// A NULL entry is stored as NULL; anything else is cloned.
805 for (i
=0; i
<count
; ++i
) {
806 subformats
[i
].format
= newFormats
[i
] ? newFormats
[i
]->clone() : NULL
;
808 subformatCount
= count
;
811 // TODO: What about the .offset and .arg fields?
814 // -------------------------------------
815 // Adopt a single format.
816 // Do nothing if the format number is not less than the array count.
// Replaces the format object of subformat `n` with `newFormat`, taking
// ownership of it and deleting the previous one.
// NOTE(review): what happens to `newFormat` when `n` is out of range is
// not visible in this excerpt.
819 MessageFormat::adoptFormat(int32_t n
, Format
*newFormat
) {
820 if (n
< 0 || n
>= subformatCount
) {
823 delete subformats
[n
].format
;
824 subformats
[n
].format
= newFormat
;
828 // -------------------------------------
829 // Set a single format.
830 // Do nothing if the variable is not less than the array count.
// Replaces the format object of subformat `n` with a clone of
// `newFormat`; an out-of-range `n` leaves the object untouched.
833 MessageFormat::setFormat(int32_t n
, const Format
& newFormat
) {
834 if (n
>= 0 && n
< subformatCount
) {
835 delete subformats
[n
].format
;
// NOTE(review): this tests the address of a reference against NULL;
// confirm whether compilers keep the test.
836 if (&newFormat
== NULL
) {
837 // This should never happen -- but we'll be nice if it does
838 subformats
[n
].format
= NULL
;
840 subformats
[n
].format
= newFormat
.clone();
845 // -------------------------------------
846 // Gets the format array.
// Returns an array of aliases to the subformat objects and stores the
// number of entries in `cnt`. The array is owned by this object and is
// built or enlarged on demand.
849 MessageFormat::getFormats(int32_t& cnt
) const
851 // This old API returns an array (which we hold) of Format*
852 // pointers. The array is valid up to the next call to any
853 // method on this object. We construct and resize an array
854 // on demand that contains aliases to the subformats[i].format
// Cast away const so this const method can update the alias cache.
856 MessageFormat
* t
= (MessageFormat
*) this;
858 if (formatAliases
== NULL
) {
// First use: allocate room for at least 10 aliases.
859 t
->formatAliasesCapacity
= (subformatCount
<10) ? 10 : subformatCount
;
860 Format
** a
= (Format
**)
861 uprv_malloc(sizeof(Format
*) * formatAliasesCapacity
);
865 t
->formatAliases
= a
;
866 } else if (subformatCount
> formatAliasesCapacity
) {
// Cache too small: grow it to exactly subformatCount entries.
867 Format
** a
= (Format
**)
868 uprv_realloc(formatAliases
, sizeof(Format
*) * subformatCount
);
872 t
->formatAliases
= a
;
873 t
->formatAliasesCapacity
= subformatCount
;
// Refresh the aliases on every call.
875 for (int32_t i
=0; i
<subformatCount
; ++i
) {
876 t
->formatAliases
[i
] = subformats
[i
].format
;
878 cnt
= subformatCount
;
879 return (const Format
**)formatAliases
;
882 // -------------------------------------
883 // Formats the source Formattable array and copy into the result buffer.
884 // Ignore the FieldPosition result for error checking.
// Formats the `source` argument array into `appendTo` by delegating to
// the worker overload with a recursion-protection value of 0. The first
// test checks whether `success` already holds a failure.
887 MessageFormat::format(const Formattable
* source
,
889 UnicodeString
& appendTo
,
890 FieldPosition
& ignore
,
891 UErrorCode
& success
) const
893 if (U_FAILURE(success
))
896 return format(source
, cnt
, appendTo
, ignore
, 0, success
);
899 // -------------------------------------
900 // Internally creates a MessageFormat instance based on the
901 // pattern and formats the arguments Formattable array and
902 // copy into the appendTo buffer.
// One-shot formatting: builds a temporary MessageFormat from `pattern`
// and formats `arguments` into `appendTo`, using a throwaway
// FieldPosition.
905 MessageFormat::format( const UnicodeString
& pattern
,
906 const Formattable
* arguments
,
908 UnicodeString
& appendTo
,
911 MessageFormat
temp(pattern
, success
);
912 FieldPosition
ignore(0);
913 temp
.format(arguments
, cnt
, appendTo
, ignore
, success
);
917 // -------------------------------------
918 // Formats the source Formattable object and copy into the
919 // appendTo buffer. The Formattable object must be an array
920 // of Formattable instances, returns error otherwise.
// Formats a single Formattable that must hold an array of arguments;
// any other type sets U_ILLEGAL_ARGUMENT_ERROR. The array is unpacked
// and handed to the worker overload.
923 MessageFormat::format(const Formattable
& source
,
924 UnicodeString
& appendTo
,
925 FieldPosition
& ignore
,
926 UErrorCode
& success
) const
930 if (U_FAILURE(success
))
932 if (source
.getType() != Formattable::kArray
) {
933 success
= U_ILLEGAL_ARGUMENT_ERROR
;
// getArray() stores the element count in cnt.
936 const Formattable
* tmpPtr
= source
.getArray(cnt
);
938 return format(tmpPtr
, cnt
, appendTo
, ignore
, 0, success
);
941 // -------------------------------------
942 // Formats the arguments Formattable array and copy into the appendTo buffer.
943 // Ignore the FieldPosition result for error checking.
// Worker for all format() overloads. Walks the subformats in order,
// appending the literal pattern text before each one and then the
// formatted argument. An argument number beyond `cnt` is written back
// out as "{n}".
946 MessageFormat::format(const Formattable
* arguments
,
948 UnicodeString
& appendTo
,
949 FieldPosition
& status
,
950 int32_t recursionProtection
,
951 UErrorCode
& success
) const
953 // Allow NULL array only if cnt == 0
954 if (cnt
< 0 || (cnt
&& arguments
== NULL
)) {
955 success
= U_ILLEGAL_ARGUMENT_ERROR
;
959 int32_t lastOffset
= 0;
960 for (int32_t i
=0; i
<subformatCount
; ++i
) {
961 // Append the prefix of current format element.
962 appendTo
.append(fPattern
, lastOffset
, subformats
[i
].offset
- lastOffset
);
963 lastOffset
= subformats
[i
].offset
;
964 int32_t argumentNumber
= subformats
[i
].arg
;
965 // Checks the scope of the argument number.
966 if (argumentNumber
>= cnt
) {
967 appendTo
+= LEFT_CURLY_BRACE
;
968 itos(argumentNumber
, appendTo
);
969 appendTo
+= RIGHT_CURLY_BRACE
;
973 const Formattable
*obj
= arguments
+ argumentNumber
;
974 Formattable::Type type
= obj
->getType();
976 // Recursively calling the format process only if the current
977 // format argument refers to a ChoiceFormat object.
978 Format
* fmt
= subformats
[i
].format
;
// Format the argument with the explicit subformat into `arg` first.
981 fmt
->format(*obj
, arg
, success
);
983 // Needs to reprocess the ChoiceFormat option by using the
984 // MessageFormat pattern application.
985 if (fmt
->getDynamicClassID() == ChoiceFormat::getStaticClassID() &&
986 arg
.indexOf(LEFT_CURLY_BRACE
) >= 0) {
987 MessageFormat
temp(arg
, fLocale
, success
);
988 // TODO: Implement recursion protection
989 temp
.format(arguments
, cnt
, appendTo
, status
, recursionProtection
, success
);
990 if (U_FAILURE(success
)) {
// The remaining branches choose a default formatter from the runtime
// type of the argument.
998 // If the obj data type is a number, use a NumberFormat instance.
999 else if ((type
== Formattable::kDouble
) ||
1000 (type
== Formattable::kLong
) ||
1001 (type
== Formattable::kInt64
)) {
1003 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1007 if (type
== Formattable::kDouble
) {
1008 nf
->format(obj
->getDouble(), appendTo
);
1009 } else if (type
== Formattable::kLong
) {
1010 nf
->format(obj
->getLong(), appendTo
);
1012 nf
->format(obj
->getInt64(), appendTo
);
1015 // If the obj data type is a Date instance, use a DateFormat instance.
1016 else if (type
== Formattable::kDate
) {
1017 const DateFormat
* df
= getDefaultDateFormat(success
);
1021 df
->format(obj
->getDate(), appendTo
);
1023 else if (type
== Formattable::kString
) {
1024 appendTo
+= obj
->getString();
1027 success
= U_ILLEGAL_ARGUMENT_ERROR
;
1031 // Appends the rest of the pattern characters after the real last offset.
1032 appendTo
.append(fPattern
, lastOffset
, 0x7fffffff);
1037 // -------------------------------------
1038 // Parses the source pattern and returns the Formattable objects array,
1039 // the array count and the ending parse position. The caller of this method
// Parses `source`, starting at pos.getIndex(), against the stored
// pattern. Literal text between subformats must match the source; each
// subformat then fills in the result slot for its argument number. On
// failure the error index of `pos` is set, the result array is deleted
// and NULL is returned.
1043 MessageFormat::parse(const UnicodeString
& source
,
1045 int32_t& count
) const
1047 // Allocate at least one element. Allocating an array of length
1048 // zero causes problems on some platforms (e.g. Win32).
1049 Formattable
*resultArray
= new Formattable
[argTypeCount
? argTypeCount
: 1];
1050 int32_t patternOffset
= 0;
1051 int32_t sourceOffset
= pos
.getIndex();
1052 ParsePosition
tempPos(0);
1053 count
= 0; // {sfb} reset to zero
1055 for (int32_t i
= 0; i
< subformatCount
; ++i
) {
1056 // match up to format
1057 len
= subformats
[i
].offset
- patternOffset
;
// The literal text before this subformat must appear in the source.
1059 fPattern
.compare(patternOffset
, len
, source
, sourceOffset
, len
) == 0) {
1060 sourceOffset
+= len
;
1061 patternOffset
+= len
;
1068 Format
* fmt
= subformats
[i
].format
;
1069 int32_t arg
= subformats
[i
].arg
;
1070 if (fmt
== NULL
) { // string format
1071 // if at end, use longest possible match
1072 // otherwise uses first match to intervening string
1073 // does NOT recursively try all possibilities
// tempLength: pattern offset where the following literal text ends.
1074 int32_t tempLength
= (i
+1<subformatCount
) ?
1075 subformats
[i
+1].offset
: fPattern
.length();
1078 if (patternOffset
>= tempLength
) {
1079 next
= source
.length();
// Otherwise search the source for the literal text that follows.
1082 UnicodeString buffer
;
1083 fPattern
.extract(patternOffset
,tempLength
- patternOffset
, buffer
);
1084 next
= source
.indexOf(buffer
, sourceOffset
);
1091 UnicodeString buffer
;
1092 source
.extract(sourceOffset
,next
- sourceOffset
, buffer
);
1093 UnicodeString strValue
= buffer
;
1094 UnicodeString
temp(LEFT_CURLY_BRACE
);
1095 // {sfb} check this later
1097 temp
+= RIGHT_CURLY_BRACE
;
// Store the text unless it equals the brace-delimited string in `temp`.
1098 if (strValue
!= temp
) {
1099 source
.extract(sourceOffset
,next
- sourceOffset
, buffer
);
1100 resultArray
[arg
].setString(buffer
);
1101 // {sfb} not sure about this
1102 if ((arg
+ 1) > count
) {
1106 sourceOffset
= next
;
// Explicit subformat: let it parse from the current source position.
1110 tempPos
.setIndex(sourceOffset
);
1111 fmt
->parseObject(source
, resultArray
[arg
], tempPos
);
// An unchanged index means the subformat consumed nothing.
1112 if (tempPos
.getIndex() == sourceOffset
) {
1116 if ((arg
+ 1) > count
) {
1119 sourceOffset
= tempPos
.getIndex(); // update
// Finally the literal text after the last subformat must match too.
1122 len
= fPattern
.length() - patternOffset
;
1124 fPattern
.compare(patternOffset
, len
, source
, sourceOffset
, len
) == 0) {
1125 pos
.setIndex(sourceOffset
+ len
);
1128 // else fall through...
1131 pos
.setErrorIndex(sourceOffset
);
1132 delete [] resultArray
;
1134 return NULL
; // leave index as is to signal error
1137 // -------------------------------------
1138 // Parses the source string and returns the array of
1139 // Formattable objects and the array count. The caller
1140 // owns the returned array.
// Parses `source` from offset zero. If the parse position is still at
// index 0 afterwards, U_MESSAGE_PARSE_ERROR is reported in `success`.
1143 MessageFormat::parse(const UnicodeString
& source
,
1145 UErrorCode
& success
) const
1147 ParsePosition
status(0);
1148 // Calls the actual implementation method and starts
1149 // from zero offset of the source text.
1150 Formattable
* result
= parse(source
, status
, cnt
);
1151 if (status
.getIndex() == 0) {
1152 success
= U_MESSAGE_PARSE_ERROR
;
1159 // -------------------------------------
1160 // Parses the source text and copy into the result buffer.
// Parses `source` and, when parsing yields an array, hands that array
// to `result`, which takes ownership of it. `result` is left untouched
// when parse() returns NULL.
1163 MessageFormat::parseObject( const UnicodeString
& source
,
1164 Formattable
& result
,
1165 ParsePosition
& status
) const
1168 Formattable
* tmpResult
= parse(source
, status
, cnt
);
1169 if (tmpResult
!= NULL
)
1170 result
.adoptArray(tmpResult
, cnt
);
// C++ wrapper around umsg_autoQuoteApostrophe(): converts `pattern`
// into a new string using a scratch buffer of 2*length+1 code units.
// On any failure the result is set to bogus.
1174 MessageFormat::autoQuoteApostrophe(const UnicodeString
& pattern
, UErrorCode
& status
) {
1175 UnicodeString result
;
1176 if (U_SUCCESS(status
)) {
1177 int32_t plen
= pattern
.length();
1178 const UChar
* pat
= pattern
.getBuffer();
1179 int32_t blen
= plen
* 2 + 1; // space for null termination, convenience
// Borrow result's internal buffer so the C function can write into it.
1180 UChar
* buf
= result
.getBuffer(blen
);
1182 status
= U_MEMORY_ALLOCATION_ERROR
;
1184 int32_t len
= umsg_autoQuoteApostrophe(pat
, plen
, buf
, blen
, &status
);
// Hand the buffer back with the final length (0 on failure).
1185 result
.releaseBuffer(U_SUCCESS(status
) ? len
: 0);
1188 if (U_FAILURE(status
)) {
1189 result
.setToBogus();
1194 // -------------------------------------
1196 static Format
* makeRBNF(URBNFRuleSetTag tag
, const Locale
& locale
, const UnicodeString
& defaultRuleSet
, UErrorCode
& ec
) {
1197 RuleBasedNumberFormat
* fmt
= new RuleBasedNumberFormat(tag
, locale
, ec
);
1198 if (U_SUCCESS(ec
) && defaultRuleSet
.length() > 0) {
1199 fmt
->setDefaultRuleSet(defaultRuleSet
, ec
);
1200 if (U_FAILURE(ec
)) { // ignore unrecognized default rule set
1208 * Reads the segments[] array (see applyPattern()) and parses the
1209 * segments[1..3] into a Format* object. Stores the format object in
1210 * the subformats[] array. Updates the argTypes[] array type
1211 * information for the corresponding argument.
1213 * @param formatNumber index into subformats[] for this format
1214 * @param segments array of strings with the parsed pattern segments
1215 * @param parseError parse error data (output param)
1216 * @param ec error code
// Builds the subformat for one {...} pattern element from segments[1..3]
// and records it at subformats[formatNumber]; also records the argument
// type at argTypes[argumentNumber]. Does nothing when `ec` already holds
// a failure.
1219 MessageFormat::makeFormat(int32_t formatNumber
,
1220 UnicodeString
* segments
,
1221 UParseError
& parseError
,
1223 if (U_FAILURE(ec
)) {
1227 // Parse the argument number
1228 int32_t argumentNumber
= stou(segments
[1]); // always unlocalized!
1229 if (argumentNumber
< 0) {
1230 ec
= U_INVALID_FORMAT_ERROR
;
1234 // Parse the format, recording the argument type and creating a
1235 // new Format object (except for string arguments).
1236 Formattable::Type argType
;
1238 int32_t typeID
, styleID
;
1239 DateFormat::EStyle style
;
// Dispatch on the type keyword found in segment 2.
1241 switch (typeID
= findKeyword(segments
[2], TYPE_IDS
)) {
1244 argType
= Formattable::kString
;
1248 argType
= Formattable::kDouble
;
// Number: choose the factory by the style keyword in segment 3.
1250 switch (findKeyword(segments
[3], NUMBER_STYLE_IDS
)) {
1252 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1255 fmt
= NumberFormat::createCurrencyInstance(fLocale
, ec
);
1258 fmt
= NumberFormat::createPercentInstance(fLocale
, ec
);
1261 argType
= Formattable::kLong
;
1262 fmt
= createIntegerFormat(fLocale
, ec
);
// Not a known keyword: treat segment 3 as a DecimalFormat pattern.
1265 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1267 fmt
->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1268 ((DecimalFormat
*)fmt
)->applyPattern(segments
[3],parseError
,ec
);
1276 argType
= Formattable::kDate
;
// Date/time: map the style keyword to an EStyle, defaulting to kDefault.
1277 styleID
= findKeyword(segments
[3], DATE_STYLE_IDS
);
1278 style
= (styleID
>= 0) ? DATE_STYLES
[styleID
] : DateFormat::kDefault
;
1281 fmt
= DateFormat::createDateInstance(style
, fLocale
);
1283 fmt
= DateFormat::createTimeInstance(style
, fLocale
);
// Applies segment 3 as an explicit SimpleDateFormat pattern.
1288 fmt
->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
1289 ((SimpleDateFormat
*)fmt
)->applyPattern(segments
[3]);
1294 argType
= Formattable::kDouble
;
1296 fmt
= new ChoiceFormat(segments
[3], parseError
, ec
);
// Rule-based formats; segment 3 names the default rule set.
1300 argType
= Formattable::kDouble
;
1301 fmt
= makeRBNF(URBNF_SPELLOUT
, fLocale
, segments
[3], ec
);
1304 argType
= Formattable::kDouble
;
1305 fmt
= makeRBNF(URBNF_ORDINAL
, fLocale
, segments
[3], ec
);
1308 argType
= Formattable::kDouble
;
1309 fmt
= makeRBNF(URBNF_DURATION
, fLocale
, segments
[3], ec
);
1312 argType
= Formattable::kString
;
1313 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
// A non-string type with no format object and no other error means the
// allocation failed.
1317 if (fmt
==NULL
&& argType
!=Formattable::kString
&& U_SUCCESS(ec
)) {
1318 ec
= U_MEMORY_ALLOCATION_ERROR
;
1321 if (!allocateSubformats(formatNumber
+1) ||
1322 !allocateArgTypes(argumentNumber
+1)) {
1323 ec
= U_MEMORY_ALLOCATION_ERROR
;
1326 if (U_FAILURE(ec
)) {
1331 // Parse succeeded; record results in our arrays
1332 subformats
[formatNumber
].format
= fmt
;
// The offset is the length of the plain text accumulated so far.
1333 subformats
[formatNumber
].offset
= segments
[0].length();
1334 subformats
[formatNumber
].arg
= argumentNumber
;
1335 subformatCount
= formatNumber
+1;
1337 // Careful here: argumentNumber may in general arrive out of
1338 // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
1339 argTypes
[argumentNumber
] = argType
;
1340 if (argumentNumber
+1 > argTypeCount
) {
1341 argTypeCount
= argumentNumber
+1;
1345 // -------------------------------------
1346 // Finds the string, s, in the string array, list.
// findKeyword: searches the array `list` for the keyword `s`.
// The array is walked until a null entry is reached. An empty `s` yields 0
// (the default entry) without any search. The comparison is done on a
// trimmed, lower-cased copy of `s`, so `s` itself is not modified.
// NOTE(review): the return statements for "match found" and "no match" are
// not shown in this listing; confirm the returned values in the full file.
1347 int32_t MessageFormat::findKeyword(const UnicodeString
& s
,
1348 const UChar
* const *list
)
1350 if (s
.length() == 0)
1351 return 0; // default
// Work on a copy so the caller's string stays untouched.
1353 UnicodeString buffer
= s
;
1354 // Trims the space characters and turns all characters
1355 // in s to lower case.
// NOTE(review): toLower is given "" as its argument -- presumably selecting a
// locale-neutral lower-casing; confirm against the UnicodeString API.
1356 buffer
.trim().toLower("");
// The loop stops at the first null pointer in list.
1357 for (int32_t i
= 0; list
[i
]; ++i
) {
// The branch is taken when compare() returns zero for entry i.
1358 if (!buffer
.compare(list
[i
], u_strlen(list
[i
]))) {
1365 // -------------------------------------
1366 // Copies the given range of the source text to the target buffer,
1367 // quoting the special characters {, } and ' along the way.
// copyAndFixQuotes: walks source[start .. end) and appends to appendTo,
// wrapping curly braces in single quotes and doubling single quotes.
// NOTE(review): this listing omits several original lines, including the
// declarations of start and end, every update of gotLB, the conditions that
// choose between the two outputs for '}', and the handling of all other
// characters; confirm those against the full file.
1370 MessageFormat::copyAndFixQuotes(const UnicodeString
& source
,
1373 UnicodeString
& appendTo
)
// Flag initialised to FALSE; presumably records a pending '{' (its updates
// are not visible here).
1375 UBool gotLB
= FALSE
;
1377 for (int32_t i
= start
; i
< end
; ++i
) {
1378 UChar ch
= source
[i
];
// '{' is emitted as the three characters '{'.
1379 if (ch
== LEFT_CURLY_BRACE
) {
1380 appendTo
+= SINGLE_QUOTE
;
1381 appendTo
+= LEFT_CURLY_BRACE
;
1382 appendTo
+= SINGLE_QUOTE
;
// '}' is emitted either bare or as '}'; the deciding condition is not shown.
1385 else if (ch
== RIGHT_CURLY_BRACE
) {
1387 appendTo
+= RIGHT_CURLY_BRACE
;
1392 appendTo
+= SINGLE_QUOTE
;
1393 appendTo
+= RIGHT_CURLY_BRACE
;
1394 appendTo
+= SINGLE_QUOTE
;
// A single quote is doubled.
1397 else if (ch
== SINGLE_QUOTE
) {
1398 appendTo
+= SINGLE_QUOTE
;
1399 appendTo
+= SINGLE_QUOTE
;
1408 * Convenience method that ought to be in NumberFormat
// createIntegerFormat: obtains a NumberFormat for `locale` and, when it is a
// DecimalFormat, configures it for integers.
// `status` is passed straight to NumberFormat::createInstance.
// NOTE(review): the return type and the return statement are not shown in
// this listing.
1411 MessageFormat::createIntegerFormat(const Locale
& locale
, UErrorCode
& status
) const {
1412 NumberFormat
*temp
= NumberFormat::createInstance(locale
, status
);
// Only a non-NULL DecimalFormat is adjusted; anything else is left as created.
1413 if (temp
!= NULL
&& temp
->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1414 DecimalFormat
*temp2
= (DecimalFormat
*) temp
;
// No fraction digits, no forced decimal separator, integer-only parsing.
1415 temp2
->setMaximumFractionDigits(0);
1416 temp2
->setDecimalSeparatorAlwaysShown(FALSE
);
1417 temp2
->setParseIntegerOnly(TRUE
);
1424 * Return the default number format. Used to format a numeric
1425 * argument when subformats[i].format is NULL. Returns NULL
1428 * Semantically const but may modify *this.
// getDefaultNumberFormat: returns defaultNumberFormat, creating it for
// fLocale on the first call that finds it NULL.
// ec receives any failure from NumberFormat::createInstance, or
// U_MEMORY_ALLOCATION_ERROR when creation returns NULL without a failure.
1430 const NumberFormat
* MessageFormat::getDefaultNumberFormat(UErrorCode
& ec
) const {
1431 if (defaultNumberFormat
== NULL
) {
// The method is const, so the member is assigned through a non-const alias
// of this.
1432 MessageFormat
* t
= (MessageFormat
*) this;
1433 t
->defaultNumberFormat
= NumberFormat::createInstance(fLocale
, ec
);
// On failure the new object (if any) is deleted and the member reset to
// NULL, so NULL is what gets returned below.
1434 if (U_FAILURE(ec
)) {
1435 delete t
->defaultNumberFormat
;
1436 t
->defaultNumberFormat
= NULL
;
// No failure code but still NULL: report it as an allocation failure.
1437 } else if (t
->defaultNumberFormat
== NULL
) {
1438 ec
= U_MEMORY_ALLOCATION_ERROR
;
1441 return defaultNumberFormat
;
1445 * Return the default date format. Used to format a date
1446 * argument when subformats[i].format is NULL. Returns NULL
1449 * Semantically const but may modify *this.
// getDefaultDateFormat: returns defaultDateFormat, creating a short-date /
// short-time formatter for fLocale on the first call that finds it NULL.
// ec is set to U_MEMORY_ALLOCATION_ERROR when creation returns NULL; the
// factory call below takes no error code, so that is the only place this
// function writes ec.
1451 const DateFormat
* MessageFormat::getDefaultDateFormat(UErrorCode
& ec
) const {
1452 if (defaultDateFormat
== NULL
) {
// The method is const, so the member is assigned through a non-const alias
// of this.
1453 MessageFormat
* t
= (MessageFormat
*) this;
1454 t
->defaultDateFormat
= DateFormat::createDateTimeInstance(DateFormat::kShort
, DateFormat::kShort
, fLocale
);
1455 if (t
->defaultDateFormat
== NULL
) {
1456 ec
= U_MEMORY_ALLOCATION_ERROR
;
1459 return defaultDateFormat
;
1464 #endif /* #if !UCONFIG_NO_FORMATTING */