2 *******************************************************************************
3 * Copyright (C) 1997-2004, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation.
14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
15 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
16 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 02/22/99 stephen Removed character literals for EBCDIC safety
19 ********************************************************************************
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_FORMATTING
26 #include "unicode/msgfmt.h"
27 #include "unicode/decimfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/smpdtfmt.h"
30 #include "unicode/choicfmt.h"
31 #include "unicode/ustring.h"
32 #include "unicode/ucnv_err.h"
33 #include "unicode/uchar.h"
34 #include "unicode/rbnf.h"
40 // *****************************************************************************
41 // class MessageFormat
42 // *****************************************************************************
44 #define COMMA ((UChar)0x002C)
45 #define SINGLE_QUOTE ((UChar)0x0027)
46 #define LEFT_CURLY_BRACE ((UChar)0x007B)
47 #define RIGHT_CURLY_BRACE ((UChar)0x007D)
49 //---------------------------------------
52 static const UChar ID_EMPTY
[] = {
53 0 /* empty string, used for default so that null can mark end of list */
56 static const UChar ID_NUMBER
[] = {
57 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
59 static const UChar ID_DATE
[] = {
60 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
62 static const UChar ID_TIME
[] = {
63 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
65 static const UChar ID_CHOICE
[] = {
66 0x63, 0x68, 0x6F, 0x69, 0x63, 0x65, 0 /* "choice" */
68 static const UChar ID_SPELLOUT
[] = {
69 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
71 static const UChar ID_ORDINAL
[] = {
72 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
74 static const UChar ID_DURATION
[] = {
75 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
78 // MessageFormat Type List Number, Date, Time or Choice
79 static const UChar
* const TYPE_IDS
[] = {
91 static const UChar ID_CURRENCY
[] = {
92 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
94 static const UChar ID_PERCENT
[] = {
95 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
97 static const UChar ID_INTEGER
[] = {
98 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
101 // NumberFormat modifier list, default, currency, percent or integer
102 static const UChar
* const NUMBER_STYLE_IDS
[] = {
110 static const UChar ID_SHORT
[] = {
111 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
113 static const UChar ID_MEDIUM
[] = {
114 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
116 static const UChar ID_LONG
[] = {
117 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
119 static const UChar ID_FULL
[] = {
120 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
123 // DateFormat modifier list, default, short, medium, long or full
124 static const UChar
* const DATE_STYLE_IDS
[] = {
133 static const DateFormat::EStyle DATE_STYLES
[] = {
134 DateFormat::kDefault
,
141 static const int32_t DEFAULT_INITIAL_CAPACITY
= 10;
145 // -------------------------------------
146 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat
)
148 //--------------------------------------------------------------------
151 * Convert a string to an unsigned decimal, ignoring rule whitespace.
152 * @return a non-negative number if successful, or a negative number
155 static int32_t stou(const UnicodeString
& string
) {
159 for (int32_t i
=0; i
<string
.length(); i
+=U16_LENGTH(c
)) {
160 c
= string
.char32At(i
);
161 if (uprv_isRuleWhiteSpace(c
)) {
164 int32_t d
= u_digit(c
, 10);
165 if (d
< 0 || ++count
> 10) {
174 * Convert an integer value to a string and append the result to
175 * the given UnicodeString.
177 static UnicodeString
& itos(int32_t i
, UnicodeString
& appendTo
) {
179 uprv_itou(temp
,16,i
,10,0); // 10 == radix
180 appendTo
.append(temp
);
184 // -------------------------------------
185 // Creates a MessageFormat instance based on the pattern.
187 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
189 : fLocale(Locale::getDefault()), // Uses the default locale
191 formatAliasesCapacity(0),
194 subformatCapacity(0),
198 defaultNumberFormat(NULL
),
199 defaultDateFormat(NULL
)
201 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY
) ||
202 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY
)) {
203 success
= U_MEMORY_ALLOCATION_ERROR
;
206 applyPattern(pattern
, success
);
207 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
210 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
211 const Locale
& newLocale
,
213 : fLocale(newLocale
),
215 formatAliasesCapacity(0),
218 subformatCapacity(0),
222 defaultNumberFormat(NULL
),
223 defaultDateFormat(NULL
)
225 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY
) ||
226 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY
)) {
227 success
= U_MEMORY_ALLOCATION_ERROR
;
230 applyPattern(pattern
, success
);
231 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
234 MessageFormat::MessageFormat(const UnicodeString
& pattern
,
235 const Locale
& newLocale
,
236 UParseError
& parseError
,
238 : fLocale(newLocale
),
240 formatAliasesCapacity(0),
243 subformatCapacity(0),
247 defaultNumberFormat(NULL
),
248 defaultDateFormat(NULL
)
250 if (!allocateSubformats(DEFAULT_INITIAL_CAPACITY
) ||
251 !allocateArgTypes(DEFAULT_INITIAL_CAPACITY
)) {
252 success
= U_MEMORY_ALLOCATION_ERROR
;
255 applyPattern(pattern
, parseError
, success
);
256 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
259 MessageFormat::MessageFormat(const MessageFormat
& that
)
262 formatAliasesCapacity(0),
265 subformatCapacity(0),
269 defaultNumberFormat(NULL
),
270 defaultDateFormat(NULL
)
275 MessageFormat::~MessageFormat()
278 for (idx
= 0; idx
< subformatCount
; idx
++) {
279 delete subformats
[idx
].format
;
281 uprv_free(subformats
);
283 subformatCount
= subformatCapacity
= 0;
287 argTypeCount
= argTypeCapacity
= 0;
289 uprv_free(formatAliases
);
291 delete defaultNumberFormat
;
292 delete defaultDateFormat
;
295 //--------------------------------------------------------------------
296 // Variable-size array management
299 * Allocate subformats[] to at least the given capacity and return
300 * TRUE if successful. If not, leave subformats[] unchanged.
302 * If subformats is NULL, allocate it. If it is not NULL, enlarge it
303 * if necessary to be at least as large as specified.
305 UBool
MessageFormat::allocateSubformats(int32_t capacity
) {
306 if (subformats
== NULL
) {
307 subformats
= (Subformat
*) uprv_malloc(sizeof(*subformats
) * capacity
);
308 subformatCapacity
= capacity
;
310 if (subformats
== NULL
) {
311 subformatCapacity
= 0;
314 } else if (subformatCapacity
< capacity
) {
315 if (capacity
< 2*subformatCapacity
) {
316 capacity
= 2*subformatCapacity
;
318 Subformat
* a
= (Subformat
*)
319 uprv_realloc(subformats
, sizeof(*subformats
) * capacity
);
321 return FALSE
; // request failed
324 subformatCapacity
= capacity
;
330 * Allocate argTypes[] to at least the given capacity and return
331 * TRUE if successful. If not, leave argTypes[] unchanged.
333 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
334 * if necessary to be at least as large as specified.
336 UBool
MessageFormat::allocateArgTypes(int32_t capacity
) {
337 if (argTypes
== NULL
) {
338 argTypes
= (Formattable::Type
*) uprv_malloc(sizeof(*argTypes
) * capacity
);
340 argTypeCapacity
= capacity
;
341 if (argTypes
== NULL
) {
345 for (int32_t i
=0; i
<capacity
; ++i
) {
346 argTypes
[i
] = Formattable::kString
;
348 } else if (argTypeCapacity
< capacity
) {
349 if (capacity
< 2*argTypeCapacity
) {
350 capacity
= 2*argTypeCapacity
;
352 Formattable::Type
* a
= (Formattable::Type
*)
353 uprv_realloc(argTypes
, sizeof(*argTypes
) * capacity
);
355 return FALSE
; // request failed
357 for (int32_t i
=argTypeCapacity
; i
<capacity
; ++i
) {
358 a
[i
] = Formattable::kString
;
361 argTypeCapacity
= capacity
;
366 // -------------------------------------
367 // assignment operator
370 MessageFormat::operator=(const MessageFormat
& that
)
372 // Reallocate the arrays BEFORE changing this object
374 allocateSubformats(that
.subformatCount
) &&
375 allocateArgTypes(that
.argTypeCount
)) {
377 // Calls the super class for assignment first.
378 Format::operator=(that
);
380 fPattern
= that
.fPattern
;
381 setLocale(that
.fLocale
);
384 for (j
=0; j
<subformatCount
; ++j
) {
385 delete subformats
[j
].format
;
389 for (j
=0; j
<that
.subformatCount
; ++j
) {
390 // Subformat::operator= does NOT delete this.format
391 subformats
[j
] = that
.subformats
[j
];
393 subformatCount
= that
.subformatCount
;
395 for (j
=0; j
<that
.argTypeCount
; ++j
) {
396 argTypes
[j
] = that
.argTypes
[j
];
398 argTypeCount
= that
.argTypeCount
;
404 MessageFormat::operator==(const Format
& rhs
) const
406 if (this == &rhs
) return TRUE
;
408 MessageFormat
& that
= (MessageFormat
&)rhs
;
410 // Check class ID before checking MessageFormat members
411 if (!Format::operator==(rhs
) ||
412 fPattern
!= that
.fPattern
||
413 fLocale
!= that
.fLocale
) {
418 for (j
=0; j
<subformatCount
; ++j
) {
419 if (subformats
[j
] != that
.subformats
[j
]) {
427 // -------------------------------------
428 // Creates a copy of this MessageFormat, the caller owns the copy.
431 MessageFormat::clone() const
433 return new MessageFormat(*this);
436 // -------------------------------------
437 // Sets the locale of this MessageFormat object to theLocale.
440 MessageFormat::setLocale(const Locale
& theLocale
)
442 if (fLocale
!= theLocale
) {
443 delete defaultNumberFormat
;
444 defaultNumberFormat
= NULL
;
445 delete defaultDateFormat
;
446 defaultDateFormat
= NULL
;
449 setLocaleIDs(fLocale
.getName(), fLocale
.getName());
452 // -------------------------------------
453 // Gets the locale of this MessageFormat object.
456 MessageFormat::getLocale() const
465 MessageFormat::applyPattern(const UnicodeString
& newPattern
,
468 UParseError parseError
;
469 applyPattern(newPattern
,parseError
,status
);
473 // -------------------------------------
474 // Applies the new pattern and returns an error if the pattern
477 MessageFormat::applyPattern(const UnicodeString
& pattern
,
478 UParseError
& parseError
,
484 // The pattern is broken up into segments. Each time a subformat
485 // is encountered, 4 segments are recorded. For example, consider
487 // "There {0,choice,0.0#are no files|1.0#is one file|1.0<are {0, number} files} on disk {1}."
488 // The first set of segments is:
489 // segments[0] = "There "
491 // segments[2] = "choice"
492 // segments[3] = "0.0#are no files|1.0#is one file|1.0<are {0, number} files"
494 // During parsing, the plain text is accumulated into segments[0].
495 // Segments 1..3 are used to parse each subpattern. Each time a
496 // subpattern is parsed, it creates a format object that is stored
497 // in the subformats array, together with an offset and argument
498 // number. The offset into the plain text stored in
501 // Quotes in segment 0 are handled normally. They are removed.
502 // Quotes may not occur in segments 1 or 2.
503 // Quotes in segment 3 are parsed and _copied_. This makes
504 // subformat patterns work, e.g., {1,number,'#'.##} passes
505 // the pattern "'#'.##" to DecimalFormat.
507 UnicodeString segments
[4];
508 int32_t part
= 0; // segment we are in, 0..3
509 // Record the highest argument number in the pattern. (In the
510 // subpattern {3,number} the argument number is 3.)
511 int32_t formatNumber
= 0;
512 UBool inQuote
= FALSE
;
513 int32_t braceStack
= 0;
514 // Clear error struct
515 parseError
.offset
= -1;
516 parseError
.preContext
[0] = parseError
.postContext
[0] = (UChar
)0;
517 int32_t patLen
= pattern
.length();
520 for (i
=0; i
<subformatCount
; ++i
) {
521 delete subformats
[i
].format
;
526 for (i
=0; i
<patLen
; ++i
) {
527 UChar ch
= pattern
[i
];
529 // In segment 0, recognize and remove quotes
530 if (ch
== SINGLE_QUOTE
) {
531 if (i
+1 < patLen
&& pattern
[i
+1] == SINGLE_QUOTE
) {
537 } else if (ch
== LEFT_CURLY_BRACE
&& !inQuote
) {
538 // The only way we get from segment 0 to 1 is via an
544 } else if (inQuote
) {
545 // In segments 1..3, recognize quoted matter, and copy it
546 // into the segment, together with the quotes. This takes
547 // care of '' as well.
548 segments
[part
] += ch
;
549 if (ch
== SINGLE_QUOTE
) {
553 // We have an unquoted character in segment 1..3
556 // Commas bump us to the next segment, except for segment 3,
557 // which can contain commas. See example above.
563 case LEFT_CURLY_BRACE
:
564 // Handle '{' within segment 3. The initial '{'
565 // before segment 1 is handled above.
567 ec
= U_PATTERN_SYNTAX_ERROR
;
571 segments
[part
] += ch
;
573 case RIGHT_CURLY_BRACE
:
574 if (braceStack
== 0) {
575 makeFormat(formatNumber
, segments
, parseError
,ec
);
580 segments
[1].remove();
581 segments
[2].remove();
582 segments
[3].remove();
586 segments
[part
] += ch
;
591 // fall through (copy quote chars in segments 1..3)
593 segments
[part
] += ch
;
598 if (braceStack
!= 0 || part
!= 0) {
599 // Unmatched braces in the pattern
600 ec
= U_UNMATCHED_BRACES
;
603 fPattern
= segments
[0];
607 syntaxError(pattern
, i
, parseError
);
608 for (i
=0; i
<subformatCount
; ++i
) {
609 delete subformats
[i
].format
;
611 argTypeCount
= subformatCount
= 0;
613 // -------------------------------------
614 // Converts this MessageFormat instance to a pattern.
617 MessageFormat::toPattern(UnicodeString
& appendTo
) const {
618 // later, make this more extensible
619 int32_t lastOffset
= 0;
621 for (i
=0; i
<subformatCount
; ++i
) {
622 copyAndFixQuotes(fPattern
, lastOffset
, subformats
[i
].offset
, appendTo
);
623 lastOffset
= subformats
[i
].offset
;
624 appendTo
+= LEFT_CURLY_BRACE
;
625 itos(subformats
[i
].arg
, appendTo
);
626 Format
* fmt
= subformats
[i
].format
;
628 // do nothing, string format
630 else if (fmt
->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
632 UErrorCode ec
= U_ZERO_ERROR
;
633 NumberFormat
& formatAlias
= *(NumberFormat
*)fmt
;
634 NumberFormat
*defaultTemplate
= NumberFormat::createInstance(fLocale
, ec
);
635 NumberFormat
*currencyTemplate
= NumberFormat::createCurrencyInstance(fLocale
, ec
);
636 NumberFormat
*percentTemplate
= NumberFormat::createPercentInstance(fLocale
, ec
);
637 NumberFormat
*integerTemplate
= createIntegerFormat(fLocale
, ec
);
640 appendTo
+= ID_NUMBER
;
641 if (formatAlias
!= *defaultTemplate
) {
643 if (formatAlias
== *currencyTemplate
) {
644 appendTo
+= ID_CURRENCY
;
646 else if (formatAlias
== *percentTemplate
) {
647 appendTo
+= ID_PERCENT
;
649 else if (formatAlias
== *integerTemplate
) {
650 appendTo
+= ID_INTEGER
;
653 UnicodeString buffer
;
654 appendTo
+= ((DecimalFormat
*)fmt
)->toPattern(buffer
);
658 delete defaultTemplate
;
659 delete currencyTemplate
;
660 delete percentTemplate
;
661 delete integerTemplate
;
663 else if (fmt
->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
664 DateFormat
& formatAlias
= *(DateFormat
*)fmt
;
665 DateFormat
*defaultDateTemplate
= DateFormat::createDateInstance(DateFormat::kDefault
, fLocale
);
666 DateFormat
*shortDateTemplate
= DateFormat::createDateInstance(DateFormat::kShort
, fLocale
);
667 DateFormat
*longDateTemplate
= DateFormat::createDateInstance(DateFormat::kLong
, fLocale
);
668 DateFormat
*fullDateTemplate
= DateFormat::createDateInstance(DateFormat::kFull
, fLocale
);
669 DateFormat
*defaultTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kDefault
, fLocale
);
670 DateFormat
*shortTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kShort
, fLocale
);
671 DateFormat
*longTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kLong
, fLocale
);
672 DateFormat
*fullTimeTemplate
= DateFormat::createTimeInstance(DateFormat::kFull
, fLocale
);
676 if (formatAlias
== *defaultDateTemplate
) {
679 else if (formatAlias
== *shortDateTemplate
) {
682 appendTo
+= ID_SHORT
;
684 else if (formatAlias
== *defaultDateTemplate
) {
687 appendTo
+= ID_MEDIUM
;
689 else if (formatAlias
== *longDateTemplate
) {
694 else if (formatAlias
== *fullDateTemplate
) {
699 else if (formatAlias
== *defaultTimeTemplate
) {
702 else if (formatAlias
== *shortTimeTemplate
) {
705 appendTo
+= ID_SHORT
;
707 else if (formatAlias
== *defaultTimeTemplate
) {
710 appendTo
+= ID_MEDIUM
;
712 else if (formatAlias
== *longTimeTemplate
) {
717 else if (formatAlias
== *fullTimeTemplate
) {
723 UnicodeString buffer
;
726 appendTo
+= ((SimpleDateFormat
*)fmt
)->toPattern(buffer
);
729 delete defaultDateTemplate
;
730 delete shortDateTemplate
;
731 delete longDateTemplate
;
732 delete fullDateTemplate
;
733 delete defaultTimeTemplate
;
734 delete shortTimeTemplate
;
735 delete longTimeTemplate
;
736 delete fullTimeTemplate
;
737 // {sfb} there should be a more efficient way to do this!
739 else if (fmt
->getDynamicClassID() == ChoiceFormat::getStaticClassID()) {
740 UnicodeString buffer
;
742 appendTo
+= ID_CHOICE
;
744 appendTo
+= ((ChoiceFormat
*)fmt
)->toPattern(buffer
);
747 //appendTo += ", unknown";
749 appendTo
+= RIGHT_CURLY_BRACE
;
751 copyAndFixQuotes(fPattern
, lastOffset
, fPattern
.length(), appendTo
);
755 // -------------------------------------
756 // Adopts the new formats array and updates the array count.
757 // This MessageFormat instance owns the new formats.
760 MessageFormat::adoptFormats(Format
** newFormats
,
762 if (newFormats
== NULL
|| count
< 0) {
767 if (allocateSubformats(count
)) {
768 for (i
=0; i
<subformatCount
; ++i
) {
769 delete subformats
[i
].format
;
771 for (i
=0; i
<count
; ++i
) {
772 subformats
[i
].format
= newFormats
[i
];
774 subformatCount
= count
;
776 // An adopt method must always take ownership. Delete
777 // the incoming format objects and return unchanged.
778 for (i
=0; i
<count
; ++i
) {
779 delete newFormats
[i
];
783 // TODO: What about the .offset and .arg fields?
786 // -------------------------------------
787 // Sets the new formats array and updates the array count.
// This MessageFormat instance makes a copy of the new formats.
791 MessageFormat::setFormats(const Format
** newFormats
,
793 if (newFormats
== NULL
|| count
< 0) {
797 if (allocateSubformats(count
)) {
799 for (i
=0; i
<subformatCount
; ++i
) {
800 delete subformats
[i
].format
;
804 for (i
=0; i
<count
; ++i
) {
805 subformats
[i
].format
= newFormats
[i
] ? newFormats
[i
]->clone() : NULL
;
807 subformatCount
= count
;
810 // TODO: What about the .offset and .arg fields?
813 // -------------------------------------
814 // Adopt a single format.
// Do nothing if the format number is not less than the array count.
818 MessageFormat::adoptFormat(int32_t n
, Format
*newFormat
) {
819 if (n
< 0 || n
>= subformatCount
) {
822 delete subformats
[n
].format
;
823 subformats
[n
].format
= newFormat
;
827 // -------------------------------------
828 // Set a single format.
// Do nothing if the variable is not less than the array count.
832 MessageFormat::setFormat(int32_t n
, const Format
& newFormat
) {
833 if (n
>= 0 && n
< subformatCount
) {
834 delete subformats
[n
].format
;
835 if (&newFormat
== NULL
) {
836 // This should never happen -- but we'll be nice if it does
837 subformats
[n
].format
= NULL
;
839 subformats
[n
].format
= newFormat
.clone();
844 // -------------------------------------
845 // Gets the format array.
848 MessageFormat::getFormats(int32_t& cnt
) const
850 // This old API returns an array (which we hold) of Format*
851 // pointers. The array is valid up to the next call to any
852 // method on this object. We construct and resize an array
853 // on demand that contains aliases to the subformats[i].format
855 MessageFormat
* t
= (MessageFormat
*) this;
857 if (formatAliases
== NULL
) {
858 t
->formatAliasesCapacity
= (subformatCount
<10) ? 10 : subformatCount
;
859 Format
** a
= (Format
**)
860 uprv_malloc(sizeof(Format
*) * formatAliasesCapacity
);
864 t
->formatAliases
= a
;
865 } else if (subformatCount
> formatAliasesCapacity
) {
866 Format
** a
= (Format
**)
867 uprv_realloc(formatAliases
, sizeof(Format
*) * subformatCount
);
871 t
->formatAliases
= a
;
872 t
->formatAliasesCapacity
= subformatCount
;
874 for (int32_t i
=0; i
<subformatCount
; ++i
) {
875 t
->formatAliases
[i
] = subformats
[i
].format
;
877 cnt
= subformatCount
;
878 return (const Format
**)formatAliases
;
881 // -------------------------------------
882 // Formats the source Formattable array and copy into the result buffer.
883 // Ignore the FieldPosition result for error checking.
886 MessageFormat::format(const Formattable
* source
,
888 UnicodeString
& appendTo
,
889 FieldPosition
& ignore
,
890 UErrorCode
& success
) const
892 if (U_FAILURE(success
))
895 return format(source
, cnt
, appendTo
, ignore
, 0, success
);
898 // -------------------------------------
899 // Internally creates a MessageFormat instance based on the
900 // pattern and formats the arguments Formattable array and
901 // copy into the appendTo buffer.
904 MessageFormat::format( const UnicodeString
& pattern
,
905 const Formattable
* arguments
,
907 UnicodeString
& appendTo
,
910 MessageFormat
temp(pattern
, success
);
911 FieldPosition
ignore(0);
912 temp
.format(arguments
, cnt
, appendTo
, ignore
, success
);
916 // -------------------------------------
917 // Formats the source Formattable object and copy into the
918 // appendTo buffer. The Formattable object must be an array
919 // of Formattable instances, returns error otherwise.
922 MessageFormat::format(const Formattable
& source
,
923 UnicodeString
& appendTo
,
924 FieldPosition
& ignore
,
925 UErrorCode
& success
) const
929 if (U_FAILURE(success
))
931 if (source
.getType() != Formattable::kArray
) {
932 success
= U_ILLEGAL_ARGUMENT_ERROR
;
935 const Formattable
* tmpPtr
= source
.getArray(cnt
);
937 return format(tmpPtr
, cnt
, appendTo
, ignore
, 0, success
);
940 // -------------------------------------
941 // Formats the arguments Formattable array and copy into the appendTo buffer.
942 // Ignore the FieldPosition result for error checking.
945 MessageFormat::format(const Formattable
* arguments
,
947 UnicodeString
& appendTo
,
948 FieldPosition
& status
,
949 int32_t recursionProtection
,
950 UErrorCode
& success
) const
952 // Allow NULL array only if cnt == 0
953 if (cnt
< 0 || (cnt
&& arguments
== NULL
)) {
954 success
= U_ILLEGAL_ARGUMENT_ERROR
;
958 int32_t lastOffset
= 0;
959 for (int32_t i
=0; i
<subformatCount
; ++i
) {
960 // Append the prefix of current format element.
961 appendTo
.append(fPattern
, lastOffset
, subformats
[i
].offset
- lastOffset
);
962 lastOffset
= subformats
[i
].offset
;
963 int32_t argumentNumber
= subformats
[i
].arg
;
964 // Checks the scope of the argument number.
965 if (argumentNumber
>= cnt
) {
966 appendTo
+= LEFT_CURLY_BRACE
;
967 itos(argumentNumber
, appendTo
);
968 appendTo
+= RIGHT_CURLY_BRACE
;
972 const Formattable
*obj
= arguments
+ argumentNumber
;
973 Formattable::Type type
= obj
->getType();
975 // Recursively calling the format process only if the current
976 // format argument refers to a ChoiceFormat object.
977 Format
* fmt
= subformats
[i
].format
;
980 fmt
->format(*obj
, arg
, success
);
982 // Needs to reprocess the ChoiceFormat option by using the
983 // MessageFormat pattern application.
984 if (fmt
->getDynamicClassID() == ChoiceFormat::getStaticClassID() &&
985 arg
.indexOf(LEFT_CURLY_BRACE
) >= 0) {
986 MessageFormat
temp(arg
, fLocale
, success
);
987 // TODO: Implement recursion protection
988 temp
.format(arguments
, cnt
, appendTo
, status
, recursionProtection
, success
);
989 if (U_FAILURE(success
)) {
997 // If the obj data type is a number, use a NumberFormat instance.
998 else if ((type
== Formattable::kDouble
) ||
999 (type
== Formattable::kLong
) ||
1000 (type
== Formattable::kInt64
)) {
1002 const NumberFormat
* nf
= getDefaultNumberFormat(success
);
1006 if (type
== Formattable::kDouble
) {
1007 nf
->format(obj
->getDouble(), appendTo
);
1008 } else if (type
== Formattable::kLong
) {
1009 nf
->format(obj
->getLong(), appendTo
);
1011 nf
->format(obj
->getInt64(), appendTo
);
1014 // If the obj data type is a Date instance, use a DateFormat instance.
1015 else if (type
== Formattable::kDate
) {
1016 const DateFormat
* df
= getDefaultDateFormat(success
);
1020 df
->format(obj
->getDate(), appendTo
);
1022 else if (type
== Formattable::kString
) {
1023 appendTo
+= obj
->getString();
1026 success
= U_ILLEGAL_ARGUMENT_ERROR
;
1030 // Appends the rest of the pattern characters after the real last offset.
1031 appendTo
.append(fPattern
, lastOffset
, 0x7fffffff);
1036 // -------------------------------------
1037 // Parses the source pattern and returns the Formattable objects array,
1038 // the array count and the ending parse position. The caller of this method
1042 MessageFormat::parse(const UnicodeString
& source
,
1044 int32_t& count
) const
1046 // Allocate at least one element. Allocating an array of length
1047 // zero causes problems on some platforms (e.g. Win32).
1048 Formattable
*resultArray
= new Formattable
[argTypeCount
? argTypeCount
: 1];
1049 int32_t patternOffset
= 0;
1050 int32_t sourceOffset
= pos
.getIndex();
1051 ParsePosition
tempPos(0);
1052 count
= 0; // {sfb} reset to zero
1054 for (int32_t i
= 0; i
< subformatCount
; ++i
) {
1055 // match up to format
1056 len
= subformats
[i
].offset
- patternOffset
;
1058 fPattern
.compare(patternOffset
, len
, source
, sourceOffset
, len
) == 0) {
1059 sourceOffset
+= len
;
1060 patternOffset
+= len
;
1067 Format
* fmt
= subformats
[i
].format
;
1068 int32_t arg
= subformats
[i
].arg
;
1069 if (fmt
== NULL
) { // string format
1070 // if at end, use longest possible match
1071 // otherwise uses first match to intervening string
1072 // does NOT recursively try all possibilities
1073 int32_t tempLength
= (i
+1<subformatCount
) ?
1074 subformats
[i
+1].offset
: fPattern
.length();
1077 if (patternOffset
>= tempLength
) {
1078 next
= source
.length();
1081 UnicodeString buffer
;
1082 fPattern
.extract(patternOffset
,tempLength
- patternOffset
, buffer
);
1083 next
= source
.indexOf(buffer
, sourceOffset
);
1090 UnicodeString buffer
;
1091 source
.extract(sourceOffset
,next
- sourceOffset
, buffer
);
1092 UnicodeString strValue
= buffer
;
1093 UnicodeString
temp(LEFT_CURLY_BRACE
);
1094 // {sfb} check this later
1096 temp
+= RIGHT_CURLY_BRACE
;
1097 if (strValue
!= temp
) {
1098 source
.extract(sourceOffset
,next
- sourceOffset
, buffer
);
1099 resultArray
[arg
].setString(buffer
);
1100 // {sfb} not sure about this
1101 if ((arg
+ 1) > count
) {
1105 sourceOffset
= next
;
1109 tempPos
.setIndex(sourceOffset
);
1110 fmt
->parseObject(source
, resultArray
[arg
], tempPos
);
1111 if (tempPos
.getIndex() == sourceOffset
) {
1115 if ((arg
+ 1) > count
) {
1118 sourceOffset
= tempPos
.getIndex(); // update
1121 len
= fPattern
.length() - patternOffset
;
1123 fPattern
.compare(patternOffset
, len
, source
, sourceOffset
, len
) == 0) {
1124 pos
.setIndex(sourceOffset
+ len
);
1127 // else fall through...
1130 pos
.setErrorIndex(sourceOffset
);
1131 delete [] resultArray
;
1133 return NULL
; // leave index as is to signal error
1136 // -------------------------------------
1137 // Parses the source string and returns the array of
1138 // Formattable objects and the array count. The caller
1139 // owns the returned array.
1142 MessageFormat::parse(const UnicodeString
& source
,
1144 UErrorCode
& success
) const
1146 ParsePosition
status(0);
1147 // Calls the actual implementation method and starts
1148 // from zero offset of the source text.
1149 Formattable
* result
= parse(source
, status
, cnt
);
1150 if (status
.getIndex() == 0) {
1151 success
= U_MESSAGE_PARSE_ERROR
;
1158 // -------------------------------------
1159 // Parses the source text and copy into the result buffer.
1162 MessageFormat::parseObject( const UnicodeString
& source
,
1163 Formattable
& result
,
1164 ParsePosition
& status
) const
1167 Formattable
* tmpResult
= parse(source
, status
, cnt
);
1168 if (tmpResult
!= NULL
)
1169 result
.adoptArray(tmpResult
, cnt
);
1172 // -------------------------------------
1174 static Format
* makeRBNF(URBNFRuleSetTag tag
, const Locale
& locale
, const UnicodeString
& defaultRuleSet
, UErrorCode
& ec
) {
1175 RuleBasedNumberFormat
* fmt
= new RuleBasedNumberFormat(tag
, locale
, ec
);
1176 if (U_SUCCESS(ec
) && defaultRuleSet
.length() > 0) {
1177 fmt
->setDefaultRuleSet(defaultRuleSet
, ec
);
1178 if (U_FAILURE(ec
)) { // ignore unrecognized default rule set
1186 * Reads the segments[] array (see applyPattern()) and parses the
1187 * segments[1..3] into a Format* object. Stores the format object in
1188 * the subformats[] array. Updates the argTypes[] array type
1189 * information for the corresponding argument.
1191 * @param formatNumber index into subformats[] for this format
1192 * @param segments array of strings with the parsed pattern segments
1193 * @param parseError parse error data (output param)
1194 * @param ec error code
1197 MessageFormat::makeFormat(int32_t formatNumber
,
1198 UnicodeString
* segments
,
1199 UParseError
& parseError
,
1201 if (U_FAILURE(ec
)) {
1205 // Parse the argument number
1206 int32_t argumentNumber
= stou(segments
[1]); // always unlocalized!
1207 if (argumentNumber
< 0) {
1208 ec
= U_INVALID_FORMAT_ERROR
;
1212 // Parse the format, recording the argument type and creating a
1213 // new Format object (except for string arguments).
1214 Formattable::Type argType
;
1216 int32_t typeID
, styleID
;
1217 DateFormat::EStyle style
;
1219 switch (typeID
= findKeyword(segments
[2], TYPE_IDS
)) {
1222 argType
= Formattable::kString
;
1226 argType
= Formattable::kDouble
;
1228 switch (findKeyword(segments
[3], NUMBER_STYLE_IDS
)) {
1230 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1233 fmt
= NumberFormat::createCurrencyInstance(fLocale
, ec
);
1236 fmt
= NumberFormat::createPercentInstance(fLocale
, ec
);
1239 argType
= Formattable::kLong
;
1240 fmt
= createIntegerFormat(fLocale
, ec
);
1243 fmt
= NumberFormat::createInstance(fLocale
, ec
);
1245 fmt
->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1246 ((DecimalFormat
*)fmt
)->applyPattern(segments
[3],parseError
,ec
);
1254 argType
= Formattable::kDate
;
1255 styleID
= findKeyword(segments
[3], DATE_STYLE_IDS
);
1256 style
= (styleID
>= 0) ? DATE_STYLES
[styleID
] : DateFormat::kDefault
;
1259 fmt
= DateFormat::createDateInstance(style
, fLocale
);
1261 fmt
= DateFormat::createTimeInstance(style
, fLocale
);
1266 fmt
->getDynamicClassID() == SimpleDateFormat::getStaticClassID()) {
1267 ((SimpleDateFormat
*)fmt
)->applyPattern(segments
[3]);
1272 argType
= Formattable::kDouble
;
1274 fmt
= new ChoiceFormat(segments
[3], parseError
, ec
);
1278 argType
= Formattable::kDouble
;
1279 fmt
= makeRBNF(URBNF_SPELLOUT
, fLocale
, segments
[3], ec
);
1282 argType
= Formattable::kDouble
;
1283 fmt
= makeRBNF(URBNF_ORDINAL
, fLocale
, segments
[3], ec
);
1286 argType
= Formattable::kDouble
;
1287 fmt
= makeRBNF(URBNF_DURATION
, fLocale
, segments
[3], ec
);
1290 argType
= Formattable::kString
;
1291 ec
= U_ILLEGAL_ARGUMENT_ERROR
;
1295 if (fmt
==NULL
&& argType
!=Formattable::kString
&& U_SUCCESS(ec
)) {
1296 ec
= U_MEMORY_ALLOCATION_ERROR
;
1299 if (!allocateSubformats(formatNumber
+1) ||
1300 !allocateArgTypes(argumentNumber
+1)) {
1301 ec
= U_MEMORY_ALLOCATION_ERROR
;
1304 if (U_FAILURE(ec
)) {
1309 // Parse succeeded; record results in our arrays
1310 subformats
[formatNumber
].format
= fmt
;
1311 subformats
[formatNumber
].offset
= segments
[0].length();
1312 subformats
[formatNumber
].arg
= argumentNumber
;
1313 subformatCount
= formatNumber
+1;
1315 // Careful here: argumentNumber may in general arrive out of
1316 // sequence, e.g., "There was {2} on {0,date} (see {1,number})."
1317 argTypes
[argumentNumber
] = argType
;
1318 if (argumentNumber
+1 > argTypeCount
) {
1319 argTypeCount
= argumentNumber
+1;
1323 // -------------------------------------
1324 // Finds the string, s, in the string array, list.
1325 int32_t MessageFormat::findKeyword(const UnicodeString
& s
,
1326 const UChar
* const *list
)
1328 if (s
.length() == 0)
1329 return 0; // default
1331 UnicodeString buffer
= s
;
1332 // Trims the space characters and turns all characters
1333 // in s to lower case.
1334 buffer
.trim().toLower();
1335 for (int32_t i
= 0; list
[i
]; ++i
) {
1336 if (!buffer
.compare(list
[i
], u_strlen(list
[i
]))) {
1343 // -------------------------------------
1344 // Checks the range of the source text to quote the special
1345 // characters, { and ' and copy to target buffer.
1348 MessageFormat::copyAndFixQuotes(const UnicodeString
& source
,
1351 UnicodeString
& appendTo
)
1353 UBool gotLB
= FALSE
;
1355 for (int32_t i
= start
; i
< end
; ++i
) {
1356 UChar ch
= source
[i
];
1357 if (ch
== LEFT_CURLY_BRACE
) {
1358 appendTo
+= SINGLE_QUOTE
;
1359 appendTo
+= LEFT_CURLY_BRACE
;
1360 appendTo
+= SINGLE_QUOTE
;
1363 else if (ch
== RIGHT_CURLY_BRACE
) {
1365 appendTo
+= RIGHT_CURLY_BRACE
;
1370 appendTo
+= SINGLE_QUOTE
;
1371 appendTo
+= RIGHT_CURLY_BRACE
;
1372 appendTo
+= SINGLE_QUOTE
;
1375 else if (ch
== SINGLE_QUOTE
) {
1376 appendTo
+= SINGLE_QUOTE
;
1377 appendTo
+= SINGLE_QUOTE
;
1386 * Convenience method that ought to be in NumberFormat
1389 MessageFormat::createIntegerFormat(const Locale
& locale
, UErrorCode
& status
) const {
1390 NumberFormat
*temp
= NumberFormat::createInstance(locale
, status
);
1391 if (temp
->getDynamicClassID() == DecimalFormat::getStaticClassID()) {
1392 DecimalFormat
*temp2
= (DecimalFormat
*) temp
;
1393 temp2
->setMaximumFractionDigits(0);
1394 temp2
->setDecimalSeparatorAlwaysShown(FALSE
);
1395 temp2
->setParseIntegerOnly(TRUE
);
1402 * Return the default number format. Used to format a numeric
1403 * argument when subformats[i].format is NULL. Returns NULL
1406 * Semantically const but may modify *this.
1408 const NumberFormat
* MessageFormat::getDefaultNumberFormat(UErrorCode
& ec
) const {
1409 if (defaultNumberFormat
== NULL
) {
1410 MessageFormat
* t
= (MessageFormat
*) this;
1411 t
->defaultNumberFormat
= NumberFormat::createInstance(fLocale
, ec
);
1412 if (U_FAILURE(ec
)) {
1413 delete t
->defaultNumberFormat
;
1414 t
->defaultNumberFormat
= NULL
;
1415 } else if (t
->defaultNumberFormat
== NULL
) {
1416 ec
= U_MEMORY_ALLOCATION_ERROR
;
1419 return defaultNumberFormat
;
1423 * Return the default date format. Used to format a date
1424 * argument when subformats[i].format is NULL. Returns NULL
1427 * Semantically const but may modify *this.
1429 const DateFormat
* MessageFormat::getDefaultDateFormat(UErrorCode
& ec
) const {
1430 if (defaultDateFormat
== NULL
) {
1431 MessageFormat
* t
= (MessageFormat
*) this;
1432 t
->defaultDateFormat
= DateFormat::createDateTimeInstance(DateFormat::kShort
, DateFormat::kShort
, fLocale
);
1433 if (t
->defaultDateFormat
== NULL
) {
1434 ec
= U_MEMORY_ALLOCATION_ERROR
;
1437 return defaultDateFormat
;
1442 #endif /* #if !UCONFIG_NO_FORMATTING */