]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/msgfmt.cpp
ICU-511.31.tar.gz
[apple/icu.git] / icuSources / i18n / msgfmt.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************
6 *
7 * File MSGFMT.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 02/19/97 aliu Converted from java.
13 * 03/20/97 helena Finished first cut of implementation.
14 * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
15 * 06/11/97 helena Fixed addPattern to take the pattern correctly.
16 * 06/17/97 helena Fixed the getPattern to return the correct pattern.
17 * 07/09/97 helena Made ParsePosition into a class.
18 * 02/22/99 stephen Removed character literals for EBCDIC safety
19 * 11/01/09 kirtig Added SelectFormat
20 ********************************************************************/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_FORMATTING
25
26 #include "unicode/appendable.h"
27 #include "unicode/choicfmt.h"
28 #include "unicode/datefmt.h"
29 #include "unicode/decimfmt.h"
30 #include "unicode/localpointer.h"
31 #include "unicode/msgfmt.h"
32 #include "unicode/plurfmt.h"
33 #include "unicode/rbnf.h"
34 #include "unicode/selfmt.h"
35 #include "unicode/smpdtfmt.h"
36 #include "unicode/umsg.h"
37 #include "unicode/ustring.h"
38 #include "cmemory.h"
39 #include "patternprops.h"
40 #include "messageimpl.h"
41 #include "msgfmt_impl.h"
42 #include "uassert.h"
43 #include "uelement.h"
44 #include "uhash.h"
45 #include "ustrfmt.h"
46 #include "util.h"
47 #include "uvector.h"
48
49 // *****************************************************************************
50 // class MessageFormat
51 // *****************************************************************************
52
// Pattern syntax characters, written as numeric code units rather than
// character literals.
#define SINGLE_QUOTE        ((UChar)0x0027)   /* apostrophe */
#define COMMA               ((UChar)0x002C)   /* comma */
#define LEFT_CURLY_BRACE    ((UChar)0x007B)   /* opening curly brace */
#define RIGHT_CURLY_BRACE   ((UChar)0x007D)   /* closing curly brace */
57
58 //---------------------------------------
59 // static data
60
61 static const UChar ID_NUMBER[] = {
62 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0 /* "number" */
63 };
64 static const UChar ID_DATE[] = {
65 0x64, 0x61, 0x74, 0x65, 0 /* "date" */
66 };
67 static const UChar ID_TIME[] = {
68 0x74, 0x69, 0x6D, 0x65, 0 /* "time" */
69 };
70 static const UChar ID_SPELLOUT[] = {
71 0x73, 0x70, 0x65, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0 /* "spellout" */
72 };
73 static const UChar ID_ORDINAL[] = {
74 0x6f, 0x72, 0x64, 0x69, 0x6e, 0x61, 0x6c, 0 /* "ordinal" */
75 };
76 static const UChar ID_DURATION[] = {
77 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0 /* "duration" */
78 };
79
80 // MessageFormat Type List Number, Date, Time or Choice
81 static const UChar * const TYPE_IDS[] = {
82 ID_NUMBER,
83 ID_DATE,
84 ID_TIME,
85 ID_SPELLOUT,
86 ID_ORDINAL,
87 ID_DURATION,
88 NULL,
89 };
90
91 static const UChar ID_EMPTY[] = {
92 0 /* empty string, used for default so that null can mark end of list */
93 };
94 static const UChar ID_CURRENCY[] = {
95 0x63, 0x75, 0x72, 0x72, 0x65, 0x6E, 0x63, 0x79, 0 /* "currency" */
96 };
97 static const UChar ID_PERCENT[] = {
98 0x70, 0x65, 0x72, 0x63, 0x65, 0x6E, 0x74, 0 /* "percent" */
99 };
100 static const UChar ID_INTEGER[] = {
101 0x69, 0x6E, 0x74, 0x65, 0x67, 0x65, 0x72, 0 /* "integer" */
102 };
103
104 // NumberFormat modifier list, default, currency, percent or integer
105 static const UChar * const NUMBER_STYLE_IDS[] = {
106 ID_EMPTY,
107 ID_CURRENCY,
108 ID_PERCENT,
109 ID_INTEGER,
110 NULL,
111 };
112
113 static const UChar ID_SHORT[] = {
114 0x73, 0x68, 0x6F, 0x72, 0x74, 0 /* "short" */
115 };
116 static const UChar ID_MEDIUM[] = {
117 0x6D, 0x65, 0x64, 0x69, 0x75, 0x6D, 0 /* "medium" */
118 };
119 static const UChar ID_LONG[] = {
120 0x6C, 0x6F, 0x6E, 0x67, 0 /* "long" */
121 };
122 static const UChar ID_FULL[] = {
123 0x66, 0x75, 0x6C, 0x6C, 0 /* "full" */
124 };
125
126 // DateFormat modifier list, default, short, medium, long or full
127 static const UChar * const DATE_STYLE_IDS[] = {
128 ID_EMPTY,
129 ID_SHORT,
130 ID_MEDIUM,
131 ID_LONG,
132 ID_FULL,
133 NULL,
134 };
135
136 static const icu::DateFormat::EStyle DATE_STYLES[] = {
137 icu::DateFormat::kDefault,
138 icu::DateFormat::kShort,
139 icu::DateFormat::kMedium,
140 icu::DateFormat::kLong,
141 icu::DateFormat::kFull,
142 };
143
144 static const int32_t DEFAULT_INITIAL_CAPACITY = 10;
145
146 static const UChar NULL_STRING[] = {
147 0x6E, 0x75, 0x6C, 0x6C, 0 // "null"
148 };
149
150 static const UChar OTHER_STRING[] = {
151 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
152 };
153
154 U_CDECL_BEGIN
155 static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1,
156 const UHashTok key2) {
157 return icu::MessageFormat::equalFormats(key1.pointer, key2.pointer);
158 }
159
160 U_CDECL_END
161
162 U_NAMESPACE_BEGIN
163
164 // -------------------------------------
165 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
166 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
167
168 //--------------------------------------------------------------------
169
170 /**
171 * Convert an integer value to a string and append the result to
172 * the given UnicodeString.
173 */
174 static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
175 UChar temp[16];
176 uprv_itou(temp,16,i,10,0); // 10 == radix
177 appendTo.append(temp, -1);
178 return appendTo;
179 }
180
181
182 // AppendableWrapper: encapsulates the result of formatting, keeping track
183 // of the string and its length.
184 class AppendableWrapper : public UMemory {
185 public:
186 AppendableWrapper(Appendable& appendable) : app(appendable), len(0) {
187 }
188 void append(const UnicodeString& s) {
189 app.appendString(s.getBuffer(), s.length());
190 len += s.length();
191 }
192 void append(const UChar* s, const int32_t sLength) {
193 app.appendString(s, sLength);
194 len += sLength;
195 }
196 void append(const UnicodeString& s, int32_t start, int32_t length) {
197 append(s.tempSubString(start, length));
198 }
199 void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) {
200 UnicodeString s;
201 formatter->format(arg, s, ec);
202 if (U_SUCCESS(ec)) {
203 append(s);
204 }
205 }
206 int32_t length() {
207 return len;
208 }
209 private:
210 Appendable& app;
211 int32_t len;
212 };
213
214
215 // -------------------------------------
216 // Creates a MessageFormat instance based on the pattern.
217
218 MessageFormat::MessageFormat(const UnicodeString& pattern,
219 UErrorCode& success)
220 : fLocale(Locale::getDefault()), // Uses the default locale
221 msgPattern(success),
222 formatAliases(NULL),
223 formatAliasesCapacity(0),
224 argTypes(NULL),
225 argTypeCount(0),
226 argTypeCapacity(0),
227 hasArgTypeConflicts(FALSE),
228 defaultNumberFormat(NULL),
229 defaultDateFormat(NULL),
230 cachedFormatters(NULL),
231 customFormatArgStarts(NULL),
232 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
233 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
234 {
235 setLocaleIDs(fLocale.getName(), fLocale.getName());
236 applyPattern(pattern, success);
237 }
238
239 MessageFormat::MessageFormat(const UnicodeString& pattern,
240 const Locale& newLocale,
241 UErrorCode& success)
242 : fLocale(newLocale),
243 msgPattern(success),
244 formatAliases(NULL),
245 formatAliasesCapacity(0),
246 argTypes(NULL),
247 argTypeCount(0),
248 argTypeCapacity(0),
249 hasArgTypeConflicts(FALSE),
250 defaultNumberFormat(NULL),
251 defaultDateFormat(NULL),
252 cachedFormatters(NULL),
253 customFormatArgStarts(NULL),
254 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
255 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
256 {
257 setLocaleIDs(fLocale.getName(), fLocale.getName());
258 applyPattern(pattern, success);
259 }
260
261 MessageFormat::MessageFormat(const UnicodeString& pattern,
262 const Locale& newLocale,
263 UParseError& parseError,
264 UErrorCode& success)
265 : fLocale(newLocale),
266 msgPattern(success),
267 formatAliases(NULL),
268 formatAliasesCapacity(0),
269 argTypes(NULL),
270 argTypeCount(0),
271 argTypeCapacity(0),
272 hasArgTypeConflicts(FALSE),
273 defaultNumberFormat(NULL),
274 defaultDateFormat(NULL),
275 cachedFormatters(NULL),
276 customFormatArgStarts(NULL),
277 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
278 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
279 {
280 setLocaleIDs(fLocale.getName(), fLocale.getName());
281 applyPattern(pattern, parseError, success);
282 }
283
284 MessageFormat::MessageFormat(const MessageFormat& that)
285 :
286 Format(that),
287 fLocale(that.fLocale),
288 msgPattern(that.msgPattern),
289 formatAliases(NULL),
290 formatAliasesCapacity(0),
291 argTypes(NULL),
292 argTypeCount(0),
293 argTypeCapacity(0),
294 hasArgTypeConflicts(that.hasArgTypeConflicts),
295 defaultNumberFormat(NULL),
296 defaultDateFormat(NULL),
297 cachedFormatters(NULL),
298 customFormatArgStarts(NULL),
299 pluralProvider(&fLocale, UPLURAL_TYPE_CARDINAL),
300 ordinalProvider(&fLocale, UPLURAL_TYPE_ORDINAL)
301 {
302 // This will take care of creating the hash tables (since they are NULL).
303 UErrorCode ec = U_ZERO_ERROR;
304 copyObjects(that, ec);
305 if (U_FAILURE(ec)) {
306 resetPattern();
307 }
308 }
309
310 MessageFormat::~MessageFormat()
311 {
312 uhash_close(cachedFormatters);
313 uhash_close(customFormatArgStarts);
314
315 uprv_free(argTypes);
316 uprv_free(formatAliases);
317 delete defaultNumberFormat;
318 delete defaultDateFormat;
319 }
320
321 //--------------------------------------------------------------------
322 // Variable-size array management
323
324 /**
325 * Allocate argTypes[] to at least the given capacity and return
326 * TRUE if successful. If not, leave argTypes[] unchanged.
327 *
328 * If argTypes is NULL, allocate it. If it is not NULL, enlarge it
329 * if necessary to be at least as large as specified.
330 */
331 UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) {
332 if (U_FAILURE(status)) {
333 return FALSE;
334 }
335 if (argTypeCapacity >= capacity) {
336 return TRUE;
337 }
338 if (capacity < DEFAULT_INITIAL_CAPACITY) {
339 capacity = DEFAULT_INITIAL_CAPACITY;
340 } else if (capacity < 2*argTypeCapacity) {
341 capacity = 2*argTypeCapacity;
342 }
343 Formattable::Type* a = (Formattable::Type*)
344 uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
345 if (a == NULL) {
346 status = U_MEMORY_ALLOCATION_ERROR;
347 return FALSE;
348 }
349 argTypes = a;
350 argTypeCapacity = capacity;
351 return TRUE;
352 }
353
354 // -------------------------------------
355 // assignment operator
356
357 const MessageFormat&
358 MessageFormat::operator=(const MessageFormat& that)
359 {
360 if (this != &that) {
361 // Calls the super class for assignment first.
362 Format::operator=(that);
363
364 setLocale(that.fLocale);
365 msgPattern = that.msgPattern;
366 hasArgTypeConflicts = that.hasArgTypeConflicts;
367
368 UErrorCode ec = U_ZERO_ERROR;
369 copyObjects(that, ec);
370 if (U_FAILURE(ec)) {
371 resetPattern();
372 }
373 }
374 return *this;
375 }
376
377 UBool
378 MessageFormat::operator==(const Format& rhs) const
379 {
380 if (this == &rhs) return TRUE;
381
382 MessageFormat& that = (MessageFormat&)rhs;
383
384 // Check class ID before checking MessageFormat members
385 if (!Format::operator==(rhs) ||
386 msgPattern != that.msgPattern ||
387 fLocale != that.fLocale) {
388 return FALSE;
389 }
390
391 // Compare hashtables.
392 if ((customFormatArgStarts == NULL) != (that.customFormatArgStarts == NULL)) {
393 return FALSE;
394 }
395 if (customFormatArgStarts == NULL) {
396 return TRUE;
397 }
398
399 UErrorCode ec = U_ZERO_ERROR;
400 const int32_t count = uhash_count(customFormatArgStarts);
401 const int32_t rhs_count = uhash_count(that.customFormatArgStarts);
402 if (count != rhs_count) {
403 return FALSE;
404 }
405 int32_t idx = 0, rhs_idx = 0, pos = -1, rhs_pos = -1;
406 for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) {
407 const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos);
408 const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos);
409 if (cur->key.integer != rhs_cur->key.integer) {
410 return FALSE;
411 }
412 const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer);
413 const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer);
414 if (*format != *rhs_format) {
415 return FALSE;
416 }
417 }
418 return TRUE;
419 }
420
421 // -------------------------------------
422 // Creates a copy of this MessageFormat, the caller owns the copy.
423
424 Format*
425 MessageFormat::clone() const
426 {
427 return new MessageFormat(*this);
428 }
429
430 // -------------------------------------
431 // Sets the locale of this MessageFormat object to theLocale.
432
433 void
434 MessageFormat::setLocale(const Locale& theLocale)
435 {
436 if (fLocale != theLocale) {
437 delete defaultNumberFormat;
438 defaultNumberFormat = NULL;
439 delete defaultDateFormat;
440 defaultDateFormat = NULL;
441 fLocale = theLocale;
442 setLocaleIDs(fLocale.getName(), fLocale.getName());
443 pluralProvider.reset(&fLocale);
444 ordinalProvider.reset(&fLocale);
445 }
446 }
447
448 // -------------------------------------
449 // Gets the locale of this MessageFormat object.
450
451 const Locale&
452 MessageFormat::getLocale() const
453 {
454 return fLocale;
455 }
456
457 void
458 MessageFormat::applyPattern(const UnicodeString& newPattern,
459 UErrorCode& status)
460 {
461 UParseError parseError;
462 applyPattern(newPattern,parseError,status);
463 }
464
465
466 // -------------------------------------
467 // Applies the new pattern and returns an error if the pattern
468 // is not correct.
469 void
470 MessageFormat::applyPattern(const UnicodeString& pattern,
471 UParseError& parseError,
472 UErrorCode& ec)
473 {
474 if(U_FAILURE(ec)) {
475 return;
476 }
477 msgPattern.parse(pattern, &parseError, ec);
478 cacheExplicitFormats(ec);
479
480 if (U_FAILURE(ec)) {
481 resetPattern();
482 }
483 }
484
485 void MessageFormat::resetPattern() {
486 msgPattern.clear();
487 uhash_close(cachedFormatters);
488 cachedFormatters = NULL;
489 uhash_close(customFormatArgStarts);
490 customFormatArgStarts = NULL;
491 argTypeCount = 0;
492 hasArgTypeConflicts = FALSE;
493 }
494
495 void
496 MessageFormat::applyPattern(const UnicodeString& pattern,
497 UMessagePatternApostropheMode aposMode,
498 UParseError* parseError,
499 UErrorCode& status) {
500 if (aposMode != msgPattern.getApostropheMode()) {
501 msgPattern.clearPatternAndSetApostropheMode(aposMode);
502 }
503 applyPattern(pattern, *parseError, status);
504 }
505
506 // -------------------------------------
507 // Converts this MessageFormat instance to a pattern.
508
509 UnicodeString&
510 MessageFormat::toPattern(UnicodeString& appendTo) const {
511 if ((customFormatArgStarts != NULL && 0 != uhash_count(customFormatArgStarts)) ||
512 0 == msgPattern.countParts()
513 ) {
514 appendTo.setToBogus();
515 return appendTo;
516 }
517 return appendTo.append(msgPattern.getPatternString());
518 }
519
520 int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const {
521 if (partIndex != 0) {
522 partIndex = msgPattern.getLimitPartIndex(partIndex);
523 }
524 for (;;) {
525 UMessagePatternPartType type = msgPattern.getPartType(++partIndex);
526 if (type == UMSGPAT_PART_TYPE_ARG_START) {
527 return partIndex;
528 }
529 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
530 return -1;
531 }
532 }
533 }
534
535 void MessageFormat::setArgStartFormat(int32_t argStart,
536 Format* formatter,
537 UErrorCode& status) {
538 if (U_FAILURE(status)) {
539 delete formatter;
540 }
541 if (cachedFormatters == NULL) {
542 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
543 equalFormatsForHash, &status);
544 if (U_FAILURE(status)) {
545 delete formatter;
546 return;
547 }
548 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
549 }
550 if (formatter == NULL) {
551 formatter = new DummyFormat();
552 }
553 uhash_iput(cachedFormatters, argStart, formatter, &status);
554 }
555
556
557 UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) {
558 const MessagePattern::Part& part = msgPattern.getPart(partIndex);
559 return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ?
560 msgPattern.partSubstringMatches(part, argName) :
561 part.getValue() == argNumber; // ARG_NUMBER
562 }
563
564 // Sets a custom formatter for a MessagePattern ARG_START part index.
565 // "Custom" formatters are provided by the user via setFormat() or similar APIs.
566 void MessageFormat::setCustomArgStartFormat(int32_t argStart,
567 Format* formatter,
568 UErrorCode& status) {
569 setArgStartFormat(argStart, formatter, status);
570 if (customFormatArgStarts == NULL) {
571 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
572 NULL, &status);
573 }
574 uhash_iputi(customFormatArgStarts, argStart, 1, &status);
575 }
576
577 Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const {
578 if (cachedFormatters == NULL) {
579 return NULL;
580 }
581 void* ptr = uhash_iget(cachedFormatters, argumentNumber);
582 if (ptr != NULL && dynamic_cast<DummyFormat*>((Format*)ptr) == NULL) {
583 return (Format*) ptr;
584 } else {
585 // Not cached, or a DummyFormat representing setFormat(NULL).
586 return NULL;
587 }
588 }
589
590 // -------------------------------------
591 // Adopts the new formats array and updates the array count.
592 // This MessageFormat instance owns the new formats.
593 void
594 MessageFormat::adoptFormats(Format** newFormats,
595 int32_t count) {
596 if (newFormats == NULL || count < 0) {
597 return;
598 }
599 // Throw away any cached formatters.
600 if (cachedFormatters != NULL) {
601 uhash_removeAll(cachedFormatters);
602 }
603 if (customFormatArgStarts != NULL) {
604 uhash_removeAll(customFormatArgStarts);
605 }
606
607 int32_t formatNumber = 0;
608 UErrorCode status = U_ZERO_ERROR;
609 for (int32_t partIndex = 0;
610 formatNumber < count && U_SUCCESS(status) &&
611 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
612 setCustomArgStartFormat(partIndex, newFormats[formatNumber], status);
613 ++formatNumber;
614 }
615 // Delete those that didn't get used (if any).
616 for (; formatNumber < count; ++formatNumber) {
617 delete newFormats[formatNumber];
618 }
619
620 }
621
622 // -------------------------------------
623 // Sets the new formats array and updates the array count.
624 // This MessageFormat instance maks a copy of the new formats.
625
626 void
627 MessageFormat::setFormats(const Format** newFormats,
628 int32_t count) {
629 if (newFormats == NULL || count < 0) {
630 return;
631 }
632 // Throw away any cached formatters.
633 if (cachedFormatters != NULL) {
634 uhash_removeAll(cachedFormatters);
635 }
636 if (customFormatArgStarts != NULL) {
637 uhash_removeAll(customFormatArgStarts);
638 }
639
640 UErrorCode status = U_ZERO_ERROR;
641 int32_t formatNumber = 0;
642 for (int32_t partIndex = 0;
643 formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
644 Format* newFormat = NULL;
645 if (newFormats[formatNumber] != NULL) {
646 newFormat = newFormats[formatNumber]->clone();
647 if (newFormat == NULL) {
648 status = U_MEMORY_ALLOCATION_ERROR;
649 }
650 }
651 setCustomArgStartFormat(partIndex, newFormat, status);
652 ++formatNumber;
653 }
654 if (U_FAILURE(status)) {
655 resetPattern();
656 }
657 }
658
659 // -------------------------------------
660 // Adopt a single format by format number.
661 // Do nothing if the format number is not less than the array count.
662
663 void
664 MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
665 LocalPointer<Format> p(newFormat);
666 if (n >= 0) {
667 int32_t formatNumber = 0;
668 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
669 if (n == formatNumber) {
670 UErrorCode status = U_ZERO_ERROR;
671 setCustomArgStartFormat(partIndex, p.orphan(), status);
672 return;
673 }
674 ++formatNumber;
675 }
676 }
677 }
678
679 // -------------------------------------
680 // Adopt a single format by format name.
681 // Do nothing if there is no match of formatName.
682 void
683 MessageFormat::adoptFormat(const UnicodeString& formatName,
684 Format* formatToAdopt,
685 UErrorCode& status) {
686 LocalPointer<Format> p(formatToAdopt);
687 if (U_FAILURE(status)) {
688 return;
689 }
690 int32_t argNumber = MessagePattern::validateArgumentName(formatName);
691 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
692 status = U_ILLEGAL_ARGUMENT_ERROR;
693 return;
694 }
695 for (int32_t partIndex = 0;
696 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
697 ) {
698 if (argNameMatches(partIndex + 1, formatName, argNumber)) {
699 Format* f;
700 if (p.isValid()) {
701 f = p.orphan();
702 } else if (formatToAdopt == NULL) {
703 f = NULL;
704 } else {
705 f = formatToAdopt->clone();
706 if (f == NULL) {
707 status = U_MEMORY_ALLOCATION_ERROR;
708 return;
709 }
710 }
711 setCustomArgStartFormat(partIndex, f, status);
712 }
713 }
714 }
715
716 // -------------------------------------
717 // Set a single format.
718 // Do nothing if the variable is not less than the array count.
719 void
720 MessageFormat::setFormat(int32_t n, const Format& newFormat) {
721
722 if (n >= 0) {
723 int32_t formatNumber = 0;
724 for (int32_t partIndex = 0;
725 (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
726 if (n == formatNumber) {
727 Format* new_format = newFormat.clone();
728 if (new_format) {
729 UErrorCode status = U_ZERO_ERROR;
730 setCustomArgStartFormat(partIndex, new_format, status);
731 }
732 return;
733 }
734 ++formatNumber;
735 }
736 }
737 }
738
739 // -------------------------------------
740 // Get a single format by format name.
741 // Do nothing if the variable is not less than the array count.
742 Format *
743 MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
744 if (U_FAILURE(status) || cachedFormatters == NULL) return NULL;
745
746 int32_t argNumber = MessagePattern::validateArgumentName(formatName);
747 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
748 status = U_ILLEGAL_ARGUMENT_ERROR;
749 return NULL;
750 }
751 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
752 if (argNameMatches(partIndex + 1, formatName, argNumber)) {
753 return getCachedFormatter(partIndex);
754 }
755 }
756 return NULL;
757 }
758
759 // -------------------------------------
760 // Set a single format by format name
761 // Do nothing if the variable is not less than the array count.
762 void
763 MessageFormat::setFormat(const UnicodeString& formatName,
764 const Format& newFormat,
765 UErrorCode& status) {
766 if (U_FAILURE(status)) return;
767
768 int32_t argNumber = MessagePattern::validateArgumentName(formatName);
769 if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
770 status = U_ILLEGAL_ARGUMENT_ERROR;
771 return;
772 }
773 for (int32_t partIndex = 0;
774 (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
775 ) {
776 if (argNameMatches(partIndex + 1, formatName, argNumber)) {
777 if (&newFormat == NULL) {
778 setCustomArgStartFormat(partIndex, NULL, status);
779 } else {
780 Format* new_format = newFormat.clone();
781 if (new_format == NULL) {
782 status = U_MEMORY_ALLOCATION_ERROR;
783 return;
784 }
785 setCustomArgStartFormat(partIndex, new_format, status);
786 }
787 }
788 }
789 }
790
791 // -------------------------------------
792 // Gets the format array.
793 const Format**
794 MessageFormat::getFormats(int32_t& cnt) const
795 {
796 // This old API returns an array (which we hold) of Format*
797 // pointers. The array is valid up to the next call to any
798 // method on this object. We construct and resize an array
799 // on demand that contains aliases to the subformats[i].format
800 // pointers.
801 MessageFormat* t = const_cast<MessageFormat*> (this);
802 cnt = 0;
803 if (formatAliases == NULL) {
804 t->formatAliasesCapacity = (argTypeCount<10) ? 10 : argTypeCount;
805 Format** a = (Format**)
806 uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
807 if (a == NULL) {
808 t->formatAliasesCapacity = 0;
809 return NULL;
810 }
811 t->formatAliases = a;
812 } else if (argTypeCount > formatAliasesCapacity) {
813 Format** a = (Format**)
814 uprv_realloc(formatAliases, sizeof(Format*) * argTypeCount);
815 if (a == NULL) {
816 t->formatAliasesCapacity = 0;
817 return NULL;
818 }
819 t->formatAliases = a;
820 t->formatAliasesCapacity = argTypeCount;
821 }
822
823 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
824 t->formatAliases[cnt++] = getCachedFormatter(partIndex);
825 }
826
827 return (const Format**)formatAliases;
828 }
829
830
831 UnicodeString MessageFormat::getArgName(int32_t partIndex) {
832 const MessagePattern::Part& part = msgPattern.getPart(partIndex);
833 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
834 return msgPattern.getSubstring(part);
835 } else {
836 UnicodeString temp;
837 return itos(part.getValue(), temp);
838 }
839 }
840
841 StringEnumeration*
842 MessageFormat::getFormatNames(UErrorCode& status) {
843 if (U_FAILURE(status)) return NULL;
844
845 UVector *fFormatNames = new UVector(status);
846 if (U_FAILURE(status)) {
847 status = U_MEMORY_ALLOCATION_ERROR;
848 return NULL;
849 }
850 fFormatNames->setDeleter(uprv_deleteUObject);
851
852 for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
853 fFormatNames->addElement(new UnicodeString(getArgName(partIndex + 1)), status);
854 }
855
856 StringEnumeration* nameEnumerator = new FormatNameEnumeration(fFormatNames, status);
857 return nameEnumerator;
858 }
859
860 // -------------------------------------
861 // Formats the source Formattable array and copy into the result buffer.
862 // Ignore the FieldPosition result for error checking.
863
864 UnicodeString&
865 MessageFormat::format(const Formattable* source,
866 int32_t cnt,
867 UnicodeString& appendTo,
868 FieldPosition& ignore,
869 UErrorCode& success) const
870 {
871 return format(source, NULL, cnt, appendTo, &ignore, success);
872 }
873
874 // -------------------------------------
875 // Internally creates a MessageFormat instance based on the
876 // pattern and formats the arguments Formattable array and
877 // copy into the appendTo buffer.
878
879 UnicodeString&
880 MessageFormat::format( const UnicodeString& pattern,
881 const Formattable* arguments,
882 int32_t cnt,
883 UnicodeString& appendTo,
884 UErrorCode& success)
885 {
886 MessageFormat temp(pattern, success);
887 return temp.format(arguments, NULL, cnt, appendTo, NULL, success);
888 }
889
890 // -------------------------------------
891 // Formats the source Formattable object and copy into the
892 // appendTo buffer. The Formattable object must be an array
893 // of Formattable instances, returns error otherwise.
894
895 UnicodeString&
896 MessageFormat::format(const Formattable& source,
897 UnicodeString& appendTo,
898 FieldPosition& ignore,
899 UErrorCode& success) const
900 {
901 if (U_FAILURE(success))
902 return appendTo;
903 if (source.getType() != Formattable::kArray) {
904 success = U_ILLEGAL_ARGUMENT_ERROR;
905 return appendTo;
906 }
907 int32_t cnt;
908 const Formattable* tmpPtr = source.getArray(cnt);
909 return format(tmpPtr, NULL, cnt, appendTo, &ignore, success);
910 }
911
912 UnicodeString&
913 MessageFormat::format(const UnicodeString* argumentNames,
914 const Formattable* arguments,
915 int32_t count,
916 UnicodeString& appendTo,
917 UErrorCode& success) const {
918 return format(arguments, argumentNames, count, appendTo, NULL, success);
919 }
920
921 // Does linear search to find the match for an ArgName.
922 const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments,
923 const UnicodeString *argumentNames,
924 int32_t cnt, UnicodeString& name) const {
925 for (int32_t i = 0; i < cnt; ++i) {
926 if (0 == argumentNames[i].compare(name)) {
927 return arguments + i;
928 }
929 }
930 return NULL;
931 }
932
933
934 UnicodeString&
935 MessageFormat::format(const Formattable* arguments,
936 const UnicodeString *argumentNames,
937 int32_t cnt,
938 UnicodeString& appendTo,
939 FieldPosition* pos,
940 UErrorCode& status) const {
941 if (U_FAILURE(status)) {
942 return appendTo;
943 }
944
945 UnicodeStringAppendable usapp(appendTo);
946 AppendableWrapper app(usapp);
947 format(0, 0.0, arguments, argumentNames, cnt, app, pos, status);
948 return appendTo;
949 }
950
951 // if argumentNames is NULL, this means arguments is a numeric array.
952 // arguments can not be NULL.
953 void MessageFormat::format(int32_t msgStart, double pluralNumber,
954 const Formattable* arguments,
955 const UnicodeString *argumentNames,
956 int32_t cnt,
957 AppendableWrapper& appendTo,
958 FieldPosition* ignore,
959 UErrorCode& success) const {
960 if (U_FAILURE(success)) {
961 return;
962 }
963
964 const UnicodeString& msgString = msgPattern.getPatternString();
965 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
966 for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) {
967 const MessagePattern::Part* part = &msgPattern.getPart(i);
968 const UMessagePatternPartType type = part->getType();
969 int32_t index = part->getIndex();
970 appendTo.append(msgString, prevIndex, index - prevIndex);
971 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
972 return;
973 }
974 prevIndex = part->getLimit();
975 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
976 const NumberFormat* nf = getDefaultNumberFormat(success);
977 appendTo.formatAndAppend(nf, Formattable(pluralNumber), success);
978 continue;
979 }
980 if (type != UMSGPAT_PART_TYPE_ARG_START) {
981 continue;
982 }
983 int32_t argLimit = msgPattern.getLimitPartIndex(i);
984 UMessagePatternArgType argType = part->getArgType();
985 part = &msgPattern.getPart(++i);
986 const Formattable* arg;
987 UnicodeString noArg;
988 if (argumentNames == NULL) {
989 int32_t argNumber = part->getValue(); // ARG_NUMBER
990 if (0 <= argNumber && argNumber < cnt) {
991 arg = arguments + argNumber;
992 } else {
993 arg = NULL;
994 noArg.append(LEFT_CURLY_BRACE);
995 itos(argNumber, noArg);
996 noArg.append(RIGHT_CURLY_BRACE);
997 }
998 } else {
999 UnicodeString key;
1000 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
1001 key = msgPattern.getSubstring(*part);
1002 } else /* UMSGPAT_PART_TYPE_ARG_NUMBER */ {
1003 itos(part->getValue(), key);
1004 }
1005 arg = getArgFromListByName(arguments, argumentNames, cnt, key);
1006 if (arg == NULL) {
1007 noArg.append(LEFT_CURLY_BRACE);
1008 noArg.append(key);
1009 noArg.append(RIGHT_CURLY_BRACE);
1010 }
1011 }
1012 ++i;
1013 int32_t prevDestLength = appendTo.length();
1014 const Format* formatter = NULL;
1015 if (!noArg.isEmpty()) {
1016 appendTo.append(noArg);
1017 } else if (arg == NULL) {
1018 appendTo.append(NULL_STRING, 4);
1019 } else if ((formatter = getCachedFormatter(i -2))) {
1020 // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
1021 if (dynamic_cast<const ChoiceFormat*>(formatter) ||
1022 dynamic_cast<const PluralFormat*>(formatter) ||
1023 dynamic_cast<const SelectFormat*>(formatter)) {
1024 // We only handle nested formats here if they were provided via
1025 // setFormat() or its siblings. Otherwise they are not cached and instead
1026 // handled below according to argType.
1027 UnicodeString subMsgString;
1028 formatter->format(*arg, subMsgString, success);
1029 if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 ||
1030 (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern))
1031 ) {
1032 MessageFormat subMsgFormat(subMsgString, fLocale, success);
1033 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, ignore, success);
1034 } else {
1035 appendTo.append(subMsgString);
1036 }
1037 } else {
1038 appendTo.formatAndAppend(formatter, *arg, success);
1039 }
1040 } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) {
1041 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1042 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1043 // for the hash table containind DummyFormat.
1044 if (arg->isNumeric()) {
1045 const NumberFormat* nf = getDefaultNumberFormat(success);
1046 appendTo.formatAndAppend(nf, *arg, success);
1047 } else if (arg->getType() == Formattable::kDate) {
1048 const DateFormat* df = getDefaultDateFormat(success);
1049 appendTo.formatAndAppend(df, *arg, success);
1050 } else {
1051 appendTo.append(arg->getString(success));
1052 }
1053 } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
1054 if (!arg->isNumeric()) {
1055 success = U_ILLEGAL_ARGUMENT_ERROR;
1056 return;
1057 }
1058 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1059 // because only this one converts non-double numeric types to double.
1060 const double number = arg->getDouble(success);
1061 int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number);
1062 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames,
1063 cnt, appendTo, success);
1064 } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) {
1065 if (!arg->isNumeric()) {
1066 success = U_ILLEGAL_ARGUMENT_ERROR;
1067 return;
1068 }
1069 const PluralFormat::PluralSelector &selector =
1070 argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider;
1071 // We must use the Formattable::getDouble() variant with the UErrorCode parameter
1072 // because only this one converts non-double numeric types to double.
1073 double number = arg->getDouble(success);
1074 int32_t subMsgStart = PluralFormat::findSubMessage(msgPattern, i, selector, number,
1075 success);
1076 double offset = msgPattern.getPluralOffset(i);
1077 formatComplexSubMessage(subMsgStart, number-offset, arguments, argumentNames,
1078 cnt, appendTo, success);
1079 } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
1080 int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success);
1081 formatComplexSubMessage(subMsgStart, 0, arguments, argumentNames,
1082 cnt, appendTo, success);
1083 } else {
1084 // This should never happen.
1085 success = U_INTERNAL_PROGRAM_ERROR;
1086 return;
1087 }
1088 ignore = updateMetaData(appendTo, prevDestLength, ignore, arg);
1089 prevIndex = msgPattern.getPart(argLimit).getLimit();
1090 i = argLimit;
1091 }
1092 }
1093
1094
1095 void MessageFormat::formatComplexSubMessage(int32_t msgStart,
1096 double pluralNumber,
1097 const Formattable* arguments,
1098 const UnicodeString *argumentNames,
1099 int32_t cnt,
1100 AppendableWrapper& appendTo,
1101 UErrorCode& success) const {
1102 if (U_FAILURE(success)) {
1103 return;
1104 }
1105
1106 if (!MessageImpl::jdkAposMode(msgPattern)) {
1107 format(msgStart, pluralNumber, arguments, argumentNames, cnt, appendTo, NULL, success);
1108 return;
1109 }
1110
1111 // JDK compatibility mode: (see JDK MessageFormat.format() API docs)
1112 // - remove SKIP_SYNTAX; that is, remove half of the apostrophes
1113 // - if the result string contains an open curly brace '{' then
1114 // instantiate a temporary MessageFormat object and format again;
1115 // otherwise just append the result string
1116 const UnicodeString& msgString = msgPattern.getPatternString();
1117 UnicodeString sb;
1118 int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
1119 for (int32_t i = msgStart;;) {
1120 const MessagePattern::Part& part = msgPattern.getPart(++i);
1121 const UMessagePatternPartType type = part.getType();
1122 int32_t index = part.getIndex();
1123 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
1124 sb.append(msgString, prevIndex, index - prevIndex);
1125 break;
1126 } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
1127 sb.append(msgString, prevIndex, index - prevIndex);
1128 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
1129 const NumberFormat* nf = getDefaultNumberFormat(success);
1130 sb.append(nf->format(pluralNumber, sb, success));
1131 }
1132 prevIndex = part.getLimit();
1133 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
1134 sb.append(msgString, prevIndex, index - prevIndex);
1135 prevIndex = index;
1136 i = msgPattern.getLimitPartIndex(i);
1137 index = msgPattern.getPart(i).getLimit();
1138 MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb);
1139 prevIndex = index;
1140 }
1141 }
1142 if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) {
1143 UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
1144 MessageFormat subMsgFormat(emptyPattern, fLocale, success);
1145 subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, NULL, success);
1146 subMsgFormat.format(0, 0, arguments, argumentNames, cnt, appendTo, NULL, success);
1147 } else {
1148 appendTo.append(sb);
1149 }
1150 }
1151
1152
1153 UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const {
1154 const UnicodeString& msgString=msgPattern.getPatternString();
1155 int32_t prevIndex=msgPattern.getPart(from).getLimit();
1156 UnicodeString b;
1157 for (int32_t i = from + 1; ; ++i) {
1158 const MessagePattern::Part& part = msgPattern.getPart(i);
1159 const UMessagePatternPartType type=part.getType();
1160 int32_t index=part.getIndex();
1161 b.append(msgString, prevIndex, index - prevIndex);
1162 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
1163 return b;
1164 }
1165 // Unexpected Part "part" in parsed message.
1166 U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR);
1167 prevIndex=part.getLimit();
1168 }
1169 }
1170
1171
// Hook called after each argument has been formatted. All parameters are
// unused in this implementation and the function always returns NULL.
// The commented-out block below is kept as a reference to the Java logic.
FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/,
                                             FieldPosition* /*fp*/, const Formattable* /*argId*/) const {
    // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing.
    return NULL;
    /*
      if (fp != NULL && Field.ARGUMENT.equals(fp.getFieldAttribute())) {
          fp->setBeginIndex(prevLength);
          fp->setEndIndex(dest.get_length());
          return NULL;
      }
      return fp;
    */
}
1185
1186 void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
1187 // Deep copy pointer fields.
1188 // We need not copy the formatAliases because they are re-filled
1189 // in each getFormats() call.
1190 // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules
1191 // also get created on demand.
1192 argTypeCount = that.argTypeCount;
1193 if (argTypeCount > 0) {
1194 if (!allocateArgTypes(argTypeCount, ec)) {
1195 return;
1196 }
1197 uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0]));
1198 }
1199 if (cachedFormatters != NULL) {
1200 uhash_removeAll(cachedFormatters);
1201 }
1202 if (customFormatArgStarts != NULL) {
1203 uhash_removeAll(customFormatArgStarts);
1204 }
1205 if (that.cachedFormatters) {
1206 if (cachedFormatters == NULL) {
1207 cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
1208 equalFormatsForHash, &ec);
1209 if (U_FAILURE(ec)) {
1210 return;
1211 }
1212 uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
1213 }
1214
1215 const int32_t count = uhash_count(that.cachedFormatters);
1216 int32_t pos, idx;
1217 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) {
1218 const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos);
1219 Format* newFormat = ((Format*)(cur->value.pointer))->clone();
1220 if (newFormat) {
1221 uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec);
1222 } else {
1223 ec = U_MEMORY_ALLOCATION_ERROR;
1224 return;
1225 }
1226 }
1227 }
1228 if (that.customFormatArgStarts) {
1229 if (customFormatArgStarts == NULL) {
1230 customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
1231 NULL, &ec);
1232 }
1233 const int32_t count = uhash_count(that.customFormatArgStarts);
1234 int32_t pos, idx;
1235 for (idx = 0, pos = -1; idx < count && U_SUCCESS(ec); ++idx) {
1236 const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos);
1237 uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec);
1238 }
1239 }
1240 }
1241
1242
1243 Formattable*
1244 MessageFormat::parse(int32_t msgStart,
1245 const UnicodeString& source,
1246 ParsePosition& pos,
1247 int32_t& count,
1248 UErrorCode& ec) const {
1249 count = 0;
1250 if (U_FAILURE(ec)) {
1251 pos.setErrorIndex(pos.getIndex());
1252 return NULL;
1253 }
1254 // parse() does not work with named arguments.
1255 if (msgPattern.hasNamedArguments()) {
1256 ec = U_ARGUMENT_TYPE_MISMATCH;
1257 pos.setErrorIndex(pos.getIndex());
1258 return NULL;
1259 }
1260 LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]);
1261 const UnicodeString& msgString=msgPattern.getPatternString();
1262 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
1263 int32_t sourceOffset = pos.getIndex();
1264 ParsePosition tempStatus(0);
1265
1266 for(int32_t i=msgStart+1; ; ++i) {
1267 UBool haveArgResult = FALSE;
1268 const MessagePattern::Part* part=&msgPattern.getPart(i);
1269 const UMessagePatternPartType type=part->getType();
1270 int32_t index=part->getIndex();
1271 // Make sure the literal string matches.
1272 int32_t len = index - prevIndex;
1273 if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) {
1274 sourceOffset += len;
1275 prevIndex += len;
1276 } else {
1277 pos.setErrorIndex(sourceOffset);
1278 return NULL; // leave index as is to signal error
1279 }
1280 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
1281 // Things went well! Done.
1282 pos.setIndex(sourceOffset);
1283 return resultArray.orphan();
1284 }
1285 if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) {
1286 prevIndex=part->getLimit();
1287 continue;
1288 }
1289 // We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
1290 // Unexpected Part "part" in parsed message.
1291 U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START);
1292 int32_t argLimit=msgPattern.getLimitPartIndex(i);
1293
1294 UMessagePatternArgType argType=part->getArgType();
1295 part=&msgPattern.getPart(++i);
1296 int32_t argNumber = part->getValue(); // ARG_NUMBER
1297 UnicodeString key;
1298 ++i;
1299 const Format* formatter = NULL;
1300 Formattable& argResult = resultArray[argNumber];
1301
1302 if(cachedFormatters!=NULL && (formatter = getCachedFormatter(i - 2))!=NULL) {
1303 // Just parse using the formatter.
1304 tempStatus.setIndex(sourceOffset);
1305 formatter->parseObject(source, argResult, tempStatus);
1306 if (tempStatus.getIndex() == sourceOffset) {
1307 pos.setErrorIndex(sourceOffset);
1308 return NULL; // leave index as is to signal error
1309 }
1310 sourceOffset = tempStatus.getIndex();
1311 haveArgResult = TRUE;
1312 } else if(
1313 argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) {
1314 // We arrive here if getCachedFormatter returned NULL, but there was actually an element in the hash table.
1315 // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
1316 // for the hash table containind DummyFormat.
1317
1318 // Match as a string.
1319 // if at end, use longest possible match
1320 // otherwise uses first match to intervening string
1321 // does NOT recursively try all possibilities
1322 UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit);
1323 int32_t next;
1324 if (!stringAfterArgument.isEmpty()) {
1325 next = source.indexOf(stringAfterArgument, sourceOffset);
1326 } else {
1327 next = source.length();
1328 }
1329 if (next < 0) {
1330 pos.setErrorIndex(sourceOffset);
1331 return NULL; // leave index as is to signal error
1332 } else {
1333 UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset));
1334 UnicodeString compValue;
1335 compValue.append(LEFT_CURLY_BRACE);
1336 itos(argNumber, compValue);
1337 compValue.append(RIGHT_CURLY_BRACE);
1338 if (0 != strValue.compare(compValue)) {
1339 argResult.setString(strValue);
1340 haveArgResult = TRUE;
1341 }
1342 sourceOffset = next;
1343 }
1344 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
1345 tempStatus.setIndex(sourceOffset);
1346 double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus);
1347 if (tempStatus.getIndex() == sourceOffset) {
1348 pos.setErrorIndex(sourceOffset);
1349 return NULL; // leave index as is to signal error
1350 }
1351 argResult.setDouble(choiceResult);
1352 haveArgResult = TRUE;
1353 sourceOffset = tempStatus.getIndex();
1354 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) {
1355 // Parsing not supported.
1356 ec = U_UNSUPPORTED_ERROR;
1357 return NULL;
1358 } else {
1359 // This should never happen.
1360 ec = U_INTERNAL_PROGRAM_ERROR;
1361 return NULL;
1362 }
1363 if (haveArgResult && count <= argNumber) {
1364 count = argNumber + 1;
1365 }
1366 prevIndex=msgPattern.getPart(argLimit).getLimit();
1367 i=argLimit;
1368 }
1369 }
1370 // -------------------------------------
1371 // Parses the source pattern and returns the Formattable objects array,
1372 // the array count and the ending parse position. The caller of this method
1373 // owns the array.
1374
1375 Formattable*
1376 MessageFormat::parse(const UnicodeString& source,
1377 ParsePosition& pos,
1378 int32_t& count) const {
1379 UErrorCode ec = U_ZERO_ERROR;
1380 return parse(0, source, pos, count, ec);
1381 }
1382
1383 // -------------------------------------
1384 // Parses the source string and returns the array of
1385 // Formattable objects and the array count. The caller
1386 // owns the returned array.
1387
1388 Formattable*
1389 MessageFormat::parse(const UnicodeString& source,
1390 int32_t& cnt,
1391 UErrorCode& success) const
1392 {
1393 if (msgPattern.hasNamedArguments()) {
1394 success = U_ARGUMENT_TYPE_MISMATCH;
1395 return NULL;
1396 }
1397 ParsePosition status(0);
1398 // Calls the actual implementation method and starts
1399 // from zero offset of the source text.
1400 Formattable* result = parse(source, status, cnt);
1401 if (status.getIndex() == 0) {
1402 success = U_MESSAGE_PARSE_ERROR;
1403 delete[] result;
1404 return NULL;
1405 }
1406 return result;
1407 }
1408
1409 // -------------------------------------
1410 // Parses the source text and copy into the result buffer.
1411
1412 void
1413 MessageFormat::parseObject( const UnicodeString& source,
1414 Formattable& result,
1415 ParsePosition& status) const
1416 {
1417 int32_t cnt = 0;
1418 Formattable* tmpResult = parse(source, status, cnt);
1419 if (tmpResult != NULL)
1420 result.adoptArray(tmpResult, cnt);
1421 }
1422
1423 UnicodeString
1424 MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
1425 UnicodeString result;
1426 if (U_SUCCESS(status)) {
1427 int32_t plen = pattern.length();
1428 const UChar* pat = pattern.getBuffer();
1429 int32_t blen = plen * 2 + 1; // space for null termination, convenience
1430 UChar* buf = result.getBuffer(blen);
1431 if (buf == NULL) {
1432 status = U_MEMORY_ALLOCATION_ERROR;
1433 } else {
1434 int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
1435 result.releaseBuffer(U_SUCCESS(status) ? len : 0);
1436 }
1437 }
1438 if (U_FAILURE(status)) {
1439 result.setToBogus();
1440 }
1441 return result;
1442 }
1443
1444 // -------------------------------------
1445
1446 static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
1447 RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
1448 if (fmt == NULL) {
1449 ec = U_MEMORY_ALLOCATION_ERROR;
1450 } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
1451 UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
1452 fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
1453 }
1454 return fmt;
1455 }
1456
1457 void MessageFormat::cacheExplicitFormats(UErrorCode& status) {
1458 if (U_FAILURE(status)) {
1459 return;
1460 }
1461
1462 if (cachedFormatters != NULL) {
1463 uhash_removeAll(cachedFormatters);
1464 }
1465 if (customFormatArgStarts != NULL) {
1466 uhash_removeAll(customFormatArgStarts);
1467 }
1468
1469 // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
1470 // which we need not examine.
1471 int32_t limit = msgPattern.countParts() - 2;
1472 argTypeCount = 0;
1473 // We also need not look at the first two "parts"
1474 // (at most MSG_START and ARG_START) in this loop.
1475 // We determine the argTypeCount first so that we can allocateArgTypes
1476 // so that the next loop can set argTypes[argNumber].
1477 // (This is for the C API which needs the argTypes to read its va_arg list.)
1478 for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) {
1479 const MessagePattern::Part& part = msgPattern.getPart(i);
1480 if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
1481 const int argNumber = part.getValue();
1482 if (argNumber >= argTypeCount) {
1483 argTypeCount = argNumber + 1;
1484 }
1485 }
1486 }
1487 if (!allocateArgTypes(argTypeCount, status)) {
1488 return;
1489 }
1490 // Set all argTypes to kObject, as a "none" value, for lack of any better value.
1491 // We never use kObject for real arguments.
1492 // We use it as "no argument yet" for the check for hasArgTypeConflicts.
1493 for (int32_t i = 0; i < argTypeCount; ++i) {
1494 argTypes[i] = Formattable::kObject;
1495 }
1496 hasArgTypeConflicts = FALSE;
1497
1498 // This loop starts at part index 1 because we do need to examine
1499 // ARG_START parts. (But we can ignore the MSG_START.)
1500 for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) {
1501 const MessagePattern::Part* part = &msgPattern.getPart(i);
1502 if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) {
1503 continue;
1504 }
1505 UMessagePatternArgType argType = part->getArgType();
1506
1507 int32_t argNumber = -1;
1508 part = &msgPattern.getPart(i + 1);
1509 if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
1510 argNumber = part->getValue();
1511 }
1512 Formattable::Type formattableType;
1513
1514 switch (argType) {
1515 case UMSGPAT_ARG_TYPE_NONE:
1516 formattableType = Formattable::kString;
1517 break;
1518 case UMSGPAT_ARG_TYPE_SIMPLE: {
1519 int32_t index = i;
1520 i += 2;
1521 UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++));
1522 UnicodeString style;
1523 if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
1524 style = msgPattern.getSubstring(*part);
1525 ++i;
1526 }
1527 UParseError parseError;
1528 Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status);
1529 setArgStartFormat(index, formatter, status);
1530 break;
1531 }
1532 case UMSGPAT_ARG_TYPE_CHOICE:
1533 case UMSGPAT_ARG_TYPE_PLURAL:
1534 case UMSGPAT_ARG_TYPE_SELECTORDINAL:
1535 formattableType = Formattable::kDouble;
1536 break;
1537 case UMSGPAT_ARG_TYPE_SELECT:
1538 formattableType = Formattable::kString;
1539 break;
1540 default:
1541 status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable.
1542 formattableType = Formattable::kString;
1543 break;
1544 }
1545 if (argNumber != -1) {
1546 if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) {
1547 hasArgTypeConflicts = TRUE;
1548 }
1549 argTypes[argNumber] = formattableType;
1550 }
1551 }
1552 }
1553
1554
1555 Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style,
1556 Formattable::Type& formattableType, UParseError& parseError,
1557 UErrorCode& ec) {
1558 if (U_FAILURE(ec)) {
1559 return NULL;
1560 }
1561 Format* fmt = NULL;
1562 int32_t typeID, styleID;
1563 DateFormat::EStyle date_style;
1564
1565 switch (typeID = findKeyword(type, TYPE_IDS)) {
1566 case 0: // number
1567 formattableType = Formattable::kDouble;
1568 switch (findKeyword(style, NUMBER_STYLE_IDS)) {
1569 case 0: // default
1570 fmt = NumberFormat::createInstance(fLocale, ec);
1571 break;
1572 case 1: // currency
1573 fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
1574 break;
1575 case 2: // percent
1576 fmt = NumberFormat::createPercentInstance(fLocale, ec);
1577 break;
1578 case 3: // integer
1579 formattableType = Formattable::kLong;
1580 fmt = createIntegerFormat(fLocale, ec);
1581 break;
1582 default: // pattern
1583 fmt = NumberFormat::createInstance(fLocale, ec);
1584 if (fmt) {
1585 DecimalFormat* decfmt = dynamic_cast<DecimalFormat*>(fmt);
1586 if (decfmt != NULL) {
1587 decfmt->applyPattern(style,parseError,ec);
1588 }
1589 }
1590 break;
1591 }
1592 break;
1593
1594 case 1: // date
1595 case 2: // time
1596 formattableType = Formattable::kDate;
1597 styleID = findKeyword(style, DATE_STYLE_IDS);
1598 date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
1599
1600 if (typeID == 1) {
1601 fmt = DateFormat::createDateInstance(date_style, fLocale);
1602 } else {
1603 fmt = DateFormat::createTimeInstance(date_style, fLocale);
1604 }
1605
1606 if (styleID < 0 && fmt != NULL) {
1607 SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt);
1608 if (sdtfmt != NULL) {
1609 sdtfmt->applyPattern(style);
1610 }
1611 }
1612 break;
1613
1614 case 3: // spellout
1615 formattableType = Formattable::kDouble;
1616 fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec);
1617 break;
1618 case 4: // ordinal
1619 formattableType = Formattable::kDouble;
1620 fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec);
1621 break;
1622 case 5: // duration
1623 formattableType = Formattable::kDouble;
1624 fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec);
1625 break;
1626 default:
1627 formattableType = Formattable::kString;
1628 ec = U_ILLEGAL_ARGUMENT_ERROR;
1629 break;
1630 }
1631
1632 return fmt;
1633 }
1634
1635
1636 //-------------------------------------
1637 // Finds the string, s, in the string array, list.
1638 int32_t MessageFormat::findKeyword(const UnicodeString& s,
1639 const UChar * const *list)
1640 {
1641 if (s.isEmpty()) {
1642 return 0; // default
1643 }
1644
1645 int32_t length = s.length();
1646 const UChar *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length);
1647 UnicodeString buffer(FALSE, ps, length);
1648 // Trims the space characters and turns all characters
1649 // in s to lower case.
1650 buffer.toLower("");
1651 for (int32_t i = 0; list[i]; ++i) {
1652 if (!buffer.compare(list[i], u_strlen(list[i]))) {
1653 return i;
1654 }
1655 }
1656 return -1;
1657 }
1658
1659 /**
1660 * Convenience method that ought to be in NumberFormat
1661 */
1662 NumberFormat*
1663 MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
1664 NumberFormat *temp = NumberFormat::createInstance(locale, status);
1665 DecimalFormat *temp2;
1666 if (temp != NULL && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != NULL) {
1667 temp2->setMaximumFractionDigits(0);
1668 temp2->setDecimalSeparatorAlwaysShown(FALSE);
1669 temp2->setParseIntegerOnly(TRUE);
1670 }
1671
1672 return temp;
1673 }
1674
1675 /**
1676 * Return the default number format. Used to format a numeric
1677 * argument when subformats[i].format is NULL. Returns NULL
1678 * on failure.
1679 *
1680 * Semantically const but may modify *this.
1681 */
1682 const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
1683 if (defaultNumberFormat == NULL) {
1684 MessageFormat* t = (MessageFormat*) this;
1685 t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
1686 if (U_FAILURE(ec)) {
1687 delete t->defaultNumberFormat;
1688 t->defaultNumberFormat = NULL;
1689 } else if (t->defaultNumberFormat == NULL) {
1690 ec = U_MEMORY_ALLOCATION_ERROR;
1691 }
1692 }
1693 return defaultNumberFormat;
1694 }
1695
1696 /**
1697 * Return the default date format. Used to format a date
1698 * argument when subformats[i].format is NULL. Returns NULL
1699 * on failure.
1700 *
1701 * Semantically const but may modify *this.
1702 */
1703 const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
1704 if (defaultDateFormat == NULL) {
1705 MessageFormat* t = (MessageFormat*) this;
1706 t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
1707 if (t->defaultDateFormat == NULL) {
1708 ec = U_MEMORY_ALLOCATION_ERROR;
1709 }
1710 }
1711 return defaultDateFormat;
1712 }
1713
1714 UBool
1715 MessageFormat::usesNamedArguments() const {
1716 return msgPattern.hasNamedArguments();
1717 }
1718
// Returns argTypeCount, i.e. one more than the highest argument number
// found by cacheExplicitFormats() (0 if the pattern has no numbered arguments).
int32_t
MessageFormat::getArgTypeCount() const {
    return argTypeCount;
}
1723
1724 UBool MessageFormat::equalFormats(const void* left, const void* right) {
1725 return *(const Format*)left==*(const Format*)right;
1726 }
1727
1728
// Always reports equality; the other Format is not inspected.
UBool MessageFormat::DummyFormat::operator==(const Format&) const {
    return TRUE;
}
1732
1733 Format* MessageFormat::DummyFormat::clone() const {
1734 return new DummyFormat();
1735 }
1736
1737 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
1738 UnicodeString& appendTo,
1739 UErrorCode& status) const {
1740 if (U_SUCCESS(status)) {
1741 status = U_UNSUPPORTED_ERROR;
1742 }
1743 return appendTo;
1744 }
1745
1746 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
1747 UnicodeString& appendTo,
1748 FieldPosition&,
1749 UErrorCode& status) const {
1750 if (U_SUCCESS(status)) {
1751 status = U_UNSUPPORTED_ERROR;
1752 }
1753 return appendTo;
1754 }
1755
1756 UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
1757 UnicodeString& appendTo,
1758 FieldPositionIterator*,
1759 UErrorCode& status) const {
1760 if (U_SUCCESS(status)) {
1761 status = U_UNSUPPORTED_ERROR;
1762 }
1763 return appendTo;
1764 }
1765
// Intentionally empty: neither the result nor the parse position is modified.
void MessageFormat::DummyFormat::parseObject(const UnicodeString&,
                                             Formattable&,
                                             ParsePosition& ) const {
}
1770
1771
1772 FormatNameEnumeration::FormatNameEnumeration(UVector *fNameList, UErrorCode& /*status*/) {
1773 pos=0;
1774 fFormatNames = fNameList;
1775 }
1776
1777 const UnicodeString*
1778 FormatNameEnumeration::snext(UErrorCode& status) {
1779 if (U_SUCCESS(status) && pos < fFormatNames->size()) {
1780 return (const UnicodeString*)fFormatNames->elementAt(pos++);
1781 }
1782 return NULL;
1783 }
1784
// Rewinds the enumeration to the first name. The status parameter is unused.
void
FormatNameEnumeration::reset(UErrorCode& /*status*/) {
    pos=0;
}
1789
1790 int32_t
1791 FormatNameEnumeration::count(UErrorCode& /*status*/) const {
1792 return (fFormatNames==NULL) ? 0 : fFormatNames->size();
1793 }
1794
// Deletes the name vector that was handed to the constructor.
FormatNameEnumeration::~FormatNameEnumeration() {
    delete fFormatNames;
}
1798
1799
// Stores the locale pointer and plural type. The rules start out as NULL
// and are created on the first call to select().
MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const Locale* loc, UPluralType t)
        : locale(loc), rules(NULL), type(t) {
}
1803
// Releases the plural rules created by select(), if any.
MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
    // We own the rules but not the locale.
    delete rules;
}
1808
1809 UnicodeString MessageFormat::PluralSelectorProvider::select(double number, UErrorCode& ec) const {
1810 if (U_FAILURE(ec)) {
1811 return UnicodeString(FALSE, OTHER_STRING, 5);
1812 }
1813 MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this);
1814 if(rules == NULL) {
1815 t->rules = PluralRules::forLocale(*locale, type, ec);
1816 if (U_FAILURE(ec)) {
1817 return UnicodeString(FALSE, OTHER_STRING, 5);
1818 }
1819 }
1820 return rules->select(number);
1821 }
1822
1823 void MessageFormat::PluralSelectorProvider::reset(const Locale* loc) {
1824 locale = loc;
1825 delete rules;
1826 rules = NULL;
1827 }
1828
1829
1830 U_NAMESPACE_END
1831
1832 #endif /* #if !UCONFIG_NO_FORMATTING */
1833
1834 //eof