1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2009-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
10 *******************************************************************************
13 #include "unicode/decimfmt.h"
14 #include "unicode/messagepattern.h"
15 #include "unicode/plurfmt.h"
16 #include "unicode/plurrule.h"
17 #include "unicode/utypes.h"
19 #include "messageimpl.h"
21 #include "plurrule_impl.h"
24 #include "precision.h"
25 #include "visibledigits.h"
27 #if !UCONFIG_NO_FORMATTING
// UTF-16 code units spelling the plural keyword "other", followed by a 0 terminator.
// findSubMessage() builds a 5-unit UnicodeString from this array for keyword comparison.
31 static const UChar OTHER_STRING
[] = {
32 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
// Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for PluralFormat.
// NOTE(review): the macro is defined outside this file; presumably it emits the
// class-ID boilerplate for the class -- confirm against its definition.
35 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat
)
// Constructs a PluralFormat using the default locale (Locale::getDefault()).
// init() is called with no explicit rules (NULL) and UPLURAL_TYPE_CARDINAL.
// status: in/out error code, forwarded to init().
37 PluralFormat::PluralFormat(UErrorCode
& status
)
38 : locale(Locale::getDefault()),
42 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
// Constructs a PluralFormat from a caller-supplied locale `loc`.
// init() is called with no explicit rules (NULL) and UPLURAL_TYPE_CARDINAL.
// status: in/out error code, forwarded to init().
// NOTE(review): the member-initializer list is not visible here; presumably it
// stores `loc` in `locale` -- confirm.
45 PluralFormat::PluralFormat(const Locale
& loc
, UErrorCode
& status
)
50 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
// Constructs a PluralFormat using the default locale and caller-supplied rules.
// The address of `rules` is handed to init() together with UPLURAL_TYPE_COUNT.
// status: in/out error code, forwarded to init().
53 PluralFormat::PluralFormat(const PluralRules
& rules
, UErrorCode
& status
)
54 : locale(Locale::getDefault()),
58 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
// Constructs a PluralFormat from a caller-supplied locale `loc` and rules.
// The address of `rules` is handed to init() together with UPLURAL_TYPE_COUNT.
// NOTE(review): the remaining parameters and the initializer list are not
// visible here; `status` is used below, so it is presumably a UErrorCode& -- confirm.
61 PluralFormat::PluralFormat(const Locale
& loc
,
62 const PluralRules
& rules
,
68 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
// Constructs a PluralFormat from a caller-supplied locale `loc` and a plural type.
// init() is called with no explicit rules (NULL) and the caller's `type`.
// NOTE(review): the declarations of `type` and `status` are not visible here;
// `type` is passed where init() expects a UPluralType -- confirm the signature.
71 PluralFormat::PluralFormat(const Locale
& loc
,
78 init(NULL
, type
, status
);
// Constructs a PluralFormat using the default locale and applies pattern `pat`.
// init() is called with no explicit rules (NULL) and UPLURAL_TYPE_CARDINAL,
// then applyPattern(pat, status) parses the pattern.
// NOTE(review): the `status` parameter declaration is not visible here.
81 PluralFormat::PluralFormat(const UnicodeString
& pat
,
83 : locale(Locale::getDefault()),
87 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
88 applyPattern(pat
, status
);
// Constructs a PluralFormat from a caller-supplied locale `loc` and applies
// pattern `pat`. init() is called with no explicit rules (NULL) and
// UPLURAL_TYPE_CARDINAL, then applyPattern(pat, status) parses the pattern.
// NOTE(review): the `status` parameter and the initializer list are not visible here.
91 PluralFormat::PluralFormat(const Locale
& loc
,
92 const UnicodeString
& pat
,
98 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
99 applyPattern(pat
, status
);
// Constructs a PluralFormat using the default locale, caller-supplied rules and
// pattern `pat`. The address of `rules` is handed to init() with
// UPLURAL_TYPE_COUNT, then applyPattern(pat, status) parses the pattern.
// NOTE(review): the `status` parameter declaration is not visible here.
102 PluralFormat::PluralFormat(const PluralRules
& rules
,
103 const UnicodeString
& pat
,
105 : locale(Locale::getDefault()),
109 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
110 applyPattern(pat
, status
);
// Constructs a PluralFormat from a caller-supplied locale `loc`, rules and
// pattern `pat`. The address of `rules` is handed to init() with
// UPLURAL_TYPE_COUNT, then applyPattern(pat, status) parses the pattern.
// NOTE(review): the `status` parameter and the initializer list are not visible here.
113 PluralFormat::PluralFormat(const Locale
& loc
,
114 const PluralRules
& rules
,
115 const UnicodeString
& pat
,
121 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
122 applyPattern(pat
, status
);
// Constructs a PluralFormat from a caller-supplied locale `loc`, a plural type
// and pattern `pat`. init() is called with no explicit rules (NULL) and the
// caller's `type`, then applyPattern(pat, status) parses the pattern.
// NOTE(review): the declarations of `type` and `status` are not visible here.
125 PluralFormat::PluralFormat(const Locale
& loc
,
127 const UnicodeString
& pat
,
133 init(NULL
, type
, status
);
134 applyPattern(pat
, status
);
// Copy constructor. The initializer list copies `locale`, `msgPattern` and
// `offset` from `other`.
// NOTE(review): the constructor body is not visible here; the owned
// numberFormat / pluralRules objects are presumably duplicated there (see
// copyObjects()) -- confirm.
137 PluralFormat::PluralFormat(const PluralFormat
& other
)
139 locale(other
.locale
),
140 msgPattern(other
.msgPattern
),
142 offset(other
.offset
) {
// Replaces this object's owned NumberFormat and PluralRules with copies taken
// from `other`.
//   - numberFormat: a fresh NumberFormat::createInstance(locale, status) or a
//     clone of other.numberFormat, depending on whether other.numberFormat is NULL.
//   - pluralRulesWrapper.pluralRules: PluralRules::forLocale(locale, status) or
//     a clone of other's rules, depending on whether other's rules are NULL.
// `status` is a local variable initialized to U_ZERO_ERROR; the visible code
// does not hand it back to the caller.
147 PluralFormat::copyObjects(const PluralFormat
& other
) {
148 UErrorCode status
= U_ZERO_ERROR
;
// Dispose of any previously held objects before taking the new ones.
149 if (numberFormat
!= NULL
) {
152 if (pluralRulesWrapper
.pluralRules
!= NULL
) {
153 delete pluralRulesWrapper
.pluralRules
;
// Number format: build a new one for `locale`, or clone the source's.
156 if (other
.numberFormat
== NULL
) {
157 numberFormat
= NumberFormat::createInstance(locale
, status
);
159 numberFormat
= (NumberFormat
*)other
.numberFormat
->clone();
// Plural rules: look them up for `locale`, or clone the source's.
161 if (other
.pluralRulesWrapper
.pluralRules
== NULL
) {
162 pluralRulesWrapper
.pluralRules
= PluralRules::forLocale(locale
, status
);
164 pluralRulesWrapper
.pluralRules
= other
.pluralRulesWrapper
.pluralRules
->clone();
// Destructor.
// NOTE(review): the body is not visible in this listing.
169 PluralFormat::~PluralFormat() {
// Shared constructor helper: sets up the plural rules and the number format.
//   rules  - rules to clone, or NULL (constructors pass NULL when no rules were given)
//   type   - plural type handed to PluralRules::forLocale()
//   status - in/out error code; checked on entry, set to
//            U_MEMORY_ALLOCATION_ERROR when the cloned rules pointer is NULL
// NOTE(review): the branch headers selecting between forLocale() and clone()
// are not visible here; presumably forLocale() is used when `rules` is NULL -- confirm.
174 PluralFormat::init(const PluralRules
* rules
, UPluralType type
, UErrorCode
& status
) {
175 if (U_FAILURE(status
)) {
// Rules looked up from the locale and plural type.
180 pluralRulesWrapper
.pluralRules
= PluralRules::forLocale(locale
, type
, status
);
// Rules copied from the caller; a NULL clone is reported as an allocation failure.
182 pluralRulesWrapper
.pluralRules
= rules
->clone();
183 if (pluralRulesWrapper
.pluralRules
== NULL
) {
184 status
= U_MEMORY_ALLOCATION_ERROR
;
// Number formatter for this object's locale.
189 numberFormat
= NumberFormat::createInstance(locale
, status
);
// Parses `newPattern` as a plural-style pattern into msgPattern and caches the
// pattern's plural offset in `offset`.
//   newPattern - the pattern text to parse
//   status     - in/out error code; checked after parsing
// NOTE(review): the body of the failure branch is not visible in this listing.
193 PluralFormat::applyPattern(const UnicodeString
& newPattern
, UErrorCode
& status
) {
194 msgPattern
.parsePluralStyle(newPattern
, NULL
, status
);
195 if (U_FAILURE(status
)) {
200 offset
= msgPattern
.getPluralOffset(0);
// Formats a Formattable value.
//   obj      - value to format; must be numeric
//   appendTo - output string; returned unchanged if status is already a failure
//   status   - in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR on the
//              non-numeric path
// Numeric values are forwarded, together with obj.getDouble(), to the
// (Formattable, double, ...) overload.
// NOTE(review): the `pos` parameter declaration and the final return are not
// visible in this listing.
204 PluralFormat::format(const Formattable
& obj
,
205 UnicodeString
& appendTo
,
207 UErrorCode
& status
) const
209 if (U_FAILURE(status
)) return appendTo
;
211 if (obj
.isNumeric()) {
212 return format(obj
, obj
.getDouble(), appendTo
, pos
, status
);
214 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Convenience overload: formats an int32_t into a fresh local string.
// Uses a FieldPosition constructed with DONT_CARE and forwards to the
// (Formattable, double, ...) overload, returning its result.
//   number - value to format
//   status - in/out error code, forwarded unchanged
220 PluralFormat::format(int32_t number
, UErrorCode
& status
) const {
221 FieldPosition
fpos(FieldPosition::DONT_CARE
);
222 UnicodeString result
;
223 return format(Formattable(number
), number
, result
, fpos
, status
);
// Convenience overload: formats a double into a fresh local string.
// Uses a FieldPosition constructed with DONT_CARE and forwards to the
// (Formattable, double, ...) overload, returning its result.
//   number - value to format
//   status - in/out error code, forwarded unchanged
227 PluralFormat::format(double number
, UErrorCode
& status
) const {
228 FieldPosition
fpos(FieldPosition::DONT_CARE
);
229 UnicodeString result
;
230 return format(Formattable(number
), number
, result
, fpos
, status
);
// Formats an int32_t, appending to a caller-supplied string.
// Wraps `number` in a Formattable, widens it to double for selection, and
// forwards to the (Formattable, double, ...) overload.
//   number   - value to format
//   appendTo - output string
//   status   - in/out error code, forwarded unchanged
// NOTE(review): the `pos` parameter declaration is not visible in this listing.
235 PluralFormat::format(int32_t number
,
236 UnicodeString
& appendTo
,
238 UErrorCode
& status
) const {
239 return format(Formattable(number
), (double)number
, appendTo
, pos
, status
);
// Formats a double, appending to a caller-supplied string.
// Wraps `number` in a Formattable and forwards to the
// (Formattable, double, ...) overload.
//   number   - value to format
//   appendTo - output string
//   status   - in/out error code, forwarded unchanged
// NOTE(review): the `pos` parameter declaration is not visible in this listing.
243 PluralFormat::format(double number
,
244 UnicodeString
& appendTo
,
246 UErrorCode
& status
) const {
247 return format(Formattable(number
), (double)number
, appendTo
, pos
, status
);
// Core formatting routine shared by the other format() overloads.
// Picks the sub-message of the applied plural pattern that matches `number`
// and appends it to `appendTo`, inserting the formatted number wherever the
// sub-message has a REPLACE_NUMBER part.
//   numberObject - the value as a Formattable, handed to the number formatter
//   number       - the same value as a double, handed to findSubMessage()
//   appendTo     - output string; returned on the visible return paths
//   status       - in/out error code, checked on entry and after fallible steps
// NOTE(review): several lines are not visible in this listing (the `pos`
// parameter, the declaration of `fp`, the conditions choosing between the
// formatting variants, and the loop header around the part walk).
251 PluralFormat::format(const Formattable
& numberObject
, double number
,
252 UnicodeString
& appendTo
,
254 UErrorCode
& status
) const {
255 if (U_FAILURE(status
)) {
// No pattern has been applied: behave like the plain number formatter.
258 if (msgPattern
.countParts() == 0) {
259 return numberFormat
->format(numberObject
, appendTo
, pos
, status
);
261 // Get the appropriate sub-message.
262 // Select it based on the formatted number-offset.
263 double numberMinusOffset
= number
- offset
;
264 UnicodeString numberString
;
265 FieldPosition ignorePos
;
// `dec` holds the visible-digits form of the number; its address is later
// passed to findSubMessage() as the selector context.
267 VisibleDigitsWithExponent dec
;
268 fp
.initVisibleDigitsWithExponent(numberMinusOffset
, dec
, status
);
269 if (U_FAILURE(status
)) {
// Variant that formats numberObject itself: through DecimalFormat's
// visible-digits path, or through the generic NumberFormat::format().
273 DecimalFormat
*decFmt
= dynamic_cast<DecimalFormat
*>(numberFormat
);
275 decFmt
->initVisibleDigitsWithExponent(
276 numberObject
, dec
, status
);
277 if (U_FAILURE(status
)) {
280 decFmt
->format(dec
, numberString
, ignorePos
, status
);
282 numberFormat
->format(
283 numberObject
, numberString
, ignorePos
, status
); // could be BigDecimal etc.
// Variant that formats numberMinusOffset instead, with the same two paths.
286 DecimalFormat
*decFmt
= dynamic_cast<DecimalFormat
*>(numberFormat
);
288 decFmt
->initVisibleDigitsWithExponent(
289 numberMinusOffset
, dec
, status
);
290 if (U_FAILURE(status
)) {
293 decFmt
->format(dec
, numberString
, ignorePos
, status
);
295 numberFormat
->format(
296 numberMinusOffset
, numberString
, ignorePos
, status
);
// Locate the matching sub-message; partIndex is used below as the index of
// the part where that sub-message starts.
299 int32_t partIndex
= findSubMessage(msgPattern
, 0, pluralRulesWrapper
, &dec
, number
, status
);
300 if (U_FAILURE(status
)) { return appendTo
; }
301 // Replace syntactic # signs in the top level of this sub-message
302 // (not in nested arguments) with the formatted number-offset.
303 const UnicodeString
& pattern
= msgPattern
.getPatternString();
// prevIndex marks where the not-yet-copied pattern text begins.
304 int32_t prevIndex
= msgPattern
.getPart(partIndex
).getLimit();
306 const MessagePattern::Part
& part
= msgPattern
.getPart(++partIndex
);
307 const UMessagePatternPartType type
= part
.getType();
308 int32_t index
= part
.getIndex();
// End of the sub-message: copy the remaining literal text and finish.
309 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
310 return appendTo
.append(pattern
, prevIndex
, index
- prevIndex
);
// A number placeholder (or, in JDK apostrophe mode, skippable syntax):
// copy the text before it and, for a placeholder, append the formatted number.
311 } else if ((type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) ||
312 (type
== UMSGPAT_PART_TYPE_SKIP_SYNTAX
&& MessageImpl::jdkAposMode(msgPattern
))) {
313 appendTo
.append(pattern
, prevIndex
, index
- prevIndex
);
314 if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
315 appendTo
.append(numberString
);
317 prevIndex
= part
.getLimit();
// A nested argument: copy the text before it, jump to the argument's limit
// part, then append the argument's text via appendReducedApostrophes().
318 } else if (type
== UMSGPAT_PART_TYPE_ARG_START
) {
319 appendTo
.append(pattern
, prevIndex
, index
- prevIndex
);
321 partIndex
= msgPattern
.getLimitPartIndex(partIndex
);
322 index
= msgPattern
.getPart(partIndex
).getLimit();
323 MessageImpl::appendReducedApostrophes(pattern
, prevIndex
, index
, appendTo
);
// Writes the currently applied pattern into `appendTo`.
// When msgPattern has no parts, `appendTo` is set to bogus; the other visible
// statement appends the pattern string held by msgPattern.
// NOTE(review): the `else` and the return statement are not visible in this listing.
330 PluralFormat::toPattern(UnicodeString
& appendTo
) {
331 if (0 == msgPattern
.countParts()) {
332 appendTo
.setToBogus();
334 appendTo
.append(msgPattern
.getPatternString());
// Switches this formatter to a new locale.
// The visible code resets pluralRulesWrapper and re-runs init() with no
// explicit rules (NULL); the plural type passed is the literal
// UPLURAL_TYPE_CARDINAL.
//   loc    - the new locale
//   status - in/out error code; checked on entry
// NOTE(review): lines between the status check and the reset are not visible
// here; presumably they store `loc` and discard the old state -- confirm.
340 PluralFormat::setLocale(const Locale
& loc
, UErrorCode
& status
) {
341 if (U_FAILURE(status
)) {
349 pluralRulesWrapper
.reset();
350 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
// Installs a number formatter by cloning the one supplied by the caller.
//   format - formatter to clone; dereferenced without a visible NULL check
//   status - in/out error code; checked on entry, set to
//            U_MEMORY_ALLOCATION_ERROR on a failure path
// NOTE(review): the lines that test `nf` and store it are not visible here;
// presumably the allocation error is raised when the clone is NULL -- confirm.
354 PluralFormat::setNumberFormat(const NumberFormat
* format
, UErrorCode
& status
) {
355 if (U_FAILURE(status
)) {
358 NumberFormat
* nf
= (NumberFormat
*)format
->clone();
363 status
= U_MEMORY_ALLOCATION_ERROR
;
// Returns a heap-allocated copy of this object, made with the copy constructor.
368 PluralFormat::clone() const
370 return new PluralFormat(*this);
// Copy assignment. Self-assignment is skipped; otherwise `locale`,
// `msgPattern` and `offset` are copied from `other`.
// NOTE(review): the remainder of the body (copying the owned objects and the
// return statement) is not visible in this listing.
375 PluralFormat::operator=(const PluralFormat
& other
) {
376 if (this != &other
) {
377 locale
= other
.locale
;
378 msgPattern
= other
.msgPattern
;
379 offset
= other
.offset
;
// Equality comparison against another Format.
// After the identity check and the base-class Format::operator== check,
// `other` is cast to PluralFormat and the following must all hold:
//   - equal locale and equal msgPattern;
//   - numberFormat is NULL on both sides, or non-NULL on both and equal by value;
//   - pluralRules is NULL on both sides, or non-NULL on both and equal by value.
// NOTE(review): the bodies of the two early-exit branches and the `return`
// keyword are not visible in this listing.
387 PluralFormat::operator==(const Format
& other
) const {
388 if (this == &other
) {
391 if (!Format::operator==(other
)) {
394 const PluralFormat
& o
= (const PluralFormat
&)other
;
396 locale
== o
.locale
&&
397 msgPattern
== o
.msgPattern
&& // implies same offset
// The NULL-ness comparison precedes each dereference, so a pointer is only
// dereferenced when both sides are non-NULL.
398 (numberFormat
== NULL
) == (o
.numberFormat
== NULL
) &&
399 (numberFormat
== NULL
|| *numberFormat
== *o
.numberFormat
) &&
400 (pluralRulesWrapper
.pluralRules
== NULL
) == (o
.pluralRulesWrapper
.pluralRules
== NULL
) &&
401 (pluralRulesWrapper
.pluralRules
== NULL
||
402 *pluralRulesWrapper
.pluralRules
== *o
.pluralRulesWrapper
.pluralRules
);
// Inequality: the logical negation of operator==.
406 PluralFormat::operator!=(const Format
& other
) const {
407 return !operator==(other
);
// Parsing is not supported by this class: the source text and result
// parameters are unused, and the error index of `pos` is set to its current
// index.
411 PluralFormat::parseObject(const UnicodeString
& /*source*/,
412 Formattable
& /*result*/,
413 ParsePosition
& pos
) const
415 // Parsing not supported.
416 pos
.setErrorIndex(pos
.getIndex());
// Scans the plural-style parts of `pattern` to choose the sub-message for `number`.
//   pattern   - parsed message pattern to scan
//   partIndex - index of the part at which scanning starts
//   selector  - maps (context, number - offset) to a plural keyword
//   context   - opaque pointer passed through to selector.select()
//   number    - value compared against explicit "=N" selectors
//   ec        - in/out error code, passed to selector.select()
// Returns an int32_t; format() uses it as the index of the part where the
// chosen sub-message starts.
// NOTE(review): a number of lines are not visible in this listing (the
// declarations of `offset` and `msgStart`, the `do {` header, the assignments
// to msgStart and the return statements).
419 int32_t PluralFormat::findSubMessage(const MessagePattern
& pattern
, int32_t partIndex
,
420 const PluralSelector
& selector
, void *context
,
421 double number
, UErrorCode
& ec
) {
425 int32_t count
=pattern
.countParts();
// If the first part carries a numeric value, it is read as the plural offset.
427 const MessagePattern::Part
* part
=&pattern
.getPart(partIndex
);
428 if (MessagePattern::Part::hasNumericValue(part
->getType())) {
429 offset
=pattern
.getNumericValue(*part
);
434 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
435 // Then we get the keyword from the selector.
436 // (In other words, we never call the selector if we match against an explicit value,
437 // or if the only non-explicit keyword is "other".)
438 UnicodeString keyword
;
439 UnicodeString
other(FALSE
, OTHER_STRING
, 5);
440 // When we find a match, we set msgStart>0 and also set this boolean to true
441 // to avoid matching the keyword again (duplicates are allowed)
442 // while we continue to look for an explicit-value match.
443 UBool haveKeywordMatch
=FALSE
;
444 // msgStart is 0 until we find any appropriate sub-message.
445 // We remember the first "other" sub-message if we have not seen any
446 // appropriate sub-message before.
447 // We remember the first matching-keyword sub-message if we have not seen
448 // one of those before.
449 // (The parser allows [does not check for] duplicate keywords.
450 // We just have to make sure to take the first one.)
451 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
452 // at the first keyword match.
453 // We keep going until we find an explicit-value match or reach the end of the plural style.
455 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
456 // until ARG_LIMIT or end of plural-only pattern.
458 part
=&pattern
.getPart(partIndex
++);
459 const UMessagePatternPartType type
= part
->getType();
460 if(type
==UMSGPAT_PART_TYPE_ARG_LIMIT
) {
463 U_ASSERT (type
==UMSGPAT_PART_TYPE_ARG_SELECTOR
);
464 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
465 if(MessagePattern::Part::hasNumericValue(pattern
.getPartType(partIndex
))) {
466 // explicit value like "=2"
467 part
=&pattern
.getPart(partIndex
++);
468 if(number
==pattern
.getNumericValue(*part
)) {
469 // matches explicit value
472 } else if(!haveKeywordMatch
) {
473 // plural keyword like "few" or "other"
474 // Compare "other" first and call the selector if this is not "other".
475 if(pattern
.partSubstringMatches(*part
, other
)) {
478 if(0 == keyword
.compare(other
)) {
479 // This is the first "other" sub-message,
480 // and the selected keyword is also "other".
481 // Do not match "other" again.
482 haveKeywordMatch
=TRUE
;
// The selector is consulted lazily: only while `keyword` is still empty.
486 if(keyword
.isEmpty()) {
487 keyword
=selector
.select(context
, number
-offset
, ec
);
488 if(msgStart
!=0 && (0 == keyword
.compare(other
))) {
489 // We have already seen an "other" sub-message.
490 // Do not match "other" again.
491 haveKeywordMatch
=TRUE
;
492 // Skip keyword matching but do getLimitPartIndex().
495 if(!haveKeywordMatch
&& pattern
.partSubstringMatches(*part
, keyword
)) {
498 // Do not match this keyword again.
499 haveKeywordMatch
=TRUE
;
// Jump to the limit part of the current sub-message before the next tuple.
503 partIndex
=pattern
.getLimitPartIndex(partIndex
);
504 } while(++partIndex
<count
);
// Searches `source` for the text of one of this pattern's sub-messages and
// reports what was found.
//   source             - text to search
//   rbnfLenientScanner - when non-NULL, its findTextLenient() does the search;
//                        otherwise UnicodeString::indexOf() is used
//   result             - receives a string on a successful match
//   pos                - in: begin index at which searching starts;
//                        out: begin/end indices of the match, or begin index
//                        -1 when no pattern is applied or nothing matched
// NOTE(review): several lines are not visible in this listing (the
// declarations of `partIndex`, `currMatchIndex` and `length`, the early
// returns, and the bodies of the part-type checks).
508 void PluralFormat::parseType(const UnicodeString
& source
, const NFRule
*rbnfLenientScanner
, Formattable
& result
, FieldPosition
& pos
) const {
509 // If no pattern was applied, return null.
510 if (msgPattern
.countParts() == 0) {
511 pos
.setBeginIndex(-1);
517 int count
=msgPattern
.countParts();
518 int startingAt
= pos
.getBeginIndex();
519 if (startingAt
< 0) {
523 // The keyword is null until we need to match against a non-explicit, not-"other" value.
524 // Then we get the keyword from the selector.
525 // (In other words, we never call the selector if we match against an explicit value,
526 // or if the only non-explicit keyword is "other".)
527 UnicodeString keyword
;
528 UnicodeString matchedWord
;
529 const UnicodeString
& pattern
= msgPattern
.getPatternString();
530 int matchedIndex
= -1;
531 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
532 // until the end of the plural-only pattern.
533 while (partIndex
< count
) {
// Each tuple is read as three consecutive parts; their types are checked
// against ARG_SELECTOR, MSG_START and MSG_LIMIT respectively.
534 const MessagePattern::Part
* partSelector
= &msgPattern
.getPart(partIndex
++);
535 if (partSelector
->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR
) {
540 const MessagePattern::Part
* partStart
= &msgPattern
.getPart(partIndex
++);
541 if (partStart
->getType() != UMSGPAT_PART_TYPE_MSG_START
) {
546 const MessagePattern::Part
* partLimit
= &msgPattern
.getPart(partIndex
++);
547 if (partLimit
->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT
) {
// currArg is the pattern text between the MSG_START and MSG_LIMIT parts.
552 UnicodeString currArg
= pattern
.tempSubString(partStart
->getLimit(), partLimit
->getIndex() - partStart
->getLimit());
553 if (rbnfLenientScanner
!= NULL
) {
554 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
556 currMatchIndex
= rbnfLenientScanner
->findTextLenient(source
, currArg
, startingAt
, &length
);
559 currMatchIndex
= source
.indexOf(currArg
, startingAt
);
// A candidate replaces the current best only if it was found, does not start
// before the current best, and is strictly longer than the current best.
561 if (currMatchIndex
>= 0 && currMatchIndex
>= matchedIndex
&& currArg
.length() > matchedWord
.length()) {
562 matchedIndex
= currMatchIndex
;
563 matchedWord
= currArg
;
// NOTE(review): `keyword` is assigned the same substring as currArg (the
// sub-message text between partStart and partLimit); partSelector is not
// used for it. Confirm this is intended.
564 keyword
= pattern
.tempSubString(partStart
->getLimit(), partLimit
->getIndex() - partStart
->getLimit());
// Report the best match through pos and result.
567 if (matchedIndex
>= 0) {
568 pos
.setBeginIndex(matchedIndex
);
569 pos
.setEndIndex(matchedIndex
+ matchedWord
.length());
570 result
.setString(keyword
);
// Begin index -1 is also what the no-pattern path above sets.
575 pos
.setBeginIndex(-1);
// Out-of-line destructor for PluralSelector; the body is empty.
579 PluralFormat::PluralSelector::~PluralSelector() {}
// Destructor for PluralSelectorAdapter.
// NOTE(review): the body is not visible in this listing; presumably it
// releases the wrapped pluralRules -- confirm.
581 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
// Selects a plural keyword by delegating to the wrapped PluralRules.
//   context - must point to a VisibleDigitsWithExponent; it is static_cast
//             to that type and dereferenced without a NULL check
//   number  - not used for the selection (see the existing comment below)
//   (error code parameter) - unnamed and unused
// Returns the UnicodeString produced by pluralRules->select(*dec).
UnicodeString
PluralFormat::PluralSelectorAdapter::select(void *context
, double number
,
586 UErrorCode
& /*ec*/) const {
587 (void)number
; // unused except in the assertion
588 VisibleDigitsWithExponent
*dec
=static_cast<VisibleDigitsWithExponent
*>(context
);
589 return pluralRules
->select(*dec
);
// Resets the adapter; PluralFormat::setLocale() calls this before re-running init().
// NOTE(review): the body is not visible in this listing; presumably it
// deletes and clears the wrapped pluralRules -- confirm.
592 void PluralFormat::PluralSelectorAdapter::reset() {
601 #endif /* #if !UCONFIG_NO_FORMATTING */