1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2009-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
10 *******************************************************************************
13 #include "unicode/decimfmt.h"
14 #include "unicode/messagepattern.h"
15 #include "unicode/plurfmt.h"
16 #include "unicode/plurrule.h"
17 #include "unicode/utypes.h"
19 #include "messageimpl.h"
21 #include "plurrule_impl.h"
24 #include "number_decimalquantity.h"
25 #include "number_utils.h"
26 #include "number_utypes.h"
28 #if !UCONFIG_NO_FORMATTING
32 using number::impl::DecimalQuantity
;
// UTF-16 code units spelling the plural keyword "other", followed by a 0 terminator.
// findSubMessage() wraps the first five units in a UnicodeString to compare
// against pattern selectors and the keyword returned by the selector.
34 static const UChar OTHER_STRING
[] = {
35 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
// Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for PluralFormat.
// NOTE(review): the macro is defined outside this file; presumably it emits the
// class-ID accessors for ICU's RTTI scheme — confirm against its definition.
38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat
)
// Constructs a PluralFormat for the default locale (Locale::getDefault()).
// Calls init() with no explicit rules (NULL) and UPLURAL_TYPE_CARDINAL; any
// failure is reported through `status`.
// NOTE(review): the remaining member initializers are not visible in this listing.
40 PluralFormat::PluralFormat(UErrorCode
& status
)
41 : locale(Locale::getDefault()),
45 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
// Constructs a PluralFormat for the locale `loc`.
// Calls init() with no explicit rules (NULL) and UPLURAL_TYPE_CARDINAL; any
// failure is reported through `status`.
// NOTE(review): the member-initializer list is not visible in this listing;
// presumably `locale` is initialized from `loc` — confirm.
48 PluralFormat::PluralFormat(const Locale
& loc
, UErrorCode
& status
)
53 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
// Constructs a PluralFormat for the default locale using caller-supplied rules.
// Passes the address of `rules` to init(), which clones them rather than
// keeping the caller's object.
// UPLURAL_TYPE_COUNT is passed as the type; presumably it is a placeholder,
// since init() only forwards `type` to PluralRules::forLocale() — confirm.
56 PluralFormat::PluralFormat(const PluralRules
& rules
, UErrorCode
& status
)
57 : locale(Locale::getDefault()),
61 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
// Constructs a PluralFormat from a locale and caller-supplied rules.
// Passes the address of `rules` and UPLURAL_TYPE_COUNT to init().
// NOTE(review): the rest of the parameter list (including `status`) and the
// member initializers are not visible in this listing.
64 PluralFormat::PluralFormat(const Locale
& loc
,
65 const PluralRules
& rules
,
71 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
// Constructs a PluralFormat from a locale and a plural type.
// Calls init() with no explicit rules (NULL) and the caller's `type`.
// NOTE(review): the declarations of `type` and `status` and the member
// initializers are not visible in this listing.
74 PluralFormat::PluralFormat(const Locale
& loc
,
81 init(NULL
, type
, status
);
// Constructs a PluralFormat for the default locale and applies the pattern `pat`.
// Runs init() with NULL rules and UPLURAL_TYPE_CARDINAL, then applyPattern();
// both report failures through `status`.
84 PluralFormat::PluralFormat(const UnicodeString
& pat
,
86 : locale(Locale::getDefault()),
90 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
91 applyPattern(pat
, status
);
// Constructs a PluralFormat from a locale and applies the pattern `pat`.
// Runs init() with NULL rules and UPLURAL_TYPE_CARDINAL, then applyPattern().
// NOTE(review): the `status` parameter and member initializers are not visible
// in this listing.
94 PluralFormat::PluralFormat(const Locale
& loc
,
95 const UnicodeString
& pat
,
101 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
102 applyPattern(pat
, status
);
// Constructs a PluralFormat for the default locale from caller-supplied rules
// and applies the pattern `pat`.
// Runs init() with the address of `rules` and UPLURAL_TYPE_COUNT, then applyPattern().
105 PluralFormat::PluralFormat(const PluralRules
& rules
,
106 const UnicodeString
& pat
,
108 : locale(Locale::getDefault()),
112 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
113 applyPattern(pat
, status
);
// Constructs a PluralFormat from a locale, caller-supplied rules and a pattern.
// Runs init() with the address of `rules` and UPLURAL_TYPE_COUNT, then applyPattern().
// NOTE(review): the `status` parameter and member initializers are not visible
// in this listing.
116 PluralFormat::PluralFormat(const Locale
& loc
,
117 const PluralRules
& rules
,
118 const UnicodeString
& pat
,
124 init(&rules
, UPLURAL_TYPE_COUNT
, status
);
125 applyPattern(pat
, status
);
// Constructs a PluralFormat from a locale, a plural type and a pattern.
// Runs init() with NULL rules and the caller's `type`, then applyPattern().
// NOTE(review): the declarations of `type` and `status` and the member
// initializers are not visible in this listing.
128 PluralFormat::PluralFormat(const Locale
& loc
,
130 const UnicodeString
& pat
,
136 init(NULL
, type
, status
);
137 applyPattern(pat
, status
);
// Copy constructor: copies locale, msgPattern and offset from `other` in the
// initializer list.
// NOTE(review): the constructor body and any other initializers are not
// visible in this listing; presumably the owned pointers are handled via
// copyObjects() — confirm.
140 PluralFormat::PluralFormat(const PluralFormat
& other
)
142 locale(other
.locale
),
143 msgPattern(other
.msgPattern
),
145 offset(other
.offset
) {
// Replaces this object's numberFormat and plural rules with copies taken from `other`.
// If `other` holds a NULL pointer, a fresh instance is created for this
// object's `locale` instead of cloning.
// `status` is a local here and the function takes no error parameter, so
// failures from createInstance()/forLocale() are not reported by any visible code.
150 PluralFormat::copyObjects(const PluralFormat
& other
) {
151 UErrorCode status
= U_ZERO_ERROR
;
// Release what this object currently owns before overwriting the pointers.
// NOTE(review): the body of the numberFormat branch is not visible in this listing.
152 if (numberFormat
!= NULL
) {
155 if (pluralRulesWrapper
.pluralRules
!= NULL
) {
156 delete pluralRulesWrapper
.pluralRules
;
// Number format: default instance for `locale` when other has none, otherwise a clone.
159 if (other
.numberFormat
== NULL
) {
160 numberFormat
= NumberFormat::createInstance(locale
, status
);
162 numberFormat
= other
.numberFormat
->clone();
// Plural rules: rules for `locale` when other has none, otherwise a clone.
164 if (other
.pluralRulesWrapper
.pluralRules
== NULL
) {
165 pluralRulesWrapper
.pluralRules
= PluralRules::forLocale(locale
, status
);
167 pluralRulesWrapper
.pluralRules
= other
.pluralRulesWrapper
.pluralRules
->clone();
// Destructor.
// NOTE(review): the body is not visible in this listing; presumably it
// releases the owned numberFormat — confirm.
172 PluralFormat::~PluralFormat() {
// Shared initialization used by the constructors and setLocale().
//   rules  - rules to clone, or NULL (every NULL caller in this file relies on
//            the locale-based path)
//   type   - plural type forwarded to PluralRules::forLocale()
//   status - in/out error code; checked on entry
// Sets pluralRulesWrapper.pluralRules and numberFormat for the current `locale`.
177 PluralFormat::init(const PluralRules
* rules
, UPluralType type
, UErrorCode
& status
) {
178 if (U_FAILURE(status
)) {
// Rules come either from the locale data or from a clone of the caller's rules.
// NOTE(review): the condition choosing between the two is not visible in this listing.
183 pluralRulesWrapper
.pluralRules
= PluralRules::forLocale(locale
, type
, status
);
185 pluralRulesWrapper
.pluralRules
= rules
->clone();
// A NULL rules pointer at this point is reported as an allocation failure.
186 if (pluralRulesWrapper
.pluralRules
== NULL
) {
187 status
= U_MEMORY_ALLOCATION_ERROR
;
192 numberFormat
= NumberFormat::createInstance(locale
, status
);
// Parses `newPattern` as a plural-style pattern into msgPattern and caches its offset.
// Parse errors are reported through `status`; no UParseError is requested (NULL).
// NOTE(review): the body of the failure branch is not visible in this listing.
196 PluralFormat::applyPattern(const UnicodeString
& newPattern
, UErrorCode
& status
) {
197 msgPattern
.parsePluralStyle(newPattern
, NULL
, status
);
198 if (U_FAILURE(status
)) {
// Cache the pattern's plural offset (queried at part index 0) for format().
203 offset
= msgPattern
.getPluralOffset(0);
// Format-interface entry point: formats a Formattable.
// Returns `appendTo` untouched if `status` already indicates failure.
// Numeric objects are forwarded to the (Formattable, double, ...) overload
// using obj.getDouble(); anything else sets U_ILLEGAL_ARGUMENT_ERROR.
207 PluralFormat::format(const Formattable
& obj
,
208 UnicodeString
& appendTo
,
210 UErrorCode
& status
) const
212 if (U_FAILURE(status
)) return appendTo
;
214 if (obj
.isNumeric()) {
215 return format(obj
, obj
.getDouble(), appendTo
, pos
, status
);
217 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Convenience overload: formats an int32_t into a fresh string.
// Uses a DONT_CARE FieldPosition and an empty result buffer, then delegates to
// the (Formattable, double, ...) overload.
223 PluralFormat::format(int32_t number
, UErrorCode
& status
) const {
224 FieldPosition
fpos(FieldPosition::DONT_CARE
);
225 UnicodeString result
;
226 return format(Formattable(number
), number
, result
, fpos
, status
);
// Convenience overload: formats a double into a fresh string.
// Uses a DONT_CARE FieldPosition and an empty result buffer, then delegates to
// the (Formattable, double, ...) overload.
230 PluralFormat::format(double number
, UErrorCode
& status
) const {
231 FieldPosition
fpos(FieldPosition::DONT_CARE
);
232 UnicodeString result
;
233 return format(Formattable(number
), number
, result
, fpos
, status
);
// Formats an int32_t, appending to `appendTo`.
// Wraps the value in a Formattable, widens it to double for selection, and
// delegates to the (Formattable, double, ...) overload.
// NOTE(review): the declaration of `pos` is not visible in this listing.
238 PluralFormat::format(int32_t number
,
239 UnicodeString
& appendTo
,
241 UErrorCode
& status
) const {
242 return format(Formattable(number
), (double)number
, appendTo
, pos
, status
);
// Formats a double, appending to `appendTo`.
// Wraps the value in a Formattable and delegates to the
// (Formattable, double, ...) overload.
// NOTE(review): the declaration of `pos` is not visible in this listing.
246 PluralFormat::format(double number
,
247 UnicodeString
& appendTo
,
249 UErrorCode
& status
) const {
250 return format(Formattable(number
), (double)number
, appendTo
, pos
, status
);
// Core formatting routine that all other format() overloads funnel into.
//   numberObject - the value as a Formattable (used for number formatting)
//   number       - the same value as a double (used for sub-message selection)
//   appendTo     - output buffer; the selected sub-message is appended here
//   status       - in/out error code
// Steps: format (number - offset) to a string, pick the matching sub-message
// with findSubMessage(), then copy that sub-message to `appendTo`, replacing
// each REPLACE_NUMBER part with the formatted number.
254 PluralFormat::format(const Formattable
& numberObject
, double number
,
255 UnicodeString
& appendTo
,
257 UErrorCode
& status
) const {
258 if (U_FAILURE(status
)) {
// No pattern has been applied: behave like a plain number format.
261 if (msgPattern
.countParts() == 0) {
262 return numberFormat
->format(numberObject
, appendTo
, pos
, status
);
265 // Get the appropriate sub-message.
266 // Select it based on the formatted number-offset.
267 double numberMinusOffset
= number
- offset
;
268 // Call NumberFormatter to get both the DecimalQuantity and the string.
269 // This call site needs to use more internal APIs than the Java equivalent.
270 number::impl::UFormattedNumberData data
;
// data.quantity is filled either from the original object or from the
// offset-adjusted double.
// NOTE(review): the condition choosing between the two is not visible in this listing.
272 // could be BigDecimal etc.
273 numberObject
.populateDecimalQuantity(data
.quantity
, status
);
275 data
.quantity
.setToDouble(numberMinusOffset
);
277 UnicodeString numberString
;
// When the number format is a DecimalFormat, use its LocalizedNumberFormatter
// so that formatImpl() updates `data` and yields the string from the same object.
278 auto *decFmt
= dynamic_cast<DecimalFormat
*>(numberFormat
);
279 if(decFmt
!= nullptr) {
280 const number::LocalizedNumberFormatter
* lnf
= decFmt
->toNumberFormatter(status
);
281 if (U_FAILURE(status
)) {
284 lnf
->formatImpl(&data
, status
); // mutates &data
285 if (U_FAILURE(status
)) {
288 numberString
= data
.getStringRef().toUnicodeString();
// Any other NumberFormat: format through NumberFormat::format().
// NOTE(review): the condition choosing between these two calls is not visible in this listing.
291 numberFormat
->format(numberObject
, numberString
, status
);
293 numberFormat
->format(numberMinusOffset
, numberString
, status
);
// The un-offset `number` is passed on: findSubMessage() compares it with
// explicit "=N" values and subtracts the offset itself before calling the selector.
297 int32_t partIndex
= findSubMessage(msgPattern
, 0, pluralRulesWrapper
, &data
.quantity
, number
, status
);
298 if (U_FAILURE(status
)) { return appendTo
; }
299 // Replace syntactic # signs in the top level of this sub-message
300 // (not in nested arguments) with the formatted number-offset.
301 const UnicodeString
& pattern
= msgPattern
.getPatternString();
// prevIndex marks the start of pattern text not yet copied to appendTo.
302 int32_t prevIndex
= msgPattern
.getPart(partIndex
).getLimit();
304 const MessagePattern::Part
& part
= msgPattern
.getPart(++partIndex
);
305 const UMessagePatternPartType type
= part
.getType();
306 int32_t index
= part
.getIndex();
// End of the selected sub-message: append the remaining text and finish.
307 if (type
== UMSGPAT_PART_TYPE_MSG_LIMIT
) {
308 return appendTo
.append(pattern
, prevIndex
, index
- prevIndex
);
// REPLACE_NUMBER, or SKIP_SYNTAX in JDK apostrophe mode: append the text before
// the part and resume after it; only REPLACE_NUMBER also appends the number.
309 } else if ((type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) ||
310 (type
== UMSGPAT_PART_TYPE_SKIP_SYNTAX
&& MessageImpl::jdkAposMode(msgPattern
))) {
311 appendTo
.append(pattern
, prevIndex
, index
- prevIndex
);
312 if (type
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) {
313 appendTo
.append(numberString
);
315 prevIndex
= part
.getLimit();
// Nested argument: append the text before it, jump to its limit part, and
// append the argument's own text via appendReducedApostrophes().
316 } else if (type
== UMSGPAT_PART_TYPE_ARG_START
) {
317 appendTo
.append(pattern
, prevIndex
, index
- prevIndex
);
319 partIndex
= msgPattern
.getLimitPartIndex(partIndex
);
320 index
= msgPattern
.getPart(partIndex
).getLimit();
321 MessageImpl::appendReducedApostrophes(pattern
, prevIndex
, index
, appendTo
);
// Writes the current pattern into `appendTo`.
// With no parsed parts, `appendTo` is set to bogus; the other visible path
// appends the stored pattern string.
328 PluralFormat::toPattern(UnicodeString
& appendTo
) {
329 if (0 == msgPattern
.countParts()) {
330 appendTo
.setToBogus();
332 appendTo
.append(msgPattern
.getPatternString());
// Switches this formatter to a new locale and rebuilds its locale-dependent state.
// Resets pluralRulesWrapper and re-runs init() with NULL rules and
// UPLURAL_TYPE_CARDINAL, i.e. always cardinal rules regardless of how the
// object was constructed.
// NOTE(review): the statements between the status check and the reset
// (presumably storing `loc` and clearing the old pattern/number format) are
// not visible in this listing — confirm.
338 PluralFormat::setLocale(const Locale
& loc
, UErrorCode
& status
) {
339 if (U_FAILURE(status
)) {
347 pluralRulesWrapper
.reset();
348 init(NULL
, UPLURAL_TYPE_CARDINAL
, status
);
// Replaces the number format used for the formatted number with a clone of `format`.
// `format` is dereferenced with no NULL check visible here, so callers must
// pass a valid pointer.
// NOTE(review): the branch that installs the clone and the condition guarding
// U_MEMORY_ALLOCATION_ERROR are not visible in this listing.
352 PluralFormat::setNumberFormat(const NumberFormat
* format
, UErrorCode
& status
) {
353 if (U_FAILURE(status
)) {
356 NumberFormat
* nf
= format
->clone();
361 status
= U_MEMORY_ALLOCATION_ERROR
;
// Returns a heap-allocated copy of this object made with the copy constructor.
// The caller receives a raw pointer from `new`.
366 PluralFormat::clone() const
368 return new PluralFormat(*this);
// Copy assignment: skipped entirely on self-assignment; otherwise copies
// locale, msgPattern and offset from `other`.
// NOTE(review): the remainder of the body (presumably a copyObjects() call and
// the return statement) is not visible in this listing — confirm.
373 PluralFormat::operator=(const PluralFormat
& other
) {
374 if (this != &other
) {
375 locale
= other
.locale
;
376 msgPattern
= other
.msgPattern
;
377 offset
= other
.offset
;
// Equality comparison against another Format.
// After the identity and Format::operator== checks, `other` is cast to
// PluralFormat and compared field by field: locale, msgPattern, then
// numberFormat and plural rules. The owned pointers must be either both NULL
// or both non-NULL with equal pointees.
385 PluralFormat::operator==(const Format
& other
) const {
386 if (this == &other
) {
389 if (!Format::operator==(other
)) {
// NOTE(review): this unchecked downcast presumably relies on the
// Format::operator== check above to guarantee the dynamic type — confirm.
392 const PluralFormat
& o
= (const PluralFormat
&)other
;
394 locale
== o
.locale
&&
395 msgPattern
== o
.msgPattern
&& // implies same offset
// Nullness is compared first, so the dereferences below only happen when
// both sides are non-NULL.
396 (numberFormat
== NULL
) == (o
.numberFormat
== NULL
) &&
397 (numberFormat
== NULL
|| *numberFormat
== *o
.numberFormat
) &&
398 (pluralRulesWrapper
.pluralRules
== NULL
) == (o
.pluralRulesWrapper
.pluralRules
== NULL
) &&
399 (pluralRulesWrapper
.pluralRules
== NULL
||
400 *pluralRulesWrapper
.pluralRules
== *o
.pluralRulesWrapper
.pluralRules
);
// Inequality: the logical negation of operator==.
404 PluralFormat::operator!=(const Format
& other
) const {
405 return !operator==(other
);
// Format-interface parse hook. Parsing is not supported: the source and result
// arguments are ignored and the error index of `pos` is set to its current index.
409 PluralFormat::parseObject(const UnicodeString
& /*source*/,
410 Formattable
& /*result*/,
411 ParsePosition
& pos
) const
413 // Parsing not supported.
414 pos
.setErrorIndex(pos
.getIndex());
// Finds the sub-message of a plural pattern that applies to `number`.
//   pattern   - the parsed message pattern
//   partIndex - index of the first part of the plural style to examine
//   selector  - maps (context, number - offset) to a plural keyword
//   context   - opaque pointer handed through to selector.select()
//   number    - value to match; compared un-offset against explicit "=N" values
//   ec        - error code passed to the selector
// Returns a part index; format() reads getPart(result).getLimit() as the start
// of the sub-message text.
// NOTE(review): the return statement and the declarations of `offset` and
// `msgStart` are not visible in this listing.
417 int32_t PluralFormat::findSubMessage(const MessagePattern
& pattern
, int32_t partIndex
,
418 const PluralSelector
& selector
, void *context
,
419 double number
, UErrorCode
& ec
) {
423 int32_t count
=pattern
.countParts();
// A leading part with a numeric value supplies the plural offset.
425 const MessagePattern::Part
* part
=&pattern
.getPart(partIndex
);
426 if (MessagePattern::Part::hasNumericValue(part
->getType())) {
427 offset
=pattern
.getNumericValue(*part
);
432 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
433 // Then we get the keyword from the selector.
434 // (In other words, we never call the selector if we match against an explicit value,
435 // or if the only non-explicit keyword is "other".)
436 UnicodeString keyword
;
437 UnicodeString
other(FALSE
, OTHER_STRING
, 5);
438 // When we find a match, we set msgStart>0 and also set this boolean to true
439 // to avoid matching the keyword again (duplicates are allowed)
440 // while we continue to look for an explicit-value match.
441 UBool haveKeywordMatch
=FALSE
;
442 // msgStart is 0 until we find any appropriate sub-message.
443 // We remember the first "other" sub-message if we have not seen any
444 // appropriate sub-message before.
445 // We remember the first matching-keyword sub-message if we have not seen
446 // one of those before.
447 // (The parser allows [does not check for] duplicate keywords.
448 // We just have to make sure to take the first one.)
449 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
450 // at the first keyword match.
451 // We keep going until we find an explicit-value match or reach the end of the plural style.
453 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
454 // until ARG_LIMIT or end of plural-only pattern.
456 part
=&pattern
.getPart(partIndex
++);
457 const UMessagePatternPartType type
= part
->getType();
458 if(type
==UMSGPAT_PART_TYPE_ARG_LIMIT
) {
461 U_ASSERT (type
==UMSGPAT_PART_TYPE_ARG_SELECTOR
);
462 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
463 if(MessagePattern::Part::hasNumericValue(pattern
.getPartType(partIndex
))) {
464 // explicit value like "=2"
465 part
=&pattern
.getPart(partIndex
++);
466 if(number
==pattern
.getNumericValue(*part
)) {
467 // matches explicit value
470 } else if(!haveKeywordMatch
) {
471 // plural keyword like "few" or "other"
472 // Compare "other" first and call the selector if this is not "other".
473 if(pattern
.partSubstringMatches(*part
, other
)) {
476 if(0 == keyword
.compare(other
)) {
477 // This is the first "other" sub-message,
478 // and the selected keyword is also "other".
479 // Do not match "other" again.
480 haveKeywordMatch
=TRUE
;
// Ask the selector at most once: an empty keyword means it has not been called yet.
484 if(keyword
.isEmpty()) {
485 keyword
=selector
.select(context
, number
-offset
, ec
);
486 if(msgStart
!=0 && (0 == keyword
.compare(other
))) {
487 // We have already seen an "other" sub-message.
488 // Do not match "other" again.
489 haveKeywordMatch
=TRUE
;
490 // Skip keyword matching but do getLimitPartIndex().
493 if(!haveKeywordMatch
&& pattern
.partSubstringMatches(*part
, keyword
)) {
496 // Do not match this keyword again.
497 haveKeywordMatch
=TRUE
;
// Jump to the limit part of this sub-message; the loop increment then moves
// on to the next tuple.
501 partIndex
=pattern
.getLimitPartIndex(partIndex
);
502 } while(++partIndex
<count
);
// Searches `source` for the literal text of one of this pattern's sub-messages.
//   source             - text to search
//   rbnfLenientScanner - if non-NULL, its findTextLenient() is used instead of
//                        an exact indexOf() search
//   result             - receives the matched text on success
//   pos                - in: begin index to search from; out: begin/end of the
//                        match, or begin index -1 when nothing matches
// Among candidates, a match replaces the current best only if it is found at
// an index >= the current best index and its text is longer.
506 void PluralFormat::parseType(const UnicodeString
& source
, const NFRule
*rbnfLenientScanner
, Formattable
& result
, FieldPosition
& pos
) const {
507 // If no pattern was applied, return null.
508 if (msgPattern
.countParts() == 0) {
509 pos
.setBeginIndex(-1);
515 int count
=msgPattern
.countParts();
516 int startingAt
= pos
.getBeginIndex();
517 if (startingAt
< 0) {
521 // The keyword is null until we need to match against a non-explicit, not-"other" value.
522 // Then we get the keyword from the selector.
523 // (In other words, we never call the selector if we match against an explicit value,
524 // or if the only non-explicit keyword is "other".)
525 UnicodeString keyword
;
526 UnicodeString matchedWord
;
527 const UnicodeString
& pattern
= msgPattern
.getPatternString();
528 int matchedIndex
= -1;
529 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
530 // until the end of the plural-only pattern.
// NOTE(review): the declaration/initialization of partIndex is not visible in this listing.
531 while (partIndex
< count
) {
// Each tuple must be ARG_SELECTOR, MSG_START, MSG_LIMIT in that order.
// NOTE(review): the bodies of the three type checks are not visible in this listing.
532 const MessagePattern::Part
* partSelector
= &msgPattern
.getPart(partIndex
++);
533 if (partSelector
->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR
) {
538 const MessagePattern::Part
* partStart
= &msgPattern
.getPart(partIndex
++);
539 if (partStart
->getType() != UMSGPAT_PART_TYPE_MSG_START
) {
544 const MessagePattern::Part
* partLimit
= &msgPattern
.getPart(partIndex
++);
545 if (partLimit
->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT
) {
// currArg is the pattern text between the MSG_START and MSG_LIMIT parts.
550 UnicodeString currArg
= pattern
.tempSubString(partStart
->getLimit(), partLimit
->getIndex() - partStart
->getLimit());
551 if (rbnfLenientScanner
!= NULL
) {
552 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
554 currMatchIndex
= rbnfLenientScanner
->findTextLenient(source
, currArg
, startingAt
, &length
);
557 currMatchIndex
= source
.indexOf(currArg
, startingAt
);
559 if (currMatchIndex
>= 0 && currMatchIndex
>= matchedIndex
&& currArg
.length() > matchedWord
.length()) {
560 matchedIndex
= currMatchIndex
;
561 matchedWord
= currArg
;
// keyword is assigned the same substring as currArg (the message text, not
// the selector text), so `result` ends up holding the matched text.
562 keyword
= pattern
.tempSubString(partStart
->getLimit(), partLimit
->getIndex() - partStart
->getLimit());
565 if (matchedIndex
>= 0) {
566 pos
.setBeginIndex(matchedIndex
);
567 pos
.setEndIndex(matchedIndex
+ matchedWord
.length());
568 result
.setString(keyword
);
// No sub-message text was found in source.
573 pos
.setBeginIndex(-1);
// Out-of-line destructor for the PluralSelector interface; the body is empty.
577 PluralFormat::PluralSelector::~PluralSelector() {}
// Destructor for the adapter that wraps a PluralRules pointer.
// NOTE(review): the body is not visible in this listing; presumably it
// releases pluralRules — confirm.
579 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
// Selects the plural keyword for the quantity passed through `context`.
//   context - cast to IFixedDecimal* and dereferenced without a NULL check,
//             so it must point to a valid IFixedDecimal
//   number  - not used for the selection
// The error-code parameter is unnamed and unused.
// Returns whatever pluralRules->select() yields for that quantity.
UnicodeString
PluralFormat::PluralSelectorAdapter::select(void *context
, double number
,
584 UErrorCode
& /*ec*/) const {
585 (void)number
; // unused except in the assertion
586 IFixedDecimal
*dec
=static_cast<IFixedDecimal
*>(context
);
587 return pluralRules
->select(*dec
);
// Resets the adapter; setLocale() calls this before re-running init().
// NOTE(review): the body is not visible in this listing; presumably it deletes
// and clears pluralRules — confirm.
590 void PluralFormat::PluralSelectorAdapter::reset() {
599 #endif /* #if !UCONFIG_NO_FORMATTING */