1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 ******************************************************************************* 
   5 * Copyright (C) 2009-2015, International Business Machines Corporation and 
   6 * others. All Rights Reserved. 
   7 ******************************************************************************* 
  10 ******************************************************************************* 
  13 #include "unicode/decimfmt.h" 
  14 #include "unicode/messagepattern.h" 
  15 #include "unicode/plurfmt.h" 
  16 #include "unicode/plurrule.h" 
  17 #include "unicode/utypes.h" 
  19 #include "messageimpl.h" 
  21 #include "plurrule_impl.h" 
  24 #include "number_decimalquantity.h" 
  25 #include "number_utils.h" 
  26 #include "number_utypes.h" 
  28 #if !UCONFIG_NO_FORMATTING 
  // Bring number::impl::DecimalQuantity into scope for this file.
  32 using number::impl::DecimalQuantity
; 
  // UTF-16 code units for the plural keyword "other", followed by a 0 terminator.
  34 static const UChar OTHER_STRING
[] = { 
  35     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other" 
  // Invokes the UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro for PluralFormat.
  38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat
) 
  // Constructor taking only an error code.
  // Initializes `locale` from Locale::getDefault() and calls init() with no
  // explicit rules (NULL) and UPLURAL_TYPE_CARDINAL.
  // NOTE(review): the remaining member initializers are not visible in this listing.
  40 PluralFormat::PluralFormat(UErrorCode
& status
) 
  41         : locale(Locale::getDefault()), 
  45     init(NULL
, UPLURAL_TYPE_CARDINAL
, status
); 
  // Constructor taking a locale and an error code.
  // Calls init() with no explicit rules (NULL) and UPLURAL_TYPE_CARDINAL.
  // NOTE(review): the initializer list (where `loc` is presumably stored) is not
  // visible in this listing -- confirm against the full file.
  48 PluralFormat::PluralFormat(const Locale
& loc
, UErrorCode
& status
) 
  53     init(NULL
, UPLURAL_TYPE_CARDINAL
, status
); 
  // Constructor taking caller-supplied plural rules and an error code.
  // Initializes `locale` from Locale::getDefault() and passes the address of
  // `rules` to init() together with UPLURAL_TYPE_COUNT.
  56 PluralFormat::PluralFormat(const PluralRules
& rules
, UErrorCode
& status
) 
  57         : locale(Locale::getDefault()), 
  61     init(&rules
, UPLURAL_TYPE_COUNT
, status
); 
  // Constructor taking a locale and caller-supplied plural rules.
  // Passes the address of `rules` to init() together with UPLURAL_TYPE_COUNT.
  // NOTE(review): the trailing parameter(s) and the initializer list are not
  // visible in this listing; `status` is used in the init() call.
  64 PluralFormat::PluralFormat(const Locale
& loc
, 
  65                            const PluralRules
& rules
, 
  71     init(&rules
, UPLURAL_TYPE_COUNT
, status
); 
  // Constructor taking a locale and (per the init() call) a plural `type`.
  // Calls init() with no explicit rules (NULL) and the given `type`.
  // NOTE(review): the declarations of `type` and `status` and the initializer
  // list are not visible in this listing.
  74 PluralFormat::PluralFormat(const Locale
& loc
, 
  81     init(NULL
, type
, status
); 
  // Constructor taking a pattern string.
  // Initializes `locale` from Locale::getDefault(), calls init() with NULL rules
  // and UPLURAL_TYPE_CARDINAL, then applies `pat` via applyPattern().
  // NOTE(review): the `status` parameter declaration is not visible in this listing.
  84 PluralFormat::PluralFormat(const UnicodeString
& pat
, 
  86         : locale(Locale::getDefault()), 
  90     init(NULL
, UPLURAL_TYPE_CARDINAL
, status
); 
  91     applyPattern(pat
, status
); 
  // Constructor taking a locale and a pattern string.
  // Calls init() with NULL rules and UPLURAL_TYPE_CARDINAL, then applies `pat`
  // via applyPattern().
  // NOTE(review): the `status` parameter and the initializer list are not
  // visible in this listing.
  94 PluralFormat::PluralFormat(const Locale
& loc
, 
  95                            const UnicodeString
& pat
, 
 101     init(NULL
, UPLURAL_TYPE_CARDINAL
, status
); 
 102     applyPattern(pat
, status
); 
 // Constructor taking caller-supplied plural rules and a pattern string.
 // Initializes `locale` from Locale::getDefault(), passes the address of `rules`
 // to init() with UPLURAL_TYPE_COUNT, then applies `pat` via applyPattern().
 // NOTE(review): the `status` parameter declaration is not visible in this listing.
 105 PluralFormat::PluralFormat(const PluralRules
& rules
, 
 106                            const UnicodeString
& pat
, 
 108         : locale(Locale::getDefault()), 
 112     init(&rules
, UPLURAL_TYPE_COUNT
, status
); 
 113     applyPattern(pat
, status
); 
 // Constructor taking a locale, caller-supplied plural rules and a pattern string.
 // Passes the address of `rules` to init() with UPLURAL_TYPE_COUNT, then applies
 // `pat` via applyPattern().
 // NOTE(review): the `status` parameter and the initializer list are not
 // visible in this listing.
 116 PluralFormat::PluralFormat(const Locale
& loc
, 
 117                            const PluralRules
& rules
, 
 118                            const UnicodeString
& pat
, 
 124     init(&rules
, UPLURAL_TYPE_COUNT
, status
); 
 125     applyPattern(pat
, status
); 
 // Constructor taking a locale, (per the init() call) a plural `type`, and a
 // pattern string.
 // Calls init() with NULL rules and the given `type`, then applies `pat` via
 // applyPattern().
 // NOTE(review): the declarations of `type` and `status` and the initializer
 // list are not visible in this listing.
 128 PluralFormat::PluralFormat(const Locale
& loc
, 
 130                            const UnicodeString
& pat
, 
 136     init(NULL
, type
, status
); 
 137     applyPattern(pat
, status
); 
 // Copy constructor.
 // The visible initializers copy `locale`, `msgPattern` and `offset` from `other`.
 // NOTE(review): other initializers and the constructor body are not visible in
 // this listing.
 140 PluralFormat::PluralFormat(const PluralFormat
& other
) 
 142           locale(other
.locale
), 
 143           msgPattern(other
.msgPattern
), 
 145           offset(other
.offset
) { 
 // Replaces this object's number format and plural rules with ones derived
 // from `other`.
 // Visible behaviour:
 //   - an existing pluralRulesWrapper.pluralRules is deleted first;
 //   - numberFormat is set either from NumberFormat::createInstance(locale, status)
 //     (guarded by other.numberFormat == NULL) or from other.numberFormat->clone();
 //   - pluralRulesWrapper.pluralRules is set either from
 //     PluralRules::forLocale(locale, status) (guarded by the other's rules being
 //     NULL) or from a clone of the other's rules.
 // NOTE(review): `status` is a local initialised to U_ZERO_ERROR and is not
 // reported to the caller in the visible lines, so a failed creation would go
 // unnoticed here. The body of the `numberFormat != NULL` branch and the `else`
 // lines are not visible in this listing.
 150 PluralFormat::copyObjects(const PluralFormat
& other
) { 
 151     UErrorCode status 
= U_ZERO_ERROR
; 
 152     if (numberFormat 
!= NULL
) { 
 155     if (pluralRulesWrapper
.pluralRules 
!= NULL
) { 
 156         delete pluralRulesWrapper
.pluralRules
; 
 159     if (other
.numberFormat 
== NULL
) { 
 160         numberFormat 
= NumberFormat::createInstance(locale
, status
); 
 162         numberFormat 
= other
.numberFormat
->clone(); 
 164     if (other
.pluralRulesWrapper
.pluralRules 
== NULL
) { 
 165         pluralRulesWrapper
.pluralRules 
= PluralRules::forLocale(locale
, status
); 
 167         pluralRulesWrapper
.pluralRules 
= other
.pluralRulesWrapper
.pluralRules
->clone(); 
 // Destructor. NOTE(review): the body is not visible in this listing.
 172 PluralFormat::~PluralFormat() { 
 // Shared initialisation used by the constructors and setLocale().
 //   rules  - caller-supplied plural rules, or NULL (callers in this file pass
 //            either NULL or the address of a PluralRules reference).
 //   type   - plural type forwarded to PluralRules::forLocale().
 //   status - in/out ICU error code; checked with U_FAILURE() on entry.
 // Visible behaviour: pluralRulesWrapper.pluralRules is assigned either from
 // PluralRules::forLocale(locale, type, status) or from rules->clone(); a NULL
 // clone result sets U_MEMORY_ALLOCATION_ERROR. Finally numberFormat is created
 // with NumberFormat::createInstance(locale, status).
 // NOTE(review): the condition selecting between the two rule sources and the
 // early-return statements are not visible in this listing.
 177 PluralFormat::init(const PluralRules
* rules
, UPluralType type
, UErrorCode
& status
) { 
 178     if (U_FAILURE(status
)) { 
 183         pluralRulesWrapper
.pluralRules 
= PluralRules::forLocale(locale
, type
, status
); 
 185         pluralRulesWrapper
.pluralRules 
= rules
->clone(); 
 186         if (pluralRulesWrapper
.pluralRules 
== NULL
) { 
 187             status 
= U_MEMORY_ALLOCATION_ERROR
; 
 192     numberFormat
= NumberFormat::createInstance(locale
, status
); 
 // Parses `newPattern` as a plural-style pattern into msgPattern (passing NULL
 // for the parse-error argument) and then caches the pattern's plural offset,
 // read from part index 0, in `offset`.
 // NOTE(review): the body of the U_FAILURE(status) branch is not visible in
 // this listing.
 196 PluralFormat::applyPattern(const UnicodeString
& newPattern
, UErrorCode
& status
) { 
 197     msgPattern
.parsePluralStyle(newPattern
, NULL
, status
); 
 198     if (U_FAILURE(status
)) { 
 203     offset 
= msgPattern
.getPluralOffset(0); 
 // Formattable entry point.
 // Returns `appendTo` unchanged when `status` already indicates failure.
 // For a numeric `obj`, delegates to the (Formattable, double, ...) overload
 // using obj.getDouble(); the other visible path sets U_ILLEGAL_ARGUMENT_ERROR.
 // NOTE(review): the `pos` parameter declaration is not visible in this listing.
 207 PluralFormat::format(const Formattable
& obj
, 
 208                    UnicodeString
& appendTo
, 
 210                    UErrorCode
& status
) const 
 212     if (U_FAILURE(status
)) return appendTo
; 
 214     if (obj
.isNumeric()) { 
 215         return format(obj
, obj
.getDouble(), appendTo
, pos
, status
); 
 217         status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
 // Convenience overload for an int32_t: formats into a fresh UnicodeString
 // using a FieldPosition constructed with DONT_CARE, and returns the result of
 // the (Formattable, double, ...) overload.
 223 PluralFormat::format(int32_t number
, UErrorCode
& status
) const { 
 224     FieldPosition 
fpos(FieldPosition::DONT_CARE
); 
 225     UnicodeString result
; 
 226     return format(Formattable(number
), number
, result
, fpos
, status
); 
 // Convenience overload for a double: formats into a fresh UnicodeString
 // using a FieldPosition constructed with DONT_CARE, and returns the result of
 // the (Formattable, double, ...) overload.
 230 PluralFormat::format(double number
, UErrorCode
& status
) const { 
 231     FieldPosition 
fpos(FieldPosition::DONT_CARE
); 
 232     UnicodeString result
; 
 233     return format(Formattable(number
), number
, result
, fpos
, status
); 
 // Appending overload for an int32_t: wraps `number` in a Formattable, casts it
 // to double, and forwards to the (Formattable, double, ...) overload.
 // NOTE(review): the `pos` parameter declaration is not visible in this listing.
 238 PluralFormat::format(int32_t number
, 
 239                      UnicodeString
& appendTo
, 
 241                      UErrorCode
& status
) const { 
 242     return format(Formattable(number
), (double)number
, appendTo
, pos
, status
); 
 // Appending overload for a double: wraps `number` in a Formattable and forwards
 // to the (Formattable, double, ...) overload.
 // NOTE(review): the `pos` parameter declaration is not visible in this listing.
 246 PluralFormat::format(double number
, 
 247                      UnicodeString
& appendTo
, 
 249                      UErrorCode
& status
) const { 
 250     return format(Formattable(number
), (double)number
, appendTo
, pos
, status
); 
 // Core formatting routine that the other format() overloads forward to.
 //   numberObject - the value as a Formattable.
 //   number       - the same value as a double.
 //   appendTo     - output string that the selected sub-message is appended to.
 //   status       - in/out ICU error code.
 // Outline of the visible code:
 //   1. With no parsed pattern (msgPattern.countParts() == 0) the number is
 //      formatted directly by numberFormat.
 //   2. number - offset is computed and a DecimalQuantity is populated, either
 //      from numberObject or from the offset-adjusted double.
 //   3. The number string is produced through DecimalFormat's
 //      LocalizedNumberFormatter when numberFormat is a DecimalFormat;
 //      numberFormat->format() is the other visible path.
 //   4. findSubMessage() picks the sub-message, which is then copied to
 //      appendTo part by part; REPLACE_NUMBER parts are replaced by the number
 //      string.
 // NOTE(review): several lines (the `pos` parameter, branch conditions, the
 // loop header, early returns) are not visible in this listing.
 254 PluralFormat::format(const Formattable
& numberObject
, double number
, 
 255                      UnicodeString
& appendTo
, 
 257                      UErrorCode
& status
) const { 
 258     if (U_FAILURE(status
)) { 
 261     if (msgPattern
.countParts() == 0) { 
 262         return numberFormat
->format(numberObject
, appendTo
, pos
, status
); 
 265     // Get the appropriate sub-message. 
 266     // Select it based on the formatted number-offset. 
 267     double numberMinusOffset 
= number 
- offset
; 
 268     // Call NumberFormatter to get both the DecimalQuantity and the string. 
 269     // This call site needs to use more internal APIs than the Java equivalent. 
 270     number::impl::UFormattedNumberData data
; 
 272         // could be BigDecimal etc. 
 273         numberObject
.populateDecimalQuantity(data
.quantity
, status
); 
 275         data
.quantity
.setToDouble(numberMinusOffset
); 
 277     UnicodeString numberString
; 
 278     auto *decFmt 
= dynamic_cast<DecimalFormat 
*>(numberFormat
); 
 279     if(decFmt 
!= nullptr) { 
 280         const number::LocalizedNumberFormatter
* lnf 
= decFmt
->toNumberFormatter(status
); 
 281         if (U_FAILURE(status
)) { 
 284         lnf
->formatImpl(&data
, status
); // mutates &data 
 285         if (U_FAILURE(status
)) { 
 288         numberString 
= data
.getStringRef().toUnicodeString(); 
 291             numberFormat
->format(numberObject
, numberString
, status
); 
 293             numberFormat
->format(numberMinusOffset
, numberString
, status
); 
     // The un-adjusted `number` is passed here; findSubMessage() compares explicit
     // values against it and subtracts its own offset before calling the selector.
 297     int32_t partIndex 
= findSubMessage(msgPattern
, 0, pluralRulesWrapper
, &data
.quantity
, number
, status
); 
 298     if (U_FAILURE(status
)) { return appendTo
; } 
 299     // Replace syntactic # signs in the top level of this sub-message 
 300     // (not in nested arguments) with the formatted number-offset. 
 301     const UnicodeString
& pattern 
= msgPattern
.getPatternString(); 
 302     int32_t prevIndex 
= msgPattern
.getPart(partIndex
).getLimit(); 
 304         const MessagePattern::Part
& part 
= msgPattern
.getPart(++partIndex
); 
 305         const UMessagePatternPartType type 
= part
.getType(); 
 306         int32_t index 
= part
.getIndex(); 
         // MSG_LIMIT ends the sub-message: flush the remaining literal text and return.
 307         if (type 
== UMSGPAT_PART_TYPE_MSG_LIMIT
) { 
 308             return appendTo
.append(pattern
, prevIndex
, index 
- prevIndex
); 
 309         } else if ((type 
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) || 
 310             (type 
== UMSGPAT_PART_TYPE_SKIP_SYNTAX 
&& MessageImpl::jdkAposMode(msgPattern
))) { 
 311             appendTo
.append(pattern
, prevIndex
, index 
- prevIndex
); 
 312             if (type 
== UMSGPAT_PART_TYPE_REPLACE_NUMBER
) { 
 313                 appendTo
.append(numberString
); 
 315             prevIndex 
= part
.getLimit(); 
         // A nested argument: jump to its limit part and copy its text through
         // MessageImpl::appendReducedApostrophes().
 316         } else if (type 
== UMSGPAT_PART_TYPE_ARG_START
) { 
 317             appendTo
.append(pattern
, prevIndex
, index 
- prevIndex
); 
 319             partIndex 
= msgPattern
.getLimitPartIndex(partIndex
); 
 320             index 
= msgPattern
.getPart(partIndex
).getLimit(); 
 321             MessageImpl::appendReducedApostrophes(pattern
, prevIndex
, index
, appendTo
); 
 // Writes the current pattern to `appendTo`.
 // When no pattern has been parsed (msgPattern has zero parts) `appendTo` is set
 // to bogus; the other visible path appends msgPattern's pattern string.
 328 PluralFormat::toPattern(UnicodeString
& appendTo
) { 
 329     if (0 == msgPattern
.countParts()) { 
 330         appendTo
.setToBogus(); 
 332         appendTo
.append(msgPattern
.getPatternString()); 
 // Switches this format to a new locale.
 // Visible behaviour: after the U_FAILURE(status) check, the plural-rules
 // wrapper is reset and init() is called again with NULL rules.
 // NOTE(review): init() is always called with UPLURAL_TYPE_CARDINAL here,
 // regardless of the type used at construction -- confirm this is intended.
 // The statements between the status check and the reset are not visible in
 // this listing.
 338 PluralFormat::setLocale(const Locale
& loc
, UErrorCode
& status
) { 
 339     if (U_FAILURE(status
)) { 
 347     pluralRulesWrapper
.reset(); 
 348     init(NULL
, UPLURAL_TYPE_CARDINAL
, status
); 
 // Sets the number format from `format` by cloning it; the caller's object is
 // not stored directly. One visible path reports U_MEMORY_ALLOCATION_ERROR.
 // NOTE(review): `format` is dereferenced without a null check in the visible
 // lines; passing NULL would be a null-pointer dereference. The lines that
 // install the clone are not visible in this listing.
 352 PluralFormat::setNumberFormat(const NumberFormat
* format
, UErrorCode
& status
) { 
 353     if (U_FAILURE(status
)) { 
 356     NumberFormat
* nf 
= format
->clone(); 
 361         status 
= U_MEMORY_ALLOCATION_ERROR
; 
 // Returns a heap-allocated copy of this object made with the copy constructor.
 366 PluralFormat::clone() const 
 368     return new PluralFormat(*this); 
 // Copy assignment. Guards against self-assignment, then copies `locale`,
 // `msgPattern` and `offset` from `other`.
 // NOTE(review): any further statements (and the return) are not visible in
 // this listing.
 373 PluralFormat::operator=(const PluralFormat
& other
) { 
 374     if (this != &other
) { 
 375         locale 
= other
.locale
; 
 376         msgPattern 
= other
.msgPattern
; 
 377         offset 
= other
.offset
; 
 // Equality comparison against another Format.
 // Visible checks, in order: pointer identity, Format::operator==, then (after
 // casting `other` to PluralFormat) equal locale, equal msgPattern, and for both
 // numberFormat and pluralRulesWrapper.pluralRules: same NULL-ness and, when
 // non-NULL, equal pointees.
 // NOTE(review): the results of the two early checks are not visible in this
 // listing.
 385 PluralFormat::operator==(const Format
& other
) const { 
 386     if (this == &other
) { 
 389     if (!Format::operator==(other
)) { 
 392     const PluralFormat
& o 
= (const PluralFormat
&)other
; 
 394         locale 
== o
.locale 
&& 
 395         msgPattern 
== o
.msgPattern 
&&  // implies same offset 
     // Compare NULL-ness first so the dereferences below only happen when both
     // sides are non-NULL.
 396         (numberFormat 
== NULL
) == (o
.numberFormat 
== NULL
) && 
 397         (numberFormat 
== NULL 
|| *numberFormat 
== *o
.numberFormat
) && 
 398         (pluralRulesWrapper
.pluralRules 
== NULL
) == (o
.pluralRulesWrapper
.pluralRules 
== NULL
) && 
 399         (pluralRulesWrapper
.pluralRules 
== NULL 
|| 
 400             *pluralRulesWrapper
.pluralRules 
== *o
.pluralRulesWrapper
.pluralRules
); 
 // Inequality: the logical negation of operator==.
 404 PluralFormat::operator!=(const Format
& other
) const { 
 405     return  !operator==(other
); 
 // Parsing is not supported by this class. The source and result parameters
 // are unused; the only effect is to set the error index of `pos` to its
 // current index.
 409 PluralFormat::parseObject(const UnicodeString
& /*source*/, 
 410                         Formattable
& /*result*/, 
 411                         ParsePosition
& pos
) const 
 413     // Parsing not supported. 
 414     pos
.setErrorIndex(pos
.getIndex()); 
 // Walks the plural-style parts of `pattern`, starting at `partIndex`, to pick
 // the sub-message that applies to `number`.
 //   pattern   - parsed message pattern to search.
 //   partIndex - index of the first part to examine.
 //   selector  - maps (context, number - offset) to a plural keyword; called at
 //               most lazily, as described in the comments below.
 //   context   - opaque pointer handed through to selector.select().
 //   number    - value compared against explicit "=N" selectors.
 //   ec        - in/out ICU error code passed to the selector.
 // If the part at the starting index carries a numeric value, that value is
 // used as `offset`.
 // NOTE(review): the declarations of `offset` and `msgStart`, the `do` line,
 // several branch bodies and the return statements are not visible in this
 // listing.
 417 int32_t PluralFormat::findSubMessage(const MessagePattern
& pattern
, int32_t partIndex
, 
 418                                      const PluralSelector
& selector
, void *context
, 
 419                                      double number
, UErrorCode
& ec
) { 
 423     int32_t count
=pattern
.countParts(); 
 425     const MessagePattern::Part
* part
=&pattern
.getPart(partIndex
); 
 426     if (MessagePattern::Part::hasNumericValue(part
->getType())) { 
 427         offset
=pattern
.getNumericValue(*part
); 
 432     // The keyword is empty until we need to match against a non-explicit, not-"other" value. 
 433     // Then we get the keyword from the selector. 
 434     // (In other words, we never call the selector if we match against an explicit value, 
 435     // or if the only non-explicit keyword is "other".) 
 436     UnicodeString keyword
; 
     // Wrap the static OTHER_STRING buffer (5 code units) for comparisons.
 437     UnicodeString 
other(FALSE
, OTHER_STRING
, 5); 
 438     // When we find a match, we set msgStart>0 and also set this boolean to true 
 439     // to avoid matching the keyword again (duplicates are allowed) 
 440     // while we continue to look for an explicit-value match. 
 441     UBool haveKeywordMatch
=FALSE
; 
 442     // msgStart is 0 until we find any appropriate sub-message. 
 443     // We remember the first "other" sub-message if we have not seen any 
 444     // appropriate sub-message before. 
 445     // We remember the first matching-keyword sub-message if we have not seen 
 446     // one of those before. 
 447     // (The parser allows [does not check for] duplicate keywords. 
 448     // We just have to make sure to take the first one.) 
 449     // We avoid matching the keyword twice by also setting haveKeywordMatch=true 
 450     // at the first keyword match. 
 451     // We keep going until we find an explicit-value match or reach the end of the plural style. 
 453     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 
 454     // until ARG_LIMIT or end of plural-only pattern. 
 456         part
=&pattern
.getPart(partIndex
++); 
 457         const UMessagePatternPartType type 
= part
->getType(); 
 458         if(type
==UMSGPAT_PART_TYPE_ARG_LIMIT
) { 
 461         U_ASSERT (type
==UMSGPAT_PART_TYPE_ARG_SELECTOR
); 
 462         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 
 463         if(MessagePattern::Part::hasNumericValue(pattern
.getPartType(partIndex
))) { 
 464             // explicit value like "=2" 
 465             part
=&pattern
.getPart(partIndex
++); 
             // Explicit values are compared against the raw number (no offset applied).
 466             if(number
==pattern
.getNumericValue(*part
)) { 
 467                 // matches explicit value 
 470         } else if(!haveKeywordMatch
) { 
 471             // plural keyword like "few" or "other" 
 472             // Compare "other" first and call the selector if this is not "other". 
 473             if(pattern
.partSubstringMatches(*part
, other
)) { 
 476                     if(0 == keyword
.compare(other
)) { 
 477                         // This is the first "other" sub-message, 
 478                         // and the selected keyword is also "other". 
 479                         // Do not match "other" again. 
 480                         haveKeywordMatch
=TRUE
; 
 484                 if(keyword
.isEmpty()) { 
 485                     keyword
=selector
.select(context
, number
-offset
, ec
); 
 486                     if(msgStart
!=0 && (0 == keyword
.compare(other
))) { 
 487                         // We have already seen an "other" sub-message. 
 488                         // Do not match "other" again. 
 489                         haveKeywordMatch
=TRUE
; 
 490                         // Skip keyword matching but do getLimitPartIndex(). 
 493                 if(!haveKeywordMatch 
&& pattern
.partSubstringMatches(*part
, keyword
)) { 
 496                     // Do not match this keyword again. 
 497                     haveKeywordMatch
=TRUE
; 
         // Skip over the sub-message that belongs to this selector.
 501         partIndex
=pattern
.getLimitPartIndex(partIndex
); 
 502     } while(++partIndex
<count
); 
 // Searches `source` for the text of one of this pattern's sub-messages and
 // reports the match.
 //   source             - text to search.
 //   rbnfLenientScanner - when non-NULL, matching uses its findTextLenient();
 //                        the other visible path uses source.indexOf().
 //   result             - receives the matched string via setString() on success.
 //   pos                - search starts at pos.getBeginIndex(); on success its
 //                        begin/end indices are set to the matched span, and the
 //                        visible failure paths set the begin index to -1.
 // A candidate replaces the current best match only if it is found, starts at
 // or after the current best index, and is strictly longer than the current
 // best text.
 // NOTE(review): `keyword` is assigned the same substring expression as
 // `currArg` (the sub-message text), so `result` receives the message text
 // rather than the selector name -- confirm this is intended.
 // NOTE(review): the declarations of `partIndex`, `currMatchIndex` and `length`,
 // the bodies of the type-check branches and the returns are not visible in
 // this listing.
 506 void PluralFormat::parseType(const UnicodeString
& source
, const NFRule 
*rbnfLenientScanner
, Formattable
& result
, FieldPosition
& pos
) const { 
 507     // If no pattern was applied, return null. 
 508     if (msgPattern
.countParts() == 0) { 
 509         pos
.setBeginIndex(-1); 
 515     int count
=msgPattern
.countParts(); 
 516     int startingAt 
= pos
.getBeginIndex(); 
 517     if (startingAt 
< 0) { 
 521     // The keyword is null until we need to match against a non-explicit, not-"other" value. 
 522     // Then we get the keyword from the selector. 
 523     // (In other words, we never call the selector if we match against an explicit value, 
 524     // or if the only non-explicit keyword is "other".) 
 525     UnicodeString keyword
; 
 526     UnicodeString matchedWord
; 
 527     const UnicodeString
& pattern 
= msgPattern
.getPatternString(); 
 528     int matchedIndex 
= -1; 
 529     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples 
 530     // until the end of the plural-only pattern. 
 531     while (partIndex 
< count
) { 
 532         const MessagePattern::Part
* partSelector 
= &msgPattern
.getPart(partIndex
++); 
 533         if (partSelector
->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR
) { 
 538         const MessagePattern::Part
* partStart 
= &msgPattern
.getPart(partIndex
++); 
 539         if (partStart
->getType() != UMSGPAT_PART_TYPE_MSG_START
) { 
 544         const MessagePattern::Part
* partLimit 
= &msgPattern
.getPart(partIndex
++); 
 545         if (partLimit
->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT
) { 
         // Text between the MSG_START and MSG_LIMIT parts, i.e. the sub-message body.
 550         UnicodeString currArg 
= pattern
.tempSubString(partStart
->getLimit(), partLimit
->getIndex() - partStart
->getLimit()); 
 551         if (rbnfLenientScanner 
!= NULL
) { 
 552             // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. 
 554             currMatchIndex 
= rbnfLenientScanner
->findTextLenient(source
, currArg
, startingAt
, &length
); 
 557             currMatchIndex 
= source
.indexOf(currArg
, startingAt
); 
 559         if (currMatchIndex 
>= 0 && currMatchIndex 
>= matchedIndex 
&& currArg
.length() > matchedWord
.length()) { 
 560             matchedIndex 
= currMatchIndex
; 
 561             matchedWord 
= currArg
; 
 562             keyword 
= pattern
.tempSubString(partStart
->getLimit(), partLimit
->getIndex() - partStart
->getLimit()); 
 565     if (matchedIndex 
>= 0) { 
 566         pos
.setBeginIndex(matchedIndex
); 
 567         pos
.setEndIndex(matchedIndex 
+ matchedWord
.length()); 
 568         result
.setString(keyword
); 
 573     pos
.setBeginIndex(-1); 
 // Out-of-line destructor for PluralSelector with an empty body.
 577 PluralFormat::PluralSelector::~PluralSelector() {} 
 // Destructor for PluralSelectorAdapter.
 // NOTE(review): the body is not visible in this listing.
 579 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { 
 // Selects a plural keyword by delegating to pluralRules->select().
 //   context - cast to IFixedDecimal* and dereferenced, so callers must pass a
 //             pointer to an IFixedDecimal-compatible object (format() in this
 //             file passes the address of a DecimalQuantity).
 //   number  - unused here.
 //   ec      - unused (parameter name commented out).
 // NOTE(review): the existing comment mentions an assertion on `number`, but no
 // assertion is visible in this listing.
 583 UnicodeString 
PluralFormat::PluralSelectorAdapter::select(void *context
, double number
, 
 584                                                           UErrorCode
& /*ec*/) const { 
 585     (void)number
;  // unused except in the assertion 
 586     IFixedDecimal 
*dec
=static_cast<IFixedDecimal 
*>(context
); 
 587     return pluralRules
->select(*dec
); 
 // Resets the adapter; setLocale() in this file calls it before re-running init().
 // NOTE(review): the body is not visible in this listing.
 590 void PluralFormat::PluralSelectorAdapter::reset() { 
 599 #endif /* #if !UCONFIG_NO_FORMATTING */