]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/plurfmt.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / plurfmt.cpp
CommitLineData
f3c0d7a5
A
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2009-2015, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*
* File PLURFMT.CPP
*******************************************************************************
*/
12
57a6839d 13#include "unicode/decimfmt.h"
4388f060 14#include "unicode/messagepattern.h"
46f4442e
A
15#include "unicode/plurfmt.h"
16#include "unicode/plurrule.h"
4388f060
A
17#include "unicode/utypes.h"
18#include "cmemory.h"
19#include "messageimpl.h"
b331163b 20#include "nfrule.h"
46f4442e 21#include "plurrule_impl.h"
4388f060
A
22#include "uassert.h"
23#include "uhash.h"
3d1f044b
A
24#include "number_decimalquantity.h"
25#include "number_utils.h"
26#include "number_utypes.h"
46f4442e
A
27
28#if !UCONFIG_NO_FORMATTING
29
30U_NAMESPACE_BEGIN
31
3d1f044b
A
32using number::impl::DecimalQuantity;
33
4388f060
A
34static const UChar OTHER_STRING[] = {
35 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
36};
46f4442e
A
37
38UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
39
4388f060
A
40PluralFormat::PluralFormat(UErrorCode& status)
41 : locale(Locale::getDefault()),
42 msgPattern(status),
43 numberFormat(NULL),
44 offset(0) {
51004dcb 45 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
46}
47
4388f060
A
48PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
49 : locale(loc),
50 msgPattern(status),
51 numberFormat(NULL),
52 offset(0) {
51004dcb 53 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
54}
55
4388f060
A
56PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
57 : locale(Locale::getDefault()),
58 msgPattern(status),
59 numberFormat(NULL),
60 offset(0) {
51004dcb 61 init(&rules, UPLURAL_TYPE_COUNT, status);
46f4442e
A
62}
63
4388f060
A
64PluralFormat::PluralFormat(const Locale& loc,
65 const PluralRules& rules,
66 UErrorCode& status)
67 : locale(loc),
68 msgPattern(status),
69 numberFormat(NULL),
70 offset(0) {
51004dcb
A
71 init(&rules, UPLURAL_TYPE_COUNT, status);
72}
73
74PluralFormat::PluralFormat(const Locale& loc,
75 UPluralType type,
76 UErrorCode& status)
77 : locale(loc),
78 msgPattern(status),
79 numberFormat(NULL),
80 offset(0) {
81 init(NULL, type, status);
46f4442e
A
82}
83
4388f060
A
84PluralFormat::PluralFormat(const UnicodeString& pat,
85 UErrorCode& status)
86 : locale(Locale::getDefault()),
87 msgPattern(status),
88 numberFormat(NULL),
89 offset(0) {
51004dcb 90 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
91 applyPattern(pat, status);
92}
93
4388f060
A
94PluralFormat::PluralFormat(const Locale& loc,
95 const UnicodeString& pat,
96 UErrorCode& status)
97 : locale(loc),
98 msgPattern(status),
99 numberFormat(NULL),
100 offset(0) {
51004dcb 101 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
102 applyPattern(pat, status);
103}
104
4388f060
A
105PluralFormat::PluralFormat(const PluralRules& rules,
106 const UnicodeString& pat,
107 UErrorCode& status)
108 : locale(Locale::getDefault()),
109 msgPattern(status),
110 numberFormat(NULL),
111 offset(0) {
51004dcb 112 init(&rules, UPLURAL_TYPE_COUNT, status);
46f4442e
A
113 applyPattern(pat, status);
114}
115
4388f060
A
116PluralFormat::PluralFormat(const Locale& loc,
117 const PluralRules& rules,
118 const UnicodeString& pat,
119 UErrorCode& status)
120 : locale(loc),
121 msgPattern(status),
122 numberFormat(NULL),
123 offset(0) {
51004dcb
A
124 init(&rules, UPLURAL_TYPE_COUNT, status);
125 applyPattern(pat, status);
126}
127
128PluralFormat::PluralFormat(const Locale& loc,
129 UPluralType type,
130 const UnicodeString& pat,
131 UErrorCode& status)
132 : locale(loc),
133 msgPattern(status),
134 numberFormat(NULL),
135 offset(0) {
136 init(NULL, type, status);
46f4442e
A
137 applyPattern(pat, status);
138}
139
4388f060
A
140PluralFormat::PluralFormat(const PluralFormat& other)
141 : Format(other),
142 locale(other.locale),
143 msgPattern(other.msgPattern),
144 numberFormat(NULL),
145 offset(other.offset) {
146 copyObjects(other);
147}
148
149void
150PluralFormat::copyObjects(const PluralFormat& other) {
46f4442e 151 UErrorCode status = U_ZERO_ERROR;
4388f060
A
152 if (numberFormat != NULL) {
153 delete numberFormat;
729e4ab9 154 }
4388f060
A
155 if (pluralRulesWrapper.pluralRules != NULL) {
156 delete pluralRulesWrapper.pluralRules;
157 }
158
159 if (other.numberFormat == NULL) {
160 numberFormat = NumberFormat::createInstance(locale, status);
161 } else {
340931cb 162 numberFormat = other.numberFormat->clone();
4388f060
A
163 }
164 if (other.pluralRulesWrapper.pluralRules == NULL) {
165 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
166 } else {
167 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
729e4ab9 168 }
46f4442e
A
169}
170
4388f060 171
46f4442e 172PluralFormat::~PluralFormat() {
46f4442e
A
173 delete numberFormat;
174}
175
176void
51004dcb 177PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
729e4ab9
A
178 if (U_FAILURE(status)) {
179 return;
180 }
4388f060
A
181
182 if (rules==NULL) {
51004dcb 183 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
4388f060
A
184 } else {
185 pluralRulesWrapper.pluralRules = rules->clone();
186 if (pluralRulesWrapper.pluralRules == NULL) {
187 status = U_MEMORY_ALLOCATION_ERROR;
729e4ab9
A
188 return;
189 }
46f4442e 190 }
4388f060
A
191
192 numberFormat= NumberFormat::createInstance(locale, status);
46f4442e
A
193}
194
195void
196PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
4388f060 197 msgPattern.parsePluralStyle(newPattern, NULL, status);
729e4ab9 198 if (U_FAILURE(status)) {
4388f060
A
199 msgPattern.clear();
200 offset = 0;
729e4ab9
A
201 return;
202 }
4388f060 203 offset = msgPattern.getPluralOffset(0);
46f4442e
A
204}
205
206UnicodeString&
207PluralFormat::format(const Formattable& obj,
208 UnicodeString& appendTo,
209 FieldPosition& pos,
210 UErrorCode& status) const
211{
212 if (U_FAILURE(status)) return appendTo;
4388f060
A
213
214 if (obj.isNumeric()) {
57a6839d 215 return format(obj, obj.getDouble(), appendTo, pos, status);
4388f060 216 } else {
46f4442e
A
217 status = U_ILLEGAL_ARGUMENT_ERROR;
218 return appendTo;
219 }
220}
221
222UnicodeString
223PluralFormat::format(int32_t number, UErrorCode& status) const {
f3c0d7a5 224 FieldPosition fpos(FieldPosition::DONT_CARE);
46f4442e 225 UnicodeString result;
57a6839d 226 return format(Formattable(number), number, result, fpos, status);
46f4442e
A
227}
228
229UnicodeString
230PluralFormat::format(double number, UErrorCode& status) const {
f3c0d7a5 231 FieldPosition fpos(FieldPosition::DONT_CARE);
46f4442e 232 UnicodeString result;
57a6839d 233 return format(Formattable(number), number, result, fpos, status);
46f4442e
A
234}
235
236
237UnicodeString&
238PluralFormat::format(int32_t number,
4388f060 239 UnicodeString& appendTo,
46f4442e
A
240 FieldPosition& pos,
241 UErrorCode& status) const {
57a6839d 242 return format(Formattable(number), (double)number, appendTo, pos, status);
46f4442e
A
243}
244
245UnicodeString&
246PluralFormat::format(double number,
4388f060 247 UnicodeString& appendTo,
46f4442e 248 FieldPosition& pos,
4388f060 249 UErrorCode& status) const {
57a6839d
A
250 return format(Formattable(number), (double)number, appendTo, pos, status);
251}
252
253UnicodeString&
254PluralFormat::format(const Formattable& numberObject, double number,
255 UnicodeString& appendTo,
256 FieldPosition& pos,
257 UErrorCode& status) const {
4388f060
A
258 if (U_FAILURE(status)) {
259 return appendTo;
46f4442e 260 }
4388f060 261 if (msgPattern.countParts() == 0) {
57a6839d 262 return numberFormat->format(numberObject, appendTo, pos, status);
4388f060 263 }
3d1f044b 264
4388f060 265 // Get the appropriate sub-message.
57a6839d
A
266 // Select it based on the formatted number-offset.
267 double numberMinusOffset = number - offset;
3d1f044b
A
268 // Call NumberFormatter to get both the DecimalQuantity and the string.
269 // This call site needs to use more internal APIs than the Java equivalent.
270 number::impl::UFormattedNumberData data;
57a6839d 271 if (offset == 0) {
3d1f044b
A
272 // could be BigDecimal etc.
273 numberObject.populateDecimalQuantity(data.quantity, status);
57a6839d 274 } else {
3d1f044b
A
275 data.quantity.setToDouble(numberMinusOffset);
276 }
277 UnicodeString numberString;
278 auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
279 if(decFmt != nullptr) {
340931cb
A
280 const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status);
281 if (U_FAILURE(status)) {
282 return appendTo;
283 }
284 lnf->formatImpl(&data, status); // mutates &data
285 if (U_FAILURE(status)) {
286 return appendTo;
287 }
3d1f044b
A
288 numberString = data.getStringRef().toUnicodeString();
289 } else {
290 if (offset == 0) {
291 numberFormat->format(numberObject, numberString, status);
2ca993e8 292 } else {
3d1f044b 293 numberFormat->format(numberMinusOffset, numberString, status);
57a6839d
A
294 }
295 }
3d1f044b
A
296
297 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
57a6839d 298 if (U_FAILURE(status)) { return appendTo; }
4388f060
A
299 // Replace syntactic # signs in the top level of this sub-message
300 // (not in nested arguments) with the formatted number-offset.
301 const UnicodeString& pattern = msgPattern.getPatternString();
4388f060
A
302 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
303 for (;;) {
304 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
305 const UMessagePatternPartType type = part.getType();
306 int32_t index = part.getIndex();
307 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
308 return appendTo.append(pattern, prevIndex, index - prevIndex);
309 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
310 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
311 appendTo.append(pattern, prevIndex, index - prevIndex);
312 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
57a6839d 313 appendTo.append(numberString);
4388f060
A
314 }
315 prevIndex = part.getLimit();
316 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
317 appendTo.append(pattern, prevIndex, index - prevIndex);
318 prevIndex = index;
319 partIndex = msgPattern.getLimitPartIndex(partIndex);
320 index = msgPattern.getPart(partIndex).getLimit();
321 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
322 prevIndex = index;
323 }
46f4442e 324 }
46f4442e
A
325}
326
327UnicodeString&
328PluralFormat::toPattern(UnicodeString& appendTo) {
4388f060
A
329 if (0 == msgPattern.countParts()) {
330 appendTo.setToBogus();
331 } else {
332 appendTo.append(msgPattern.getPatternString());
46f4442e 333 }
4388f060 334 return appendTo;
46f4442e
A
335}
336
4388f060
A
337void
338PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
339 if (U_FAILURE(status)) {
340 return;
46f4442e 341 }
4388f060
A
342 locale = loc;
343 msgPattern.clear();
344 delete numberFormat;
345 offset = 0;
346 numberFormat = NULL;
347 pluralRulesWrapper.reset();
51004dcb 348 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
349}
350
351void
4388f060 352PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
729e4ab9
A
353 if (U_FAILURE(status)) {
354 return;
355 }
340931cb 356 NumberFormat* nf = format->clone();
4388f060 357 if (nf != NULL) {
46f4442e 358 delete numberFormat;
4388f060
A
359 numberFormat = nf;
360 } else {
361 status = U_MEMORY_ALLOCATION_ERROR;
46f4442e 362 }
46f4442e
A
363}
364
340931cb 365PluralFormat*
46f4442e
A
366PluralFormat::clone() const
367{
368 return new PluralFormat(*this);
369}
370
4388f060 371
46f4442e
A
372PluralFormat&
373PluralFormat::operator=(const PluralFormat& other) {
374 if (this != &other) {
46f4442e 375 locale = other.locale;
4388f060
A
376 msgPattern = other.msgPattern;
377 offset = other.offset;
378 copyObjects(other);
46f4442e
A
379 }
380
381 return *this;
382}
383
384UBool
385PluralFormat::operator==(const Format& other) const {
4388f060
A
386 if (this == &other) {
387 return TRUE;
388 }
389 if (!Format::operator==(other)) {
390 return FALSE;
391 }
392 const PluralFormat& o = (const PluralFormat&)other;
393 return
394 locale == o.locale &&
395 msgPattern == o.msgPattern && // implies same offset
396 (numberFormat == NULL) == (o.numberFormat == NULL) &&
397 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
398 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
399 (pluralRulesWrapper.pluralRules == NULL ||
400 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
46f4442e
A
401}
402
403UBool
404PluralFormat::operator!=(const Format& other) const {
405 return !operator==(other);
406}
407
408void
409PluralFormat::parseObject(const UnicodeString& /*source*/,
410 Formattable& /*result*/,
4388f060 411 ParsePosition& pos) const
46f4442e 412{
4388f060
A
413 // Parsing not supported.
414 pos.setErrorIndex(pos.getIndex());
46f4442e
A
415}
416
4388f060 417int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
57a6839d
A
418 const PluralSelector& selector, void *context,
419 double number, UErrorCode& ec) {
4388f060
A
420 if (U_FAILURE(ec)) {
421 return 0;
422 }
423 int32_t count=pattern.countParts();
424 double offset;
425 const MessagePattern::Part* part=&pattern.getPart(partIndex);
426 if (MessagePattern::Part::hasNumericValue(part->getType())) {
427 offset=pattern.getNumericValue(*part);
428 ++partIndex;
429 } else {
430 offset=0;
431 }
57a6839d 432 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
4388f060
A
433 // Then we get the keyword from the selector.
434 // (In other words, we never call the selector if we match against an explicit value,
435 // or if the only non-explicit keyword is "other".)
436 UnicodeString keyword;
437 UnicodeString other(FALSE, OTHER_STRING, 5);
438 // When we find a match, we set msgStart>0 and also set this boolean to true
439 // to avoid matching the keyword again (duplicates are allowed)
440 // while we continue to look for an explicit-value match.
441 UBool haveKeywordMatch=FALSE;
442 // msgStart is 0 until we find any appropriate sub-message.
443 // We remember the first "other" sub-message if we have not seen any
444 // appropriate sub-message before.
445 // We remember the first matching-keyword sub-message if we have not seen
446 // one of those before.
447 // (The parser allows [does not check for] duplicate keywords.
448 // We just have to make sure to take the first one.)
449 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
450 // at the first keyword match.
451 // We keep going until we find an explicit-value match or reach the end of the plural style.
452 int32_t msgStart=0;
453 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
454 // until ARG_LIMIT or end of plural-only pattern.
455 do {
456 part=&pattern.getPart(partIndex++);
457 const UMessagePatternPartType type = part->getType();
458 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
46f4442e 459 break;
4388f060
A
460 }
461 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
462 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
463 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
464 // explicit value like "=2"
465 part=&pattern.getPart(partIndex++);
466 if(number==pattern.getNumericValue(*part)) {
467 // matches explicit value
468 return partIndex;
469 }
470 } else if(!haveKeywordMatch) {
471 // plural keyword like "few" or "other"
472 // Compare "other" first and call the selector if this is not "other".
473 if(pattern.partSubstringMatches(*part, other)) {
474 if(msgStart==0) {
475 msgStart=partIndex;
476 if(0 == keyword.compare(other)) {
477 // This is the first "other" sub-message,
478 // and the selected keyword is also "other".
479 // Do not match "other" again.
480 haveKeywordMatch=TRUE;
481 }
482 }
483 } else {
484 if(keyword.isEmpty()) {
57a6839d 485 keyword=selector.select(context, number-offset, ec);
4388f060
A
486 if(msgStart!=0 && (0 == keyword.compare(other))) {
487 // We have already seen an "other" sub-message.
488 // Do not match "other" again.
489 haveKeywordMatch=TRUE;
490 // Skip keyword matching but do getLimitPartIndex().
491 }
492 }
493 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
494 // keyword matches
495 msgStart=partIndex;
496 // Do not match this keyword again.
497 haveKeywordMatch=TRUE;
498 }
46f4442e 499 }
46f4442e 500 }
4388f060
A
501 partIndex=pattern.getLimitPartIndex(partIndex);
502 } while(++partIndex<count);
503 return msgStart;
46f4442e
A
504}
505
b331163b
A
506void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
507 // If no pattern was applied, return null.
508 if (msgPattern.countParts() == 0) {
509 pos.setBeginIndex(-1);
510 pos.setEndIndex(-1);
511 return;
512 }
513 int partIndex = 0;
514 int currMatchIndex;
515 int count=msgPattern.countParts();
516 int startingAt = pos.getBeginIndex();
517 if (startingAt < 0) {
518 startingAt = 0;
519 }
520
521 // The keyword is null until we need to match against a non-explicit, not-"other" value.
522 // Then we get the keyword from the selector.
523 // (In other words, we never call the selector if we match against an explicit value,
524 // or if the only non-explicit keyword is "other".)
525 UnicodeString keyword;
526 UnicodeString matchedWord;
527 const UnicodeString& pattern = msgPattern.getPatternString();
528 int matchedIndex = -1;
529 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
530 // until the end of the plural-only pattern.
531 while (partIndex < count) {
532 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
533 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
534 // Bad format
535 continue;
536 }
537
538 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
539 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
540 // Bad format
541 continue;
542 }
543
544 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
545 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
546 // Bad format
547 continue;
548 }
549
550 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
551 if (rbnfLenientScanner != NULL) {
552 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
553 int32_t length = -1;
554 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
555 }
556 else {
557 currMatchIndex = source.indexOf(currArg, startingAt);
558 }
559 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
560 matchedIndex = currMatchIndex;
561 matchedWord = currArg;
562 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
563 }
564 }
565 if (matchedIndex >= 0) {
566 pos.setBeginIndex(matchedIndex);
567 pos.setEndIndex(matchedIndex + matchedWord.length());
568 result.setString(keyword);
569 return;
570 }
571
572 // Not found!
573 pos.setBeginIndex(-1);
574 pos.setEndIndex(-1);
575}
576
4388f060
A
577PluralFormat::PluralSelector::~PluralSelector() {}
578
579PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
580 delete pluralRules;
581}
582
57a6839d 583UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
4388f060 584 UErrorCode& /*ec*/) const {
57a6839d 585 (void)number; // unused except in the assertion
3d1f044b 586 IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
57a6839d 587 return pluralRules->select(*dec);
4388f060
A
588}
589
590void PluralFormat::PluralSelectorAdapter::reset() {
591 delete pluralRules;
592 pluralRules = NULL;
46f4442e
A
593}
594
595
596U_NAMESPACE_END
597
598
599#endif /* #if !UCONFIG_NO_FORMATTING */
600
601//eof