]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/plurfmt.cpp
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / plurfmt.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
46f4442e
A
3/*
4*******************************************************************************
b331163b 5* Copyright (C) 2009-2015, International Business Machines Corporation and
46f4442e
A
6* others. All Rights Reserved.
7*******************************************************************************
8*
9* File PLURFMT.CPP
46f4442e
A
10*******************************************************************************
11*/
12
57a6839d 13#include "unicode/decimfmt.h"
4388f060 14#include "unicode/messagepattern.h"
46f4442e
A
15#include "unicode/plurfmt.h"
16#include "unicode/plurrule.h"
4388f060
A
17#include "unicode/utypes.h"
18#include "cmemory.h"
19#include "messageimpl.h"
b331163b 20#include "nfrule.h"
46f4442e 21#include "plurrule_impl.h"
4388f060
A
22#include "uassert.h"
23#include "uhash.h"
2ca993e8
A
24#include "precision.h"
25#include "visibledigits.h"
46f4442e
A
26
27#if !UCONFIG_NO_FORMATTING
28
29U_NAMESPACE_BEGIN
30
4388f060
A
31static const UChar OTHER_STRING[] = {
32 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
33};
46f4442e
A
34
35UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
36
4388f060
A
37PluralFormat::PluralFormat(UErrorCode& status)
38 : locale(Locale::getDefault()),
39 msgPattern(status),
40 numberFormat(NULL),
41 offset(0) {
51004dcb 42 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
43}
44
4388f060
A
45PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
46 : locale(loc),
47 msgPattern(status),
48 numberFormat(NULL),
49 offset(0) {
51004dcb 50 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
51}
52
4388f060
A
53PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
54 : locale(Locale::getDefault()),
55 msgPattern(status),
56 numberFormat(NULL),
57 offset(0) {
51004dcb 58 init(&rules, UPLURAL_TYPE_COUNT, status);
46f4442e
A
59}
60
4388f060
A
61PluralFormat::PluralFormat(const Locale& loc,
62 const PluralRules& rules,
63 UErrorCode& status)
64 : locale(loc),
65 msgPattern(status),
66 numberFormat(NULL),
67 offset(0) {
51004dcb
A
68 init(&rules, UPLURAL_TYPE_COUNT, status);
69}
70
71PluralFormat::PluralFormat(const Locale& loc,
72 UPluralType type,
73 UErrorCode& status)
74 : locale(loc),
75 msgPattern(status),
76 numberFormat(NULL),
77 offset(0) {
78 init(NULL, type, status);
46f4442e
A
79}
80
4388f060
A
81PluralFormat::PluralFormat(const UnicodeString& pat,
82 UErrorCode& status)
83 : locale(Locale::getDefault()),
84 msgPattern(status),
85 numberFormat(NULL),
86 offset(0) {
51004dcb 87 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
88 applyPattern(pat, status);
89}
90
4388f060
A
91PluralFormat::PluralFormat(const Locale& loc,
92 const UnicodeString& pat,
93 UErrorCode& status)
94 : locale(loc),
95 msgPattern(status),
96 numberFormat(NULL),
97 offset(0) {
51004dcb 98 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
99 applyPattern(pat, status);
100}
101
4388f060
A
102PluralFormat::PluralFormat(const PluralRules& rules,
103 const UnicodeString& pat,
104 UErrorCode& status)
105 : locale(Locale::getDefault()),
106 msgPattern(status),
107 numberFormat(NULL),
108 offset(0) {
51004dcb 109 init(&rules, UPLURAL_TYPE_COUNT, status);
46f4442e
A
110 applyPattern(pat, status);
111}
112
4388f060
A
113PluralFormat::PluralFormat(const Locale& loc,
114 const PluralRules& rules,
115 const UnicodeString& pat,
116 UErrorCode& status)
117 : locale(loc),
118 msgPattern(status),
119 numberFormat(NULL),
120 offset(0) {
51004dcb
A
121 init(&rules, UPLURAL_TYPE_COUNT, status);
122 applyPattern(pat, status);
123}
124
125PluralFormat::PluralFormat(const Locale& loc,
126 UPluralType type,
127 const UnicodeString& pat,
128 UErrorCode& status)
129 : locale(loc),
130 msgPattern(status),
131 numberFormat(NULL),
132 offset(0) {
133 init(NULL, type, status);
46f4442e
A
134 applyPattern(pat, status);
135}
136
4388f060
A
137PluralFormat::PluralFormat(const PluralFormat& other)
138 : Format(other),
139 locale(other.locale),
140 msgPattern(other.msgPattern),
141 numberFormat(NULL),
142 offset(other.offset) {
143 copyObjects(other);
144}
145
146void
147PluralFormat::copyObjects(const PluralFormat& other) {
46f4442e 148 UErrorCode status = U_ZERO_ERROR;
4388f060
A
149 if (numberFormat != NULL) {
150 delete numberFormat;
729e4ab9 151 }
4388f060
A
152 if (pluralRulesWrapper.pluralRules != NULL) {
153 delete pluralRulesWrapper.pluralRules;
154 }
155
156 if (other.numberFormat == NULL) {
157 numberFormat = NumberFormat::createInstance(locale, status);
158 } else {
159 numberFormat = (NumberFormat*)other.numberFormat->clone();
160 }
161 if (other.pluralRulesWrapper.pluralRules == NULL) {
162 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
163 } else {
164 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
729e4ab9 165 }
46f4442e
A
166}
167
4388f060 168
46f4442e 169PluralFormat::~PluralFormat() {
46f4442e
A
170 delete numberFormat;
171}
172
173void
51004dcb 174PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
729e4ab9
A
175 if (U_FAILURE(status)) {
176 return;
177 }
4388f060
A
178
179 if (rules==NULL) {
51004dcb 180 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
4388f060
A
181 } else {
182 pluralRulesWrapper.pluralRules = rules->clone();
183 if (pluralRulesWrapper.pluralRules == NULL) {
184 status = U_MEMORY_ALLOCATION_ERROR;
729e4ab9
A
185 return;
186 }
46f4442e 187 }
4388f060
A
188
189 numberFormat= NumberFormat::createInstance(locale, status);
46f4442e
A
190}
191
192void
193PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
4388f060 194 msgPattern.parsePluralStyle(newPattern, NULL, status);
729e4ab9 195 if (U_FAILURE(status)) {
4388f060
A
196 msgPattern.clear();
197 offset = 0;
729e4ab9
A
198 return;
199 }
4388f060 200 offset = msgPattern.getPluralOffset(0);
46f4442e
A
201}
202
203UnicodeString&
204PluralFormat::format(const Formattable& obj,
205 UnicodeString& appendTo,
206 FieldPosition& pos,
207 UErrorCode& status) const
208{
209 if (U_FAILURE(status)) return appendTo;
4388f060
A
210
211 if (obj.isNumeric()) {
57a6839d 212 return format(obj, obj.getDouble(), appendTo, pos, status);
4388f060 213 } else {
46f4442e
A
214 status = U_ILLEGAL_ARGUMENT_ERROR;
215 return appendTo;
216 }
217}
218
219UnicodeString
220PluralFormat::format(int32_t number, UErrorCode& status) const {
f3c0d7a5 221 FieldPosition fpos(FieldPosition::DONT_CARE);
46f4442e 222 UnicodeString result;
57a6839d 223 return format(Formattable(number), number, result, fpos, status);
46f4442e
A
224}
225
226UnicodeString
227PluralFormat::format(double number, UErrorCode& status) const {
f3c0d7a5 228 FieldPosition fpos(FieldPosition::DONT_CARE);
46f4442e 229 UnicodeString result;
57a6839d 230 return format(Formattable(number), number, result, fpos, status);
46f4442e
A
231}
232
233
234UnicodeString&
235PluralFormat::format(int32_t number,
4388f060 236 UnicodeString& appendTo,
46f4442e
A
237 FieldPosition& pos,
238 UErrorCode& status) const {
57a6839d 239 return format(Formattable(number), (double)number, appendTo, pos, status);
46f4442e
A
240}
241
242UnicodeString&
243PluralFormat::format(double number,
4388f060 244 UnicodeString& appendTo,
46f4442e 245 FieldPosition& pos,
4388f060 246 UErrorCode& status) const {
57a6839d
A
247 return format(Formattable(number), (double)number, appendTo, pos, status);
248}
249
250UnicodeString&
251PluralFormat::format(const Formattable& numberObject, double number,
252 UnicodeString& appendTo,
253 FieldPosition& pos,
254 UErrorCode& status) const {
4388f060
A
255 if (U_FAILURE(status)) {
256 return appendTo;
46f4442e 257 }
4388f060 258 if (msgPattern.countParts() == 0) {
57a6839d 259 return numberFormat->format(numberObject, appendTo, pos, status);
4388f060
A
260 }
261 // Get the appropriate sub-message.
57a6839d
A
262 // Select it based on the formatted number-offset.
263 double numberMinusOffset = number - offset;
264 UnicodeString numberString;
265 FieldPosition ignorePos;
2ca993e8
A
266 FixedPrecision fp;
267 VisibleDigitsWithExponent dec;
268 fp.initVisibleDigitsWithExponent(numberMinusOffset, dec, status);
269 if (U_FAILURE(status)) {
270 return appendTo;
271 }
57a6839d 272 if (offset == 0) {
57a6839d
A
273 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
274 if(decFmt != NULL) {
2ca993e8
A
275 decFmt->initVisibleDigitsWithExponent(
276 numberObject, dec, status);
277 if (U_FAILURE(status)) {
278 return appendTo;
279 }
280 decFmt->format(dec, numberString, ignorePos, status);
281 } else {
282 numberFormat->format(
283 numberObject, numberString, ignorePos, status); // could be BigDecimal etc.
57a6839d
A
284 }
285 } else {
57a6839d
A
286 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
287 if(decFmt != NULL) {
2ca993e8
A
288 decFmt->initVisibleDigitsWithExponent(
289 numberMinusOffset, dec, status);
290 if (U_FAILURE(status)) {
291 return appendTo;
292 }
293 decFmt->format(dec, numberString, ignorePos, status);
294 } else {
295 numberFormat->format(
296 numberMinusOffset, numberString, ignorePos, status);
57a6839d
A
297 }
298 }
299 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
300 if (U_FAILURE(status)) { return appendTo; }
4388f060
A
301 // Replace syntactic # signs in the top level of this sub-message
302 // (not in nested arguments) with the formatted number-offset.
303 const UnicodeString& pattern = msgPattern.getPatternString();
4388f060
A
304 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
305 for (;;) {
306 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
307 const UMessagePatternPartType type = part.getType();
308 int32_t index = part.getIndex();
309 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
310 return appendTo.append(pattern, prevIndex, index - prevIndex);
311 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
312 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
313 appendTo.append(pattern, prevIndex, index - prevIndex);
314 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
57a6839d 315 appendTo.append(numberString);
4388f060
A
316 }
317 prevIndex = part.getLimit();
318 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
319 appendTo.append(pattern, prevIndex, index - prevIndex);
320 prevIndex = index;
321 partIndex = msgPattern.getLimitPartIndex(partIndex);
322 index = msgPattern.getPart(partIndex).getLimit();
323 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
324 prevIndex = index;
325 }
46f4442e 326 }
46f4442e
A
327}
328
329UnicodeString&
330PluralFormat::toPattern(UnicodeString& appendTo) {
4388f060
A
331 if (0 == msgPattern.countParts()) {
332 appendTo.setToBogus();
333 } else {
334 appendTo.append(msgPattern.getPatternString());
46f4442e 335 }
4388f060 336 return appendTo;
46f4442e
A
337}
338
4388f060
A
339void
340PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
341 if (U_FAILURE(status)) {
342 return;
46f4442e 343 }
4388f060
A
344 locale = loc;
345 msgPattern.clear();
346 delete numberFormat;
347 offset = 0;
348 numberFormat = NULL;
349 pluralRulesWrapper.reset();
51004dcb 350 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46f4442e
A
351}
352
353void
4388f060 354PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
729e4ab9
A
355 if (U_FAILURE(status)) {
356 return;
357 }
4388f060
A
358 NumberFormat* nf = (NumberFormat*)format->clone();
359 if (nf != NULL) {
46f4442e 360 delete numberFormat;
4388f060
A
361 numberFormat = nf;
362 } else {
363 status = U_MEMORY_ALLOCATION_ERROR;
46f4442e 364 }
46f4442e
A
365}
366
367Format*
368PluralFormat::clone() const
369{
370 return new PluralFormat(*this);
371}
372
4388f060 373
46f4442e
A
374PluralFormat&
375PluralFormat::operator=(const PluralFormat& other) {
376 if (this != &other) {
46f4442e 377 locale = other.locale;
4388f060
A
378 msgPattern = other.msgPattern;
379 offset = other.offset;
380 copyObjects(other);
46f4442e
A
381 }
382
383 return *this;
384}
385
386UBool
387PluralFormat::operator==(const Format& other) const {
4388f060
A
388 if (this == &other) {
389 return TRUE;
390 }
391 if (!Format::operator==(other)) {
392 return FALSE;
393 }
394 const PluralFormat& o = (const PluralFormat&)other;
395 return
396 locale == o.locale &&
397 msgPattern == o.msgPattern && // implies same offset
398 (numberFormat == NULL) == (o.numberFormat == NULL) &&
399 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
400 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
401 (pluralRulesWrapper.pluralRules == NULL ||
402 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
46f4442e
A
403}
404
405UBool
406PluralFormat::operator!=(const Format& other) const {
407 return !operator==(other);
408}
409
410void
411PluralFormat::parseObject(const UnicodeString& /*source*/,
412 Formattable& /*result*/,
4388f060 413 ParsePosition& pos) const
46f4442e 414{
4388f060
A
415 // Parsing not supported.
416 pos.setErrorIndex(pos.getIndex());
46f4442e
A
417}
418
4388f060 419int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
57a6839d
A
420 const PluralSelector& selector, void *context,
421 double number, UErrorCode& ec) {
4388f060
A
422 if (U_FAILURE(ec)) {
423 return 0;
424 }
425 int32_t count=pattern.countParts();
426 double offset;
427 const MessagePattern::Part* part=&pattern.getPart(partIndex);
428 if (MessagePattern::Part::hasNumericValue(part->getType())) {
429 offset=pattern.getNumericValue(*part);
430 ++partIndex;
431 } else {
432 offset=0;
433 }
57a6839d 434 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
4388f060
A
435 // Then we get the keyword from the selector.
436 // (In other words, we never call the selector if we match against an explicit value,
437 // or if the only non-explicit keyword is "other".)
438 UnicodeString keyword;
439 UnicodeString other(FALSE, OTHER_STRING, 5);
440 // When we find a match, we set msgStart>0 and also set this boolean to true
441 // to avoid matching the keyword again (duplicates are allowed)
442 // while we continue to look for an explicit-value match.
443 UBool haveKeywordMatch=FALSE;
444 // msgStart is 0 until we find any appropriate sub-message.
445 // We remember the first "other" sub-message if we have not seen any
446 // appropriate sub-message before.
447 // We remember the first matching-keyword sub-message if we have not seen
448 // one of those before.
449 // (The parser allows [does not check for] duplicate keywords.
450 // We just have to make sure to take the first one.)
451 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
452 // at the first keyword match.
453 // We keep going until we find an explicit-value match or reach the end of the plural style.
454 int32_t msgStart=0;
455 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
456 // until ARG_LIMIT or end of plural-only pattern.
457 do {
458 part=&pattern.getPart(partIndex++);
459 const UMessagePatternPartType type = part->getType();
460 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
46f4442e 461 break;
4388f060
A
462 }
463 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
464 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
465 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
466 // explicit value like "=2"
467 part=&pattern.getPart(partIndex++);
468 if(number==pattern.getNumericValue(*part)) {
469 // matches explicit value
470 return partIndex;
471 }
472 } else if(!haveKeywordMatch) {
473 // plural keyword like "few" or "other"
474 // Compare "other" first and call the selector if this is not "other".
475 if(pattern.partSubstringMatches(*part, other)) {
476 if(msgStart==0) {
477 msgStart=partIndex;
478 if(0 == keyword.compare(other)) {
479 // This is the first "other" sub-message,
480 // and the selected keyword is also "other".
481 // Do not match "other" again.
482 haveKeywordMatch=TRUE;
483 }
484 }
485 } else {
486 if(keyword.isEmpty()) {
57a6839d 487 keyword=selector.select(context, number-offset, ec);
4388f060
A
488 if(msgStart!=0 && (0 == keyword.compare(other))) {
489 // We have already seen an "other" sub-message.
490 // Do not match "other" again.
491 haveKeywordMatch=TRUE;
492 // Skip keyword matching but do getLimitPartIndex().
493 }
494 }
495 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
496 // keyword matches
497 msgStart=partIndex;
498 // Do not match this keyword again.
499 haveKeywordMatch=TRUE;
500 }
46f4442e 501 }
46f4442e 502 }
4388f060
A
503 partIndex=pattern.getLimitPartIndex(partIndex);
504 } while(++partIndex<count);
505 return msgStart;
46f4442e
A
506}
507
b331163b
A
508void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
509 // If no pattern was applied, return null.
510 if (msgPattern.countParts() == 0) {
511 pos.setBeginIndex(-1);
512 pos.setEndIndex(-1);
513 return;
514 }
515 int partIndex = 0;
516 int currMatchIndex;
517 int count=msgPattern.countParts();
518 int startingAt = pos.getBeginIndex();
519 if (startingAt < 0) {
520 startingAt = 0;
521 }
522
523 // The keyword is null until we need to match against a non-explicit, not-"other" value.
524 // Then we get the keyword from the selector.
525 // (In other words, we never call the selector if we match against an explicit value,
526 // or if the only non-explicit keyword is "other".)
527 UnicodeString keyword;
528 UnicodeString matchedWord;
529 const UnicodeString& pattern = msgPattern.getPatternString();
530 int matchedIndex = -1;
531 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
532 // until the end of the plural-only pattern.
533 while (partIndex < count) {
534 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
535 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
536 // Bad format
537 continue;
538 }
539
540 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
541 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
542 // Bad format
543 continue;
544 }
545
546 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
547 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
548 // Bad format
549 continue;
550 }
551
552 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
553 if (rbnfLenientScanner != NULL) {
554 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
555 int32_t length = -1;
556 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
557 }
558 else {
559 currMatchIndex = source.indexOf(currArg, startingAt);
560 }
561 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
562 matchedIndex = currMatchIndex;
563 matchedWord = currArg;
564 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
565 }
566 }
567 if (matchedIndex >= 0) {
568 pos.setBeginIndex(matchedIndex);
569 pos.setEndIndex(matchedIndex + matchedWord.length());
570 result.setString(keyword);
571 return;
572 }
573
574 // Not found!
575 pos.setBeginIndex(-1);
576 pos.setEndIndex(-1);
577}
578
4388f060
A
579PluralFormat::PluralSelector::~PluralSelector() {}
580
581PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
582 delete pluralRules;
583}
584
57a6839d 585UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
4388f060 586 UErrorCode& /*ec*/) const {
57a6839d 587 (void)number; // unused except in the assertion
2ca993e8 588 VisibleDigitsWithExponent *dec=static_cast<VisibleDigitsWithExponent *>(context);
57a6839d 589 return pluralRules->select(*dec);
4388f060
A
590}
591
592void PluralFormat::PluralSelectorAdapter::reset() {
593 delete pluralRules;
594 pluralRules = NULL;
46f4442e
A
595}
596
597
598U_NAMESPACE_END
599
600
601#endif /* #if !UCONFIG_NO_FORMATTING */
602
603//eof