git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/unicode/plurfmt.h
ICU-66108.tar.gz
[apple/icu.git] / icuSources / i18n / unicode / plurfmt.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2007-2014, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9
10 * File PLURFMT.H
11 ********************************************************************************
12 */
13
14 #ifndef PLURFMT
15 #define PLURFMT
16
17 #include "unicode/utypes.h"
18
19 #if U_SHOW_CPLUSPLUS_API
20
21 /**
22 * \file
23 * \brief C++ API: PluralFormat object
24 */
25
26 #if !UCONFIG_NO_FORMATTING
27
28 #include "unicode/messagepattern.h"
29 #include "unicode/numfmt.h"
30 #include "unicode/plurrule.h"
31
32 U_NAMESPACE_BEGIN
33
34 class Hashtable;
35 class NFRule;
36
37 /**
38 * <p>
39 * <code>PluralFormat</code> supports the creation of internationalized
40 * messages with plural inflection. It is based on <i>plural
41 * selection</i>, i.e. the caller specifies messages for each
42 * plural case that can appear in the user's language and the
43 * <code>PluralFormat</code> selects the appropriate message based on
44 * the number.
45 * </p>
46 * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
47 * <p>
48 * Different languages have different ways to inflect
49 * plurals. Creating internationalized messages that include plural
50 * forms is only feasible when the framework is able to handle plural
51 * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
52 * doesn't handle this well, because it attaches a number interval to
53 * each message and selects the message whose interval contains a
54 * given number. This can only handle a finite number of
55 * intervals. But in some languages, like Polish, one plural case
56 * applies to infinitely many intervals (e.g., the plural case applies to
57 * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
58 * 14). Thus <code>ChoiceFormat</code> is not adequate.
59 * </p><p>
60 * <code>PluralFormat</code> deals with this by breaking the problem
61 * into two parts:
62 * <ul>
63 * <li>It uses <code>PluralRules</code> that can define more complex
64 * conditions for a plural case than just a single interval. These plural
65 * rules define both what plural cases exist in a language, and to
66 * which numbers these cases apply.
67 * <li>It provides predefined plural rules for many languages. Thus, the programmer
68 * need not worry about the plural cases of a language and
69 * does not have to define the plural cases; they can simply
70 * use the predefined keywords. The whole plural formatting of messages can
71 * be done using localized patterns from resource bundles. For predefined plural
72 * rules, see the CLDR <i>Language Plural Rules</i> page at
73 * http://unicode.org/repos/cldr-tmp/trunk/diff/supplemental/language_plural_rules.html
74 * </ul>
75 * </p>
76 * <h4>Usage of <code>PluralFormat</code></h4>
77 * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
78 * with a <code>plural</code> argument type,
79 * rather than using a stand-alone <code>PluralFormat</code>.
80 * </p><p>
81 * This discussion assumes that you use <code>PluralFormat</code> with
82 * a predefined set of plural rules. You can create one using one of
83 * the constructors that takes a <code>locale</code> object. To
84 * specify the message pattern, you can either pass it to the
85 * constructor or set it explicitly using the
86 * <code>applyPattern()</code> method. The <code>format()</code>
87 * method takes a number object and selects the message of the
88 * matching plural case. This message will be returned.
89 * </p>
90 * <h5>Patterns and Their Interpretation</h5>
91 * <p>
92 * The pattern text defines the message output for each plural case of the
93 * specified locale. Syntax:
94 * <pre>
95 * pluralStyle = [offsetValue] (selector '{' message '}')+
96 * offsetValue = "offset:" number
97 * selector = explicitValue | keyword
98 * explicitValue = '=' number // adjacent, no white space in between
99 * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
100 * message: see {@link MessageFormat}
101 * </pre>
102 * Pattern_White_Space between syntax elements is ignored, except
103 * between the {curly braces} and their sub-message,
104 * and between the '=' and the number of an explicitValue.
105 *
106 * </p><p>
107 * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
108 * 'other'. You always have to define a message text for the default plural case
109 * <code>other</code> which is contained in every rule set.
110 * If you do not specify a message text for a particular plural case, the
111 * message text of the plural case <code>other</code> gets assigned to this
112 * plural case.
113 * </p><p>
114 * When formatting, the input number is first matched against the explicitValue clauses.
115 * If there is no exact-number match, then a keyword is selected by calling
116 * the <code>PluralRules</code> with the input number <em>minus the offset</em>.
117 * (The offset defaults to 0 if it is omitted from the pattern string.)
118 * If there is no clause with that keyword, then the "other" clause is returned.
119 * </p><p>
120 * An unquoted pound sign (<code>#</code>) in the selected sub-message
121 * itself (i.e., outside of arguments nested in the sub-message)
122 * is replaced by the input number minus the offset.
123 * The number-minus-offset value is formatted using a
124 * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
125 * need special number formatting, you have to use a <code>MessageFormat</code>
126 * and explicitly specify a <code>NumberFormat</code> argument.
127 * <strong>Note:</strong> That argument is formatted without subtracting the offset!
128 * If you need a custom format and have a non-zero offset, then you need to pass the
129 * number-minus-offset value as a separate parameter.
130 * </p>
131 * For a usage example, see the {@link MessageFormat} class documentation.
132 *
133 * <h4>Defining Custom Plural Rules</h4>
134 * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
135 * create a <code>PluralRules</code> object and pass it to
136 * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
137 * constructor, this locale will be used to format the number in the message
138 * texts.
139 * </p><p>
140 * For more information about <code>PluralRules</code>, see
141 * {@link PluralRules}.
142 * </p>
143 *
144 * ported from Java
145 * @stable ICU 4.0
146 */
147
148 class U_I18N_API PluralFormat : public Format {
149 public:
150
151 /**
152 * Creates a new cardinal-number <code>PluralFormat</code> for the default locale.
153 * This locale will be used to get the set of plural rules and for standard
154 * number formatting.
155 * @param status output param set to success/failure code on exit, which
156 * must not indicate a failure before the function call.
157 * @stable ICU 4.0
158 */
159 PluralFormat(UErrorCode& status);
160
161 /**
162 * Creates a new cardinal-number <code>PluralFormat</code> for a given locale.
163 * @param locale the <code>PluralFormat</code> will be configured with
164 * rules for this locale. This locale will also be used for
165 * standard number formatting.
166 * @param status output param set to success/failure code on exit, which
167 * must not indicate a failure before the function call.
168 * @stable ICU 4.0
169 */
170 PluralFormat(const Locale& locale, UErrorCode& status);
171
172 /**
173 * Creates a new <code>PluralFormat</code> for a given set of rules.
174 * The standard number formatting will be done using the default locale.
175 * @param rules defines the behavior of the <code>PluralFormat</code>
176 * object.
177 * @param status output param set to success/failure code on exit, which
178 * must not indicate a failure before the function call.
179 * @stable ICU 4.0
180 */
181 PluralFormat(const PluralRules& rules, UErrorCode& status);
182
183 /**
184 * Creates a new <code>PluralFormat</code> for a given set of rules.
185 * The standard number formatting will be done using the given locale.
186 * @param locale the default number formatting will be done using this
187 * locale.
188 * @param rules defines the behavior of the <code>PluralFormat</code>
189 * object.
190 * @param status output param set to success/failure code on exit, which
191 * must not indicate a failure before the function call.
192 * @stable ICU 4.0
193 * <p>
194 * <h4>Sample code</h4>
195 * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample1
196 * \snippet samples/plurfmtsample/plurfmtsample.cpp PluralFormatExample
197 * <p>
198 */
199 PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
200
201 /**
202 * Creates a new <code>PluralFormat</code> for the plural type.
203 * The standard number formatting will be done using the given locale.
204 * @param locale the default number formatting will be done using this
205 * locale.
206 * @param type The plural type (e.g., cardinal or ordinal).
207 * @param status output param set to success/failure code on exit, which
208 * must not indicate a failure before the function call.
209 * @stable ICU 50
210 */
211 PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status);
212
213 /**
214 * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string.
215 * The default locale will be used to get the set of plural rules and for
216 * standard number formatting.
217 * @param pattern the pattern for this <code>PluralFormat</code>.
218 * Errors are returned to status if the pattern is invalid.
219 * @param status output param set to success/failure code on exit, which
220 * must not indicate a failure before the function call.
221 * @stable ICU 4.0
222 */
223 PluralFormat(const UnicodeString& pattern, UErrorCode& status);
224
225 /**
226 * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string and
227 * locale.
228 * The locale will be used to get the set of plural rules and for
229 * standard number formatting.
230 * @param locale the <code>PluralFormat</code> will be configured with
231 * rules for this locale. This locale will also be used for
232 * standard number formatting.
233 * @param pattern the pattern for this <code>PluralFormat</code>.
234 * Errors are returned to status if the pattern is invalid.
235 * @param status output param set to success/failure code on exit, which
236 * must not indicate a failure before the function call.
237 * @stable ICU 4.0
238 */
239 PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
240
241 /**
242 * Creates a new <code>PluralFormat</code> for a given set of rules and a
243 * pattern. This overload takes no locale parameter.
244 * @param rules defines the behavior of the <code>PluralFormat</code>
245 * object.
246 * @param pattern the pattern for this <code>PluralFormat</code>.
247 * Errors are returned to status if the pattern is invalid.
248 * @param status output param set to success/failure code on exit, which
249 * must not indicate a failure before the function call.
250 * @stable ICU 4.0
251 */
252 PluralFormat(const PluralRules& rules,
253 const UnicodeString& pattern,
254 UErrorCode& status);
255
256 /**
257 * Creates a new <code>PluralFormat</code> for a given set of rules, a
258 * pattern and a locale.
259 * @param locale the <code>PluralFormat</code> will be configured with
260 * rules for this locale. This locale will also be used for
261 * standard number formatting.
262 * @param rules defines the behavior of the <code>PluralFormat</code>
263 * object.
264 * @param pattern the pattern for this <code>PluralFormat</code>.
265 * Errors are returned to status if the pattern is invalid.
266 * @param status output param set to success/failure code on exit, which
267 * must not indicate a failure before the function call.
268 * @stable ICU 4.0
269 */
270 PluralFormat(const Locale& locale,
271 const PluralRules& rules,
272 const UnicodeString& pattern,
273 UErrorCode& status);
274
275 /**
276 * Creates a new <code>PluralFormat</code> for a plural type, a
277 * pattern and a locale.
278 * @param locale the <code>PluralFormat</code> will be configured with
279 * rules for this locale. This locale will also be used for
280 * standard number formatting.
281 * @param type The plural type (e.g., cardinal or ordinal).
282 * @param pattern the pattern for this <code>PluralFormat</code>.
283 * Errors are returned to status if the pattern is invalid.
284 * @param status output param set to success/failure code on exit, which
285 * must not indicate a failure before the function call.
286 * @stable ICU 50
287 */
288 PluralFormat(const Locale& locale,
289 UPluralType type,
290 const UnicodeString& pattern,
291 UErrorCode& status);
292
293 /**
294 * Copy constructor.
 * @param other the PluralFormat object to copy from.
295 * @stable ICU 4.0
296 */
297 PluralFormat(const PluralFormat& other);
298
299 /**
300 * Destructor.
301 * @stable ICU 4.0
302 */
303 virtual ~PluralFormat();
304
305 /**
306 * Sets the pattern used by this plural format.
307 * The method parses the pattern and creates a map of format strings
308 * for the plural rules.
309 * Patterns and their interpretation are specified in the class description.
310 *
311 * @param pattern the pattern for this plural format.
312 * Errors are returned to status if the pattern is invalid.
313 * @param status output param set to success/failure code on exit, which
314 * must not indicate a failure before the function call.
315 * @stable ICU 4.0
316 */
317 void applyPattern(const UnicodeString& pattern, UErrorCode& status);
318
319
320 using Format::format;
321
322 /**
323 * Formats a plural message for a given number.
324 *
325 * @param number a number for which the plural message should be formatted.
326 * If no pattern has been applied to this
327 * <code>PluralFormat</code> object yet, the formatted number
328 * will be returned.
329 * @param status output param set to success/failure code on exit, which
330 * must not indicate a failure before the function call.
331 * @return the string containing the formatted plural message.
332 * @stable ICU 4.0
333 */
334 UnicodeString format(int32_t number, UErrorCode& status) const;
335
336 /**
337 * Formats a plural message for a given number.
338 *
339 * @param number a number for which the plural message should be formatted.
340 * If no pattern has been applied to this
341 * <code>PluralFormat</code> object yet, the formatted number
342 * will be returned.
343 * @param status output param set to success/failure code on exit, which
344 * must not indicate a failure before the function call.
345 * @return the string containing the formatted plural message.
346 * @stable ICU 4.0
347 */
348 UnicodeString format(double number, UErrorCode& status) const;
349
350 /**
351 * Formats a plural message for a given number.
352 *
353 * @param number a number for which the plural message should be formatted.
354 * If no pattern has been applied to this
355 * <code>PluralFormat</code> object yet, the formatted number
356 * will be returned.
357 * @param appendTo output parameter to receive result.
358 * Result is appended to existing contents.
359 * @param pos On input: an alignment field, if desired.
360 * On output: the offsets of the alignment field.
361 * @param status output param set to success/failure code on exit, which
362 * must not indicate a failure before the function call.
363 * @return the string containing the formatted plural message.
364 * @stable ICU 4.0
365 */
366 UnicodeString& format(int32_t number,
367 UnicodeString& appendTo,
368 FieldPosition& pos,
369 UErrorCode& status) const;
370
371 /**
372 * Formats a plural message for a given number.
373 *
374 * @param number a number for which the plural message should be formatted.
375 * If no pattern has been applied to this
376 * <code>PluralFormat</code> object yet, the formatted number
377 * will be returned.
378 * @param appendTo output parameter to receive result.
379 * Result is appended to existing contents.
380 * @param pos On input: an alignment field, if desired.
381 * On output: the offsets of the alignment field.
382 * @param status output param set to success/failure code on exit, which
383 * must not indicate a failure before the function call.
384 * @return the string containing the formatted plural message.
385 * @stable ICU 4.0
386 */
387 UnicodeString& format(double number,
388 UnicodeString& appendTo,
389 FieldPosition& pos,
390 UErrorCode& status) const;
391
392 #ifndef U_HIDE_DEPRECATED_API
393 /**
394 * Sets the locale used by this <code>PluralFormat</code> object.
395 * Note: Calling this method resets this <code>PluralFormat</code> object,
396 * i.e., a pattern that was applied previously will be removed,
397 * and the NumberFormat is set to the default number format for
398 * the locale. The resulting format behaves the same as one
399 * constructed from {@link #PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status)}
400 * with UPLURAL_TYPE_CARDINAL.
401 * @param locale the <code>locale</code> to use to configure the formatter.
402 * @param status output param set to success/failure code on exit, which
403 * must not indicate a failure before the function call.
404 * @deprecated ICU 50 This method clears the pattern and might create
405 * a different kind of PluralRules instance;
406 * use one of the constructors to create a new instance instead.
407 */
408 void setLocale(const Locale& locale, UErrorCode& status);
409 #endif /* U_HIDE_DEPRECATED_API */
410
411 /**
412 * Sets the number format used by this formatter. You only need to
413 * call this if you want a different number format than the default
414 * formatter for the locale.
415 * @param format the number format to use.
416 * @param status output param set to success/failure code on exit, which
417 * must not indicate a failure before the function call.
418 * @stable ICU 4.0
419 */
420 void setNumberFormat(const NumberFormat* format, UErrorCode& status);
421
422 /**
423 * Assignment operator.
424 *
425 * @param other the PluralFormat object to copy from.
 * @return a reference to a PluralFormat (presumably this object, per the
 * usual assignment convention; confirm in the implementation).
426 * @stable ICU 4.0
427 */
428 PluralFormat& operator=(const PluralFormat& other);
429
430 /**
431 * Return true if another object is semantically equal to this one.
432 *
433 * @param other the PluralFormat object to be compared with.
434 * @return true if other is semantically equal to this.
435 * @stable ICU 4.0
436 */
437 virtual UBool operator==(const Format& other) const;
438
439 /**
440 * Return true if another object is semantically unequal to this one.
441 *
442 * @param other the PluralFormat object to be compared with.
443 * @return true if other is semantically unequal to this.
444 * @stable ICU 4.0
445 */
446 virtual UBool operator!=(const Format& other) const;
447
448 /**
449 * Clones this Format object polymorphically. The caller owns the
450 * result and should delete it when done.
 * @return a pointer to the new PluralFormat, owned by the caller.
451 * @stable ICU 4.0
452 */
453 virtual PluralFormat* clone() const;
454
455 /**
456 * Formats a plural message for a number taken from a Formattable object.
457 *
458 * @param obj The object containing a number for which the
459 * plural message should be formatted.
460 * The object must be of a numeric type.
461 * @param appendTo output parameter to receive result.
462 * Result is appended to existing contents.
463 * @param pos On input: an alignment field, if desired.
464 * On output: the offsets of the alignment field.
465 * @param status output param filled with success/failure status.
466 * @return Reference to 'appendTo' parameter.
467 * @stable ICU 4.0
468 */
469 UnicodeString& format(const Formattable& obj,
470 UnicodeString& appendTo,
471 FieldPosition& pos,
472 UErrorCode& status) const;
473
474 /**
475 * Returns the pattern that was set by applyPattern() or by a constructor.
476 *
477 * @param appendTo output parameter to receive result.
478 * Result is appended to existing contents.
479 * @return the UnicodeString with inserted pattern.
480 * @stable ICU 4.0
481 */
482 UnicodeString& toPattern(UnicodeString& appendTo);
483
484 /**
485 * This method is not yet supported by <code>PluralFormat</code>.
486 * <P>
487 * Before calling, set parse_pos.index to the offset you want to start
488 * parsing at in the source. After calling, parse_pos.index is the end of
489 * the text you parsed. If error occurs, index is unchanged.
490 * <P>
491 * When parsing, leading whitespace is discarded (with a successful parse),
492 * while trailing whitespace is left as is.
493 * <P>
494 * See Format::parseObject() for more.
495 *
496 * @param source The string to be parsed into an object.
497 * @param result Formattable to be set to the parse result.
498 * If parse fails, return contents are undefined.
499 * @param parse_pos The position to start parsing at. Upon return
500 * this param is set to the position after the
501 * last character successfully parsed. If the
502 * source is not parsed successfully, this param
503 * will remain unchanged.
504 * @stable ICU 4.0
505 */
506 virtual void parseObject(const UnicodeString& source,
507 Formattable& result,
508 ParsePosition& parse_pos) const;
509
510 /**
511 * ICU "poor man's RTTI", returns a UClassID for this class.
512 *
513 * @stable ICU 4.0
514 *
515 */
516 static UClassID U_EXPORT2 getStaticClassID(void);
517
518 /**
519 * ICU "poor man's RTTI", returns a UClassID for the actual class.
520 *
521 * @stable ICU 4.0
522 */
523 virtual UClassID getDynamicClassID() const;
524
525 private:
526 /**
 * Abstract interface that maps a number to a PluralFormat keyword
 * through its pure virtual select() method.
527 * @internal (private)
528 */
529 class U_I18N_API PluralSelector : public UMemory {
530 public:
531 virtual ~PluralSelector();
532 /**
533 * Given a number, returns the appropriate PluralFormat keyword.
534 *
535 * @param context worker object for the selector.
536 * @param number The number to be plural-formatted.
537 * @param ec Error code.
538 * @return The selected PluralFormat keyword.
539 * @internal (private)
540 */
541 virtual UnicodeString select(void *context, double number, UErrorCode& ec) const = 0;
542 };
543
/**
 * PluralSelector implementation that carries a PluralRules pointer.
 * The pointer starts out as NULL (see the inline constructor).
 * NOTE(review): select() presumably forwards to pluralRules, and the
 * destructor/reset() presumably manage its lifetime - confirm in the
 * implementation file; none of that is visible in this header.
 * @internal (private)
 */
544 class U_I18N_API PluralSelectorAdapter : public PluralSelector {
545 public:
546 PluralSelectorAdapter() : pluralRules(NULL) {
547 }
548
549 virtual ~PluralSelectorAdapter();
550
/* Overrides PluralSelector::select(); the error-code parameter is left unnamed in this declaration. */
551 virtual UnicodeString select(void *context, double number, UErrorCode& /*ec*/) const;
552
553 void reset();
554
555 PluralRules* pluralRules;
556 };
557
/*
 * Instance state.
 * NOTE(review): the roles below are inferred from the member names and the
 * class documentation; confirm against the implementation file.
 *  - locale: presumably the locale used for number formatting.
 *  - msgPattern: presumably the parsed form of the applied pattern.
 *  - numberFormat: presumably the formatter for the number-minus-offset
 *    value (see setNumberFormat()).
 *  - offset: presumably the "offset:" value from the pattern string.
 *  - pluralRulesWrapper: PluralSelectorAdapter holding the PluralRules pointer.
 */
558 Locale locale;
559 MessagePattern msgPattern;
560 NumberFormat* numberFormat;
561 double offset;
562 PluralSelectorAdapter pluralRulesWrapper;
563
564 PluralFormat(); // default constructor not implemented
/* Initialization helper. NOTE(review): presumably shared by the public constructors - confirm in the implementation file. */
565 void init(const PluralRules* rules, UPluralType type, UErrorCode& status);
566 /**
567 * Copies dynamically allocated values (pointer fields).
568 * Others are copied using their copy constructors and assignment operators.
569 */
570 void copyObjects(const PluralFormat& other);
571
/*
 * Private format() overload that receives the number both as a Formattable
 * and as a double.
 * NOTE(review): presumably the public format() overloads delegate here -
 * confirm in the implementation file.
 */
572 UnicodeString& format(const Formattable& numberObject, double number,
573 UnicodeString& appendTo,
574 FieldPosition& pos,
575 UErrorCode& status) const;
576
577 /**
578 * Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
579 * @param pattern A MessagePattern.
580 * @param partIndex the index of the first PluralFormat argument style part.
581 * @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
582 * @param context worker object for the selector.
583 * @param number a number to be matched to one of the PluralFormat argument's explicit values,
584 * or mapped via the PluralSelector.
585 * @param ec ICU error code.
586 * @return the sub-message start part index.
587 */
588 static int32_t findSubMessage(
589 const MessagePattern& pattern, int32_t partIndex,
590 const PluralSelector& selector, void *context, double number, UErrorCode& ec); /**< @internal */
591
/*
 * Parsing helper with no documentation in this header.
 * NOTE(review): presumably intended for the friend class NFRule, given the
 * rbnfLenientScanner parameter - confirm against the implementation file.
 */
592 void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner,
593 Formattable& result, FieldPosition& pos) const;
594
595 friend class MessageFormat;
596 friend class NFRule;
597 };
598
599 U_NAMESPACE_END
600
601 #endif /* #if !UCONFIG_NO_FORMATTING */
602
603 #endif /* U_SHOW_CPLUSPLUS_API */
604
605 #endif // PLURFMT
606 //eof