]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/messagepattern.h
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / common / unicode / messagepattern.h
CommitLineData
4388f060
A
1/*
2*******************************************************************************
3* Copyright (C) 2011-2012, International Business Machines
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: messagepattern.h
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2011mar14
12* created by: Markus W. Scherer
13*/
14
15#ifndef __MESSAGEPATTERN_H__
16#define __MESSAGEPATTERN_H__
17
18/**
19 * \file
20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
21 */
22
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_FORMATTING
26
27#include "unicode/parseerr.h"
28#include "unicode/unistr.h"
29
/**
 * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
 * <p>
 * A pair of adjacent apostrophes always results in a single apostrophe in the output,
 * even when the pair is between two single, text-quoting apostrophes.
 * <p>
 * The following table shows examples of desired MessageFormat.format() output
 * with the pattern strings that yield that output.
 * <p>
 * <table>
 *   <tr>
 *     <th>Desired output</th>
 *     <th>DOUBLE_OPTIONAL</th>
 *     <th>DOUBLE_REQUIRED</th>
 *   </tr>
 *   <tr>
 *     <td>I see {many}</td>
 *     <td>I see '{many}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I said {'Wow!'}</td>
 *     <td>I said '{''Wow!''}'</td>
 *     <td>(same)</td>
 *   </tr>
 *   <tr>
 *     <td>I don't know</td>
 *     <td>I don't know OR<br> I don''t know</td>
 *     <td>I don''t know</td>
 *   </tr>
 * </table>
 * @stable ICU 4.8
 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
 */
enum UMessagePatternApostropheMode {
    /**
     * A literal apostrophe is represented by
     * either a single or a double apostrophe pattern character.
     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     * if it immediately precedes a curly brace {},
     * or a pipe symbol | if inside a choice format,
     * or a pound symbol # if inside a plural format.
     * <p>
     * This is the default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    /**
     * A literal apostrophe must be represented by
     * a double apostrophe pattern character.
     * A single apostrophe always starts quoted literal text.
     * <p>
     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED
};
/**
 * Convenience typedef so that the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
93
/**
 * MessagePattern::Part type constants.
 * @stable ICU 4.8
 */
enum UMessagePatternPartType {
    /**
     * Start of a message pattern (main or nested).
     * The length is 0 for the top-level message
     * and for a choice argument sub-message, otherwise 1 for the '{'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * <p>
     * There is always a later MSG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START,
    /**
     * End of a message pattern (main or nested).
     * The length is 0 for the top-level message and
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT,
    /**
     * Indicates a substring of the pattern string which is to be skipped when formatting.
     * For example, an apostrophe that begins or ends quoted text
     * would be indicated with such a part.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    /**
     * Indicates that a syntax character needs to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR,
    /**
     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, replace this part's substring with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    /**
     * Start of an argument.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * This part is followed by either an ARG_NUMBER or ARG_NAME,
     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START,
    /**
     * End of an argument.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT,
    /**
     * The argument number, provided by the value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER,
    /**
     * The argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME,
    /**
     * The argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE,
    /**
     * The argument style text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE,
    /**
     * A selector substring in a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR,
    /**
     * An integer value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is the integer value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT,
    /**
     * A numeric value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE
};
/**
 * Convenience typedef so that the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
208
/**
 * Argument type constants.
 * Returned by MessagePattern::Part::getArgType() for ARG_START and ARG_LIMIT parts.
 *
 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
 * with a nesting level one greater than the surrounding message.
 * @stable ICU 4.8
 */
enum UMessagePatternArgType {
    /**
     * The argument has no specified type.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE,
    /**
     * The argument has a "simple" type which is provided by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE,
    /**
     * The argument is a ChoiceFormat with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE,
    /**
     * The argument is a PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL,
    /**
     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT
};
/**
 * Convenience typedef so that the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
255
/**
 * Negative result codes of MessagePattern::validateArgumentName().
 * Non-negative results of that function are argument numbers.
 */
enum {
    /**
     * Return value from MessagePattern::validateArgumentName() for when
     * the string is a valid "pattern identifier" but not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,

    /**
     * Return value from MessagePattern::validateArgumentName() for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it might have only ASCII digits but there is a leading zero
     * or the number is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID=-2
};
273
/**
 * Special value that is returned by getNumericValue(Part) when no
 * numeric value is defined for a part.
 * @see MessagePattern::getNumericValue()
 * @stable ICU 4.8
 */
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
281
282U_NAMESPACE_BEGIN
283
284class MessagePatternDoubleList;
285class MessagePatternPartsList;
286
287/**
288 * Parses and represents ICU MessageFormat patterns.
289 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
290 * Used in the implementations of those classes as well as in tools
291 * for message validation, translation and format conversion.
292 * <p>
293 * The parser handles all syntax relevant for identifying message arguments.
294 * This includes "complex" arguments whose style strings contain
295 * nested MessageFormat pattern substrings.
296 * For "simple" arguments (with no nested MessageFormat pattern substrings),
297 * the argument style is not parsed any further.
298 * <p>
299 * The parser handles named and numbered message arguments and allows both in one message.
300 * <p>
301 * Once a pattern has been parsed successfully, iterate through the parsed data
302 * with countParts(), getPart() and related methods.
303 * <p>
304 * The data logically represents a parse tree, but is stored and accessed
305 * as a list of "parts" for fast and simple parsing and to minimize object allocations.
306 * Arguments and nested messages are best handled via recursion.
307 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
308 * the index of the corresponding _LIMIT "part".
309 * <p>
310 * List of "parts":
311 * <pre>
312 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
313 * argument = noneArg | simpleArg | complexArg
314 * complexArg = choiceArg | pluralArg | selectArg
315 *
316 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
317 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
318 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
319 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
320 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
321 *
322 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
323 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
324 * selectStyle = (ARG_SELECTOR message)+
325 * </pre>
326 * <ul>
327 * <li>Literal output text is not represented directly by "parts" but accessed
328 * between parts of a message, from one part's getLimit() to the next part's getIndex().
329 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
330 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
331 * the less-than-or-equal-to sign (U+2264).
332 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
333 * The optional numeric Part between each (ARG_SELECTOR, message) pair
334 * is the value of an explicit-number selector like "=2",
335 * otherwise the selector is a non-numeric identifier.
336 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
337 * </ul>
338 * <p>
339 * This class is not intended for public subclassing.
340 *
341 * @stable ICU 4.8
342 */
343class U_COMMON_API MessagePattern : public UObject {
344public:
345 /**
346 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
347 * @param errorCode Standard ICU error code. Its input value must
348 * pass the U_SUCCESS() test, or else the function returns
349 * immediately. Check for U_FAILURE() on output or use with
350 * function chaining. (See User Guide for details.)
351 * @stable ICU 4.8
352 */
353 MessagePattern(UErrorCode &errorCode);
354
355 /**
356 * Constructs an empty MessagePattern.
357 * @param mode Explicit UMessagePatternApostropheMode.
358 * @param errorCode Standard ICU error code. Its input value must
359 * pass the U_SUCCESS() test, or else the function returns
360 * immediately. Check for U_FAILURE() on output or use with
361 * function chaining. (See User Guide for details.)
362 * @stable ICU 4.8
363 */
364 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
365
366 /**
367 * Constructs a MessagePattern with default UMessagePatternApostropheMode and
368 * parses the MessageFormat pattern string.
369 * @param pattern a MessageFormat pattern string
370 * @param parseError Struct to receive information on the position
371 * of an error within the pattern.
372 * Can be NULL.
373 * @param errorCode Standard ICU error code. Its input value must
374 * pass the U_SUCCESS() test, or else the function returns
375 * immediately. Check for U_FAILURE() on output or use with
376 * function chaining. (See User Guide for details.)
377 * TODO: turn @throws into UErrorCode specifics?
378 * @throws IllegalArgumentException for syntax errors in the pattern string
379 * @throws IndexOutOfBoundsException if certain limits are exceeded
380 * (e.g., argument number too high, argument name too long, etc.)
381 * @throws NumberFormatException if a number could not be parsed
382 * @stable ICU 4.8
383 */
384 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
385
386 /**
387 * Copy constructor.
388 * @param other Object to copy.
389 * @stable ICU 4.8
390 */
391 MessagePattern(const MessagePattern &other);
392
393 /**
394 * Assignment operator.
395 * @param other Object to copy.
396 * @return *this=other
397 * @stable ICU 4.8
398 */
399 MessagePattern &operator=(const MessagePattern &other);
400
401 /**
402 * Destructor.
403 * @stable ICU 4.8
404 */
405 virtual ~MessagePattern();
406
407 /**
408 * Parses a MessageFormat pattern string.
409 * @param pattern a MessageFormat pattern string
410 * @param parseError Struct to receive information on the position
411 * of an error within the pattern.
412 * Can be NULL.
413 * @param errorCode Standard ICU error code. Its input value must
414 * pass the U_SUCCESS() test, or else the function returns
415 * immediately. Check for U_FAILURE() on output or use with
416 * function chaining. (See User Guide for details.)
417 * @return *this
418 * @throws IllegalArgumentException for syntax errors in the pattern string
419 * @throws IndexOutOfBoundsException if certain limits are exceeded
420 * (e.g., argument number too high, argument name too long, etc.)
421 * @throws NumberFormatException if a number could not be parsed
422 * @stable ICU 4.8
423 */
424 MessagePattern &parse(const UnicodeString &pattern,
425 UParseError *parseError, UErrorCode &errorCode);
426
427 /**
428 * Parses a ChoiceFormat pattern string.
429 * @param pattern a ChoiceFormat pattern string
430 * @param parseError Struct to receive information on the position
431 * of an error within the pattern.
432 * Can be NULL.
433 * @param errorCode Standard ICU error code. Its input value must
434 * pass the U_SUCCESS() test, or else the function returns
435 * immediately. Check for U_FAILURE() on output or use with
436 * function chaining. (See User Guide for details.)
437 * @return *this
438 * @throws IllegalArgumentException for syntax errors in the pattern string
439 * @throws IndexOutOfBoundsException if certain limits are exceeded
440 * (e.g., argument number too high, argument name too long, etc.)
441 * @throws NumberFormatException if a number could not be parsed
442 * @stable ICU 4.8
443 */
444 MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
445 UParseError *parseError, UErrorCode &errorCode);
446
447 /**
448 * Parses a PluralFormat pattern string.
449 * @param pattern a PluralFormat pattern string
450 * @param parseError Struct to receive information on the position
451 * of an error within the pattern.
452 * Can be NULL.
453 * @param errorCode Standard ICU error code. Its input value must
454 * pass the U_SUCCESS() test, or else the function returns
455 * immediately. Check for U_FAILURE() on output or use with
456 * function chaining. (See User Guide for details.)
457 * @return *this
458 * @throws IllegalArgumentException for syntax errors in the pattern string
459 * @throws IndexOutOfBoundsException if certain limits are exceeded
460 * (e.g., argument number too high, argument name too long, etc.)
461 * @throws NumberFormatException if a number could not be parsed
462 * @stable ICU 4.8
463 */
464 MessagePattern &parsePluralStyle(const UnicodeString &pattern,
465 UParseError *parseError, UErrorCode &errorCode);
466
467 /**
468 * Parses a SelectFormat pattern string.
469 * @param pattern a SelectFormat pattern string
470 * @param parseError Struct to receive information on the position
471 * of an error within the pattern.
472 * Can be NULL.
473 * @param errorCode Standard ICU error code. Its input value must
474 * pass the U_SUCCESS() test, or else the function returns
475 * immediately. Check for U_FAILURE() on output or use with
476 * function chaining. (See User Guide for details.)
477 * @return *this
478 * @throws IllegalArgumentException for syntax errors in the pattern string
479 * @throws IndexOutOfBoundsException if certain limits are exceeded
480 * (e.g., argument number too high, argument name too long, etc.)
481 * @throws NumberFormatException if a number could not be parsed
482 * @stable ICU 4.8
483 */
484 MessagePattern &parseSelectStyle(const UnicodeString &pattern,
485 UParseError *parseError, UErrorCode &errorCode);
486
487 /**
488 * Clears this MessagePattern.
489 * countParts() will return 0.
490 * @stable ICU 4.8
491 */
492 void clear();
493
494 /**
495 * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
496 * countParts() will return 0.
497 * @param mode The new UMessagePatternApostropheMode.
498 * @stable ICU 4.8
499 */
500 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
501 clear();
502 aposMode=mode;
503 }
504
505 /**
506 * @param other another object to compare with.
507 * @return TRUE if this object is equivalent to the other one.
508 * @stable ICU 4.8
509 */
510 UBool operator==(const MessagePattern &other) const;
511
512 /**
513 * @param other another object to compare with.
514 * @return FALSE if this object is equivalent to the other one.
515 * @stable ICU 4.8
516 */
517 inline UBool operator!=(const MessagePattern &other) const {
518 return !operator==(other);
519 }
520
521 /**
522 * @return A hash code for this object.
523 * @stable ICU 4.8
524 */
525 int32_t hashCode() const;
526
527 /**
528 * @return this instance's UMessagePatternApostropheMode.
529 * @stable ICU 4.8
530 */
531 UMessagePatternApostropheMode getApostropheMode() const {
532 return aposMode;
533 }
534
535 // Java has package-private jdkAposMode() here.
536 // In C++, this is declared in the MessageImpl class.
537
538 /**
539 * @return the parsed pattern string (null if none was parsed).
540 * @stable ICU 4.8
541 */
542 const UnicodeString &getPatternString() const {
543 return msg;
544 }
545
546 /**
547 * Does the parsed pattern have named arguments like {first_name}?
548 * @return TRUE if the parsed pattern has at least one named argument.
549 * @stable ICU 4.8
550 */
551 UBool hasNamedArguments() const {
552 return hasArgNames;
553 }
554
555 /**
556 * Does the parsed pattern have numbered arguments like {2}?
557 * @return TRUE if the parsed pattern has at least one numbered argument.
558 * @stable ICU 4.8
559 */
560 UBool hasNumberedArguments() const {
561 return hasArgNumbers;
562 }
563
564 /**
565 * Validates and parses an argument name or argument number string.
566 * An argument name must be a "pattern identifier", that is, it must contain
567 * no Unicode Pattern_Syntax or Pattern_White_Space characters.
568 * If it only contains ASCII digits, then it must be a small integer with no leading zero.
569 * @param name Input string.
570 * @return &gt;=0 if the name is a valid number,
571 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
572 * ARG_NAME_NOT_VALID (-2) if it is neither.
573 * @stable ICU 4.8
574 */
575 static int32_t validateArgumentName(const UnicodeString &name);
576
577 /**
578 * Returns a version of the parsed pattern string where each ASCII apostrophe
579 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
580 * <p>
581 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
582 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
583 * @return the deep-auto-quoted version of the parsed pattern string.
584 * @see MessageFormat.autoQuoteApostrophe()
585 * @stable ICU 4.8
586 */
587 UnicodeString autoQuoteApostropheDeep() const;
588
589 class Part;
590
591 /**
592 * Returns the number of "parts" created by parsing the pattern string.
593 * Returns 0 if no pattern has been parsed or clear() was called.
594 * @return the number of pattern parts.
595 * @stable ICU 4.8
596 */
597 int32_t countParts() const {
598 return partsLength;
599 }
600
601 /**
602 * Gets the i-th pattern "part".
603 * @param i The index of the Part data. (0..countParts()-1)
604 * @return the i-th pattern "part".
605 * @stable ICU 4.8
606 */
607 const Part &getPart(int32_t i) const {
608 return parts[i];
609 }
610
611 /**
612 * Returns the UMessagePatternPartType of the i-th pattern "part".
613 * Convenience method for getPart(i).getType().
614 * @param i The index of the Part data. (0..countParts()-1)
615 * @return The UMessagePatternPartType of the i-th Part.
616 * @stable ICU 4.8
617 */
618 UMessagePatternPartType getPartType(int32_t i) const {
619 return getPart(i).type;
620 }
621
622 /**
623 * Returns the pattern index of the specified pattern "part".
624 * Convenience method for getPart(partIndex).getIndex().
625 * @param partIndex The index of the Part data. (0..countParts()-1)
626 * @return The pattern index of this Part.
627 * @stable ICU 4.8
628 */
629 int32_t getPatternIndex(int32_t partIndex) const {
630 return getPart(partIndex).index;
631 }
632
633 /**
634 * Returns the substring of the pattern string indicated by the Part.
635 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
636 * @param part a part of this MessagePattern.
637 * @return the substring associated with part.
638 * @stable ICU 4.8
639 */
640 UnicodeString getSubstring(const Part &part) const {
641 return msg.tempSubString(part.index, part.length);
642 }
643
644 /**
645 * Compares the part's substring with the input string s.
646 * @param part a part of this MessagePattern.
647 * @param s a string.
648 * @return TRUE if getSubstring(part).equals(s).
649 * @stable ICU 4.8
650 */
651 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
652 return 0==msg.compare(part.index, part.length, s);
653 }
654
655 /**
656 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
657 * @param part a part of this MessagePattern.
658 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
659 * @stable ICU 4.8
660 */
661 double getNumericValue(const Part &part) const;
662
663 /**
664 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
665 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
666 * @return the "offset:" value.
667 * @stable ICU 4.8
668 */
669 double getPluralOffset(int32_t pluralStart) const;
670
671 /**
672 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
673 * @param start The index of some Part data (0..countParts()-1);
674 * this Part should be of Type ARG_START or MSG_START.
675 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
676 * or start itself if getPartType(msgStart)!=ARG|MSG_START.
677 * @stable ICU 4.8
678 */
679 int32_t getLimitPartIndex(int32_t start) const {
680 int32_t limit=getPart(start).limitPartIndex;
681 if(limit<start) {
682 return start;
683 }
684 return limit;
685 }
686
687 /**
688 * A message pattern "part", representing a pattern parsing event.
689 * There is a part for the start and end of a message or argument,
690 * for quoting and escaping of and with ASCII apostrophes,
691 * and for syntax elements of "complex" arguments.
692 * @stable ICU 4.8
693 */
694 class Part : public UMemory {
695 public:
696 /**
697 * Default constructor, do not use.
698 * @internal
699 */
700 Part() {}
701
702 /**
703 * Returns the type of this part.
704 * @return the part type.
705 * @stable ICU 4.8
706 */
707 UMessagePatternPartType getType() const {
708 return type;
709 }
710
711 /**
712 * Returns the pattern string index associated with this Part.
713 * @return this part's pattern string index.
714 * @stable ICU 4.8
715 */
716 int32_t getIndex() const {
717 return index;
718 }
719
720 /**
721 * Returns the length of the pattern substring associated with this Part.
722 * This is 0 for some parts.
723 * @return this part's pattern substring length.
724 * @stable ICU 4.8
725 */
726 int32_t getLength() const {
727 return length;
728 }
729
730 /**
731 * Returns the pattern string limit (exclusive-end) index associated with this Part.
732 * Convenience method for getIndex()+getLength().
733 * @return this part's pattern string limit index, same as getIndex()+getLength().
734 * @stable ICU 4.8
735 */
736 int32_t getLimit() const {
737 return index+length;
738 }
739
740 /**
741 * Returns a value associated with this part.
742 * See the documentation of each part type for details.
743 * @return the part value.
744 * @stable ICU 4.8
745 */
746 int32_t getValue() const {
747 return value;
748 }
749
750 /**
751 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
752 * otherwise UMSGPAT_ARG_TYPE_NONE.
753 * @return the argument type for this part.
754 * @stable ICU 4.8
755 */
756 UMessagePatternArgType getArgType() const {
757 UMessagePatternPartType type=getType();
758 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
759 return (UMessagePatternArgType)value;
760 } else {
761 return UMSGPAT_ARG_TYPE_NONE;
762 }
763 }
764
765 /**
766 * Indicates whether the Part type has a numeric value.
767 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
768 * @param type The Part type to be tested.
769 * @return TRUE if the Part type has a numeric value.
770 * @stable ICU 4.8
771 */
772 static UBool hasNumericValue(UMessagePatternPartType type) {
773 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
774 }
775
776 /**
777 * @param other another object to compare with.
778 * @return TRUE if this object is equivalent to the other one.
779 * @stable ICU 4.8
780 */
781 UBool operator==(const Part &other) const;
782
783 /**
784 * @param other another object to compare with.
785 * @return FALSE if this object is equivalent to the other one.
786 * @stable ICU 4.8
787 */
788 inline UBool operator!=(const Part &other) const {
789 return !operator==(other);
790 }
791
792 /**
793 * @return A hash code for this object.
794 * @stable ICU 4.8
795 */
796 int32_t hashCode() const {
797 return ((type*37+index)*37+length)*37+value;
798 }
799
800 private:
801 friend class MessagePattern;
802
803 static const int32_t MAX_LENGTH=0xffff;
804 static const int32_t MAX_VALUE=0x7fff;
805
806 // Some fields are not final because they are modified during pattern parsing.
807 // After pattern parsing, the parts are effectively immutable.
808 UMessagePatternPartType type;
809 int32_t index;
810 uint16_t length;
811 int16_t value;
812 int32_t limitPartIndex;
813 };
814
815private:
816 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
817
818 void postParse();
819
820 int32_t parseMessage(int32_t index, int32_t msgStartLength,
821 int32_t nestingLevel, UMessagePatternArgType parentType,
822 UParseError *parseError, UErrorCode &errorCode);
823
824 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
825 UParseError *parseError, UErrorCode &errorCode);
826
827 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
828
829 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
830 UParseError *parseError, UErrorCode &errorCode);
831
832 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
833 UParseError *parseError, UErrorCode &errorCode);
834
835 /**
836 * Validates and parses an argument name or argument number string.
837 * This internal method assumes that the input substring is a "pattern identifier".
838 * @return &gt;=0 if the name is a valid number,
839 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
840 * ARG_NAME_NOT_VALID (-2) if it is neither.
841 * @see #validateArgumentName(String)
842 */
843 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
844
845 int32_t parseArgNumber(int32_t start, int32_t limit) {
846 return parseArgNumber(msg, start, limit);
847 }
848
849 /**
850 * Parses a number from the specified message substring.
851 * @param start start index into the message string
852 * @param limit limit index into the message string, must be start<limit
853 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
854 * @param parseError
855 * @param errorCode
856 */
857 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
858 UParseError *parseError, UErrorCode &errorCode);
859
860 // Java has package-private appendReducedApostrophes() here.
861 // In C++, this is declared in the MessageImpl class.
862
863 int32_t skipWhiteSpace(int32_t index);
864
865 int32_t skipIdentifier(int32_t index);
866
867 /**
868 * Skips a sequence of characters that could occur in a double value.
869 * Does not fully parse or validate the value.
870 */
871 int32_t skipDouble(int32_t index);
872
873 static UBool isArgTypeChar(UChar32 c);
874
875 UBool isChoice(int32_t index);
876
877 UBool isPlural(int32_t index);
878
879 UBool isSelect(int32_t index);
880
881 /**
882 * @return TRUE if we are inside a MessageFormat (sub-)pattern,
883 * as opposed to inside a top-level choice/plural/select pattern.
884 */
885 UBool inMessageFormatPattern(int32_t nestingLevel);
886
887 /**
888 * @return TRUE if we are in a MessageFormat sub-pattern
889 * of a top-level ChoiceFormat pattern.
890 */
891 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
892
893 void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
894 int32_t value, UErrorCode &errorCode);
895
896 void addLimitPart(int32_t start,
897 UMessagePatternPartType type, int32_t index, int32_t length,
898 int32_t value, UErrorCode &errorCode);
899
900 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
901
902 void setParseError(UParseError *parseError, int32_t index);
903
904 // No ICU "poor man's RTTI" for this class nor its subclasses.
905 virtual UClassID getDynamicClassID() const;
906
907 UBool init(UErrorCode &errorCode);
908 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
909
910 UMessagePatternApostropheMode aposMode;
911 UnicodeString msg;
912 // ArrayList<Part> parts=new ArrayList<Part>();
913 MessagePatternPartsList *partsList;
914 Part *parts;
915 int32_t partsLength;
916 // ArrayList<Double> numericValues;
917 MessagePatternDoubleList *numericValuesList;
918 double *numericValues;
919 int32_t numericValuesLength;
920 UBool hasArgNames;
921 UBool hasArgNumbers;
922 UBool needsAutoQuoting;
923};
924
925U_NAMESPACE_END
926
927#endif // !UCONFIG_NO_FORMATTING
928
929#endif // __MESSAGEPATTERN_H__