+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
-* Copyright (C) 2007-2008, International Business Machines Corporation and others. All Rights Reserved.
+* Copyright (C) 2007-2013, International Business Machines Corporation and
+* others. All Rights Reserved.
********************************************************************************
*
* File MSGFMT.H
* 03/20/97 helena Finished first cut of implementation.
* 07/22/98 stephen Removed operator!= (defined in Format)
* 08/19/2002 srl Removing Javaisms
-********************************************************************************
-*/
+*******************************************************************************/
#ifndef MSGFMT_H
#define MSGFMT_H
#include "unicode/utypes.h"
/**
- * \file
+ * \file
* \brief C++ API: Formats messages in a language-neutral way.
*/
-
+
#if !UCONFIG_NO_FORMATTING
#include "unicode/format.h"
#include "unicode/locid.h"
+#include "unicode/messagepattern.h"
#include "unicode/parseerr.h"
-#include "unicode/uchar.h"
+#include "unicode/plurfmt.h"
+#include "unicode/plurrule.h"
+U_CDECL_BEGIN
+// Forward declaration.
+struct UHashtable;
+typedef struct UHashtable UHashtable; /**< @internal */
+U_CDECL_END
+
+#if U_SHOW_CPLUSPLUS_API
U_NAMESPACE_BEGIN
-class NumberFormat;
+class AppendableWrapper;
class DateFormat;
+class NumberFormat;
/**
+ * <p>MessageFormat prepares strings for display to users,
+ * with optional arguments (variables/placeholders).
+ * The arguments can occur in any order, which is necessary for translation
+ * into languages with different grammars.
*
- * A MessageFormat produces concatenated messages in a
- * language-neutral way. It should be used for all string
- * concatenations that are visible to end users.
- * <P>
- * A MessageFormat contains an array of <EM>subformats</EM> arranged
- * within a <EM>template string</EM>. Together, the subformats and
- * template string determine how the MessageFormat will operate during
- * formatting and parsing.
- * <P>
- * Typically, both the subformats and the template string are
- * specified at once in a <EM>pattern</EM>. By using different
- * patterns for different locales, messages may be localized.
- * <P>
- * During formatting, the MessageFormat takes an array of arguments
- * and produces a user-readable string. Each argument is a
- * Formattable object; they may be passed in in an array, or as a
- * single Formattable object which itself contains an array. Each
- * argument is matched up with its corresponding subformat, which then
- * formats it into a string. The resultant strings are then assembled
- * within the string template of the MessageFormat to produce the
- * final output string.
- * <p>
- * <strong>Note:</strong>
- * In ICU 4.0 MessageFormat supports named arguments. If a named argument
- * is used, all arguments must be named. Names start with a character in
- * <code>UCHAR_ID_START</code> and continue with characters in
- * <code>UCHARID_CONTINUE</code>, in particular they do not start with a digit.
- * If named arguments are used, {@link #usesNamedArguments()} will return true.
- * <p>
- * The other new methods supporting named arguments are
- * {@link #getFormatNames(UErrorCode& status)},
- * {@link #getFormat(const UnicodeString& formatName, UErrorCode& status)}
- * {@link #setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status)},
- * {@link #adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status)},
- * {@link #format(const Formattable* arguments, const UnicodeString *argumentNames, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection, UErrorCode& success)},
- * {@link #format(const UnicodeString* argumentNames, const Formattable* arguments, int32_t count, UnicodeString& appendTo,UErrorCode& status)}.
- * These methods are all compatible with patterns that do not used named arguments--
- * in these cases the keys in the input or output use <code>UnicodeString</code>s
- * that name the argument indices, e.g. "0", "1", "2"... etc.
- * <p>
- * When named arguments are used, certain methods on MessageFormat that take or
- * return arrays do not perform any action, since it is not possible to
- * identify positions in an array using a name. UErrorCode is set to
- * U_ARGUMENT_TYPE_MISMATCH if there is a status/success field in the method.
- * These methods are
- * {@link #adoptFormats(Format** newFormats, int32_t count)},
- * {@link #setFormats(const Format** newFormats,int32_t count)},
- * {@link #adoptFormat(int32_t n, Format *newFormat)},
- * {@link #getFormats(int32_t& cnt)},
- * {@link #format(const Formattable* source,int32_t cnt,UnicodeString& appendTo, FieldPosition& ignore, UErrorCode& success)},
- * {@link #format(const UnicodeString& pattern,const Formattable* arguments,int32_t cnt,UnicodeString& appendTo,UErrorCode& success)},
- * {@link #format(const Formattable& source, UnicodeString& appendTo,FieldPosition& ignore, UErrorCode& success)},
- * {@link #format(const Formattable* arguments, int32_t cnt, UnicodeString& appendTo, FieldPosition& status, int32_t recursionProtection,UErrorCode& success)},
- * {@link #parse(const UnicodeString& source, ParsePosition& pos,int32_t& count)},
- * {@link #parse(const UnicodeString& source, int32_t& cnt, UErrorCode& status)}
+ * <p>A MessageFormat is constructed from a <em>pattern</em> string
+ * with arguments in {curly braces} which will be replaced by formatted values.
+ *
+ * <p><code>MessageFormat</code> differs from the other <code>Format</code>
+ * classes in that you create a <code>MessageFormat</code> object with one
+ * of its constructors (not with a <code>createInstance</code> style factory
+ * method). Factory methods aren't necessary because <code>MessageFormat</code>
+ * itself doesn't implement locale-specific behavior. Any locale-specific
+ * behavior is defined by the pattern that you provide and the
+ * subformats used for inserted arguments.
+ *
+ * <p>Arguments can be named (using identifiers) or numbered (using small ASCII-digit integers).
+ * Some of the API methods work only with argument numbers and throw an exception
+ * if the pattern has named arguments (see {@link #usesNamedArguments()}).
+ *
+ * <p>An argument might not specify any format type. In this case,
+ * a numeric value is formatted with a default (for the locale) NumberFormat,
+ * and a date/time value is formatted with a default (for the locale) DateFormat.
+ *
+ * <p>An argument might specify a "simple" type for which the specified
+ * Format object is created, cached and used.
+ *
+ * <p>An argument might have a "complex" type with nested MessageFormat sub-patterns.
+ * During formatting, one of these sub-messages is selected according to the argument value
+ * and recursively formatted.
+ *
+ * <p>After construction, a custom Format object can be set for
+ * a top-level argument, overriding the default formatting and parsing behavior
+ * for that argument.
+ * However, custom formatting can be achieved more simply by writing
+ * a typeless argument in the pattern string
+ * and supplying it with a preformatted string value.
+ *
+ * <p>When formatting, MessageFormat takes a collection of argument values
+ * and writes an output string.
+ * The argument values may be passed as an array
+ * (when the pattern contains only numbered arguments)
+ * or as an array of names and and an array of arguments (which works for both named
+ * and numbered arguments).
+ *
+ * <p>Each argument is matched with one of the input values by array index or argument name
+ * and formatted according to its pattern specification
+ * (or using a custom Format object if one was set).
+ * A numbered pattern argument is matched with an argument name that contains that number
+ * as an ASCII-decimal-digit string (without leading zero).
+ *
+ * <h4><a name="patterns">Patterns and Their Interpretation</a></h4>
+ *
+ * <code>MessageFormat</code> uses patterns of the following form:
+ * <pre>
+ * message = messageText (argument messageText)*
+ * argument = noneArg | simpleArg | complexArg
+ * complexArg = choiceArg | pluralArg | selectArg | selectordinalArg
+ *
+ * noneArg = '{' argNameOrNumber '}'
+ * simpleArg = '{' argNameOrNumber ',' argType [',' argStyle] '}'
+ * choiceArg = '{' argNameOrNumber ',' "choice" ',' choiceStyle '}'
+ * pluralArg = '{' argNameOrNumber ',' "plural" ',' pluralStyle '}'
+ * selectArg = '{' argNameOrNumber ',' "select" ',' selectStyle '}'
+ * selectordinalArg = '{' argNameOrNumber ',' "selectordinal" ',' pluralStyle '}'
+ *
+ * choiceStyle: see {@link ChoiceFormat}
+ * pluralStyle: see {@link PluralFormat}
+ * selectStyle: see {@link SelectFormat}
+ *
+ * argNameOrNumber = argName | argNumber
+ * argName = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
+ * argNumber = '0' | ('1'..'9' ('0'..'9')*)
+ *
+ * argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration"
+ * argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText | "::" argSkeletonText
+ * </pre>
+ *
+ * <ul>
+ * <li>messageText can contain quoted literal strings including syntax characters.
+ * A quoted literal string begins with an ASCII apostrophe and a syntax character
+ * (usually a {curly brace}) and continues until the next single apostrophe.
+ * A double ASCII apostrohpe inside or outside of a quoted string represents
+ * one literal apostrophe.
+ * <li>Quotable syntax characters are the {curly braces} in all messageText parts,
+ * plus the '#' sign in a messageText immediately inside a pluralStyle,
+ * and the '|' symbol in a messageText immediately inside a choiceStyle.
+ * <li>See also {@link #UMessagePatternApostropheMode}
+ * <li>In argStyleText, every single ASCII apostrophe begins and ends quoted literal text,
+ * and unquoted {curly braces} must occur in matched pairs.
+ * </ul>
+ *
+ * <p>Recommendation: Use the real apostrophe (single quote) character
+ * \htmlonly’\endhtmlonly (U+2019) for
+ * human-readable text, and use the ASCII apostrophe ' (U+0027)
+ * only in program syntax, like quoting in MessageFormat.
+ * See the annotations for U+0027 Apostrophe in The Unicode Standard.
+ *
+ * <p>The <code>choice</code> argument type is deprecated.
+ * Use <code>plural</code> arguments for proper plural selection,
+ * and <code>select</code> arguments for simple selection among a fixed set of choices.
+ *
+ * <p>The <code>argType</code> and <code>argStyle</code> values are used to create
+ * a <code>Format</code> instance for the format element. The following
+ * table shows how the values map to Format instances. Combinations not
+ * shown in the table are illegal. Any <code>argStyleText</code> must
+ * be a valid pattern string for the Format subclass used.
+ *
+ * <p><table border=1>
+ * <tr>
+ * <th>argType
+ * <th>argStyle
+ * <th>resulting Format object
+ * <tr>
+ * <td colspan=2><i>(none)</i>
+ * <td><code>null</code>
+ * <tr>
+ * <td rowspan=6><code>number</code>
+ * <td><i>(none)</i>
+ * <td><code>NumberFormat.createInstance(getLocale(), status)</code>
+ * <tr>
+ * <td><code>integer</code>
+ * <td><code>NumberFormat.createInstance(getLocale(), kNumberStyle, status)</code>
+ * <tr>
+ * <td><code>currency</code>
+ * <td><code>NumberFormat.createCurrencyInstance(getLocale(), status)</code>
+ * <tr>
+ * <td><code>percent</code>
+ * <td><code>NumberFormat.createPercentInstance(getLocale(), status)</code>
+ * <tr>
+ * <td><i>argStyleText</i>
+ * <td><code>new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status)</code>
+ * <tr>
+ * <td><i>argSkeletonText</i>
+ * <td><code>NumberFormatter::forSkeleton(argSkeletonText, status).locale(getLocale()).toFormat(status)</code>
+ * <tr>
+ * <td rowspan=6><code>date</code>
+ * <td><i>(none)</i>
+ * <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
+ * <tr>
+ * <td><code>short</code>
+ * <td><code>DateFormat.createDateInstance(kShort, getLocale(), status)</code>
+ * <tr>
+ * <td><code>medium</code>
+ * <td><code>DateFormat.createDateInstance(kDefault, getLocale(), status)</code>
+ * <tr>
+ * <td><code>long</code>
+ * <td><code>DateFormat.createDateInstance(kLong, getLocale(), status)</code>
+ * <tr>
+ * <td><code>full</code>
+ * <td><code>DateFormat.createDateInstance(kFull, getLocale(), status)</code>
+ * <tr>
+ * <td><i>argStyleText</i>
+ * <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
+ * <tr>
+ * <td><i>argSkeletonText</i>
+ * <td><code>DateFormat::createInstanceForSkeleton(argSkeletonText, getLocale(), status)</code>
+ * <tr>
+ * <td rowspan=6><code>time</code>
+ * <td><i>(none)</i>
+ * <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
+ * <tr>
+ * <td><code>short</code>
+ * <td><code>DateFormat.createTimeInstance(kShort, getLocale(), status)</code>
+ * <tr>
+ * <td><code>medium</code>
+ * <td><code>DateFormat.createTimeInstance(kDefault, getLocale(), status)</code>
+ * <tr>
+ * <td><code>long</code>
+ * <td><code>DateFormat.createTimeInstance(kLong, getLocale(), status)</code>
+ * <tr>
+ * <td><code>full</code>
+ * <td><code>DateFormat.createTimeInstance(kFull, getLocale(), status)</code>
+ * <tr>
+ * <td><i>argStyleText</i>
+ * <td><code>new SimpleDateFormat(argStyleText, getLocale(), status)</code>
+ * <tr>
+ * <td><code>spellout</code>
+ * <td><i>argStyleText (optional)</i>
+ * <td><code>new RuleBasedNumberFormat(URBNF_SPELLOUT, getLocale(), status)
+ * <br/> .setDefaultRuleset(argStyleText, status);</code>
+ * <tr>
+ * <td><code>ordinal</code>
+ * <td><i>argStyleText (optional)</i>
+ * <td><code>new RuleBasedNumberFormat(URBNF_ORDINAL, getLocale(), status)
+ * <br/> .setDefaultRuleset(argStyleText, status);</code>
+ * <tr>
+ * <td><code>duration</code>
+ * <td><i>argStyleText (optional)</i>
+ * <td><code>new RuleBasedNumberFormat(URBNF_DURATION, getLocale(), status)
+ * <br/> .setDefaultRuleset(argStyleText, status);</code>
+ * </table>
* <p>
*
- * <P>
- * During parsing, an input string is matched against the string
- * template of the MessageFormat to produce an array of Formattable
- * objects. Plain text of the template string is matched directly
- * against intput text. At each position in the template string where
- * a subformat is located, the subformat is called to parse the
- * corresponding segment of input text to produce an output argument.
- * In this way, an array of arguments is created which together
- * constitute the parse result.
- * <P>
- * Parsing may fail or produce unexpected results in a number of
- * circumstances.
- * <UL>
- * <LI>If one of the arguments does not occur in the pattern, it
- * will be returned as a default Formattable.
- * <LI>If the format of an argument is loses information, such as with
- * a choice format where a large number formats to "many", then the
- * parse may not correspond to the originally formatted argument.
- * <LI>MessageFormat does not handle ChoiceFormat recursion during
- * parsing; such parses will fail.
- * <LI>Parsing will not always find a match (or the correct match) if
- * some part of the parse is ambiguous. For example, if the pattern
- * "{1},{2}" is used with the string arguments {"a,b", "c"}, it will
- * format as "a,b,c". When the result is parsed, it will return {"a",
- * "b,c"}.
- * <LI>If a single argument is formatted more than once in the string,
- * then the rightmost subformat in the pattern string will produce the
- * parse result; prior subformats with the same argument index will
- * have no effect.
- * </UL>
- * Here are some examples of usage:
- * <P>
+ * <h4>Argument formatting</h4>
+ *
+ * <p>Arguments are formatted according to their type, using the default
+ * ICU formatters for those types, unless otherwise specified.</p>
+ *
+ * <p>There are also several ways to control the formatting.</p>
+ *
+ * <p>We recommend you use default styles, predefined style values, skeletons,
+ * or preformatted values, but not pattern strings or custom format objects.</p>
+ *
+ * <p>For more details, see the
+ * <a href="http://userguide.icu-project.org/formatparse/messages">ICU User Guide</a>.</p>
+ *
+ * <h4>Usage Information</h4>
+ *
+ * <p>Here are some examples of usage:
* Example 1:
+ *
* <pre>
* \code
* UErrorCode success = U_ZERO_ERROR;
*
* UnicodeString result;
* MessageFormat::format(
- * "At {1,time} on {1,date}, there was {2} on planet {0,number}.",
+ * "At {1,time,::jmm} on {1,date,::dMMMM}, there was {2} on planet {0,number}.",
* arguments, 3, result, success );
*
* cout << "result: " << result << endl;
- * //<output>: At 4:34:20 PM on 23-Mar-98, there was a disturbance
+ * //<output>: At 4:34 PM on March 23, there was a disturbance
* // in the Force on planet 7.
* \endcode
* </pre>
+ *
* Typically, the message format will come from resources, and the
* arguments will be dynamically set at runtime.
- * <P>
- * Example 2:
+ *
+ * <p>Example 2:
+ *
* <pre>
* \code
* success = U_ZERO_ERROR;
* \endcode
* </pre>
*
- * The pattern is of the following form. Legend:
- * <pre>
- * \code
- * {optional item}
- * (group that may be repeated)*
- * \endcode
- * </pre>
- * Do not confuse optional items with items inside quotes braces, such
- * as this: "{". Quoted braces are literals.
- * <pre>
- * \code
- * messageFormatPattern := string ( "{" messageFormatElement "}" string )*
- *
- * messageFormatElement := argumentIndex | argumentName { "," elementFormat }
- *
- * elementFormat := "time" { "," datetimeStyle }
- * | "date" { "," datetimeStyle }
- * | "number" { "," numberStyle }
- * | "choice" "," choiceStyle
*
- * datetimeStyle := "short"
- * | "medium"
- * | "long"
- * | "full"
- * | dateFormatPattern
- *
- * numberStyle := "currency"
- * | "percent"
- * | "integer"
- * | numberFormatPattern
- *
- * choiceStyle := choiceFormatPattern
- *
- * pluralStyle := pluralFormatPattern
- * \endcode
- * </pre>
- * If there is no elementFormat, then the argument must be a string,
- * which is substituted. If there is no dateTimeStyle or numberStyle,
- * then the default format is used (e.g. NumberFormat::createInstance(),
- * DateFormat::createTimeInstance(DateFormat::kDefault, ...) or DateFormat::createDateInstance(DateFormat::kDefault, ...). For
- * a ChoiceFormat, the pattern must always be specified, since there
- * is no default.
- * <P>
- * In strings, single quotes can be used to quote syntax characters.
- * A literal single quote is represented by '', both within and outside
- * of single-quoted segments. Inside a
- * messageFormatElement, quotes are <EM>not</EM> removed. For example,
- * {1,number,$'#',##} will produce a number format with the pound-sign
- * quoted, with a result such as: "$#31,45".
- * <P>
- * If a pattern is used, then unquoted braces in the pattern, if any,
- * must match: that is, "ab {0} de" and "ab '}' de" are ok, but "ab
- * {0'}' de" and "ab } de" are not.
- * <p>
- * <dl><dt><b>Warning:</b><dd>The rules for using quotes within message
- * format patterns unfortunately have shown to be somewhat confusing.
- * In particular, it isn't always obvious to localizers whether single
- * quotes need to be doubled or not. Make sure to inform localizers about
- * the rules, and tell them (for example, by using comments in resource
- * bundle source files) which strings will be processed by MessageFormat.
- * Note that localizers may need to use single quotes in translated
- * strings where the original version doesn't have them.
- * <br>Note also that the simplest way to avoid the problem is to
- * use the real apostrophe (single quote) character U+2019 (') for
- * human-readable text, and to use the ASCII apostrophe (U+0027 ' )
- * only in program syntax, like quoting in MessageFormat.
- * See the annotations for U+0027 Apostrophe in The Unicode Standard.</p>
- * </dl>
- * <P>
- * The argumentIndex is a non-negative integer, which corresponds to the
- * index of the arguments presented in an array to be formatted. The
- * first argument has argumentIndex 0.
- * <P>
- * It is acceptable to have unused arguments in the array. With missing
- * arguments or arguments that are not of the right class for the
- * specified format, a failing UErrorCode result is set.
- * <P>
- * For more sophisticated patterns, you can use a ChoiceFormat to get
- * output:
- * <pre>
- * \code
- * UErrorCode success = U_ZERO_ERROR;
- * MessageFormat* form("The disk \"{1}\" contains {0}.", success);
- * double filelimits[] = {0,1,2};
- * UnicodeString filepart[] = {"no files","one file","{0,number} files"};
- * ChoiceFormat* fileform = new ChoiceFormat(filelimits, filepart, 3);
- * form.setFormat(1, *fileform); // NOT zero, see below
- *
- * Formattable testArgs[] = {1273L, "MyDisk"};
- *
- * UnicodeString string;
- * FieldPosition fpos = 0;
- * cout << form.format(testArgs, 2, string, fpos, success) << endl;
- *
- * // output, with different testArgs
- * // output: The disk "MyDisk" contains no files.
- * // output: The disk "MyDisk" contains one file.
- * // output: The disk "MyDisk" contains 1,273 files.
- * \endcode
- * </pre>
- * You can either do this programmatically, as in the above example,
- * or by using a pattern (see ChoiceFormat for more information) as in:
+ * <p>For messages that include plural forms, you can use a plural argument:
* <pre>
* \code
- * form.applyPattern(
- * "There {0,choice,0#are no files|1#is one file|1<are {0,number,integer} files}.");
+ * success = U_ZERO_ERROR;
+ * MessageFormat msgFmt(
+ * "{num_files, plural, "
+ * "=0{There are no files on disk \"{disk_name}\".}"
+ * "=1{There is one file on disk \"{disk_name}\".}"
+ * "other{There are # files on disk \"{disk_name}\".}}",
+ * Locale("en"),
+ * success);
+ * FieldPosition fpos = 0;
+ * Formattable testArgs[] = {0L, "MyDisk"};
+ * UnicodeString testArgsNames[] = {"num_files", "disk_name"};
+ * UnicodeString result;
+ * cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
+ * testArgs[0] = 3L;
+ * cout << msgFmt.format(testArgs, testArgsNames, 2, result, fpos, 0, success);
* \endcode
+ * <em>output</em>:
+ * There are no files on disk "MyDisk".
+ * There are 3 files on "MyDisk".
* </pre>
- * <P>
- * <EM>Note:</EM> As we see above, the string produced by a ChoiceFormat in
- * MessageFormat is treated specially; occurences of '{' are used to
- * indicated subformats, and cause recursion. If you create both a
- * MessageFormat and ChoiceFormat programmatically (instead of using
- * the string patterns), then be careful not to produce a format that
- * recurses on itself, which will cause an infinite loop.
- * <P>
- * <EM>Note:</EM> Subformats are numbered by their order in the pattern.
- * This is <EM>not</EM> the same as the argumentIndex.
- * <pre>
- * \code
- * For example: with "abc{2}def{3}ghi{0}...",
+ * See {@link PluralFormat} and {@link PluralRules} for details.
*
- * format0 affects the first variable {2}
- * format1 affects the second variable {3}
- * format2 affects the second variable {0}
- * \endcode
- * </pre>
+ * <h4><a name="synchronization">Synchronization</a></h4>
+ *
+ * <p>MessageFormats are not synchronized.
+ * It is recommended to create separate format instances for each thread.
+ * If multiple threads access a format concurrently, it must be synchronized
+ * externally.
*
- * <p><em>User subclasses are not supported.</em> While clients may write
- * subclasses, such code will not necessarily work and will not be
- * guaranteed to work stably from release to release.
+ * @stable ICU 2.0
*/
class U_I18N_API MessageFormat : public Format {
public:
+#ifndef U_HIDE_OBSOLETE_API
/**
* Enum type for kMaxFormat.
* @obsolete ICU 3.0. The 10-argument limit was removed as of ICU 2.6,
*/
kMaxFormat = 10
};
+#endif /* U_HIDE_OBSOLETE_API */
/**
* Constructs a new MessageFormat using the given pattern and the
* Constructs a new MessageFormat using the given pattern and locale.
* @param pattern Pattern used to construct object.
* @param newLocale The locale to use for formatting dates and numbers.
- * @param parseError Struct to recieve information on position
- * of error within the pattern.
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
* @param status Input/output error code. If the
* pattern cannot be parsed, set to failure code.
* @stable ICU 2.0
virtual UBool operator==(const Format& other) const;
/**
- * Sets the locale. This locale is used for fetching default number or date
- * format information.
+ * Sets the locale to be used for creating argument Format objects.
* @param theLocale the new locale value to be set.
* @stable ICU 2.0
*/
virtual void setLocale(const Locale& theLocale);
/**
- * Gets the locale. This locale is used for fetching default number or date
+ * Gets the locale used for creating argument Format objects.
* format information.
* @return the locale of the object.
* @stable ICU 2.0
* Applies the given pattern string to this message format.
*
* @param pattern The pattern to be applied.
- * @param parseError Struct to recieve information on position
- * of error within pattern.
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
* @param status Input/output error code. If the
* pattern cannot be parsed, set to failure code.
* @stable ICU 2.0
UParseError& parseError,
UErrorCode& status);
+ /**
+ * Sets the UMessagePatternApostropheMode and the pattern used by this message format.
+ * Parses the pattern and caches Format objects for simple argument types.
+ * Patterns and their interpretation are specified in the
+ * <a href="#patterns">class description</a>.
+ * <p>
+ * This method is best used only once on a given object to avoid confusion about the mode,
+ * and after constructing the object with an empty pattern string to minimize overhead.
+ *
+ * @param pattern The pattern to be applied.
+ * @param aposMode The new apostrophe mode.
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be NULL.
+ * @param status Input/output error code. If the
+ * pattern cannot be parsed, set to failure code.
+ * @stable ICU 4.8
+ */
+ virtual void applyPattern(const UnicodeString& pattern,
+ UMessagePatternApostropheMode aposMode,
+ UParseError* parseError,
+ UErrorCode& status);
+
+ /**
+ * @return this instance's UMessagePatternApostropheMode.
+ * @stable ICU 4.8
+ */
+ UMessagePatternApostropheMode getApostropheMode() const {
+ return msgPattern.getApostropheMode();
+ }
+
/**
* Returns a pattern that can be used to recreate this object.
*
* ownership is retained by the caller. If the call fails because
* memory cannot be allocated, then the formats will be deleted
* by this method, and this object will remain unchanged.
- *
+ *
+ * <p>If this format uses named arguments, the new formats are discarded
+ * and this format remains unchanged.
+ *
* @stable ICU 2.0
* @param formatsToAdopt the format to be adopted.
* @param count the size of the array.
* Each item in the array is cloned into the internal array.
* If the call fails because memory cannot be allocated, then this
* object will remain unchanged.
- *
+ *
+ * <p>If this format uses named arguments, the new formats are discarded
+ * and this format remains unchanged.
+ *
* @stable ICU 2.0
* @param newFormats the new format to be set.
* @param cnt the size of the array.
* The caller should not delete the Format object after this call.
* If the number is over the number of formats already set,
* the item will be deleted and ignored.
+ *
+ * <p>If this format uses named arguments, the new format is discarded
+ * and this format remains unchanged.
+ *
* @stable ICU 2.0
* @param formatNumber index of the subformat.
* @param formatToAdopt the format to be adopted.
/**
* Gets format names. This function returns formatNames in StringEnumerations
- * which can be used with getFormat() and setFormat() to export formattable
- * array from current MessageFormat to another. It is caller's resposibility
+ * which can be used with getFormat() and setFormat() to export formattable
+ * array from current MessageFormat to another. It is the caller's responsibility
* to delete the returned formatNames.
* @param status output param set to success/failure code.
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
virtual StringEnumeration* getFormatNames(UErrorCode& status);
-
+
/**
- * Gets subformat pointer for given format name.
+ * Gets subformat pointer for given format name.
* This function supports both named and numbered
- * arguments-- if numbered, the formatName is the
+ * arguments. If numbered, the formatName is the
* corresponding UnicodeStrings (e.g. "0", "1", "2"...).
* The returned Format object should not be deleted by the caller,
- * nor should the ponter of other object . The pointer and its
+ * nor should the ponter of other object . The pointer and its
* contents remain valid only until the next call to any method
- * of this class is made with this object.
+ * of this class is made with this object.
* @param formatName the name or number specifying a format
* @param status output param set to success/failure code.
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
virtual Format* getFormat(const UnicodeString& formatName, UErrorCode& status);
-
+
/**
* Sets one subformat for given format name.
- * See the class description about format name.
+ * See the class description about format name.
* This function supports both named and numbered
* arguments-- if numbered, the formatName is the
* corresponding UnicodeStrings (e.g. "0", "1", "2"...).
* @param formatName Name of the subformat.
* @param format the format to be set.
* @param status output param set to success/failure code.
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
virtual void setFormat(const UnicodeString& formatName, const Format& format, UErrorCode& status);
-
+
/**
* Sets one subformat for given format name.
- * See the class description about format name.
+ * See the class description about format name.
* This function supports both named and numbered
* arguments-- if numbered, the formatName is the
* corresponding UnicodeStrings (e.g. "0", "1", "2"...).
* @param formatName Name of the subformat.
* @param formatToAdopt Format to be adopted.
* @param status output param set to success/failure code.
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
virtual void adoptFormat(const UnicodeString& formatName, Format* formatToAdopt, UErrorCode& status);
-
/**
* Gets an array of subformats of this object. The returned array
* should not be deleted by the caller, nor should the pointers
* within the array. The array and its contents remain valid only
- * until the next call to any method of this class is made with
- * this object. See the class description about format numbering.
+ * until the next call to this format. See the class description
+ * about format numbering.
+ *
* @param count output parameter to receive the size of the array
* @return an array of count Format* objects, or NULL if out of
* memory. Any or all of the array elements may be NULL.
*/
virtual const Format** getFormats(int32_t& count) const;
+
+ using Format::format;
+
/**
* Formats the given array of arguments into a user-readable string.
* Does not take ownership of the Formattable* array or its contents.
*
+ * <p>If this format uses named arguments, appendTo is unchanged and
+ * status is set to U_ILLEGAL_ARGUMENT_ERROR.
+ *
* @param source An array of objects to be formatted.
* @param count The number of elements of 'source'.
* @param appendTo Output parameter to receive result.
* @return Reference to 'appendTo' parameter.
* @stable ICU 2.0
*/
- UnicodeString& format( const Formattable* source,
- int32_t count,
- UnicodeString& appendTo,
- FieldPosition& ignore,
- UErrorCode& status) const;
+ UnicodeString& format(const Formattable* source,
+ int32_t count,
+ UnicodeString& appendTo,
+ FieldPosition& ignore,
+ UErrorCode& status) const;
/**
* Formats the given array of arguments into a user-readable string
* using the given pattern.
*
+ * <p>If this format uses named arguments, appendTo is unchanged and
+ * status is set to U_ILLEGAL_ARGUMENT_ERROR.
+ *
* @param pattern The pattern.
* @param arguments An array of objects to be formatted.
* @param count The number of elements of 'source'.
* object of type kArray. If the Formattable object type is not of
* type kArray, then returns a failing UErrorCode.
*
+ * <p>If this format uses named arguments, appendTo is unchanged and
+ * status is set to U_ILLEGAL_ARGUMENT_ERROR.
+ *
* @param obj A Formattable of type kArray containing
* arguments to be formatted.
* @param appendTo Output parameter to receive result.
FieldPosition& pos,
UErrorCode& status) const;
- /**
- * Formats the given array of arguments into a user-readable
- * string. The array must be stored within a single Formattable
- * object of type kArray. If the Formattable object type is not of
- * type kArray, then returns a failing UErrorCode.
- *
- * @param obj The object to format
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * @return Reference to 'appendTo' parameter.
- * @stable ICU 2.0
- */
- UnicodeString& format(const Formattable& obj,
- UnicodeString& appendTo,
- UErrorCode& status) const;
-
-
/**
* Formats the given array of arguments into a user-defined argument name
* array. This function supports both named and numbered
*
* @param argumentNames argument name array
* @param arguments An array of objects to be formatted.
- * @param count The number of elements of 'argumentNames' and
+ * @param count The number of elements of 'argumentNames' and
* arguments. The number of argumentNames and arguments
* must be the same.
* @param appendTo Output parameter to receive result.
* the array and its contents.
* @stable ICU 2.0
*/
- virtual Formattable* parse( const UnicodeString& source,
- ParsePosition& pos,
- int32_t& count) const;
+ virtual Formattable* parse(const UnicodeString& source,
+ ParsePosition& pos,
+ int32_t& count) const;
/**
* Parses the given string into an array of output arguments.
*
+ * <p>If this format uses named arguments, status is set to
+ * U_ARGUMENT_TYPE_MISMATCH.
+ *
* @param source String to be parsed.
* @param count Output param to receive size of returned array.
* @param status Input/output error code. If the
- * pattern cannot be parsed, set to failure code.
- * If the MessageFormat is named argument, the status is
- * set to U_ARGUMENT_TYPE_MISMATCH.
+ * pattern cannot be parsed, set to failure code.
* @return an array of parsed arguments. The caller owns both
- * the array and its contents. Return NULL if status is not U_ZERO_ERROR.
- *
+ * the array and its contents. Returns NULL if status is not U_ZERO_ERROR.
+ *
* @stable ICU 2.0
*/
- virtual Formattable* parse( const UnicodeString& source,
- int32_t& count,
- UErrorCode& status) const;
+ virtual Formattable* parse(const UnicodeString& source,
+ int32_t& count,
+ UErrorCode& status) const;
/**
* Parses the given string into an array of output arguments
/**
* Convert an 'apostrophe-friendly' pattern into a standard
* pattern. Standard patterns treat all apostrophes as
- * quotes, which is problematic in some languages, e.g.
+ * quotes, which is problematic in some languages, e.g.
* French, where apostrophe is commonly used. This utility
* assumes that only an unpaired apostrophe immediately before
* a brace is a true quote. Other unpaired apostrophes are paired,
* @return the standard equivalent of the original pattern
* @stable ICU 3.4
*/
- static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
+ static UnicodeString autoQuoteApostrophe(const UnicodeString& pattern,
UErrorCode& status);
-
+
+
/**
* Returns true if this MessageFormat uses named arguments,
* and false otherwise. See class description.
*
* @return true if named arguments are used.
- * @draft ICU 4.0
+ * @stable ICU 4.0
*/
UBool usesNamedArguments() const;
-
+
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * This API is for ICU internal use only.
+ * Please do not use it.
+ *
+ * Returns argument types count in the parsed pattern.
+ * Used to distinguish pattern "{0} d" and "d".
+ *
+ * @return The number of formattable types in the pattern
+ * @internal
+ */
+ int32_t getArgTypeCount() const;
+#endif /* U_HIDE_INTERNAL_API */
+
/**
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
* This method is to implement a simple version of RTTI, since not all
* @stable ICU 2.0
*/
static UClassID U_EXPORT2 getStaticClassID(void);
-
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Compares two Format objects. This is used for constructing the hash
+ * tables.
+ *
+ * @param left pointer to a Format object. Must not be NULL.
+ * @param right pointer to a Format object. Must not be NULL.
+ *
+ * @return whether the two objects are the same
+ * @internal
+ */
+ static UBool equalFormats(const void* left, const void* right);
+#endif /* U_HIDE_INTERNAL_API */
+
private:
Locale fLocale;
- UnicodeString fPattern;
+ MessagePattern msgPattern;
Format** formatAliases; // see getFormats
int32_t formatAliasesCapacity;
- UProperty idStart;
- UProperty idContinue;
MessageFormat(); // default constructor not implemented
- /*
- * A structure representing one subformat of this MessageFormat.
- * Each subformat has a Format object, an offset into the plain
- * pattern text fPattern, and an argument number. The argument
- * number corresponds to the array of arguments to be formatted.
- * @internal
- */
- class Subformat;
+ /**
+ * This provider helps defer instantiation of a PluralRules object
+ * until we actually need to select a keyword.
+ * For example, if the number matches an explicit-value selector like "=1"
+ * we do not need any PluralRules.
+ */
+ class U_I18N_API PluralSelectorProvider : public PluralFormat::PluralSelector {
+ public:
+ PluralSelectorProvider(const MessageFormat &mf, UPluralType type);
+ virtual ~PluralSelectorProvider();
+ virtual UnicodeString select(void *ctx, double number, UErrorCode& ec) const;
- /**
- * A MessageFormat contains an array of subformats. This array
- * needs to grow dynamically if the MessageFormat is modified.
- */
- Subformat* subformats;
- int32_t subformatCount;
- int32_t subformatCapacity;
+ void reset();
+ private:
+ const MessageFormat &msgFormat;
+ PluralRules* rules;
+ UPluralType type;
+ };
/**
* A MessageFormat formats an array of arguments. Each argument
int32_t argTypeCapacity;
/**
- * Is true iff all argument names are non-negative numbers.
- *
- */
- UBool isArgNumeric;
+ * TRUE if there are different argTypes for the same argument.
+ * This only matters when the MessageFormat is used in the plain C (umsg_xxx) API
+ * where the pattern argTypes determine how the va_arg list is read.
+ */
+ UBool hasArgTypeConflicts;
// Variable-size array management
- UBool allocateSubformats(int32_t capacity);
- UBool allocateArgTypes(int32_t capacity);
+ UBool allocateArgTypes(int32_t capacity, UErrorCode& status);
/**
* Default Format objects used when no format is specified and a
NumberFormat* defaultNumberFormat;
DateFormat* defaultDateFormat;
+ UHashtable* cachedFormatters;
+ UHashtable* customFormatArgStarts;
+
+ PluralSelectorProvider pluralProvider;
+ PluralSelectorProvider ordinalProvider;
+
/**
* Method to retrieve default formats (or NULL on failure).
* These are semantically const, but may modify *this.
* @return the index of the list which matches the keyword s.
*/
static int32_t findKeyword( const UnicodeString& s,
- const UChar * const *list);
+ const char16_t * const *list);
+
+ /**
+ * Thin wrapper around the format(... AppendableWrapper ...) variant.
+ * Wraps the destination UnicodeString into an AppendableWrapper and
+ * supplies default values for some other parameters.
+ */
+ UnicodeString& format(const Formattable* arguments,
+ const UnicodeString *argumentNames,
+ int32_t cnt,
+ UnicodeString& appendTo,
+ FieldPosition* pos,
+ UErrorCode& status) const;
/**
- * Formats the array of arguments and copies the result into the
- * result buffer, updates the field position.
+ * Formats the arguments and writes the result into the
+ * AppendableWrapper, updates the field position.
*
- * @param arguments The formattable objects array.
- * @param cnt The array count.
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
- * @param status Field position status.
- * @param recursionProtection
- * Initially zero. Bits 0..9 are used to indicate
- * that a parameter has already been seen, to
- * avoid recursion. Currently unused.
- * @param success The error code status.
- * @return Reference to 'appendTo' parameter.
+ * @param msgStart Index to msgPattern part to start formatting from.
+ * @param plNumber NULL except when formatting a plural argument sub-message
+ * where a '#' is replaced by the format string for this number.
+ * @param arguments The formattable objects array. (Must not be NULL.)
+ * @param argumentNames NULL if numbered values are used. Otherwise the same
+ * length as "arguments", and each entry is the name of the
+ * corresponding argument in "arguments".
+ * @param cnt The length of arguments (and of argumentNames if that is not NULL).
+ * @param appendTo Output parameter to receive the result.
+ * The result string is appended to existing contents.
+ * @param pos Field position status.
+ * @param success The error code status.
+ */
+ void format(int32_t msgStart,
+ const void *plNumber,
+ const Formattable* arguments,
+ const UnicodeString *argumentNames,
+ int32_t cnt,
+ AppendableWrapper& appendTo,
+ FieldPosition* pos,
+ UErrorCode& success) const;
+
+ UnicodeString getArgName(int32_t partIndex);
+
+ void setArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
+
+ void setCustomArgStartFormat(int32_t argStart, Format* formatter, UErrorCode& status);
+
+ int32_t nextTopLevelArgStart(int32_t partIndex) const;
+
+ UBool argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber);
+
+ void cacheExplicitFormats(UErrorCode& status);
+
+ int32_t skipLeadingSpaces(UnicodeString& style);
+
+ Format* createAppropriateFormat(UnicodeString& type,
+ UnicodeString& style,
+ Formattable::Type& formattableType,
+ UParseError& parseError,
+ UErrorCode& ec);
+
+ const Formattable* getArgFromListByName(const Formattable* arguments,
+ const UnicodeString *argumentNames,
+ int32_t cnt, UnicodeString& name) const;
+
+ Formattable* parse(int32_t msgStart,
+ const UnicodeString& source,
+ ParsePosition& pos,
+ int32_t& count,
+ UErrorCode& ec) const;
+
+ FieldPosition* updateMetaData(AppendableWrapper& dest, int32_t prevLength,
+ FieldPosition* fp, const Formattable* argId) const;
+
+ /**
+ * Finds the "other" sub-message.
+ * @param partIndex the index of the first PluralFormat argument style part.
+ * @return the "other" sub-message start part index.
*/
- UnicodeString& format( const Formattable* arguments,
- int32_t cnt,
- UnicodeString& appendTo,
- FieldPosition& status,
- int32_t recursionProtection,
- UErrorCode& success) const;
-
- UnicodeString& format( const Formattable* arguments,
- const UnicodeString *argumentNames,
- int32_t cnt,
- UnicodeString& appendTo,
- FieldPosition& status,
- int32_t recursionProtection,
- UErrorCode& success) const;
-
- void makeFormat(int32_t offsetNumber,
- UnicodeString* segments,
- UParseError& parseError,
- UErrorCode& success);
+ int32_t findOtherSubMessage(int32_t partIndex) const;
/**
- * Convenience method that ought to be in NumberFormat
+ * Returns the ARG_START index of the first occurrence of the plural number in a sub-message.
+ * Returns -1 if it is a REPLACE_NUMBER.
+ * Returns 0 if there is neither.
*/
- NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
+ int32_t findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const;
+
+ Format* getCachedFormatter(int32_t argumentNumber) const;
+
+ UnicodeString getLiteralStringUntilNextArgument(int32_t from) const;
+
+ void copyObjects(const MessageFormat& that, UErrorCode& ec);
+
+ void formatComplexSubMessage(int32_t msgStart,
+ const void *plNumber,
+ const Formattable* arguments,
+ const UnicodeString *argumentNames,
+ int32_t cnt,
+ AppendableWrapper& appendTo,
+ UErrorCode& success) const;
/**
- * Checks the range of the source text to quote the special
- * characters, { and ' and copy to target buffer.
- * @param source
- * @param start the text offset to start the process of in the source string
- * @param end the text offset to end the process of in the source string
- * @param appendTo Output parameter to receive result.
- * Result is appended to existing contents.
+ * Convenience method that ought to be in NumberFormat
*/
- static void copyAndFixQuotes(const UnicodeString& appendTo, int32_t start, int32_t end, UnicodeString& target);
+ NumberFormat* createIntegerFormat(const Locale& locale, UErrorCode& status) const;
/**
- * Returns array of argument types in the parsed pattern
+ * Returns array of argument types in the parsed pattern
* for use in C API. Only for the use of umsg_vformat(). Not
* for public consumption.
* @param listCount Output parameter to receive the size of array
* @return The array of formattable types in the pattern
- * @internal
*/
const Formattable::Type* getArgTypeList(int32_t& listCount) const {
listCount = argTypeCount;
- return argTypes;
+ return argTypes;
}
-
+
/**
- * Returns FALSE if the argument name is not legal.
- * @param argName argument name.
- * @return TRUE if the argument name is legal, otherwise return FALSE.
+ * Resets the internal MessagePattern, and other associated caches.
*/
- UBool isLegalArgName(const UnicodeString& argName) const;
-
+ void resetPattern();
+
+ /**
+ * A DummyFormatter that we use solely to store a NULL value. UHash does
+ * not support storing NULL values.
+ */
+ class U_I18N_API DummyFormat : public Format {
+ public:
+ virtual UBool operator==(const Format&) const;
+ virtual Format* clone() const;
+ virtual UnicodeString& format(const Formattable& obj,
+ UnicodeString& appendTo,
+ UErrorCode& status) const;
+ virtual UnicodeString& format(const Formattable&,
+ UnicodeString& appendTo,
+ FieldPosition&,
+ UErrorCode& status) const;
+ virtual UnicodeString& format(const Formattable& obj,
+ UnicodeString& appendTo,
+ FieldPositionIterator* posIter,
+ UErrorCode& status) const;
+ virtual void parseObject(const UnicodeString&,
+ Formattable&,
+ ParsePosition&) const;
+ };
+
friend class MessageFormatAdapter; // getFormatTypeList() access
};
-inline UnicodeString&
-MessageFormat::format(const Formattable& obj,
- UnicodeString& appendTo,
- UErrorCode& status) const {
- return Format::format(obj, appendTo, status);
-}
U_NAMESPACE_END
+#endif // U_SHOW_CPLUSPLUS_API
#endif /* #if !UCONFIG_NO_FORMATTING */
#endif // _MSGFMT
//eof
-