icuSources/i18n/number_patternstring.h

   1 // © 2017 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3
   4 #include "unicode/utypes.h"
   5
   6 #if !UCONFIG_NO_FORMATTING
   7 #ifndef __NUMBER_PATTERNSTRING_H__
   8 #define __NUMBER_PATTERNSTRING_H__
   9
  10
  11 #include <cstdint>
  12 #include "unicode/unum.h"
  13 #include "unicode/unistr.h"
  14 #include "number_types.h"
  15 #include "number_decimalquantity.h"
  16 #include "number_decimfmtprops.h"
  17 #include "number_affixutils.h"
  18
  19 U_NAMESPACE_BEGIN namespace number {
  20 namespace impl {
  21
  22 // Forward declaration: the class is defined later in this header and is named as a friend of ParsedPatternInfo.
  23 class PatternParser;
  24
  25 // Exported as U_I18N_API because it is a public member field of exported ParsedSubpatternInfo
     // A pair of int32_t offsets delimiting a span; both default to 0. ParsedSubpatternInfo keeps
     // one of these each for its prefix, suffix and padding.
     // NOTE(review): presumably these index into the pattern string; whether `end` is inclusive
     // or exclusive is not visible in this header -- confirm against the parser implementation.
  26 struct U_I18N_API Endpoints {
  27     int32_t start = 0; // offset where the span begins
  28     int32_t end = 0; // offset where the span ends
  29 };
  30
  31 // Exported as U_I18N_API because it is a public member field of exported ParsedPatternInfo
     // Plain record of what was found in one subpattern of a pattern string. ParsedPatternInfo
     // holds two of these, `positive` and `negative`. Every field has a default value, so a
     // freshly constructed object describes "nothing seen yet".
     // NOTE(review): the per-field notes below are inferred from the field names only; confirm
     // them against the parser implementation, which is not part of this header.
  32 struct U_I18N_API ParsedSubpatternInfo {
  33     uint64_t groupingSizes = 0x0000ffffffff0000L; // default has bits 16-47 set; NOTE(review): looks like packed 16-bit fields -- confirm layout
     // Counters for the integer part of the subpattern (presumably '#', digit and '@' symbols).
  34     int32_t integerLeadingHashSigns = 0;
  35     int32_t integerTrailingHashSigns = 0;
  36     int32_t integerNumerals = 0;
  37     int32_t integerAtSigns = 0;
  38     int32_t integerTotal = 0; // for convenience (presumably the sum of the integer counters above -- TODO confirm)
     // Counters for the fraction part of the subpattern.
  39     int32_t fractionNumerals = 0;
  40     int32_t fractionHashSigns = 0;
  41     int32_t fractionTotal = 0; // for convenience (presumably the sum of the fraction counters above -- TODO confirm)
  42     bool hasDecimal = false;
  43     int32_t widthExceptAffixes = 0;
  44     // Note: NullableValue causes issues here with std::move.
     // Hence, it seems, padding presence is tracked with a plain bool next to paddingLocation.
  45     bool hasPadding = false;
  46     UNumberFormatPadPosition paddingLocation = UNUM_PAD_BEFORE_PREFIX;
  47     DecimalQuantity rounding;
  48     bool exponentHasPlusSign = false;
  49     int32_t exponentZeros = 0;
     // Flags recording which special symbols were seen.
  50     bool hasPercentSign = false;
  51     bool hasPerMilleSign = false;
  52     bool hasCurrencySign = false;
  53     bool hasMinusSign = false;
  54     bool hasPlusSign = false;
  55
     // Spans for the prefix, suffix and padding of this subpattern (see Endpoints).
  56     Endpoints prefixEndpoints;
  57     Endpoints suffixEndpoints;
  58     Endpoints paddingEndpoints;
  59 };
  60
  61 // Exported as U_I18N_API because it is needed for the unit test PatternStringTest
     // Owns a pattern string plus the data recorded for its positive and negative subpatterns,
     // and exposes that data through the U_OVERRIDE accessors declared below.
     // PatternParser is a friend, so it can reach the private parser state and consume* methods.
  62 struct U_I18N_API ParsedPatternInfo : public AffixPatternProvider, public UMemory {
  63     UnicodeString pattern; // the pattern text; `state` keeps a reference to this member
  64     ParsedSubpatternInfo positive;
  65     ParsedSubpatternInfo negative; // whether a negative subpattern was present is remembered in fHasNegativeSubpattern
  66
     // Binds the parser state to this object's own `pattern` member and starts with no
     // subpattern selected.
  67     ParsedPatternInfo()
  68             : state(this->pattern), currentSubpattern(nullptr) {}
  69
  70     ~ParsedPatternInfo() U_OVERRIDE = default;
  71
  72     // Need to declare this explicitly because of the destructor
     // (a user-declared destructor suppresses the implicitly declared move assignment operator).
  73     ParsedPatternInfo& operator=(ParsedPatternInfo&& src) U_NOEXCEPT = default;
  74
     // NOTE(review): presumably returns the length of the span described by `endpoints`;
     // the definition is not in this header -- confirm.
  75     static int32_t getLengthFromEndpoints(const Endpoints& endpoints);
  76
     // Accessors marked U_OVERRIDE, i.e. overriding virtuals of a base class (presumably
     // AffixPatternProvider). NOTE(review): the meaning of `flags` is defined outside this header.
  77     char16_t charAt(int32_t flags, int32_t index) const U_OVERRIDE;
  78
  79     int32_t length(int32_t flags) const U_OVERRIDE;
  80
  81     UnicodeString getString(int32_t flags) const U_OVERRIDE;
  82
  83     bool positiveHasPlusSign() const U_OVERRIDE;
  84
  85     bool hasNegativeSubpattern() const U_OVERRIDE;
  86
  87     bool negativeHasMinusSign() const U_OVERRIDE;
  88
  89     bool hasCurrencySign() const U_OVERRIDE;
  90
  91     bool containsSymbolType(AffixPatternType type, UErrorCode& status) const U_OVERRIDE;
  92
  93     bool hasBody() const U_OVERRIDE;
  94
  95   private:
     // Cursor over the pattern string used while parsing: a reference to the text plus an offset.
  96     struct U_I18N_API ParserState {
  97         const UnicodeString& pattern; // reference to the parent
  98         int32_t offset = 0;
  99
         // Stores a reference to _pattern; no copy of the string is made.
 100         explicit ParserState(const UnicodeString& _pattern)
 101                 : pattern(_pattern) {}
 102
 103         ParserState& operator=(ParserState&& src) U_NOEXCEPT {
 104             // Leave pattern reference alone; it will continue to point to the same place in memory,
 105             // which gets overwritten by ParsedPatternInfo's implicit move assignment.
 106             offset = src.offset;
 107             return *this;
 108         }
 109
         // NOTE(review): presumably peek() reads the code point at `offset` without advancing and
         // next() reads it and advances; both are defined outside this header -- confirm.
 110         UChar32 peek();
 111
 112         UChar32 next();
 113
 114         // TODO: We don't currently do anything with the message string.
 115         // This method is here as a shell for Java compatibility.
 116         inline void toParseException(const char16_t* message) { (void) message; }
 117     } state;
 118
 119     // NOTE: In Java, these are written as pure functions.
 120     // In C++, they're written as methods.
 121     // The behavior is the same.
 122
 123     // Mutable transient pointer:
 124     ParsedSubpatternInfo* currentSubpattern;
 125
 126     // In Java, "negative == null" tells us whether or not we had a negative subpattern.
 127     // In C++, we need to remember in another boolean.
 128     bool fHasNegativeSubpattern = false;
 129
 130     const Endpoints& getEndpoints(int32_t flags) const;
 131
 132     /** Run the recursive descent parser. */
 133     void consumePattern(const UnicodeString& patternString, UErrorCode& status);
 134
     // The remaining consume* methods are the individual steps of that parser; each takes the
     // ICU error code by reference.
 135     void consumeSubpattern(UErrorCode& status);
 136
 137     void consumePadding(PadPosition paddingLocation, UErrorCode& status);
 138
 139     void consumeAffix(Endpoints& endpoints, UErrorCode& status);
 140
 141     void consumeLiteral(UErrorCode& status);
 142
 143     void consumeFormat(UErrorCode& status);
 144
 145     void consumeIntegerFormat(UErrorCode& status);
 146
 147     void consumeFractionFormat(UErrorCode& status);
 148
 149     void consumeExponent(UErrorCode& status);
 150
 151     friend class PatternParser;
 152 };
 153
 154 enum IgnoreRounding { // passed as the `ignoreRounding` argument of the PatternParser functions declared below
     // Per the parseToProperties documentation, this controls whether rounding information
     // (minFrac, maxFrac, and rounding increment) is left out when a pattern is parsed.
     // NOTE(review): the exact condition behind IF_CURRENCY is implemented elsewhere -- confirm.
 155     IGNORE_ROUNDING_NEVER = 0, IGNORE_ROUNDING_IF_CURRENCY = 1, IGNORE_ROUNDING_ALWAYS = 2
 156 };
 157
 158 class U_I18N_API PatternParser { // static-only entry points that turn a pattern string into parsed data or a property bag
 159   public:
 160     /**
 161      * Runs the recursive descent parser on the given pattern string, storing raw information about the pattern
 162      * string in patternInfo.
 163      *
 164      * <p>
 165      * To obtain a more useful form of the data, consider using {@link #parseToProperties} instead.
 166      *
 167      * TODO: Change argument type to const char16_t* instead of UnicodeString?
 168      *
 169      * @param patternString
 170      *            The LDML decimal format pattern (Excel-style pattern) to parse.
 171      * @param patternInfo
              *            Output: receives the results of the parse (the function itself returns void).
              * @param status
              *            ICU error code. NOTE(review): presumably set when the pattern has a syntax error -- confirm
              *            in the implementation.
 172      */
 173     static void parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo,
 174                                    UErrorCode& status);
 175
 176     /**
 177      * Parses a pattern string into a new property bag.
 178      *
 179      * @param pattern
 180      *            The pattern string, like "#,##0.00"
 181      * @param ignoreRounding
 182      *            Whether to leave out rounding information (minFrac, maxFrac, and rounding increment) when parsing the
 183      *            pattern. This may be desirable if a custom rounding mode, such as CurrencyUsage, is to be used
 184      *            instead.
              * @param status
              *            ICU error code. NOTE(review): the Java version throws IllegalArgumentException on a syntax
              *            error; the C++ version presumably reports it through status instead -- confirm.
 185      * @return A property bag object.
 186      */
 187
 188
 189     static DecimalFormatProperties parseToProperties(const UnicodeString& pattern,
 190                                                      IgnoreRounding ignoreRounding, UErrorCode& status);
 191
     // Overload without the ignoreRounding argument.
     // NOTE(review): which IgnoreRounding value it applies is decided in the implementation -- confirm.
 192     static DecimalFormatProperties parseToProperties(const UnicodeString& pattern, UErrorCode& status);
 193
 194     /**
 195      * Parses a pattern string into an existing property bag. All properties that can be encoded into a pattern string
 196      * will be overwritten with either their default value or with the value coming from the pattern string. Properties
 197      * that cannot be encoded into a pattern string, such as rounding mode, are not modified.
 198      *
 199      * @param pattern
 200      *            The pattern string, like "#,##0.00"
 201      * @param properties
 202      *            The property bag object to overwrite.
 203      * @param ignoreRounding
 204      *            See the ignoreRounding parameter of parseToProperties(pattern, ignoreRounding, status).
 205      * @param status
 206      *            ICU error code. NOTE(review): the Java version throws IllegalArgumentException on a syntax
              *            error; the C++ version presumably reports it through status instead -- confirm.
 207      */
 208     static void parseToExistingProperties(const UnicodeString& pattern,
 209                                           DecimalFormatProperties& properties,
 210                                           IgnoreRounding ignoreRounding, UErrorCode& status);
 211
 212   private:
     // Private helper with the same parameter list as parseToExistingProperties.
     // NOTE(review): presumably does the actual work for the public overloads -- confirm.
 213     static void parseToExistingPropertiesImpl(const UnicodeString& pattern,
 214                                               DecimalFormatProperties& properties,
 215                                               IgnoreRounding ignoreRounding, UErrorCode& status);
 216
 217     /** Finalizes the temporary data stored in the ParsedPatternInfo to the Properties. */
 218     static void patternInfoToProperties(DecimalFormatProperties& properties,
 219                                         ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding,
 220                                         UErrorCode& status);
 221 };
 222
 223 class U_I18N_API PatternStringUtils { // static-only helpers for producing and converting pattern strings
 224   public:
 225     /**
 226      * Creates a pattern string from a property bag.
 227      *
 228      * <p>
 229      * Since pattern strings support only a subset of the functionality available in a property bag, a new property bag
 230      * created from the string returned by this function may not be the same as the original property bag.
 231      *
 232      * @param properties
 233      *            The property bag to serialize.
              * @param status
              *            ICU error code. NOTE(review): the conditions under which it is set are not visible in this
              *            header -- confirm in the implementation.
 234      * @return A pattern string approximately serializing the property bag.
 235      */
 236     static UnicodeString propertiesToPatternString(const DecimalFormatProperties& properties,
 237                                                    UErrorCode& status);
 238
 239
 240     /**
 241      * Converts a pattern between standard notation and localized notation. Localized notation means that instead of
 242      * using generic placeholders in the pattern, you use the corresponding locale-specific characters instead. For
 243      * example, in locale <em>fr-FR</em>, the period in the pattern "0.000" means "decimal" in standard notation (as it
 244      * does in every other locale), but it means "grouping" in localized notation.
 245      *
 246      * <p>
 247      * A greedy string-substitution strategy is used to substitute locale symbols. If two symbols are ambiguous or have
 248      * the same prefix, the result is not well-defined.
 249      *
 250      * <p>
 251      * Locale symbols are not allowed to contain the ASCII quote character.
 252      *
 253      * <p>
 254      * This method is provided for backwards compatibility and should not be used in any new code.
 255      *
 256      * TODO(C++): This method is not yet implemented.
 257      *
 258      * @param input
 259      *            The pattern to convert.
 260      * @param symbols
 261      *            The symbols corresponding to the localized pattern.
 262      * @param toLocalized
 263      *            true to convert from standard to localized notation; false to convert from localized to standard
 264      *            notation.
              * @param status
              *            ICU error code. NOTE(review): the conditions under which it is set are not visible in this
              *            header -- confirm in the implementation.
 265      * @return The pattern expressed in the other notation.
 266      */
 267     static UnicodeString convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols,
 268                                           bool toLocalized, UErrorCode& status);
 269
 270     /**
 271      * This method contains the heart of the logic for rendering LDML affix strings. It handles
 272      * sign-always-shown resolution, whether to use the positive or negative subpattern, permille
 273      * substitution, and plural forms for CurrencyPluralInfo.
              *
              * @param patternInfo
              *            Provider of the affix patterns to render from.
              * @param isPrefix
              *            NOTE(review): presumably true for the prefix and false for the suffix -- confirm.
              * @param signum
              *            NOTE(review): presumably the sign of the number being formatted -- confirm the encoding.
              * @param signDisplay
              *            The sign display setting used for the sign-always-shown resolution mentioned above.
              * @param plural
              *            The plural form used for the CurrencyPluralInfo handling mentioned above.
              * @param perMilleReplacesPercent
              *            Controls the permille substitution mentioned above.
              * @param output
              *            Receives the result. NOTE(review): whether it is overwritten or appended to is not
              *            visible in this header -- confirm.
 274      */
 275     static void patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix,
 276                                            int8_t signum, UNumberSignDisplay signDisplay,
 277                                            StandardPlural::Form plural, bool perMilleReplacesPercent,
 278                                            UnicodeString& output);
 279
 280   private:
 281     /** @return The number of chars inserted. */
     // `input` is taken by value, so the argument is copied at each call.
     // NOTE(review): presumably the escaped form of `input` is inserted into `output` at startIndex -- confirm.
 282     static int escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex,
 283                                    UErrorCode& status);
 284 };
 285
 286 } // namespace impl
 287 } // namespace number
 288 U_NAMESPACE_END
 289
 290
 291 #endif //__NUMBER_PATTERNSTRING_H__
 292
 293 #endif /* #if !UCONFIG_NO_FORMATTING */