]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
46f4442e A |
3 | /* |
4 | ******************************************************************************* | |
2ca993e8 | 5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
46f4442e A |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * | |
9 | * File PLURRULE_IMPL.H | |
10 | * | |
11 | ******************************************************************************* | |
12 | */ | |
13 | ||
14 | ||
2ca993e8 A |
15 | #ifndef PLURRULE_IMPL |
16 | #define PLURRULE_IMPL | |
46f4442e | 17 | |
4388f060 A |
18 | // Internal definitions for the PluralRules implementation. |
19 | ||
2ca993e8 A |
20 | #include "unicode/utypes.h" |
21 | ||
46f4442e A |
22 | #if !UCONFIG_NO_FORMATTING |
23 | ||
24 | #include "unicode/format.h" | |
25 | #include "unicode/locid.h" | |
26 | #include "unicode/parseerr.h" | |
f3c0d7a5 | 27 | #include "unicode/strenum.h" |
57a6839d | 28 | #include "unicode/ures.h" |
46f4442e A |
29 | #include "uvector.h" |
30 | #include "hash.h" | |
0f5d89e8 | 31 | #include "uassert.h" |
46f4442e | 32 | |
57a6839d A |
33 | class PluralRulesTest; |
34 | ||
46f4442e A |
35 | U_NAMESPACE_BEGIN |
36 | ||
57a6839d A |
37 | class AndConstraint; |
38 | class RuleChain; | |
2ca993e8 A |
39 | class DigitInterval; |
40 | class PluralRules; | |
41 | class VisibleDigits; | |
57a6839d | 42 | |
3d1f044b A |
43 | namespace pluralimpl { |
44 | ||
45 | // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. | |
46 | ||
0f5d89e8 A |
47 | static const UChar DOT = ((UChar) 0x002E); |
48 | static const UChar SINGLE_QUOTE = ((UChar) 0x0027); | |
49 | static const UChar SLASH = ((UChar) 0x002F); | |
50 | static const UChar BACKSLASH = ((UChar) 0x005C); | |
51 | static const UChar SPACE = ((UChar) 0x0020); | |
52 | static const UChar EXCLAMATION = ((UChar) 0x0021); | |
53 | static const UChar QUOTATION_MARK = ((UChar) 0x0022); | |
54 | static const UChar NUMBER_SIGN = ((UChar) 0x0023); | |
55 | static const UChar PERCENT_SIGN = ((UChar) 0x0025); | |
56 | static const UChar ASTERISK = ((UChar) 0x002A); | |
57 | static const UChar COMMA = ((UChar) 0x002C); | |
58 | static const UChar HYPHEN = ((UChar) 0x002D); | |
59 | static const UChar U_ZERO = ((UChar) 0x0030); | |
60 | static const UChar U_ONE = ((UChar) 0x0031); | |
61 | static const UChar U_TWO = ((UChar) 0x0032); | |
62 | static const UChar U_THREE = ((UChar) 0x0033); | |
63 | static const UChar U_FOUR = ((UChar) 0x0034); | |
64 | static const UChar U_FIVE = ((UChar) 0x0035); | |
65 | static const UChar U_SIX = ((UChar) 0x0036); | |
66 | static const UChar U_SEVEN = ((UChar) 0x0037); | |
67 | static const UChar U_EIGHT = ((UChar) 0x0038); | |
68 | static const UChar U_NINE = ((UChar) 0x0039); | |
69 | static const UChar COLON = ((UChar) 0x003A); | |
70 | static const UChar SEMI_COLON = ((UChar) 0x003B); | |
71 | static const UChar EQUALS = ((UChar) 0x003D); | |
72 | static const UChar AT = ((UChar) 0x0040); | |
73 | static const UChar CAP_A = ((UChar) 0x0041); | |
74 | static const UChar CAP_B = ((UChar) 0x0042); | |
75 | static const UChar CAP_R = ((UChar) 0x0052); | |
76 | static const UChar CAP_Z = ((UChar) 0x005A); | |
77 | static const UChar LOWLINE = ((UChar) 0x005F); | |
78 | static const UChar LEFTBRACE = ((UChar) 0x007B); | |
79 | static const UChar RIGHTBRACE = ((UChar) 0x007D); | |
80 | static const UChar TILDE = ((UChar) 0x007E); | |
81 | static const UChar ELLIPSIS = ((UChar) 0x2026); | |
82 | ||
83 | static const UChar LOW_A = ((UChar) 0x0061); | |
84 | static const UChar LOW_B = ((UChar) 0x0062); | |
85 | static const UChar LOW_C = ((UChar) 0x0063); | |
86 | static const UChar LOW_D = ((UChar) 0x0064); | |
87 | static const UChar LOW_E = ((UChar) 0x0065); | |
88 | static const UChar LOW_F = ((UChar) 0x0066); | |
89 | static const UChar LOW_G = ((UChar) 0x0067); | |
90 | static const UChar LOW_H = ((UChar) 0x0068); | |
91 | static const UChar LOW_I = ((UChar) 0x0069); | |
92 | static const UChar LOW_J = ((UChar) 0x006a); | |
93 | static const UChar LOW_K = ((UChar) 0x006B); | |
94 | static const UChar LOW_L = ((UChar) 0x006C); | |
95 | static const UChar LOW_M = ((UChar) 0x006D); | |
96 | static const UChar LOW_N = ((UChar) 0x006E); | |
97 | static const UChar LOW_O = ((UChar) 0x006F); | |
98 | static const UChar LOW_P = ((UChar) 0x0070); | |
99 | static const UChar LOW_Q = ((UChar) 0x0071); | |
100 | static const UChar LOW_R = ((UChar) 0x0072); | |
101 | static const UChar LOW_S = ((UChar) 0x0073); | |
102 | static const UChar LOW_T = ((UChar) 0x0074); | |
103 | static const UChar LOW_U = ((UChar) 0x0075); | |
104 | static const UChar LOW_V = ((UChar) 0x0076); | |
105 | static const UChar LOW_W = ((UChar) 0x0077); | |
106 | static const UChar LOW_Y = ((UChar) 0x0079); | |
107 | static const UChar LOW_Z = ((UChar) 0x007A); | |
108 | ||
3d1f044b | 109 | } |
57a6839d A |
110 | |
111 | ||
112 | static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; | |
113 | ||
114 | enum tokenType { | |
46f4442e | 115 | none, |
46f4442e A |
116 | tNumber, |
117 | tComma, | |
118 | tSemiColon, | |
119 | tSpace, | |
120 | tColon, | |
57a6839d | 121 | tAt, // '@' |
46f4442e | 122 | tDot, |
57a6839d A |
123 | tDot2, |
124 | tEllipsis, | |
46f4442e | 125 | tKeyword, |
46f4442e A |
126 | tAnd, |
127 | tOr, | |
57a6839d A |
128 | tMod, // 'mod' or '%' |
129 | tNot, // 'not' only. | |
130 | tIn, // 'in' only. | |
131 | tEqual, // '=' only. | |
132 | tNotEqual, // '!=' | |
133 | tTilde, | |
46f4442e | 134 | tWithin, |
46f4442e | 135 | tIs, |
57a6839d A |
136 | tVariableN, |
137 | tVariableI, | |
138 | tVariableF, | |
139 | tVariableV, | |
140 | tVariableT, | |
141 | tDecimal, | |
142 | tInteger, | |
143 | tEOF | |
144 | }; | |
145 | ||
46f4442e | 146 | |
57a6839d | 147 | class PluralRuleParser: public UMemory { |
46f4442e | 148 | public: |
57a6839d A |
149 | PluralRuleParser(); |
150 | virtual ~PluralRuleParser(); | |
151 | ||
152 | void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); | |
153 | void getNextToken(UErrorCode &status); | |
154 | void checkSyntax(UErrorCode &status); | |
155 | static int32_t getNumberValue(const UnicodeString &token); | |
156 | ||
46f4442e | 157 | private: |
57a6839d A |
158 | static tokenType getKeyType(const UnicodeString& token, tokenType type); |
159 | static tokenType charType(UChar ch); | |
160 | static UBool isValidKeyword(const UnicodeString& token); | |
161 | ||
162 | const UnicodeString *ruleSrc; // The rules string. | |
163 | int32_t ruleIndex; // String index in the input rules, the current parse position. | |
164 | UnicodeString token; // Token most recently scanned. | |
165 | tokenType type; | |
166 | tokenType prevType; | |
167 | ||
168 | // The items currently being parsed & built. | |
169 | // Note: currentChain may not be the last RuleChain in the | |
170 | // list because the "other" chain is forced to the end. | |
171 | AndConstraint *curAndConstraint; | |
172 | RuleChain *currentChain; | |
173 | ||
174 | int32_t rangeLowIdx; // Indices in the UVector of ranges of the | |
175 | int32_t rangeHiIdx; // low and hi values currently being parsed. | |
176 | ||
177 | enum EParseState { | |
178 | kKeyword, | |
179 | kExpr, | |
180 | kValue, | |
181 | kRangeList, | |
182 | kSamples | |
183 | }; | |
57a6839d A |
184 | }; |
185 | ||
0f5d89e8 A |
186 | enum PluralOperand { |
187 | /** | |
188 | * The double value of the entire number. | |
189 | */ | |
190 | PLURAL_OPERAND_N, | |
191 | ||
192 | /** | |
193 | * The integer value, with the fraction digits truncated off. | |
194 | */ | |
195 | PLURAL_OPERAND_I, | |
196 | ||
197 | /** | |
198 | * All visible fraction digits as an integer, including trailing zeros. | |
199 | */ | |
200 | PLURAL_OPERAND_F, | |
201 | ||
202 | /** | |
203 | * Visible fraction digits as an integer, not including trailing zeros. | |
204 | */ | |
205 | PLURAL_OPERAND_T, | |
206 | ||
207 | /** | |
208 | * Number of visible fraction digits. | |
209 | */ | |
210 | PLURAL_OPERAND_V, | |
211 | ||
212 | /** | |
213 | * Number of visible fraction digits, not including trailing zeros. | |
214 | */ | |
215 | PLURAL_OPERAND_W, | |
216 | ||
217 | /** | |
218 | * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. | |
219 | * | |
220 | * <p>Returns the integer value, but will fail if the number has fraction digits. | |
221 | * That is, using "j" instead of "i" is like implicitly adding "v is 0". | |
222 | * | |
223 | * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches | |
224 | * "3" but not "3.1" or "3.0". | |
225 | */ | |
226 | PLURAL_OPERAND_J | |
227 | }; | |
228 | ||
229 | /** | |
230 | * Converts from the tokenType enum to PluralOperand. Asserts that the given | |
231 | * tokenType can be mapped to a PluralOperand. | |
232 | */ | |
233 | PluralOperand tokenTypeToPluralOperand(tokenType tt); | |
234 | ||
235 | /** | |
236 | * An interface to FixedDecimal, allowing for other implementations. | |
237 | * @internal | |
238 | */ | |
239 | class U_I18N_API IFixedDecimal { | |
240 | public: | |
241 | virtual ~IFixedDecimal(); | |
242 | ||
243 | /** | |
244 | * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). | |
245 | * If the operand is 'n', returns a double; otherwise, returns an integer. | |
246 | */ | |
247 | virtual double getPluralOperand(PluralOperand operand) const = 0; | |
248 | ||
249 | virtual bool isNaN() const = 0; | |
250 | ||
251 | virtual bool isInfinite() const = 0; | |
252 | ||
253 | /** Whether the number has no nonzero fraction digits. */ | |
254 | virtual bool hasIntegerValue() const = 0; | |
255 | }; | |
256 | ||
57a6839d A |
257 | /** |
258 | * class FixedDecimal serves to communicate the properties | |
259 | * of a formatted number from a decimal formatter to PluralRules::select() | |
260 | * | |
261 | * see DecimalFormat::getFixedDecimal() | |
262 | * @internal | |
263 | */ | |
0f5d89e8 | 264 | class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { |
57a6839d A |
265 | public: |
266 | /** | |
267 | * @param n the number, e.g. 12.345 | |
268 | * @param v The number of visible fraction digits, e.g. 3 | |
269 | * @param f The fraction digits, e.g. 345 | |
270 | */ | |
271 | FixedDecimal(double n, int32_t v, int64_t f); | |
272 | FixedDecimal(double n, int32_t); | |
273 | explicit FixedDecimal(double n); | |
274 | FixedDecimal(); | |
0f5d89e8 | 275 | ~FixedDecimal() U_OVERRIDE; |
57a6839d A |
276 | FixedDecimal(const UnicodeString &s, UErrorCode &ec); |
277 | FixedDecimal(const FixedDecimal &other); | |
278 | ||
0f5d89e8 A |
279 | double getPluralOperand(PluralOperand operand) const U_OVERRIDE; |
280 | bool isNaN() const U_OVERRIDE; | |
281 | bool isInfinite() const U_OVERRIDE; | |
282 | bool hasIntegerValue() const U_OVERRIDE; | |
283 | ||
284 | bool isNanOrInfinity() const; // used in decimfmtimpl.cpp | |
285 | ||
57a6839d A |
286 | int32_t getVisibleFractionDigitCount() const; |
287 | ||
288 | void init(double n, int32_t v, int64_t f); | |
289 | void init(double n); | |
290 | UBool quickInit(double n); // Try a fast-path only initialization, | |
291 | // return TRUE if successful. | |
292 | void adjustForMinFractionDigits(int32_t min); | |
293 | static int64_t getFractionalDigits(double n, int32_t v); | |
294 | static int32_t decimals(double n); | |
295 | ||
296 | double source; | |
297 | int32_t visibleDecimalDigitCount; | |
298 | int64_t decimalDigits; | |
299 | int64_t decimalDigitsWithoutTrailingZeros; | |
300 | int64_t intValue; | |
0f5d89e8 | 301 | UBool _hasIntegerValue; |
57a6839d | 302 | UBool isNegative; |
0f5d89e8 A |
303 | UBool _isNaN; |
304 | UBool _isInfinite; | |
46f4442e A |
305 | }; |
306 | ||
307 | class AndConstraint : public UMemory { | |
308 | public: | |
309 | typedef enum RuleOp { | |
310 | NONE, | |
311 | MOD | |
312 | } RuleOp; | |
3d1f044b A |
313 | RuleOp op = AndConstraint::NONE; |
314 | int32_t opNum = -1; // for mod expressions, the right operand of the mod. | |
315 | int32_t value = -1; // valid for 'is' rules only. | |
316 | UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. | |
317 | UBool negated = FALSE; // TRUE for negated rules. | |
318 | UBool integerOnly = FALSE; // TRUE for 'within' rules. NOTE(review): the field name suggests TRUE means only integer values may match, which would be the opposite of 'within' -- confirm against the parser and fix this comment if so. | |
319 | tokenType digitsType = none; // n | i | v | f constraint. | |
320 | AndConstraint *next = nullptr; | |
321 | // Internal error status, used for errors that occur during the copy constructor. | |
322 | UErrorCode fInternalStatus = U_ZERO_ERROR; | |
323 | ||
324 | AndConstraint() = default; | |
46f4442e A |
325 | AndConstraint(const AndConstraint& other); |
326 | virtual ~AndConstraint(); | |
3d1f044b | 327 | AndConstraint* add(UErrorCode& status); |
57a6839d | 328 | // UBool isFulfilled(double number); |
0f5d89e8 | 329 | UBool isFulfilled(const IFixedDecimal &number); |
46f4442e A |
330 | }; |
331 | ||
332 | class OrConstraint : public UMemory { | |
333 | public: | |
3d1f044b A |
334 | AndConstraint *childNode = nullptr; |
335 | OrConstraint *next = nullptr; | |
336 | // Internal error status, used for errors that occur during the copy constructor. | |
337 | UErrorCode fInternalStatus = U_ZERO_ERROR; | |
4388f060 | 338 | |
3d1f044b | 339 | OrConstraint() = default; |
46f4442e A |
340 | OrConstraint(const OrConstraint& other); |
341 | virtual ~OrConstraint(); | |
3d1f044b | 342 | AndConstraint* add(UErrorCode& status); |
57a6839d | 343 | // UBool isFulfilled(double number); |
0f5d89e8 | 344 | UBool isFulfilled(const IFixedDecimal &number); |
46f4442e A |
345 | }; |
346 | ||
347 | class RuleChain : public UMemory { | |
348 | public: | |
57a6839d | 349 | UnicodeString fKeyword; |
3d1f044b A |
350 | RuleChain *fNext = nullptr; |
351 | OrConstraint *ruleHeader = nullptr; | |
57a6839d A |
352 | UnicodeString fDecimalSamples; // Sample strings from rule source |
353 | UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. | |
3d1f044b A |
354 | UBool fDecimalSamplesUnbounded = FALSE; |
355 | UBool fIntegerSamplesUnbounded = FALSE; | |
356 | // Internal error status, used for errors that occur during the copy constructor. | |
357 | UErrorCode fInternalStatus = U_ZERO_ERROR; | |
57a6839d | 358 | |
3d1f044b | 359 | RuleChain() = default; |
46f4442e | 360 | RuleChain(const RuleChain& other); |
46f4442e | 361 | virtual ~RuleChain(); |
57a6839d | 362 | |
0f5d89e8 | 363 | UnicodeString select(const IFixedDecimal &number) const; |
57a6839d A |
364 | void dumpRules(UnicodeString& result); |
365 | UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; | |
366 | UBool isKeyword(const UnicodeString& keyword) const; | |
46f4442e A |
367 | }; |
368 | ||
369 | class PluralKeywordEnumeration : public StringEnumeration { | |
370 | public: | |
371 | PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); | |
372 | virtual ~PluralKeywordEnumeration(); | |
373 | static UClassID U_EXPORT2 getStaticClassID(void); | |
374 | virtual UClassID getDynamicClassID(void) const; | |
375 | virtual const UnicodeString* snext(UErrorCode& status); | |
376 | virtual void reset(UErrorCode& status); | |
377 | virtual int32_t count(UErrorCode& status) const; | |
378 | private: | |
57a6839d A |
379 | int32_t pos; |
380 | UVector fKeywordNames; | |
381 | }; | |
382 | ||
383 | ||
384 | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { | |
385 | public: | |
386 | PluralAvailableLocalesEnumeration(UErrorCode &status); | |
387 | virtual ~PluralAvailableLocalesEnumeration(); | |
388 | virtual const char* next(int32_t *resultLength, UErrorCode& status); | |
389 | virtual void reset(UErrorCode& status); | |
390 | virtual int32_t count(UErrorCode& status) const; | |
391 | private: | |
392 | UErrorCode fOpenStatus; | |
3d1f044b A |
393 | UResourceBundle *fLocales = nullptr; |
394 | UResourceBundle *fRes = nullptr; | |
46f4442e A |
395 | }; |
396 | ||
397 | U_NAMESPACE_END | |
398 | ||
399 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
400 | ||
401 | #endif // PLURRULE_IMPL | |
402 | //eof |