]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
46f4442e A |
3 | /* |
4 | ******************************************************************************* | |
2ca993e8 | 5 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
46f4442e A |
6 | * others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * | |
9 | * File PLURRULE_IMPL.H | |
10 | * | |
11 | ******************************************************************************* | |
12 | */ | |
13 | ||
14 | ||
2ca993e8 A |
15 | #ifndef PLURRULE_IMPL |
16 | #define PLURRULE_IMPL | |
46f4442e | 17 | |
4388f060 A |
18 | // Internal definitions for the PluralRules implementation. |
19 | ||
2ca993e8 A |
20 | #include "unicode/utypes.h" |
21 | ||
46f4442e A |
22 | #if !UCONFIG_NO_FORMATTING |
23 | ||
24 | #include "unicode/format.h" | |
25 | #include "unicode/locid.h" | |
26 | #include "unicode/parseerr.h" | |
f3c0d7a5 | 27 | #include "unicode/strenum.h" |
57a6839d | 28 | #include "unicode/ures.h" |
46f4442e A |
29 | #include "uvector.h" |
30 | #include "hash.h" | |
31 | ||
57a6839d A |
32 | class PluralRulesTest; |
33 | ||
46f4442e A |
34 | U_NAMESPACE_BEGIN |
35 | ||
57a6839d A |
36 | class AndConstraint; |
37 | class RuleChain; | |
2ca993e8 A |
38 | class DigitInterval; |
39 | class PluralRules; | |
40 | class VisibleDigits; | |
57a6839d A |
41 | |
// Named UChar constants for individual characters: ASCII punctuation, the
// digits '0'-'9', the capitals A, B, R and Z, underscore, braces, tilde, and
// U+2026 (ELLIPSIS). Each is declared `static const`, i.e. with internal linkage.
// NOTE(review): presumably consumed by the plural-rule tokenizer; no use of
// these names is visible in this header - confirm in the implementation file.
42 | static const UChar DOT = ((UChar)0x002E); | |
43 | static const UChar SINGLE_QUOTE = ((UChar)0x0027); | |
44 | static const UChar SLASH = ((UChar)0x002F); | |
45 | static const UChar BACKSLASH = ((UChar)0x005C); | |
46 | static const UChar SPACE = ((UChar)0x0020); | |
47 | static const UChar EXCLAMATION = ((UChar)0x0021); | |
48 | static const UChar QUOTATION_MARK = ((UChar)0x0022); | |
49 | static const UChar NUMBER_SIGN = ((UChar)0x0023); | |
50 | static const UChar PERCENT_SIGN = ((UChar)0x0025); | |
51 | static const UChar ASTERISK = ((UChar)0x002A); | |
52 | static const UChar COMMA = ((UChar)0x002C); | |
53 | static const UChar HYPHEN = ((UChar)0x002D); | |
54 | static const UChar U_ZERO = ((UChar)0x0030); | |
55 | static const UChar U_ONE = ((UChar)0x0031); | |
56 | static const UChar U_TWO = ((UChar)0x0032); | |
57 | static const UChar U_THREE = ((UChar)0x0033); | |
58 | static const UChar U_FOUR = ((UChar)0x0034); | |
59 | static const UChar U_FIVE = ((UChar)0x0035); | |
60 | static const UChar U_SIX = ((UChar)0x0036); | |
61 | static const UChar U_SEVEN = ((UChar)0x0037); | |
62 | static const UChar U_EIGHT = ((UChar)0x0038); | |
63 | static const UChar U_NINE = ((UChar)0x0039); | |
64 | static const UChar COLON = ((UChar)0x003A); | |
65 | static const UChar SEMI_COLON = ((UChar)0x003B); | |
66 | static const UChar EQUALS = ((UChar)0x003D); | |
67 | static const UChar AT = ((UChar)0x0040); | |
68 | static const UChar CAP_A = ((UChar)0x0041); | |
69 | static const UChar CAP_B = ((UChar)0x0042); | |
70 | static const UChar CAP_R = ((UChar)0x0052); | |
71 | static const UChar CAP_Z = ((UChar)0x005A); | |
72 | static const UChar LOWLINE = ((UChar)0x005F); | |
73 | static const UChar LEFTBRACE = ((UChar)0x007B); | |
74 | static const UChar RIGHTBRACE = ((UChar)0x007D); | |
75 | static const UChar TILDE = ((UChar)0x007E); | |
76 | static const UChar ELLIPSIS = ((UChar)0x2026); | |
77 | ||
// Lowercase ASCII letters 'a'-'z'. Note that no constant is defined for 'x'.
78 | static const UChar LOW_A = ((UChar)0x0061); | |
79 | static const UChar LOW_B = ((UChar)0x0062); | |
80 | static const UChar LOW_C = ((UChar)0x0063); | |
81 | static const UChar LOW_D = ((UChar)0x0064); | |
82 | static const UChar LOW_E = ((UChar)0x0065); | |
83 | static const UChar LOW_F = ((UChar)0x0066); | |
84 | static const UChar LOW_G = ((UChar)0x0067); | |
85 | static const UChar LOW_H = ((UChar)0x0068); | |
86 | static const UChar LOW_I = ((UChar)0x0069); | |
87 | static const UChar LOW_J = ((UChar)0x006a); | |
88 | static const UChar LOW_K = ((UChar)0x006B); | |
89 | static const UChar LOW_L = ((UChar)0x006C); | |
90 | static const UChar LOW_M = ((UChar)0x006D); | |
91 | static const UChar LOW_N = ((UChar)0x006E); | |
92 | static const UChar LOW_O = ((UChar)0x006F); | |
93 | static const UChar LOW_P = ((UChar)0x0070); | |
94 | static const UChar LOW_Q = ((UChar)0x0071); | |
95 | static const UChar LOW_R = ((UChar)0x0072); | |
96 | static const UChar LOW_S = ((UChar)0x0073); | |
97 | static const UChar LOW_T = ((UChar)0x0074); | |
98 | static const UChar LOW_U = ((UChar)0x0075); | |
99 | static const UChar LOW_V = ((UChar)0x0076); | |
100 | static const UChar LOW_W = ((UChar)0x0077); | |
101 | static const UChar LOW_Y = ((UChar)0x0079); | |
102 | static const UChar LOW_Z = ((UChar)0x007A); | |
103 | ||
104 | ||
// 0x7fffffff is the largest value an int32_t can represent.
// NOTE(review): presumably the sentinel upper bound for open-ended value
// ranges - confirm against the code that fills AndConstraint::rangeList.
105 | static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; | |
106 | ||
// Token categories for plural rule text. Within this header the enum is used
// for PluralRuleParser's `type` / `prevType`, for the operand selector of
// FixedDecimal::get(), and for AndConstraint::digitsType.
// Enumerators carry no explicit values, so their order defines their numbers.
107 | enum tokenType { | |
46f4442e | 108 | none,
46f4442e A |
109 | tNumber,
110 | tComma, | |
111 | tSemiColon, | |
112 | tSpace, | |
113 | tColon, | |
57a6839d | 114 | tAt, // '@' |
46f4442e | 115 | tDot,
57a6839d A |
// NOTE(review): tDot2 is presumably the two-dot range operator ".." and
// tEllipsis the "..." / U+2026 sample terminator - confirm in the tokenizer.
116 | tDot2,
117 | tEllipsis, | |
46f4442e | 118 | tKeyword,
46f4442e A |
119 | tAnd,
120 | tOr, | |
57a6839d A |
121 | tMod, // 'mod' or '%'
122 | tNot, // 'not' only. | |
123 | tIn, // 'in' only. | |
124 | tEqual, // '=' only. | |
125 | tNotEqual, // '!=' | |
126 | tTilde, | |
46f4442e | 127 | tWithin,
46f4442e | 128 | tIs,
57a6839d A |
// NOTE(review): tVariableN/I/F/V/T presumably name the rule operands
// n, i, f, v and t - confirm against FixedDecimal::get().
129 | tVariableN,
130 | tVariableI, | |
131 | tVariableF, | |
132 | tVariableV, | |
133 | tVariableT, | |
// NOTE(review): presumably the "@decimal" / "@integer" sample markers that the
// RuleChain sample-field comments mention - confirm.
134 | tDecimal, | |
135 | tInteger, | |
136 | tEOF | |
137 | }; | |
138 | ||
46f4442e | 139 | |
// Declares the parser for plural rule strings. Only declarations appear here;
// every member function is defined outside this header.
57a6839d | 140 | class PluralRuleParser: public UMemory {
46f4442e | 141 | public:
57a6839d A |
142 | PluralRuleParser();
143 | virtual ~PluralRuleParser(); | |
144 | ||
// NOTE(review): by signature, parse() reads `rules`, writes into `*dest` and
// reports failure through `status`; ownership of `dest` is not visible here.
145 | void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); | |
// NOTE(review): presumably advances ruleIndex and refreshes token/type - confirm.
146 | void getNextToken(UErrorCode &status); | |
147 | void checkSyntax(UErrorCode &status); | |
// Static, so it does not depend on parser state. Returns int32_t.
148 | static int32_t getNumberValue(const UnicodeString &token); | |
149 | ||
46f4442e | 150 | private:
57a6839d A |
// Static classification helpers; each returns a tokenType or UBool for its argument.
151 | static tokenType getKeyType(const UnicodeString& token, tokenType type);
152 | static tokenType charType(UChar ch); | |
153 | static UBool isValidKeyword(const UnicodeString& token); | |
154 | ||
// ruleSrc is a pointer-to-const; NOTE(review): presumably not owned by the parser - confirm.
155 | const UnicodeString *ruleSrc; // The rules string. | |
156 | int32_t ruleIndex; // String index in the input rules, the current parse position. | |
157 | UnicodeString token; // Token most recently scanned. | |
// NOTE(review): presumably the type of `token` and of the token scanned before it - confirm.
158 | tokenType type; | |
159 | tokenType prevType; | |
160 | ||
161 | // The items currently being parsed & built. | |
162 | // Note: currentChain may not be the last RuleChain in the | |
163 | // list because the "other" chain is forced to the end. | |
164 | AndConstraint *curAndConstraint; | |
165 | RuleChain *currentChain; | |
166 | ||
167 | int32_t rangeLowIdx; // Indices in the UVector of ranges of the | |
168 | int32_t rangeHiIdx; // low and hi values currently being parsed. | |
169 | ||
// Parser state names. No member of this type is declared in the class;
// NOTE(review): presumably used only inside the implementation - confirm.
170 | enum EParseState { | |
171 | kKeyword, | |
172 | kExpr, | |
173 | kValue, | |
174 | kRangeList, | |
175 | kSamples | |
176 | }; | |
177 | ||
178 | }; | |
179 | ||
180 | /** | |
181 | * class FixedDecimal serves to communicate the properties | |
182 | * of a formatted number from a decimal formatter to PluralRules::select() | |
183 | * | |
184 | * see DecimalFormat::getFixedDecimal() | |
185 | * @internal | |
186 | */ | |
// Exported with U_I18N_API. All data members below are public.
187 | class U_I18N_API FixedDecimal: public UMemory { | |
188 | public: | |
189 | /** | |
190 | * @param n the number, e.g. 12.345 | |
191 | * @param v The number of visible fraction digits, e.g. 3 | |
192 | * @param f The fraction digits, e.g. 345 | |
193 | */ | |
194 | FixedDecimal(double n, int32_t v, int64_t f); | |
// The second parameter is unnamed in this declaration.
// NOTE(review): presumably `v`, the visible fraction digit count - confirm.
195 | FixedDecimal(double n, int32_t); | |
// The single-argument constructors are `explicit`, so no implicit conversion
// from double or VisibleDigits to FixedDecimal takes place.
196 | explicit FixedDecimal(double n); | |
2ca993e8 | 197 | explicit FixedDecimal(const VisibleDigits &n);
57a6839d A |
198 | FixedDecimal();
// NOTE(review): presumably parses the number from `s`, reporting errors via `ec` - confirm.
199 | FixedDecimal(const UnicodeString &s, UErrorCode &ec); | |
200 | FixedDecimal(const FixedDecimal &other); | |
201 | ||
// Returns a double selected by a tokenType value.
// NOTE(review): presumably accepts the tVariable* enumerators - confirm.
202 | double get(tokenType operand) const; | |
203 | int32_t getVisibleFractionDigitCount() const; | |
204 | ||
// init() overloads mirror the (n, v, f) and (n) constructor parameter lists.
205 | void init(double n, int32_t v, int64_t f); | |
206 | void init(double n); | |
207 | UBool quickInit(double n); // Try a fast-path only initialization, | |
208 | // return TRUE if successful. | |
209 | void adjustForMinFractionDigits(int32_t min); | |
// Static helpers; they take the number explicitly and read no instance state.
210 | static int64_t getFractionalDigits(double n, int32_t v); | |
211 | static int32_t decimals(double n); | |
212 | ||
// NOTE(review): the fields below presumably hold the constructor inputs
// (n -> source, v -> visibleDecimalDigitCount, f -> decimalDigits) plus
// values derived from them; the assignments are not visible here - confirm.
213 | double source; | |
214 | int32_t visibleDecimalDigitCount; | |
215 | int64_t decimalDigits; | |
216 | int64_t decimalDigitsWithoutTrailingZeros; | |
217 | int64_t intValue; | |
218 | UBool hasIntegerValue; | |
219 | UBool isNegative; | |
220 | UBool isNanOrInfinity; | |
46f4442e A |
221 | };
222 | ||
// One constraint of a plural rule. Instances link to a following AndConstraint
// through `next`. All members are public; function bodies are defined elsewhere.
223 | class AndConstraint : public UMemory { | |
224 | public: | |
// Operation applied before the comparison: none, or a modulus (see opNum).
225 | typedef enum RuleOp { | |
226 | NONE, | |
227 | MOD | |
228 | } RuleOp; | |
229 | RuleOp op; | |
57a6839d A |
230 | int32_t opNum; // for mod expressions, the right operand of the mod.
231 | int32_t value; // valid for 'is' rules only. | |
// NOTE(review): ownership of rangeList is not visible in this header - confirm
// that the destructor releases it.
232 | UVector32 *rangeList; // for 'in', 'within' rules. Null otherwise. | |
233 | UBool negated; // TRUE for negated rules. | |
// NOTE(review): in CLDR plural rule syntax 'in' is the integer-only relation
// and 'within' also admits fractions, so the comment below may be inverted -
// verify against the parser code that sets this flag.
234 | UBool integerOnly; // TRUE for 'within' rules. | |
235 | tokenType digitsType; // n | i | v | f constraint. | |
46f4442e | 236 | AndConstraint *next;
4388f060 | 237 | 
46f4442e A |
238 | AndConstraint();
239 | AndConstraint(const AndConstraint& other); | |
240 | virtual ~AndConstraint(); | |
// NOTE(review): presumably appends a new AndConstraint via `next` and returns
// it - confirm in the implementation.
241 | AndConstraint* add(); | |
57a6839d A |
// The double-taking overload is commented out; only the FixedDecimal overload is declared.
242 | // UBool isFulfilled(double number);
243 | UBool isFulfilled(const FixedDecimal &number); | |
46f4442e A |
244 | };
245 | ||
// Groups AndConstraint objects: `childNode` points at an AndConstraint and
// `next` at a following OrConstraint. All members are public; function bodies
// are defined elsewhere.
// NOTE(review): presumably the alternatives of an "or" in a rule - confirm.
246 | class OrConstraint : public UMemory { | |
247 | public: | |
248 | AndConstraint *childNode; | |
249 | OrConstraint *next; | |
250 | OrConstraint(); | |
4388f060 | 251 | 
46f4442e A |
252 | OrConstraint(const OrConstraint& other);
253 | virtual ~OrConstraint(); | |
// Returns an AndConstraint*, not an OrConstraint*.
// NOTE(review): presumably the newly added child constraint - confirm.
254 | AndConstraint* add(); | |
57a6839d A |
// The double-taking overload is commented out; only the FixedDecimal overload is declared.
255 | // UBool isFulfilled(double number);
256 | UBool isFulfilled(const FixedDecimal &number); | |
46f4442e A |
257 | };
258 | ||
// One keyword's rule: the keyword string, a pointer to its OrConstraint list
// (`ruleHeader`), its sample strings, and `fNext`, a pointer to another
// RuleChain. All members are public; function bodies are defined elsewhere.
259 | class RuleChain : public UMemory { | |
260 | public: | |
57a6839d A |
261 | UnicodeString fKeyword;
262 | RuleChain *fNext; | |
263 | OrConstraint *ruleHeader; | |
264 | UnicodeString fDecimalSamples; // Sample strings from the rule source, | |
265 | UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. | |
// NOTE(review): presumably TRUE when the matching sample list ends with an
// ellipsis - confirm in the parser.
266 | UBool fDecimalSamplesUnbounded; | |
267 | UBool fIntegerSamplesUnbounded; | |
268 | ||
269 | ||
46f4442e A |
270 | RuleChain();
271 | RuleChain(const RuleChain& other); | |
46f4442e | 272 | virtual ~RuleChain();
57a6839d A |
273 | 
// NOTE(review): presumably returns the keyword of the first chain entry whose
// constraints accept `number` - confirm.
274 | UnicodeString select(const FixedDecimal &number) const; | |
// Not const, unlike the other query functions; writes into `result`.
275 | void dumpRules(UnicodeString& result); | |
// Returns a UErrorCode value rather than taking a status out-parameter.
// NOTE(review): presumably fills `keywords` (capacity maxArraySize) and
// updates `arraySize` - confirm.
276 | UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; | |
277 | UBool isKeyword(const UnicodeString& keyword) const; | |
46f4442e A |
278 | };
279 | ||
// StringEnumeration subclass constructed from a RuleChain pointer. It stores
// a UVector named fKeywordNames and an integer position `pos`.
// NOTE(review): presumably enumerates the keywords of the chain starting at
// `header` - confirm in the constructor.
280 | class PluralKeywordEnumeration : public StringEnumeration { | |
281 | public: | |
282 | PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); | |
283 | virtual ~PluralKeywordEnumeration(); | |
// Class-ID accessors: one static, one virtual.
284 | static UClassID U_EXPORT2 getStaticClassID(void); | |
285 | virtual UClassID getDynamicClassID(void) const; | |
// Virtual enumeration functions. snext() returns a pointer to a UnicodeString.
// NOTE(review): presumably nullptr at the end of the list - confirm.
286 | virtual const UnicodeString* snext(UErrorCode& status); | |
287 | virtual void reset(UErrorCode& status); | |
288 | virtual int32_t count(UErrorCode& status) const; | |
289 | private: | |
57a6839d A |
// NOTE(review): presumably the index of the next element of fKeywordNames to return.
290 | int32_t pos;
291 | UVector fKeywordNames; | |
292 | }; | |
293 | ||
294 | ||
// StringEnumeration subclass, exported with U_I18N_API, that holds two
// UResourceBundle pointers and a stored UErrorCode.
// NOTE(review): presumably enumerates the locales that have plural rule data -
// confirm in the implementation.
295 | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { | |
296 | public: | |
// Single-argument constructor, not marked `explicit`.
297 | PluralAvailableLocalesEnumeration(UErrorCode &status); | |
298 | virtual ~PluralAvailableLocalesEnumeration(); | |
// Returns a C string and takes a length out-parameter.
// NOTE(review): whether `resultLength` may be null is not visible here - confirm.
299 | virtual const char* next(int32_t *resultLength, UErrorCode& status); | |
300 | virtual void reset(UErrorCode& status); | |
301 | virtual int32_t count(UErrorCode& status) const; | |
302 | private: | |
// NOTE(review): presumably the status recorded when the bundles were opened in
// the constructor - confirm.
303 | UErrorCode fOpenStatus; | |
// NOTE(review): ownership of both bundles is not visible in this header -
// confirm that the destructor closes them.
304 | UResourceBundle *fLocales; | |
305 | UResourceBundle *fRes; | |
46f4442e A |
306 | };
307 | ||
308 | U_NAMESPACE_END | |
309 | ||
310 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
311 | ||
312 | #endif // PLURRULE_IMPL | |
313 | //eof |