]>
Commit | Line | Data |
---|---|---|
46f4442e A |
1 | /* |
2 | ******************************************************************************* | |
2ca993e8 | 3 | * Copyright (C) 2007-2016, International Business Machines Corporation and |
46f4442e A |
4 | * others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * | |
7 | * File PLURRULE_IMPL.H | |
8 | * | |
9 | ******************************************************************************* | |
10 | */ | |
11 | ||
12 | ||
2ca993e8 A |
13 | #ifndef PLURRULE_IMPL |
14 | #define PLURRULE_IMPL | |
46f4442e | 15 | |
4388f060 A |
16 | // Internal definitions for the PluralRules implementation. |
17 | ||
2ca993e8 A |
18 | #include "unicode/utypes.h" |
19 | ||
46f4442e A |
20 | #if !UCONFIG_NO_FORMATTING |
21 | ||
22 | #include "unicode/format.h" | |
23 | #include "unicode/locid.h" | |
24 | #include "unicode/parseerr.h" | |
57a6839d | 25 | #include "unicode/ures.h" |
46f4442e A |
26 | #include "uvector.h" |
27 | #include "hash.h" | |
28 | ||
57a6839d A |
29 | class PluralRulesTest; |
30 | ||
46f4442e A |
31 | U_NAMESPACE_BEGIN |
32 | ||
57a6839d A |
33 | class AndConstraint; |
34 | class RuleChain; | |
2ca993e8 A |
35 | class DigitInterval; |
36 | class PluralRules; | |
37 | class VisibleDigits; | |
57a6839d A |
38 | |
39 | static const UChar DOT = ((UChar)0x002E); | |
40 | static const UChar SINGLE_QUOTE = ((UChar)0x0027); | |
41 | static const UChar SLASH = ((UChar)0x002F); | |
42 | static const UChar BACKSLASH = ((UChar)0x005C); | |
43 | static const UChar SPACE = ((UChar)0x0020); | |
44 | static const UChar EXCLAMATION = ((UChar)0x0021); | |
45 | static const UChar QUOTATION_MARK = ((UChar)0x0022); | |
46 | static const UChar NUMBER_SIGN = ((UChar)0x0023); | |
47 | static const UChar PERCENT_SIGN = ((UChar)0x0025); | |
48 | static const UChar ASTERISK = ((UChar)0x002A); | |
49 | static const UChar COMMA = ((UChar)0x002C); | |
50 | static const UChar HYPHEN = ((UChar)0x002D); | |
51 | static const UChar U_ZERO = ((UChar)0x0030); | |
52 | static const UChar U_ONE = ((UChar)0x0031); | |
53 | static const UChar U_TWO = ((UChar)0x0032); | |
54 | static const UChar U_THREE = ((UChar)0x0033); | |
55 | static const UChar U_FOUR = ((UChar)0x0034); | |
56 | static const UChar U_FIVE = ((UChar)0x0035); | |
57 | static const UChar U_SIX = ((UChar)0x0036); | |
58 | static const UChar U_SEVEN = ((UChar)0x0037); | |
59 | static const UChar U_EIGHT = ((UChar)0x0038); | |
60 | static const UChar U_NINE = ((UChar)0x0039); | |
61 | static const UChar COLON = ((UChar)0x003A); | |
62 | static const UChar SEMI_COLON = ((UChar)0x003B); | |
63 | static const UChar EQUALS = ((UChar)0x003D); | |
64 | static const UChar AT = ((UChar)0x0040); | |
65 | static const UChar CAP_A = ((UChar)0x0041); | |
66 | static const UChar CAP_B = ((UChar)0x0042); | |
67 | static const UChar CAP_R = ((UChar)0x0052); | |
68 | static const UChar CAP_Z = ((UChar)0x005A); | |
69 | static const UChar LOWLINE = ((UChar)0x005F); | |
70 | static const UChar LEFTBRACE = ((UChar)0x007B); | |
71 | static const UChar RIGHTBRACE = ((UChar)0x007D); | |
72 | static const UChar TILDE = ((UChar)0x007E); | |
73 | static const UChar ELLIPSIS = ((UChar)0x2026); | |
74 | ||
75 | static const UChar LOW_A = ((UChar)0x0061); | |
76 | static const UChar LOW_B = ((UChar)0x0062); | |
77 | static const UChar LOW_C = ((UChar)0x0063); | |
78 | static const UChar LOW_D = ((UChar)0x0064); | |
79 | static const UChar LOW_E = ((UChar)0x0065); | |
80 | static const UChar LOW_F = ((UChar)0x0066); | |
81 | static const UChar LOW_G = ((UChar)0x0067); | |
82 | static const UChar LOW_H = ((UChar)0x0068); | |
83 | static const UChar LOW_I = ((UChar)0x0069); | |
84 | static const UChar LOW_J = ((UChar)0x006a); | |
85 | static const UChar LOW_K = ((UChar)0x006B); | |
86 | static const UChar LOW_L = ((UChar)0x006C); | |
87 | static const UChar LOW_M = ((UChar)0x006D); | |
88 | static const UChar LOW_N = ((UChar)0x006E); | |
89 | static const UChar LOW_O = ((UChar)0x006F); | |
90 | static const UChar LOW_P = ((UChar)0x0070); | |
91 | static const UChar LOW_Q = ((UChar)0x0071); | |
92 | static const UChar LOW_R = ((UChar)0x0072); | |
93 | static const UChar LOW_S = ((UChar)0x0073); | |
94 | static const UChar LOW_T = ((UChar)0x0074); | |
95 | static const UChar LOW_U = ((UChar)0x0075); | |
96 | static const UChar LOW_V = ((UChar)0x0076); | |
97 | static const UChar LOW_W = ((UChar)0x0077); | |
98 | static const UChar LOW_Y = ((UChar)0x0079); | |
99 | static const UChar LOW_Z = ((UChar)0x007A); | |
100 | ||
101 | ||
// Largest value representable in int32_t (0x7fffffff).
// NOTE(review): presumably the open upper bound for plural-rule ranges -- confirm at the use sites.
static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff;
103 | ||
/**
 * Token kinds used by the plural-rule code.
 *
 * This is a plain (unscoped) enum: the enumerators are used unqualified and
 * take the implicit values 0 (none) through 28 (tEOF), so the declaration
 * order must not change. A tokenType value is also what FixedDecimal::get()
 * accepts as its operand selector and what AndConstraint::digitsType stores.
 */
enum tokenType {
    none,
    tNumber,
    tComma,
    tSemiColon,
    tSpace,
    tColon,
    tAt,            // '@'
    tDot,
    tDot2,
    tEllipsis,
    tKeyword,
    tAnd,
    tOr,
    tMod,           // 'mod' or '%'
    tNot,           // 'not' only.
    tIn,            // 'in' only.
    tEqual,         // '=' only.
    tNotEqual,      // '!='
    tTilde,
    tWithin,
    tIs,
    tVariableN,
    tVariableI,
    tVariableF,
    tVariableV,
    tVariableT,
    tDecimal,
    tInteger,
    tEOF
};
135 | ||
46f4442e | 136 | |
57a6839d | 137 | class PluralRuleParser: public UMemory { |
46f4442e | 138 | public: |
57a6839d A |
139 | PluralRuleParser(); |
140 | virtual ~PluralRuleParser(); | |
141 | ||
142 | void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); | |
143 | void getNextToken(UErrorCode &status); | |
144 | void checkSyntax(UErrorCode &status); | |
145 | static int32_t getNumberValue(const UnicodeString &token); | |
146 | ||
46f4442e | 147 | private: |
57a6839d A |
148 | static tokenType getKeyType(const UnicodeString& token, tokenType type); |
149 | static tokenType charType(UChar ch); | |
150 | static UBool isValidKeyword(const UnicodeString& token); | |
151 | ||
152 | const UnicodeString *ruleSrc; // The rules string. | |
153 | int32_t ruleIndex; // String index in the input rules, the current parse position. | |
154 | UnicodeString token; // Token most recently scanned. | |
155 | tokenType type; | |
156 | tokenType prevType; | |
157 | ||
158 | // The items currently being parsed & built. | |
159 | // Note: currentChain may not be the last RuleChain in the | |
160 | // list because the "other" chain is forced to the end. | |
161 | AndConstraint *curAndConstraint; | |
162 | RuleChain *currentChain; | |
163 | ||
164 | int32_t rangeLowIdx; // Indices in the UVector of ranges of the | |
165 | int32_t rangeHiIdx; // low and hi values currently being parsed. | |
166 | ||
167 | enum EParseState { | |
168 | kKeyword, | |
169 | kExpr, | |
170 | kValue, | |
171 | kRangeList, | |
172 | kSamples | |
173 | }; | |
174 | ||
175 | }; | |
176 | ||
177 | /** | |
178 | * class FixedDecimal serves to communicate the properties | |
179 | * of a formatted number from a decimal formatter to PluralRules::select() | |
180 | * | |
181 | * see DecimalFormat::getFixedDecimal() | |
182 | * @internal | |
183 | */ | |
184 | class U_I18N_API FixedDecimal: public UMemory { | |
185 | public: | |
186 | /** | |
187 | * @param n the number, e.g. 12.345 | |
188 | * @param v The number of visible fraction digits, e.g. 3 | |
189 | * @param f The fraction digits, e.g. 345 | |
190 | */ | |
191 | FixedDecimal(double n, int32_t v, int64_t f); | |
192 | FixedDecimal(double n, int32_t); | |
193 | explicit FixedDecimal(double n); | |
2ca993e8 | 194 | explicit FixedDecimal(const VisibleDigits &n); |
57a6839d A |
195 | FixedDecimal(); |
196 | FixedDecimal(const UnicodeString &s, UErrorCode &ec); | |
197 | FixedDecimal(const FixedDecimal &other); | |
198 | ||
199 | double get(tokenType operand) const; | |
200 | int32_t getVisibleFractionDigitCount() const; | |
201 | ||
202 | void init(double n, int32_t v, int64_t f); | |
203 | void init(double n); | |
204 | UBool quickInit(double n); // Try a fast-path only initialization, | |
205 | // return TRUE if successful. | |
206 | void adjustForMinFractionDigits(int32_t min); | |
207 | static int64_t getFractionalDigits(double n, int32_t v); | |
208 | static int32_t decimals(double n); | |
209 | ||
210 | double source; | |
211 | int32_t visibleDecimalDigitCount; | |
212 | int64_t decimalDigits; | |
213 | int64_t decimalDigitsWithoutTrailingZeros; | |
214 | int64_t intValue; | |
215 | UBool hasIntegerValue; | |
216 | UBool isNegative; | |
217 | UBool isNanOrInfinity; | |
46f4442e A |
218 | }; |
219 | ||
220 | class AndConstraint : public UMemory { | |
221 | public: | |
222 | typedef enum RuleOp { | |
223 | NONE, | |
224 | MOD | |
225 | } RuleOp; | |
226 | RuleOp op; | |
57a6839d A |
227 | int32_t opNum; // for mod expressions, the right operand of the mod. |
228 | int32_t value; // valid for 'is' rules only. | |
229 | UVector32 *rangeList; // for 'in', 'within' rules. Null otherwise. | |
230 | UBool negated; // TRUE for negated rules. | |
231 | UBool integerOnly; // TRUE for 'within' rules. | |
232 | tokenType digitsType; // n | i | v | f constraint. | |
46f4442e | 233 | AndConstraint *next; |
4388f060 | 234 | |
46f4442e A |
235 | AndConstraint(); |
236 | AndConstraint(const AndConstraint& other); | |
237 | virtual ~AndConstraint(); | |
238 | AndConstraint* add(); | |
57a6839d A |
239 | // UBool isFulfilled(double number); |
240 | UBool isFulfilled(const FixedDecimal &number); | |
46f4442e A |
241 | }; |
242 | ||
243 | class OrConstraint : public UMemory { | |
244 | public: | |
245 | AndConstraint *childNode; | |
246 | OrConstraint *next; | |
247 | OrConstraint(); | |
4388f060 | 248 | |
46f4442e A |
249 | OrConstraint(const OrConstraint& other); |
250 | virtual ~OrConstraint(); | |
251 | AndConstraint* add(); | |
57a6839d A |
252 | // UBool isFulfilled(double number); |
253 | UBool isFulfilled(const FixedDecimal &number); | |
46f4442e A |
254 | }; |
255 | ||
256 | class RuleChain : public UMemory { | |
257 | public: | |
57a6839d A |
258 | UnicodeString fKeyword; |
259 | RuleChain *fNext; | |
260 | OrConstraint *ruleHeader; | |
261 | UnicodeString fDecimalSamples; // Samples strings from rule source | |
262 | UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. | |
263 | UBool fDecimalSamplesUnbounded; | |
264 | UBool fIntegerSamplesUnbounded; | |
265 | ||
266 | ||
46f4442e A |
267 | RuleChain(); |
268 | RuleChain(const RuleChain& other); | |
46f4442e | 269 | virtual ~RuleChain(); |
57a6839d A |
270 | |
271 | UnicodeString select(const FixedDecimal &number) const; | |
272 | void dumpRules(UnicodeString& result); | |
273 | UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; | |
274 | UBool isKeyword(const UnicodeString& keyword) const; | |
46f4442e A |
275 | }; |
276 | ||
277 | class PluralKeywordEnumeration : public StringEnumeration { | |
278 | public: | |
279 | PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); | |
280 | virtual ~PluralKeywordEnumeration(); | |
281 | static UClassID U_EXPORT2 getStaticClassID(void); | |
282 | virtual UClassID getDynamicClassID(void) const; | |
283 | virtual const UnicodeString* snext(UErrorCode& status); | |
284 | virtual void reset(UErrorCode& status); | |
285 | virtual int32_t count(UErrorCode& status) const; | |
286 | private: | |
57a6839d A |
287 | int32_t pos; |
288 | UVector fKeywordNames; | |
289 | }; | |
290 | ||
291 | ||
292 | class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { | |
293 | public: | |
294 | PluralAvailableLocalesEnumeration(UErrorCode &status); | |
295 | virtual ~PluralAvailableLocalesEnumeration(); | |
296 | virtual const char* next(int32_t *resultLength, UErrorCode& status); | |
297 | virtual void reset(UErrorCode& status); | |
298 | virtual int32_t count(UErrorCode& status) const; | |
299 | private: | |
300 | UErrorCode fOpenStatus; | |
301 | UResourceBundle *fLocales; | |
302 | UResourceBundle *fRes; | |
46f4442e A |
303 | }; |
304 | ||
305 | U_NAMESPACE_END | |
306 | ||
307 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
308 | ||
309 | #endif // PLURRULE_IMPL
310 | //eof |