]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 1997-2010, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | * Copyright (C) 2010 , Yahoo! Inc. | |
6 | ******************************************************************** | |
7 | * | |
8 | * File SELFMT.CPP | |
9 | * | |
10 | * Modification History: | |
11 | * | |
12 | * Date Name Description | |
13 | * 11/11/09 kirtig Finished first cut of implementation. | |
14 | * 11/16/09 kirtig Improved version | |
15 | ********************************************************************/ | |
16 | ||
17 | #include <typeinfo> // for 'typeid' to work | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | #include "unicode/ustring.h" | |
21 | #include "unicode/ucnv_err.h" | |
22 | #include "unicode/uchar.h" | |
23 | #include "unicode/umsg.h" | |
24 | #include "unicode/rbnf.h" | |
25 | #include "cmemory.h" | |
26 | #include "util.h" | |
27 | #include "uassert.h" | |
28 | #include "ustrfmt.h" | |
29 | #include "uvector.h" | |
30 | ||
31 | #include "unicode/selfmt.h" | |
32 | #include "selfmtimpl.h" | |
33 | ||
34 | #if !UCONFIG_NO_FORMATTING | |
35 | ||
36 | U_NAMESPACE_BEGIN | |
37 | ||
// Run-time type identification boilerplate for SelectFormat.
// NOTE(review): the macro is defined outside this file; presumably it emits
// the class-ID accessors for SelectFormat -- confirm against its definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(SelectFormat)

// NOTE(review): not referenced anywhere in the visible part of this file.
#define MAX_KEYWORD_SIZE 30
// NUL-terminated spelling of the mandatory default keyword ("other").
// format() falls back to the phrase stored under this keyword, and
// checkSufficientDefinition() requires that it is present.
static const UChar SELECT_KEYWORD_OTHER[] = {LOW_O, LOW_T, LOW_H, LOW_E, LOW_R, 0};
42 | ||
43 | SelectFormat::SelectFormat(const UnicodeString& pat, UErrorCode& status) : parsedValuesHash(NULL) { | |
44 | if (U_FAILURE(status)) { | |
45 | return; | |
46 | } | |
47 | initHashTable(status); | |
48 | applyPattern(pat, status); | |
49 | } | |
50 | ||
51 | SelectFormat::SelectFormat(const SelectFormat& other) : Format(other), parsedValuesHash(NULL) { | |
52 | UErrorCode status = U_ZERO_ERROR; | |
53 | pattern = other.pattern; | |
54 | copyHashtable(other.parsedValuesHash, status); | |
55 | } | |
56 | ||
57 | SelectFormat::~SelectFormat() { | |
58 | cleanHashTable(); | |
59 | } | |
60 | ||
61 | void SelectFormat::initHashTable(UErrorCode &status) { | |
62 | if (U_FAILURE(status)) { | |
63 | return; | |
64 | } | |
65 | // has inited | |
66 | if (parsedValuesHash != NULL) { | |
67 | return; | |
68 | } | |
69 | ||
70 | parsedValuesHash = new Hashtable(TRUE, status); | |
71 | if (U_FAILURE(status)) { | |
72 | cleanHashTable(); | |
73 | return; | |
74 | } else { | |
75 | if (parsedValuesHash == NULL) { | |
76 | status = U_MEMORY_ALLOCATION_ERROR; | |
77 | return; | |
78 | } | |
79 | } | |
80 | // to use hashtable->equals(), must set Value Compartor. | |
81 | parsedValuesHash->setValueComparator(uhash_compareCaselessUnicodeString); | |
82 | } | |
83 | ||
84 | void SelectFormat::cleanHashTable() { | |
85 | if (parsedValuesHash != NULL) { | |
86 | delete parsedValuesHash; | |
87 | parsedValuesHash = NULL; | |
88 | } | |
89 | } | |
90 | ||
91 | void | |
92 | SelectFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { | |
93 | if (U_FAILURE(status)) { | |
94 | return; | |
95 | } | |
96 | ||
97 | pattern = newPattern; | |
98 | enum State{ startState, keywordState, pastKeywordState, phraseState}; | |
99 | ||
100 | //Initialization | |
101 | UnicodeString keyword ; | |
102 | UnicodeString phrase ; | |
103 | UnicodeString* ptrPhrase ; | |
104 | int32_t braceCount = 0; | |
105 | ||
106 | if (parsedValuesHash == NULL) { | |
107 | initHashTable(status); | |
108 | if (U_FAILURE(status)) { | |
109 | return; | |
110 | } | |
111 | } | |
112 | parsedValuesHash->removeAll(); | |
113 | parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString); | |
114 | ||
115 | //Process the state machine | |
116 | State state = startState; | |
117 | for (int32_t i = 0; i < pattern.length(); ++i) { | |
118 | //Get the character and check its type | |
119 | UChar ch = pattern.charAt(i); | |
120 | CharacterClass type = classifyCharacter(ch); | |
121 | ||
122 | //Allow any character in phrase but nowhere else | |
123 | if ( type == tOther ) { | |
124 | if ( state == phraseState ){ | |
125 | phrase += ch; | |
126 | continue; | |
127 | }else { | |
128 | status = U_PATTERN_SYNTAX_ERROR; | |
129 | cleanHashTable(); | |
130 | return; | |
131 | } | |
132 | } | |
133 | ||
134 | //Process the state machine | |
135 | switch (state) { | |
136 | //At the start of pattern | |
137 | case startState: | |
138 | switch (type) { | |
139 | case tSpace: | |
140 | break; | |
141 | case tStartKeyword: | |
142 | state = keywordState; | |
143 | keyword += ch; | |
144 | break; | |
145 | //If anything else is encountered, it's a syntax error | |
146 | default: | |
147 | status = U_PATTERN_SYNTAX_ERROR; | |
148 | cleanHashTable(); | |
149 | return; | |
150 | }//end of switch(type) | |
151 | break; | |
152 | ||
153 | //Handle the keyword state | |
154 | case keywordState: | |
155 | switch (type) { | |
156 | case tSpace: | |
157 | state = pastKeywordState; | |
158 | break; | |
159 | case tStartKeyword: | |
160 | case tContinueKeyword: | |
161 | keyword += ch; | |
162 | break; | |
163 | case tLeftBrace: | |
164 | state = phraseState; | |
165 | break; | |
166 | //If anything else is encountered, it's a syntax error | |
167 | default: | |
168 | status = U_PATTERN_SYNTAX_ERROR; | |
169 | cleanHashTable(); | |
170 | return; | |
171 | }//end of switch(type) | |
172 | break; | |
173 | ||
174 | //Handle the pastkeyword state | |
175 | case pastKeywordState: | |
176 | switch (type) { | |
177 | case tSpace: | |
178 | break; | |
179 | case tLeftBrace: | |
180 | state = phraseState; | |
181 | break; | |
182 | //If anything else is encountered, it's a syntax error | |
183 | default: | |
184 | status = U_PATTERN_SYNTAX_ERROR; | |
185 | cleanHashTable(); | |
186 | return; | |
187 | }//end of switch(type) | |
188 | break; | |
189 | ||
190 | //Handle the phrase state | |
191 | case phraseState: | |
192 | switch (type) { | |
193 | case tLeftBrace: | |
194 | braceCount++; | |
195 | phrase += ch; | |
196 | break; | |
197 | case tRightBrace: | |
198 | //Matching keyword, phrase pair found | |
199 | if (braceCount == 0){ | |
200 | //Check validity of keyword | |
201 | if (parsedValuesHash->get(keyword) != NULL) { | |
202 | status = U_DUPLICATE_KEYWORD; | |
203 | cleanHashTable(); | |
204 | return; | |
205 | } | |
206 | if (keyword.length() == 0) { | |
207 | status = U_PATTERN_SYNTAX_ERROR; | |
208 | cleanHashTable(); | |
209 | return; | |
210 | } | |
211 | ||
212 | //Store the keyword, phrase pair in hashTable | |
213 | ptrPhrase = new UnicodeString(phrase); | |
214 | parsedValuesHash->put( keyword, ptrPhrase, status); | |
215 | ||
216 | //Reinitialize | |
217 | keyword.remove(); | |
218 | phrase.remove(); | |
219 | ptrPhrase = NULL; | |
220 | state = startState; | |
221 | } | |
222 | ||
223 | if (braceCount > 0){ | |
224 | braceCount-- ; | |
225 | phrase += ch; | |
226 | } | |
227 | break; | |
228 | default: | |
229 | phrase += ch; | |
230 | }//end of switch(type) | |
231 | break; | |
232 | ||
233 | //Handle the default case of switch(state) | |
234 | default: | |
235 | status = U_PATTERN_SYNTAX_ERROR; | |
236 | cleanHashTable(); | |
237 | return; | |
238 | ||
239 | }//end of switch(state) | |
240 | } | |
241 | ||
242 | //Check if the state machine is back to startState | |
243 | if ( state != startState){ | |
244 | status = U_PATTERN_SYNTAX_ERROR; | |
245 | cleanHashTable(); | |
246 | return; | |
247 | } | |
248 | ||
249 | //Check if "other" keyword is present | |
250 | if ( !checkSufficientDefinition() ) { | |
251 | status = U_DEFAULT_KEYWORD_MISSING; | |
252 | cleanHashTable(); | |
253 | } | |
254 | return; | |
255 | } | |
256 | ||
257 | UnicodeString& | |
258 | SelectFormat::format(const Formattable& obj, | |
259 | UnicodeString& appendTo, | |
260 | FieldPosition& pos, | |
261 | UErrorCode& status) const | |
262 | { | |
263 | switch (obj.getType()) | |
264 | { | |
265 | case Formattable::kString: | |
266 | return format(obj.getString(), appendTo, pos, status); | |
267 | default: | |
268 | if( U_SUCCESS(status) ){ | |
269 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
270 | } | |
271 | return appendTo; | |
272 | } | |
273 | } | |
274 | ||
275 | UnicodeString& | |
276 | SelectFormat::format(const UnicodeString& keyword, | |
277 | UnicodeString& appendTo, | |
278 | FieldPosition& /*pos */, | |
279 | UErrorCode& status) const { | |
280 | ||
281 | if (U_FAILURE(status)) return appendTo; | |
282 | ||
283 | if (parsedValuesHash == NULL) { | |
284 | status = U_INVALID_FORMAT_ERROR; | |
285 | return appendTo; | |
286 | } | |
287 | ||
288 | //Check for the validity of the keyword | |
289 | if ( !checkValidKeyword(keyword) ){ | |
290 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
291 | return appendTo; | |
292 | } | |
293 | ||
294 | UnicodeString *selectedPattern = (UnicodeString *)parsedValuesHash->get(keyword); | |
295 | if (selectedPattern == NULL) { | |
296 | selectedPattern = (UnicodeString *)parsedValuesHash->get(SELECT_KEYWORD_OTHER); | |
297 | } | |
298 | ||
299 | return appendTo += *selectedPattern; | |
300 | } | |
301 | ||
302 | UnicodeString& | |
303 | SelectFormat::toPattern(UnicodeString& appendTo) { | |
304 | return appendTo += pattern; | |
305 | } | |
306 | ||
307 | SelectFormat::CharacterClass | |
308 | SelectFormat::classifyCharacter(UChar ch) const{ | |
309 | if ((ch >= CAP_A) && (ch <= CAP_Z)) { | |
310 | return tStartKeyword; | |
311 | } | |
312 | if ((ch >= LOW_A) && (ch <= LOW_Z)) { | |
313 | return tStartKeyword; | |
314 | } | |
315 | if ((ch >= U_ZERO) && (ch <= U_NINE)) { | |
316 | return tContinueKeyword; | |
317 | } | |
318 | if ( uprv_isRuleWhiteSpace(ch) ){ | |
319 | return tSpace; | |
320 | } | |
321 | switch (ch) { | |
322 | case LEFTBRACE: | |
323 | return tLeftBrace; | |
324 | case RIGHTBRACE: | |
325 | return tRightBrace; | |
326 | case HYPHEN: | |
327 | case LOWLINE: | |
328 | return tContinueKeyword; | |
329 | default : | |
330 | return tOther; | |
331 | } | |
332 | } | |
333 | ||
334 | UBool | |
335 | SelectFormat::checkSufficientDefinition() { | |
336 | // Check that at least the default rule is defined. | |
337 | return (parsedValuesHash != NULL && | |
338 | parsedValuesHash->get(SELECT_KEYWORD_OTHER) != NULL) ; | |
339 | } | |
340 | ||
341 | UBool | |
342 | SelectFormat::checkValidKeyword(const UnicodeString& argKeyword ) const{ | |
343 | int32_t len = argKeyword.length(); | |
344 | if (len < 1){ | |
345 | return FALSE; | |
346 | } | |
347 | CharacterClass type = classifyCharacter(argKeyword.charAt(0)); | |
348 | if( type != tStartKeyword ){ | |
349 | return FALSE; | |
350 | } | |
351 | ||
352 | for (int32_t i = 0; i < argKeyword.length(); ++i) { | |
353 | type = classifyCharacter(argKeyword.charAt(i)); | |
354 | if( type != tStartKeyword && type != tContinueKeyword ){ | |
355 | return FALSE; | |
356 | } | |
357 | } | |
358 | return TRUE; | |
359 | } | |
360 | ||
361 | Format* SelectFormat::clone() const | |
362 | { | |
363 | return new SelectFormat(*this); | |
364 | } | |
365 | ||
366 | SelectFormat& | |
367 | SelectFormat::operator=(const SelectFormat& other) { | |
368 | if (this != &other) { | |
369 | UErrorCode status = U_ZERO_ERROR; | |
370 | pattern = other.pattern; | |
371 | copyHashtable(other.parsedValuesHash, status); | |
372 | } | |
373 | return *this; | |
374 | } | |
375 | ||
376 | UBool | |
377 | SelectFormat::operator==(const Format& other) const { | |
378 | if( this == &other){ | |
379 | return TRUE; | |
380 | } | |
381 | if (typeid(*this) != typeid(other)) { | |
382 | return FALSE; | |
383 | } | |
384 | SelectFormat* fmt = (SelectFormat*)&other; | |
385 | Hashtable* hashOther = fmt->parsedValuesHash; | |
386 | if ( parsedValuesHash == NULL && hashOther == NULL) | |
387 | return TRUE; | |
388 | if ( parsedValuesHash == NULL || hashOther == NULL) | |
389 | return FALSE; | |
390 | return parsedValuesHash->equals(*hashOther); | |
391 | } | |
392 | ||
393 | UBool | |
394 | SelectFormat::operator!=(const Format& other) const { | |
395 | return !operator==(other); | |
396 | } | |
397 | ||
398 | void | |
399 | SelectFormat::parseObject(const UnicodeString& /*source*/, | |
400 | Formattable& /*result*/, | |
401 | ParsePosition& pos) const | |
402 | { | |
403 | // TODO: not yet supported in icu4j and icu4c | |
404 | pos.setErrorIndex(pos.getIndex()); | |
405 | } | |
406 | ||
407 | void | |
408 | SelectFormat::copyHashtable(Hashtable *other, UErrorCode& status) { | |
409 | if (U_FAILURE(status)) { | |
410 | return; | |
411 | } | |
412 | if (other == NULL) { | |
413 | cleanHashTable(); | |
414 | return; | |
415 | } | |
416 | if (parsedValuesHash == NULL) { | |
417 | initHashTable(status); | |
418 | if (U_FAILURE(status)) { | |
419 | return; | |
420 | } | |
421 | } | |
422 | ||
423 | parsedValuesHash->removeAll(); | |
424 | parsedValuesHash->setValueDeleter(uhash_deleteUnicodeString); | |
425 | ||
426 | int32_t pos = -1; | |
427 | const UHashElement* elem = NULL; | |
428 | ||
429 | // walk through the hash table and create a deep clone | |
430 | while ((elem = other->nextElement(pos)) != NULL){ | |
431 | const UHashTok otherKeyTok = elem->key; | |
432 | UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; | |
433 | const UHashTok otherKeyToVal = elem->value; | |
434 | UnicodeString* otherValue = (UnicodeString*)otherKeyToVal.pointer; | |
435 | parsedValuesHash->put(*otherKey, new UnicodeString(*otherValue), status); | |
436 | if (U_FAILURE(status)){ | |
437 | cleanHashTable(); | |
438 | return; | |
439 | } | |
440 | } | |
441 | } | |
442 | ||
443 | U_NAMESPACE_END | |
444 | ||
445 | #endif /* #if !UCONFIG_NO_FORMATTING */ | |
446 | ||
447 | //eof |