]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 1999-2003, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ********************************************************************/ | |
6 | /************************************************************************ | |
7 | * Date Name Description | |
8 | * 12/14/99 Madhu Creation. | |
9 | * 01/12/2000 Madhu updated for changed API | |
10 | ************************************************************************/ | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | ||
14 | #if !UCONFIG_NO_BREAK_ITERATION | |
15 | ||
16 | #include "unicode/uchar.h" | |
17 | #include "intltest.h" | |
18 | #include "unicode/rbbi.h" | |
19 | #include "unicode/schriter.h" | |
20 | #include "rbbiapts.h" | |
21 | #include "rbbidata.h" | |
22 | #include "cstring.h" | |
23 | ||
24 | /** | |
25 | * API Test the RuleBasedBreakIterator class | |
26 | */ | |
27 | ||
28 | ||
29 | ||
30 | void RBBIAPITest::TestCloneEquals() | |
31 | { | |
32 | ||
33 | UErrorCode status=U_ZERO_ERROR; | |
34 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
35 | RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
36 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
37 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
38 | if(U_FAILURE(status)){ | |
39 | errln((UnicodeString)"FAIL : in construction"); | |
40 | return; | |
41 | } | |
42 | ||
43 | ||
44 | UnicodeString testString="Testing word break iterators's clone() and equals()"; | |
45 | bi1->setText(testString); | |
46 | bi2->setText(testString); | |
47 | biequal->setText(testString); | |
48 | ||
49 | bi3->setText("hello"); | |
50 | ||
51 | logln((UnicodeString)"Testing equals()"); | |
52 | ||
53 | logln((UnicodeString)"Testing == and !="); | |
54 | UBool b = (*bi1 != *biequal); | |
55 | b |= *bi1 == *bi2; | |
56 | b |= *bi1 == *bi3; | |
57 | if (b) { | |
58 | errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); | |
59 | } | |
60 | ||
61 | if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) | |
62 | errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); | |
63 | ||
64 | ||
65 | // Quick test of RulesBasedBreakIterator assignment - | |
66 | // Check that | |
67 | // two different iterators are != | |
68 | // they are == after assignment | |
69 | // source and dest iterator produce the same next() after assignment. | |
70 | // deleting one doesn't disable the other. | |
71 | logln("Testing assignment"); | |
72 | RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
73 | if(U_FAILURE(status)){ | |
74 | errln((UnicodeString)"FAIL : in construction"); | |
75 | return; | |
76 | } | |
77 | ||
78 | RuleBasedBreakIterator biDefault, biDefault2; | |
79 | if(U_FAILURE(status)){ | |
80 | errln((UnicodeString)"FAIL : in construction of default iterator"); | |
81 | return; | |
82 | } | |
83 | if (biDefault == *bix) { | |
84 | errln((UnicodeString)"ERROR: iterators should not compare =="); | |
85 | return; | |
86 | } | |
87 | if (biDefault != biDefault2) { | |
88 | errln((UnicodeString)"ERROR: iterators should compare =="); | |
89 | return; | |
90 | } | |
91 | ||
92 | ||
93 | UnicodeString HelloString("Hello Kitty"); | |
94 | bix->setText(HelloString); | |
95 | if (*bix == *bi2) { | |
96 | errln(UnicodeString("ERROR: strings should not be equal before assignment.")); | |
97 | } | |
98 | *bix = *bi2; | |
99 | if (*bix != *bi2) { | |
100 | errln(UnicodeString("ERROR: strings should be equal before assignment.")); | |
101 | } | |
102 | ||
103 | int bixnext = bix->next(); | |
104 | int bi2next = bi2->next(); | |
105 | if (! (bixnext == bi2next && bixnext == 7)) { | |
106 | errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); | |
107 | } | |
108 | delete bix; | |
109 | if (bi2->next() != 8) { | |
110 | errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); | |
111 | } | |
112 | ||
113 | ||
114 | ||
115 | logln((UnicodeString)"Testing clone()"); | |
116 | RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); | |
117 | RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); | |
118 | ||
119 | if(*bi1clone != *bi1 || *bi1clone != *biequal || | |
120 | *bi1clone == *bi3 || *bi1clone == *bi2) | |
121 | errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); | |
122 | ||
123 | if(*bi2clone == *bi1 || *bi2clone == *biequal || | |
124 | *bi2clone == *bi3 || *bi2clone != *bi2) | |
125 | errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); | |
126 | ||
127 | if(bi1->getText() != bi1clone->getText() || | |
128 | bi2clone->getText() != bi2->getText() || | |
129 | *bi2clone == *bi1clone ) | |
130 | errln((UnicodeString)"ERROR: RBBI's clone() method failed"); | |
131 | ||
132 | delete bi1clone; | |
133 | delete bi2clone; | |
134 | delete bi1; | |
135 | delete bi3; | |
136 | delete bi2; | |
137 | delete biequal; | |
138 | } | |
139 | ||
140 | void RBBIAPITest::TestBoilerPlate() | |
141 | { | |
142 | UErrorCode status = U_ZERO_ERROR; | |
143 | BreakIterator* a = BreakIterator::createLineInstance(Locale("hi"), status); | |
144 | BreakIterator* b = BreakIterator::createLineInstance(Locale("hi_IN"),status); | |
145 | if(*a!=*b){ | |
146 | errln("Failed: boilerplate method operator!= does not return correct results"); | |
147 | } | |
148 | BreakIterator* c = BreakIterator::createLineInstance(Locale("th"),status); | |
149 | if(*c==*a){ | |
150 | errln("Failed: boilerplate method opertator== does not return correct results"); | |
151 | } | |
152 | delete a; | |
153 | delete b; | |
154 | delete c; | |
155 | } | |
156 | ||
157 | void RBBIAPITest::TestgetRules() | |
158 | { | |
159 | UErrorCode status=U_ZERO_ERROR; | |
160 | ||
161 | RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
162 | RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
163 | if(U_FAILURE(status)){ | |
164 | errln((UnicodeString)"FAIL: in construction"); | |
165 | delete bi1; | |
166 | delete bi2; | |
167 | return; | |
168 | } | |
169 | ||
170 | ||
171 | ||
172 | logln((UnicodeString)"Testing toString()"); | |
173 | ||
174 | bi1->setText((UnicodeString)"Hello there"); | |
175 | ||
176 | RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); | |
177 | ||
178 | UnicodeString temp=bi1->getRules(); | |
179 | UnicodeString temp2=bi2->getRules(); | |
180 | UnicodeString temp3=bi3->getRules(); | |
181 | if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) | |
182 | errln((UnicodeString)"ERROR: error in getRules() method"); | |
183 | ||
184 | delete bi1; | |
185 | delete bi2; | |
186 | delete bi3; | |
187 | } | |
188 | void RBBIAPITest::TestHashCode() | |
189 | { | |
190 | UErrorCode status=U_ZERO_ERROR; | |
191 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
192 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
193 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
194 | if(U_FAILURE(status)){ | |
195 | errln((UnicodeString)"FAIL : in construction"); | |
196 | delete bi1; | |
197 | delete bi2; | |
198 | delete bi3; | |
199 | return; | |
200 | } | |
201 | ||
202 | ||
203 | logln((UnicodeString)"Testing hashCode()"); | |
204 | ||
205 | bi1->setText((UnicodeString)"Hash code"); | |
206 | bi2->setText((UnicodeString)"Hash code"); | |
207 | bi3->setText((UnicodeString)"Hash code"); | |
208 | ||
209 | RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); | |
210 | RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); | |
211 | ||
212 | if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || | |
213 | bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) | |
214 | errln((UnicodeString)"ERROR: identical objects have different hashcodes"); | |
215 | ||
216 | if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || | |
217 | bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) | |
218 | errln((UnicodeString)"ERROR: different objects have same hashcodes"); | |
219 | ||
220 | delete bi1clone; | |
221 | delete bi2clone; | |
222 | delete bi1; | |
223 | delete bi2; | |
224 | delete bi3; | |
225 | ||
226 | } | |
227 | void RBBIAPITest::TestGetSetAdoptText() | |
228 | { | |
229 | logln((UnicodeString)"Testing getText setText "); | |
230 | UErrorCode status=U_ZERO_ERROR; | |
231 | UnicodeString str1="first string."; | |
232 | UnicodeString str2="Second string."; | |
233 | RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
234 | RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
235 | if(U_FAILURE(status)){ | |
236 | errln((UnicodeString)"FAIL : in construction"); | |
237 | return; | |
238 | } | |
239 | ||
240 | ||
241 | CharacterIterator* text1= new StringCharacterIterator(str1); | |
242 | CharacterIterator* text1Clone = text1->clone(); | |
243 | CharacterIterator* text2= new StringCharacterIterator(str2); | |
244 | CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" | |
245 | ||
246 | wordIter1->setText(str1); | |
247 | if(wordIter1->getText() != *text1) | |
248 | errln((UnicodeString)"ERROR:1 error in setText or getText "); | |
249 | if(wordIter1->current() != 0) | |
250 | errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
251 | ||
252 | wordIter1->next(2); | |
253 | ||
254 | wordIter1->setText(str2); | |
255 | if(wordIter1->current() != 0) | |
256 | errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
257 | ||
258 | ||
259 | charIter1->adoptText(text1Clone); | |
260 | if( wordIter1->getText() == charIter1->getText() || | |
261 | wordIter1->getText() != *text2 || charIter1->getText() != *text1 ) | |
262 | errln((UnicodeString)"ERROR:2 error is getText or setText()"); | |
263 | ||
264 | RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone(); | |
265 | rb->adoptText(text1); | |
266 | if(rb->getText() != *text1) | |
267 | errln((UnicodeString)"ERROR:1 error in adoptText "); | |
268 | rb->adoptText(text2); | |
269 | if(rb->getText() != *text2) | |
270 | errln((UnicodeString)"ERROR:2 error in adoptText "); | |
271 | ||
272 | // Adopt where iterator range is less than the entire orignal source string. | |
273 | rb->adoptText(text3); | |
274 | if(rb->preceding(2) != 3) { | |
275 | errln((UnicodeString)"ERROR:3 error in adoptText "); | |
276 | } | |
277 | if(rb->following(11) != BreakIterator::DONE) { | |
278 | errln((UnicodeString)"ERROR:4 error in adoptText "); | |
279 | } | |
280 | ||
281 | delete wordIter1; | |
282 | delete charIter1; | |
283 | delete rb; | |
284 | ||
285 | } | |
286 | ||
287 | ||
288 | void RBBIAPITest::TestIteration() | |
289 | { | |
290 | // This test just verifies that the API is present. | |
291 | // Testing for correct operation of the break rules happens elsewhere. | |
292 | ||
293 | UErrorCode status=U_ZERO_ERROR; | |
294 | RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
295 | if (U_FAILURE(status) || bi == NULL) { | |
296 | errln("Failure creating character break iterator. Status = %s", u_errorName(status)); | |
297 | } | |
298 | delete bi; | |
299 | ||
300 | status=U_ZERO_ERROR; | |
301 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
302 | if (U_FAILURE(status) || bi == NULL) { | |
303 | errln("Failure creating Word break iterator. Status = %s", u_errorName(status)); | |
304 | } | |
305 | delete bi; | |
306 | ||
307 | status=U_ZERO_ERROR; | |
308 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); | |
309 | if (U_FAILURE(status) || bi == NULL) { | |
310 | errln("Failure creating Line break iterator. Status = %s", u_errorName(status)); | |
311 | } | |
312 | delete bi; | |
313 | ||
314 | status=U_ZERO_ERROR; | |
315 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); | |
316 | if (U_FAILURE(status) || bi == NULL) { | |
317 | errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status)); | |
318 | } | |
319 | delete bi; | |
320 | ||
321 | status=U_ZERO_ERROR; | |
322 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); | |
323 | if (U_FAILURE(status) || bi == NULL) { | |
324 | errln("Failure creating Title break iterator. Status = %s", u_errorName(status)); | |
325 | } | |
326 | delete bi; | |
327 | ||
328 | status=U_ZERO_ERROR; | |
329 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
330 | if (U_FAILURE(status) || bi == NULL) { | |
331 | errln("Failure creating character break iterator. Status = %s", u_errorName(status)); | |
332 | return; // Skip the rest of these tests. | |
333 | } | |
334 | ||
335 | ||
336 | UnicodeString testString="0123456789"; | |
337 | bi->setText(testString); | |
338 | ||
339 | int32_t i; | |
340 | i = bi->first(); | |
341 | if (i != 0) { | |
342 | errln("Incorrect value from bi->first(). Expected 0, got %d.", i); | |
343 | } | |
344 | ||
345 | i = bi->last(); | |
346 | if (i != 10) { | |
347 | errln("Incorrect value from bi->last(). Expected 10, got %d", i); | |
348 | } | |
349 | ||
350 | // | |
351 | // Previous | |
352 | // | |
353 | bi->last(); | |
354 | i = bi->previous(); | |
355 | if (i != 9) { | |
356 | errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); | |
357 | } | |
358 | ||
359 | ||
360 | bi->first(); | |
361 | i = bi->previous(); | |
362 | if (i != BreakIterator::DONE) { | |
363 | errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); | |
364 | } | |
365 | ||
366 | // | |
367 | // next() | |
368 | // | |
369 | bi->first(); | |
370 | i = bi->next(); | |
371 | if (i != 1) { | |
372 | errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); | |
373 | } | |
374 | ||
375 | bi->last(); | |
376 | i = bi->next(); | |
377 | if (i != BreakIterator::DONE) { | |
378 | errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); | |
379 | } | |
380 | ||
381 | ||
382 | // | |
383 | // current() | |
384 | // | |
385 | bi->first(); | |
386 | i = bi->current(); | |
387 | if (i != 0) { | |
388 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
389 | } | |
390 | ||
391 | bi->next(); | |
392 | i = bi->current(); | |
393 | if (i != 1) { | |
394 | errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); | |
395 | } | |
396 | ||
397 | bi->last(); | |
398 | bi->next(); | |
399 | i = bi->current(); | |
400 | if (i != 10) { | |
401 | errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); | |
402 | } | |
403 | ||
404 | bi->first(); | |
405 | bi->previous(); | |
406 | i = bi->current(); | |
407 | if (i != 0) { | |
408 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
409 | } | |
410 | ||
411 | ||
412 | // | |
413 | // Following() | |
414 | // | |
415 | i = bi->following(4); | |
416 | if (i != 5) { | |
417 | errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); | |
418 | } | |
419 | ||
420 | i = bi->following(9); | |
421 | if (i != 10) { | |
422 | errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); | |
423 | } | |
424 | ||
425 | i = bi->following(10); | |
426 | if (i != BreakIterator::DONE) { | |
427 | errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); | |
428 | } | |
429 | ||
430 | ||
431 | // | |
432 | // Preceding | |
433 | // | |
434 | i = bi->preceding(4); | |
435 | if (i != 3) { | |
436 | errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); | |
437 | } | |
438 | ||
439 | i = bi->preceding(10); | |
440 | if (i != 9) { | |
441 | errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); | |
442 | } | |
443 | ||
444 | i = bi->preceding(1); | |
445 | if (i != 0) { | |
446 | errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); | |
447 | } | |
448 | ||
449 | i = bi->preceding(0); | |
450 | if (i != BreakIterator::DONE) { | |
451 | errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); | |
452 | } | |
453 | ||
454 | ||
455 | // | |
456 | // isBoundary() | |
457 | // | |
458 | bi->first(); | |
459 | if (bi->isBoundary(3) != TRUE) { | |
460 | errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); | |
461 | } | |
462 | i = bi->current(); | |
463 | if (i != 3) { | |
464 | errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); | |
465 | } | |
466 | ||
467 | ||
468 | if (bi->isBoundary(11) != FALSE) { | |
469 | errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); | |
470 | } | |
471 | i = bi->current(); | |
472 | if (i != 10) { | |
473 | errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); | |
474 | } | |
475 | ||
476 | // | |
477 | // next(n) | |
478 | // | |
479 | bi->first(); | |
480 | i = bi->next(4); | |
481 | if (i != 4) { | |
482 | errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); | |
483 | } | |
484 | ||
485 | i = bi->next(6); | |
486 | if (i != 10) { | |
487 | errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); | |
488 | } | |
489 | ||
490 | bi->first(); | |
491 | i = bi->next(11); | |
492 | if (i != BreakIterator::DONE) { | |
493 | errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); | |
494 | } | |
495 | ||
496 | delete bi; | |
497 | ||
498 | } | |
499 | ||
500 | ||
501 | ||
502 | ||
503 | ||
504 | ||
505 | void RBBIAPITest::TestBuilder() { | |
506 | UnicodeString rulesString1 = "$Letters = [:L:];\n" | |
507 | "$Numbers = [:N:];\n" | |
508 | "$Letters+;\n" | |
509 | "$Numbers+;\n" | |
510 | "[^$Letters $Numbers];\n" | |
511 | "!.*;\n"; | |
512 | UnicodeString testString1 = "abc123..abc"; | |
513 | // 01234567890 | |
514 | int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; | |
515 | UErrorCode status=U_ZERO_ERROR; | |
516 | UParseError parseError; | |
517 | ||
518 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); | |
519 | if(U_FAILURE(status)) { | |
520 | errln("FAIL : in construction"); | |
521 | } else { | |
522 | bi->setText(testString1); | |
523 | doBoundaryTest(*bi, testString1, bounds1); | |
524 | } | |
525 | delete bi; | |
526 | } | |
527 | ||
528 | ||
529 | // | |
530 | // TestQuoteGrouping | |
531 | // Single quotes within rules imply a grouping, so that a modifier | |
532 | // following the quoted text (* or +) applies to all of the quoted chars. | |
533 | // | |
534 | void RBBIAPITest::TestQuoteGrouping() { | |
535 | UnicodeString rulesString1 = "#Here comes the rule...\n" | |
536 | "'$@!'*;\n" // (\$\@\!)* | |
537 | ".;\n"; | |
538 | ||
539 | UnicodeString testString1 = "$@!$@!X$@!!X"; | |
540 | // 0123456789012 | |
541 | int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; | |
542 | UErrorCode status=U_ZERO_ERROR; | |
543 | UParseError parseError; | |
544 | ||
545 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); | |
546 | if(U_FAILURE(status)) { | |
547 | errln("FAIL : in construction"); | |
548 | } else { | |
549 | bi->setText(testString1); | |
550 | doBoundaryTest(*bi, testString1, bounds1); | |
551 | } | |
552 | delete bi; | |
553 | } | |
554 | ||
555 | // | |
556 | // TestWordStatus | |
557 | // Test word break rule status constants. | |
558 | // | |
559 | void RBBIAPITest::TestWordStatus() { | |
560 | ||
561 | ||
562 | UnicodeString testString1 = // Ideographic Katakana Hiragana | |
563 | CharsToUnicodeString("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094"); | |
564 | // 012345678901234567 8 9 0 1 2 3 4 5 6 | |
565 | int32_t bounds1[] = { 0, 5,6, 10,11, 17,18, 19, 20,21, 23,24, 25, 26}; | |
566 | int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, | |
567 | UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, | |
568 | UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, | |
569 | UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA}; | |
570 | ||
571 | int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, | |
572 | UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, | |
573 | UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT, | |
574 | UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; | |
575 | ||
576 | UErrorCode status=U_ZERO_ERROR; | |
577 | ||
578 | RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getDefault(), status); | |
579 | if(U_FAILURE(status)) { | |
580 | errln("FAIL : in construction"); | |
581 | } else { | |
582 | bi->setText(testString1); | |
583 | // First test that the breaks are in the right spots. | |
584 | doBoundaryTest(*bi, testString1, bounds1); | |
585 | ||
586 | // Then go back and check tag values | |
587 | int32_t i = 0; | |
588 | int32_t pos, tag; | |
589 | for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { | |
590 | if (pos != bounds1[i]) { | |
591 | errln("FAIL: unexpected word break at postion %d", pos); | |
592 | break; | |
593 | } | |
594 | tag = bi->getRuleStatus(); | |
595 | if (tag < tag_lo[i] || tag >= tag_hi[i]) { | |
596 | errln("FAIL: incorrect tag value %d at position %d", tag, pos); | |
597 | break; | |
598 | } | |
599 | } | |
600 | } | |
601 | delete bi; | |
602 | } | |
603 | ||
604 | ||
605 | // | |
606 | // Bug 2190 Regression test. Builder crash on rule consisting of only a | |
607 | // $variable reference | |
608 | void RBBIAPITest::TestBug2190() { | |
609 | UnicodeString rulesString1 = "$aaa = abcd;\n" | |
610 | "$bbb = $aaa;\n" | |
611 | "$bbb;\n"; | |
612 | UnicodeString testString1 = "abcdabcd"; | |
613 | // 01234567890 | |
614 | int32_t bounds1[] = {0, 4, 8}; | |
615 | UErrorCode status=U_ZERO_ERROR; | |
616 | UParseError parseError; | |
617 | ||
618 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); | |
619 | if(U_FAILURE(status)) { | |
620 | errln("FAIL : in construction"); | |
621 | } else { | |
622 | bi->setText(testString1); | |
623 | doBoundaryTest(*bi, testString1, bounds1); | |
624 | } | |
625 | delete bi; | |
626 | } | |
627 | ||
628 | ||
629 | void RBBIAPITest::TestRegistration() { | |
630 | UErrorCode status = U_ZERO_ERROR; | |
631 | BreakIterator* thai_word = BreakIterator::createWordInstance("th_TH", status); | |
632 | ||
633 | // ok to not delete these if we exit because of error? | |
634 | BreakIterator* thai_char = BreakIterator::createCharacterInstance("th_TH", status); | |
635 | BreakIterator* root_word = BreakIterator::createWordInstance("", status); | |
636 | BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); | |
637 | ||
638 | URegistryKey key = BreakIterator::registerInstance(thai_word, "xx", UBRK_WORD, status); | |
639 | { | |
640 | if (*thai_word == *root_word) { | |
641 | errln("thai not different from root"); | |
642 | } | |
643 | } | |
644 | ||
645 | { | |
646 | BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); | |
647 | UBool fail = *result != *thai_word; | |
648 | delete result; | |
649 | if (fail) { | |
650 | errln("bad result for xx_XX/word"); | |
651 | } | |
652 | } | |
653 | ||
654 | { | |
655 | BreakIterator* result = BreakIterator::createCharacterInstance("th_TH", status); | |
656 | UBool fail = *result != *thai_char; | |
657 | delete result; | |
658 | if (fail) { | |
659 | errln("bad result for th_TH/char"); | |
660 | } | |
661 | } | |
662 | ||
663 | { | |
664 | BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); | |
665 | UBool fail = *result != *root_char; | |
666 | delete result; | |
667 | if (fail) { | |
668 | errln("bad result for xx_XX/char"); | |
669 | } | |
670 | } | |
671 | ||
672 | { | |
673 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
674 | UBool found = FALSE; | |
675 | const UnicodeString* p; | |
676 | while ((p = avail->snext(status))) { | |
677 | if (p->compare("xx") == 0) { | |
678 | found = TRUE; | |
679 | break; | |
680 | } | |
681 | } | |
682 | delete avail; | |
683 | if (!found) { | |
684 | errln("did not find test locale"); | |
685 | } | |
686 | } | |
687 | ||
688 | { | |
689 | UBool unreg = BreakIterator::unregister(key, status); | |
690 | if (!unreg) { | |
691 | errln("unable to unregister"); | |
692 | } | |
693 | } | |
694 | ||
695 | { | |
696 | BreakIterator* result = BreakIterator::createWordInstance("xx", status); | |
697 | BreakIterator* root = BreakIterator::createWordInstance("", status); | |
698 | UBool fail = *root != *result; | |
699 | delete root; | |
700 | delete result; | |
701 | if (fail) { | |
702 | errln("did not get root break"); | |
703 | } | |
704 | } | |
705 | ||
706 | { | |
707 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
708 | UBool found = FALSE; | |
709 | const UnicodeString* p; | |
710 | while ((p = avail->snext(status))) { | |
711 | if (p->compare("xx") == 0) { | |
712 | found = TRUE; | |
713 | break; | |
714 | } | |
715 | } | |
716 | delete avail; | |
717 | if (found) { | |
718 | errln("found test locale"); | |
719 | } | |
720 | } | |
721 | ||
722 | { | |
723 | int32_t count; | |
724 | UBool foundLocale = FALSE; | |
725 | const Locale *avail = BreakIterator::getAvailableLocales(count); | |
726 | for (int i=0; i<count; i++) { | |
727 | if (avail[i] == Locale::getEnglish()) { | |
728 | foundLocale = TRUE; | |
729 | break; | |
730 | } | |
731 | } | |
732 | if (foundLocale == FALSE) { | |
733 | errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); | |
734 | } | |
735 | } | |
736 | ||
737 | ||
738 | // that_word was adopted by factory | |
739 | delete thai_char; | |
740 | delete root_word; | |
741 | delete root_char; | |
742 | } | |
743 | ||
744 | void RBBIAPITest::RoundtripRule(const char *dataFile) { | |
745 | UErrorCode status = U_ZERO_ERROR; | |
746 | UParseError parseError; | |
747 | parseError.line = 0; | |
748 | parseError.offset = 0; | |
749 | UDataMemory *data = udata_open(NULL, "brk", dataFile, &status); | |
750 | uint32_t length; | |
751 | const UChar *builtSource; | |
752 | const uint8_t *rbbiRules; | |
753 | const uint8_t *builtRules; | |
754 | ||
755 | if (U_FAILURE(status)) { | |
756 | errln("Can't open \"%s\"", dataFile); | |
757 | return; | |
758 | } | |
759 | ||
760 | builtRules = (const uint8_t *)udata_getMemory(data); | |
761 | builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); | |
762 | RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); | |
763 | if (U_FAILURE(status)) { | |
764 | errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", | |
765 | u_errorName(status), parseError.line, parseError.offset); | |
766 | return; | |
767 | }; | |
768 | rbbiRules = brkItr->getBinaryRules(length); | |
769 | logln("Comparing \"%s\" len=%d", dataFile, length); | |
770 | if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { | |
771 | errln("Built rules and rebuilt rules are different %s", dataFile); | |
772 | return; | |
773 | } | |
774 | delete brkItr; | |
775 | udata_close(data); | |
776 | } | |
777 | ||
778 | void RBBIAPITest::TestRoundtripRules() { | |
779 | RoundtripRule("word"); | |
780 | RoundtripRule("title"); | |
781 | RoundtripRule("sent"); | |
782 | RoundtripRule("line"); | |
783 | RoundtripRule("char"); | |
784 | if (!quick) { | |
785 | RoundtripRule("word_th"); | |
786 | RoundtripRule("line_th"); | |
787 | } | |
788 | } | |
789 | ||
790 | //--------------------------------------------- | |
791 | // runIndexedTest | |
792 | //--------------------------------------------- | |
793 | ||
794 | void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
795 | { | |
796 | if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); | |
797 | switch (index) { | |
798 | // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; | |
799 | case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; | |
800 | case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; | |
801 | case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; | |
802 | case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; | |
803 | case 4: name = "TestIteration"; if (exec) TestIteration(); break; | |
804 | case 5: name = "extra"; break; /* Extra */ | |
805 | case 6: name = "extra"; break; /* Extra */ | |
806 | case 7: name = "TestBuilder"; if (exec) TestBuilder(); break; | |
807 | case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; | |
808 | case 9: name = "TestWordStatus"; if (exec) TestWordStatus(); break; | |
809 | case 10: name = "TestBug2190"; if (exec) TestBug2190(); break; | |
810 | case 11: name = "TestRegistration"; if (exec) TestRegistration(); break; | |
811 | case 12: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; | |
812 | case 13: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; | |
813 | ||
814 | default: name = ""; break; /*needed to end loop*/ | |
815 | } | |
816 | } | |
817 | ||
818 | //--------------------------------------------- | |
819 | //Internal subroutines | |
820 | //--------------------------------------------- | |
821 | ||
822 | void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ | |
823 | logln((UnicodeString)"testIsBoundary():"); | |
824 | int32_t p = 0; | |
825 | UBool isB; | |
826 | for (int32_t i = 0; i < text.length(); i++) { | |
827 | isB = bi.isBoundary(i); | |
828 | logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); | |
829 | ||
830 | if (i == boundaries[p]) { | |
831 | if (!isB) | |
832 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); | |
833 | p++; | |
834 | } | |
835 | else { | |
836 | if (isB) | |
837 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); | |
838 | } | |
839 | } | |
840 | } | |
841 | void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ | |
842 | UnicodeString selected; | |
843 | UnicodeString expected=CharsToUnicodeString(expectedString); | |
844 | ||
845 | if(gotoffset != expectedOffset) | |
846 | errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); | |
847 | if(start <= gotoffset){ | |
848 | testString.extractBetween(start, gotoffset, selected); | |
849 | } | |
850 | else{ | |
851 | testString.extractBetween(gotoffset, start, selected); | |
852 | } | |
853 | if(selected.compare(expected) != 0) | |
854 | errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); | |
855 | else | |
856 | logln(prettify("****selected \"" + selected + "\"")); | |
857 | } | |
858 | ||
859 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |