]>
Commit | Line | Data |
---|---|---|
1 | /******************************************************************** | |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 1999-2004, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ********************************************************************/ | |
6 | /************************************************************************ | |
7 | * Date Name Description | |
8 | * 12/14/99 Madhu Creation. | |
9 | * 01/12/2000 Madhu updated for changed API | |
10 | ************************************************************************/ | |
11 | ||
12 | #include "unicode/utypes.h" | |
13 | ||
14 | #if !UCONFIG_NO_BREAK_ITERATION | |
15 | ||
16 | #include "unicode/uchar.h" | |
17 | #include "intltest.h" | |
18 | #include "unicode/rbbi.h" | |
19 | #include "unicode/schriter.h" | |
20 | #include "rbbiapts.h" | |
21 | #include "rbbidata.h" | |
22 | #include "cstring.h" | |
23 | #include "unicode/ustring.h" | |
24 | ||
25 | /** | |
26 | * API Test the RuleBasedBreakIterator class | |
27 | */ | |
28 | ||
29 | ||
// Reports a test failure, including the source file, line number and ICU
// error name, when `status` holds a failing UErrorCode.
// errln() must be in scope at the point of expansion.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errln("Failure at file %s, line %d, error = %s", \
              __FILE__, __LINE__, u_errorName(status)); \
    } \
}
32 | ||
// Reports a test failure, including the source file and line number, when
// `expr` evaluates to FALSE. errln() must be in scope at the point of expansion.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d", __FILE__, __LINE__); \
    } \
}
35 | ||
36 | void RBBIAPITest::TestCloneEquals() | |
37 | { | |
38 | ||
39 | UErrorCode status=U_ZERO_ERROR; | |
40 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
41 | RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
42 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
43 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
44 | if(U_FAILURE(status)){ | |
45 | errln((UnicodeString)"FAIL : in construction"); | |
46 | return; | |
47 | } | |
48 | ||
49 | ||
50 | UnicodeString testString="Testing word break iterators's clone() and equals()"; | |
51 | bi1->setText(testString); | |
52 | bi2->setText(testString); | |
53 | biequal->setText(testString); | |
54 | ||
55 | bi3->setText("hello"); | |
56 | ||
57 | logln((UnicodeString)"Testing equals()"); | |
58 | ||
59 | logln((UnicodeString)"Testing == and !="); | |
60 | UBool b = (*bi1 != *biequal); | |
61 | b |= *bi1 == *bi2; | |
62 | b |= *bi1 == *bi3; | |
63 | if (b) { | |
64 | errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); | |
65 | } | |
66 | ||
67 | if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) | |
68 | errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); | |
69 | ||
70 | ||
71 | // Quick test of RulesBasedBreakIterator assignment - | |
72 | // Check that | |
73 | // two different iterators are != | |
74 | // they are == after assignment | |
75 | // source and dest iterator produce the same next() after assignment. | |
76 | // deleting one doesn't disable the other. | |
77 | logln("Testing assignment"); | |
78 | RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
79 | if(U_FAILURE(status)){ | |
80 | errln((UnicodeString)"FAIL : in construction"); | |
81 | return; | |
82 | } | |
83 | ||
84 | RuleBasedBreakIterator biDefault, biDefault2; | |
85 | if(U_FAILURE(status)){ | |
86 | errln((UnicodeString)"FAIL : in construction of default iterator"); | |
87 | return; | |
88 | } | |
89 | if (biDefault == *bix) { | |
90 | errln((UnicodeString)"ERROR: iterators should not compare =="); | |
91 | return; | |
92 | } | |
93 | if (biDefault != biDefault2) { | |
94 | errln((UnicodeString)"ERROR: iterators should compare =="); | |
95 | return; | |
96 | } | |
97 | ||
98 | ||
99 | UnicodeString HelloString("Hello Kitty"); | |
100 | bix->setText(HelloString); | |
101 | if (*bix == *bi2) { | |
102 | errln(UnicodeString("ERROR: strings should not be equal before assignment.")); | |
103 | } | |
104 | *bix = *bi2; | |
105 | if (*bix != *bi2) { | |
106 | errln(UnicodeString("ERROR: strings should be equal before assignment.")); | |
107 | } | |
108 | ||
109 | int bixnext = bix->next(); | |
110 | int bi2next = bi2->next(); | |
111 | if (! (bixnext == bi2next && bixnext == 7)) { | |
112 | errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); | |
113 | } | |
114 | delete bix; | |
115 | if (bi2->next() != 8) { | |
116 | errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); | |
117 | } | |
118 | ||
119 | ||
120 | ||
121 | logln((UnicodeString)"Testing clone()"); | |
122 | RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); | |
123 | RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); | |
124 | ||
125 | if(*bi1clone != *bi1 || *bi1clone != *biequal || | |
126 | *bi1clone == *bi3 || *bi1clone == *bi2) | |
127 | errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); | |
128 | ||
129 | if(*bi2clone == *bi1 || *bi2clone == *biequal || | |
130 | *bi2clone == *bi3 || *bi2clone != *bi2) | |
131 | errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); | |
132 | ||
133 | if(bi1->getText() != bi1clone->getText() || | |
134 | bi2clone->getText() != bi2->getText() || | |
135 | *bi2clone == *bi1clone ) | |
136 | errln((UnicodeString)"ERROR: RBBI's clone() method failed"); | |
137 | ||
138 | delete bi1clone; | |
139 | delete bi2clone; | |
140 | delete bi1; | |
141 | delete bi3; | |
142 | delete bi2; | |
143 | delete biequal; | |
144 | } | |
145 | ||
146 | void RBBIAPITest::TestBoilerPlate() | |
147 | { | |
148 | UErrorCode status = U_ZERO_ERROR; | |
149 | BreakIterator* a = BreakIterator::createLineInstance(Locale("hi"), status); | |
150 | BreakIterator* b = BreakIterator::createLineInstance(Locale("hi_IN"),status); | |
151 | if (U_FAILURE(status)) { | |
152 | errln("Creation of break iterator failed %s", u_errorName(status)); | |
153 | return; | |
154 | } | |
155 | if(*a!=*b){ | |
156 | errln("Failed: boilerplate method operator!= does not return correct results"); | |
157 | } | |
158 | BreakIterator* c = BreakIterator::createLineInstance(Locale("th"),status); | |
159 | if(a && c){ | |
160 | if(*c==*a){ | |
161 | errln("Failed: boilerplate method opertator== does not return correct results"); | |
162 | } | |
163 | }else{ | |
164 | errln("creation of break iterator failed"); | |
165 | } | |
166 | delete a; | |
167 | delete b; | |
168 | delete c; | |
169 | } | |
170 | ||
171 | void RBBIAPITest::TestgetRules() | |
172 | { | |
173 | UErrorCode status=U_ZERO_ERROR; | |
174 | ||
175 | RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
176 | RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
177 | if(U_FAILURE(status)){ | |
178 | errln((UnicodeString)"FAIL: in construction"); | |
179 | delete bi1; | |
180 | delete bi2; | |
181 | return; | |
182 | } | |
183 | ||
184 | ||
185 | ||
186 | logln((UnicodeString)"Testing toString()"); | |
187 | ||
188 | bi1->setText((UnicodeString)"Hello there"); | |
189 | ||
190 | RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); | |
191 | ||
192 | UnicodeString temp=bi1->getRules(); | |
193 | UnicodeString temp2=bi2->getRules(); | |
194 | UnicodeString temp3=bi3->getRules(); | |
195 | if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) | |
196 | errln((UnicodeString)"ERROR: error in getRules() method"); | |
197 | ||
198 | delete bi1; | |
199 | delete bi2; | |
200 | delete bi3; | |
201 | } | |
202 | void RBBIAPITest::TestHashCode() | |
203 | { | |
204 | UErrorCode status=U_ZERO_ERROR; | |
205 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
206 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
207 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
208 | if(U_FAILURE(status)){ | |
209 | errln((UnicodeString)"FAIL : in construction"); | |
210 | delete bi1; | |
211 | delete bi2; | |
212 | delete bi3; | |
213 | return; | |
214 | } | |
215 | ||
216 | ||
217 | logln((UnicodeString)"Testing hashCode()"); | |
218 | ||
219 | bi1->setText((UnicodeString)"Hash code"); | |
220 | bi2->setText((UnicodeString)"Hash code"); | |
221 | bi3->setText((UnicodeString)"Hash code"); | |
222 | ||
223 | RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); | |
224 | RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); | |
225 | ||
226 | if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || | |
227 | bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) | |
228 | errln((UnicodeString)"ERROR: identical objects have different hashcodes"); | |
229 | ||
230 | if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || | |
231 | bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) | |
232 | errln((UnicodeString)"ERROR: different objects have same hashcodes"); | |
233 | ||
234 | delete bi1clone; | |
235 | delete bi2clone; | |
236 | delete bi1; | |
237 | delete bi2; | |
238 | delete bi3; | |
239 | ||
240 | } | |
241 | void RBBIAPITest::TestGetSetAdoptText() | |
242 | { | |
243 | logln((UnicodeString)"Testing getText setText "); | |
244 | UErrorCode status=U_ZERO_ERROR; | |
245 | UnicodeString str1="first string."; | |
246 | UnicodeString str2="Second string."; | |
247 | RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
248 | RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
249 | if(U_FAILURE(status)){ | |
250 | errln((UnicodeString)"FAIL : in construction"); | |
251 | return; | |
252 | } | |
253 | ||
254 | ||
255 | CharacterIterator* text1= new StringCharacterIterator(str1); | |
256 | CharacterIterator* text1Clone = text1->clone(); | |
257 | CharacterIterator* text2= new StringCharacterIterator(str2); | |
258 | CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" | |
259 | ||
260 | wordIter1->setText(str1); | |
261 | if(wordIter1->getText() != *text1) | |
262 | errln((UnicodeString)"ERROR:1 error in setText or getText "); | |
263 | if(wordIter1->current() != 0) | |
264 | errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
265 | ||
266 | wordIter1->next(2); | |
267 | ||
268 | wordIter1->setText(str2); | |
269 | if(wordIter1->current() != 0) | |
270 | errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
271 | ||
272 | ||
273 | charIter1->adoptText(text1Clone); | |
274 | if( wordIter1->getText() == charIter1->getText() || | |
275 | wordIter1->getText() != *text2 || charIter1->getText() != *text1 ) | |
276 | errln((UnicodeString)"ERROR:2 error is getText or setText()"); | |
277 | ||
278 | RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone(); | |
279 | rb->adoptText(text1); | |
280 | if(rb->getText() != *text1) | |
281 | errln((UnicodeString)"ERROR:1 error in adoptText "); | |
282 | rb->adoptText(text2); | |
283 | if(rb->getText() != *text2) | |
284 | errln((UnicodeString)"ERROR:2 error in adoptText "); | |
285 | ||
286 | // Adopt where iterator range is less than the entire orignal source string. | |
287 | rb->adoptText(text3); | |
288 | if(rb->preceding(2) != 3) { | |
289 | errln((UnicodeString)"ERROR:3 error in adoptText "); | |
290 | } | |
291 | if(rb->following(11) != BreakIterator::DONE) { | |
292 | errln((UnicodeString)"ERROR:4 error in adoptText "); | |
293 | } | |
294 | ||
295 | delete wordIter1; | |
296 | delete charIter1; | |
297 | delete rb; | |
298 | ||
299 | } | |
300 | ||
301 | ||
302 | void RBBIAPITest::TestIteration() | |
303 | { | |
304 | // This test just verifies that the API is present. | |
305 | // Testing for correct operation of the break rules happens elsewhere. | |
306 | ||
307 | UErrorCode status=U_ZERO_ERROR; | |
308 | RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
309 | if (U_FAILURE(status) || bi == NULL) { | |
310 | errln("Failure creating character break iterator. Status = %s", u_errorName(status)); | |
311 | } | |
312 | delete bi; | |
313 | ||
314 | status=U_ZERO_ERROR; | |
315 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
316 | if (U_FAILURE(status) || bi == NULL) { | |
317 | errln("Failure creating Word break iterator. Status = %s", u_errorName(status)); | |
318 | } | |
319 | delete bi; | |
320 | ||
321 | status=U_ZERO_ERROR; | |
322 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); | |
323 | if (U_FAILURE(status) || bi == NULL) { | |
324 | errln("Failure creating Line break iterator. Status = %s", u_errorName(status)); | |
325 | } | |
326 | delete bi; | |
327 | ||
328 | status=U_ZERO_ERROR; | |
329 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); | |
330 | if (U_FAILURE(status) || bi == NULL) { | |
331 | errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status)); | |
332 | } | |
333 | delete bi; | |
334 | ||
335 | status=U_ZERO_ERROR; | |
336 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); | |
337 | if (U_FAILURE(status) || bi == NULL) { | |
338 | errln("Failure creating Title break iterator. Status = %s", u_errorName(status)); | |
339 | } | |
340 | delete bi; | |
341 | ||
342 | status=U_ZERO_ERROR; | |
343 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
344 | if (U_FAILURE(status) || bi == NULL) { | |
345 | errln("Failure creating character break iterator. Status = %s", u_errorName(status)); | |
346 | return; // Skip the rest of these tests. | |
347 | } | |
348 | ||
349 | ||
350 | UnicodeString testString="0123456789"; | |
351 | bi->setText(testString); | |
352 | ||
353 | int32_t i; | |
354 | i = bi->first(); | |
355 | if (i != 0) { | |
356 | errln("Incorrect value from bi->first(). Expected 0, got %d.", i); | |
357 | } | |
358 | ||
359 | i = bi->last(); | |
360 | if (i != 10) { | |
361 | errln("Incorrect value from bi->last(). Expected 10, got %d", i); | |
362 | } | |
363 | ||
364 | // | |
365 | // Previous | |
366 | // | |
367 | bi->last(); | |
368 | i = bi->previous(); | |
369 | if (i != 9) { | |
370 | errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); | |
371 | } | |
372 | ||
373 | ||
374 | bi->first(); | |
375 | i = bi->previous(); | |
376 | if (i != BreakIterator::DONE) { | |
377 | errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); | |
378 | } | |
379 | ||
380 | // | |
381 | // next() | |
382 | // | |
383 | bi->first(); | |
384 | i = bi->next(); | |
385 | if (i != 1) { | |
386 | errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); | |
387 | } | |
388 | ||
389 | bi->last(); | |
390 | i = bi->next(); | |
391 | if (i != BreakIterator::DONE) { | |
392 | errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); | |
393 | } | |
394 | ||
395 | ||
396 | // | |
397 | // current() | |
398 | // | |
399 | bi->first(); | |
400 | i = bi->current(); | |
401 | if (i != 0) { | |
402 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
403 | } | |
404 | ||
405 | bi->next(); | |
406 | i = bi->current(); | |
407 | if (i != 1) { | |
408 | errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); | |
409 | } | |
410 | ||
411 | bi->last(); | |
412 | bi->next(); | |
413 | i = bi->current(); | |
414 | if (i != 10) { | |
415 | errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); | |
416 | } | |
417 | ||
418 | bi->first(); | |
419 | bi->previous(); | |
420 | i = bi->current(); | |
421 | if (i != 0) { | |
422 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
423 | } | |
424 | ||
425 | ||
426 | // | |
427 | // Following() | |
428 | // | |
429 | i = bi->following(4); | |
430 | if (i != 5) { | |
431 | errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); | |
432 | } | |
433 | ||
434 | i = bi->following(9); | |
435 | if (i != 10) { | |
436 | errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); | |
437 | } | |
438 | ||
439 | i = bi->following(10); | |
440 | if (i != BreakIterator::DONE) { | |
441 | errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); | |
442 | } | |
443 | ||
444 | ||
445 | // | |
446 | // Preceding | |
447 | // | |
448 | i = bi->preceding(4); | |
449 | if (i != 3) { | |
450 | errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); | |
451 | } | |
452 | ||
453 | i = bi->preceding(10); | |
454 | if (i != 9) { | |
455 | errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); | |
456 | } | |
457 | ||
458 | i = bi->preceding(1); | |
459 | if (i != 0) { | |
460 | errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); | |
461 | } | |
462 | ||
463 | i = bi->preceding(0); | |
464 | if (i != BreakIterator::DONE) { | |
465 | errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); | |
466 | } | |
467 | ||
468 | ||
469 | // | |
470 | // isBoundary() | |
471 | // | |
472 | bi->first(); | |
473 | if (bi->isBoundary(3) != TRUE) { | |
474 | errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); | |
475 | } | |
476 | i = bi->current(); | |
477 | if (i != 3) { | |
478 | errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); | |
479 | } | |
480 | ||
481 | ||
482 | if (bi->isBoundary(11) != FALSE) { | |
483 | errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); | |
484 | } | |
485 | i = bi->current(); | |
486 | if (i != 10) { | |
487 | errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); | |
488 | } | |
489 | ||
490 | // | |
491 | // next(n) | |
492 | // | |
493 | bi->first(); | |
494 | i = bi->next(4); | |
495 | if (i != 4) { | |
496 | errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); | |
497 | } | |
498 | ||
499 | i = bi->next(6); | |
500 | if (i != 10) { | |
501 | errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); | |
502 | } | |
503 | ||
504 | bi->first(); | |
505 | i = bi->next(11); | |
506 | if (i != BreakIterator::DONE) { | |
507 | errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); | |
508 | } | |
509 | ||
510 | delete bi; | |
511 | ||
512 | } | |
513 | ||
514 | ||
515 | ||
516 | ||
517 | ||
518 | ||
519 | void RBBIAPITest::TestBuilder() { | |
520 | UnicodeString rulesString1 = "$Letters = [:L:];\n" | |
521 | "$Numbers = [:N:];\n" | |
522 | "$Letters+;\n" | |
523 | "$Numbers+;\n" | |
524 | "[^$Letters $Numbers];\n" | |
525 | "!.*;\n"; | |
526 | UnicodeString testString1 = "abc123..abc"; | |
527 | // 01234567890 | |
528 | int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; | |
529 | UErrorCode status=U_ZERO_ERROR; | |
530 | UParseError parseError; | |
531 | ||
532 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); | |
533 | if(U_FAILURE(status)) { | |
534 | errln("FAIL : in construction"); | |
535 | } else { | |
536 | bi->setText(testString1); | |
537 | doBoundaryTest(*bi, testString1, bounds1); | |
538 | } | |
539 | delete bi; | |
540 | } | |
541 | ||
542 | ||
543 | // | |
544 | // TestQuoteGrouping | |
545 | // Single quotes within rules imply a grouping, so that a modifier | |
546 | // following the quoted text (* or +) applies to all of the quoted chars. | |
547 | // | |
548 | void RBBIAPITest::TestQuoteGrouping() { | |
549 | UnicodeString rulesString1 = "#Here comes the rule...\n" | |
550 | "'$@!'*;\n" // (\$\@\!)* | |
551 | ".;\n"; | |
552 | ||
553 | UnicodeString testString1 = "$@!$@!X$@!!X"; | |
554 | // 0123456789012 | |
555 | int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; | |
556 | UErrorCode status=U_ZERO_ERROR; | |
557 | UParseError parseError; | |
558 | ||
559 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); | |
560 | if(U_FAILURE(status)) { | |
561 | errln("FAIL : in construction"); | |
562 | } else { | |
563 | bi->setText(testString1); | |
564 | doBoundaryTest(*bi, testString1, bounds1); | |
565 | } | |
566 | delete bi; | |
567 | } | |
568 | ||
569 | // | |
570 | // TestRuleStatus | |
571 | // Test word break rule status constants. | |
572 | // | |
573 | void RBBIAPITest::TestRuleStatus() { | |
574 | UChar str[30]; | |
575 | u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", | |
576 | // 012345678901234567 8 9 0 1 2 3 4 5 6 | |
577 | // Ideographic Katakana Hiragana | |
578 | str, 30); | |
579 | UnicodeString testString1(str); | |
580 | int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; | |
581 | int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, | |
582 | UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, | |
583 | UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, | |
584 | UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA}; | |
585 | ||
586 | int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, | |
587 | UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, | |
588 | UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT, | |
589 | UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; | |
590 | ||
591 | UErrorCode status=U_ZERO_ERROR; | |
592 | ||
593 | RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getDefault(), status); | |
594 | if(U_FAILURE(status)) { | |
595 | errln("FAIL : in construction"); | |
596 | } else { | |
597 | bi->setText(testString1); | |
598 | // First test that the breaks are in the right spots. | |
599 | doBoundaryTest(*bi, testString1, bounds1); | |
600 | ||
601 | // Then go back and check tag values | |
602 | int32_t i = 0; | |
603 | int32_t pos, tag; | |
604 | for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { | |
605 | if (pos != bounds1[i]) { | |
606 | errln("FAIL: unexpected word break at postion %d", pos); | |
607 | break; | |
608 | } | |
609 | tag = bi->getRuleStatus(); | |
610 | if (tag < tag_lo[i] || tag >= tag_hi[i]) { | |
611 | errln("FAIL: incorrect tag value %d at position %d", tag, pos); | |
612 | break; | |
613 | } | |
614 | ||
615 | // Check that we get the same tag values from getRuleStatusVec() | |
616 | int32_t vec[10]; | |
617 | int t = bi->getRuleStatusVec(vec, 10, status); | |
618 | TEST_ASSERT_SUCCESS(status); | |
619 | TEST_ASSERT(t==1); | |
620 | TEST_ASSERT(vec[0] == tag); | |
621 | } | |
622 | } | |
623 | delete bi; | |
624 | ||
625 | // Now test line break status. This test mostly is to confirm that the status constants | |
626 | // are correctly declared in the header. | |
627 | testString1 = "test line. \n"; | |
628 | // break type s s h | |
629 | ||
630 | bi = (RuleBasedBreakIterator *) | |
631 | BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
632 | if(U_FAILURE(status)) { | |
633 | errln("failed to create word break iterator."); | |
634 | } else { | |
635 | int32_t i = 0; | |
636 | int32_t pos, tag; | |
637 | UBool success; | |
638 | ||
639 | bi->setText(testString1); | |
640 | pos = bi->current(); | |
641 | tag = bi->getRuleStatus(); | |
642 | for (i=0; i<3; i++) { | |
643 | switch (i) { | |
644 | case 0: | |
645 | success = pos==0 && tag==UBRK_LINE_SOFT; break; | |
646 | case 1: | |
647 | success = pos==5 && tag==UBRK_LINE_SOFT; break; | |
648 | case 2: | |
649 | success = pos==12 && tag==UBRK_LINE_HARD; break; | |
650 | default: | |
651 | success = FALSE; break; | |
652 | } | |
653 | if (success == FALSE) { | |
654 | errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", | |
655 | i, pos, tag); | |
656 | break; | |
657 | } | |
658 | pos = bi->next(); | |
659 | tag = bi->getRuleStatus(); | |
660 | } | |
661 | if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || | |
662 | UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || | |
663 | UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) { | |
664 | errln("UBRK_LINE_* constants from header are inconsistent."); | |
665 | } | |
666 | } | |
667 | delete bi; | |
668 | ||
669 | } | |
670 | ||
671 | ||
672 | // | |
673 | // TestRuleStatusVec | |
674 | // Test the vector form of break rule status. | |
675 | // | |
676 | void RBBIAPITest::TestRuleStatusVec() { | |
677 | UnicodeString rulesString = "[A-N]{100}; \n" | |
678 | "[a-w]{200}; \n" | |
679 | "[\\p{L}]{300}; \n" | |
680 | "[\\p{N}]{400}; \n" | |
681 | "[0-5]{500}; \n" | |
682 | "!.*;\n"; | |
683 | UnicodeString testString1 = "Aapz5?"; | |
684 | int32_t statusVals[10]; | |
685 | int32_t numStatuses; | |
686 | int32_t pos; | |
687 | ||
688 | UErrorCode status=U_ZERO_ERROR; | |
689 | UParseError parseError; | |
690 | ||
691 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); | |
692 | TEST_ASSERT_SUCCESS(status); | |
693 | if (U_SUCCESS(status)) { | |
694 | bi->setText(testString1); | |
695 | ||
696 | // A | |
697 | pos = bi->next(); | |
698 | TEST_ASSERT(pos==1); | |
699 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
700 | TEST_ASSERT_SUCCESS(status); | |
701 | TEST_ASSERT(numStatuses == 2); | |
702 | TEST_ASSERT(statusVals[0] == 100); | |
703 | TEST_ASSERT(statusVals[1] == 300); | |
704 | ||
705 | // a | |
706 | pos = bi->next(); | |
707 | TEST_ASSERT(pos==2); | |
708 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
709 | TEST_ASSERT_SUCCESS(status); | |
710 | TEST_ASSERT(numStatuses == 2); | |
711 | TEST_ASSERT(statusVals[0] == 200); | |
712 | TEST_ASSERT(statusVals[1] == 300); | |
713 | ||
714 | // p | |
715 | pos = bi->next(); | |
716 | TEST_ASSERT(pos==3); | |
717 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
718 | TEST_ASSERT_SUCCESS(status); | |
719 | TEST_ASSERT(numStatuses == 2); | |
720 | TEST_ASSERT(statusVals[0] == 200); | |
721 | TEST_ASSERT(statusVals[1] == 300); | |
722 | ||
723 | // z | |
724 | pos = bi->next(); | |
725 | TEST_ASSERT(pos==4); | |
726 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
727 | TEST_ASSERT_SUCCESS(status); | |
728 | TEST_ASSERT(numStatuses == 1); | |
729 | TEST_ASSERT(statusVals[0] == 300); | |
730 | ||
731 | // 5 | |
732 | pos = bi->next(); | |
733 | TEST_ASSERT(pos==5); | |
734 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
735 | TEST_ASSERT_SUCCESS(status); | |
736 | TEST_ASSERT(numStatuses == 2); | |
737 | TEST_ASSERT(statusVals[0] == 400); | |
738 | TEST_ASSERT(statusVals[1] == 500); | |
739 | ||
740 | // ? | |
741 | pos = bi->next(); | |
742 | TEST_ASSERT(pos==6); | |
743 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
744 | TEST_ASSERT_SUCCESS(status); | |
745 | TEST_ASSERT(numStatuses == 1); | |
746 | TEST_ASSERT(statusVals[0] == 0); | |
747 | ||
748 | // | |
749 | // Check buffer overflow error handling. Char == A | |
750 | // | |
751 | bi->first(); | |
752 | pos = bi->next(); | |
753 | TEST_ASSERT(pos==1); | |
754 | memset(statusVals, -1, sizeof(statusVals)); | |
755 | numStatuses = bi->getRuleStatusVec(statusVals, 0, status); | |
756 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
757 | TEST_ASSERT(numStatuses == 2); | |
758 | TEST_ASSERT(statusVals[0] == -1); | |
759 | ||
760 | status = U_ZERO_ERROR; | |
761 | memset(statusVals, -1, sizeof(statusVals)); | |
762 | numStatuses = bi->getRuleStatusVec(statusVals, 1, status); | |
763 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
764 | TEST_ASSERT(numStatuses == 2); | |
765 | TEST_ASSERT(statusVals[0] == 100); | |
766 | TEST_ASSERT(statusVals[1] == -1); | |
767 | ||
768 | status = U_ZERO_ERROR; | |
769 | memset(statusVals, -1, sizeof(statusVals)); | |
770 | numStatuses = bi->getRuleStatusVec(statusVals, 2, status); | |
771 | TEST_ASSERT_SUCCESS(status); | |
772 | TEST_ASSERT(numStatuses == 2); | |
773 | TEST_ASSERT(statusVals[0] == 100); | |
774 | TEST_ASSERT(statusVals[1] == 300); | |
775 | TEST_ASSERT(statusVals[2] == -1); | |
776 | } | |
777 | delete bi; | |
778 | ||
779 | } | |
780 | ||
781 | // | |
782 | // Bug 2190 Regression test. Builder crash on rule consisting of only a | |
783 | // $variable reference | |
784 | void RBBIAPITest::TestBug2190() { | |
785 | UnicodeString rulesString1 = "$aaa = abcd;\n" | |
786 | "$bbb = $aaa;\n" | |
787 | "$bbb;\n"; | |
788 | UnicodeString testString1 = "abcdabcd"; | |
789 | // 01234567890 | |
790 | int32_t bounds1[] = {0, 4, 8}; | |
791 | UErrorCode status=U_ZERO_ERROR; | |
792 | UParseError parseError; | |
793 | ||
794 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); | |
795 | if(U_FAILURE(status)) { | |
796 | errln("FAIL : in construction"); | |
797 | } else { | |
798 | bi->setText(testString1); | |
799 | doBoundaryTest(*bi, testString1, bounds1); | |
800 | } | |
801 | delete bi; | |
802 | } | |
803 | ||
804 | ||
805 | void RBBIAPITest::TestRegistration() { | |
806 | #if !UCONFIG_NO_SERVICE | |
807 | UErrorCode status = U_ZERO_ERROR; | |
808 | BreakIterator* thai_word = BreakIterator::createWordInstance("th_TH", status); | |
809 | ||
810 | // ok to not delete these if we exit because of error? | |
811 | BreakIterator* thai_char = BreakIterator::createCharacterInstance("th_TH", status); | |
812 | BreakIterator* root_word = BreakIterator::createWordInstance("", status); | |
813 | BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); | |
814 | ||
815 | URegistryKey key = BreakIterator::registerInstance(thai_word, "xx", UBRK_WORD, status); | |
816 | { | |
817 | if (thai_word && *thai_word == *root_word) { | |
818 | errln("thai not different from root"); | |
819 | } | |
820 | } | |
821 | ||
822 | { | |
823 | BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); | |
824 | UBool fail = TRUE; | |
825 | if(result){ | |
826 | fail = *result != *thai_word; | |
827 | } | |
828 | delete result; | |
829 | if (fail) { | |
830 | errln("bad result for xx_XX/word"); | |
831 | } | |
832 | } | |
833 | ||
834 | { | |
835 | BreakIterator* result = BreakIterator::createCharacterInstance("th_TH", status); | |
836 | UBool fail = TRUE; | |
837 | if(result){ | |
838 | fail = *result != *thai_char; | |
839 | } | |
840 | delete result; | |
841 | if (fail) { | |
842 | errln("bad result for th_TH/char"); | |
843 | } | |
844 | } | |
845 | ||
846 | { | |
847 | BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); | |
848 | UBool fail = TRUE; | |
849 | if(result){ | |
850 | fail = *result != *root_char; | |
851 | } | |
852 | delete result; | |
853 | if (fail) { | |
854 | errln("bad result for xx_XX/char"); | |
855 | } | |
856 | } | |
857 | ||
858 | { | |
859 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
860 | UBool found = FALSE; | |
861 | const UnicodeString* p; | |
862 | while ((p = avail->snext(status))) { | |
863 | if (p->compare("xx") == 0) { | |
864 | found = TRUE; | |
865 | break; | |
866 | } | |
867 | } | |
868 | delete avail; | |
869 | if (!found) { | |
870 | errln("did not find test locale"); | |
871 | } | |
872 | } | |
873 | ||
874 | { | |
875 | UBool unreg = BreakIterator::unregister(key, status); | |
876 | if (!unreg) { | |
877 | errln("unable to unregister"); | |
878 | } | |
879 | } | |
880 | ||
881 | { | |
882 | BreakIterator* result = BreakIterator::createWordInstance("xx", status); | |
883 | BreakIterator* root = BreakIterator::createWordInstance("", status); | |
884 | UBool fail = TRUE; | |
885 | if(root){ | |
886 | fail = *root != *result; | |
887 | } | |
888 | delete root; | |
889 | delete result; | |
890 | if (fail) { | |
891 | errln("did not get root break"); | |
892 | } | |
893 | } | |
894 | ||
895 | { | |
896 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
897 | UBool found = FALSE; | |
898 | const UnicodeString* p; | |
899 | while ((p = avail->snext(status))) { | |
900 | if (p->compare("xx") == 0) { | |
901 | found = TRUE; | |
902 | break; | |
903 | } | |
904 | } | |
905 | delete avail; | |
906 | if (found) { | |
907 | errln("found test locale"); | |
908 | } | |
909 | } | |
910 | ||
911 | { | |
912 | int32_t count; | |
913 | UBool foundLocale = FALSE; | |
914 | const Locale *avail = BreakIterator::getAvailableLocales(count); | |
915 | for (int i=0; i<count; i++) { | |
916 | if (avail[i] == Locale::getEnglish()) { | |
917 | foundLocale = TRUE; | |
918 | break; | |
919 | } | |
920 | } | |
921 | if (foundLocale == FALSE) { | |
922 | errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); | |
923 | } | |
924 | } | |
925 | ||
926 | ||
927 | // that_word was adopted by factory | |
928 | delete thai_char; | |
929 | delete root_word; | |
930 | delete root_char; | |
931 | #endif | |
932 | } | |
933 | ||
934 | void RBBIAPITest::RoundtripRule(const char *dataFile) { | |
935 | UErrorCode status = U_ZERO_ERROR; | |
936 | UParseError parseError; | |
937 | parseError.line = 0; | |
938 | parseError.offset = 0; | |
939 | UDataMemory *data = udata_open(NULL, "brk", dataFile, &status); | |
940 | uint32_t length; | |
941 | const UChar *builtSource; | |
942 | const uint8_t *rbbiRules; | |
943 | const uint8_t *builtRules; | |
944 | ||
945 | if (U_FAILURE(status)) { | |
946 | errln("Can't open \"%s\"", dataFile); | |
947 | return; | |
948 | } | |
949 | ||
950 | builtRules = (const uint8_t *)udata_getMemory(data); | |
951 | builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); | |
952 | RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); | |
953 | if (U_FAILURE(status)) { | |
954 | errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", | |
955 | u_errorName(status), parseError.line, parseError.offset); | |
956 | return; | |
957 | }; | |
958 | rbbiRules = brkItr->getBinaryRules(length); | |
959 | logln("Comparing \"%s\" len=%d", dataFile, length); | |
960 | if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { | |
961 | errln("Built rules and rebuilt rules are different %s", dataFile); | |
962 | return; | |
963 | } | |
964 | delete brkItr; | |
965 | udata_close(data); | |
966 | } | |
967 | ||
968 | void RBBIAPITest::TestRoundtripRules() { | |
969 | RoundtripRule("word"); | |
970 | RoundtripRule("title"); | |
971 | RoundtripRule("sent"); | |
972 | RoundtripRule("line"); | |
973 | RoundtripRule("char"); | |
974 | if (!quick) { | |
975 | RoundtripRule("word_th"); | |
976 | RoundtripRule("line_th"); | |
977 | } | |
978 | } | |
979 | ||
980 | //--------------------------------------------- | |
981 | // runIndexedTest | |
982 | //--------------------------------------------- | |
983 | ||
984 | void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
985 | { | |
986 | if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); | |
987 | switch (index) { | |
988 | // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; | |
989 | case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; | |
990 | case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; | |
991 | case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; | |
992 | case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; | |
993 | case 4: name = "TestIteration"; if (exec) TestIteration(); break; | |
994 | case 5: name = "extra"; break; // Extra | |
995 | case 6: name = "extra"; break; // Extra | |
996 | case 7: name = "TestBuilder"; if (exec) TestBuilder(); break; | |
997 | case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; | |
998 | case 9: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; | |
999 | case 10: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; | |
1000 | case 11: name = "TestBug2190"; if (exec) TestBug2190(); break; | |
1001 | case 12: name = "TestRegistration"; if (exec) TestRegistration(); break; | |
1002 | case 13: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; | |
1003 | case 14: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; | |
1004 | ||
1005 | default: name = ""; break; // needed to end loop | |
1006 | } | |
1007 | } | |
1008 | ||
1009 | //--------------------------------------------- | |
1010 | //Internal subroutines | |
1011 | //--------------------------------------------- | |
1012 | ||
1013 | void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ | |
1014 | logln((UnicodeString)"testIsBoundary():"); | |
1015 | int32_t p = 0; | |
1016 | UBool isB; | |
1017 | for (int32_t i = 0; i < text.length(); i++) { | |
1018 | isB = bi.isBoundary(i); | |
1019 | logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); | |
1020 | ||
1021 | if (i == boundaries[p]) { | |
1022 | if (!isB) | |
1023 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); | |
1024 | p++; | |
1025 | } | |
1026 | else { | |
1027 | if (isB) | |
1028 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); | |
1029 | } | |
1030 | } | |
1031 | } | |
1032 | void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ | |
1033 | UnicodeString selected; | |
1034 | UnicodeString expected=CharsToUnicodeString(expectedString); | |
1035 | ||
1036 | if(gotoffset != expectedOffset) | |
1037 | errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); | |
1038 | if(start <= gotoffset){ | |
1039 | testString.extractBetween(start, gotoffset, selected); | |
1040 | } | |
1041 | else{ | |
1042 | testString.extractBetween(gotoffset, start, selected); | |
1043 | } | |
1044 | if(selected.compare(expected) != 0) | |
1045 | errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); | |
1046 | else | |
1047 | logln(prettify("****selected \"" + selected + "\"")); | |
1048 | } | |
1049 | ||
1050 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |