]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/rbbiapts.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbiapts.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1999-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /************************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ************************************************************************/
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_BREAK_ITERATION
15
16 #include "unicode/uchar.h"
17 #include "intltest.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
20 #include "rbbiapts.h"
21 #include "rbbidata.h"
22 #include "cstring.h"
23 #include "ubrkimpl.h"
24 #include "unicode/ustring.h"
25 #include "unicode/utext.h"
26
27 /**
28 * API Test the RuleBasedBreakIterator class
29 */
30
31
// Report a test failure (file, line and ICU error name) through errln() when
// `status` holds a failure code. Execution continues after the report.
// The body is wrapped in do { } while (0) so that the macro followed by ';'
// is exactly one statement and can be used safely in an unbraced if/else.
#define TEST_ASSERT_SUCCESS(status) do { if (U_FAILURE(status)) { \
    errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); } } while (0)

// Report a test failure (file and line) through errln() when `expr` compares
// equal to FALSE. Execution continues after the report.
#define TEST_ASSERT(expr) do { if ((expr)==FALSE) { \
    errln("Test Failure at file %s, line %d", __FILE__, __LINE__); } } while (0)
37
38 void RBBIAPITest::TestCloneEquals()
39 {
40
41 UErrorCode status=U_ZERO_ERROR;
42 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
44 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
45 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
46 if(U_FAILURE(status)){
47 errln((UnicodeString)"FAIL : in construction");
48 return;
49 }
50
51
52 UnicodeString testString="Testing word break iterators's clone() and equals()";
53 bi1->setText(testString);
54 bi2->setText(testString);
55 biequal->setText(testString);
56
57 bi3->setText("hello");
58
59 logln((UnicodeString)"Testing equals()");
60
61 logln((UnicodeString)"Testing == and !=");
62 UBool b = (*bi1 != *biequal);
63 b |= *bi1 == *bi2;
64 b |= *bi1 == *bi3;
65 if (b) {
66 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
67 }
68
69 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
70 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
71
72
73 // Quick test of RulesBasedBreakIterator assignment -
74 // Check that
75 // two different iterators are !=
76 // they are == after assignment
77 // source and dest iterator produce the same next() after assignment.
78 // deleting one doesn't disable the other.
79 logln("Testing assignment");
80 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
81 if(U_FAILURE(status)){
82 errln((UnicodeString)"FAIL : in construction");
83 return;
84 }
85
86 RuleBasedBreakIterator biDefault, biDefault2;
87 if(U_FAILURE(status)){
88 errln((UnicodeString)"FAIL : in construction of default iterator");
89 return;
90 }
91 if (biDefault == *bix) {
92 errln((UnicodeString)"ERROR: iterators should not compare ==");
93 return;
94 }
95 if (biDefault != biDefault2) {
96 errln((UnicodeString)"ERROR: iterators should compare ==");
97 return;
98 }
99
100
101 UnicodeString HelloString("Hello Kitty");
102 bix->setText(HelloString);
103 if (*bix == *bi2) {
104 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
105 }
106 *bix = *bi2;
107 if (*bix != *bi2) {
108 errln(UnicodeString("ERROR: strings should be equal before assignment."));
109 }
110
111 int bixnext = bix->next();
112 int bi2next = bi2->next();
113 if (! (bixnext == bi2next && bixnext == 7)) {
114 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
115 }
116 delete bix;
117 if (bi2->next() != 8) {
118 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
119 }
120
121
122
123 logln((UnicodeString)"Testing clone()");
124 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
125 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
126
127 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
128 *bi1clone == *bi3 || *bi1clone == *bi2)
129 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
130
131 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
132 *bi2clone == *bi3 || *bi2clone != *bi2)
133 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
134
135 if(bi1->getText() != bi1clone->getText() ||
136 bi2clone->getText() != bi2->getText() ||
137 *bi2clone == *bi1clone )
138 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
139
140 delete bi1clone;
141 delete bi2clone;
142 delete bi1;
143 delete bi3;
144 delete bi2;
145 delete biequal;
146 }
147
148 void RBBIAPITest::TestBoilerPlate()
149 {
150 UErrorCode status = U_ZERO_ERROR;
151 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
152 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
153 if (U_FAILURE(status)) {
154 errln("Creation of break iterator failed %s", u_errorName(status));
155 return;
156 }
157 if(*a!=*b){
158 errln("Failed: boilerplate method operator!= does not return correct results");
159 }
160 BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
161 if(a && c){
162 if(*c==*a){
163 errln("Failed: boilerplate method opertator== does not return correct results");
164 }
165 }else{
166 errln("creation of break iterator failed");
167 }
168 delete a;
169 delete b;
170 delete c;
171 }
172
173 void RBBIAPITest::TestgetRules()
174 {
175 UErrorCode status=U_ZERO_ERROR;
176
177 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
178 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
179 if(U_FAILURE(status)){
180 errln((UnicodeString)"FAIL: in construction");
181 delete bi1;
182 delete bi2;
183 return;
184 }
185
186
187
188 logln((UnicodeString)"Testing toString()");
189
190 bi1->setText((UnicodeString)"Hello there");
191
192 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
193
194 UnicodeString temp=bi1->getRules();
195 UnicodeString temp2=bi2->getRules();
196 UnicodeString temp3=bi3->getRules();
197 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
198 errln((UnicodeString)"ERROR: error in getRules() method");
199
200 delete bi1;
201 delete bi2;
202 delete bi3;
203 }
204 void RBBIAPITest::TestHashCode()
205 {
206 UErrorCode status=U_ZERO_ERROR;
207 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
208 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
209 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
210 if(U_FAILURE(status)){
211 errln((UnicodeString)"FAIL : in construction");
212 delete bi1;
213 delete bi2;
214 delete bi3;
215 return;
216 }
217
218
219 logln((UnicodeString)"Testing hashCode()");
220
221 bi1->setText((UnicodeString)"Hash code");
222 bi2->setText((UnicodeString)"Hash code");
223 bi3->setText((UnicodeString)"Hash code");
224
225 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
226 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
227
228 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
229 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
230 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
231
232 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
233 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
234 errln((UnicodeString)"ERROR: different objects have same hashcodes");
235
236 delete bi1clone;
237 delete bi2clone;
238 delete bi1;
239 delete bi2;
240 delete bi3;
241
242 }
243 void RBBIAPITest::TestGetSetAdoptText()
244 {
245 logln((UnicodeString)"Testing getText setText ");
246 UErrorCode status=U_ZERO_ERROR;
247 UnicodeString str1="first string.";
248 UnicodeString str2="Second string.";
249 RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
250 RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
251 if(U_FAILURE(status)){
252 errln((UnicodeString)"FAIL : in construction");
253 return;
254 }
255
256
257 CharacterIterator* text1= new StringCharacterIterator(str1);
258 CharacterIterator* text1Clone = text1->clone();
259 CharacterIterator* text2= new StringCharacterIterator(str2);
260 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
261
262 wordIter1->setText(str1);
263 CharacterIterator *tci = &wordIter1->getText();
264 UnicodeString tstr;
265 tci->getText(tstr);
266 TEST_ASSERT(tstr == str1);
267 if(wordIter1->current() != 0)
268 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
269
270 wordIter1->next(2);
271
272 wordIter1->setText(str2);
273 if(wordIter1->current() != 0)
274 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
275
276
277 charIter1->adoptText(text1Clone);
278 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
279 tci = &wordIter1->getText();
280 tci->getText(tstr);
281 TEST_ASSERT(tstr == str2);
282 tci = &charIter1->getText();
283 tci->getText(tstr);
284 TEST_ASSERT(tstr == str1);
285
286
287 RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
288 rb->adoptText(text1);
289 if(rb->getText() != *text1)
290 errln((UnicodeString)"ERROR:1 error in adoptText ");
291 rb->adoptText(text2);
292 if(rb->getText() != *text2)
293 errln((UnicodeString)"ERROR:2 error in adoptText ");
294
295 // Adopt where iterator range is less than the entire orignal source string.
296 // (With the change of the break engine to working with UText internally,
297 // CharacterIterators starting at positions other than zero are not supported)
298 rb->adoptText(text3);
299 TEST_ASSERT(rb->preceding(2) == 0);
300 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
301 //if(rb->preceding(2) != 3) {
302 // errln((UnicodeString)"ERROR:3 error in adoptText ");
303 //}
304 //if(rb->following(11) != BreakIterator::DONE) {
305 // errln((UnicodeString)"ERROR:4 error in adoptText ");
306 //}
307
308 // UText API
309 //
310 // Quick test to see if UText is working at all.
311 //
312 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
313 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
314 // 012345678901
315
316 status = U_ZERO_ERROR;
317 UText *ut = utext_openUTF8(NULL, s1, -1, &status);
318 wordIter1->setText(ut, status);
319 TEST_ASSERT_SUCCESS(status);
320
321 int32_t pos;
322 pos = wordIter1->first();
323 TEST_ASSERT(pos==0);
324 pos = wordIter1->next();
325 TEST_ASSERT(pos==5);
326 pos = wordIter1->next();
327 TEST_ASSERT(pos==6);
328 pos = wordIter1->next();
329 TEST_ASSERT(pos==11);
330 pos = wordIter1->next();
331 TEST_ASSERT(pos==UBRK_DONE);
332
333 status = U_ZERO_ERROR;
334 UText *ut2 = utext_openUTF8(NULL, s2, -1, &status);
335 TEST_ASSERT_SUCCESS(status);
336 wordIter1->setText(ut2, status);
337 TEST_ASSERT_SUCCESS(status);
338
339 pos = wordIter1->first();
340 TEST_ASSERT(pos==0);
341 pos = wordIter1->next();
342 TEST_ASSERT(pos==3);
343 pos = wordIter1->next();
344 TEST_ASSERT(pos==4);
345
346 pos = wordIter1->last();
347 TEST_ASSERT(pos==6);
348 pos = wordIter1->previous();
349 TEST_ASSERT(pos==4);
350 pos = wordIter1->previous();
351 TEST_ASSERT(pos==3);
352 pos = wordIter1->previous();
353 TEST_ASSERT(pos==0);
354 pos = wordIter1->previous();
355 TEST_ASSERT(pos==UBRK_DONE);
356
357 status = U_ZERO_ERROR;
358 UnicodeString sEmpty;
359 UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
360 wordIter1->getUText(gut2, status);
361 TEST_ASSERT_SUCCESS(status);
362 utext_close(gut2);
363
364 utext_close(ut);
365 utext_close(ut2);
366
367 delete wordIter1;
368 delete charIter1;
369 delete rb;
370
371 }
372
373
374 void RBBIAPITest::TestIteration()
375 {
376 // This test just verifies that the API is present.
377 // Testing for correct operation of the break rules happens elsewhere.
378
379 UErrorCode status=U_ZERO_ERROR;
380 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
381 if (U_FAILURE(status) || bi == NULL) {
382 errln("Failure creating character break iterator. Status = %s", u_errorName(status));
383 }
384 delete bi;
385
386 status=U_ZERO_ERROR;
387 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
388 if (U_FAILURE(status) || bi == NULL) {
389 errln("Failure creating Word break iterator. Status = %s", u_errorName(status));
390 }
391 delete bi;
392
393 status=U_ZERO_ERROR;
394 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
395 if (U_FAILURE(status) || bi == NULL) {
396 errln("Failure creating Line break iterator. Status = %s", u_errorName(status));
397 }
398 delete bi;
399
400 status=U_ZERO_ERROR;
401 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
402 if (U_FAILURE(status) || bi == NULL) {
403 errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status));
404 }
405 delete bi;
406
407 status=U_ZERO_ERROR;
408 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
409 if (U_FAILURE(status) || bi == NULL) {
410 errln("Failure creating Title break iterator. Status = %s", u_errorName(status));
411 }
412 delete bi;
413
414 status=U_ZERO_ERROR;
415 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
416 if (U_FAILURE(status) || bi == NULL) {
417 errln("Failure creating character break iterator. Status = %s", u_errorName(status));
418 return; // Skip the rest of these tests.
419 }
420
421
422 UnicodeString testString="0123456789";
423 bi->setText(testString);
424
425 int32_t i;
426 i = bi->first();
427 if (i != 0) {
428 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
429 }
430
431 i = bi->last();
432 if (i != 10) {
433 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
434 }
435
436 //
437 // Previous
438 //
439 bi->last();
440 i = bi->previous();
441 if (i != 9) {
442 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
443 }
444
445
446 bi->first();
447 i = bi->previous();
448 if (i != BreakIterator::DONE) {
449 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i);
450 }
451
452 //
453 // next()
454 //
455 bi->first();
456 i = bi->next();
457 if (i != 1) {
458 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
459 }
460
461 bi->last();
462 i = bi->next();
463 if (i != BreakIterator::DONE) {
464 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i);
465 }
466
467
468 //
469 // current()
470 //
471 bi->first();
472 i = bi->current();
473 if (i != 0) {
474 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
475 }
476
477 bi->next();
478 i = bi->current();
479 if (i != 1) {
480 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
481 }
482
483 bi->last();
484 bi->next();
485 i = bi->current();
486 if (i != 10) {
487 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
488 }
489
490 bi->first();
491 bi->previous();
492 i = bi->current();
493 if (i != 0) {
494 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
495 }
496
497
498 //
499 // Following()
500 //
501 i = bi->following(4);
502 if (i != 5) {
503 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
504 }
505
506 i = bi->following(9);
507 if (i != 10) {
508 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i);
509 }
510
511 i = bi->following(10);
512 if (i != BreakIterator::DONE) {
513 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
514 }
515
516
517 //
518 // Preceding
519 //
520 i = bi->preceding(4);
521 if (i != 3) {
522 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
523 }
524
525 i = bi->preceding(10);
526 if (i != 9) {
527 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
528 }
529
530 i = bi->preceding(1);
531 if (i != 0) {
532 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
533 }
534
535 i = bi->preceding(0);
536 if (i != BreakIterator::DONE) {
537 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
538 }
539
540
541 //
542 // isBoundary()
543 //
544 bi->first();
545 if (bi->isBoundary(3) != TRUE) {
546 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
547 }
548 i = bi->current();
549 if (i != 3) {
550 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i);
551 }
552
553
554 if (bi->isBoundary(11) != FALSE) {
555 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
556 }
557 i = bi->current();
558 if (i != 10) {
559 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
560 }
561
562 //
563 // next(n)
564 //
565 bi->first();
566 i = bi->next(4);
567 if (i != 4) {
568 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
569 }
570
571 i = bi->next(6);
572 if (i != 10) {
573 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i);
574 }
575
576 bi->first();
577 i = bi->next(11);
578 if (i != BreakIterator::DONE) {
579 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i);
580 }
581
582 delete bi;
583
584 }
585
586
587
588
589
590
591 void RBBIAPITest::TestBuilder() {
592 UnicodeString rulesString1 = "$Letters = [:L:];\n"
593 "$Numbers = [:N:];\n"
594 "$Letters+;\n"
595 "$Numbers+;\n"
596 "[^$Letters $Numbers];\n"
597 "!.*;\n";
598 UnicodeString testString1 = "abc123..abc";
599 // 01234567890
600 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
601 UErrorCode status=U_ZERO_ERROR;
602 UParseError parseError;
603
604 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
605 if(U_FAILURE(status)) {
606 errln("FAIL : in construction");
607 } else {
608 bi->setText(testString1);
609 doBoundaryTest(*bi, testString1, bounds1);
610 }
611 delete bi;
612 }
613
614
615 //
616 // TestQuoteGrouping
617 // Single quotes within rules imply a grouping, so that a modifier
618 // following the quoted text (* or +) applies to all of the quoted chars.
619 //
620 void RBBIAPITest::TestQuoteGrouping() {
621 UnicodeString rulesString1 = "#Here comes the rule...\n"
622 "'$@!'*;\n" // (\$\@\!)*
623 ".;\n";
624
625 UnicodeString testString1 = "$@!$@!X$@!!X";
626 // 0123456789012
627 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
628 UErrorCode status=U_ZERO_ERROR;
629 UParseError parseError;
630
631 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
632 if(U_FAILURE(status)) {
633 errln("FAIL : in construction");
634 } else {
635 bi->setText(testString1);
636 doBoundaryTest(*bi, testString1, bounds1);
637 }
638 delete bi;
639 }
640
641 //
642 // TestRuleStatus
643 // Test word break rule status constants.
644 //
645 void RBBIAPITest::TestRuleStatus() {
646 UChar str[30];
647 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
648 // 012345678901234567 8 9 0 1 2 3 4 5 6
649 // Ideographic Katakana Hiragana
650 str, 30);
651 UnicodeString testString1(str);
652 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
653 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
654 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
655 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
656 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA};
657
658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
661 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
662
663 UErrorCode status=U_ZERO_ERROR;
664
665 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
666 if(U_FAILURE(status)) {
667 errln("FAIL : in construction");
668 } else {
669 bi->setText(testString1);
670 // First test that the breaks are in the right spots.
671 doBoundaryTest(*bi, testString1, bounds1);
672
673 // Then go back and check tag values
674 int32_t i = 0;
675 int32_t pos, tag;
676 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
677 if (pos != bounds1[i]) {
678 errln("FAIL: unexpected word break at postion %d", pos);
679 break;
680 }
681 tag = bi->getRuleStatus();
682 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
683 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
684 break;
685 }
686
687 // Check that we get the same tag values from getRuleStatusVec()
688 int32_t vec[10];
689 int t = bi->getRuleStatusVec(vec, 10, status);
690 TEST_ASSERT_SUCCESS(status);
691 TEST_ASSERT(t==1);
692 TEST_ASSERT(vec[0] == tag);
693 }
694 }
695 delete bi;
696
697 // Now test line break status. This test mostly is to confirm that the status constants
698 // are correctly declared in the header.
699 testString1 = "test line. \n";
700 // break type s s h
701
702 bi = (RuleBasedBreakIterator *)
703 BreakIterator::createLineInstance(Locale::getEnglish(), status);
704 if(U_FAILURE(status)) {
705 errln("failed to create word break iterator.");
706 } else {
707 int32_t i = 0;
708 int32_t pos, tag;
709 UBool success;
710
711 bi->setText(testString1);
712 pos = bi->current();
713 tag = bi->getRuleStatus();
714 for (i=0; i<3; i++) {
715 switch (i) {
716 case 0:
717 success = pos==0 && tag==UBRK_LINE_SOFT; break;
718 case 1:
719 success = pos==5 && tag==UBRK_LINE_SOFT; break;
720 case 2:
721 success = pos==12 && tag==UBRK_LINE_HARD; break;
722 default:
723 success = FALSE; break;
724 }
725 if (success == FALSE) {
726 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
727 i, pos, tag);
728 break;
729 }
730 pos = bi->next();
731 tag = bi->getRuleStatus();
732 }
733 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
734 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
735 UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
736 errln("UBRK_LINE_* constants from header are inconsistent.");
737 }
738 }
739 delete bi;
740
741 }
742
743
744 //
745 // TestRuleStatusVec
746 // Test the vector form of break rule status.
747 //
748 void RBBIAPITest::TestRuleStatusVec() {
749 UnicodeString rulesString = "[A-N]{100}; \n"
750 "[a-w]{200}; \n"
751 "[\\p{L}]{300}; \n"
752 "[\\p{N}]{400}; \n"
753 "[0-5]{500}; \n"
754 "!.*;\n";
755 UnicodeString testString1 = "Aapz5?";
756 int32_t statusVals[10];
757 int32_t numStatuses;
758 int32_t pos;
759
760 UErrorCode status=U_ZERO_ERROR;
761 UParseError parseError;
762
763 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
764 TEST_ASSERT_SUCCESS(status);
765 if (U_SUCCESS(status)) {
766 bi->setText(testString1);
767
768 // A
769 pos = bi->next();
770 TEST_ASSERT(pos==1);
771 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
772 TEST_ASSERT_SUCCESS(status);
773 TEST_ASSERT(numStatuses == 2);
774 TEST_ASSERT(statusVals[0] == 100);
775 TEST_ASSERT(statusVals[1] == 300);
776
777 // a
778 pos = bi->next();
779 TEST_ASSERT(pos==2);
780 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
781 TEST_ASSERT_SUCCESS(status);
782 TEST_ASSERT(numStatuses == 2);
783 TEST_ASSERT(statusVals[0] == 200);
784 TEST_ASSERT(statusVals[1] == 300);
785
786 // p
787 pos = bi->next();
788 TEST_ASSERT(pos==3);
789 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
790 TEST_ASSERT_SUCCESS(status);
791 TEST_ASSERT(numStatuses == 2);
792 TEST_ASSERT(statusVals[0] == 200);
793 TEST_ASSERT(statusVals[1] == 300);
794
795 // z
796 pos = bi->next();
797 TEST_ASSERT(pos==4);
798 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
799 TEST_ASSERT_SUCCESS(status);
800 TEST_ASSERT(numStatuses == 1);
801 TEST_ASSERT(statusVals[0] == 300);
802
803 // 5
804 pos = bi->next();
805 TEST_ASSERT(pos==5);
806 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807 TEST_ASSERT_SUCCESS(status);
808 TEST_ASSERT(numStatuses == 2);
809 TEST_ASSERT(statusVals[0] == 400);
810 TEST_ASSERT(statusVals[1] == 500);
811
812 // ?
813 pos = bi->next();
814 TEST_ASSERT(pos==6);
815 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
816 TEST_ASSERT_SUCCESS(status);
817 TEST_ASSERT(numStatuses == 1);
818 TEST_ASSERT(statusVals[0] == 0);
819
820 //
821 // Check buffer overflow error handling. Char == A
822 //
823 bi->first();
824 pos = bi->next();
825 TEST_ASSERT(pos==1);
826 memset(statusVals, -1, sizeof(statusVals));
827 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
828 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
829 TEST_ASSERT(numStatuses == 2);
830 TEST_ASSERT(statusVals[0] == -1);
831
832 status = U_ZERO_ERROR;
833 memset(statusVals, -1, sizeof(statusVals));
834 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
835 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
836 TEST_ASSERT(numStatuses == 2);
837 TEST_ASSERT(statusVals[0] == 100);
838 TEST_ASSERT(statusVals[1] == -1);
839
840 status = U_ZERO_ERROR;
841 memset(statusVals, -1, sizeof(statusVals));
842 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
843 TEST_ASSERT_SUCCESS(status);
844 TEST_ASSERT(numStatuses == 2);
845 TEST_ASSERT(statusVals[0] == 100);
846 TEST_ASSERT(statusVals[1] == 300);
847 TEST_ASSERT(statusVals[2] == -1);
848 }
849 delete bi;
850
851 }
852
853 //
854 // Bug 2190 Regression test. Builder crash on rule consisting of only a
855 // $variable reference
856 void RBBIAPITest::TestBug2190() {
857 UnicodeString rulesString1 = "$aaa = abcd;\n"
858 "$bbb = $aaa;\n"
859 "$bbb;\n";
860 UnicodeString testString1 = "abcdabcd";
861 // 01234567890
862 int32_t bounds1[] = {0, 4, 8};
863 UErrorCode status=U_ZERO_ERROR;
864 UParseError parseError;
865
866 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
867 if(U_FAILURE(status)) {
868 errln("FAIL : in construction");
869 } else {
870 bi->setText(testString1);
871 doBoundaryTest(*bi, testString1, bounds1);
872 }
873 delete bi;
874 }
875
876
877 void RBBIAPITest::TestRegistration() {
878 #if !UCONFIG_NO_SERVICE
879 UErrorCode status = U_ZERO_ERROR;
880 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
881
882 // ok to not delete these if we exit because of error?
883 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
884 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
885 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
886
887 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
888 {
889 if (ja_word && *ja_word == *root_word) {
890 errln("japan not different from root");
891 }
892 }
893
894 {
895 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
896 UBool fail = TRUE;
897 if(result){
898 fail = *result != *ja_word;
899 }
900 delete result;
901 if (fail) {
902 errln("bad result for xx_XX/word");
903 }
904 }
905
906 {
907 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
908 UBool fail = TRUE;
909 if(result){
910 fail = *result != *ja_char;
911 }
912 delete result;
913 if (fail) {
914 errln("bad result for ja_JP/char");
915 }
916 }
917
918 {
919 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
920 UBool fail = TRUE;
921 if(result){
922 fail = *result != *root_char;
923 }
924 delete result;
925 if (fail) {
926 errln("bad result for xx_XX/char");
927 }
928 }
929
930 {
931 StringEnumeration* avail = BreakIterator::getAvailableLocales();
932 UBool found = FALSE;
933 const UnicodeString* p;
934 while ((p = avail->snext(status))) {
935 if (p->compare("xx") == 0) {
936 found = TRUE;
937 break;
938 }
939 }
940 delete avail;
941 if (!found) {
942 errln("did not find test locale");
943 }
944 }
945
946 {
947 UBool unreg = BreakIterator::unregister(key, status);
948 if (!unreg) {
949 errln("unable to unregister");
950 }
951 }
952
953 {
954 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
955 BreakIterator* root = BreakIterator::createWordInstance("", status);
956 UBool fail = TRUE;
957 if(root){
958 fail = *root != *result;
959 }
960 delete root;
961 delete result;
962 if (fail) {
963 errln("did not get root break");
964 }
965 }
966
967 {
968 StringEnumeration* avail = BreakIterator::getAvailableLocales();
969 UBool found = FALSE;
970 const UnicodeString* p;
971 while ((p = avail->snext(status))) {
972 if (p->compare("xx") == 0) {
973 found = TRUE;
974 break;
975 }
976 }
977 delete avail;
978 if (found) {
979 errln("found test locale");
980 }
981 }
982
983 {
984 int32_t count;
985 UBool foundLocale = FALSE;
986 const Locale *avail = BreakIterator::getAvailableLocales(count);
987 for (int i=0; i<count; i++) {
988 if (avail[i] == Locale::getEnglish()) {
989 foundLocale = TRUE;
990 break;
991 }
992 }
993 if (foundLocale == FALSE) {
994 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
995 }
996 }
997
998
999 // ja_word was adopted by factory
1000 delete ja_char;
1001 delete root_word;
1002 delete root_char;
1003 #endif
1004 }
1005
1006 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1007 UErrorCode status = U_ZERO_ERROR;
1008 UParseError parseError;
1009 parseError.line = 0;
1010 parseError.offset = 0;
1011 UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status);
1012 uint32_t length;
1013 const UChar *builtSource;
1014 const uint8_t *rbbiRules;
1015 const uint8_t *builtRules;
1016
1017 if (U_FAILURE(status)) {
1018 errln("Can't open \"%s\"", dataFile);
1019 return;
1020 }
1021
1022 builtRules = (const uint8_t *)udata_getMemory(data);
1023 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1024 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1025 if (U_FAILURE(status)) {
1026 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1027 u_errorName(status), parseError.line, parseError.offset);
1028 return;
1029 };
1030 rbbiRules = brkItr->getBinaryRules(length);
1031 logln("Comparing \"%s\" len=%d", dataFile, length);
1032 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1033 errln("Built rules and rebuilt rules are different %s", dataFile);
1034 return;
1035 }
1036 delete brkItr;
1037 udata_close(data);
1038 }
1039
1040 void RBBIAPITest::TestRoundtripRules() {
1041 RoundtripRule("word");
1042 RoundtripRule("title");
1043 RoundtripRule("sent");
1044 RoundtripRule("line");
1045 RoundtripRule("char");
1046 if (!quick) {
1047 RoundtripRule("word_ja");
1048 RoundtripRule("word_POSIX");
1049 }
1050 }
1051
1052 //---------------------------------------------
1053 // runIndexedTest
1054 //---------------------------------------------
1055
1056 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1057 {
1058 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1059 switch (index) {
1060 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1061 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1062 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1063 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1064 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1065 case 4: name = "TestIteration"; if (exec) TestIteration(); break;
1066 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1067 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1068 case 7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1069 case 8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1070 case 9: name = "TestBug2190"; if (exec) TestBug2190(); break;
1071 case 10: name = "TestRegistration"; if (exec) TestRegistration(); break;
1072 case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1073 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1074
1075 default: name = ""; break; // needed to end loop
1076 }
1077 }
1078
1079 //---------------------------------------------
1080 //Internal subroutines
1081 //---------------------------------------------
1082
1083 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1084 logln((UnicodeString)"testIsBoundary():");
1085 int32_t p = 0;
1086 UBool isB;
1087 for (int32_t i = 0; i < text.length(); i++) {
1088 isB = bi.isBoundary(i);
1089 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1090
1091 if (i == boundaries[p]) {
1092 if (!isB)
1093 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1094 p++;
1095 }
1096 else {
1097 if (isB)
1098 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1099 }
1100 }
1101 }
1102 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1103 UnicodeString selected;
1104 UnicodeString expected=CharsToUnicodeString(expectedString);
1105
1106 if(gotoffset != expectedOffset)
1107 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1108 if(start <= gotoffset){
1109 testString.extractBetween(start, gotoffset, selected);
1110 }
1111 else{
1112 testString.extractBetween(gotoffset, start, selected);
1113 }
1114 if(selected.compare(expected) != 0)
1115 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1116 else
1117 logln(prettify("****selected \"" + selected + "\""));
1118 }
1119
1120 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */