1 /********************************************************************
3 * Copyright (c) 1999-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /************************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ************************************************************************/
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/uchar.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
23 #include "unicode/ustring.h"
26 * API Test the RuleBasedBreakIterator class
// Reports a failure through errln(), with the file name, line number and ICU
// error name, when `status` holds a failure code (U_FAILURE(status)).
30 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
31 errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
// Reports a failure through errln(), with the file name and line number,
// when `expr` compares equal to FALSE.
33 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
34 errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}}
// Exercises operator==, operator!=, assignment and clone() of
// RuleBasedBreakIterator. Three character iterators (bi1, biequal, bi3) and
// one word iterator (bi2) are created for the default locale; every
// comparison that disagrees with the test's expectation is reported
// through errln().
// NOTE(review): some guarding conditions and statements of this function are
// not visible in this listing; the comments below describe only what is shown.
36 void RBBIAPITest::TestCloneEquals()
39 UErrorCode status
=U_ZERO_ERROR
;
40 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
41 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
42 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
43 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
44 if(U_FAILURE(status
)){
45 errln((UnicodeString
)"FAIL : in construction");
// bi1, bi2 and biequal share the same text; bi3 gets a different one.
50 UnicodeString testString
="Testing word break iterators's clone() and equals()";
51 bi1
->setText(testString
);
52 bi2
->setText(testString
);
53 biequal
->setText(testString
);
55 bi3
->setText("hello");
57 logln((UnicodeString
)"Testing equals()");
59 logln((UnicodeString
)"Testing == and !=");
60 UBool b
= (*bi1
!= *biequal
);
64 errln((UnicodeString
)"ERROR:1 RBBI's == and != operator failed.");
// The word iterator must not equal either character iterator, and two
// character iterators over different text must not compare equal.
67 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
68 errln((UnicodeString
)"ERROR:2 RBBI's == and != operator failed.");
71 // Quick test of RuleBasedBreakIterator assignment -
73 // two different iterators are !=
74 // they are == after assignment
75 // source and dest iterator produce the same next() after assignment.
76 // deleting one doesn't disable the other.
77 logln("Testing assignment");
78 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
);
79 if(U_FAILURE(status
)){
80 errln((UnicodeString
)"FAIL : in construction");
84 RuleBasedBreakIterator biDefault
, biDefault2
;
85 if(U_FAILURE(status
)){
86 errln((UnicodeString
)"FAIL : in construction of default iterator");
// A default-constructed iterator is expected to differ from the English
// line iterator ...
89 if (biDefault
== *bix
) {
90 errln((UnicodeString
)"ERROR: iterators should not compare ==");
// ... while two default-constructed iterators are expected to be equal.
93 if (biDefault
!= biDefault2
) {
94 errln((UnicodeString
)"ERROR: iterators should compare ==");
99 UnicodeString
HelloString("Hello Kitty");
100 bix
->setText(HelloString
);
102 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
// NOTE(review): the message below says "before assignment"; given the
// preceding message it presumably refers to the state after the
// assignment - confirm against the guarding condition, not visible here.
106 errln(UnicodeString("ERROR: strings should be equal before assignment."));
// Both iterators are expected to return 7 from their first next() call.
109 int bixnext
= bix
->next();
110 int bi2next
= bi2
->next();
111 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
112 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
115 if (bi2
->next() != 8) {
116 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
121 logln((UnicodeString
)"Testing clone()");
122 RuleBasedBreakIterator
* bi1clone
=(RuleBasedBreakIterator
*)bi1
->clone();
123 RuleBasedBreakIterator
* bi2clone
=(RuleBasedBreakIterator
*)bi2
->clone();
// bi1clone is expected to equal bi1 and biequal, and to differ from bi3 and bi2.
125 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
126 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
127 errln((UnicodeString
)"ERROR:1 RBBI's clone() method failed");
// bi2clone is expected to equal only bi2.
129 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
130 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
131 errln((UnicodeString
)"ERROR:2 RBBI's clone() method failed");
// Each clone must report the same text as its original, and the two clones
// must not compare equal to each other.
133 if(bi1
->getText() != bi1clone
->getText() ||
134 bi2clone
->getText() != bi2
->getText() ||
135 *bi2clone
== *bi1clone
)
136 errln((UnicodeString
)"ERROR: RBBI's clone() method failed");
// Checks the boilerplate comparison operators of BreakIterator using line
// iterators created for the locales "hi", "hi_IN" and "th".
// NOTE(review): the comparisons that guard the operator!= / operator==
// error messages are not visible in this listing.
146 void RBBIAPITest::TestBoilerPlate()
148 UErrorCode status
= U_ZERO_ERROR
;
149 BreakIterator
* a
= BreakIterator::createLineInstance(Locale("hi"), status
);
150 BreakIterator
* b
= BreakIterator::createLineInstance(Locale("hi_IN"),status
);
151 if (U_FAILURE(status
)) {
152 errln("Creation of break iterator failed %s", u_errorName(status
));
156 errln("Failed: boilerplate method operator!= does not return correct results");
158 BreakIterator
* c
= BreakIterator::createLineInstance(Locale("th"),status
);
161 errln("Failed: boilerplate method opertator== does not return correct results");
164 errln("creation of break iterator failed");
// Checks getRules(): a character iterator (bi1) and its clone (bi3) must
// return identical rule text, and both must differ from the rule text of a
// word iterator (bi2).
171 void RBBIAPITest::TestgetRules()
173 UErrorCode status
=U_ZERO_ERROR
;
175 RuleBasedBreakIterator
* bi1
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
176 RuleBasedBreakIterator
* bi2
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
177 if(U_FAILURE(status
)){
178 errln((UnicodeString
)"FAIL: in construction");
186 logln((UnicodeString
)"Testing toString()");
188 bi1
->setText((UnicodeString
)"Hello there");
190 RuleBasedBreakIterator
* bi3
=(RuleBasedBreakIterator
*)bi1
->clone();
192 UnicodeString temp
=bi1
->getRules();
193 UnicodeString temp2
=bi2
->getRules();
194 UnicodeString temp3
=bi3
->getRules();
// Fail when the word rules match either set of character rules, or when the
// clone's rules differ from the original's.
195 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
196 errln((UnicodeString
)"ERROR: error in getRules() method");
// Checks hashCode(): two character iterators (bi1, bi3), a word iterator
// (bi2) and clones of bi1 and bi2 are all given the same text. Iterators
// the test treats as identical must share a hash code, and iterators of
// different kinds must not.
202 void RBBIAPITest::TestHashCode()
204 UErrorCode status
=U_ZERO_ERROR
;
205 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
206 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
207 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
208 if(U_FAILURE(status
)){
209 errln((UnicodeString
)"FAIL : in construction");
217 logln((UnicodeString
)"Testing hashCode()");
219 bi1
->setText((UnicodeString
)"Hash code");
220 bi2
->setText((UnicodeString
)"Hash code");
221 bi3
->setText((UnicodeString
)"Hash code");
223 RuleBasedBreakIterator
* bi1clone
= (RuleBasedBreakIterator
*)bi1
->clone();
224 RuleBasedBreakIterator
* bi2clone
= (RuleBasedBreakIterator
*)bi2
->clone();
// Same kind and same text (including clones): hash codes must agree.
226 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
227 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
228 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
// Character versus word iterators: hash codes are expected to differ.
230 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
231 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
232 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
// Checks getText(), setText() and adoptText() on a character iterator and a
// word iterator: the text reported by getText() must match what was set or
// adopted, and setText() must leave current() at 0.
241 void RBBIAPITest::TestGetSetAdoptText()
243 logln((UnicodeString
)"Testing getText setText ");
244 UErrorCode status
=U_ZERO_ERROR
;
245 UnicodeString str1
="first string.";
246 UnicodeString str2
="Second string.";
247 RuleBasedBreakIterator
* charIter1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
248 RuleBasedBreakIterator
* wordIter1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
249 if(U_FAILURE(status
)){
250 errln((UnicodeString
)"FAIL : in construction");
// Reference character iterators used both as expected values for getText()
// comparisons and as arguments to adoptText().
255 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
256 CharacterIterator
* text1Clone
= text1
->clone();
257 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
258 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
260 wordIter1
->setText(str1
);
261 if(wordIter1
->getText() != *text1
)
262 errln((UnicodeString
)"ERROR:1 error in setText or getText ");
263 if(wordIter1
->current() != 0)
264 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
268 wordIter1
->setText(str2
);
269 if(wordIter1
->current() != 0)
270 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
// charIter1 adopts a clone of text1, so it should now report str1 while
// wordIter1 still reports str2.
273 charIter1
->adoptText(text1Clone
);
274 if( wordIter1
->getText() == charIter1
->getText() ||
275 wordIter1
->getText() != *text2
|| charIter1
->getText() != *text1
)
276 errln((UnicodeString
)"ERROR:2 error is getText or setText()");
// adoptText() is exercised on a clone of the word iterator.
278 RuleBasedBreakIterator
* rb
=(RuleBasedBreakIterator
*)wordIter1
->clone();
279 rb
->adoptText(text1
);
280 if(rb
->getText() != *text1
)
281 errln((UnicodeString
)"ERROR:1 error in adoptText ");
282 rb
->adoptText(text2
);
283 if(rb
->getText() != *text2
)
284 errln((UnicodeString
)"ERROR:2 error in adoptText ");
286 // Adopt where iterator range is less than the entire original source string.
287 rb
->adoptText(text3
);
// text3 covers offsets 3..10 of str2: preceding(2) is expected to return 3
// and following(11) is expected to return DONE.
288 if(rb
->preceding(2) != 3) {
289 errln((UnicodeString
)"ERROR:3 error in adoptText ");
291 if(rb
->following(11) != BreakIterator::DONE
) {
292 errln((UnicodeString
)"ERROR:4 error in adoptText ");
// Smoke-tests the iteration API. First each factory (character, word, line,
// sentence, title) is called for the default locale and a failure status or
// NULL result is reported. Then a character iterator over "0123456789" is
// used to check first(), last(), previous(), next(), following(),
// preceding(), isBoundary() and current().
// NOTE(review): most of the calls and conditions guarding the errln()
// messages below are not visible in this listing; the expected values are
// taken from the messages themselves.
302 void RBBIAPITest::TestIteration()
304 // This test just verifies that the API is present.
305 // Testing for correct operation of the break rules happens elsewhere.
307 UErrorCode status
=U_ZERO_ERROR
;
308 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
309 if (U_FAILURE(status
) || bi
== NULL
) {
310 errln("Failure creating character break iterator. Status = %s", u_errorName(status
));
315 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
316 if (U_FAILURE(status
) || bi
== NULL
) {
317 errln("Failure creating Word break iterator. Status = %s", u_errorName(status
));
322 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
323 if (U_FAILURE(status
) || bi
== NULL
) {
324 errln("Failure creating Line break iterator. Status = %s", u_errorName(status
));
329 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
330 if (U_FAILURE(status
) || bi
== NULL
) {
331 errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
336 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
337 if (U_FAILURE(status
) || bi
== NULL
) {
338 errln("Failure creating Title break iterator. Status = %s", u_errorName(status
));
// The remaining checks run against a fresh character iterator.
343 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
344 if (U_FAILURE(status
) || bi
== NULL
) {
345 errln("Failure creating character break iterator. Status = %s", u_errorName(status
));
346 return; // Skip the rest of these tests.
350 UnicodeString testString
="0123456789";
351 bi
->setText(testString
);
356 errln("Incorrect value from bi->first(). Expected 0, got %d.", i
);
361 errln("Incorrect value from bi->last(). Expected 10, got %d", i
);
370 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__
, i
);
376 if (i
!= BreakIterator::DONE
) {
377 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__
, i
);
386 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__
, i
);
391 if (i
!= BreakIterator::DONE
) {
392 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__
, i
);
402 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
408 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__
, i
);
415 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__
, i
);
422 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
// following(): per the messages, 5 is expected after offset 4, 10 after
// offset 9, and DONE after offset 10 (the end of the text).
429 i
= bi
->following(4);
431 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__
, i
);
434 i
= bi
->following(9);
436 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__
, i
);
439 i
= bi
->following(10);
440 if (i
!= BreakIterator::DONE
) {
441 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__
, i
);
// preceding(): per the messages, 3 is expected before offset 4, 9 before 10,
// 0 before 1, and DONE before offset 0.
448 i
= bi
->preceding(4);
450 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__
, i
);
453 i
= bi
->preceding(10);
455 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__
, i
);
458 i
= bi
->preceding(1);
460 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__
, i
);
463 i
= bi
->preceding(0);
464 if (i
!= BreakIterator::DONE
) {
465 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__
, i
);
// isBoundary(): offset 3 is expected to be a boundary; offset 11, which lies
// beyond the 10-character text, is expected not to be one.
// NOTE(review): the two isBoundary messages contain a single %d but are
// passed both __LINE__ and i; the extra argument is unused.
473 if (bi
->isBoundary(3) != TRUE
) {
474 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__
, i
);
478 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__
, i
);
482 if (bi
->isBoundary(11) != FALSE
) {
483 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__
, i
);
487 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__
, i
);
496 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__
, i
);
501 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__
, i
);
506 if (i
!= BreakIterator::DONE
) {
507 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__
, i
);
// Builds a RuleBasedBreakIterator directly from rule source text and checks,
// via doBoundaryTest(), that "abc123..abc" breaks at the offsets in bounds1.
// NOTE(review): part of the rule string is not visible in this listing.
519 void RBBIAPITest::TestBuilder() {
520 UnicodeString rulesString1
= "$Letters = [:L:];\n"
521 "$Numbers = [:N:];\n"
524 "[^$Letters $Numbers];\n"
526 UnicodeString testString1
= "abc123..abc";
// Expected boundary offsets within testString1.
528 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
529 UErrorCode status
=U_ZERO_ERROR
;
530 UParseError parseError
;
532 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
533 if(U_FAILURE(status
)) {
534 errln("FAIL : in construction");
536 bi
->setText(testString1
);
537 doBoundaryTest(*bi
, testString1
, bounds1
);
545 // Single quotes within rules imply a grouping, so that a modifier
546 // following the quoted text (* or +) applies to all of the quoted chars.
// Builds an iterator from rules containing a quoted sequence followed by
// '*' and checks, via doBoundaryTest(), that "$@!$@!X$@!!X" breaks at the
// offsets in bounds1.
// NOTE(review): part of the rule string is not visible in this listing.
548 void RBBIAPITest::TestQuoteGrouping() {
549 UnicodeString rulesString1
= "#Here comes the rule...\n"
550 "'$@!'*;\n" // (\$\@\!)*
553 UnicodeString testString1
= "$@!$@!X$@!!X";
// Expected boundary offsets within testString1.
555 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
556 UErrorCode status
=U_ZERO_ERROR
;
557 UParseError parseError
;
559 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
560 if(U_FAILURE(status
)) {
561 errln("FAIL : in construction");
563 bi
->setText(testString1
);
564 doBoundaryTest(*bi
, testString1
, bounds1
);
571 // Test word break rule status constants.
// Checks the rule status values reported by break iterators.
//  1. A default-locale word iterator over a string of plain, numeric,
//     ideographic, Katakana and Hiragana text must break at bounds1 and
//     report, at each break, a tag within [tag_lo[i], tag_hi[i]).
//  2. getRuleStatusVec() is checked against the tag from getRuleStatus().
//  3. An English line iterator over "test line. \n" must report the expected
//     positions with UBRK_LINE_SOFT / UBRK_LINE_HARD tags, and the
//     UBRK_LINE_* constants are checked for mutual consistency.
// NOTE(review): several declarations and statements are not visible in this
// listing.
573 void RBBIAPITest::TestRuleStatus() {
575 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
576 // 012345678901234567 8 9 0 1 2 3 4 5 6
577 // Ideographic Katakana Hiragana
579 UnicodeString
testString1(str
);
// Expected break offsets, with the lower (inclusive) and upper (exclusive)
// limits of the tag expected at each one.
580 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
581 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
582 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
583 UBRK_WORD_IDEO
, UBRK_WORD_IDEO
, UBRK_WORD_NONE
,
584 UBRK_WORD_KANA
, UBRK_WORD_NONE
, UBRK_WORD_KANA
, UBRK_WORD_KANA
};
586 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
587 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
588 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
,
589 UBRK_WORD_KANA_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_KANA_LIMIT
, UBRK_WORD_KANA_LIMIT
};
591 UErrorCode status
=U_ZERO_ERROR
;
593 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createWordInstance(Locale::getDefault(), status
);
594 if(U_FAILURE(status
)) {
595 errln("FAIL : in construction");
597 bi
->setText(testString1
);
598 // First test that the breaks are in the right spots.
599 doBoundaryTest(*bi
, testString1
, bounds1
);
601 // Then go back and check tag values
604 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
605 if (pos
!= bounds1
[i
]) {
606 errln("FAIL: unexpected word break at postion %d", pos
);
609 tag
= bi
->getRuleStatus();
// The tag must fall in the half-open range [tag_lo[i], tag_hi[i]).
610 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
611 errln("FAIL: incorrect tag value %d at position %d", tag
, pos
);
615 // Check that we get the same tag values from getRuleStatusVec()
617 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
618 TEST_ASSERT_SUCCESS(status
);
620 TEST_ASSERT(vec
[0] == tag
);
625 // Now test line break status. This test mostly is to confirm that the status constants
626 // are correctly declared in the header.
627 testString1
= "test line. \n";
630 bi
= (RuleBasedBreakIterator
*)
631 BreakIterator::createLineInstance(Locale::getEnglish(), status
);
632 if(U_FAILURE(status
)) {
// NOTE(review): the message says "word" but the iterator being created here
// is a line break iterator.
633 errln("failed to create word break iterator.");
639 bi
->setText(testString1
);
641 tag
= bi
->getRuleStatus();
// Three breaks are checked: offsets 0 and 5 with UBRK_LINE_SOFT, and offset
// 12 with UBRK_LINE_HARD.
642 for (i
=0; i
<3; i
++) {
645 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
647 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
649 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
651 success
= FALSE
; break;
653 if (success
== FALSE
) {
654 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
659 tag
= bi
->getRuleStatus();
// Consistency check on the header constants: each value must be below its
// own limit, and UBRK_LINE_HARD must not fall inside the SOFT range.
// '&&' binds tighter than '||', so the last two comparisons form one term.
661 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
662 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
663 UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
) {
664 errln("UBRK_LINE_* constants from header are inconsistent.");
674 // Test the vector form of break rule status.
// Checks getRuleStatusVec() on an iterator built from rules that carry
// {nnn} status values: for a sequence of positions in "Aapz5?" the number of
// statuses and their values are verified, followed by the behaviour when the
// output buffer is too small (capacity 0 and 1) or exactly large enough
// (capacity 2).
// NOTE(review): most of the rule string and the calls that move the iterator
// between checks are not visible in this listing.
676 void RBBIAPITest::TestRuleStatusVec() {
677 UnicodeString rulesString
= "[A-N]{100}; \n"
683 UnicodeString testString1
= "Aapz5?";
684 int32_t statusVals
[10];
688 UErrorCode status
=U_ZERO_ERROR
;
689 UParseError parseError
;
691 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
692 TEST_ASSERT_SUCCESS(status
);
693 if (U_SUCCESS(status
)) {
694 bi
->setText(testString1
);
699 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
700 TEST_ASSERT_SUCCESS(status
);
701 TEST_ASSERT(numStatuses
== 2);
702 TEST_ASSERT(statusVals
[0] == 100);
703 TEST_ASSERT(statusVals
[1] == 300);
708 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
709 TEST_ASSERT_SUCCESS(status
);
710 TEST_ASSERT(numStatuses
== 2);
711 TEST_ASSERT(statusVals
[0] == 200);
712 TEST_ASSERT(statusVals
[1] == 300);
717 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
718 TEST_ASSERT_SUCCESS(status
);
719 TEST_ASSERT(numStatuses
== 2);
720 TEST_ASSERT(statusVals
[0] == 200);
721 TEST_ASSERT(statusVals
[1] == 300);
726 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
727 TEST_ASSERT_SUCCESS(status
);
728 TEST_ASSERT(numStatuses
== 1);
729 TEST_ASSERT(statusVals
[0] == 300);
734 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
735 TEST_ASSERT_SUCCESS(status
);
736 TEST_ASSERT(numStatuses
== 2);
737 TEST_ASSERT(statusVals
[0] == 400);
738 TEST_ASSERT(statusVals
[1] == 500);
743 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
744 TEST_ASSERT_SUCCESS(status
);
745 TEST_ASSERT(numStatuses
== 1);
746 TEST_ASSERT(statusVals
[0] == 0);
749 // Check buffer overflow error handling. Char == A
// Capacity 0: expects U_BUFFER_OVERFLOW_ERROR, a returned count of 2, and a
// buffer left untouched (memset with -1 makes every int32_t read as -1).
754 memset(statusVals
, -1, sizeof(statusVals
));
755 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
756 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
757 TEST_ASSERT(numStatuses
== 2);
758 TEST_ASSERT(statusVals
[0] == -1);
// Capacity 1: still an overflow with count 2; only the first value is
// stored and the second slot stays -1.
760 status
= U_ZERO_ERROR
;
761 memset(statusVals
, -1, sizeof(statusVals
));
762 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
763 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
764 TEST_ASSERT(numStatuses
== 2);
765 TEST_ASSERT(statusVals
[0] == 100);
766 TEST_ASSERT(statusVals
[1] == -1);
// Capacity 2: exactly enough room; success is expected, both values are
// stored and the slot after them stays -1.
768 status
= U_ZERO_ERROR
;
769 memset(statusVals
, -1, sizeof(statusVals
));
770 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
771 TEST_ASSERT_SUCCESS(status
);
772 TEST_ASSERT(numStatuses
== 2);
773 TEST_ASSERT(statusVals
[0] == 100);
774 TEST_ASSERT(statusVals
[1] == 300);
775 TEST_ASSERT(statusVals
[2] == -1);
782 // Bug 2190 Regression test. Builder crash on rule consisting of only a
783 // $variable reference
// Builds an iterator from rules that define $aaa and checks, via
// doBoundaryTest(), that "abcdabcd" breaks at offsets 0, 4 and 8.
// NOTE(review): the remainder of the rule string is not visible in this
// listing.
784 void RBBIAPITest::TestBug2190() {
785 UnicodeString rulesString1
= "$aaa = abcd;\n"
788 UnicodeString testString1
= "abcdabcd";
// Expected boundary offsets within testString1.
790 int32_t bounds1
[] = {0, 4, 8};
791 UErrorCode status
=U_ZERO_ERROR
;
792 UParseError parseError
;
794 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
795 if(U_FAILURE(status
)) {
796 errln("FAIL : in construction");
798 bi
->setText(testString1
);
799 doBoundaryTest(*bi
, testString1
, bounds1
);
// Checks BreakIterator::registerInstance() / unregister(). The Thai word
// iterator is registered for locale "xx" as a UBRK_WORD iterator, after
// which:
//  - createWordInstance("xx_XX") is compared with the Thai word iterator,
//  - createCharacterInstance("th_TH") is compared with the Thai character
//    iterator, and createCharacterInstance("xx_XX") with the root one,
//  - "xx" is looked up in getAvailableLocales().
// After unregister(), createWordInstance("xx") is compared with the root
// word iterator and "xx" is looked up again. Finally the count-returning
// getAvailableLocales() overload is searched for English.
// The body is compiled only when UCONFIG_NO_SERVICE is not set.
// NOTE(review): the conditions guarding the errln() calls, and any cleanup,
// are not visible in this listing.
805 void RBBIAPITest::TestRegistration() {
806 #if !UCONFIG_NO_SERVICE
807 UErrorCode status
= U_ZERO_ERROR
;
808 BreakIterator
* thai_word
= BreakIterator::createWordInstance("th_TH", status
);
810 // ok to not delete these if we exit because of error?
811 BreakIterator
* thai_char
= BreakIterator::createCharacterInstance("th_TH", status
);
812 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
813 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
815 URegistryKey key
= BreakIterator::registerInstance(thai_word
, "xx", UBRK_WORD
, status
);
// The test is only meaningful when the Thai and root word iterators differ.
817 if (thai_word
&& *thai_word
== *root_word
) {
818 errln("thai not different from root");
823 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
826 fail
= *result
!= *thai_word
;
830 errln("bad result for xx_XX/word");
835 BreakIterator
* result
= BreakIterator::createCharacterInstance("th_TH", status
);
838 fail
= *result
!= *thai_char
;
842 errln("bad result for th_TH/char");
847 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
850 fail
= *result
!= *root_char
;
854 errln("bad result for xx_XX/char");
859 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
861 const UnicodeString
* p
;
862 while ((p
= avail
->snext(status
))) {
863 if (p
->compare("xx") == 0) {
870 errln("did not find test locale");
875 UBool unreg
= BreakIterator::unregister(key
, status
);
877 errln("unable to unregister");
882 BreakIterator
* result
= BreakIterator::createWordInstance("xx", status
);
883 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
886 fail
= *root
!= *result
;
891 errln("did not get root break");
896 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
898 const UnicodeString
* p
;
899 while ((p
= avail
->snext(status
))) {
900 if (p
->compare("xx") == 0) {
907 errln("found test locale");
913 UBool foundLocale
= FALSE
;
914 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
915 for (int i
=0; i
<count
; i
++) {
916 if (avail
[i
] == Locale::getEnglish()) {
921 if (foundLocale
== FALSE
) {
922 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
927 // thai_word was adopted by factory
// Round-trip check for one pre-built break rule data file.
// Opens the "brk" data item named by `dataFile`, locates the rule source
// text embedded in it, builds a new RuleBasedBreakIterator from that source,
// and compares the new iterator's binary rules with the original data.
// A failure to open the data, to build the iterator, or a mismatch in the
// compared bytes is reported through errln().
// @param dataFile  name of the break rule data item, passed to udata_open().
934 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
935 UErrorCode status
= U_ZERO_ERROR
;
936 UParseError parseError
;
938 parseError
.offset
= 0;
939 UDataMemory
*data
= udata_open(NULL
, "brk", dataFile
, &status
);
941 const UChar
*builtSource
;
942 const uint8_t *rbbiRules
;
943 const uint8_t *builtRules
;
945 if (U_FAILURE(status
)) {
946 errln("Can't open \"%s\"", dataFile
);
// The rule source is found by adding the header's fRuleSource value, used as
// a byte offset, to the start of the data.
950 builtRules
= (const uint8_t *)udata_getMemory(data
);
951 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
952 RuleBasedBreakIterator
*brkItr
= new RuleBasedBreakIterator(builtSource
, parseError
, status
);
953 if (U_FAILURE(status
)) {
954 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
955 u_errorName(status
), parseError
.line
, parseError
.offset
);
// Compare `length` bytes (set by getBinaryRules()) of the rebuilt rules
// with the original data.
958 rbbiRules
= brkItr
->getBinaryRules(length
);
959 logln("Comparing \"%s\" len=%d", dataFile
, length
);
960 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
961 errln("Built rules and rebuilt rules are different %s", dataFile
);
// Runs RoundtripRule() on each of the listed rule data files: the five
// standard kinds plus the "_th" word and line variants.
968 void RBBIAPITest::TestRoundtripRules() {
969 RoundtripRule("word");
970 RoundtripRule("title");
971 RoundtripRule("sent");
972 RoundtripRule("line");
973 RoundtripRule("char");
975 RoundtripRule("word_th");
976 RoundtripRule("line_th");
980 //---------------------------------------------
982 //---------------------------------------------
// Test dispatcher: maps `index` to a test case, stores the test's name in
// `name`, and runs the test when `exec` is true.
// Indices 5 and 6 are placeholders named "extra" that run nothing; any other
// unlisted index yields an empty name, which ends the caller's loop.
// @param index  zero-based index of the test to select.
// @param exec   when true the selected test is executed, otherwise only
//               `name` is set.
// @param name   receives the name of the selected test.
984 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
986 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
988 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
989 case 0: name
= "TestCloneEquals"; if (exec
) TestCloneEquals(); break;
990 case 1: name
= "TestgetRules"; if (exec
) TestgetRules(); break;
991 case 2: name
= "TestHashCode"; if (exec
) TestHashCode(); break;
992 case 3: name
= "TestGetSetAdoptText"; if (exec
) TestGetSetAdoptText(); break;
993 case 4: name
= "TestIteration"; if (exec
) TestIteration(); break;
994 case 5: name
= "extra"; break; // Extra
995 case 6: name
= "extra"; break; // Extra
996 case 7: name
= "TestBuilder"; if (exec
) TestBuilder(); break;
997 case 8: name
= "TestQuoteGrouping"; if (exec
) TestQuoteGrouping(); break;
998 case 9: name
= "TestRuleStatus"; if (exec
) TestRuleStatus(); break;
999 case 10: name
= "TestRuleStatusVec"; if (exec
) TestRuleStatusVec(); break;
1000 case 11: name
= "TestBug2190"; if (exec
) TestBug2190(); break;
1001 case 12: name
= "TestRegistration"; if (exec
) TestRegistration(); break;
1002 case 13: name
= "TestBoilerPlate"; if (exec
) TestBoilerPlate(); break;
1003 case 14: name
= "TestRoundtripRules"; if (exec
) TestRoundtripRules(); break;
1005 default: name
= ""; break; // needed to end loop
1009 //---------------------------------------------
1010 //Internal subroutines
1011 //---------------------------------------------
// Verifies isBoundary() for every offset in [0, text.length()).
// Each result is logged and compared against the expected boundary list; a
// missing boundary or an unexpected one is reported through errln().
// @param bi          iterator under test.
// @param text        text whose length bounds the offsets that are probed.
// @param boundaries  expected boundary offsets, indexed by `p`.
// NOTE(review): the declaration and advancement of `p` are not visible in
// this listing - presumably it steps through `boundaries` as matches are
// found; confirm that it cannot index past the end of the array.
1013 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1014 logln((UnicodeString
)"testIsBoundary():");
1017 for (int32_t i
= 0; i
< text
.length(); i
++) {
1018 isB
= bi
.isBoundary(i
);
1019 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
1021 if (i
== boundaries
[p
]) {
1023 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
1028 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
// Checks one iteration step: the offset that was returned and the text it
// selects.
// Reports an error when `gotoffset` differs from `expectedOffset`, then
// extracts the text between `start` and `gotoffset` (in whichever order
// keeps the range ascending) and compares it with `expectedString`.
// @param testString      text the offsets refer to.
// @param start           offset the step started from.
// @param gotoffset       offset actually returned.
// @param expectedOffset  offset that should have been returned.
// @param expectedString  expected selection, converted with
//                        CharsToUnicodeString().
1032 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1033 UnicodeString selected
;
1034 UnicodeString expected
=CharsToUnicodeString(expectedString
);
1036 if(gotoffset
!= expectedOffset
)
1037 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
// Forward step: select [start, gotoffset).
1038 if(start
<= gotoffset
){
1039 testString
.extractBetween(start
, gotoffset
, selected
);
// Otherwise the step went backwards: select [gotoffset, start).
1042 testString
.extractBetween(gotoffset
, start
, selected
);
1044 if(selected
.compare(expected
) != 0)
1045 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1047 logln(prettify("****selected \"" + selected
+ "\""));
1050 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */