1 /********************************************************************
2 * Copyright (c) 1999-2008, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/uchar.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
22 #include "unicode/ustring.h"
23 #include "unicode/utext.h"
27 * API Test the RuleBasedBreakIterator class
// Reports a failure through errln(), with file, line and the ICU error name,
// when `status` holds a failure code (U_FAILURE).
31 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
32 errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
// Reports a failure through errln(), with file and line, when `expr` compares equal to FALSE.
34 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
35 errln("Test Failure at file %s, line %d", __FILE__, __LINE__);}}
// Exercises RuleBasedBreakIterator operator== / operator!=, assignment and clone(),
// using character, word and line break iterators created for the default locale.
37 void RBBIAPITest::TestCloneEquals()
40 UErrorCode status
=U_ZERO_ERROR
;
41 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
42 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
43 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
44 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
45 if(U_FAILURE(status
)){
46 errln((UnicodeString
)"FAIL : in construction");
// bi1, bi2 and biequal are given the same text; bi3 is given different text ("hello").
51 UnicodeString testString
="Testing word break iterators's clone() and equals()";
52 bi1
->setText(testString
);
53 bi2
->setText(testString
);
54 biequal
->setText(testString
);
56 bi3
->setText("hello");
58 logln((UnicodeString
)"Testing equals()");
60 logln((UnicodeString
)"Testing == and !=");
61 UBool b
= (*bi1
!= *biequal
);
65 errln((UnicodeString
)"ERROR:1 RBBI's == and != operator failed.");
// A word iterator must not equal a character iterator, and iterators over different text must differ.
68 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
69 errln((UnicodeString
)"ERROR:2 RBBI's == and != operator failed.");
72 // Quick test of RuleBasedBreakIterator assignment -
74 // two different iterators are !=
75 // they are == after assignment
76 // source and dest iterator produce the same next() after assignment.
77 // deleting one doesn't disable the other.
78 logln("Testing assignment");
79 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getDefault(), status
);
80 if(U_FAILURE(status
)){
81 errln((UnicodeString
)"FAIL : in construction");
85 RuleBasedBreakIterator biDefault
, biDefault2
;
86 if(U_FAILURE(status
)){
87 errln((UnicodeString
)"FAIL : in construction of default iterator");
90 if (biDefault
== *bix
) {
91 errln((UnicodeString
)"ERROR: iterators should not compare ==");
94 if (biDefault
!= biDefault2
) {
95 errln((UnicodeString
)"ERROR: iterators should compare ==");
100 UnicodeString
HelloString("Hello Kitty");
101 bix
->setText(HelloString
);
103 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
107 errln(UnicodeString("ERROR: strings should be equal before assignment."));
// Both iterators are expected to report the same first boundary (7) from next().
110 int bixnext
= bix
->next();
111 int bi2next
= bi2
->next();
112 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
113 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
116 if (bi2
->next() != 8) {
117 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
122 logln((UnicodeString
)"Testing clone()");
123 RuleBasedBreakIterator
* bi1clone
=(RuleBasedBreakIterator
*)bi1
->clone();
124 RuleBasedBreakIterator
* bi2clone
=(RuleBasedBreakIterator
*)bi2
->clone();
// A clone must equal its source (and biequal for bi1clone) and differ from the other iterators.
126 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
127 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
128 errln((UnicodeString
)"ERROR:1 RBBI's clone() method failed");
130 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
131 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
132 errln((UnicodeString
)"ERROR:2 RBBI's clone() method failed");
// The text of a clone must compare equal to the text of its source.
134 if(bi1
->getText() != bi1clone
->getText() ||
135 bi2clone
->getText() != bi2
->getText() ||
136 *bi2clone
== *bi1clone
)
137 errln((UnicodeString
)"ERROR: RBBI's clone() method failed");
// Checks the boilerplate comparison operators (operator!= / operator==) of BreakIterator
// using word iterators created for the "hi", "hi_IN" and "ja" locales.
147 void RBBIAPITest::TestBoilerPlate()
149 UErrorCode status
= U_ZERO_ERROR
;
150 BreakIterator
* a
= BreakIterator::createWordInstance(Locale("hi"), status
);
151 BreakIterator
* b
= BreakIterator::createWordInstance(Locale("hi_IN"),status
);
152 if (U_FAILURE(status
)) {
153 errln("Creation of break iterator failed %s", u_errorName(status
));
157 errln("Failed: boilerplate method operator!= does not return correct results");
159 BreakIterator
* c
= BreakIterator::createWordInstance(Locale("ja"),status
);
162 errln("Failed: boilerplate method opertator== does not return correct results");
165 errln("creation of break iterator failed");
// Verifies getRules(): a character iterator and its clone must return identical rule text,
// while the word iterator's rule text must differ from both.
172 void RBBIAPITest::TestgetRules()
174 UErrorCode status
=U_ZERO_ERROR
;
176 RuleBasedBreakIterator
* bi1
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
177 RuleBasedBreakIterator
* bi2
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
178 if(U_FAILURE(status
)){
179 errln((UnicodeString
)"FAIL: in construction");
187 logln((UnicodeString
)"Testing toString()");
189 bi1
->setText((UnicodeString
)"Hello there");
// bi3 is a clone of the character iterator bi1.
191 RuleBasedBreakIterator
* bi3
=(RuleBasedBreakIterator
*)bi1
->clone();
193 UnicodeString temp
=bi1
->getRules();
194 UnicodeString temp2
=bi2
->getRules();
195 UnicodeString temp3
=bi3
->getRules();
// Fail if word rules match either character rule string, or if the clone's rules differ from bi1's.
196 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
197 errln((UnicodeString
)"ERROR: error in getRules() method");
// Verifies hashCode(): the two character iterators and the clones must hash equal to their
// counterparts, while character and word iterators must produce different hash codes.
203 void RBBIAPITest::TestHashCode()
205 UErrorCode status
=U_ZERO_ERROR
;
206 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
207 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
208 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
209 if(U_FAILURE(status
)){
210 errln((UnicodeString
)"FAIL : in construction");
218 logln((UnicodeString
)"Testing hashCode()");
// All three iterators are given the same text.
220 bi1
->setText((UnicodeString
)"Hash code");
221 bi2
->setText((UnicodeString
)"Hash code");
222 bi3
->setText((UnicodeString
)"Hash code");
224 RuleBasedBreakIterator
* bi1clone
= (RuleBasedBreakIterator
*)bi1
->clone();
225 RuleBasedBreakIterator
* bi2clone
= (RuleBasedBreakIterator
*)bi2
->clone();
// Same-kind iterators and clones are expected to hash equal.
227 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
228 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
229 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
// Character-vs-word iterators (and their clones) are expected to hash differently.
231 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
232 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
233 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
// Exercises setText(), getText() and adoptText() with UnicodeString / CharacterIterator input,
// then the UText-based setText() and getUText(), on word and character break iterators.
242 void RBBIAPITest::TestGetSetAdoptText()
244 logln((UnicodeString
)"Testing getText setText ");
245 UErrorCode status
=U_ZERO_ERROR
;
246 UnicodeString str1
="first string.";
247 UnicodeString str2
="Second string.";
248 RuleBasedBreakIterator
* charIter1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
249 RuleBasedBreakIterator
* wordIter1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
250 if(U_FAILURE(status
)){
251 errln((UnicodeString
)"FAIL : in construction");
// CharacterIterators over str1 and str2; text3 covers only a sub-range of str2.
256 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
257 CharacterIterator
* text1Clone
= text1
->clone();
258 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
259 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
261 wordIter1
->setText(str1
);
262 CharacterIterator
*tci
= &wordIter1
->getText();
265 TEST_ASSERT(tstr
== str1
);
// setText() is expected to leave the iterator positioned at offset 0.
266 if(wordIter1
->current() != 0)
267 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
271 wordIter1
->setText(str2
);
272 if(wordIter1
->current() != 0)
273 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
// charIter1 adopts the clone of text1 (str1) while wordIter1 still holds str2.
276 charIter1
->adoptText(text1Clone
);
277 TEST_ASSERT(wordIter1
->getText() != charIter1
->getText());
278 tci
= &wordIter1
->getText();
280 TEST_ASSERT(tstr
== str2
);
281 tci
= &charIter1
->getText();
283 TEST_ASSERT(tstr
== str1
);
// adoptText() on a clone of the word iterator: getText() must compare equal to the adopted iterator.
286 RuleBasedBreakIterator
* rb
=(RuleBasedBreakIterator
*)wordIter1
->clone();
287 rb
->adoptText(text1
);
288 if(rb
->getText() != *text1
)
289 errln((UnicodeString
)"ERROR:1 error in adoptText ");
290 rb
->adoptText(text2
);
291 if(rb
->getText() != *text2
)
292 errln((UnicodeString
)"ERROR:2 error in adoptText ");
294 // Adopt where iterator range is less than the entire original source string.
295 // (With the change of the break engine to working with UText internally,
296 // CharacterIterators starting at positions other than zero are not supported)
297 rb
->adoptText(text3
);
298 TEST_ASSERT(rb
->preceding(2) == 0);
299 TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
);
300 //if(rb->preceding(2) != 3) {
301 // errln((UnicodeString)"ERROR:3 error in adoptText ");
303 //if(rb->following(11) != BreakIterator::DONE) {
304 // errln((UnicodeString)"ERROR:4 error in adoptText ");
309 // Quick test to see if UText is working at all.
311 const char *s1
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
312 const char *s2
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
315 status
= U_ZERO_ERROR
;
316 UText
*ut
= utext_openUTF8(NULL
, s1
, -1, &status
);
317 wordIter1
->setText(ut
, status
);
318 TEST_ASSERT_SUCCESS(status
);
// Walk forward over s1; the last real boundary is expected at 11, then UBRK_DONE.
321 pos
= wordIter1
->first();
323 pos
= wordIter1
->next();
325 pos
= wordIter1
->next();
327 pos
= wordIter1
->next();
328 TEST_ASSERT(pos
==11);
329 pos
= wordIter1
->next();
330 TEST_ASSERT(pos
==UBRK_DONE
);
332 status
= U_ZERO_ERROR
;
333 UText
*ut2
= utext_openUTF8(NULL
, s2
, -1, &status
);
334 TEST_ASSERT_SUCCESS(status
);
335 wordIter1
->setText(ut2
, status
);
336 TEST_ASSERT_SUCCESS(status
);
// Walk forward over s2, then backward from last() until previous() reports UBRK_DONE.
338 pos
= wordIter1
->first();
340 pos
= wordIter1
->next();
342 pos
= wordIter1
->next();
345 pos
= wordIter1
->last();
347 pos
= wordIter1
->previous();
349 pos
= wordIter1
->previous();
351 pos
= wordIter1
->previous();
353 pos
= wordIter1
->previous();
354 TEST_ASSERT(pos
==UBRK_DONE
);
// getUText() is called with a UText that was opened over an empty UnicodeString.
356 status
= U_ZERO_ERROR
;
357 UnicodeString sEmpty
;
358 UText
*gut2
= utext_openUnicodeString(NULL
, &sEmpty
, &status
);
359 wordIter1
->getUText(gut2
, status
);
360 TEST_ASSERT_SUCCESS(status
);
// Smoke test of the iteration API: creates each kind of break iterator for the default locale,
// then checks results of the iteration functions named in the failure messages below on a
// character iterator over "0123456789".
373 void RBBIAPITest::TestIteration()
375 // This test just verifies that the API is present.
376 // Testing for correct operation of the break rules happens elsewhere.
378 UErrorCode status
=U_ZERO_ERROR
;
379 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
380 if (U_FAILURE(status
) || bi
== NULL
) {
381 errln("Failure creating character break iterator. Status = %s", u_errorName(status
));
386 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
387 if (U_FAILURE(status
) || bi
== NULL
) {
388 errln("Failure creating Word break iterator. Status = %s", u_errorName(status
));
393 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
394 if (U_FAILURE(status
) || bi
== NULL
) {
395 errln("Failure creating Line break iterator. Status = %s", u_errorName(status
));
400 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
401 if (U_FAILURE(status
) || bi
== NULL
) {
402 errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
407 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
408 if (U_FAILURE(status
) || bi
== NULL
) {
409 errln("Failure creating Title break iterator. Status = %s", u_errorName(status
));
// The character iterator created here is the one used for the remaining checks.
414 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
415 if (U_FAILURE(status
) || bi
== NULL
) {
416 errln("Failure creating character break iterator. Status = %s", u_errorName(status
));
417 return; // Skip the rest of these tests.
421 UnicodeString testString
="0123456789";
422 bi
->setText(testString
);
427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i
);
432 errln("Incorrect value from bi->last(). Expected 10, got %d", i
);
441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__
, i
);
447 if (i
!= BreakIterator::DONE
) {
448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__
, i
);
457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__
, i
);
462 if (i
!= BreakIterator::DONE
) {
463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__
, i
);
473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__
, i
);
486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__
, i
);
493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
// following(offset): expected results are given in the failure messages; DONE at the end of text.
500 i
= bi
->following(4);
502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__
, i
);
505 i
= bi
->following(9);
507 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__
, i
);
510 i
= bi
->following(10);
511 if (i
!= BreakIterator::DONE
) {
512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__
, i
);
// preceding(offset): expected results are given in the failure messages; DONE at the start of text.
519 i
= bi
->preceding(4);
521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__
, i
);
524 i
= bi
->preceding(10);
526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__
, i
);
529 i
= bi
->preceding(1);
531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__
, i
);
534 i
= bi
->preceding(0);
535 if (i
!= BreakIterator::DONE
) {
536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__
, i
);
// isBoundary(): offset 3 is expected to be a boundary; offset 11 (past the 10-char text) is not.
544 if (bi
->isBoundary(3) != TRUE
) {
// NOTE(review): the format string has a single %d (the line) but `i` is also passed; the extra argument is unused.
545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__
, i
);
549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__
, i
);
553 if (bi
->isBoundary(11) != FALSE
) {
// NOTE(review): same as above - one %d in the format string, two arguments passed.
554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__
, i
);
558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__
, i
);
567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__
, i
);
572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__
, i
);
577 if (i
!= BreakIterator::DONE
) {
578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__
, i
);
// Builds a RuleBasedBreakIterator from rule source text and checks, via doBoundaryTest(),
// that its boundaries on "abc123..abc" match bounds1.
590 void RBBIAPITest::TestBuilder() {
591 UnicodeString rulesString1
= "$Letters = [:L:];\n"
592 "$Numbers = [:N:];\n"
595 "[^$Letters $Numbers];\n"
597 UnicodeString testString1
= "abc123..abc";
// Expected boundary offsets within testString1.
599 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
600 UErrorCode status
=U_ZERO_ERROR
;
601 UParseError parseError
;
603 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
604 if(U_FAILURE(status
)) {
605 errln("FAIL : in construction");
607 bi
->setText(testString1
);
608 doBoundaryTest(*bi
, testString1
, bounds1
);
616 // Single quotes within rules imply a grouping, so that a modifier
617 // following the quoted text (* or +) applies to all of the quoted chars.
// The iterator is built from rule source text and its boundaries on testString1 are
// checked against bounds1 via doBoundaryTest().
619 void RBBIAPITest::TestQuoteGrouping() {
620 UnicodeString rulesString1
= "#Here comes the rule...\n"
621 "'$@!'*;\n" // (\$\@\!)*
624 UnicodeString testString1
= "$@!$@!X$@!!X";
// Expected boundary offsets within testString1.
626 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
627 UErrorCode status
=U_ZERO_ERROR
;
628 UParseError parseError
;
630 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
631 if(U_FAILURE(status
)) {
632 errln("FAIL : in construction");
634 bi
->setText(testString1
);
635 doBoundaryTest(*bi
, testString1
, bounds1
);
642 // Test word break rule status constants.
// For an English word iterator, each boundary's getRuleStatus() tag must fall in the
// [tag_lo[i], tag_hi[i]) range; a line iterator is then checked against the
// UBRK_LINE_SOFT / UBRK_LINE_HARD constants.
644 void RBBIAPITest::TestRuleStatus() {
646 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
647 // 012345678901234567 8 9 0 1 2 3 4 5 6
648 // Ideographic Katakana Hiragana
650 UnicodeString
testString1(str
);
// Expected boundaries, and the inclusive-low / exclusive-high tag range expected at each one.
651 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
652 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
653 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
654 UBRK_WORD_IDEO
, UBRK_WORD_IDEO
, UBRK_WORD_NONE
,
655 UBRK_WORD_KANA
, UBRK_WORD_NONE
, UBRK_WORD_KANA
, UBRK_WORD_KANA
};
657 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
658 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
659 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
,
660 UBRK_WORD_KANA_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_KANA_LIMIT
, UBRK_WORD_KANA_LIMIT
};
662 UErrorCode status
=U_ZERO_ERROR
;
664 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createWordInstance(Locale::getEnglish(), status
);
665 if(U_FAILURE(status
)) {
666 errln("FAIL : in construction");
668 bi
->setText(testString1
);
669 // First test that the breaks are in the right spots.
670 doBoundaryTest(*bi
, testString1
, bounds1
);
672 // Then go back and check tag values
675 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
676 if (pos
!= bounds1
[i
]) {
677 errln("FAIL: unexpected word break at postion %d", pos
);
680 tag
= bi
->getRuleStatus();
681 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
682 errln("FAIL: incorrect tag value %d at position %d", tag
, pos
);
686 // Check that we get the same tag values from getRuleStatusVec()
688 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
689 TEST_ASSERT_SUCCESS(status
);
691 TEST_ASSERT(vec
[0] == tag
);
696 // Now test line break status. This test mostly is to confirm that the status constants
697 // are correctly declared in the header.
698 testString1
= "test line. \n";
701 bi
= (RuleBasedBreakIterator
*)
702 BreakIterator::createLineInstance(Locale::getEnglish(), status
);
703 if(U_FAILURE(status
)) {
// NOTE(review): this message says "word" although the iterator requested above is a line break iterator.
704 errln("failed to create word break iterator.");
710 bi
->setText(testString1
);
712 tag
= bi
->getRuleStatus();
// Three boundaries are checked: positions 0 and 5 with a soft status, position 12 with a hard status.
713 for (i
=0; i
<3; i
++) {
716 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
718 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
720 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
722 success
= FALSE
; break;
724 if (success
== FALSE
) {
725 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
730 tag
= bi
->getRuleStatus();
// Sanity-check the relative ordering of the UBRK_LINE_* constants and their limits.
732 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
733 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
734 UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
) {
735 errln("UBRK_LINE_* constants from header are inconsistent.");
745 // Test the vector form of break rule status.
// Builds an iterator from custom rules carrying {nnn} status values, then checks the count
// and contents returned by getRuleStatusVec() at successive positions, including the
// U_BUFFER_OVERFLOW_ERROR behavior when the supplied capacity is too small.
747 void RBBIAPITest::TestRuleStatusVec() {
748 UnicodeString
rulesString( "[A-N]{100}; \n"
753 "!.*;\n", -1, US_INV
);
754 UnicodeString testString1
= "Aapz5?";
755 int32_t statusVals
[10];
759 UErrorCode status
=U_ZERO_ERROR
;
760 UParseError parseError
;
762 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
763 TEST_ASSERT_SUCCESS(status
);
764 if (U_SUCCESS(status
)) {
765 bi
->setText(testString1
);
770 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
771 TEST_ASSERT_SUCCESS(status
);
772 TEST_ASSERT(numStatuses
== 2);
773 TEST_ASSERT(statusVals
[0] == 100);
774 TEST_ASSERT(statusVals
[1] == 300);
779 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
780 TEST_ASSERT_SUCCESS(status
);
781 TEST_ASSERT(numStatuses
== 2);
782 TEST_ASSERT(statusVals
[0] == 200);
783 TEST_ASSERT(statusVals
[1] == 300);
788 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
789 TEST_ASSERT_SUCCESS(status
);
790 TEST_ASSERT(numStatuses
== 2);
791 TEST_ASSERT(statusVals
[0] == 200);
792 TEST_ASSERT(statusVals
[1] == 300);
797 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
798 TEST_ASSERT_SUCCESS(status
);
799 TEST_ASSERT(numStatuses
== 1);
800 TEST_ASSERT(statusVals
[0] == 300);
805 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
806 TEST_ASSERT_SUCCESS(status
);
807 TEST_ASSERT(numStatuses
== 2);
808 TEST_ASSERT(statusVals
[0] == 400);
809 TEST_ASSERT(statusVals
[1] == 500);
814 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
815 TEST_ASSERT_SUCCESS(status
);
816 TEST_ASSERT(numStatuses
== 1);
817 TEST_ASSERT(statusVals
[0] == 0);
820 // Check buffer overflow error handling. Char == A
// Capacity 0: overflow is reported, the full count (2) is returned, and the buffer is untouched.
825 memset(statusVals
, -1, sizeof(statusVals
));
826 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
827 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
828 TEST_ASSERT(numStatuses
== 2);
829 TEST_ASSERT(statusVals
[0] == -1);
// Capacity 1: overflow is reported, only the first value is written.
831 status
= U_ZERO_ERROR
;
832 memset(statusVals
, -1, sizeof(statusVals
));
833 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
834 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
835 TEST_ASSERT(numStatuses
== 2);
836 TEST_ASSERT(statusVals
[0] == 100);
837 TEST_ASSERT(statusVals
[1] == -1);
// Capacity 2: exactly fits, no error, and nothing is written past the second slot.
839 status
= U_ZERO_ERROR
;
840 memset(statusVals
, -1, sizeof(statusVals
));
841 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
842 TEST_ASSERT_SUCCESS(status
);
843 TEST_ASSERT(numStatuses
== 2);
844 TEST_ASSERT(statusVals
[0] == 100);
845 TEST_ASSERT(statusVals
[1] == 300);
846 TEST_ASSERT(statusVals
[2] == -1);
853 // Bug 2190 Regression test. Builder crash on rule consisting of only a
854 // $variable reference
// The iterator is built from rule source text and its boundaries on "abcdabcd" are
// checked against bounds1 via doBoundaryTest().
855 void RBBIAPITest::TestBug2190() {
856 UnicodeString rulesString1
= "$aaa = abcd;\n"
859 UnicodeString testString1
= "abcdabcd";
// Expected boundary offsets within testString1.
861 int32_t bounds1
[] = {0, 4, 8};
862 UErrorCode status
=U_ZERO_ERROR
;
863 UParseError parseError
;
865 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
866 if(U_FAILURE(status
)) {
867 errln("FAIL : in construction");
869 bi
->setText(testString1
);
870 doBoundaryTest(*bi
, testString1
, bounds1
);
// Exercises BreakIterator::registerInstance() / unregister(): registers the ja_JP word
// iterator under locale "xx", checks iterator lookups and getAvailableLocales() while it is
// registered, then repeats the checks after unregistering. Compiled only when
// UCONFIG_NO_SERVICE is not set.
876 void RBBIAPITest::TestRegistration() {
877 #if !UCONFIG_NO_SERVICE
878 UErrorCode status
= U_ZERO_ERROR
;
879 BreakIterator
* ja_word
= BreakIterator::createWordInstance("ja_JP", status
);
881 // ok to not delete these if we exit because of error?
882 BreakIterator
* ja_char
= BreakIterator::createCharacterInstance("ja_JP", status
);
883 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
884 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
// Register the Japanese word iterator as the word iterator for locale "xx".
886 URegistryKey key
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
);
888 if (ja_word
&& *ja_word
== *root_word
) {
889 errln("japan not different from root");
// A word iterator requested for xx_XX is expected to equal the registered ja_word.
894 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
897 fail
= *result
!= *ja_word
;
901 errln("bad result for xx_XX/word");
906 BreakIterator
* result
= BreakIterator::createCharacterInstance("ja_JP", status
);
909 fail
= *result
!= *ja_char
;
913 errln("bad result for ja_JP/char");
// Only the word kind was registered, so the xx_XX character iterator is compared with root_char.
918 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
921 fail
= *result
!= *root_char
;
925 errln("bad result for xx_XX/char");
// While registered, "xx" is expected to appear among the available locales.
930 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
932 const UnicodeString
* p
;
933 while ((p
= avail
->snext(status
))) {
934 if (p
->compare("xx") == 0) {
941 errln("did not find test locale");
946 UBool unreg
= BreakIterator::unregister(key
, status
);
948 errln("unable to unregister");
953 BreakIterator
* result
= BreakIterator::createWordInstance("en_US", status
);
954 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
957 fail
= *root
!= *result
;
962 errln("did not get root break");
// After unregistering, "xx" is expected to be absent from the available locales.
967 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
969 const UnicodeString
* p
;
970 while ((p
= avail
->snext(status
))) {
971 if (p
->compare("xx") == 0) {
978 errln("found test locale");
// The array form of getAvailableLocales() is expected to include English.
984 UBool foundLocale
= FALSE
;
985 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
986 for (int i
=0; i
<count
; i
++) {
987 if (avail
[i
] == Locale::getEnglish()) {
992 if (foundLocale
== FALSE
) {
993 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
998 // ja_word was adopted by factory
// Opens the break-rule data item named `dataFile`, rebuilds a RuleBasedBreakIterator from the
// rule source embedded in that data, and compares the rebuilt binary rules with the original
// data bytes.
//   dataFile - name of the "brk" data item to open under U_ICUDATA_BRKITR.
1005 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
1006 UErrorCode status
= U_ZERO_ERROR
;
1007 UParseError parseError
;
1008 parseError
.line
= 0;
1009 parseError
.offset
= 0;
1010 UDataMemory
*data
= udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
);
1012 const UChar
*builtSource
;
1013 const uint8_t *rbbiRules
;
1014 const uint8_t *builtRules
;
1016 if (U_FAILURE(status
)) {
1017 errln("Can't open \"%s\"", dataFile
);
// The rule source text lives inside the data image, at offset fRuleSource from its start.
1021 builtRules
= (const uint8_t *)udata_getMemory(data
);
1022 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
1023 RuleBasedBreakIterator
*brkItr
= new RuleBasedBreakIterator(builtSource
, parseError
, status
);
1024 if (U_FAILURE(status
)) {
1025 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1026 u_errorName(status
), parseError
.line
, parseError
.offset
);
// Byte-compare the pre-built rules with the rules produced by rebuilding from source.
1029 rbbiRules
= brkItr
->getBinaryRules(length
);
1030 logln("Comparing \"%s\" len=%d", dataFile
, length
);
1031 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
1032 errln("Built rules and rebuilt rules are different %s", dataFile
);
// Runs RoundtripRule() on each of the break-rule data items named below.
1039 void RBBIAPITest::TestRoundtripRules() {
1040 RoundtripRule("word");
1041 RoundtripRule("title");
1042 RoundtripRule("sent");
1043 RoundtripRule("line");
1044 RoundtripRule("char");
1046 RoundtripRule("word_ja");
1047 RoundtripRule("word_POSIX");
1051 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1052 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1053 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1054 // first delete actually frees rulesCopy).
1055 void RBBIAPITest::TestCreateFromRBBIData() {
1056 // Get some handy RBBIData
1057 const char *brkName
= "word"; // or "sent", "line", "char", etc.
1058 UErrorCode status
= U_ZERO_ERROR
;
1059 UDataMemory
* data
= udata_open(U_ICUDATA_BRKITR
, "brk", brkName
, &status
);
1060 if ( U_SUCCESS(status
) ) {
1061 const RBBIDataHeader
* builtRules
= (const RBBIDataHeader
*)udata_getMemory(data
);
// Remember the data length so it can be re-checked after the non-adopting iterator is deleted.
1062 uint32_t length
= builtRules
->fLength
;
1063 RBBIWithProtectedFunctions
* brkItr
;
1065 // Try the memory-adopting constructor, need to copy the data first
1066 RBBIDataHeader
* rulesCopy
= (RBBIDataHeader
*) uprv_malloc(length
);
1068 uprv_memcpy( rulesCopy
, builtRules
, length
);
1070 brkItr
= new RBBIWithProtectedFunctions(rulesCopy
, status
);
1071 if ( U_SUCCESS(status
) ) {
1072 delete brkItr
; // this should free rulesCopy
1074 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status
) );
1075 status
= U_ZERO_ERROR
;// reset for the next test
1076 uprv_free( rulesCopy
);
1080 // Now try the non-adopting constructor
1081 brkItr
= new RBBIWithProtectedFunctions(builtRules
, RBBIWithProtectedFunctions::kDontAdopt
, status
);
1082 if ( U_SUCCESS(status
) ) {
1083 delete brkItr
; // this should NOT attempt to free builtRules
1084 if (builtRules
->fLength
!= length
) { // sanity check
1085 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1088 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status
) );
1095 //---------------------------------------------
1097 //---------------------------------------------
// Test dispatcher: sets `name` to the name of the test at `index` and, when `exec` is true,
// runs that test.
//   index - zero-based test number.
//   exec  - when true the selected test is executed; otherwise only `name` is set.
//   name  - out: the test's name, or "" when `index` matches no test.
1099 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
1101 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
1103 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1104 case 0: name
= "TestCloneEquals"; if (exec
) TestCloneEquals(); break;
1105 case 1: name
= "TestgetRules"; if (exec
) TestgetRules(); break;
1106 case 2: name
= "TestHashCode"; if (exec
) TestHashCode(); break;
1107 case 3: name
= "TestGetSetAdoptText"; if (exec
) TestGetSetAdoptText(); break;
1108 case 4: name
= "TestIteration"; if (exec
) TestIteration(); break;
1109 case 5: name
= "TestBuilder"; if (exec
) TestBuilder(); break;
1110 case 6: name
= "TestQuoteGrouping"; if (exec
) TestQuoteGrouping(); break;
1111 case 7: name
= "TestRuleStatus"; if (exec
) TestRuleStatus(); break;
1112 case 8: name
= "TestRuleStatusVec"; if (exec
) TestRuleStatusVec(); break;
1113 case 9: name
= "TestBug2190"; if (exec
) TestBug2190(); break;
1114 case 10: name
= "TestRegistration"; if (exec
) TestRegistration(); break;
1115 case 11: name
= "TestBoilerPlate"; if (exec
) TestBoilerPlate(); break;
1116 case 12: name
= "TestRoundtripRules"; if (exec
) TestRoundtripRules(); break;
1117 case 13: name
= "TestCreateFromRBBIData"; if (exec
) TestCreateFromRBBIData(); break;
1119 default: name
= ""; break; // needed to end loop
1123 //---------------------------------------------
1124 //Internal subroutines
1125 //---------------------------------------------
// Checks bi.isBoundary(i) for every index i in [0, text.length()) against the expected
// boundary offsets, logging each result and reporting every mismatch.
//   bi         - iterator under test (its text is set by the caller).
//   text       - the text being iterated; only its length is used in the visible code.
//   boundaries - expected boundary offsets, indexed by `p` below.
1127 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1128 logln((UnicodeString
)"testIsBoundary():");
1131 for (int32_t i
= 0; i
< text
.length(); i
++) {
1132 isB
= bi
.isBoundary(i
);
1133 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
// i is an expected boundary when it matches the current entry of `boundaries`.
1135 if (i
== boundaries
[p
]) {
1137 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
1142 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
// Reports an error when `gotoffset` differs from `expectedOffset`, then extracts the text
// lying between `start` and `gotoffset` (in whichever order they fall) and compares it with
// `expectedString`.
//   testString     - text the offsets refer to.
//   start          - offset the iteration step started from.
//   gotoffset      - offset actually returned by the iterator.
//   expectedOffset - offset the iterator should have returned.
//   expectedString - expected selected text, converted with CharsToUnicodeString().
1146 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1147 UnicodeString selected
;
1148 UnicodeString expected
=CharsToUnicodeString(expectedString
);
1150 if(gotoffset
!= expectedOffset
)
1151 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
// Extract with the smaller offset first, whichever direction the iterator moved.
1152 if(start
<= gotoffset
){
1153 testString
.extractBetween(start
, gotoffset
, selected
);
1156 testString
.extractBetween(gotoffset
, start
, selected
);
1158 if(selected
.compare(expected
) != 0)
1159 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1161 logln(prettify("****selected \"" + selected
+ "\""));
1164 //---------------------------------------------
1165 //RBBIWithProtectedFunctions class functions
1166 //---------------------------------------------
// Forwards `data` and `status` unchanged to the RuleBasedBreakIterator base-class
// constructor taking (RBBIDataHeader*, UErrorCode&).
1168 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader
* data
, UErrorCode
&status
)
1169 : RuleBasedBreakIterator(data
, status
)
// Forwards `data` and `status` to the RuleBasedBreakIterator base-class constructor that
// takes a const RBBIDataHeader*, passing RuleBasedBreakIterator::kDontAdopt; the unnamed
// EDontAdopt parameter only selects this overload.
1173 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader
* data
, enum EDontAdopt
, UErrorCode
&status
)
1174 : RuleBasedBreakIterator(data
, RuleBasedBreakIterator::kDontAdopt
, status
)
1178 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */