1 /********************************************************************
2 * Copyright (c) 1999-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/uchar.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
22 #include "unicode/locid.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utext.h"
26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
27 #include "unicode/filteredbrk.h"
28 #include <stdio.h> // for sprintf
31 * API Test the RuleBasedBreakIterator class
35 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
36 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
38 #define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
39 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
41 void RBBIAPITest::TestCloneEquals()
44 UErrorCode status
=U_ZERO_ERROR
;
45 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
46 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
47 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
48 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
49 if(U_FAILURE(status
)){
50 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
55 UnicodeString testString
="Testing word break iterators's clone() and equals()";
56 bi1
->setText(testString
);
57 bi2
->setText(testString
);
58 biequal
->setText(testString
);
60 bi3
->setText("hello");
62 logln((UnicodeString
)"Testing equals()");
64 logln((UnicodeString
)"Testing == and !=");
65 UBool b
= (*bi1
!= *biequal
);
69 errln((UnicodeString
)"ERROR:1 RBBI's == and != operator failed.");
72 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
73 errln((UnicodeString
)"ERROR:2 RBBI's == and != operator failed.");
76 // Quick test of RuleBasedBreakIterator assignment -
78 // two different iterators are !=
79 // they are == after assignment
80 // source and dest iterator produce the same next() after assignment.
81 // deleting one doesn't disable the other.
82 logln("Testing assignment");
83 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getDefault(), status
);
84 if(U_FAILURE(status
)){
85 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
89 RuleBasedBreakIterator biDefault
, biDefault2
;
90 if(U_FAILURE(status
)){
91 errln((UnicodeString
)"FAIL : in construction of default iterator");
94 if (biDefault
== *bix
) {
95 errln((UnicodeString
)"ERROR: iterators should not compare ==");
98 if (biDefault
!= biDefault2
) {
99 errln((UnicodeString
)"ERROR: iterators should compare ==");
104 UnicodeString
HelloString("Hello Kitty");
105 bix
->setText(HelloString
);
107 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
111 errln(UnicodeString("ERROR: strings should be equal before assignment."));
114 int bixnext
= bix
->next();
115 int bi2next
= bi2
->next();
116 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
117 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
120 if (bi2
->next() != 8) {
121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
126 logln((UnicodeString
)"Testing clone()");
127 RuleBasedBreakIterator
* bi1clone
=(RuleBasedBreakIterator
*)bi1
->clone();
128 RuleBasedBreakIterator
* bi2clone
=(RuleBasedBreakIterator
*)bi2
->clone();
130 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
131 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
132 errln((UnicodeString
)"ERROR:1 RBBI's clone() method failed");
134 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
135 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
136 errln((UnicodeString
)"ERROR:2 RBBI's clone() method failed");
138 if(bi1
->getText() != bi1clone
->getText() ||
139 bi2clone
->getText() != bi2
->getText() ||
140 *bi2clone
== *bi1clone
)
141 errln((UnicodeString
)"ERROR: RBBI's clone() method failed");
151 void RBBIAPITest::TestBoilerPlate()
153 UErrorCode status
= U_ZERO_ERROR
;
154 BreakIterator
* a
= BreakIterator::createWordInstance(Locale("hi"), status
);
155 BreakIterator
* b
= BreakIterator::createWordInstance(Locale("hi_IN"),status
);
156 if (U_FAILURE(status
)) {
157 errcheckln(status
, "Creation of break iterator failed %s", u_errorName(status
));
161 errln("Failed: boilerplate method operator!= does not return correct results");
163 // Japanese word break iterators are identical to root with
164 // a dictionary-based break iterator
165 BreakIterator
* c
= BreakIterator::createCharacterInstance(Locale("ja"),status
);
166 BreakIterator
* d
= BreakIterator::createCharacterInstance(Locale("root"),status
);
169 errln("Failed: boilerplate method operator== does not return correct results");
172 errln("creation of break iterator failed");
180 void RBBIAPITest::TestgetRules()
182 UErrorCode status
=U_ZERO_ERROR
;
184 RuleBasedBreakIterator
* bi1
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
185 RuleBasedBreakIterator
* bi2
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
186 if(U_FAILURE(status
)){
187 errcheckln(status
, "FAIL: in construction - %s", u_errorName(status
));
195 logln((UnicodeString
)"Testing toString()");
197 bi1
->setText((UnicodeString
)"Hello there");
199 RuleBasedBreakIterator
* bi3
=(RuleBasedBreakIterator
*)bi1
->clone();
201 UnicodeString temp
=bi1
->getRules();
202 UnicodeString temp2
=bi2
->getRules();
203 UnicodeString temp3
=bi3
->getRules();
204 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
205 errln((UnicodeString
)"ERROR: error in getRules() method");
211 void RBBIAPITest::TestHashCode()
213 UErrorCode status
=U_ZERO_ERROR
;
214 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
215 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
216 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
217 if(U_FAILURE(status
)){
218 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
226 logln((UnicodeString
)"Testing hashCode()");
228 bi1
->setText((UnicodeString
)"Hash code");
229 bi2
->setText((UnicodeString
)"Hash code");
230 bi3
->setText((UnicodeString
)"Hash code");
232 RuleBasedBreakIterator
* bi1clone
= (RuleBasedBreakIterator
*)bi1
->clone();
233 RuleBasedBreakIterator
* bi2clone
= (RuleBasedBreakIterator
*)bi2
->clone();
235 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
236 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
237 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
239 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
240 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
241 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
250 void RBBIAPITest::TestGetSetAdoptText()
252 logln((UnicodeString
)"Testing getText setText ");
253 IcuTestErrorCode
status(*this, "TestGetSetAdoptText");
254 UnicodeString str1
="first string.";
255 UnicodeString str2
="Second string.";
256 LocalPointer
<RuleBasedBreakIterator
> charIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
));
257 LocalPointer
<RuleBasedBreakIterator
> wordIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
));
258 if(status
.isFailure()){
259 errcheckln(status
, "Fail : in construction - %s", status
.errorName());
264 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
265 CharacterIterator
* text1Clone
= text1
->clone();
266 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
267 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
269 wordIter1
->setText(str1
);
270 CharacterIterator
*tci
= &wordIter1
->getText();
273 TEST_ASSERT(tstr
== str1
);
274 if(wordIter1
->current() != 0)
275 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
279 wordIter1
->setText(str2
);
280 if(wordIter1
->current() != 0)
281 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
284 charIter1
->adoptText(text1Clone
);
285 TEST_ASSERT(wordIter1
->getText() != charIter1
->getText());
286 tci
= &wordIter1
->getText();
288 TEST_ASSERT(tstr
== str2
);
289 tci
= &charIter1
->getText();
291 TEST_ASSERT(tstr
== str1
);
294 LocalPointer
<RuleBasedBreakIterator
> rb((RuleBasedBreakIterator
*)wordIter1
->clone());
295 rb
->adoptText(text1
);
296 if(rb
->getText() != *text1
)
297 errln((UnicodeString
)"ERROR:1 error in adoptText ");
298 rb
->adoptText(text2
);
299 if(rb
->getText() != *text2
)
300 errln((UnicodeString
)"ERROR:2 error in adoptText ");
302 // Adopt where iterator range is less than the entire original source string.
303 // (With the change of the break engine to working with UText internally,
304 // CharacterIterators starting at positions other than zero are not supported)
305 rb
->adoptText(text3
);
306 TEST_ASSERT(rb
->preceding(2) == 0);
307 TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
);
308 //if(rb->preceding(2) != 3) {
309 // errln((UnicodeString)"ERROR:3 error in adoptText ");
311 //if(rb->following(11) != BreakIterator::DONE) {
312 // errln((UnicodeString)"ERROR:4 error in adoptText ");
317 // Quick test to see if UText is working at all.
319 const char *s1
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
320 const char *s2
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
324 LocalUTextPointer
ut(utext_openUTF8(NULL
, s1
, -1, status
));
325 wordIter1
->setText(ut
.getAlias(), status
);
326 TEST_ASSERT_SUCCESS(status
);
329 pos
= wordIter1
->first();
331 pos
= wordIter1
->next();
333 pos
= wordIter1
->next();
335 pos
= wordIter1
->next();
336 TEST_ASSERT(pos
==11);
337 pos
= wordIter1
->next();
338 TEST_ASSERT(pos
==UBRK_DONE
);
341 LocalUTextPointer
ut2(utext_openUTF8(NULL
, s2
, -1, status
));
342 TEST_ASSERT_SUCCESS(status
);
343 wordIter1
->setText(ut2
.getAlias(), status
);
344 TEST_ASSERT_SUCCESS(status
);
346 pos
= wordIter1
->first();
348 pos
= wordIter1
->next();
350 pos
= wordIter1
->next();
353 pos
= wordIter1
->last();
355 pos
= wordIter1
->previous();
357 pos
= wordIter1
->previous();
359 pos
= wordIter1
->previous();
361 pos
= wordIter1
->previous();
362 TEST_ASSERT(pos
==UBRK_DONE
);
365 UnicodeString sEmpty
;
366 LocalUTextPointer
gut2(utext_openUnicodeString(NULL
, &sEmpty
, status
));
367 wordIter1
->getUText(gut2
.getAlias(), status
);
368 TEST_ASSERT_SUCCESS(status
);
373 void RBBIAPITest::TestIteration()
375 // This test just verifies that the API is present.
376 // Testing for correct operation of the break rules happens elsewhere.
378 UErrorCode status
=U_ZERO_ERROR
;
379 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
380 if (U_FAILURE(status
) || bi
== NULL
) {
381 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
386 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
387 if (U_FAILURE(status
) || bi
== NULL
) {
388 errcheckln(status
, "Failure creating Word break iterator. Status = %s", u_errorName(status
));
393 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
394 if (U_FAILURE(status
) || bi
== NULL
) {
395 errcheckln(status
, "Failure creating Line break iterator. Status = %s", u_errorName(status
));
400 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
401 if (U_FAILURE(status
) || bi
== NULL
) {
402 errcheckln(status
, "Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
407 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
408 if (U_FAILURE(status
) || bi
== NULL
) {
409 errcheckln(status
, "Failure creating Title break iterator. Status = %s", u_errorName(status
));
414 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
415 if (U_FAILURE(status
) || bi
== NULL
) {
416 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
417 return; // Skip the rest of these tests.
421 UnicodeString testString
="0123456789";
422 bi
->setText(testString
);
427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i
);
432 errln("Incorrect value from bi->last(). Expected 10, got %d", i
);
441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__
, i
);
447 if (i
!= BreakIterator::DONE
) {
448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__
, i
);
457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__
, i
);
462 if (i
!= BreakIterator::DONE
) {
463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__
, i
);
473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__
, i
);
486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__
, i
);
493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
500 i
= bi
->following(4);
502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__
, i
);
505 i
= bi
->following(9);
507 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__
, i
);
510 i
= bi
->following(10);
511 if (i
!= BreakIterator::DONE
) {
512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__
, i
);
519 i
= bi
->preceding(4);
521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__
, i
);
524 i
= bi
->preceding(10);
526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__
, i
);
529 i
= bi
->preceding(1);
531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__
, i
);
534 i
= bi
->preceding(0);
535 if (i
!= BreakIterator::DONE
) {
536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__
, i
);
544 if (bi
->isBoundary(3) != TRUE
) {
545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__
, i
);
549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__
, i
);
553 if (bi
->isBoundary(11) != FALSE
) {
554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__
, i
);
558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__
, i
);
567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__
, i
);
572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__
, i
);
577 if (i
!= BreakIterator::DONE
) {
578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__
, i
);
590 void RBBIAPITest::TestBuilder() {
591 UnicodeString rulesString1
= "$Letters = [:L:];\n"
592 "$Numbers = [:N:];\n"
595 "[^$Letters $Numbers];\n"
597 UnicodeString testString1
= "abc123..abc";
599 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
600 UErrorCode status
=U_ZERO_ERROR
;
601 UParseError parseError
;
603 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
604 if(U_FAILURE(status
)) {
605 dataerrln("Fail : in construction - %s", u_errorName(status
));
607 bi
->setText(testString1
);
608 doBoundaryTest(*bi
, testString1
, bounds1
);
616 // Single quotes within rules imply a grouping, so that a modifier
617 // following the quoted text (* or +) applies to all of the quoted chars.
619 void RBBIAPITest::TestQuoteGrouping() {
620 UnicodeString rulesString1
= "#Here comes the rule...\n"
621 "'$@!'*;\n" // (\$\@\!)*
624 UnicodeString testString1
= "$@!$@!X$@!!X";
626 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
627 UErrorCode status
=U_ZERO_ERROR
;
628 UParseError parseError
;
630 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
631 if(U_FAILURE(status
)) {
632 dataerrln("Fail : in construction - %s", u_errorName(status
));
634 bi
->setText(testString1
);
635 doBoundaryTest(*bi
, testString1
, bounds1
);
642 // Test word break rule status constants.
644 void RBBIAPITest::TestRuleStatus() {
646 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
649 // 012345678901234567 8 9 0
652 UnicodeString
testString1(str
);
653 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
654 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
655 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
656 UBRK_WORD_IDEO
, UBRK_WORD_NONE
};
658 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
659 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
660 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
};
662 UErrorCode status
=U_ZERO_ERROR
;
664 BreakIterator
*bi
= BreakIterator::createWordInstance(Locale::getEnglish(), status
);
665 if(U_FAILURE(status
)) {
666 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
668 bi
->setText(testString1
);
669 // First test that the breaks are in the right spots.
670 doBoundaryTest(*bi
, testString1
, bounds1
);
672 // Then go back and check tag values
675 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
676 if (pos
!= bounds1
[i
]) {
677 errln("FAIL: unexpected word break at postion %d", pos
);
680 tag
= bi
->getRuleStatus();
681 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
682 errln("FAIL: incorrect tag value %d at position %d", tag
, pos
);
686 // Check that we get the same tag values from getRuleStatusVec()
688 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
689 TEST_ASSERT_SUCCESS(status
);
691 TEST_ASSERT(vec
[0] == tag
);
696 // Now test line break status. This test mostly is to confirm that the status constants
697 // are correctly declared in the header.
698 testString1
= "test line. \n";
701 bi
= BreakIterator::createLineInstance(Locale::getEnglish(), status
);
702 if(U_FAILURE(status
)) {
703 errcheckln(status
, "failed to create word break iterator. - %s", u_errorName(status
));
709 bi
->setText(testString1
);
711 tag
= bi
->getRuleStatus();
712 for (i
=0; i
<3; i
++) {
715 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
717 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
719 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
721 success
= FALSE
; break;
723 if (success
== FALSE
) {
724 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
729 tag
= bi
->getRuleStatus();
731 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
732 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
733 (UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
)) {
734 errln("UBRK_LINE_* constants from header are inconsistent.");
744 // Test the vector form of break rule status.
746 void RBBIAPITest::TestRuleStatusVec() {
747 UnicodeString
rulesString( "[A-N]{100}; \n"
752 "!.*;\n", -1, US_INV
);
753 UnicodeString testString1
= "Aapz5?";
754 int32_t statusVals
[10];
758 UErrorCode status
=U_ZERO_ERROR
;
759 UParseError parseError
;
761 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
762 if (U_FAILURE(status
)) {
763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__
, __LINE__
, u_errorName(status
));
765 bi
->setText(testString1
);
770 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
771 TEST_ASSERT_SUCCESS(status
);
772 TEST_ASSERT(numStatuses
== 2);
773 TEST_ASSERT(statusVals
[0] == 100);
774 TEST_ASSERT(statusVals
[1] == 300);
779 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
780 TEST_ASSERT_SUCCESS(status
);
781 TEST_ASSERT(numStatuses
== 2);
782 TEST_ASSERT(statusVals
[0] == 200);
783 TEST_ASSERT(statusVals
[1] == 300);
788 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
789 TEST_ASSERT_SUCCESS(status
);
790 TEST_ASSERT(numStatuses
== 2);
791 TEST_ASSERT(statusVals
[0] == 200);
792 TEST_ASSERT(statusVals
[1] == 300);
797 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
798 TEST_ASSERT_SUCCESS(status
);
799 TEST_ASSERT(numStatuses
== 1);
800 TEST_ASSERT(statusVals
[0] == 300);
805 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
806 TEST_ASSERT_SUCCESS(status
);
807 TEST_ASSERT(numStatuses
== 2);
808 TEST_ASSERT(statusVals
[0] == 400);
809 TEST_ASSERT(statusVals
[1] == 500);
814 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
815 TEST_ASSERT_SUCCESS(status
);
816 TEST_ASSERT(numStatuses
== 1);
817 TEST_ASSERT(statusVals
[0] == 0);
820 // Check buffer overflow error handling. Char == A
825 memset(statusVals
, -1, sizeof(statusVals
));
826 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
827 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
828 TEST_ASSERT(numStatuses
== 2);
829 TEST_ASSERT(statusVals
[0] == -1);
831 status
= U_ZERO_ERROR
;
832 memset(statusVals
, -1, sizeof(statusVals
));
833 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
834 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
835 TEST_ASSERT(numStatuses
== 2);
836 TEST_ASSERT(statusVals
[0] == 100);
837 TEST_ASSERT(statusVals
[1] == -1);
839 status
= U_ZERO_ERROR
;
840 memset(statusVals
, -1, sizeof(statusVals
));
841 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
842 TEST_ASSERT_SUCCESS(status
);
843 TEST_ASSERT(numStatuses
== 2);
844 TEST_ASSERT(statusVals
[0] == 100);
845 TEST_ASSERT(statusVals
[1] == 300);
846 TEST_ASSERT(statusVals
[2] == -1);
853 // Bug 2190 Regression test. Builder crash on rule consisting of only a
854 // $variable reference
855 void RBBIAPITest::TestBug2190() {
856 UnicodeString rulesString1
= "$aaa = abcd;\n"
859 UnicodeString testString1
= "abcdabcd";
861 int32_t bounds1
[] = {0, 4, 8};
862 UErrorCode status
=U_ZERO_ERROR
;
863 UParseError parseError
;
865 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
866 if(U_FAILURE(status
)) {
867 dataerrln("Fail : in construction - %s", u_errorName(status
));
869 bi
->setText(testString1
);
870 doBoundaryTest(*bi
, testString1
, bounds1
);
876 void RBBIAPITest::TestRegistration() {
877 #if !UCONFIG_NO_SERVICE
878 UErrorCode status
= U_ZERO_ERROR
;
879 BreakIterator
* ja_word
= BreakIterator::createWordInstance("ja_JP", status
);
880 // ok to not delete these if we exit because of error?
881 BreakIterator
* ja_char
= BreakIterator::createCharacterInstance("ja_JP", status
);
882 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
883 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
885 if (status
== U_MISSING_RESOURCE_ERROR
|| status
== U_FILE_ACCESS_ERROR
) {
886 dataerrln("Error creating instances of break interactors - %s", u_errorName(status
));
896 URegistryKey key
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
);
898 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
899 if (ja_word
&& *ja_word
== *root_word
) {
900 errln("japan not different from root");
906 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
909 fail
= *result
!= *ja_word
;
913 errln("bad result for xx_XX/word");
918 BreakIterator
* result
= BreakIterator::createCharacterInstance("ja_JP", status
);
921 fail
= *result
!= *ja_char
;
925 errln("bad result for ja_JP/char");
930 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
933 fail
= *result
!= *root_char
;
937 errln("bad result for xx_XX/char");
942 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
944 const UnicodeString
* p
;
945 while ((p
= avail
->snext(status
))) {
946 if (p
->compare("xx") == 0) {
953 errln("did not find test locale");
958 UBool unreg
= BreakIterator::unregister(key
, status
);
960 errln("unable to unregister");
965 BreakIterator
* result
= BreakIterator::createWordInstance("en_US", status
);
966 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
969 fail
= *root
!= *result
;
974 errln("did not get root break");
979 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
981 const UnicodeString
* p
;
982 while ((p
= avail
->snext(status
))) {
983 if (p
->compare("xx") == 0) {
990 errln("found test locale");
996 UBool foundLocale
= FALSE
;
997 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
998 for (int i
=0; i
<count
; i
++) {
999 if (avail
[i
] == Locale::getEnglish()) {
1004 if (foundLocale
== FALSE
) {
1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1010 // ja_word was adopted by factory
1017 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
1018 UErrorCode status
= U_ZERO_ERROR
;
1019 UParseError parseError
;
1020 parseError
.line
= 0;
1021 parseError
.offset
= 0;
1022 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
));
1024 const UChar
*builtSource
;
1025 const uint8_t *rbbiRules
;
1026 const uint8_t *builtRules
;
1028 if (U_FAILURE(status
)) {
1029 errcheckln(status
, "Can't open \"%s\" - %s", dataFile
, u_errorName(status
));
1033 builtRules
= (const uint8_t *)udata_getMemory(data
.getAlias());
1034 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
1035 RuleBasedBreakIterator
*brkItr
= new RuleBasedBreakIterator(builtSource
, parseError
, status
);
1036 if (U_FAILURE(status
)) {
1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1038 u_errorName(status
), parseError
.line
, parseError
.offset
);
1041 rbbiRules
= brkItr
->getBinaryRules(length
);
1042 logln("Comparing \"%s\" len=%d", dataFile
, length
);
1043 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
1044 errln("Built rules and rebuilt rules are different %s", dataFile
);
1050 void RBBIAPITest::TestRoundtripRules() {
1051 RoundtripRule("word");
1052 RoundtripRule("title");
1053 RoundtripRule("sent");
1054 RoundtripRule("line");
1055 RoundtripRule("char");
1057 RoundtripRule("word_POSIX");
1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1062 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1063 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1064 // first delete actually frees rulesCopy).
1065 void RBBIAPITest::TestCreateFromRBBIData() {
1066 // Get some handy RBBIData
1067 const char *brkName
= "word"; // or "sent", "line", "char", etc.
1068 UErrorCode status
= U_ZERO_ERROR
;
1069 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", brkName
, &status
));
1070 if ( U_SUCCESS(status
) ) {
1071 const RBBIDataHeader
* builtRules
= (const RBBIDataHeader
*)udata_getMemory(data
.getAlias());
1072 uint32_t length
= builtRules
->fLength
;
1073 RBBIWithProtectedFunctions
* brkItr
;
1075 // Try the memory-adopting constructor, need to copy the data first
1076 RBBIDataHeader
* rulesCopy
= (RBBIDataHeader
*) uprv_malloc(length
);
1078 uprv_memcpy( rulesCopy
, builtRules
, length
);
1080 brkItr
= new RBBIWithProtectedFunctions(rulesCopy
, status
);
1081 if ( U_SUCCESS(status
) ) {
1082 delete brkItr
; // this should free rulesCopy
1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status
) );
1085 status
= U_ZERO_ERROR
;// reset for the next test
1086 uprv_free( rulesCopy
);
1090 // Now try the non-adopting constructor
1091 brkItr
= new RBBIWithProtectedFunctions(builtRules
, RBBIWithProtectedFunctions::kDontAdopt
, status
);
1092 if ( U_SUCCESS(status
) ) {
1093 delete brkItr
; // this should NOT attempt to free builtRules
1094 if (builtRules
->fLength
!= length
) { // sanity check
1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status
) );
1102 // getBinaryRules() and RuleBasedBreakIterator(const uint8_t *binaryRules, ...)
1104 status
= U_ZERO_ERROR
;
1105 RuleBasedBreakIterator
*rb
= (RuleBasedBreakIterator
*)BreakIterator::createWordInstance(Locale::getEnglish(), status
);
1106 if (rb
== NULL
|| U_FAILURE(status
)) {
1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status
));
1110 const uint8_t *rules
= rb
->getBinaryRules(length
);
1111 RuleBasedBreakIterator
*rb2
= new RuleBasedBreakIterator(rules
, length
, status
);
1112 TEST_ASSERT_SUCCESS(status
);
1113 TEST_ASSERT(*rb
== *rb2
);
1114 UnicodeString words
= "one two three ";
1115 rb2
->setText(words
);
1116 int wordCounter
= 0;
1117 while (rb2
->next() != UBRK_DONE
) {
1120 TEST_ASSERT(wordCounter
== 6);
1122 status
= U_ZERO_ERROR
;
1123 RuleBasedBreakIterator
*rb3
= new RuleBasedBreakIterator(rules
, length
-1, status
);
1124 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1133 void RBBIAPITest::TestRefreshInputText() {
1135 * RefreshInput changes out the input of a Break Iterator without
1136 * changing anything else in the iterator's state. Used with Java JNI,
1137 * when Java moves the underlying string storage. This test
1138 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1139 * The right set of boundaries should still be found.
1141 UChar testStr
[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1142 UChar movedStr
[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1143 UErrorCode status
= U_ZERO_ERROR
;
1144 UText ut1
= UTEXT_INITIALIZER
;
1145 UText ut2
= UTEXT_INITIALIZER
;
1146 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
);
1147 TEST_ASSERT_SUCCESS(status
);
1149 utext_openUChars(&ut1
, testStr
, -1, &status
);
1150 TEST_ASSERT_SUCCESS(status
);
1152 if (U_SUCCESS(status
)) {
1153 bi
->setText(&ut1
, status
);
1154 TEST_ASSERT_SUCCESS(status
);
1156 /* Line boundaries will occur before each letter in the original string */
1157 TEST_ASSERT(1 == bi
->next());
1158 TEST_ASSERT(3 == bi
->next());
1160 /* Move the string, kill the original string. */
1161 u_strcpy(movedStr
, testStr
);
1162 u_memset(testStr
, 0x20, u_strlen(testStr
));
1163 utext_openUChars(&ut2
, movedStr
, -1, &status
);
1164 TEST_ASSERT_SUCCESS(status
);
1165 RuleBasedBreakIterator
*returnedBI
= &bi
->refreshInputText(&ut2
, status
);
1166 TEST_ASSERT_SUCCESS(status
);
1167 TEST_ASSERT(bi
== returnedBI
);
1169 /* Find the following matches, now working in the moved string. */
1170 TEST_ASSERT(5 == bi
->next());
1171 TEST_ASSERT(7 == bi
->next());
1172 TEST_ASSERT(8 == bi
->next());
1173 TEST_ASSERT(UBRK_DONE
== bi
->next());
#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
/**
 * Debug helper: logs the boundaries that brk reports for ustr.
 *
 * Iterates brk forward from its current position with next() until UBRK_DONE,
 * then writes two log lines through `it`: the string with a pilcrow inserted
 * at every boundary (wrapped in lenticular brackets), and the list of the
 * boundary offsets.
 *
 * @param brk  iterator to drain; the caller positions it (e.g. with first()).
 * @param ustr the text the iterator was set on.
 * @param it   test object used for logln()/errln() output.
 */
static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
    static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
    it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));

    int32_t *pos = new int32_t[ustr.length()];
    int32_t posCount = 0;

    // calculate breaks up front, so we can print out
    // sans any debugging
    for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
        pos[posCount++] = n;
        if(posCount>=ustr.length()) {
            it.errln("brk count exceeds string length!");
            // Bail out before the next store could run past the buffer,
            // and release the buffer on this exit path too.
            delete [] pos;
            return;
        }
    }

    // First line: the text with a marker at each boundary.
    UnicodeString out;
    out.append((UChar)CHSTR);
    int32_t prev = 0;
    for(int32_t i=0;i<posCount;i++) {
        int32_t n = pos[i];
        out.append(ustr.tempSubString(prev,n-prev));
        out.append((UChar)PILCROW);
        prev = n;
    }
    out.append(ustr.tempSubString(prev,ustr.length()-prev));
    out.append((UChar)CHEND);
    it.logln(out);

    // Second line: the numeric offsets.
    out.remove();
    for(int32_t i=0;i<posCount;i++) {
        char tmp[100];   // far larger than any "%d " rendering of an int32_t
        sprintf(tmp,"%d ",pos[i]);
        out.append(UnicodeString(tmp));
    }
    it.logln(out);

    delete [] pos;
}
#endif
1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1225 UErrorCode status
= U_ZERO_ERROR
;
1226 LocalPointer
<FilteredBreakIteratorBuilder
> builder
;
1227 LocalPointer
<BreakIterator
> baseBI
;
1228 LocalPointer
<BreakIterator
> filteredBI
;
1229 LocalPointer
<BreakIterator
> frenchBI
;
1231 const UnicodeString
text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1232 const UnicodeString
ABBR_MR("Mr.");
1233 const UnicodeString
ABBR_CAPT("Capt.");
1236 logln("Constructing empty builder\n");
1237 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1238 TEST_ASSERT_SUCCESS(status
);
1240 logln("Constructing base BI\n");
1241 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1242 TEST_ASSERT_SUCCESS(status
);
1244 logln("Building new BI\n");
1245 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1246 TEST_ASSERT_SUCCESS(status
);
1248 if (U_SUCCESS(status
)) {
1250 filteredBI
->setText(text
);
1251 TEST_ASSERT(20 == filteredBI
->next()); // Mr.
1252 TEST_ASSERT(84 == filteredBI
->next()); // recovered.
1253 TEST_ASSERT(90 == filteredBI
->next()); // Capt.
1254 TEST_ASSERT(181 == filteredBI
->next()); // Mr.
1255 TEST_ASSERT(278 == filteredBI
->next()); // charge.
1256 filteredBI
->first();
1257 prtbrks(filteredBI
.getAlias(), text
, *this);
1262 logln("Constructing empty builder\n");
1263 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1264 TEST_ASSERT_SUCCESS(status
);
1266 if (U_SUCCESS(status
)) {
1267 logln("Adding Mr. as an exception\n");
1268 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1269 TEST_ASSERT(FALSE
== builder
->suppressBreakAfter(ABBR_MR
, status
)); // already have it
1270 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
));
1271 TEST_ASSERT(FALSE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
)); // already removed it
1272 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1273 TEST_ASSERT_SUCCESS(status
);
1275 logln("Constructing base BI\n");
1276 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1277 TEST_ASSERT_SUCCESS(status
);
1279 logln("Building new BI\n");
1280 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1281 TEST_ASSERT_SUCCESS(status
);
1284 filteredBI
->setText(text
);
1285 TEST_ASSERT(84 == filteredBI
->next());
1286 TEST_ASSERT(90 == filteredBI
->next());// Capt.
1287 TEST_ASSERT(278 == filteredBI
->next());
1288 filteredBI
->first();
1289 prtbrks(filteredBI
.getAlias(), text
, *this);
1295 logln("Constructing empty builder\n");
1296 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1297 TEST_ASSERT_SUCCESS(status
);
1299 if (U_SUCCESS(status
)) {
1300 logln("Adding Mr. and Capt as an exception\n");
1301 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1302 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_CAPT
, status
));
1303 TEST_ASSERT_SUCCESS(status
);
1305 logln("Constructing base BI\n");
1306 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1307 TEST_ASSERT_SUCCESS(status
);
1309 logln("Building new BI\n");
1310 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1311 TEST_ASSERT_SUCCESS(status
);
1314 filteredBI
->setText(text
);
1315 TEST_ASSERT(84 == filteredBI
->next());
1316 TEST_ASSERT(278 == filteredBI
->next());
1317 filteredBI
->first();
1318 prtbrks(filteredBI
.getAlias(), text
, *this);
1324 logln("Constructing English builder\n");
1325 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1326 TEST_ASSERT_SUCCESS(status
);
1328 logln("Constructing base BI\n");
1329 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1330 TEST_ASSERT_SUCCESS(status
);
1332 if (U_SUCCESS(status
)) {
1333 logln("unsuppressing 'Capt'");
1334 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_CAPT
, status
));
1336 logln("Building new BI\n");
1337 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1338 TEST_ASSERT_SUCCESS(status
);
1340 if(filteredBI
.isValid()) {
1342 filteredBI
->setText(text
);
1343 TEST_ASSERT(84 == filteredBI
->next());
1344 TEST_ASSERT(90 == filteredBI
->next());
1345 TEST_ASSERT(278 == filteredBI
->next());
1346 filteredBI
->first();
1347 prtbrks(filteredBI
.getAlias(), text
, *this);
1354 logln("Constructing English builder\n");
1355 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1356 TEST_ASSERT_SUCCESS(status
);
1358 logln("Constructing base BI\n");
1359 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1360 TEST_ASSERT_SUCCESS(status
);
1362 if (U_SUCCESS(status
)) {
1363 logln("Building new BI\n");
1364 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1365 TEST_ASSERT_SUCCESS(status
);
1367 if(filteredBI
.isValid()) {
1369 filteredBI
->setText(text
);
1370 TEST_ASSERT(84 == filteredBI
->next());
1371 TEST_ASSERT(278 == filteredBI
->next());
1372 filteredBI
->first();
1373 prtbrks(filteredBI
.getAlias(), text
, *this);
1378 // reenable once french is in
1380 logln("Constructing French builder");
1381 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status
));
1382 TEST_ASSERT_SUCCESS(status
);
1384 logln("Constructing base BI\n");
1385 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status
));
1386 TEST_ASSERT_SUCCESS(status
);
1388 if (U_SUCCESS(status
)) {
1389 logln("Building new BI\n");
1390 frenchBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1391 TEST_ASSERT_SUCCESS(status
);
1394 if(frenchBI
.isValid()) {
1396 UnicodeString
frText("C'est MM. Duval.");
1397 frenchBI
->setText(frText
);
1398 TEST_ASSERT(16 == frenchBI
->next());
1399 TEST_ASSERT(BreakIterator::DONE
== frenchBI
->next());
1401 prtbrks(frenchBI
.getAlias(), frText
, *this);
1402 logln("Testing against English:");
1403 filteredBI
->setText(frText
);
1404 TEST_ASSERT(10 == filteredBI
->next()); // wrong for french, but filterBI is english.
1405 TEST_ASSERT(16 == filteredBI
->next());
1406 TEST_ASSERT(BreakIterator::DONE
== filteredBI
->next());
1407 filteredBI
->first();
1408 prtbrks(filteredBI
.getAlias(), frText
, *this);
1411 TEST_ASSERT_TRUE(*frenchBI
== *frenchBI
);
1412 TEST_ASSERT_TRUE(*filteredBI
!= *frenchBI
);
1413 TEST_ASSERT_TRUE(*frenchBI
!= *filteredBI
);
1415 dataerrln("French BI: not valid.");
1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1424 //---------------------------------------------
1426 //---------------------------------------------
1428 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
1430 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1433 #if !UCONFIG_NO_FILE_IO
1434 case 0: name
= "TestCloneEquals"; if (exec
) TestCloneEquals(); break;
1435 case 1: name
= "TestgetRules"; if (exec
) TestgetRules(); break;
1436 case 2: name
= "TestHashCode"; if (exec
) TestHashCode(); break;
1437 case 3: name
= "TestGetSetAdoptText"; if (exec
) TestGetSetAdoptText(); break;
1438 case 4: name
= "TestIteration"; if (exec
) TestIteration(); break;
1440 case 0: case 1: case 2: case 3: case 4: name
= "skip"; break;
1442 case 5: name
= "TestBuilder"; if (exec
) TestBuilder(); break;
1443 case 6: name
= "TestQuoteGrouping"; if (exec
) TestQuoteGrouping(); break;
1444 case 7: name
= "TestRuleStatusVec"; if (exec
) TestRuleStatusVec(); break;
1445 case 8: name
= "TestBug2190"; if (exec
) TestBug2190(); break;
1446 #if !UCONFIG_NO_FILE_IO
1447 case 9: name
= "TestRegistration"; if (exec
) TestRegistration(); break;
1448 case 10: name
= "TestBoilerPlate"; if (exec
) TestBoilerPlate(); break;
1449 case 11: name
= "TestRuleStatus"; if (exec
) TestRuleStatus(); break;
1450 case 12: name
= "TestRoundtripRules"; if (exec
) TestRoundtripRules(); break;
1451 case 13: name
= "TestCreateFromRBBIData"; if (exec
) TestCreateFromRBBIData(); break;
1453 case 9: case 10: case 11: case 12: case 13: name
= "skip"; break;
1455 case 14: name
= "TestRefreshInputText"; if (exec
) TestRefreshInputText(); break;
1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1458 case 15: name
= "TestFilteredBreakIteratorBuilder"; if(exec
) TestFilteredBreakIteratorBuilder(); break;
1460 case 15: name
="skip"; break;
1462 default: name
= ""; break; // needed to end loop
1466 //---------------------------------------------
1467 //Internal subroutines
1468 //---------------------------------------------
1470 void RBBIAPITest::doBoundaryTest(BreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1471 logln((UnicodeString
)"testIsBoundary():");
1474 for (int32_t i
= 0; i
< text
.length(); i
++) {
1475 isB
= bi
.isBoundary(i
);
1476 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
1478 if (i
== boundaries
[p
]) {
1480 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
1485 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
1489 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1490 UnicodeString selected
;
1491 UnicodeString expected
=CharsToUnicodeString(expectedString
);
1493 if(gotoffset
!= expectedOffset
)
1494 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
1495 if(start
<= gotoffset
){
1496 testString
.extractBetween(start
, gotoffset
, selected
);
1499 testString
.extractBetween(gotoffset
, start
, selected
);
1501 if(selected
.compare(expected
) != 0)
1502 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1504 logln(prettify("****selected \"" + selected
+ "\""));
1507 //---------------------------------------------
1508 //RBBIWithProtectedFunctions class functions
1509 //---------------------------------------------
1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader
* data
, UErrorCode
&status
)
1512 : RuleBasedBreakIterator(data
, status
)
1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader
* data
, enum EDontAdopt
, UErrorCode
&status
)
1517 : RuleBasedBreakIterator(data
, RuleBasedBreakIterator::kDontAdopt
, status
)
1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */