1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/uchar.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
33 * API Test the RuleBasedBreakIterator class
37 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
38 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
40 #define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
41 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
43 void RBBIAPITest::TestCloneEquals()
46 UErrorCode status
=U_ZERO_ERROR
;
47 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
48 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
49 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
50 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
51 if(U_FAILURE(status
)){
52 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
57 UnicodeString testString
="Testing word break iterators's clone() and equals()";
58 bi1
->setText(testString
);
59 bi2
->setText(testString
);
60 biequal
->setText(testString
);
62 bi3
->setText("hello");
64 logln((UnicodeString
)"Testing equals()");
66 logln((UnicodeString
)"Testing == and !=");
67 UBool b
= (*bi1
!= *biequal
);
71 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__
, __LINE__
);
74 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
75 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__
, __LINE__
);
78 // Quick test of RuleBasedBreakIterator assignment -
80 // two different iterators are !=
81 // they are == after assignment
82 // source and dest iterator produce the same next() after assignment.
83 // deleting one doesn't disable the other.
84 logln("Testing assignment");
85 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getDefault(), status
);
86 if(U_FAILURE(status
)){
87 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
91 RuleBasedBreakIterator biDefault
, biDefault2
;
92 if(U_FAILURE(status
)){
93 errln("%s:%d FAIL : in construction of default iterator", __FILE__
, __LINE__
);
96 if (biDefault
== *bix
) {
97 errln("%s:%d ERROR: iterators should not compare ==", __FILE__
, __LINE__
);
100 if (biDefault
!= biDefault2
) {
101 errln("%s:%d ERROR: iterators should compare ==", __FILE__
, __LINE__
);
106 UnicodeString
HelloString("Hello Kitty");
107 bix
->setText(HelloString
);
109 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__
, __LINE__
);
113 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__
, __LINE__
);
116 int bixnext
= bix
->next();
117 int bi2next
= bi2
->next();
118 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
119 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__
, __LINE__
);
122 if (bi2
->next() != 8) {
123 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__
, __LINE__
);
128 logln((UnicodeString
)"Testing clone()");
129 RuleBasedBreakIterator
* bi1clone
= dynamic_cast<RuleBasedBreakIterator
*>(bi1
->clone());
130 RuleBasedBreakIterator
* bi2clone
= dynamic_cast<RuleBasedBreakIterator
*>(bi2
->clone());
132 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
133 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
134 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__
, __LINE__
);
136 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
137 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
138 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__
, __LINE__
);
140 if(bi1
->getText() != bi1clone
->getText() ||
141 bi2clone
->getText() != bi2
->getText() ||
142 *bi2clone
== *bi1clone
)
143 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__
, __LINE__
);
153 void RBBIAPITest::TestBoilerPlate()
155 UErrorCode status
= U_ZERO_ERROR
;
156 BreakIterator
* a
= BreakIterator::createWordInstance(Locale("hi"), status
);
157 BreakIterator
* b
= BreakIterator::createWordInstance(Locale("hi_IN"),status
);
158 if (U_FAILURE(status
)) {
159 errcheckln(status
, "Creation of break iterator failed %s", u_errorName(status
));
163 errln("Failed: boilerplate method operator!= does not return correct results");
165 // Japanese word break iterators are identical to root with
166 // a dictionary-based break iterator
167 BreakIterator
* c
= BreakIterator::createCharacterInstance(Locale("ja"),status
);
168 BreakIterator
* d
= BreakIterator::createCharacterInstance(Locale("root"),status
);
171 errln("Failed: boilerplate method operator== does not return correct results");
174 errln("creation of break iterator failed");
182 void RBBIAPITest::TestgetRules()
184 UErrorCode status
=U_ZERO_ERROR
;
186 LocalPointer
<RuleBasedBreakIterator
> bi1(
187 (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
), status
);
188 LocalPointer
<RuleBasedBreakIterator
> bi2(
189 (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
), status
);
190 if(U_FAILURE(status
)){
191 errcheckln(status
, "%s:%d, FAIL: in construction - %s", __FILE__
, __LINE__
, u_errorName(status
));
195 logln((UnicodeString
)"Testing getRules()");
197 UnicodeString
text(u
"Hello there");
200 LocalPointer
<RuleBasedBreakIterator
> bi3((RuleBasedBreakIterator
*)bi1
->clone());
202 UnicodeString temp
=bi1
->getRules();
203 UnicodeString temp2
=bi2
->getRules();
204 UnicodeString temp3
=bi3
->getRules();
205 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
206 errln("%s:%d ERROR: error in getRules() method", __FILE__
, __LINE__
);
208 RuleBasedBreakIterator bi4
; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
209 if (!bi4
.getRules().isEmpty()) {
210 errln("%s:%d Empty string expected.", __FILE__
, __LINE__
);
214 void RBBIAPITest::TestHashCode()
216 UErrorCode status
=U_ZERO_ERROR
;
217 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
218 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
219 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
220 if(U_FAILURE(status
)){
221 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
229 logln((UnicodeString
)"Testing hashCode()");
231 bi1
->setText((UnicodeString
)"Hash code");
232 bi2
->setText((UnicodeString
)"Hash code");
233 bi3
->setText((UnicodeString
)"Hash code");
235 RuleBasedBreakIterator
* bi1clone
= (RuleBasedBreakIterator
*)bi1
->clone();
236 RuleBasedBreakIterator
* bi2clone
= (RuleBasedBreakIterator
*)bi2
->clone();
238 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
239 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
240 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
242 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
243 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
244 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
253 void RBBIAPITest::TestGetSetAdoptText()
255 logln((UnicodeString
)"Testing getText setText ");
256 IcuTestErrorCode
status(*this, "TestGetSetAdoptText");
257 UnicodeString str1
="first string.";
258 UnicodeString str2
="Second string.";
259 LocalPointer
<RuleBasedBreakIterator
> charIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
));
260 LocalPointer
<RuleBasedBreakIterator
> wordIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
));
261 if(status
.isFailure()){
262 errcheckln(status
, "Fail : in construction - %s", status
.errorName());
267 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
268 CharacterIterator
* text1Clone
= text1
->clone();
269 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
270 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
272 wordIter1
->setText(str1
);
273 CharacterIterator
*tci
= &wordIter1
->getText();
276 TEST_ASSERT(tstr
== str1
);
277 if(wordIter1
->current() != 0)
278 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
282 wordIter1
->setText(str2
);
283 if(wordIter1
->current() != 0)
284 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
287 charIter1
->adoptText(text1Clone
);
288 TEST_ASSERT(wordIter1
->getText() != charIter1
->getText());
289 tci
= &wordIter1
->getText();
291 TEST_ASSERT(tstr
== str2
);
292 tci
= &charIter1
->getText();
294 TEST_ASSERT(tstr
== str1
);
297 LocalPointer
<RuleBasedBreakIterator
> rb((RuleBasedBreakIterator
*)wordIter1
->clone());
298 rb
->adoptText(text1
);
299 if(rb
->getText() != *text1
)
300 errln((UnicodeString
)"ERROR:1 error in adoptText ");
301 rb
->adoptText(text2
);
302 if(rb
->getText() != *text2
)
303 errln((UnicodeString
)"ERROR:2 error in adoptText ");
305 // Adopt where iterator range is less than the entire original source string.
306 // (With the change of the break engine to working with UText internally,
307 // CharacterIterators starting at positions other than zero are not supported)
308 rb
->adoptText(text3
);
309 TEST_ASSERT(rb
->preceding(2) == 0);
310 TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
);
311 //if(rb->preceding(2) != 3) {
312 // errln((UnicodeString)"ERROR:3 error in adoptText ");
314 //if(rb->following(11) != BreakIterator::DONE) {
315 // errln((UnicodeString)"ERROR:4 error in adoptText ");
320 // Quick test to see if UText is working at all.
322 const char *s1
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
323 const char *s2
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
327 LocalUTextPointer
ut(utext_openUTF8(NULL
, s1
, -1, status
));
328 wordIter1
->setText(ut
.getAlias(), status
);
329 TEST_ASSERT_SUCCESS(status
);
332 pos
= wordIter1
->first();
334 pos
= wordIter1
->next();
336 pos
= wordIter1
->next();
338 pos
= wordIter1
->next();
339 TEST_ASSERT(pos
==11);
340 pos
= wordIter1
->next();
341 TEST_ASSERT(pos
==UBRK_DONE
);
344 LocalUTextPointer
ut2(utext_openUTF8(NULL
, s2
, -1, status
));
345 TEST_ASSERT_SUCCESS(status
);
346 wordIter1
->setText(ut2
.getAlias(), status
);
347 TEST_ASSERT_SUCCESS(status
);
349 pos
= wordIter1
->first();
351 pos
= wordIter1
->next();
353 pos
= wordIter1
->next();
356 pos
= wordIter1
->last();
358 pos
= wordIter1
->previous();
360 pos
= wordIter1
->previous();
362 pos
= wordIter1
->previous();
364 pos
= wordIter1
->previous();
365 TEST_ASSERT(pos
==UBRK_DONE
);
368 UnicodeString sEmpty
;
369 LocalUTextPointer
gut2(utext_openUnicodeString(NULL
, &sEmpty
, status
));
370 wordIter1
->getUText(gut2
.getAlias(), status
);
371 TEST_ASSERT_SUCCESS(status
);
376 void RBBIAPITest::TestIteration()
378 // This test just verifies that the API is present.
379 // Testing for correct operation of the break rules happens elsewhere.
381 UErrorCode status
=U_ZERO_ERROR
;
382 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
383 if (U_FAILURE(status
) || bi
== NULL
) {
384 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
389 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
390 if (U_FAILURE(status
) || bi
== NULL
) {
391 errcheckln(status
, "Failure creating Word break iterator. Status = %s", u_errorName(status
));
396 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
397 if (U_FAILURE(status
) || bi
== NULL
) {
398 errcheckln(status
, "Failure creating Line break iterator. Status = %s", u_errorName(status
));
403 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
404 if (U_FAILURE(status
) || bi
== NULL
) {
405 errcheckln(status
, "Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
410 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
411 if (U_FAILURE(status
) || bi
== NULL
) {
412 errcheckln(status
, "Failure creating Title break iterator. Status = %s", u_errorName(status
));
417 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
418 if (U_FAILURE(status
) || bi
== NULL
) {
419 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
420 return; // Skip the rest of these tests.
424 UnicodeString testString
="0123456789";
425 bi
->setText(testString
);
430 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__
, __LINE__
, i
);
435 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
444 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__
, __LINE__
, i
);
450 if (i
!= BreakIterator::DONE
) {
451 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
460 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__
, __LINE__
, i
);
465 if (i
!= BreakIterator::DONE
) {
466 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
476 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__
, __LINE__
, i
);
482 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__
, __LINE__
, i
);
489 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
496 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__
, __LINE__
, i
);
503 i
= bi
->following(4);
505 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__
, __LINE__
, i
);
508 i
= bi
->following(9);
510 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
513 i
= bi
->following(10);
514 if (i
!= BreakIterator::DONE
) {
515 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
522 i
= bi
->preceding(4);
524 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__
, __LINE__
, i
);
527 i
= bi
->preceding(10);
529 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__
, __LINE__
, i
);
532 i
= bi
->preceding(1);
534 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__
, __LINE__
, i
);
537 i
= bi
->preceding(0);
538 if (i
!= BreakIterator::DONE
) {
539 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
547 if (bi
->isBoundary(3) != TRUE
) {
548 errln("%s:%d Incorrect value from bi->isBoudary(). Expected TRUE, got FALSE", __FILE__
, __LINE__
, i
);
552 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__
, __LINE__
, i
);
556 if (bi
->isBoundary(11) != FALSE
) {
557 errln("%s:%d Incorrect value from bi->isBoudary(). Expected FALSE, got TRUE", __FILE__
, __LINE__
, i
);
561 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
570 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__
, __LINE__
, i
);
575 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
580 if (i
!= BreakIterator::DONE
) {
581 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__
, __LINE__
, i
);
593 void RBBIAPITest::TestBuilder() {
594 UnicodeString rulesString1
= "$Letters = [:L:];\n"
595 "$Numbers = [:N:];\n"
598 "[^$Letters $Numbers];\n"
600 UnicodeString testString1
= "abc123..abc";
602 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
603 UErrorCode status
=U_ZERO_ERROR
;
604 UParseError parseError
;
606 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
607 if(U_FAILURE(status
)) {
608 dataerrln("Fail : in construction - %s", u_errorName(status
));
610 bi
->setText(testString1
);
611 doBoundaryTest(*bi
, testString1
, bounds1
);
619 // Single quotes within rules imply a grouping, so that a modifier
620 // following the quoted text (* or +) applies to all of the quoted chars.
622 void RBBIAPITest::TestQuoteGrouping() {
623 UnicodeString rulesString1
= "#Here comes the rule...\n"
624 "'$@!'*;\n" // (\$\@\!)*
627 UnicodeString testString1
= "$@!$@!X$@!!X";
629 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
630 UErrorCode status
=U_ZERO_ERROR
;
631 UParseError parseError
;
633 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
634 if(U_FAILURE(status
)) {
635 dataerrln("Fail : in construction - %s", u_errorName(status
));
637 bi
->setText(testString1
);
638 doBoundaryTest(*bi
, testString1
, bounds1
);
645 // Test word break rule status constants.
647 void RBBIAPITest::TestRuleStatus() {
649 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
650 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
651 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
652 // 012345678901234567 8 9 0
655 UnicodeString
testString1(str
);
656 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
657 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
658 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
659 UBRK_WORD_IDEO
, UBRK_WORD_NONE
};
661 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
662 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
663 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
};
665 UErrorCode status
=U_ZERO_ERROR
;
667 BreakIterator
*bi
= BreakIterator::createWordInstance(Locale::getEnglish(), status
);
668 if(U_FAILURE(status
)) {
669 errcheckln(status
, "%s:%d Fail in construction - %s", __FILE__
, __LINE__
, u_errorName(status
));
671 bi
->setText(testString1
);
672 // First test that the breaks are in the right spots.
673 doBoundaryTest(*bi
, testString1
, bounds1
);
675 // Then go back and check tag values
678 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
679 if (pos
!= bounds1
[i
]) {
680 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__
, __LINE__
, pos
);
683 tag
= bi
->getRuleStatus();
684 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
685 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__
, __LINE__
, tag
, pos
);
689 // Check that we get the same tag values from getRuleStatusVec()
691 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
692 TEST_ASSERT_SUCCESS(status
);
694 TEST_ASSERT(vec
[0] == tag
);
699 // Now test line break status. This test mostly is to confirm that the status constants
700 // are correctly declared in the header.
701 testString1
= "test line. \n";
704 bi
= BreakIterator::createLineInstance(Locale::getEnglish(), status
);
705 if(U_FAILURE(status
)) {
706 errcheckln(status
, "%s:%d failed to create line break iterator. - %s", __FILE__
, __LINE__
, u_errorName(status
));
712 bi
->setText(testString1
);
714 tag
= bi
->getRuleStatus();
715 for (i
=0; i
<3; i
++) {
718 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
720 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
722 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
724 success
= FALSE
; break;
726 if (success
== FALSE
) {
727 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
728 __FILE__
, __LINE__
, i
, pos
, tag
);
732 tag
= bi
->getRuleStatus();
734 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
735 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
736 (UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
)) {
737 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__
, __LINE__
);
747 // Test the vector form of break rule status.
749 void RBBIAPITest::TestRuleStatusVec() {
750 UnicodeString
rulesString( "[A-N]{100}; \n"
755 "!.*;\n", -1, US_INV
);
756 UnicodeString testString1
= "Aapz5?";
757 int32_t statusVals
[10];
761 UErrorCode status
=U_ZERO_ERROR
;
762 UParseError parseError
;
764 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
765 if (U_FAILURE(status
)) {
766 dataerrln("Failure at file %s, line %d, error = %s", __FILE__
, __LINE__
, u_errorName(status
));
768 bi
->setText(testString1
);
773 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
774 TEST_ASSERT_SUCCESS(status
);
775 TEST_ASSERT(numStatuses
== 2);
776 TEST_ASSERT(statusVals
[0] == 100);
777 TEST_ASSERT(statusVals
[1] == 300);
782 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
783 TEST_ASSERT_SUCCESS(status
);
784 TEST_ASSERT(numStatuses
== 2);
785 TEST_ASSERT(statusVals
[0] == 200);
786 TEST_ASSERT(statusVals
[1] == 300);
791 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
792 TEST_ASSERT_SUCCESS(status
);
793 TEST_ASSERT(numStatuses
== 2);
794 TEST_ASSERT(statusVals
[0] == 200);
795 TEST_ASSERT(statusVals
[1] == 300);
800 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
801 TEST_ASSERT_SUCCESS(status
);
802 TEST_ASSERT(numStatuses
== 1);
803 TEST_ASSERT(statusVals
[0] == 300);
808 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
809 TEST_ASSERT_SUCCESS(status
);
810 TEST_ASSERT(numStatuses
== 2);
811 TEST_ASSERT(statusVals
[0] == 400);
812 TEST_ASSERT(statusVals
[1] == 500);
817 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
818 TEST_ASSERT_SUCCESS(status
);
819 TEST_ASSERT(numStatuses
== 1);
820 TEST_ASSERT(statusVals
[0] == 0);
823 // Check buffer overflow error handling. Char == A
828 memset(statusVals
, -1, sizeof(statusVals
));
829 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
830 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
831 TEST_ASSERT(numStatuses
== 2);
832 TEST_ASSERT(statusVals
[0] == -1);
834 status
= U_ZERO_ERROR
;
835 memset(statusVals
, -1, sizeof(statusVals
));
836 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
837 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
838 TEST_ASSERT(numStatuses
== 2);
839 TEST_ASSERT(statusVals
[0] == 100);
840 TEST_ASSERT(statusVals
[1] == -1);
842 status
= U_ZERO_ERROR
;
843 memset(statusVals
, -1, sizeof(statusVals
));
844 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
845 TEST_ASSERT_SUCCESS(status
);
846 TEST_ASSERT(numStatuses
== 2);
847 TEST_ASSERT(statusVals
[0] == 100);
848 TEST_ASSERT(statusVals
[1] == 300);
849 TEST_ASSERT(statusVals
[2] == -1);
856 // Bug 2190 Regression test. Builder crash on rule consisting of only a
857 // $variable reference
858 void RBBIAPITest::TestBug2190() {
859 UnicodeString rulesString1
= "$aaa = abcd;\n"
862 UnicodeString testString1
= "abcdabcd";
864 int32_t bounds1
[] = {0, 4, 8};
865 UErrorCode status
=U_ZERO_ERROR
;
866 UParseError parseError
;
868 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
869 if(U_FAILURE(status
)) {
870 dataerrln("Fail : in construction - %s", u_errorName(status
));
872 bi
->setText(testString1
);
873 doBoundaryTest(*bi
, testString1
, bounds1
);
879 void RBBIAPITest::TestRegistration() {
880 #if !UCONFIG_NO_SERVICE
881 UErrorCode status
= U_ZERO_ERROR
;
882 BreakIterator
* ja_word
= BreakIterator::createWordInstance("ja_JP", status
);
883 // ok to not delete these if we exit because of error?
884 BreakIterator
* ja_char
= BreakIterator::createCharacterInstance("ja_JP", status
);
885 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
886 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
888 if (status
== U_MISSING_RESOURCE_ERROR
|| status
== U_FILE_ACCESS_ERROR
) {
889 dataerrln("Error creating instances of break interactors - %s", u_errorName(status
));
899 URegistryKey key
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
);
901 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
902 if (ja_word
&& *ja_word
== *root_word
) {
903 errln("japan not different from root");
909 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
912 fail
= *result
!= *ja_word
;
916 errln("bad result for xx_XX/word");
921 BreakIterator
* result
= BreakIterator::createCharacterInstance("ja_JP", status
);
924 fail
= *result
!= *ja_char
;
928 errln("bad result for ja_JP/char");
933 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
936 fail
= *result
!= *root_char
;
940 errln("bad result for xx_XX/char");
945 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
947 const UnicodeString
* p
;
948 while ((p
= avail
->snext(status
))) {
949 if (p
->compare("xx") == 0) {
956 errln("did not find test locale");
961 UBool unreg
= BreakIterator::unregister(key
, status
);
963 errln("unable to unregister");
968 BreakIterator
* result
= BreakIterator::createWordInstance("en_US", status
);
969 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
972 fail
= *root
!= *result
;
977 errln("did not get root break");
982 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
984 const UnicodeString
* p
;
985 while ((p
= avail
->snext(status
))) {
986 if (p
->compare("xx") == 0) {
993 errln("found test locale");
999 UBool foundLocale
= FALSE
;
1000 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
1001 for (int i
=0; i
<count
; i
++) {
1002 if (avail
[i
] == Locale::getEnglish()) {
1007 if (foundLocale
== FALSE
) {
1008 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1013 // ja_word was adopted by factory
1020 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
1021 UErrorCode status
= U_ZERO_ERROR
;
1022 UParseError parseError
;
1023 parseError
.line
= 0;
1024 parseError
.offset
= 0;
1025 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
));
1027 const UChar
*builtSource
;
1028 const uint8_t *rbbiRules
;
1029 const uint8_t *builtRules
;
1031 if (U_FAILURE(status
)) {
1032 errcheckln(status
, "%s:%d Can't open \"%s\" - %s", __FILE__
, __LINE__
, dataFile
, u_errorName(status
));
1036 builtRules
= (const uint8_t *)udata_getMemory(data
.getAlias());
1037 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
1038 LocalPointer
<RuleBasedBreakIterator
> brkItr (new RuleBasedBreakIterator(builtSource
, parseError
, status
));
1039 if (U_FAILURE(status
)) {
1040 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1041 __FILE__
, __LINE__
, u_errorName(status
), parseError
.line
, parseError
.offset
);
1042 errln(UnicodeString(builtSource
));
1045 rbbiRules
= brkItr
->getBinaryRules(length
);
1046 logln("Comparing \"%s\" len=%d", dataFile
, length
);
1047 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
1048 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__
, __LINE__
, dataFile
);
1053 void RBBIAPITest::TestRoundtripRules() {
1054 RoundtripRule("word");
1055 RoundtripRule("title");
1056 RoundtripRule("sent");
1057 RoundtripRule("line");
1058 RoundtripRule("char");
1060 RoundtripRule("word_POSIX");
1065 // Check getBinaryRules() and construction of a break iterator from those rules.
1067 void RBBIAPITest::TestGetBinaryRules() {
1068 UErrorCode status
=U_ZERO_ERROR
;
1069 LocalPointer
<BreakIterator
> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status
));
1070 if (U_FAILURE(status
)) {
1071 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status
));
1074 RuleBasedBreakIterator
*rbbi
= dynamic_cast<RuleBasedBreakIterator
*>(bi
.getAlias());
1076 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1080 // Check that the new line break iterator is nominally functional.
1081 UnicodeString
helloWorld("Hello, World!");
1082 rbbi
->setText(helloWorld
);
1084 while (bi
->next() != UBRK_DONE
) {
1087 TEST_ASSERT(n
== 2);
1089 // Extract the binary rules as a uint8_t blob.
1090 uint32_t ruleLength
;
1091 const uint8_t *binRules
= rbbi
->getBinaryRules(ruleLength
);
1092 TEST_ASSERT(ruleLength
> 0);
1093 TEST_ASSERT(binRules
!= NULL
);
1095 // Clone the binary rules, and create a break iterator from that.
1096 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1097 uint8_t *clonedRules
= new uint8_t[ruleLength
];
1098 memcpy(clonedRules
, binRules
, ruleLength
);
1099 RuleBasedBreakIterator
clonedBI(clonedRules
, ruleLength
, status
);
1100 TEST_ASSERT_SUCCESS(status
);
1102 // Check that the cloned line break iterator is nominally alive.
1103 clonedBI
.setText(helloWorld
);
1105 while (clonedBI
.next() != UBRK_DONE
) {
1108 TEST_ASSERT(n
== 2);
1110 delete[] clonedRules
;
// Test for refreshInputText(): starts iterating a line break iterator over
// testStr, copies the text into movedStr and overwrites testStr with spaces,
// re-points the iterator at the copy, and asserts that the remaining
// boundaries (5, 7, 8, then UBRK_DONE) are still reported.
1114 void RBBIAPITest::TestRefreshInputText() {
1116 * RefreshInput changes out the input of a Break Iterator without
1117 * changing anything else in the iterator's state. Used with Java JNI,
1118 * when Java moves the underlying string storage. This test
1119 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1120 * The right set of boundaries should still be found.
1122 UChar testStr
[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1123 UChar movedStr
[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1124 UErrorCode status
= U_ZERO_ERROR
;
1125 UText ut1
= UTEXT_INITIALIZER
;
1126 UText ut2
= UTEXT_INITIALIZER
;
1127 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
);
1128 TEST_ASSERT_SUCCESS(status
);
1130 utext_openUChars(&ut1
, testStr
, -1, &status
);
1131 TEST_ASSERT_SUCCESS(status
);
// Run the iteration checks only if every setup step above left status successful.
1133 if (U_SUCCESS(status
)) {
1134 bi
->setText(&ut1
, status
);
1135 TEST_ASSERT_SUCCESS(status
);
1137 /* Line boundaries will occur before each letter in the original string */
1138 TEST_ASSERT(1 == bi
->next());
1139 TEST_ASSERT(3 == bi
->next());
1141 /* Move the string, kill the original string. */
1142 u_strcpy(movedStr
, testStr
);
1143 u_memset(testStr
, 0x20, u_strlen(testStr
));
1144 utext_openUChars(&ut2
, movedStr
, -1, &status
);
1145 TEST_ASSERT_SUCCESS(status
);
// Take the address of the reference returned by refreshInputText(); the
// assertion below expects it to be the same object as bi.
1146 RuleBasedBreakIterator
*returnedBI
= &bi
->refreshInputText(&ut2
, status
);
1147 TEST_ASSERT_SUCCESS(status
);
1148 TEST_ASSERT(bi
== returnedBI
);
1150 /* Find the following matches, now working in the moved string. */
1151 TEST_ASSERT(5 == bi
->next());
1152 TEST_ASSERT(7 == bi
->next());
1153 TEST_ASSERT(8 == bi
->next());
1154 TEST_ASSERT(UBRK_DONE
== bi
->next());
1163 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
// File-local debugging helper. Logs ustr through it, walks brk forward with
// next() until UBRK_DONE while recording each boundary in pos, then builds two
// strings in out: the text wrapped in CHSTR/CHEND with a PILCROW appended after
// each segment, and the recorded offsets formatted with "%d ".
// NOTE(review): the statements that declare out/prev/tmp and that emit or
// release the results are not visible in this excerpt -- confirm against the
// full file.
1164 static void prtbrks(BreakIterator
* brk
, const UnicodeString
&ustr
, IntlTest
&it
) {
1165 static const UChar PILCROW
=0x00B6, CHSTR
=0x3010, CHEND
=0x3011; // lenticular brackets
1166 it
.logln(UnicodeString("String:'")+ustr
+UnicodeString("'"));
// pos has ustr.length() slots; the collection loop below reports an error once
// posCount reaches that size.
1168 int32_t *pos
= new int32_t[ustr
.length()];
1169 int32_t posCount
= 0;
1171 // calculate breaks up front, so we can print out
1172 // sans any debugging
1173 for(int32_t n
= 0; (n
=brk
->next())!=UBRK_DONE
; ) {
1174 pos
[posCount
++] = n
;
1175 if(posCount
>=ustr
.length()) {
1176 it
.errln("brk count exceeds string length!");
// First rendering: opening bracket, then each segment followed by a pilcrow.
1181 out
.append((UChar
)CHSTR
);
1183 for(int32_t i
=0;i
<posCount
;i
++) {
1185 out
.append(ustr
.tempSubString(prev
,n
-prev
));
1186 out
.append((UChar
)PILCROW
);
// Remaining text from prev to the end of ustr, then the closing bracket.
1189 out
.append(ustr
.tempSubString(prev
,ustr
.length()-prev
));
1190 out
.append((UChar
)CHEND
);
// Second rendering: the recorded boundary offsets as space-separated decimals.
1194 for(int32_t i
=0;i
<posCount
;i
++) {
1196 sprintf(tmp
,"%d ",pos
[i
]);
1197 out
.append(UnicodeString(tmp
));
// Exercises FilteredBreakIteratorBuilder with sentence break iterators over a
// fixed English sample text, asserting the offsets returned by next() for:
//   1. an empty builder,
//   2. a builder suppressing "Mr.",
//   3. a builder suppressing "Mr." and "Capt.",
//   4. the Locale::getEnglish() builder with "Capt." unsuppressed,
//   5. the unmodified Locale::getEnglish() builder,
//   6. a Locale::getFrench() builder, cross-checked against the English one.
// Each case passes baseBI.orphan() to builder->build(), so baseBI gives up its
// pointer before the filtered iterator is adopted.
// NOTE(review): the scope braces and the #else/#endif lines of the feature
// guard are not visible in this excerpt; the trailing "Skipped" log presumably
// sits on the disabled branch -- confirm against the full file.
1204 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1205 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1206 UErrorCode status
= U_ZERO_ERROR
;
1207 LocalPointer
<FilteredBreakIteratorBuilder
> builder
;
1208 LocalPointer
<BreakIterator
> baseBI
;
1209 LocalPointer
<BreakIterator
> filteredBI
;
1210 LocalPointer
<BreakIterator
> frenchBI
;
1212 const UnicodeString
text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1213 const UnicodeString
ABBR_MR("Mr.");
1214 const UnicodeString
ABBR_CAPT("Capt.");
// Case 1: empty builder. The asserts below expect breaks after "Mr." and
// "Capt." as well as at the sentence ends.
1217 logln("Constructing empty builder\n");
1218 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1219 TEST_ASSERT_SUCCESS(status
);
1221 logln("Constructing base BI\n");
1222 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1223 TEST_ASSERT_SUCCESS(status
);
1225 logln("Building new BI\n");
1226 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1227 TEST_ASSERT_SUCCESS(status
);
1229 if (U_SUCCESS(status
)) {
1231 filteredBI
->setText(text
);
1232 TEST_ASSERT(20 == filteredBI
->next()); // Mr.
1233 TEST_ASSERT(84 == filteredBI
->next()); // recovered.
1234 TEST_ASSERT(90 == filteredBI
->next()); // Capt.
1235 TEST_ASSERT(181 == filteredBI
->next()); // Mr.
1236 TEST_ASSERT(278 == filteredBI
->next()); // charge.
// Call first() before prtbrks(), which walks the iterator again with next().
1237 filteredBI
->first();
1238 prtbrks(filteredBI
.getAlias(), text
, *this);
// Case 2: suppress "Mr." only. Also checks the TRUE/FALSE results of repeated
// suppressBreakAfter()/unsuppressBreakAfter() calls on the same string.
1243 logln("Constructing empty builder\n");
1244 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1245 TEST_ASSERT_SUCCESS(status
);
1247 if (U_SUCCESS(status
)) {
1248 logln("Adding Mr. as an exception\n");
1249 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1250 TEST_ASSERT(FALSE
== builder
->suppressBreakAfter(ABBR_MR
, status
)); // already have it
1251 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
));
1252 TEST_ASSERT(FALSE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
)); // already removed it
1253 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1254 TEST_ASSERT_SUCCESS(status
);
1256 logln("Constructing base BI\n");
1257 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1258 TEST_ASSERT_SUCCESS(status
);
1260 logln("Building new BI\n");
1261 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1262 TEST_ASSERT_SUCCESS(status
);
1265 filteredBI
->setText(text
);
1266 TEST_ASSERT(84 == filteredBI
->next());
1267 TEST_ASSERT(90 == filteredBI
->next());// Capt.
1268 TEST_ASSERT(278 == filteredBI
->next());
1269 filteredBI
->first();
1270 prtbrks(filteredBI
.getAlias(), text
, *this);
// Case 3: suppress both "Mr." and "Capt."; only offsets 84 and 278 are expected.
1276 logln("Constructing empty builder\n");
1277 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1278 TEST_ASSERT_SUCCESS(status
);
1280 if (U_SUCCESS(status
)) {
1281 logln("Adding Mr. and Capt as an exception\n");
1282 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1283 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_CAPT
, status
));
1284 TEST_ASSERT_SUCCESS(status
);
1286 logln("Constructing base BI\n");
1287 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1288 TEST_ASSERT_SUCCESS(status
);
1290 logln("Building new BI\n");
1291 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1292 TEST_ASSERT_SUCCESS(status
);
1295 filteredBI
->setText(text
);
1296 TEST_ASSERT(84 == filteredBI
->next());
1297 TEST_ASSERT(278 == filteredBI
->next());
1298 filteredBI
->first();
1299 prtbrks(filteredBI
.getAlias(), text
, *this);
// Case 4: builder created for Locale::getEnglish(), then "Capt." is
// unsuppressed; the break at 90 is expected again.
1305 logln("Constructing English builder\n");
1306 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1307 TEST_ASSERT_SUCCESS(status
);
1309 logln("Constructing base BI\n");
1310 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1311 TEST_ASSERT_SUCCESS(status
);
1313 if (U_SUCCESS(status
)) {
1314 logln("unsuppressing 'Capt'");
1315 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_CAPT
, status
));
1317 logln("Building new BI\n");
1318 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1319 TEST_ASSERT_SUCCESS(status
);
1321 if(filteredBI
.isValid()) {
1323 filteredBI
->setText(text
);
1324 TEST_ASSERT(84 == filteredBI
->next());
1325 TEST_ASSERT(90 == filteredBI
->next());
1326 TEST_ASSERT(278 == filteredBI
->next());
1327 filteredBI
->first();
1328 prtbrks(filteredBI
.getAlias(), text
, *this);
// Case 5: unmodified Locale::getEnglish() builder; only 84 and 278 are expected.
1335 logln("Constructing English builder\n");
1336 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1337 TEST_ASSERT_SUCCESS(status
);
1339 logln("Constructing base BI\n");
1340 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1341 TEST_ASSERT_SUCCESS(status
);
1343 if (U_SUCCESS(status
)) {
1344 logln("Building new BI\n");
1345 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1346 TEST_ASSERT_SUCCESS(status
);
1348 if(filteredBI
.isValid()) {
1350 filteredBI
->setText(text
);
1351 TEST_ASSERT(84 == filteredBI
->next());
1352 TEST_ASSERT(278 == filteredBI
->next());
1353 filteredBI
->first();
1354 prtbrks(filteredBI
.getAlias(), text
, *this);
// Case 6: builder and base iterator for Locale::getFrench(). The result goes
// into frenchBI, so filteredBI is still available for the comparison below.
1359 // reenable once french is in
1361 logln("Constructing French builder");
1362 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status
));
1363 TEST_ASSERT_SUCCESS(status
);
1365 logln("Constructing base BI\n");
1366 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status
));
1367 TEST_ASSERT_SUCCESS(status
);
1369 if (U_SUCCESS(status
)) {
1370 logln("Building new BI\n");
1371 frenchBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1372 TEST_ASSERT_SUCCESS(status
);
1375 if(frenchBI
.isValid()) {
1377 UnicodeString
frText("C'est MM. Duval.");
1378 frenchBI
->setText(frText
);
1379 TEST_ASSERT(16 == frenchBI
->next());
1380 TEST_ASSERT(BreakIterator::DONE
== frenchBI
->next());
1382 prtbrks(frenchBI
.getAlias(), frText
, *this);
1383 logln("Testing against English:");
1384 filteredBI
->setText(frText
);
1385 TEST_ASSERT(10 == filteredBI
->next()); // wrong for french, but filterBI is english.
1386 TEST_ASSERT(16 == filteredBI
->next());
1387 TEST_ASSERT(BreakIterator::DONE
== filteredBI
->next());
1388 filteredBI
->first();
1389 prtbrks(filteredBI
.getAlias(), frText
, *this);
// Equality operators: frenchBI must compare equal to itself and unequal to
// filteredBI in both operand orders.
1392 TEST_ASSERT_TRUE(*frenchBI
== *frenchBI
);
1393 TEST_ASSERT_TRUE(*filteredBI
!= *frenchBI
);
1394 TEST_ASSERT_TRUE(*frenchBI
!= *filteredBI
);
1396 dataerrln("French BI: not valid.");
1401 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1405 //---------------------------------------------
1407 //---------------------------------------------
// Test dispatcher for this suite. Logs a suite banner when exec is set, then
// lists every test method between TESTCASE_AUTO_BEGIN and the end of the
// function via TESTCASE_AUTO. Some entries are wrapped in UCONFIG_NO_FILE_IO /
// UCONFIG_NO_BREAK_ITERATION guards.
// NOTE(review): index, exec and name are presumably consumed by the
// TESTCASE_AUTO macros, whose definitions are not visible here; the matching
// #endif lines and the closing macro are also not visible in this excerpt.
1409 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
1411 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
1412 TESTCASE_AUTO_BEGIN
;
1413 #if !UCONFIG_NO_FILE_IO
1414 TESTCASE_AUTO(TestCloneEquals
);
1415 TESTCASE_AUTO(TestgetRules
);
1416 TESTCASE_AUTO(TestHashCode
);
1417 TESTCASE_AUTO(TestGetSetAdoptText
);
1418 TESTCASE_AUTO(TestIteration
);
1420 TESTCASE_AUTO(TestBuilder
);
1421 TESTCASE_AUTO(TestQuoteGrouping
);
1422 TESTCASE_AUTO(TestRuleStatusVec
);
1423 TESTCASE_AUTO(TestBug2190
);
1424 #if !UCONFIG_NO_FILE_IO
1425 TESTCASE_AUTO(TestRegistration
);
1426 TESTCASE_AUTO(TestBoilerPlate
);
1427 TESTCASE_AUTO(TestRuleStatus
);
1428 TESTCASE_AUTO(TestRoundtripRules
);
1429 TESTCASE_AUTO(TestGetBinaryRules
);
1431 TESTCASE_AUTO(TestRefreshInputText
);
1432 #if !UCONFIG_NO_BREAK_ITERATION
1433 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder
);
1439 //---------------------------------------------
1440 //Internal subroutines
1441 //---------------------------------------------
// Checks bi.isBoundary(i) for every offset i in [0, text.length()), logging
// each result. An offset equal to boundaries[p] is expected to be a boundary
// ("expected true, got false" is reported otherwise); the second error message
// covers an offset reported as a boundary when it was not expected to be one.
// NOTE(review): the declarations of isB and p, the advancement of p, and the
// branch structure between the two errln calls are not visible in this
// excerpt -- confirm against the full file.
1443 void RBBIAPITest::doBoundaryTest(BreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1444 logln((UnicodeString
)"testIsBoundary():");
1447 for (int32_t i
= 0; i
< text
.length(); i
++) {
1448 isB
= bi
.isBoundary(i
);
1449 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
1451 if (i
== boundaries
[p
]) {
1453 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
1458 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
// Verifies one iterator movement. Reports an error when gotoffset differs from
// expectedOffset, extracts the text between start and gotoffset (the two
// extractBetween calls cover either ordering of the offsets) into selected,
// and reports an error when selected differs from expectedString after
// conversion with CharsToUnicodeString.
// NOTE(review): the else lines are not visible in this excerpt; the final
// logln is presumably the branch taken when the selected text matches.
1462 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1463 UnicodeString selected
;
1464 UnicodeString expected
=CharsToUnicodeString(expectedString
);
1466 if(gotoffset
!= expectedOffset
)
1467 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
// Forward movement: the selection runs from start up to gotoffset.
1468 if(start
<= gotoffset
){
1469 testString
.extractBetween(start
, gotoffset
, selected
);
// Operands swapped: the selection runs from gotoffset up to start.
1472 testString
.extractBetween(gotoffset
, start
, selected
);
1474 if(selected
.compare(expected
) != 0)
1475 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1477 logln(prettify("****selected \"" + selected
+ "\""));
1480 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */