1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/uchar.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
33 * API Test the RuleBasedBreakIterator class
// TEST_ASSERT_SUCCESS(status): when `status` is an ICU failure code, reports the
// file, line and error name through dataerrln().
// NOTE(review): the embedded numbering jumps from 39 to 41, so one line of this
// macro is presumably missing from this listing.
37 #define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
38 if (U_FAILURE(status)) { \
39 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
41 } UPRV_BLOCK_MACRO_END
// TEST_ASSERT(expr): when `expr` compares equal to FALSE, reports the file, line
// and the stringified expression through errln().
// NOTE(review): the embedded numbering jumps from 45 to 47, so one line of this
// macro is presumably missing from this listing.
43 #define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
44 if ((expr) == FALSE) { \
45 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
47 } UPRV_BLOCK_MACRO_END
// Exercises operator==, operator!=, assignment and clone() of
// RuleBasedBreakIterator, using character, word and line iterators created for
// the default locale.
// NOTE(review): the leading numbers look like original line numbers and are not
// contiguous, so braces and some statements (for example the early returns and
// any cleanup) are presumably missing from this listing.
49 void RBBIAPITest::TestCloneEquals()
52 UErrorCode status
=U_ZERO_ERROR
;
// bi1, biequal and bi3 are character iterators; bi2 is a word iterator.
53 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
54 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
55 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
56 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
57 if(U_FAILURE(status
)){
58 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
// bi1, bi2 and biequal are given the same text; bi3 is given different text.
63 UnicodeString testString
="Testing word break iterators's clone() and equals()";
64 bi1
->setText(testString
);
65 bi2
->setText(testString
);
66 biequal
->setText(testString
);
68 bi3
->setText("hello");
70 logln((UnicodeString
)"Testing equals()");
72 logln((UnicodeString
)"Testing == and !=");
// b starts as "bi1 differs from biequal"; the lines that combine further
// comparisons into b and test it are not visible here.
73 UBool b
= (*bi1
!= *biequal
);
77 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__
, __LINE__
);
// None of these pairs is expected to compare equal.
80 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
81 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__
, __LINE__
);
84 // Quick test of RulesBasedBreakIterator assignment -
86 // two different iterators are !=
87 // they are == after assignment
88 // source and dest iterator produce the same next() after assignment.
89 // deleting one doesn't disable the other.
90 logln("Testing assignment");
91 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getDefault(), status
);
92 if(U_FAILURE(status
)){
93 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
// Two default-constructed iterators: they must differ from the line iterator
// and compare equal to each other.
97 RuleBasedBreakIterator biDefault
, biDefault2
;
98 if(U_FAILURE(status
)){
99 errln("%s:%d FAIL : in construction of default iterator", __FILE__
, __LINE__
);
102 if (biDefault
== *bix
) {
103 errln("%s:%d ERROR: iterators should not compare ==", __FILE__
, __LINE__
);
106 if (biDefault
!= biDefault2
) {
107 errln("%s:%d ERROR: iterators should compare ==", __FILE__
, __LINE__
);
112 UnicodeString
HelloString("Hello Kitty");
113 bix
->setText(HelloString
);
// NOTE(review): the conditions guarding the next two messages, and the
// assignment between them, are not visible in this listing.
115 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__
, __LINE__
);
119 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__
, __LINE__
);
// Both iterators must report the same first boundary, and that boundary must be 7.
122 int bixnext
= bix
->next();
123 int bi2next
= bi2
->next();
124 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
125 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__
, __LINE__
);
// bi2 must keep iterating (next boundary 8); per the message this is checked
// after the copy has been deleted, though the delete itself is not visible here.
128 if (bi2
->next() != 8) {
129 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__
, __LINE__
);
134 logln((UnicodeString
)"Testing clone()");
135 RuleBasedBreakIterator
* bi1clone
= bi1
->clone();
136 RuleBasedBreakIterator
* bi2clone
= bi2
->clone();
// A clone of bi1 must equal bi1 and biequal, and differ from bi3 and bi2.
138 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
139 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
140 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__
, __LINE__
);
// A clone of bi2 must equal only bi2.
142 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
143 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
144 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__
, __LINE__
);
// Clones must expose the same text as their sources, and the two clones must differ.
146 if(bi1
->getText() != bi1clone
->getText() ||
147 bi2clone
->getText() != bi2
->getText() ||
148 *bi2clone
== *bi1clone
)
149 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__
, __LINE__
);
// Creates word iterators for the "hi" and "hi_IN" locales and character
// iterators for "ja" and "root"; the visible failure messages concern
// operator!= and operator==.
// NOTE(review): the comparisons that trigger those messages, and any cleanup,
// are not visible in this listing (the embedded line numbers skip).
159 void RBBIAPITest::TestBoilerPlate()
161 UErrorCode status
= U_ZERO_ERROR
;
162 BreakIterator
* a
= BreakIterator::createWordInstance(Locale("hi"), status
);
163 BreakIterator
* b
= BreakIterator::createWordInstance(Locale("hi_IN"),status
);
164 if (U_FAILURE(status
)) {
165 errcheckln(status
, "Creation of break iterator failed %s", u_errorName(status
));
169 errln("Failed: boilerplate method operator!= does not return correct results");
171 // Japanese word break iterators are identical to root with
172 // a dictionary-based break iterator
173 BreakIterator
* c
= BreakIterator::createCharacterInstance(Locale("ja"),status
);
174 BreakIterator
* d
= BreakIterator::createCharacterInstance(Locale("root"),status
);
177 errln("Failed: boilerplate method operator== does not return correct results");
180 errln("creation of break iterator failed");
// Checks getRules(): a character iterator and its clone must return identical
// rule text, a word iterator must return different rule text, and a
// default-constructed iterator must return an empty string.
// NOTE(review): braces and some statements are presumably missing from this
// listing (the embedded line numbers skip).
188 void RBBIAPITest::TestgetRules()
190 UErrorCode status
=U_ZERO_ERROR
;
// bi1: character iterator, bi2: word iterator, both held by LocalPointer.
192 LocalPointer
<RuleBasedBreakIterator
> bi1(
193 (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
), status
);
194 LocalPointer
<RuleBasedBreakIterator
> bi2(
195 (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
), status
);
196 if(U_FAILURE(status
)){
197 errcheckln(status
, "%s:%d, FAIL: in construction - %s", __FILE__
, __LINE__
, u_errorName(status
));
201 logln((UnicodeString
)"Testing getRules()");
203 UnicodeString
text(u
"Hello there");
// bi3 is a clone of the character iterator bi1.
206 LocalPointer
<RuleBasedBreakIterator
> bi3(bi1
->clone());
208 UnicodeString temp
=bi1
->getRules();
209 UnicodeString temp2
=bi2
->getRules();
210 UnicodeString temp3
=bi3
->getRules();
// Fails if the word rules match either character rule set, or if the clone's
// rules differ from its source's.
211 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
212 errln("%s:%d ERROR: error in getRules() method", __FILE__
, __LINE__
);
214 RuleBasedBreakIterator bi4
; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
215 if (!bi4
.getRules().isEmpty()) {
216 errln("%s:%d Empty string expected.", __FILE__
, __LINE__
);
// Checks hashCode(): character iterators over the same text (and their clones)
// must hash identically, while character and word iterators must hash differently.
// NOTE(review): braces, early returns and any cleanup are presumably missing
// from this listing (the embedded line numbers skip).
220 void RBBIAPITest::TestHashCode()
222 UErrorCode status
=U_ZERO_ERROR
;
// bi1 and bi3 are character iterators; bi2 is a word iterator.
223 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
224 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
225 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
226 if(U_FAILURE(status
)){
227 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
235 logln((UnicodeString
)"Testing hashCode()");
// All three iterators are given the same text.
237 bi1
->setText((UnicodeString
)"Hash code");
238 bi2
->setText((UnicodeString
)"Hash code");
239 bi3
->setText((UnicodeString
)"Hash code");
241 RuleBasedBreakIterator
* bi1clone
= bi1
->clone();
242 RuleBasedBreakIterator
* bi2clone
= bi2
->clone();
// Equal iterators (same kind, same text, or a clone) must share a hash code.
244 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
245 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
246 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
// Character and word iterators are expected to produce different hash codes.
248 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
249 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
250 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
// Exercises getText(), setText() (UnicodeString and UText overloads),
// adoptText() and getUText() on character and word iterators.
// NOTE(review): braces and a number of statements (declarations of `tstr` and
// `pos`, several assertions) are presumably missing from this listing, since
// the embedded line numbers skip.
259 void RBBIAPITest::TestGetSetAdoptText()
261 logln((UnicodeString
)"Testing getText setText ");
262 IcuTestErrorCode
status(*this, "TestGetSetAdoptText");
263 UnicodeString str1
="first string.";
264 UnicodeString str2
="Second string.";
265 LocalPointer
<RuleBasedBreakIterator
> charIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
));
266 LocalPointer
<RuleBasedBreakIterator
> wordIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
));
267 if(status
.isFailure()){
268 errcheckln(status
, "Fail : in construction - %s", status
.errorName());
// Character iterators over str1 and str2; text3 is constructed over str2 with
// the extra arguments (3, 10, 3).
273 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
274 CharacterIterator
* text1Clone
= text1
->clone();
275 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
276 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
// setText(str1) must leave the iteration position at 0.
278 wordIter1
->setText(str1
);
279 CharacterIterator
*tci
= &wordIter1
->getText();
282 TEST_ASSERT(tstr
== str1
);
283 if(wordIter1
->current() != 0)
284 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
// Replacing the text must again leave the position at 0.
288 wordIter1
->setText(str2
);
289 if(wordIter1
->current() != 0)
290 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
// After adoptText(), charIter1's text must differ from wordIter1's.
293 charIter1
->adoptText(text1Clone
);
294 TEST_ASSERT(wordIter1
->getText() != charIter1
->getText());
295 tci
= &wordIter1
->getText();
297 TEST_ASSERT(tstr
== str2
);
298 tci
= &charIter1
->getText();
300 TEST_ASSERT(tstr
== str1
);
// adoptText() on a clone of the word iterator: getText() must compare equal to
// whichever iterator was adopted last.
303 LocalPointer
<RuleBasedBreakIterator
> rb(wordIter1
->clone());
304 rb
->adoptText(text1
);
305 if(rb
->getText() != *text1
)
306 errln((UnicodeString
)"ERROR:1 error in adoptText ");
307 rb
->adoptText(text2
);
308 if(rb
->getText() != *text2
)
309 errln((UnicodeString
)"ERROR:2 error in adoptText ");
311 // Adopt where iterator range is less than the entire original source string.
312 // (With the change of the break engine to working with UText internally,
313 // CharacterIterators starting at positions other than zero are not supported)
314 rb
->adoptText(text3
);
315 TEST_ASSERT(rb
->preceding(2) == 0);
316 TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
);
317 //if(rb->preceding(2) != 3) {
318 // errln((UnicodeString)"ERROR:3 error in adoptText ");
320 //if(rb->following(11) != BreakIterator::DONE) {
321 // errln((UnicodeString)"ERROR:4 error in adoptText ");
326 // Quick test to see if UText is working at all.
328 const char *s1
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
329 const char *s2
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
// Forward iteration over s1 through a UTF-8 UText; the last boundary must be 11,
// followed by UBRK_DONE.
333 LocalUTextPointer
ut(utext_openUTF8(NULL
, s1
, -1, status
));
334 wordIter1
->setText(ut
.getAlias(), status
);
335 TEST_ASSERT_SUCCESS(status
);
338 pos
= wordIter1
->first();
340 pos
= wordIter1
->next();
342 pos
= wordIter1
->next();
344 pos
= wordIter1
->next();
345 TEST_ASSERT(pos
==11);
346 pos
= wordIter1
->next();
347 TEST_ASSERT(pos
==UBRK_DONE
);
// Switch to s2 and iterate forward, then backward from last() until UBRK_DONE.
350 LocalUTextPointer
ut2(utext_openUTF8(NULL
, s2
, -1, status
));
351 TEST_ASSERT_SUCCESS(status
);
352 wordIter1
->setText(ut2
.getAlias(), status
);
353 TEST_ASSERT_SUCCESS(status
);
355 pos
= wordIter1
->first();
357 pos
= wordIter1
->next();
359 pos
= wordIter1
->next();
362 pos
= wordIter1
->last();
364 pos
= wordIter1
->previous();
366 pos
= wordIter1
->previous();
368 pos
= wordIter1
->previous();
370 pos
= wordIter1
->previous();
371 TEST_ASSERT(pos
==UBRK_DONE
);
// getUText() is called with a UText opened on an empty UnicodeString; only the
// resulting status is checked in the visible code.
374 UnicodeString sEmpty
;
375 LocalUTextPointer
gut2(utext_openUnicodeString(NULL
, &sEmpty
, status
));
376 wordIter1
->getUText(gut2
.getAlias(), status
);
377 TEST_ASSERT_SUCCESS(status
);
// Creates each kind of break iterator (character, word, line, sentence, title)
// and then checks the iteration API against the text "0123456789" using a
// character iterator.
// NOTE(review): braces, the iteration calls that assign `i`, most of the
// conditions guarding the errln() messages, and any deletes are presumably
// missing from this listing (the embedded line numbers skip).
382 void RBBIAPITest::TestIteration()
384 // This test just verifies that the API is present.
385 // Testing for correct operation of the break rules happens elsewhere.
387 UErrorCode status
=U_ZERO_ERROR
;
388 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
389 if (U_FAILURE(status
) || bi
== NULL
) {
390 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
395 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
396 if (U_FAILURE(status
) || bi
== NULL
) {
397 errcheckln(status
, "Failure creating Word break iterator. Status = %s", u_errorName(status
));
402 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
403 if (U_FAILURE(status
) || bi
== NULL
) {
404 errcheckln(status
, "Failure creating Line break iterator. Status = %s", u_errorName(status
));
409 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
410 if (U_FAILURE(status
) || bi
== NULL
) {
411 errcheckln(status
, "Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
416 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
417 if (U_FAILURE(status
) || bi
== NULL
) {
418 errcheckln(status
, "Failure creating Title break iterator. Status = %s", u_errorName(status
));
// The character iterator created here is the one used for the checks below.
423 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
424 if (U_FAILURE(status
) || bi
== NULL
) {
425 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
426 return; // Skip the rest of these tests.
430 UnicodeString testString
="0123456789";
431 bi
->setText(testString
);
// first() / last() / previous() / next() expectations, per the messages below.
436 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__
, __LINE__
, i
);
441 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
450 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__
, __LINE__
, i
);
456 if (i
!= BreakIterator::DONE
) {
457 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
466 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__
, __LINE__
, i
);
471 if (i
!= BreakIterator::DONE
) {
472 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
// current() expectations.
482 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__
, __LINE__
, i
);
488 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__
, __LINE__
, i
);
495 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
502 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__
, __LINE__
, i
);
// following(offset): expected results per the messages are 5, 10 and DONE.
509 i
= bi
->following(4);
511 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__
, __LINE__
, i
);
514 i
= bi
->following(9);
516 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
519 i
= bi
->following(10);
520 if (i
!= BreakIterator::DONE
) {
521 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
// preceding(offset): expected results per the messages are 3, 9, 0 and DONE.
528 i
= bi
->preceding(4);
530 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__
, __LINE__
, i
);
533 i
= bi
->preceding(10);
535 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__
, __LINE__
, i
);
538 i
= bi
->preceding(1);
540 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__
, __LINE__
, i
);
543 i
= bi
->preceding(0);
544 if (i
!= BreakIterator::DONE
) {
545 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__
, __LINE__
, i
);
// isBoundary(): offset 3 must be a boundary; offset 11 (past the text) must not be.
// NOTE(review): the two isBoundary messages pass `i` although their format
// strings have no third conversion specifier.
553 if (bi
->isBoundary(3) != TRUE
) {
554 errln("%s:%d Incorrect value from bi->isBoudary(). Expected TRUE, got FALSE", __FILE__
, __LINE__
, i
);
558 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__
, __LINE__
, i
);
562 if (bi
->isBoundary(11) != FALSE
) {
563 errln("%s:%d Incorrect value from bi->isBoudary(). Expected FALSE, got TRUE", __FILE__
, __LINE__
, i
);
567 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
// Further next() checks; the calls themselves are not visible in this listing.
576 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__
, __LINE__
, i
);
581 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__
, __LINE__
, i
);
586 if (i
!= BreakIterator::DONE
) {
587 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__
, __LINE__
, i
);
// Builds a RuleBasedBreakIterator from rule source text and verifies, through
// doBoundaryTest(), that "abc123..abc" breaks at {0, 3, 6, 7, 8, 11}.
// NOTE(review): part of the rule string and the closing lines are presumably
// missing from this listing (the embedded line numbers skip).
599 void RBBIAPITest::TestBuilder() {
600 UnicodeString rulesString1
= "$Letters = [:L:];\n"
601 "$Numbers = [:N:];\n"
604 "[^$Letters $Numbers];\n"
606 UnicodeString testString1
= "abc123..abc";
// Expected boundary offsets within testString1.
608 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
609 UErrorCode status
=U_ZERO_ERROR
;
610 UParseError parseError
;
612 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
613 if(U_FAILURE(status
)) {
614 dataerrln("Fail : in construction - %s", u_errorName(status
));
616 bi
->setText(testString1
);
617 doBoundaryTest(*bi
, testString1
, bounds1
);
625 // Single quotes within rules imply a grouping, so that a modifier
626 // following the quoted text (* or +) applies to all of the quoted chars.
// The test builds an iterator from a rule that applies '*' to the quoted
// sequence '$@!' and checks, through doBoundaryTest(), that "$@!$@!X$@!!X"
// breaks at {0, 6, 7, 10, 11, 12}.
// NOTE(review): part of the rule string and the closing lines are presumably
// missing from this listing (the embedded line numbers skip).
628 void RBBIAPITest::TestQuoteGrouping() {
629 UnicodeString rulesString1
= "#Here comes the rule...\n"
630 "'$@!'*;\n" // (\$\@\!)*
633 UnicodeString testString1
= "$@!$@!X$@!!X";
// Expected boundary offsets within testString1.
635 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
636 UErrorCode status
=U_ZERO_ERROR
;
637 UParseError parseError
;
639 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
640 if(U_FAILURE(status
)) {
641 dataerrln("Fail : in construction - %s", u_errorName(status
));
643 bi
->setText(testString1
);
644 doBoundaryTest(*bi
, testString1
, bounds1
);
651 // Test word break rule status constants.
// For an English word iterator, each boundary's getRuleStatus() value must lie
// in the half-open range [tag_lo[i], tag_hi[i]) and must match the first entry
// returned by getRuleStatusVec(). A second part checks positions and status
// tags of an English line iterator and the ordering of the UBRK_LINE_* constants.
// NOTE(review): braces, the declarations of `str`, `i`, `pos`, `tag`, `vec` and
// `success`, and the switch around the line-break cases are presumably missing
// from this listing (the embedded line numbers skip).
653 void RBBIAPITest::TestRuleStatus() {
655 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
656 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
657 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
658 // 012345678901234567 8 9 0
661 UnicodeString
testString1(str
);
// Expected boundaries, with the inclusive lower and exclusive upper tag limit
// for each of them.
662 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
663 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
664 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
665 UBRK_WORD_IDEO
, UBRK_WORD_NONE
};
667 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
668 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
669 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
};
671 UErrorCode status
=U_ZERO_ERROR
;
673 BreakIterator
*bi
= BreakIterator::createWordInstance(Locale::getEnglish(), status
);
674 if(U_FAILURE(status
)) {
675 errcheckln(status
, "%s:%d Fail in construction - %s", __FILE__
, __LINE__
, u_errorName(status
));
677 bi
->setText(testString1
);
678 // First test that the breaks are in the right spots.
679 doBoundaryTest(*bi
, testString1
, bounds1
);
681 // Then go back and check tag values
684 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
685 if (pos
!= bounds1
[i
]) {
686 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__
, __LINE__
, pos
);
689 tag
= bi
->getRuleStatus();
690 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
691 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__
, __LINE__
, tag
, pos
);
695 // Check that we get the same tag values from getRuleStatusVec()
697 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
698 TEST_ASSERT_SUCCESS(status
);
700 TEST_ASSERT(vec
[0] == tag
);
705 // Now test line break status. This test mostly is to confirm that the status constants
706 // are correctly declared in the header.
707 testString1
= "test line. \n";
710 bi
= BreakIterator::createLineInstance(Locale::getEnglish(), status
);
711 if(U_FAILURE(status
)) {
712 errcheckln(status
, "%s:%d failed to create line break iterator. - %s", __FILE__
, __LINE__
, u_errorName(status
));
718 bi
->setText(testString1
);
720 tag
= bi
->getRuleStatus();
// Expected (position, tag) pairs per the visible cases: (0, SOFT), (5, SOFT)
// and (12, HARD); any other case counts as a failure.
721 for (i
=0; i
<3; i
++) {
724 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
726 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
728 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
730 success
= FALSE
; break;
732 if (success
== FALSE
) {
733 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
734 __FILE__
, __LINE__
, i
, pos
, tag
);
738 tag
= bi
->getRuleStatus();
// Sanity check on the header constants: each value lies below its own limit,
// and HARD does not fall inside the SOFT range.
740 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
741 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
742 (UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
)) {
743 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__
, __LINE__
);
753 // Test the vector form of break rule status.
// Builds an iterator from custom rules and, at successive positions in
// "Aapz5?", checks the count and contents returned by getRuleStatusVec(). It
// then checks the buffer-overflow contract with capacities 0, 1 and 2.
// NOTE(review): most of the rule string, the calls that advance the iterator,
// the declaration of `numStatuses` and the closing lines are presumably missing
// from this listing (the embedded line numbers skip).
755 void RBBIAPITest::TestRuleStatusVec() {
756 UnicodeString
rulesString( "[A-N]{100}; \n"
761 "!.*;\n", -1, US_INV
);
762 UnicodeString testString1
= "Aapz5?";
763 int32_t statusVals
[10];
767 UErrorCode status
=U_ZERO_ERROR
;
768 UParseError parseError
;
770 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
771 if (U_FAILURE(status
)) {
772 dataerrln("Failure at file %s, line %d, error = %s", __FILE__
, __LINE__
, u_errorName(status
));
774 bi
->setText(testString1
);
// Each group below expects a specific number of status values and specific
// contents in statusVals.
779 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
780 TEST_ASSERT_SUCCESS(status
);
781 TEST_ASSERT(numStatuses
== 2);
782 TEST_ASSERT(statusVals
[0] == 100);
783 TEST_ASSERT(statusVals
[1] == 300);
788 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
789 TEST_ASSERT_SUCCESS(status
);
790 TEST_ASSERT(numStatuses
== 2);
791 TEST_ASSERT(statusVals
[0] == 200);
792 TEST_ASSERT(statusVals
[1] == 300);
797 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
798 TEST_ASSERT_SUCCESS(status
);
799 TEST_ASSERT(numStatuses
== 2);
800 TEST_ASSERT(statusVals
[0] == 200);
801 TEST_ASSERT(statusVals
[1] == 300);
806 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
807 TEST_ASSERT_SUCCESS(status
);
808 TEST_ASSERT(numStatuses
== 1);
809 TEST_ASSERT(statusVals
[0] == 300);
814 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
815 TEST_ASSERT_SUCCESS(status
);
816 TEST_ASSERT(numStatuses
== 2);
817 TEST_ASSERT(statusVals
[0] == 400);
818 TEST_ASSERT(statusVals
[1] == 500);
823 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
824 TEST_ASSERT_SUCCESS(status
);
825 TEST_ASSERT(numStatuses
== 1);
826 TEST_ASSERT(statusVals
[0] == 0);
829 // Check buffer overflow error handling. Char == A
// Capacity 0: overflow is reported, the full count (2) is still returned, and
// the buffer stays untouched (every byte still 0xFF from memset, so -1).
834 memset(statusVals
, -1, sizeof(statusVals
));
835 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
836 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
837 TEST_ASSERT(numStatuses
== 2);
838 TEST_ASSERT(statusVals
[0] == -1);
// Capacity 1: overflow is reported, only the first value is written.
840 status
= U_ZERO_ERROR
;
841 memset(statusVals
, -1, sizeof(statusVals
));
842 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
843 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
844 TEST_ASSERT(numStatuses
== 2);
845 TEST_ASSERT(statusVals
[0] == 100);
846 TEST_ASSERT(statusVals
[1] == -1);
// Capacity 2: exact fit, success, and nothing is written past the second slot.
848 status
= U_ZERO_ERROR
;
849 memset(statusVals
, -1, sizeof(statusVals
));
850 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
851 TEST_ASSERT_SUCCESS(status
);
852 TEST_ASSERT(numStatuses
== 2);
853 TEST_ASSERT(statusVals
[0] == 100);
854 TEST_ASSERT(statusVals
[1] == 300);
855 TEST_ASSERT(statusVals
[2] == -1);
862 // Bug 2190 Regression test. Builder crash on rule consisting of only a
863 // $variable reference
// The test builds an iterator from rules that define $aaa and checks, through
// doBoundaryTest(), that "abcdabcd" breaks at {0, 4, 8}.
// NOTE(review): the remainder of the rule string and the closing lines are
// presumably missing from this listing (the embedded line numbers skip).
864 void RBBIAPITest::TestBug2190() {
865 UnicodeString rulesString1
= "$aaa = abcd;\n"
868 UnicodeString testString1
= "abcdabcd";
// Expected boundary offsets within testString1.
870 int32_t bounds1
[] = {0, 4, 8};
871 UErrorCode status
=U_ZERO_ERROR
;
872 UParseError parseError
;
874 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
875 if(U_FAILURE(status
)) {
876 dataerrln("Fail : in construction - %s", u_errorName(status
));
878 bi
->setText(testString1
);
879 doBoundaryTest(*bi
, testString1
, bounds1
);
// Exercises BreakIterator::registerInstance() / unregister(): registers a
// Japanese word iterator under the locale "xx", checks what the factory returns
// for several locales, looks for "xx" in getAvailableLocales(), unregisters,
// and checks again. Compiled only when UCONFIG_NO_SERVICE is 0.
// NOTE(review): braces, the `fail`/`count` declarations, the conditions guarding
// most errln() calls, the matching #endif lines and all deletes are presumably
// missing from this listing (the embedded line numbers skip).
885 void RBBIAPITest::TestRegistration() {
886 #if !UCONFIG_NO_SERVICE
887 UErrorCode status
= U_ZERO_ERROR
;
888 BreakIterator
* ja_word
= BreakIterator::createWordInstance("ja_JP", status
);
889 // ok to not delete these if we exit because of error?
890 BreakIterator
* ja_char
= BreakIterator::createCharacterInstance("ja_JP", status
);
891 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
892 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
// Missing or inaccessible ICU data is reported as a data error.
894 if (status
== U_MISSING_RESOURCE_ERROR
|| status
== U_FILE_ACCESS_ERROR
) {
895 dataerrln("Error creating instances of break interactors - %s", u_errorName(status
));
// Register ja_word as the word iterator for locale "xx"; the key is used to
// unregister later.
905 URegistryKey key
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
);
907 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
908 if (ja_word
&& *ja_word
== *root_word
) {
909 errln("japan not different from root");
// While registered: xx_XX/word is compared against ja_word, ja_JP/char against
// ja_char, and xx_XX/char against root_char.
915 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
918 fail
= *result
!= *ja_word
;
922 errln("bad result for xx_XX/word");
927 BreakIterator
* result
= BreakIterator::createCharacterInstance("ja_JP", status
);
930 fail
= *result
!= *ja_char
;
934 errln("bad result for ja_JP/char");
939 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
942 fail
= *result
!= *root_char
;
946 errln("bad result for xx_XX/char");
// Scan the available locales for "xx" while the registration is active.
951 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
953 const UnicodeString
* p
;
954 while ((p
= avail
->snext(status
))) {
955 if (p
->compare("xx") == 0) {
962 errln("did not find test locale");
967 UBool unreg
= BreakIterator::unregister(key
, status
);
969 errln("unable to unregister");
// After unregistering: en_US/word is compared against the root word iterator.
974 BreakIterator
* result
= BreakIterator::createWordInstance("en_US", status
);
975 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
978 fail
= *root
!= *result
;
983 errln("did not get root break");
// Scan the available locales for "xx" again, this time after unregistering.
988 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
990 const UnicodeString
* p
;
991 while ((p
= avail
->snext(status
))) {
992 if (p
->compare("xx") == 0) {
999 errln("found test locale");
// The array-returning getAvailableLocales(count) overload must include English.
1005 UBool foundLocale
= FALSE
;
1006 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
1007 for (int i
=0; i
<count
; i
++) {
1008 if (avail
[i
] == Locale::getEnglish()) {
1013 if (foundLocale
== FALSE
) {
1014 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1019 // ja_word was adopted by factory
// Opens the pre-built break-iterator data item named `dataFile`, rebuilds a
// RuleBasedBreakIterator from the rule source stored inside it, and compares
// the rebuilt binary rules byte-for-byte against the stored ones.
//
// dataFile: name of the "brk" data item to open (for example "word").
// NOTE(review): braces, early returns and the declaration of `length` are
// presumably missing from this listing (the embedded line numbers skip).
1026 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
1027 UErrorCode status
= U_ZERO_ERROR
;
1028 UParseError parseError
;
1029 parseError
.line
= 0;
1030 parseError
.offset
= 0;
1031 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
));
1033 const UChar
*builtSource
;
1034 const uint8_t *rbbiRules
;
1035 const uint8_t *builtRules
;
1037 if (U_FAILURE(status
)) {
1038 errcheckln(status
, "%s:%d Can't open \"%s\" - %s", __FILE__
, __LINE__
, dataFile
, u_errorName(status
));
// The rule source text sits inside the binary data at the byte offset stored in
// the header's fRuleSource field.
1042 builtRules
= (const uint8_t *)udata_getMemory(data
.getAlias());
1043 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
1044 LocalPointer
<RuleBasedBreakIterator
> brkItr (new RuleBasedBreakIterator(builtSource
, parseError
, status
));
1045 if (U_FAILURE(status
)) {
1046 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1047 __FILE__
, __LINE__
, u_errorName(status
), parseError
.line
, parseError
.offset
);
1048 errln(UnicodeString(builtSource
));
// Compare `length` bytes of the stored rules against the rebuilt rules.
1051 rbbiRules
= brkItr
->getBinaryRules(length
);
1052 logln("Comparing \"%s\" len=%d", dataFile
, length
);
1053 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
1054 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__
, __LINE__
, dataFile
);
// Runs RoundtripRule() on the data items "word", "title", "sent", "line",
// "char" and "word_POSIX".
// NOTE(review): the embedded numbering skips 1065, so one line (and the closing
// brace) is presumably missing from this listing.
1059 void RBBIAPITest::TestRoundtripRules() {
1060 RoundtripRule("word");
1061 RoundtripRule("title");
1062 RoundtripRule("sent");
1063 RoundtripRule("line");
1064 RoundtripRule("char");
1066 RoundtripRule("word_POSIX");
1071 // Check getBinaryRules() and construction of a break iterator from those rules.
// The test takes the English line iterator, copies its binary rules into a
// heap buffer, constructs a second iterator from that copy, and checks that
// both iterators pass the same `n == 2` assertion on "Hello, World!".
// NOTE(review): braces, the declaration and updates of `n`, and the null check
// around the dynamic_cast result are presumably missing from this listing (the
// embedded line numbers skip).
1073 void RBBIAPITest::TestGetBinaryRules() {
1074 UErrorCode status
=U_ZERO_ERROR
;
1075 LocalPointer
<BreakIterator
> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status
));
1076 if (U_FAILURE(status
)) {
1077 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status
));
// getBinaryRules() is called through the RuleBasedBreakIterator pointer, so the
// generic iterator is downcast.
1080 RuleBasedBreakIterator
*rbbi
= dynamic_cast<RuleBasedBreakIterator
*>(bi
.getAlias());
1082 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1086 // Check that the new line break iterator is nominally functional.
1087 UnicodeString
helloWorld("Hello, World!");
1088 rbbi
->setText(helloWorld
);
1090 while (bi
->next() != UBRK_DONE
) {
1093 TEST_ASSERT(n
== 2);
1095 // Extract the binary rules as a uint8_t blob.
1096 uint32_t ruleLength
;
1097 const uint8_t *binRules
= rbbi
->getBinaryRules(ruleLength
);
1098 TEST_ASSERT(ruleLength
> 0);
1099 TEST_ASSERT(binRules
!= NULL
);
1101 // Clone the binary rules, and create a break iterator from that.
1102 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1103 uint8_t *clonedRules
= new uint8_t[ruleLength
];
1104 memcpy(clonedRules
, binRules
, ruleLength
);
1105 RuleBasedBreakIterator
clonedBI(clonedRules
, ruleLength
, status
);
1106 TEST_ASSERT_SUCCESS(status
);
1108 // Check that the cloned line break iterator is nominally alive.
1109 clonedBI
.setText(helloWorld
);
1111 while (clonedBI
.next() != UBRK_DONE
) {
1114 TEST_ASSERT(n
== 2);
// The copied rule buffer is released here by the test.
1116 delete[] clonedRules
;
// Verifies refreshInputText(): iteration starts on one buffer, the text is
// copied to a second buffer and the first is overwritten, the iterator is
// pointed at the copy, and the remaining boundaries must still be found.
// NOTE(review): braces, the delimiters of the original block comment, and any
// cleanup (utext_close, delete) are presumably missing from this listing (the
// embedded line numbers skip).
1120 void RBBIAPITest::TestRefreshInputText() {
1122 * RefreshInput changes out the input of a Break Iterator without
1123 * changing anything else in the iterator's state. Used with Java JNI,
1124 * when Java moves the underlying string storage. This test
1125 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1126 * The right set of boundaries should still be found.
1128 UChar testStr
[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1129 UChar movedStr
[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1130 UErrorCode status
= U_ZERO_ERROR
;
1131 UText ut1
= UTEXT_INITIALIZER
;
1132 UText ut2
= UTEXT_INITIALIZER
;
1133 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
);
1134 TEST_ASSERT_SUCCESS(status
);
1136 utext_openUChars(&ut1
, testStr
, -1, &status
);
1137 TEST_ASSERT_SUCCESS(status
);
1139 if (U_SUCCESS(status
)) {
1140 bi
->setText(&ut1
, status
);
1141 TEST_ASSERT_SUCCESS(status
);
1143 /* Line boundaries will occur before each letter in the original string */
1144 TEST_ASSERT(1 == bi
->next());
1145 TEST_ASSERT(3 == bi
->next());
1147 /* Move the string, kill the original string. */
1148 u_strcpy(movedStr
, testStr
);
1149 u_memset(testStr
, 0x20, u_strlen(testStr
));
1150 utext_openUChars(&ut2
, movedStr
, -1, &status
);
1151 TEST_ASSERT_SUCCESS(status
);
// refreshInputText() must return a reference to the same iterator object.
1152 RuleBasedBreakIterator
*returnedBI
= &bi
->refreshInputText(&ut2
, status
);
1153 TEST_ASSERT_SUCCESS(status
);
1154 TEST_ASSERT(bi
== returnedBI
);
1156 /* Find the following matches, now working in the moved string. */
1157 TEST_ASSERT(5 == bi
->next());
1158 TEST_ASSERT(7 == bi
->next());
1159 TEST_ASSERT(8 == bi
->next());
1160 TEST_ASSERT(UBRK_DONE
== bi
->next());
1169 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
// prtbrks
//   Debug helper: logs a string through `it`, then walks `brk` forward with
//   next() and renders the boundaries it reports.
//   Parameters:
//     brk  - break iterator to advance; the caller positions it beforehand.
//     ustr - the text; used for display and to size the position buffer.
//     it   - test object that receives logln()/errln() output.
//   Output built in `out`, as far as visible here:
//     - the text wrapped in CHSTR ... CHEND with a PILCROW appended after
//       each segment that ends at a reported boundary;
//     - then each boundary offset formatted with "%d ".
//   NOTE(review): the declarations of out, prev and tmp, the statements that
//   log `out`, and the release of `pos` are not visible in this excerpt -
//   confirm against the full file.
1170 static void prtbrks(BreakIterator
* brk
, const UnicodeString
&ustr
, IntlTest
&it
) {
1171 static const UChar PILCROW
=0x00B6, CHSTR
=0x3010, CHEND
=0x3011; // lenticular brackets
1172 it
.logln(UnicodeString("String:'")+ustr
+UnicodeString("'"));
// One slot per code unit of ustr.
// NOTE(review): raw new[] - make sure every exit path, including the error
// path below, frees this buffer.
1174 int32_t *pos
= new int32_t[ustr
.length()];
1175 int32_t posCount
= 0;
1177 // calculate breaks up front, so we can print out
1178 // sans any debugging
1179 for(int32_t n
= 0; (n
=brk
->next())!=UBRK_DONE
; ) {
1180 pos
[posCount
++] = n
;
// The bound is checked after the store, so this reports an error as soon as
// the buffer becomes full.
// NOTE(review): the statement that leaves the loop is not visible here -
// confirm it exits before another store can happen.
1181 if(posCount
>=ustr
.length()) {
1182 it
.errln("brk count exceeds string length!");
1187 out
.append((UChar
)CHSTR
);
// Emit each segment [prev, n) followed by a pilcrow marker.
1189 for(int32_t i
=0;i
<posCount
;i
++) {
1191 out
.append(ustr
.tempSubString(prev
,n
-prev
));
1192 out
.append((UChar
)PILCROW
);
// Trailing text after the last handled boundary, then the closing bracket.
1195 out
.append(ustr
.tempSubString(prev
,ustr
.length()-prev
));
1196 out
.append((UChar
)CHEND
);
// Second pass: the numeric offsets.
1200 for(int32_t i
=0;i
<posCount
;i
++) {
// NOTE(review): unbounded sprintf into tmp (declaration not visible) -
// confirm the buffer holds a formatted int32_t plus a space and terminator.
1202 sprintf(tmp
,"%d ",pos
[i
]);
1203 out
.append(UnicodeString(tmp
));
// TestFilteredBreakIteratorBuilder
//   Exercises FilteredBreakIteratorBuilder on top of sentence break
//   iterators. The same English sample text is segmented under several
//   builder configurations, and the offsets returned by next() are asserted:
//     1. empty builder, no suppressions       -> 20, 84, 90, 181, 278
//     2. empty builder + "Mr."                -> 84, 90, 278
//     3. empty builder + "Mr." and "Capt."    -> 84, 278
//     4. English builder with "Capt." removed -> 84, 90, 278
//     5. English builder unchanged            -> 84, 278
//     6. French builder on a French sentence, compared with the English
//        filtered iterator left over from case 5.
//   In each case the base iterator is released from its LocalPointer with
//   orphan() and passed to builder->build(); presumably build() takes
//   ownership of it (confirm against the FilteredBreakIteratorBuilder API).
//   When the configuration macros on the next line are not both satisfied,
//   only the "Skipped" message at the end is logged (the #else/#endif lines
//   are not visible in this excerpt).
1210 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1211 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1212 UErrorCode status
= U_ZERO_ERROR
;
1213 LocalPointer
<FilteredBreakIteratorBuilder
> builder
;
1214 LocalPointer
<BreakIterator
> baseBI
;
1215 LocalPointer
<BreakIterator
> filteredBI
;
1216 LocalPointer
<BreakIterator
> frenchBI
;
1218 const UnicodeString
text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1219 const UnicodeString
ABBR_MR("Mr.");
1220 const UnicodeString
ABBR_CAPT("Capt.");
// ---- Case 1: builder created without a locale and with nothing suppressed.
// Every period-terminated abbreviation still produces a break.
1223 logln("Constructing empty builder\n");
1224 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1225 TEST_ASSERT_SUCCESS(status
);
1227 logln("Constructing base BI\n");
1228 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1229 TEST_ASSERT_SUCCESS(status
);
1231 logln("Building new BI\n");
1232 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1233 TEST_ASSERT_SUCCESS(status
);
1235 if (U_SUCCESS(status
)) {
1237 filteredBI
->setText(text
);
1238 TEST_ASSERT(20 == filteredBI
->next()); // Mr.
1239 TEST_ASSERT(84 == filteredBI
->next()); // recovered.
1240 TEST_ASSERT(90 == filteredBI
->next()); // Capt.
1241 TEST_ASSERT(181 == filteredBI
->next()); // Mr.
1242 TEST_ASSERT(278 == filteredBI
->next()); // charge.
// Rewind before handing the iterator to prtbrks(), which walks it with next().
1243 filteredBI
->first();
1244 prtbrks(filteredBI
.getAlias(), text
, *this);
// ---- Case 2: suppress breaks after "Mr." only. Also checks the boolean
// results of adding or removing the same exception twice.
1249 logln("Constructing empty builder\n");
1250 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1251 TEST_ASSERT_SUCCESS(status
);
1253 if (U_SUCCESS(status
)) {
1254 logln("Adding Mr. as an exception\n");
1255 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1256 TEST_ASSERT(FALSE
== builder
->suppressBreakAfter(ABBR_MR
, status
)); // already have it
1257 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
));
1258 TEST_ASSERT(FALSE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
)); // already removed it
1259 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1260 TEST_ASSERT_SUCCESS(status
);
1262 logln("Constructing base BI\n");
1263 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1264 TEST_ASSERT_SUCCESS(status
);
1266 logln("Building new BI\n");
1267 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1268 TEST_ASSERT_SUCCESS(status
);
// The breaks at 20 and 181 (after "Mr.") are no longer expected.
1271 filteredBI
->setText(text
);
1272 TEST_ASSERT(84 == filteredBI
->next());
1273 TEST_ASSERT(90 == filteredBI
->next());// Capt.
1274 TEST_ASSERT(278 == filteredBI
->next());
1275 filteredBI
->first();
1276 prtbrks(filteredBI
.getAlias(), text
, *this);
// ---- Case 3: suppress breaks after both "Mr." and "Capt.".
1282 logln("Constructing empty builder\n");
1283 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1284 TEST_ASSERT_SUCCESS(status
);
1286 if (U_SUCCESS(status
)) {
1287 logln("Adding Mr. and Capt as an exception\n");
1288 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1289 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_CAPT
, status
));
1290 TEST_ASSERT_SUCCESS(status
);
1292 logln("Constructing base BI\n");
1293 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1294 TEST_ASSERT_SUCCESS(status
);
1296 logln("Building new BI\n");
1297 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1298 TEST_ASSERT_SUCCESS(status
);
// Only the two real sentence ends remain.
1301 filteredBI
->setText(text
);
1302 TEST_ASSERT(84 == filteredBI
->next());
1303 TEST_ASSERT(278 == filteredBI
->next());
1304 filteredBI
->first();
1305 prtbrks(filteredBI
.getAlias(), text
, *this);
// ---- Case 4: builder created for the English locale, with "Capt."
// explicitly removed. The unsuppress call is asserted to return TRUE, and
// the break after "Capt." (90) is expected again.
1311 logln("Constructing English builder\n");
1312 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1313 TEST_ASSERT_SUCCESS(status
);
1315 logln("Constructing base BI\n");
1316 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1317 TEST_ASSERT_SUCCESS(status
);
1319 if (U_SUCCESS(status
)) {
1320 logln("unsuppressing 'Capt'");
1321 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_CAPT
, status
));
1323 logln("Building new BI\n");
1324 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1325 TEST_ASSERT_SUCCESS(status
);
// Guard against a null result from build() before dereferencing.
1327 if(filteredBI
.isValid()) {
1329 filteredBI
->setText(text
);
1330 TEST_ASSERT(84 == filteredBI
->next());
1331 TEST_ASSERT(90 == filteredBI
->next());
1332 TEST_ASSERT(278 == filteredBI
->next());
1333 filteredBI
->first();
1334 prtbrks(filteredBI
.getAlias(), text
, *this);
// ---- Case 5: English-locale builder used as created. The test expects
// breaks only at 84 and 278.
1341 logln("Constructing English builder\n");
1342 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1343 TEST_ASSERT_SUCCESS(status
);
1345 logln("Constructing base BI\n");
1346 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1347 TEST_ASSERT_SUCCESS(status
);
1349 if (U_SUCCESS(status
)) {
1350 logln("Building new BI\n");
1351 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1352 TEST_ASSERT_SUCCESS(status
);
1354 if(filteredBI
.isValid()) {
1356 filteredBI
->setText(text
);
1357 TEST_ASSERT(84 == filteredBI
->next());
1358 TEST_ASSERT(278 == filteredBI
->next());
1359 filteredBI
->first();
1360 prtbrks(filteredBI
.getAlias(), text
, *this);
1365 // reenable once french is in
// ---- Case 6: French-locale builder over a French sentence iterator. The
// result is kept in frenchBI so that filteredBI (English, from case 5) stays
// available for the comparison below.
1367 logln("Constructing French builder");
1368 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status
));
1369 TEST_ASSERT_SUCCESS(status
);
1371 logln("Constructing base BI\n");
1372 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status
));
1373 TEST_ASSERT_SUCCESS(status
);
1375 if (U_SUCCESS(status
)) {
1376 logln("Building new BI\n");
1377 frenchBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1378 TEST_ASSERT_SUCCESS(status
);
1381 if(frenchBI
.isValid()) {
// With the French iterator the only break expected is at the end of the
// text (16), i.e. none after "MM.".
1383 UnicodeString
frText("C'est MM. Duval.");
1384 frenchBI
->setText(frText
);
1385 TEST_ASSERT(16 == frenchBI
->next());
1386 TEST_ASSERT(BreakIterator::DONE
== frenchBI
->next());
1388 prtbrks(frenchBI
.getAlias(), frText
, *this);
1389 logln("Testing against English:");
1390 filteredBI
->setText(frText
);
1391 TEST_ASSERT(10 == filteredBI
->next()); // wrong for french, but filterBI is english.
1392 TEST_ASSERT(16 == filteredBI
->next());
1393 TEST_ASSERT(BreakIterator::DONE
== filteredBI
->next());
1394 filteredBI
->first();
1395 prtbrks(filteredBI
.getAlias(), frText
, *this);
// Equality operators: an iterator equals itself, and the English and French
// filtered iterators compare unequal in both operand orders.
1398 assertTrue(WHERE
, *frenchBI
== *frenchBI
);
1399 assertTrue(WHERE
, *filteredBI
!= *frenchBI
);
1400 assertTrue(WHERE
, *frenchBI
!= *filteredBI
);
// Reported through dataerrln rather than errln.
1402 dataerrln("French BI: not valid.");
1407 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1411 //---------------------------------------------
1413 //---------------------------------------------
// runIndexedTest
//   Test-suite dispatcher: lists the test methods of RBBIAPITest through the
//   TESTCASE_AUTO macros.
//   Parameters:
//     index - selects a test case; consumed by the TESTCASE_AUTO macros
//             (presumably a zero-based position in the list below).
//     exec  - when true, the suite banner is logged; presumably the macros
//             also run the selected test only when it is set.
//     name  - reference through which the macros report the test name
//             (macro behaviour not visible here - confirm).
//     par   - unused (name commented out in the signature).
//   Several groups are wrapped in "#if !UCONFIG_NO_FILE_IO", and the filtered
//   break iterator test in "#if !UCONFIG_NO_BREAK_ITERATION".
//   NOTE(review): the matching #endif lines and the end-of-list macro are not
//   visible in this excerpt - confirm which entries each guard covers.
1415 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
1417 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
1418 TESTCASE_AUTO_BEGIN
;
1419 #if !UCONFIG_NO_FILE_IO
1420 TESTCASE_AUTO(TestCloneEquals
);
1421 TESTCASE_AUTO(TestgetRules
);
1422 TESTCASE_AUTO(TestHashCode
);
1423 TESTCASE_AUTO(TestGetSetAdoptText
);
1424 TESTCASE_AUTO(TestIteration
);
1426 TESTCASE_AUTO(TestBuilder
);
1427 TESTCASE_AUTO(TestQuoteGrouping
);
1428 TESTCASE_AUTO(TestRuleStatusVec
);
1429 TESTCASE_AUTO(TestBug2190
);
1430 #if !UCONFIG_NO_FILE_IO
1431 TESTCASE_AUTO(TestRegistration
);
1432 TESTCASE_AUTO(TestBoilerPlate
);
1433 TESTCASE_AUTO(TestRuleStatus
);
1434 TESTCASE_AUTO(TestRoundtripRules
);
1435 TESTCASE_AUTO(TestGetBinaryRules
);
1437 TESTCASE_AUTO(TestRefreshInputText
);
1438 #if !UCONFIG_NO_BREAK_ITERATION
1439 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder
);
1445 //---------------------------------------------
1446 //Internal subroutines
1447 //---------------------------------------------
// doBoundaryTest
//   Checks BreakIterator::isBoundary() at every offset of a text against a
//   list of expected boundaries.
//   Parameters:
//     bi         - iterator under test; the caller is expected to have set
//                  its text already (no setText call is visible here).
//     text       - supplies the range of offsets, 0 up to text.length() - 1.
//     boundaries - expected boundary offsets, read through the cursor p;
//                  presumably in ascending order (confirm against callers).
//   For each offset the result is logged. A mismatch in either direction is
//   reported with errln.
//   NOTE(review): the declarations of isB and p, the advance of p and the
//   else branches are not visible in this excerpt. Also confirm the array
//   carries an entry that keeps boundaries[p] valid once the last real
//   boundary has been consumed.
1449 void RBBIAPITest::doBoundaryTest(BreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1450 logln((UnicodeString
)"testIsBoundary():");
1453 for (int32_t i
= 0; i
< text
.length(); i
++) {
1454 isB
= bi
.isBoundary(i
);
1455 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
// Offset i is listed as a boundary: isBoundary() is expected to be true.
1457 if (i
== boundaries
[p
]) {
1459 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
// Offset i is not listed: isBoundary() is expected to be false.
1464 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
// doTest
//   Verifies one iterator movement: both the offset that was returned and
//   the text spanned between the starting offset and that offset.
//   Parameters:
//     testString     - text the iterator was run over.
//     start          - offset the movement started from.
//     gotoffset      - offset actually returned by the iterator.
//     expectedOffset - offset that should have been returned.
//     expectedString - expected text between start and gotoffset; converted
//                      with CharsToUnicodeString before comparison.
//   A wrong offset is reported with errln. The substring is extracted with
//   the smaller offset first, so the check works for forward and backward
//   movement alike. A wrong selection is reported with errln.
//   NOTE(review): the else lines are not visible in this excerpt; presumably
//   the final logln runs only when the selection matches - confirm.
1468 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1469 UnicodeString selected
;
1470 UnicodeString expected
=CharsToUnicodeString(expectedString
);
1472 if(gotoffset
!= expectedOffset
)
1473 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
// Forward movement (or no movement): the selection is [start, gotoffset).
1474 if(start
<= gotoffset
){
1475 testString
.extractBetween(start
, gotoffset
, selected
);
// Backward movement: the selection is [gotoffset, start).
1478 testString
.extractBetween(gotoffset
, start
, selected
);
1480 if(selected
.compare(expected
) != 0)
1481 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1483 logln(prettify("****selected \"" + selected
+ "\""));
1486 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */