1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1999-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
10 ********************************************************************/
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/uchar.h"
18 #include "unicode/rbbi.h"
19 #include "unicode/schriter.h"
24 #include "unicode/locid.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utext.h"
28 #if !UCONFIG_NO_BREAK_ITERATION
29 #include "unicode/filteredbrk.h"
30 #include <stdio.h> // for sprintf
33 * API Test the RuleBasedBreakIterator class
// TEST_ASSERT_SUCCESS(status): if `status` is a failure code (U_FAILURE), report it
// through dataerrln() together with the current file, line, and the ICU error name.
// Does nothing when `status` indicates success.
37 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
38 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
// TEST_ASSERT(expr): if `expr` compares equal to FALSE, report a test failure through
// errln() with the current file, line, and the stringified expression text (#expr).
40 #define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
41 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
// Exercises operator==, operator!= and clone() of RuleBasedBreakIterator.
// Uses three character-break iterators (bi1, biequal, bi3) and one word-break
// iterator (bi2), all created for the default locale.
43 void RBBIAPITest::TestCloneEquals()
46 UErrorCode status
=U_ZERO_ERROR
;
47 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
48 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
49 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
50 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
51 if(U_FAILURE(status
)){
52 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
// bi1, bi2 and biequal are given the same text; bi3 gets different text ("hello"),
// so the checks below treat biequal == bi3 as an error.
57 UnicodeString testString
="Testing word break iterators's clone() and equals()";
58 bi1
->setText(testString
);
59 bi2
->setText(testString
);
60 biequal
->setText(testString
);
62 bi3
->setText("hello");
64 logln((UnicodeString
)"Testing equals()");
66 logln((UnicodeString
)"Testing == and !=");
67 UBool b
= (*bi1
!= *biequal
);
71 errln((UnicodeString
)"ERROR:1 RBBI's == and != operator failed.");
// Iterators with different rules (word vs. character) or different text must not compare equal.
74 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
75 errln((UnicodeString
)"ERROR:2 RBBI's == and != operator failed.");
78 // Quick test of RuleBasedBreakIterator assignment -
80 // two different iterators are !=
81 // they are == after assignment
82 // source and dest iterator produce the same next() after assignment.
83 // deleting one doesn't disable the other.
84 logln("Testing assignment");
85 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getDefault(), status
);
86 if(U_FAILURE(status
)){
87 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
// Two default-constructed iterators: expected to equal each other but not the line iterator bix.
91 RuleBasedBreakIterator biDefault
, biDefault2
;
92 if(U_FAILURE(status
)){
93 errln((UnicodeString
)"FAIL : in construction of default iterator");
96 if (biDefault
== *bix
) {
97 errln((UnicodeString
)"ERROR: iterators should not compare ==");
100 if (biDefault
!= biDefault2
) {
101 errln((UnicodeString
)"ERROR: iterators should compare ==");
106 UnicodeString
HelloString("Hello Kitty");
107 bix
->setText(HelloString
);
109 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
// NOTE(review): given the preceding "before assignment" message and the assignment steps listed
// in the comment above, this message presumably should say "after assignment" - confirm.
113 errln(UnicodeString("ERROR: strings should be equal before assignment."));
// Both iterators are expected to report the same first boundary, at offset 7.
116 int bixnext
= bix
->next();
117 int bi2next
= bi2
->next();
118 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
119 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
122 if (bi2
->next() != 8) {
123 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
128 logln((UnicodeString
)"Testing clone()");
129 RuleBasedBreakIterator
* bi1clone
=(RuleBasedBreakIterator
*)bi1
->clone();
130 RuleBasedBreakIterator
* bi2clone
=(RuleBasedBreakIterator
*)bi2
->clone();
// A clone must equal its source (and anything equal to the source) and differ from
// iterators that differ from the source.
132 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
133 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
134 errln((UnicodeString
)"ERROR:1 RBBI's clone() method failed");
136 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
137 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
138 errln((UnicodeString
)"ERROR:2 RBBI's clone() method failed");
// The clone's text must match the source's text; clones of different iterators must differ.
140 if(bi1
->getText() != bi1clone
->getText() ||
141 bi2clone
->getText() != bi2
->getText() ||
142 *bi2clone
== *bi1clone
)
143 errln((UnicodeString
)"ERROR: RBBI's clone() method failed");
// Checks the boilerplate comparison operators (operator== / operator!=) on BreakIterator.
// Creates word iterators for locales "hi" and "hi_IN" and character iterators for
// "ja" and "root"; the comparisons that trigger the error messages are not shown here
// (presumably a vs. b and c vs. d - confirm).
153 void RBBIAPITest::TestBoilerPlate()
155 UErrorCode status
= U_ZERO_ERROR
;
156 BreakIterator
* a
= BreakIterator::createWordInstance(Locale("hi"), status
);
157 BreakIterator
* b
= BreakIterator::createWordInstance(Locale("hi_IN"),status
);
158 if (U_FAILURE(status
)) {
159 errcheckln(status
, "Creation of break iterator failed %s", u_errorName(status
));
163 errln("Failed: boilerplate method operator!= does not return correct results");
// NOTE(review): the comment below says "word" break iterators, but the code that follows
// creates character instances - confirm which is intended.
165 // Japanese word break iterators are identical to root with
166 // a dictionary-based break iterator
167 BreakIterator
* c
= BreakIterator::createCharacterInstance(Locale("ja"),status
);
168 BreakIterator
* d
= BreakIterator::createCharacterInstance(Locale("root"),status
);
171 errln("Failed: boilerplate method operator== does not return correct results");
174 errln("creation of break iterator failed");
// Checks RuleBasedBreakIterator::getRules(): a clone must report the same rules as its
// source, while a character iterator and a word iterator must report different rules.
182 void RBBIAPITest::TestgetRules()
184 UErrorCode status
=U_ZERO_ERROR
;
186 RuleBasedBreakIterator
* bi1
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
187 RuleBasedBreakIterator
* bi2
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
188 if(U_FAILURE(status
)){
189 errcheckln(status
, "FAIL: in construction - %s", u_errorName(status
));
197 logln((UnicodeString
)"Testing toString()");
199 bi1
->setText((UnicodeString
)"Hello there");
// bi3 is a clone of the character iterator bi1.
201 RuleBasedBreakIterator
* bi3
=(RuleBasedBreakIterator
*)bi1
->clone();
203 UnicodeString temp
=bi1
->getRules();
204 UnicodeString temp2
=bi2
->getRules();
205 UnicodeString temp3
=bi3
->getRules();
// Error if word rules equal either character rule set, or if the clone's rules differ from bi1's.
206 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
207 errln((UnicodeString
)"ERROR: error in getRules() method");
// Checks RuleBasedBreakIterator::hashCode(): iterators of the same kind over the same text
// (and their clones) must hash equally; character and word iterators must hash differently.
213 void RBBIAPITest::TestHashCode()
215 UErrorCode status
=U_ZERO_ERROR
;
216 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
217 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
218 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
219 if(U_FAILURE(status
)){
220 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
228 logln((UnicodeString
)"Testing hashCode()");
// All three iterators receive identical text, so only the iterator kind distinguishes them.
230 bi1
->setText((UnicodeString
)"Hash code");
231 bi2
->setText((UnicodeString
)"Hash code");
232 bi3
->setText((UnicodeString
)"Hash code");
234 RuleBasedBreakIterator
* bi1clone
= (RuleBasedBreakIterator
*)bi1
->clone();
235 RuleBasedBreakIterator
* bi2clone
= (RuleBasedBreakIterator
*)bi2
->clone();
// Same kind + same text (including clones) => same hash.
237 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
238 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
239 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
// Character vs. word iterators => the test expects different hashes.
241 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
242 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
243 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
// Exercises the text accessors of RuleBasedBreakIterator: getText(), setText() with both
// UnicodeString and UText inputs, adoptText() with CharacterIterators, and getUText().
252 void RBBIAPITest::TestGetSetAdoptText()
254 logln((UnicodeString
)"Testing getText setText ");
255 IcuTestErrorCode
status(*this, "TestGetSetAdoptText");
256 UnicodeString str1
="first string.";
257 UnicodeString str2
="Second string.";
258 LocalPointer
<RuleBasedBreakIterator
> charIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
));
259 LocalPointer
<RuleBasedBreakIterator
> wordIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
));
260 if(status
.isFailure()){
261 errcheckln(status
, "Fail : in construction - %s", status
.errorName());
// Character iterators that are later handed to adoptText(): text1Clone goes to charIter1,
// text1/text2/text3 go to rb.
266 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
267 CharacterIterator
* text1Clone
= text1
->clone();
268 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
269 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
271 wordIter1
->setText(str1
);
272 CharacterIterator
*tci
= &wordIter1
->getText();
275 TEST_ASSERT(tstr
== str1
);
// setText() must leave the iterator positioned at offset 0.
276 if(wordIter1
->current() != 0)
277 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
281 wordIter1
->setText(str2
);
282 if(wordIter1
->current() != 0)
283 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
// After this, wordIter1 holds str2 and charIter1 holds str1, so their texts must differ.
286 charIter1
->adoptText(text1Clone
);
287 TEST_ASSERT(wordIter1
->getText() != charIter1
->getText());
288 tci
= &wordIter1
->getText();
290 TEST_ASSERT(tstr
== str2
);
291 tci
= &charIter1
->getText();
293 TEST_ASSERT(tstr
== str1
);
// rb is a clone of wordIter1; getText() must reflect each adopted CharacterIterator.
296 LocalPointer
<RuleBasedBreakIterator
> rb((RuleBasedBreakIterator
*)wordIter1
->clone());
297 rb
->adoptText(text1
);
298 if(rb
->getText() != *text1
)
299 errln((UnicodeString
)"ERROR:1 error in adoptText ");
300 rb
->adoptText(text2
);
301 if(rb
->getText() != *text2
)
302 errln((UnicodeString
)"ERROR:2 error in adoptText ");
304 // Adopt where iterator range is less than the entire original source string.
305 // (With the change of the break engine to working with UText internally,
306 // CharacterIterators starting at positions other than zero are not supported)
307 rb
->adoptText(text3
);
308 TEST_ASSERT(rb
->preceding(2) == 0);
309 TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
);
310 //if(rb->preceding(2) != 3) {
311 // errln((UnicodeString)"ERROR:3 error in adoptText ");
313 //if(rb->following(11) != BreakIterator::DONE) {
314 // errln((UnicodeString)"ERROR:4 error in adoptText ");
319 // Quick test to see if UText is working at all.
321 const char *s1
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
322 const char *s2
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
326 LocalUTextPointer
ut(utext_openUTF8(NULL
, s1
, -1, status
));
327 wordIter1
->setText(ut
.getAlias(), status
);
328 TEST_ASSERT_SUCCESS(status
);
// Walk forward over s1; the visible assertions expect a boundary at 11 followed by UBRK_DONE.
331 pos
= wordIter1
->first();
333 pos
= wordIter1
->next();
335 pos
= wordIter1
->next();
337 pos
= wordIter1
->next();
338 TEST_ASSERT(pos
==11);
339 pos
= wordIter1
->next();
340 TEST_ASSERT(pos
==UBRK_DONE
);
// Switch to a second UText (s2) and walk it forward, then backward from last() until UBRK_DONE.
343 LocalUTextPointer
ut2(utext_openUTF8(NULL
, s2
, -1, status
));
344 TEST_ASSERT_SUCCESS(status
);
345 wordIter1
->setText(ut2
.getAlias(), status
);
346 TEST_ASSERT_SUCCESS(status
);
348 pos
= wordIter1
->first();
350 pos
= wordIter1
->next();
352 pos
= wordIter1
->next();
355 pos
= wordIter1
->last();
357 pos
= wordIter1
->previous();
359 pos
= wordIter1
->previous();
361 pos
= wordIter1
->previous();
363 pos
= wordIter1
->previous();
364 TEST_ASSERT(pos
==UBRK_DONE
);
// getUText() is called with an existing UText (opened on an empty string) as its fill-in argument.
367 UnicodeString sEmpty
;
368 LocalUTextPointer
gut2(utext_openUnicodeString(NULL
, &sEmpty
, status
));
369 wordIter1
->getUText(gut2
.getAlias(), status
);
370 TEST_ASSERT_SUCCESS(status
);
// API smoke test for iteration. Creates each built-in iterator kind (character, word, line,
// sentence, title) for the default locale, then uses a character iterator over "0123456789"
// and reports unexpected results from the iteration functions, including following(),
// preceding() and isBoundary().
375 void RBBIAPITest::TestIteration()
377 // This test just verifies that the API is present.
378 // Testing for correct operation of the break rules happens elsewhere.
380 UErrorCode status
=U_ZERO_ERROR
;
381 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
382 if (U_FAILURE(status
) || bi
== NULL
) {
383 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
388 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
389 if (U_FAILURE(status
) || bi
== NULL
) {
390 errcheckln(status
, "Failure creating Word break iterator. Status = %s", u_errorName(status
));
395 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
396 if (U_FAILURE(status
) || bi
== NULL
) {
397 errcheckln(status
, "Failure creating Line break iterator. Status = %s", u_errorName(status
));
402 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
403 if (U_FAILURE(status
) || bi
== NULL
) {
404 errcheckln(status
, "Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
409 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
410 if (U_FAILURE(status
) || bi
== NULL
) {
411 errcheckln(status
, "Failure creating Title break iterator. Status = %s", u_errorName(status
));
// The character iterator created here is the one used for all checks below.
416 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
417 if (U_FAILURE(status
) || bi
== NULL
) {
418 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
419 return; // Skip the rest of these tests.
423 UnicodeString testString
="0123456789";
424 bi
->setText(testString
);
429 errln("Incorrect value from bi->first(). Expected 0, got %d.", i
);
434 errln("Incorrect value from bi->last(). Expected 10, got %d", i
);
443 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__
, i
);
449 if (i
!= BreakIterator::DONE
) {
450 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__
, i
);
459 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__
, i
);
464 if (i
!= BreakIterator::DONE
) {
465 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__
, i
);
475 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
481 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__
, i
);
488 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__
, i
);
495 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
// following(offset): per the messages below, the test expects 5 after 4, 10 after 9,
// and DONE when asked for a boundary after offset 10.
502 i
= bi
->following(4);
504 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__
, i
);
507 i
= bi
->following(9);
509 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__
, i
);
512 i
= bi
->following(10);
513 if (i
!= BreakIterator::DONE
) {
514 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__
, i
);
// preceding(offset): per the messages below, the test expects 3 before 4, 9 before 10,
// 0 before 1, and DONE before offset 0.
521 i
= bi
->preceding(4);
523 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__
, i
);
526 i
= bi
->preceding(10);
528 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__
, i
);
531 i
= bi
->preceding(1);
533 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__
, i
);
536 i
= bi
->preceding(0);
537 if (i
!= BreakIterator::DONE
) {
538 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__
, i
);
// isBoundary(): offset 3 must be a boundary; offset 11 is past the 10-character text and must not be.
// NOTE(review): the two isBoundary errln calls pass `i` although their format strings
// contain only one %d (consumed by __LINE__) - the extra argument is unused.
546 if (bi
->isBoundary(3) != TRUE
) {
547 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__
, i
);
551 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__
, i
);
555 if (bi
->isBoundary(11) != FALSE
) {
556 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__
, i
);
560 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__
, i
);
569 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__
, i
);
574 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__
, i
);
579 if (i
!= BreakIterator::DONE
) {
580 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__
, i
);
// Builds a RuleBasedBreakIterator from source rules that define $Letters and $Numbers sets,
// then verifies with doBoundaryTest() that "abc123..abc" breaks at the offsets in bounds1.
592 void RBBIAPITest::TestBuilder() {
593 UnicodeString rulesString1
= "$Letters = [:L:];\n"
594 "$Numbers = [:N:];\n"
597 "[^$Letters $Numbers];\n"
599 UnicodeString testString1
= "abc123..abc";
601 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
602 UErrorCode status
=U_ZERO_ERROR
;
603 UParseError parseError
;
605 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
// A construction failure is reported through dataerrln().
606 if(U_FAILURE(status
)) {
607 dataerrln("Fail : in construction - %s", u_errorName(status
));
609 bi
->setText(testString1
);
610 doBoundaryTest(*bi
, testString1
, bounds1
);
618 // Single quotes within rules imply a grouping, so that a modifier
619 // following the quoted text (* or +) applies to all of the quoted chars.
// Builds a RuleBasedBreakIterator from rules containing the quoted group '$@!' followed by '*',
// then verifies with doBoundaryTest() that "$@!$@!X$@!!X" breaks at the offsets in bounds1.
621 void RBBIAPITest::TestQuoteGrouping() {
622 UnicodeString rulesString1
= "#Here comes the rule...\n"
623 "'$@!'*;\n" // (\$\@\!)*
626 UnicodeString testString1
= "$@!$@!X$@!!X";
628 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
629 UErrorCode status
=U_ZERO_ERROR
;
630 UParseError parseError
;
632 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
// A construction failure is reported through dataerrln().
633 if(U_FAILURE(status
)) {
634 dataerrln("Fail : in construction - %s", u_errorName(status
));
636 bi
->setText(testString1
);
637 doBoundaryTest(*bi
, testString1
, bounds1
);
644 // Test word break rule status constants.
// Checks rule-status tags. For an English word iterator, each boundary's getRuleStatus()
// value must fall in the half-open range [tag_lo[i], tag_hi[i]) and agree with
// getRuleStatusVec(). Then checks positions/tags from an English line iterator and the
// consistency of the UBRK_LINE_* constants.
646 void RBBIAPITest::TestRuleStatus() {
648 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
649 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
650 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
651 // 012345678901234567 8 9 0
654 UnicodeString
testString1(str
);
// bounds1[i] is the expected i-th boundary; tag_lo[i]/tag_hi[i] bound the status tag expected there.
655 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
656 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
657 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
658 UBRK_WORD_IDEO
, UBRK_WORD_NONE
};
660 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
661 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
662 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
};
664 UErrorCode status
=U_ZERO_ERROR
;
666 BreakIterator
*bi
= BreakIterator::createWordInstance(Locale::getEnglish(), status
);
667 if(U_FAILURE(status
)) {
668 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
670 bi
->setText(testString1
);
671 // First test that the breaks are in the right spots.
672 doBoundaryTest(*bi
, testString1
, bounds1
);
674 // Then go back and check tag values
677 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
678 if (pos
!= bounds1
[i
]) {
679 errln("FAIL: unexpected word break at postion %d", pos
);
682 tag
= bi
->getRuleStatus();
683 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
684 errln("FAIL: incorrect tag value %d at position %d", tag
, pos
);
688 // Check that we get the same tag values from getRuleStatusVec()
690 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
691 TEST_ASSERT_SUCCESS(status
);
693 TEST_ASSERT(vec
[0] == tag
);
698 // Now test line break status. This test mostly is to confirm that the status constants
699 // are correctly declared in the header.
700 testString1
= "test line. \n";
// NOTE(review): the error message below says "word break iterator" although a line
// instance is being created - the string is left unchanged here; confirm and correct.
703 bi
= BreakIterator::createLineInstance(Locale::getEnglish(), status
);
704 if(U_FAILURE(status
)) {
705 errcheckln(status
, "failed to create word break iterator. - %s", u_errorName(status
));
711 bi
->setText(testString1
);
713 tag
= bi
->getRuleStatus();
// Expected (pos, tag) pairs per the visible assignments: (0, SOFT), (5, SOFT), (12, HARD).
714 for (i
=0; i
<3; i
++) {
717 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
719 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
721 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
723 success
= FALSE
; break;
725 if (success
== FALSE
) {
726 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
731 tag
= bi
->getRuleStatus();
// Each constant must be below its own *_LIMIT, and HARD must not fall inside the SOFT range.
733 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
734 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
735 (UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
)) {
736 errln("UBRK_LINE_* constants from header are inconsistent.");
746 // Test the vector form of break rule status.
// Tests getRuleStatusVec() on an iterator built from custom rules that carry {nnn} status
// values: checks the number of statuses and their values at successive positions, then
// checks U_BUFFER_OVERFLOW_ERROR behavior when the supplied capacity is too small.
748 void RBBIAPITest::TestRuleStatusVec() {
749 UnicodeString
rulesString( "[A-N]{100}; \n"
754 "!.*;\n", -1, US_INV
);
755 UnicodeString testString1
= "Aapz5?";
756 int32_t statusVals
[10];
760 UErrorCode status
=U_ZERO_ERROR
;
761 UParseError parseError
;
763 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
764 if (U_FAILURE(status
)) {
765 dataerrln("Failure at file %s, line %d, error = %s", __FILE__
, __LINE__
, u_errorName(status
));
767 bi
->setText(testString1
);
// Each group below queries the status vector with capacity 10 and checks the count and values.
772 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
773 TEST_ASSERT_SUCCESS(status
);
774 TEST_ASSERT(numStatuses
== 2);
775 TEST_ASSERT(statusVals
[0] == 100);
776 TEST_ASSERT(statusVals
[1] == 300);
781 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
782 TEST_ASSERT_SUCCESS(status
);
783 TEST_ASSERT(numStatuses
== 2);
784 TEST_ASSERT(statusVals
[0] == 200);
785 TEST_ASSERT(statusVals
[1] == 300);
790 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
791 TEST_ASSERT_SUCCESS(status
);
792 TEST_ASSERT(numStatuses
== 2);
793 TEST_ASSERT(statusVals
[0] == 200);
794 TEST_ASSERT(statusVals
[1] == 300);
799 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
800 TEST_ASSERT_SUCCESS(status
);
801 TEST_ASSERT(numStatuses
== 1);
802 TEST_ASSERT(statusVals
[0] == 300);
807 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
808 TEST_ASSERT_SUCCESS(status
);
809 TEST_ASSERT(numStatuses
== 2);
810 TEST_ASSERT(statusVals
[0] == 400);
811 TEST_ASSERT(statusVals
[1] == 500);
816 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
817 TEST_ASSERT_SUCCESS(status
);
818 TEST_ASSERT(numStatuses
== 1);
819 TEST_ASSERT(statusVals
[0] == 0);
822 // Check buffer overflow error handling. Char == A
// The buffer is pre-filled with -1 (all bytes 0xFF) so untouched slots can be detected.
// Capacity 0: overflow is reported, the full count (2) is returned, and nothing is written.
827 memset(statusVals
, -1, sizeof(statusVals
));
828 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
829 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
830 TEST_ASSERT(numStatuses
== 2);
831 TEST_ASSERT(statusVals
[0] == -1);
// Capacity 1: overflow is reported, only the first value is written.
833 status
= U_ZERO_ERROR
;
834 memset(statusVals
, -1, sizeof(statusVals
));
835 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
836 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
837 TEST_ASSERT(numStatuses
== 2);
838 TEST_ASSERT(statusVals
[0] == 100);
839 TEST_ASSERT(statusVals
[1] == -1);
// Capacity 2: exact fit, success, and the slot after the last value stays untouched.
841 status
= U_ZERO_ERROR
;
842 memset(statusVals
, -1, sizeof(statusVals
));
843 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
844 TEST_ASSERT_SUCCESS(status
);
845 TEST_ASSERT(numStatuses
== 2);
846 TEST_ASSERT(statusVals
[0] == 100);
847 TEST_ASSERT(statusVals
[1] == 300);
848 TEST_ASSERT(statusVals
[2] == -1);
855 // Bug 2190 Regression test. Builder crash on rule consisting of only a
856 // $variable reference
// Regression test: builds a RuleBasedBreakIterator from rules that define $aaa, then verifies
// with doBoundaryTest() that "abcdabcd" breaks at offsets 0, 4 and 8.
857 void RBBIAPITest::TestBug2190() {
858 UnicodeString rulesString1
= "$aaa = abcd;\n"
861 UnicodeString testString1
= "abcdabcd";
863 int32_t bounds1
[] = {0, 4, 8};
864 UErrorCode status
=U_ZERO_ERROR
;
865 UParseError parseError
;
867 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
// A construction failure is reported through dataerrln().
868 if(U_FAILURE(status
)) {
869 dataerrln("Fail : in construction - %s", u_errorName(status
));
871 bi
->setText(testString1
);
872 doBoundaryTest(*bi
, testString1
, bounds1
);
// Tests BreakIterator registration (compiled only when UCONFIG_NO_SERVICE is off):
// registers the ja_JP word iterator under locale "xx" for UBRK_WORD, checks which iterators
// are returned for several locales, looks for "xx" in the available-locale enumeration,
// then unregisters and repeats the lookups.
878 void RBBIAPITest::TestRegistration() {
879 #if !UCONFIG_NO_SERVICE
880 UErrorCode status
= U_ZERO_ERROR
;
881 BreakIterator
* ja_word
= BreakIterator::createWordInstance("ja_JP", status
);
882 // ok to not delete these if we exit because of error?
883 BreakIterator
* ja_char
= BreakIterator::createCharacterInstance("ja_JP", status
);
884 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
885 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
// Missing-data conditions are reported through dataerrln().
887 if (status
== U_MISSING_RESOURCE_ERROR
|| status
== U_FILE_ACCESS_ERROR
) {
888 dataerrln("Error creating instances of break interactors - %s", u_errorName(status
));
// `key` identifies this registration and is passed to unregister() below.
898 URegistryKey key
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
);
900 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
901 if (ja_word
&& *ja_word
== *root_word
) {
902 errln("japan not different from root");
// xx_XX/word is compared against the registered ja_word.
908 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
911 fail
= *result
!= *ja_word
;
915 errln("bad result for xx_XX/word");
// ja_JP/char is compared against ja_char.
920 BreakIterator
* result
= BreakIterator::createCharacterInstance("ja_JP", status
);
923 fail
= *result
!= *ja_char
;
927 errln("bad result for ja_JP/char");
// xx_XX/char is compared against root_char (only UBRK_WORD was registered for "xx").
932 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
935 fail
= *result
!= *root_char
;
939 errln("bad result for xx_XX/char");
// While registered, the enumeration of available locales is searched for "xx".
944 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
946 const UnicodeString
* p
;
947 while ((p
= avail
->snext(status
))) {
948 if (p
->compare("xx") == 0) {
955 errln("did not find test locale");
960 UBool unreg
= BreakIterator::unregister(key
, status
);
962 errln("unable to unregister");
// After unregistering, en_US/word is compared against the root word iterator.
967 BreakIterator
* result
= BreakIterator::createWordInstance("en_US", status
);
968 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
971 fail
= *root
!= *result
;
976 errln("did not get root break");
// After unregistering, the enumeration is searched for "xx" again; finding it is the error case.
981 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
983 const UnicodeString
* p
;
984 while ((p
= avail
->snext(status
))) {
985 if (p
->compare("xx") == 0) {
992 errln("found test locale");
// The array form getAvailableLocales(count) is searched for the English locale.
998 UBool foundLocale
= FALSE
;
999 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
1000 for (int i
=0; i
<count
; i
++) {
1001 if (avail
[i
] == Locale::getEnglish()) {
1006 if (foundLocale
== FALSE
) {
1007 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1012 // ja_word was adopted by factory
// Round-trip check for one break-rule data file: opens the "brk" data item named by
// `dataFile`, reads the rule source text embedded in it, rebuilds a RuleBasedBreakIterator
// from that source, and compares the rebuilt binary rules with the original data via memcmp.
//
// dataFile: name of the break-iterator data item passed to udata_open() (e.g. "word").
1019 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
1020 UErrorCode status
= U_ZERO_ERROR
;
1021 UParseError parseError
;
1022 parseError
.line
= 0;
1023 parseError
.offset
= 0;
1024 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
));
1026 const UChar
*builtSource
;
1027 const uint8_t *rbbiRules
;
1028 const uint8_t *builtRules
;
1030 if (U_FAILURE(status
)) {
1031 errcheckln(status
, "%s:%d Can't open \"%s\" - %s", __FILE__
, __LINE__
, dataFile
, u_errorName(status
));
// The rule source is located at byte offset fRuleSource from the start of the RBBIDataHeader.
1035 builtRules
= (const uint8_t *)udata_getMemory(data
.getAlias());
1036 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
1037 RuleBasedBreakIterator
*brkItr
= new RuleBasedBreakIterator(builtSource
, parseError
, status
);
// On a build failure, report the parse position and dump the rule source.
1038 if (U_FAILURE(status
)) {
1039 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1040 __FILE__
, __LINE__
, u_errorName(status
), parseError
.line
, parseError
.offset
);
1041 errln(UnicodeString(builtSource
));
// Compare the first `length` bytes of the rebuilt rules against the pre-built data.
1044 rbbiRules
= brkItr
->getBinaryRules(length
);
1045 logln("Comparing \"%s\" len=%d", dataFile
, length
);
1046 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
1047 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__
, __LINE__
, dataFile
);
// Runs RoundtripRule() for each of the break-rule data files listed below.
1053 void RBBIAPITest::TestRoundtripRules() {
1054 RoundtripRule("word");
1055 RoundtripRule("title");
1056 RoundtripRule("sent");
1057 RoundtripRule("line");
1058 RoundtripRule("char");
1060 RoundtripRule("word_POSIX");
1065 // Check getBinaryRules() and construction of a break iterator from those rules.
// Extracts the binary rules from an English line-break iterator with getBinaryRules(),
// copies them, constructs a second RuleBasedBreakIterator from the copy, and checks that
// both iterators behave the same on "Hello, World!" (the assertions expect n == 2 for each).
1067 void RBBIAPITest::TestGetBinaryRules() {
1068 UErrorCode status
=U_ZERO_ERROR
;
1069 LocalPointer
<BreakIterator
> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status
));
1070 if (U_FAILURE(status
)) {
1071 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status
));
// rbbi aliases the object owned by `bi`; it is not separately owned.
1074 RuleBasedBreakIterator
*rbbi
= dynamic_cast<RuleBasedBreakIterator
*>(bi
.getAlias());
1076 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1080 // Check that the new line break iterator is nominally functional.
1081 UnicodeString
helloWorld("Hello, World!");
1082 rbbi
->setText(helloWorld
);
1084 while (bi
->next() != UBRK_DONE
) {
1087 TEST_ASSERT(n
== 2);
1089 // Extract the binary rules as a uint8_t blob.
1090 uint32_t ruleLength
;
1091 const uint8_t *binRules
= rbbi
->getBinaryRules(ruleLength
);
1092 TEST_ASSERT(ruleLength
> 0);
1093 TEST_ASSERT(binRules
!= NULL
);
1095 // Clone the binary rules, and create a break iterator from that.
1096 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1097 uint8_t *clonedRules
= new uint8_t[ruleLength
];
1098 memcpy(clonedRules
, binRules
, ruleLength
);
1099 RuleBasedBreakIterator
clonedBI(clonedRules
, ruleLength
, status
);
1100 TEST_ASSERT_SUCCESS(status
);
1102 // Check that the cloned line break iterator is nominally alive.
1103 clonedBI
.setText(helloWorld
);
1105 while (clonedBI
.next() != UBRK_DONE
) {
1108 TEST_ASSERT(n
== 2);
// Release the copied rule bytes allocated with new[] above.
1110 delete[] clonedRules
;
// Tests RuleBasedBreakIterator::refreshInputText(): iterates part-way through a string,
// copies the string to a new buffer and overwrites the original, refreshes the iterator
// with a UText on the new buffer, and checks that iteration continues with the expected
// boundaries.
1114 void RBBIAPITest::TestRefreshInputText() {
1116 * RefreshInput changes out the input of a Break Iterator without
1117 * changing anything else in the iterator's state. Used with Java JNI,
1118 * when Java moves the underlying string storage. This test
1119 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1120 * The right set of boundaries should still be found.
1122 UChar testStr
[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1123 UChar movedStr
[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1124 UErrorCode status
= U_ZERO_ERROR
;
1125 UText ut1
= UTEXT_INITIALIZER
;
1126 UText ut2
= UTEXT_INITIALIZER
;
1127 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
);
1128 TEST_ASSERT_SUCCESS(status
);
1130 utext_openUChars(&ut1
, testStr
, -1, &status
);
1131 TEST_ASSERT_SUCCESS(status
);
// The rest of the test only runs when setup succeeded.
1133 if (U_SUCCESS(status
)) {
1134 bi
->setText(&ut1
, status
);
1135 TEST_ASSERT_SUCCESS(status
);
1137 /* Line boundaries will occur before each letter in the original string */
1138 TEST_ASSERT(1 == bi
->next());
1139 TEST_ASSERT(3 == bi
->next());
1141 /* Move the string, kill the original string. */
1142 u_strcpy(movedStr
, testStr
);
1143 u_memset(testStr
, 0x20, u_strlen(testStr
));
1144 utext_openUChars(&ut2
, movedStr
, -1, &status
);
1145 TEST_ASSERT_SUCCESS(status
);
// refreshInputText() is expected to return a reference to the same iterator object.
1146 RuleBasedBreakIterator
*returnedBI
= &bi
->refreshInputText(&ut2
, status
);
1147 TEST_ASSERT_SUCCESS(status
);
1148 TEST_ASSERT(bi
== returnedBI
);
1150 /* Find the following matches, now working in the moved string. */
1151 TEST_ASSERT(5 == bi
->next());
1152 TEST_ASSERT(7 == bi
->next());
1153 TEST_ASSERT(8 == bi
->next());
1154 TEST_ASSERT(UBRK_DONE
== bi
->next());
1163 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
// Debug helper: logs `ustr`, collects every break position returned by brk->next(), and
// builds an output string in which the text is wrapped in CHSTR/CHEND (lenticular brackets)
// with a PILCROW appended after each segment, followed by the numeric break positions.
//
// brk:  break iterator to advance; presumably already has `ustr` set as its text - confirm.
// ustr: the text being broken.
// it:   test object used for logging and error reporting.
1164 static void prtbrks(BreakIterator
* brk
, const UnicodeString
&ustr
, IntlTest
&it
) {
1165 static const UChar PILCROW
=0x00B6, CHSTR
=0x3010, CHEND
=0x3011; // lenticular brackets
1166 it
.logln(UnicodeString("String:'")+ustr
+UnicodeString("'"));
// NOTE(review): `pos` is allocated with new[]; no matching delete[] is visible in this
// listing - confirm it is released on every path, including after the errln below.
1168 int32_t *pos
= new int32_t[ustr
.length()];
1169 int32_t posCount
= 0;
1171 // calculate breaks up front, so we can print out
1172 // sans any debugging
1173 for(int32_t n
= 0; (n
=brk
->next())!=UBRK_DONE
; ) {
1174 pos
[posCount
++] = n
;
// Guard against collecting more positions than the buffer (ustr.length() entries) can hold.
1175 if(posCount
>=ustr
.length()) {
1176 it
.errln("brk count exceeds string length!");
1181 out
.append((UChar
)CHSTR
);
1183 for(int32_t i
=0;i
<posCount
;i
++) {
1185 out
.append(ustr
.tempSubString(prev
,n
-prev
));
1186 out
.append((UChar
)PILCROW
);
// Append the remaining text after the last appended segment, then the closing bracket.
1189 out
.append(ustr
.tempSubString(prev
,ustr
.length()-prev
));
1190 out
.append((UChar
)CHEND
);
1194 for(int32_t i
=0;i
<posCount
;i
++) {
// NOTE(review): sprintf writes into `tmp`, whose declaration/size is not visible here -
// confirm it can hold any int32_t formatted as "%d " plus the terminator.
1196 sprintf(tmp
,"%d ",pos
[i
]);
1197 out
.append(UnicodeString(tmp
));
// Exercises FilteredBreakIteratorBuilder on top of sentence break iterators.
//
// A sample English passage containing "Mr.", "Capt." and "Sgt." is iterated
// with differently configured filtered iterators, and the offsets returned
// by successive next() calls are asserted:
//   1. empty builder, nothing suppressed:          20, 84, 90, 181, 278
//   2. empty builder, "Mr." suppressed:            84, 90, 278
//   3. empty builder, "Mr." and "Capt." suppressed: 84, 278
//   4. English builder, "Capt." unsuppressed:      84, 90, 278
//   5. English builder, unmodified:                84, 278
//   6. French builder on "C'est MM. Duval.":       16, DONE
//      (the English iterator from scenario 5 gives 10, 16, DONE)
// After each scenario the iterator is rewound with first() and its breaks
// are dumped through prtbrks().
//
// Every builder->build() call is handed baseBI.orphan(), which is why baseBI
// is re-created before each build.
//
// The body is wrapped in
// #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION;
// the final "Skipped" log line is presumably the alternative branch (the
// #else / #endif lines are not visible in this excerpt).
1204 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1205 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1206 UErrorCode status
= U_ZERO_ERROR
;
1207 LocalPointer
<FilteredBreakIteratorBuilder
> builder
;
1208 LocalPointer
<BreakIterator
> baseBI
;
1209 LocalPointer
<BreakIterator
> filteredBI
;
1210 LocalPointer
<BreakIterator
> frenchBI
;
// Test passage and the two abbreviations that get suppressed explicitly.
1212 const UnicodeString
text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1213 const UnicodeString
ABBR_MR("Mr.");
1214 const UnicodeString
ABBR_CAPT("Capt.");
// --- Scenario 1: empty builder, no suppressions added. ---
1217 logln("Constructing empty builder\n");
1218 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1219 TEST_ASSERT_SUCCESS(status
);
1221 logln("Constructing base BI\n");
1222 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1223 TEST_ASSERT_SUCCESS(status
);
1225 logln("Building new BI\n");
1226 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1227 TEST_ASSERT_SUCCESS(status
);
// Only iterate if everything above succeeded.
1229 if (U_SUCCESS(status
)) {
// With nothing suppressed, a break is expected after every abbreviation
// listed in the trailing comments below.
1231 filteredBI
->setText(text
);
1232 TEST_ASSERT(20 == filteredBI
->next()); // Mr.
1233 TEST_ASSERT(84 == filteredBI
->next()); // recovered.
1234 TEST_ASSERT(90 == filteredBI
->next()); // Capt.
1235 TEST_ASSERT(181 == filteredBI
->next()); // Mr.
1236 TEST_ASSERT(278 == filteredBI
->next()); // charge.
// Rewind before dumping the breaks for the log.
1237 filteredBI
->first();
1238 prtbrks(filteredBI
.getAlias(), text
, *this);
// --- Scenario 2: empty builder with "Mr." suppressed. ---
1243 logln("Constructing empty builder\n");
1244 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1245 TEST_ASSERT_SUCCESS(status
);
1247 if (U_SUCCESS(status
)) {
// Round-trip the suppression API and check its boolean results:
// add, add again, remove, remove again, then add once more so that
// "Mr." ends up suppressed.
1248 logln("Adding Mr. as an exception\n");
1249 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1250 TEST_ASSERT(FALSE
== builder
->suppressBreakAfter(ABBR_MR
, status
)); // already have it
1251 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
));
1252 TEST_ASSERT(FALSE
== builder
->unsuppressBreakAfter(ABBR_MR
, status
)); // already removed it
1253 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1254 TEST_ASSERT_SUCCESS(status
);
1256 logln("Constructing base BI\n");
1257 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1258 TEST_ASSERT_SUCCESS(status
);
1260 logln("Building new BI\n");
1261 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1262 TEST_ASSERT_SUCCESS(status
);
// The breaks after "Mr." (20 and 181 in scenario 1) are no longer expected.
// NOTE(review): any status guard around this section is not visible in
// this excerpt.
1265 filteredBI
->setText(text
);
1266 TEST_ASSERT(84 == filteredBI
->next());
1267 TEST_ASSERT(90 == filteredBI
->next());// Capt.
1268 TEST_ASSERT(278 == filteredBI
->next());
1269 filteredBI
->first();
1270 prtbrks(filteredBI
.getAlias(), text
, *this);
// --- Scenario 3: empty builder with both "Mr." and "Capt." suppressed. ---
1276 logln("Constructing empty builder\n");
1277 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
));
1278 TEST_ASSERT_SUCCESS(status
);
1280 if (U_SUCCESS(status
)) {
1281 logln("Adding Mr. and Capt as an exception\n");
1282 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_MR
, status
));
1283 TEST_ASSERT(TRUE
== builder
->suppressBreakAfter(ABBR_CAPT
, status
));
1284 TEST_ASSERT_SUCCESS(status
);
1286 logln("Constructing base BI\n");
1287 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1288 TEST_ASSERT_SUCCESS(status
);
1290 logln("Building new BI\n");
1291 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1292 TEST_ASSERT_SUCCESS(status
);
// Only the two remaining breaks (84 and 278) are expected.
1295 filteredBI
->setText(text
);
1296 TEST_ASSERT(84 == filteredBI
->next());
1297 TEST_ASSERT(278 == filteredBI
->next());
1298 filteredBI
->first();
1299 prtbrks(filteredBI
.getAlias(), text
, *this);
// --- Scenario 4: builder created for the English locale, "Capt." removed. ---
1305 logln("Constructing English builder\n");
1306 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1307 TEST_ASSERT_SUCCESS(status
);
1309 logln("Constructing base BI\n");
1310 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1311 TEST_ASSERT_SUCCESS(status
);
1313 if (U_SUCCESS(status
)) {
// Expecting TRUE here means the English builder is expected to already
// contain "Capt." (compare the FALSE result asserted in scenario 2 when
// the entry was absent).
1314 logln("unsuppressing 'Capt'");
1315 TEST_ASSERT(TRUE
== builder
->unsuppressBreakAfter(ABBR_CAPT
, status
));
1317 logln("Building new BI\n");
1318 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1319 TEST_ASSERT_SUCCESS(status
);
// Guard on the pointer itself before dereferencing.
1321 if(filteredBI
.isValid()) {
// The break after "Capt." (90) is expected again.
1323 filteredBI
->setText(text
);
1324 TEST_ASSERT(84 == filteredBI
->next());
1325 TEST_ASSERT(90 == filteredBI
->next());
1326 TEST_ASSERT(278 == filteredBI
->next());
1327 filteredBI
->first();
1328 prtbrks(filteredBI
.getAlias(), text
, *this);
// --- Scenario 5: builder created for the English locale, left unmodified. ---
1335 logln("Constructing English builder\n");
1336 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
));
1337 TEST_ASSERT_SUCCESS(status
);
1339 logln("Constructing base BI\n");
1340 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
));
1341 TEST_ASSERT_SUCCESS(status
);
1343 if (U_SUCCESS(status
)) {
1344 logln("Building new BI\n");
1345 filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1346 TEST_ASSERT_SUCCESS(status
);
1348 if(filteredBI
.isValid()) {
// Same expectations as scenario 3: only 84 and 278.
1350 filteredBI
->setText(text
);
1351 TEST_ASSERT(84 == filteredBI
->next());
1352 TEST_ASSERT(278 == filteredBI
->next());
1353 filteredBI
->first();
1354 prtbrks(filteredBI
.getAlias(), text
, *this);
1359 // reenable once french is in
// --- Scenario 6: builder created for the French locale. ---
1361 logln("Constructing French builder");
1362 builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status
));
1363 TEST_ASSERT_SUCCESS(status
);
1365 logln("Constructing base BI\n");
1366 baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status
));
1367 TEST_ASSERT_SUCCESS(status
);
1369 if (U_SUCCESS(status
)) {
// The result goes into frenchBI; filteredBI keeps the iterator built in
// scenario 5.
1370 logln("Building new BI\n");
1371 frenchBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
));
1372 TEST_ASSERT_SUCCESS(status
);
1375 if(frenchBI
.isValid()) {
// The French iterator is expected to report a single break at the end
// of the text (16), i.e. none after "MM.".
1377 UnicodeString
frText("C'est MM. Duval.");
1378 frenchBI
->setText(frText
);
1379 TEST_ASSERT(16 == frenchBI
->next());
1380 TEST_ASSERT(BreakIterator::DONE
== frenchBI
->next());
1382 prtbrks(frenchBI
.getAlias(), frText
, *this);
// Contrast: the English filtered iterator run over the same French text.
// NOTE(review): filteredBI is dereferenced below, but the only guard
// visible in this excerpt checks frenchBI.isValid() -- confirm filteredBI
// cannot be null here.
1383 logln("Testing against English:");
1384 filteredBI
->setText(frText
);
1385 TEST_ASSERT(10 == filteredBI
->next()); // wrong for french, but filterBI is english.
1386 TEST_ASSERT(16 == filteredBI
->next());
1387 TEST_ASSERT(BreakIterator::DONE
== filteredBI
->next());
1388 filteredBI
->first();
1389 prtbrks(filteredBI
.getAlias(), frText
, *this);
// Comparison operators: an iterator equals itself, and the French and
// English filtered iterators differ in both operand orders.
1392 TEST_ASSERT_TRUE(*frenchBI
== *frenchBI
);
1393 TEST_ASSERT_TRUE(*filteredBI
!= *frenchBI
);
1394 TEST_ASSERT_TRUE(*frenchBI
!= *filteredBI
);
// Reported when the French iterator is unavailable (presumably the else
// branch of the isValid() check; the else line is not visible here).
1396 dataerrln("French BI: not valid.");
// Logged when the feature guards at the top of the function exclude the body
// (presumably under #else; not visible here).
1401 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1405 //---------------------------------------------
1407 //---------------------------------------------
// Test-suite dispatch entry point: lists every test method of RBBIAPITest
// through the TESTCASE_AUTO macros.
//
// index - test index supplied by the caller; not referenced directly in the
//         visible code, presumably consumed by the TESTCASE_AUTO_* macros.
// exec  - when true, a banner naming the suite is logged; presumably also
//         consulted by the macros -- TODO confirm against their definition.
// name  - output reference; not assigned directly in the visible code,
//         presumably filled in by the macros.
// The fourth (char*) parameter is unnamed and unused.
//
// NOTE(review): the #endif lines that close the conditional sections below
// are not visible in this excerpt, so the exact extent of each section
// cannot be confirmed from here.
1409 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
1411 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
1412 TESTCASE_AUTO_BEGIN
;
// Registered only when file I/O is available in this ICU configuration.
1413 #if !UCONFIG_NO_FILE_IO
1414 TESTCASE_AUTO(TestCloneEquals
);
1415 TESTCASE_AUTO(TestgetRules
);
1416 TESTCASE_AUTO(TestHashCode
);
1417 TESTCASE_AUTO(TestGetSetAdoptText
);
1418 TESTCASE_AUTO(TestIteration
);
1420 TESTCASE_AUTO(TestBuilder
);
1421 TESTCASE_AUTO(TestQuoteGrouping
);
1422 TESTCASE_AUTO(TestRuleStatusVec
);
1423 TESTCASE_AUTO(TestBug2190
);
// Second group that depends on file I/O.
1424 #if !UCONFIG_NO_FILE_IO
1425 TESTCASE_AUTO(TestRegistration
);
1426 TESTCASE_AUTO(TestBoilerPlate
);
1427 TESTCASE_AUTO(TestRuleStatus
);
1428 TESTCASE_AUTO(TestRoundtripRules
);
1429 TESTCASE_AUTO(TestGetBinaryRules
);
1431 TESTCASE_AUTO(TestRefreshInputText
);
// The filtered-break-iterator test performs its own feature check and logs
// "Skipped" when filtered break iteration is compiled out.
1432 #if !UCONFIG_NO_BREAK_ITERATION
1433 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder
);
1439 //---------------------------------------------
1440 //Internal subroutines
1441 //---------------------------------------------
// Checks BreakIterator::isBoundary() for every offset of `text`.
//
// For each i in [0, text.length()) the result of bi.isBoundary(i) is logged
// and compared with the expected boundary list; a mismatch in either
// direction is reported through errln().
//
// bi         - iterator under test; presumably already has `text` set as its
//              text (no setText call is visible here) -- TODO confirm.
// text       - only its length is used in the visible code.
// boundaries - expected boundary offsets, read through the cursor `p`;
//              presumably in ascending order. How the list is terminated is
//              not visible here -- TODO confirm.
//
// NOTE(review): the declarations of `isB` and `p`, the advancement of `p`,
// and the conditions that select between the two errln() calls are not
// visible in this excerpt.
1443 void RBBIAPITest::doBoundaryTest(BreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1444 logln((UnicodeString
)"testIsBoundary():");
1447 for (int32_t i
= 0; i
< text
.length(); i
++) {
1448 isB
= bi
.isBoundary(i
);
1449 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
// Offset i is the next expected boundary.
1451 if (i
== boundaries
[p
]) {
1453 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
// Reported when a boundary is found at an offset that is not expected.
1458 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
// Verifies one iterator movement: the offset it returned and the text it
// spans.
//
// testString     - text the iterator was run over.
// start          - offset the movement started from.
// gotoffset      - offset actually returned by the iterator.
// expectedOffset - offset that should have been returned.
// expectedString - expected text between `start` and `gotoffset`, converted
//                  with CharsToUnicodeString() before comparison.
//
// An offset mismatch is reported with errln(). The text between the two
// offsets is then extracted (lower offset first, whichever of start and
// gotoffset that is) and compared with the expectation; a mismatch is
// reported with errln(), otherwise the selection is logged (presumably in an
// else branch; the else lines are not visible in this excerpt).
1462 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1463 UnicodeString selected
;
1464 UnicodeString expected
=CharsToUnicodeString(expectedString
);
// Offset check; the test continues with the text check either way.
1466 if(gotoffset
!= expectedOffset
)
1467 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
// Forward movement: the selection runs from start to gotoffset.
1468 if(start
<= gotoffset
){
1469 testString
.extractBetween(start
, gotoffset
, selected
);
// Backward movement: the selection runs from gotoffset to start.
1472 testString
.extractBetween(gotoffset
, start
, selected
);
// Text check.
1474 if(selected
.compare(expected
) != 0)
1475 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1477 logln(prettify("****selected \"" + selected
+ "\""));
1480 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */