1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   3 /******************************************************************** 
   4  * Copyright (c) 1999-2016, International Business Machines 
   5  * Corporation and others. All Rights Reserved. 
   6  ******************************************************************** 
   7  *   Date        Name        Description 
   8  *   12/14/99    Madhu        Creation. 
   9  *   01/12/2000  Madhu        updated for changed API 
  10  ********************************************************************/ 
  12 #include "unicode/utypes.h" 
  14 #if !UCONFIG_NO_BREAK_ITERATION 
  16 #include "unicode/uchar.h" 
  18 #include "unicode/rbbi.h" 
  19 #include "unicode/schriter.h" 
  24 #include "unicode/locid.h" 
  25 #include "unicode/ustring.h" 
  26 #include "unicode/utext.h" 
  28 #if !UCONFIG_NO_BREAK_ITERATION 
  29 #include "unicode/filteredbrk.h" 
  30 #include <stdio.h> // for sprintf 
  33  * API Test the RuleBasedBreakIterator class 
  // TEST_ASSERT_SUCCESS(status): when `status` holds a failure code, reports it through
  // dataerrln() together with the current file, line and the ICU error name.
  37 #define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \ 
  38     if (U_FAILURE(status)) { \ 
  39         dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \ 
  41 } UPRV_BLOCK_MACRO_END 
  // TEST_ASSERT(expr): when `expr` evaluates to FALSE, reports a test failure through
  // errln() with the current file, line and the stringified expression.
  43 #define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \ 
  44     if ((expr) == FALSE) { \ 
  45         errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \ 
  47 } UPRV_BLOCK_MACRO_END 
  // Exercises RuleBasedBreakIterator's operator==, operator!=, assignment and clone()
  // using character, word and line iterators for the default locale.
  // NOTE(review): some lines of this function (braces, guards, a few statements) are not
  // visible in this listing; the comments below describe only what is shown.
  49 void RBBIAPITest::TestCloneEquals() 
  52     UErrorCode status
=U_ZERO_ERROR
; 
  // bi1, biequal and bi3 are character iterators; bi2 is a word iterator.
  53     RuleBasedBreakIterator
* bi1     
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
  54     RuleBasedBreakIterator
* biequal 
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
  55     RuleBasedBreakIterator
* bi3     
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
  56     RuleBasedBreakIterator
* bi2     
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
); 
  57     if(U_FAILURE(status
)){ 
  58         errcheckln(status
, "Fail : in construction - %s", u_errorName(status
)); 
  // bi1, bi2 and biequal share the same text; bi3 gets a different text ("hello").
  63     UnicodeString testString
="Testing word break iterators's clone() and equals()"; 
  64     bi1
->setText(testString
); 
  65     bi2
->setText(testString
); 
  66     biequal
->setText(testString
); 
  68     bi3
->setText("hello"); 
  70     logln((UnicodeString
)"Testing equals()"); 
  72     logln((UnicodeString
)"Testing == and !="); 
  // b starts as "bi1 != biequal", which is expected to be false for two character
  // iterators over the same text.
  73     UBool b 
= (*bi1 
!= *biequal
); 
  77         errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__
, __LINE__
); 
  // Iterators of a different kind (bi2) or over different text (bi3) must not compare equal.
  80     if(*bi2 
== *biequal 
|| *bi2 
== *bi1  
|| *biequal 
== *bi3
) 
  81         errln("%s:%d ERROR:2 RBBI's == and != operator  failed.", __FILE__
, __LINE__
); 
  84     // Quick test of RuleBasedBreakIterator assignment - 
  86     //    two different iterators are != 
  87     //    they are == after assignment 
  88     //    source and dest iterator produce the same next() after assignment. 
  89     //    deleting one doesn't disable the other. 
  90     logln("Testing assignment"); 
  91     RuleBasedBreakIterator 
*bix 
= (RuleBasedBreakIterator 
*)BreakIterator::createLineInstance(Locale::getDefault(), status
); 
  92     if(U_FAILURE(status
)){ 
  93         errcheckln(status
, "Fail : in construction - %s", u_errorName(status
)); 
  // Two default-constructed iterators: they must differ from bix and equal each other.
  97     RuleBasedBreakIterator biDefault
, biDefault2
; 
  98     if(U_FAILURE(status
)){ 
  99         errln("%s:%d FAIL : in construction of default iterator", __FILE__
, __LINE__
); 
 102     if (biDefault 
== *bix
) { 
 103         errln("%s:%d ERROR: iterators should not compare ==", __FILE__
, __LINE__
); 
 106     if (biDefault 
!= biDefault2
) { 
 107         errln("%s:%d ERROR: iterators should compare ==", __FILE__
, __LINE__
); 
 112     UnicodeString   
HelloString("Hello Kitty"); 
 113     bix
->setText(HelloString
); 
 115         errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__
, __LINE__
); 
 119         errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__
, __LINE__
); 
  // Both iterators must report the same first boundary, and that boundary must be 7.
 122     int bixnext 
= bix
->next(); 
 123     int bi2next 
= bi2
->next(); 
 124     if (! (bixnext 
== bi2next 
&& bixnext 
== 7)) { 
 125         errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__
, __LINE__
); 
  // bi2 must keep iterating (next boundary 8) after the copy has been deleted.
 128     if (bi2
->next() != 8) { 
 129         errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__
, __LINE__
); 
 134     logln((UnicodeString
)"Testing clone()"); 
 135     RuleBasedBreakIterator
* bi1clone 
= bi1
->clone(); 
 136     RuleBasedBreakIterator
* bi2clone 
= bi2
->clone(); 
  // bi1clone must equal bi1 and biequal, and differ from bi3 and bi2.
 138     if(*bi1clone 
!= *bi1 
|| *bi1clone  
!= *biequal  
|| 
 139       *bi1clone 
== *bi3 
|| *bi1clone 
== *bi2
) 
 140         errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__
, __LINE__
); 
  // bi2clone must equal bi2 only.
 142     if(*bi2clone 
== *bi1 
|| *bi2clone 
== *biequal 
|| 
 143        *bi2clone 
== *bi3 
|| *bi2clone 
!= *bi2
) 
 144         errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__
, __LINE__
); 
  // Each clone must report the same text as its source, and the two clones must differ.
 146     if(bi1
->getText() != bi1clone
->getText()   || 
 147        bi2clone
->getText() != bi2
->getText()   || 
 148        *bi2clone 
== *bi1clone 
) 
 149         errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__
, __LINE__
); 
 // Checks the boilerplate comparison operators (operator== / operator!=) of BreakIterator
 // on iterators created for different locales.
 // NOTE(review): the comparison conditions guarding the errln() calls are not visible in
 // this listing.
 159 void RBBIAPITest::TestBoilerPlate() 
 161     UErrorCode status 
= U_ZERO_ERROR
; 
 // Word iterators for "hi" and "hi_IN".
 162     BreakIterator
* a 
= BreakIterator::createWordInstance(Locale("hi"), status
); 
 163     BreakIterator
* b 
= BreakIterator::createWordInstance(Locale("hi_IN"),status
); 
 164     if (U_FAILURE(status
)) { 
 165         errcheckln(status
, "Creation of break iterator failed %s", u_errorName(status
)); 
 169         errln("Failed: boilerplate method operator!= does not return correct results"); 
 171     // Japanese word break iterators are identical to root with 
 172     // a dictionary-based break iterator 
 // NOTE(review): the comment above speaks of word iterators, but c and d are created
 // with createCharacterInstance() — confirm which is intended.
 173     BreakIterator
* c 
= BreakIterator::createCharacterInstance(Locale("ja"),status
); 
 174     BreakIterator
* d 
= BreakIterator::createCharacterInstance(Locale("root"),status
); 
 177             errln("Failed: boilerplate method operator== does not return correct results"); 
 180         errln("creation of break iterator failed"); 
 // Checks RuleBasedBreakIterator::getRules(): a clone reports the same rules as its
 // source, character and word iterators report different rules, and a default-constructed
 // iterator reports an empty rule string.
 188 void RBBIAPITest::TestgetRules() 
 190     UErrorCode status
=U_ZERO_ERROR
; 
 // bi1 is a character iterator, bi2 a word iterator; both are owned by LocalPointer.
 192     LocalPointer
<RuleBasedBreakIterator
> bi1( 
 193             (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
), status
); 
 194     LocalPointer
<RuleBasedBreakIterator
> bi2( 
 195             (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
), status
); 
 196     if(U_FAILURE(status
)){ 
 197         errcheckln(status
, "%s:%d, FAIL: in construction - %s", __FILE__
, __LINE__
, u_errorName(status
)); 
 201     logln((UnicodeString
)"Testing getRules()"); 
 203     UnicodeString 
text(u
"Hello there"); 
 // bi3 is a clone of the character iterator bi1.
 206     LocalPointer 
<RuleBasedBreakIterator
> bi3(bi1
->clone()); 
 208     UnicodeString temp
=bi1
->getRules(); 
 209     UnicodeString temp2
=bi2
->getRules(); 
 210     UnicodeString temp3
=bi3
->getRules(); 
 // Fails if word rules equal either character rule string, or if the clone's rules
 // differ from the source's rules.
 211     if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0) 
 212         errln("%s:%d ERROR: error in getRules() method", __FILE__
, __LINE__
); 
 214     RuleBasedBreakIterator bi4
;   // Default RuleBasedBreakIterator constructor gives empty shell with empty rules. 
 215     if (!bi4
.getRules().isEmpty()) { 
 216         errln("%s:%d Empty string expected.", __FILE__
, __LINE__
); 
 // Checks RuleBasedBreakIterator::hashCode(): iterators of the same kind over the same
 // text (and their clones) must hash equal, while character and word iterators must not.
 220 void RBBIAPITest::TestHashCode() 
 222     UErrorCode status
=U_ZERO_ERROR
; 
 // bi1 and bi3 are character iterators; bi2 is a word iterator.
 223     RuleBasedBreakIterator
* bi1     
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
 224     RuleBasedBreakIterator
* bi3     
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
 225     RuleBasedBreakIterator
* bi2     
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
); 
 226     if(U_FAILURE(status
)){ 
 227         errcheckln(status
, "Fail : in construction - %s", u_errorName(status
)); 
 235     logln((UnicodeString
)"Testing hashCode()"); 
 // All three iterators are given the same text so only the iterator kind differs.
 237     bi1
->setText((UnicodeString
)"Hash code"); 
 238     bi2
->setText((UnicodeString
)"Hash code"); 
 239     bi3
->setText((UnicodeString
)"Hash code"); 
 241     RuleBasedBreakIterator
* bi1clone
= bi1
->clone(); 
 242     RuleBasedBreakIterator
* bi2clone
= bi2
->clone(); 
 // Same kind + same text (including clones) must produce identical hash codes.
 244     if(bi1
->hashCode() != bi1clone
->hashCode() ||  bi1
->hashCode() != bi3
->hashCode() || 
 245         bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode()) 
 246         errln((UnicodeString
)"ERROR: identical objects have different hashcodes"); 
 // Character vs. word iterators (and their clones) must produce different hash codes.
 248     if(bi1
->hashCode() == bi2
->hashCode() ||  bi2
->hashCode() == bi3
->hashCode() || 
 249         bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode()) 
 250         errln((UnicodeString
)"ERROR: different objects have same hashcodes"); 
 // Exercises getText(), setText() (UnicodeString and UText overloads), adoptText() and
 // getUText() on character and word break iterators.
 // NOTE(review): some declarations (e.g. tstr, pos) and several assertions are not
 // visible in this listing; comments describe only what is shown.
 259 void RBBIAPITest::TestGetSetAdoptText() 
 261     logln((UnicodeString
)"Testing getText setText "); 
 262     IcuTestErrorCode 
status(*this, "TestGetSetAdoptText"); 
 263     UnicodeString str1
="first string."; 
 264     UnicodeString str2
="Second string."; 
 265     LocalPointer
<RuleBasedBreakIterator
> charIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
)); 
 266     LocalPointer
<RuleBasedBreakIterator
> wordIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
)); 
 267     if(status
.isFailure()){ 
 268         errcheckln(status
, "Fail : in construction - %s", status
.errorName()); 
 // Character iterators over str1 (plus a clone), over str2, and over a sub-range of str2.
 273     CharacterIterator
* text1
= new StringCharacterIterator(str1
); 
 274     CharacterIterator
* text1Clone 
= text1
->clone(); 
 275     CharacterIterator
* text2
= new StringCharacterIterator(str2
); 
 276     CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); //  "ond str" 
 // After setText(str1) the iterator's text must equal str1 and the position must be 0.
 278     wordIter1
->setText(str1
); 
 279     CharacterIterator 
*tci 
= &wordIter1
->getText(); 
 282     TEST_ASSERT(tstr 
== str1
); 
 283     if(wordIter1
->current() != 0) 
 284         errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n"); 
 // Setting a second text must reset the position to 0 again.
 288     wordIter1
->setText(str2
); 
 289     if(wordIter1
->current() != 0) 
 290         errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n"); 
 // charIter1 adopts the clone of text1 (str1); wordIter1 still holds str2, so the texts differ.
 293     charIter1
->adoptText(text1Clone
); 
 294     TEST_ASSERT(wordIter1
->getText() != charIter1
->getText()); 
 295     tci 
= &wordIter1
->getText(); 
 297     TEST_ASSERT(tstr 
== str2
); 
 298     tci 
= &charIter1
->getText(); 
 300     TEST_ASSERT(tstr 
== str1
); 
 // rb is a clone of wordIter1; after each adoptText() its getText() must equal the adopted iterator.
 303     LocalPointer
<RuleBasedBreakIterator
> rb(wordIter1
->clone()); 
 304     rb
->adoptText(text1
); 
 305     if(rb
->getText() != *text1
) 
 306         errln((UnicodeString
)"ERROR:1 error in adoptText "); 
 307     rb
->adoptText(text2
); 
 308     if(rb
->getText() != *text2
) 
 309         errln((UnicodeString
)"ERROR:2 error in adoptText "); 
 311     // Adopt where iterator range is less than the entire original source string. 
 312     //   (With the change of the break engine to working with UText internally, 
 313     //    CharacterIterators starting at positions other than zero are not supported) 
 314     rb
->adoptText(text3
); 
 315     TEST_ASSERT(rb
->preceding(2) == 0); 
 316     TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
); 
 317     //if(rb->preceding(2) != 3) { 
 318     //    errln((UnicodeString)"ERROR:3 error in adoptText "); 
 320     //if(rb->following(11) != BreakIterator::DONE) { 
 321     //    errln((UnicodeString)"ERROR:4 error in adoptText "); 
 326     //   Quick test to see if UText is working at all. 
 328     const char *s1 
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ 
 329     const char *s2 
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ 
 // Iterate forward over the UTF-8 UText for s1; the final boundary is 11, then UBRK_DONE.
 333     LocalUTextPointer 
ut(utext_openUTF8(NULL
, s1
, -1, status
)); 
 334     wordIter1
->setText(ut
.getAlias(), status
); 
 335     TEST_ASSERT_SUCCESS(status
); 
 338     pos 
= wordIter1
->first(); 
 340     pos 
= wordIter1
->next(); 
 342     pos 
= wordIter1
->next(); 
 344     pos 
= wordIter1
->next(); 
 345     TEST_ASSERT(pos
==11); 
 346     pos 
= wordIter1
->next(); 
 347     TEST_ASSERT(pos
==UBRK_DONE
); 
 // Switch to a second UText (s2) and iterate forward, then backward from last() until UBRK_DONE.
 350     LocalUTextPointer 
ut2(utext_openUTF8(NULL
, s2
, -1, status
)); 
 351     TEST_ASSERT_SUCCESS(status
); 
 352     wordIter1
->setText(ut2
.getAlias(), status
); 
 353     TEST_ASSERT_SUCCESS(status
); 
 355     pos 
= wordIter1
->first(); 
 357     pos 
= wordIter1
->next(); 
 359     pos 
= wordIter1
->next(); 
 362     pos 
= wordIter1
->last(); 
 364     pos 
= wordIter1
->previous(); 
 366     pos 
= wordIter1
->previous(); 
 368     pos 
= wordIter1
->previous(); 
 370     pos 
= wordIter1
->previous(); 
 371     TEST_ASSERT(pos
==UBRK_DONE
); 
 // getUText() is called with an already-open UText (over an empty string) as the fill-in argument.
 374     UnicodeString sEmpty
; 
 375     LocalUTextPointer 
gut2(utext_openUnicodeString(NULL
, &sEmpty
, status
)); 
 376     wordIter1
->getUText(gut2
.getAlias(), status
); 
 377     TEST_ASSERT_SUCCESS(status
); 
 // Verifies that the iteration API (first/last/previous/next/current/following/preceding/
 // isBoundary) is present and returns the expected positions on the text "0123456789".
 // NOTE(review): most of the calls that assign `i` and the guards around the errln()
 // calls are not visible in this listing; expected values are taken from the messages.
 382 void RBBIAPITest::TestIteration() 
 384     // This test just verifies that the API is present. 
 385     // Testing for correct operation of the break rules happens elsewhere. 
 387     UErrorCode status
=U_ZERO_ERROR
; 
 // Create one iterator of each kind (character, word, line, sentence, title) and report
 // any creation failure.
 388     RuleBasedBreakIterator
* bi  
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
 389     if (U_FAILURE(status
) || bi 
== NULL
)  { 
 390         errcheckln(status
, "Failure creating character break iterator.  Status = %s", u_errorName(status
)); 
 395     bi  
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
); 
 396     if (U_FAILURE(status
) || bi 
== NULL
)  { 
 397         errcheckln(status
, "Failure creating Word break iterator.  Status = %s", u_errorName(status
)); 
 402     bi  
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
); 
 403     if (U_FAILURE(status
) || bi 
== NULL
)  { 
 404         errcheckln(status
, "Failure creating Line break iterator.  Status = %s", u_errorName(status
)); 
 409     bi  
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
); 
 410     if (U_FAILURE(status
) || bi 
== NULL
)  { 
 411         errcheckln(status
, "Failure creating Sentence break iterator.  Status = %s", u_errorName(status
)); 
 416     bi  
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
); 
 417     if (U_FAILURE(status
) || bi 
== NULL
)  { 
 418         errcheckln(status
, "Failure creating Title break iterator.  Status = %s", u_errorName(status
)); 
 // The remaining checks use a character iterator; bail out if it cannot be created.
 423     bi  
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
); 
 424     if (U_FAILURE(status
) || bi 
== NULL
)  { 
 425         errcheckln(status
, "Failure creating character break iterator.  Status = %s", u_errorName(status
)); 
 426         return;   // Skip the rest of these tests. 
 430     UnicodeString testString
="0123456789"; 
 431     bi
->setText(testString
); 
 // first() / last() / previous() / next() checks.
 436         errln("%s:%d Incorrect value from bi->first().  Expected 0, got %d.", __FILE__
, __LINE__
, i
); 
 441         errln("%s:%d Incorrect value from bi->last().  Expected 10, got %d", __FILE__
, __LINE__
, i
); 
 450         errln("%s:%d Incorrect value from bi->last().  Expected 9, got %d", __FILE__
, __LINE__
, i
); 
 456     if (i 
!= BreakIterator::DONE
) { 
 457         errln("%s:%d Incorrect value from bi->previous().  Expected DONE, got %d", __FILE__
, __LINE__
, i
); 
 466         errln("%s:%d Incorrect value from bi->next().  Expected 1, got %d", __FILE__
, __LINE__
, i
); 
 471     if (i 
!= BreakIterator::DONE
) { 
 472         errln("%s:%d Incorrect value from bi->next().  Expected DONE, got %d", __FILE__
, __LINE__
, i
); 
 // current() checks.
 482         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__
, __LINE__
, i
); 
 488         errln("%s:%d Incorrect value from bi->current().  Expected 1, got %d", __FILE__
, __LINE__
, i
); 
 495         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__
, __LINE__
, i
); 
 502         errln("%s:%d Incorrect value from bi->current().  Expected 0, got %d", __FILE__
, __LINE__
, i
); 
 // following(): expected results per the messages are 5, 10, then DONE at the end of text.
 509     i 
= bi
->following(4); 
 511         errln("%s:%d Incorrect value from bi->following().  Expected 5, got %d", __FILE__
, __LINE__
, i
); 
 514     i 
= bi
->following(9); 
 516         errln("%s:%d Incorrect value from bi->following().  Expected 10, got %d", __FILE__
, __LINE__
, i
); 
 519     i 
= bi
->following(10); 
 520     if (i 
!= BreakIterator::DONE
) { 
 521         errln("%s:%d Incorrect value from bi->following().  Expected DONE, got %d", __FILE__
, __LINE__
, i
); 
 // preceding(): expected results per the messages are 3, 9, 0, then DONE at the start of text.
 528     i 
= bi
->preceding(4); 
 530         errln("%s:%d Incorrect value from bi->preceding().  Expected 3, got %d", __FILE__
, __LINE__
, i
); 
 533     i 
= bi
->preceding(10); 
 535         errln("%s:%d Incorrect value from bi->preceding().  Expected 9, got %d", __FILE__
, __LINE__
, i
); 
 538     i 
= bi
->preceding(1); 
 540         errln("%s:%d Incorrect value from bi->preceding().  Expected 0, got %d", __FILE__
, __LINE__
, i
); 
 543     i 
= bi
->preceding(0); 
 544     if (i 
!= BreakIterator::DONE
) { 
 545         errln("%s:%d Incorrect value from bi->preceding().  Expected DONE, got %d", __FILE__
, __LINE__
, i
); 
 // isBoundary(): offset 3 must be a boundary; offset 11 (past the end) must not be.
 // NOTE(review): the two isBoundary messages pass `i` although the format string has
 // no matching %d.
 553     if (bi
->isBoundary(3) != TRUE
) { 
 554         errln("%s:%d Incorrect value from bi->isBoudary().  Expected TRUE, got FALSE", __FILE__
, __LINE__
, i
); 
 558         errln("%s:%d Incorrect value from bi->current().  Expected 3, got %d", __FILE__
, __LINE__
, i
); 
 562     if (bi
->isBoundary(11) != FALSE
) { 
 563         errln("%s:%d Incorrect value from bi->isBoudary().  Expected FALSE, got TRUE", __FILE__
, __LINE__
, i
); 
 567         errln("%s:%d Incorrect value from bi->current().  Expected 10, got %d", __FILE__
, __LINE__
, i
); 
 // Further next() checks; expected results per the messages are 4, 10, then DONE.
 576         errln("%s:%d Incorrect value from bi->next().  Expected 4, got %d", __FILE__
, __LINE__
, i
); 
 581         errln("%s:%d Incorrect value from bi->next().  Expected 10, got %d", __FILE__
, __LINE__
, i
); 
 586     if (i 
!= BreakIterator::DONE
) { 
 587         errln("%s:%d Incorrect value from bi->next().  Expected BreakIterator::DONE, got %d", __FILE__
, __LINE__
, i
); 
 // Builds a RuleBasedBreakIterator from a rule string using $Letters / $Numbers
 // variables and checks the boundaries it produces on "abc123..abc".
 // NOTE(review): part of the rule string is not visible in this listing.
 599 void RBBIAPITest::TestBuilder() { 
 600      UnicodeString rulesString1 
= "$Letters = [:L:];\n" 
 601                                   "$Numbers = [:N:];\n" 
 604                                   "[^$Letters $Numbers];\n" 
 606      UnicodeString testString1  
= "abc123..abc"; 
 // Expected break positions in testString1.
 608      int32_t bounds1
[] = {0, 3, 6, 7, 8, 11}; 
 609      UErrorCode status
=U_ZERO_ERROR
; 
 610      UParseError    parseError
; 
 612      RuleBasedBreakIterator 
*bi 
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
); 
 // A construction failure is reported as a data error; otherwise run the boundary test.
 613      if(U_FAILURE(status
)) { 
 614          dataerrln("Fail : in construction - %s", u_errorName(status
)); 
 616          bi
->setText(testString1
); 
 617          doBoundaryTest(*bi
, testString1
, bounds1
); 
 625 //       Single quotes within rules imply a grouping, so that a modifier 
 626 //       following the quoted text (* or +) applies to all of the quoted chars. 
 // Builds a RuleBasedBreakIterator from rules containing a quoted group followed by '*'
 // and checks the boundaries it produces on "$@!$@!X$@!!X".
 // NOTE(review): part of the rule string is not visible in this listing.
 628 void RBBIAPITest::TestQuoteGrouping() { 
 629      UnicodeString rulesString1 
= "#Here comes the rule...\n" 
 630                                   "'$@!'*;\n"   //  (\$\@\!)* 
 633      UnicodeString testString1  
= "$@!$@!X$@!!X"; 
 // Expected break positions in testString1.
 635      int32_t bounds1
[] = {0, 6, 7, 10, 11, 12}; 
 636      UErrorCode status
=U_ZERO_ERROR
; 
 637      UParseError    parseError
; 
 639      RuleBasedBreakIterator 
*bi 
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
); 
 // A construction failure is reported as a data error; otherwise run the boundary test.
 640      if(U_FAILURE(status
)) { 
 641          dataerrln("Fail : in construction - %s", u_errorName(status
)); 
 643          bi
->setText(testString1
); 
 644          doBoundaryTest(*bi
, testString1
, bounds1
); 
 651 //      Test word break rule status constants. 
 // Checks the rule status values reported by English word and line break iterators:
 // getRuleStatus() must fall in the expected UBRK_WORD_* range at each word boundary,
 // getRuleStatusVec() must agree with it, and line breaks must report the expected
 // UBRK_LINE_SOFT / UBRK_LINE_HARD status.
 // NOTE(review): several declarations (str, pos, tag, i, vec, success) and the switch
 // around the `success = ...` lines are not visible in this listing.
 653 void RBBIAPITest::TestRuleStatus() { 
 655      //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing 
 656      // changed UBRK_WORD_KANA to UBRK_WORD_IDEO 
 657      u_unescape("plain word 123.45 \\u30a1\\u30a2 ", 
 658               // 012345678901234567  8      9    0 
 661      UnicodeString 
testString1(str
); 
 // Expected word boundaries, with the inclusive lower bound (tag_lo) and exclusive upper
 // bound (tag_hi) of the rule status expected at each boundary.
 662      int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; 
 663      int32_t tag_lo
[]  = {UBRK_WORD_NONE
,     UBRK_WORD_LETTER
, UBRK_WORD_NONE
,    UBRK_WORD_LETTER
, 
 664                           UBRK_WORD_NONE
,     UBRK_WORD_NUMBER
, UBRK_WORD_NONE
, 
 665                           UBRK_WORD_IDEO
,     UBRK_WORD_NONE
}; 
 667      int32_t tag_hi
[]  = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, 
 668                           UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
, 
 669                           UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
}; 
 671      UErrorCode status
=U_ZERO_ERROR
; 
 673      BreakIterator 
*bi 
= BreakIterator::createWordInstance(Locale::getEnglish(), status
); 
 674      if(U_FAILURE(status
)) { 
 675          errcheckln(status
, "%s:%d Fail in construction - %s", __FILE__
, __LINE__
, u_errorName(status
)); 
 677          bi
->setText(testString1
); 
 678          // First test that the breaks are in the right spots. 
 679          doBoundaryTest(*bi
, testString1
, bounds1
); 
 681          // Then go back and check tag values 
 684          for (pos 
= bi
->first(); pos 
!= BreakIterator::DONE
; pos 
= bi
->next(), i
++) { 
 685              if (pos 
!= bounds1
[i
]) { 
 686                  errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__
, __LINE__
, pos
); 
 // The status must lie in the half-open range [tag_lo[i], tag_hi[i]).
 689              tag 
= bi
->getRuleStatus(); 
 690              if (tag 
< tag_lo
[i
] || tag 
>= tag_hi
[i
]) { 
 691                  errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__
, __LINE__
, tag
, pos
); 
 695              // Check that we get the same tag values from getRuleStatusVec() 
 697              int t 
= bi
->getRuleStatusVec(vec
, 10, status
); 
 698              TEST_ASSERT_SUCCESS(status
); 
 700              TEST_ASSERT(vec
[0] == tag
); 
 705      // Now test line break status.  This test mostly is to confirm that the status constants 
 706      //                              are correctly declared in the header. 
 707      testString1 
=   "test line. \n"; 
 710      bi 
= BreakIterator::createLineInstance(Locale::getEnglish(), status
); 
 711      if(U_FAILURE(status
)) { 
 712          errcheckln(status
, "%s:%d failed to create line break iterator. - %s", __FILE__
, __LINE__
, u_errorName(status
)); 
 718          bi
->setText(testString1
); 
 720          tag 
= bi
->getRuleStatus(); 
 // Three expected (position, status) pairs: (0, SOFT), (5, SOFT), (12, HARD); any other
 // case is a failure.
 721          for (i
=0; i
<3; i
++) { 
 724                  success 
= pos
==0  && tag
==UBRK_LINE_SOFT
; break; 
 726                  success 
= pos
==5  && tag
==UBRK_LINE_SOFT
; break; 
 728                  success 
= pos
==12 && tag
==UBRK_LINE_HARD
; break; 
 730                  success 
= FALSE
; break; 
 732              if (success 
== FALSE
) { 
 733                  errln("%s:%d: incorrect line break status or position.  i=%d, pos=%d, tag=%d", 
 734                      __FILE__
, __LINE__
, i
, pos
, tag
); 
 738              tag 
= bi
->getRuleStatus(); 
 // Sanity-check the header constants: each value must be below its own _LIMIT, and
 // UBRK_LINE_HARD must not fall inside the soft range.
 740          if (UBRK_LINE_SOFT 
>= UBRK_LINE_SOFT_LIMIT 
|| 
 741              UBRK_LINE_HARD 
>= UBRK_LINE_HARD_LIMIT 
|| 
 742              (UBRK_LINE_HARD 
> UBRK_LINE_SOFT 
&& UBRK_LINE_HARD 
< UBRK_LINE_SOFT_LIMIT
)) { 
 743              errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__
, __LINE__
); 
 753 //      Test the vector form of  break rule status. 
 // Checks getRuleStatusVec() on a custom rule set whose rules carry {nnn} status tags:
 // the number and values of the statuses at successive positions in "Aapz5?", and the
 // behavior when the output buffer is too small.
 // NOTE(review): most of the rule string, the numStatuses/pos declarations and the
 // iterator-positioning calls between the assertion groups are not visible in this listing.
 755 void RBBIAPITest::TestRuleStatusVec() { 
 756     UnicodeString 
rulesString(   "[A-N]{100}; \n" 
 761                                   "!.*;\n", -1, US_INV
); 
 762      UnicodeString testString1  
= "Aapz5?"; 
 763      int32_t  statusVals
[10]; 
 767      UErrorCode status
=U_ZERO_ERROR
; 
 768      UParseError    parseError
; 
 770      RuleBasedBreakIterator 
*bi 
= new RuleBasedBreakIterator(rulesString
, parseError
, status
); 
 771      if (U_FAILURE(status
)) { 
 772          dataerrln("Failure at file %s, line %d, error = %s", __FILE__
, __LINE__
, u_errorName(status
)); 
 774          bi
->setText(testString1
); 
 // Expect statuses {100, 300}.
 779          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 10, status
); 
 780          TEST_ASSERT_SUCCESS(status
); 
 781          TEST_ASSERT(numStatuses 
== 2); 
 782          TEST_ASSERT(statusVals
[0] == 100); 
 783          TEST_ASSERT(statusVals
[1] == 300); 
 // Expect statuses {200, 300}.
 788          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 10, status
); 
 789          TEST_ASSERT_SUCCESS(status
); 
 790          TEST_ASSERT(numStatuses 
== 2); 
 791          TEST_ASSERT(statusVals
[0] == 200); 
 792          TEST_ASSERT(statusVals
[1] == 300); 
 // Expect statuses {200, 300} again.
 797          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 10, status
); 
 798          TEST_ASSERT_SUCCESS(status
); 
 799          TEST_ASSERT(numStatuses 
== 2); 
 800          TEST_ASSERT(statusVals
[0] == 200); 
 801          TEST_ASSERT(statusVals
[1] == 300); 
 // Expect the single status {300}.
 806          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 10, status
); 
 807          TEST_ASSERT_SUCCESS(status
); 
 808          TEST_ASSERT(numStatuses 
== 1); 
 809          TEST_ASSERT(statusVals
[0] == 300); 
 // Expect statuses {400, 500}.
 814          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 10, status
); 
 815          TEST_ASSERT_SUCCESS(status
); 
 816          TEST_ASSERT(numStatuses 
== 2); 
 817          TEST_ASSERT(statusVals
[0] == 400); 
 818          TEST_ASSERT(statusVals
[1] == 500); 
 // Expect the single status {0}.
 823          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 10, status
); 
 824          TEST_ASSERT_SUCCESS(status
); 
 825          TEST_ASSERT(numStatuses 
== 1); 
 826          TEST_ASSERT(statusVals
[0] == 0); 
 829          //  Check buffer overflow error handling.   Char == A 
 // memset with -1 fills every byte with 0xFF, so each int32_t slot reads back as -1;
 // slots that still hold -1 afterwards were not written by getRuleStatusVec().
 // Capacity 0: overflow error, full count (2) returned, nothing written.
 834          memset(statusVals
, -1, sizeof(statusVals
)); 
 835          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 0, status
); 
 836          TEST_ASSERT(status 
== U_BUFFER_OVERFLOW_ERROR
); 
 837          TEST_ASSERT(numStatuses 
== 2); 
 838          TEST_ASSERT(statusVals
[0] == -1); 
 // Capacity 1: overflow error, full count (2) returned, only the first value written.
 840          status 
= U_ZERO_ERROR
; 
 841          memset(statusVals
, -1, sizeof(statusVals
)); 
 842          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 1, status
); 
 843          TEST_ASSERT(status 
== U_BUFFER_OVERFLOW_ERROR
); 
 844          TEST_ASSERT(numStatuses 
== 2); 
 845          TEST_ASSERT(statusVals
[0] == 100); 
 846          TEST_ASSERT(statusVals
[1] == -1); 
 // Capacity 2: exact fit, success, both values written and the third slot untouched.
 848          status 
= U_ZERO_ERROR
; 
 849          memset(statusVals
, -1, sizeof(statusVals
)); 
 850          numStatuses 
= bi
->getRuleStatusVec(statusVals
, 2, status
); 
 851          TEST_ASSERT_SUCCESS(status
); 
 852          TEST_ASSERT(numStatuses 
== 2); 
 853          TEST_ASSERT(statusVals
[0] == 100); 
 854          TEST_ASSERT(statusVals
[1] == 300); 
 855          TEST_ASSERT(statusVals
[2] == -1); 
 862 //   Bug 2190 Regression test.   Builder crash on rule consisting of only a 
 863 //                               $variable reference 
 // Regression test: builds a RuleBasedBreakIterator from rules that define the variable
 // $aaa and checks the boundaries it produces on "abcdabcd".
 // NOTE(review): the rest of the rule string is not visible in this listing.
 864 void RBBIAPITest::TestBug2190() { 
 865      UnicodeString rulesString1 
= "$aaa = abcd;\n" 
 868      UnicodeString testString1  
= "abcdabcd"; 
 // Expected break positions in testString1.
 870      int32_t bounds1
[] = {0, 4, 8}; 
 871      UErrorCode status
=U_ZERO_ERROR
; 
 872      UParseError    parseError
; 
 874      RuleBasedBreakIterator 
*bi 
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
); 
 // A construction failure is reported as a data error; otherwise run the boundary test.
 875      if(U_FAILURE(status
)) { 
 876          dataerrln("Fail : in construction - %s", u_errorName(status
)); 
 878          bi
->setText(testString1
); 
 879          doBoundaryTest(*bi
, testString1
, bounds1
); 
 // Exercises BreakIterator::registerInstance() / unregister(): registers the ja_JP word
 // iterator under locale "xx", checks what createWordInstance/createCharacterInstance
 // and getAvailableLocales() return while registered and after unregistering.
 // Only compiled when UCONFIG_NO_SERVICE is off.
 // NOTE(review): the `fail`/`found`/`count` declarations, guards and clean-up code are
 // not visible in this listing.
 885 void RBBIAPITest::TestRegistration() { 
 886 #if !UCONFIG_NO_SERVICE 
 887     UErrorCode status 
= U_ZERO_ERROR
; 
 888     BreakIterator
* ja_word 
= BreakIterator::createWordInstance("ja_JP", status
); 
 889     // ok to not delete these if we exit because of error? 
 890     BreakIterator
* ja_char 
= BreakIterator::createCharacterInstance("ja_JP", status
); 
 891     BreakIterator
* root_word 
= BreakIterator::createWordInstance("", status
); 
 892     BreakIterator
* root_char 
= BreakIterator::createCharacterInstance("", status
); 
 // Missing resource / file access errors are reported as data errors.
 894     if (status 
== U_MISSING_RESOURCE_ERROR 
|| status 
== U_FILE_ACCESS_ERROR
) { 
 895         dataerrln("Error creating instances of break interactors - %s", u_errorName(status
)); 
 // Register the Japanese word iterator for locale "xx"; `key` is used later to unregister.
 905     URegistryKey key 
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
); 
 907 #if 0 // With a dictionary based word breaking, ja_word is identical to root. 
 908         if (ja_word 
&& *ja_word 
== *root_word
) { 
 909             errln("japan not different from root"); 
 // While registered: a word iterator for xx_XX is compared against ja_word.
 915         BreakIterator
* result 
= BreakIterator::createWordInstance("xx_XX", status
); 
 918             fail 
= *result 
!= *ja_word
; 
 922             errln("bad result for xx_XX/word"); 
 // A character iterator for ja_JP is compared against ja_char.
 927         BreakIterator
* result 
= BreakIterator::createCharacterInstance("ja_JP", status
); 
 930             fail 
= *result 
!= *ja_char
; 
 934             errln("bad result for ja_JP/char"); 
 // A character iterator for xx_XX is compared against root_char.
 939         BreakIterator
* result 
= BreakIterator::createCharacterInstance("xx_XX", status
); 
 942             fail 
= *result 
!= *root_char
; 
 946             errln("bad result for xx_XX/char"); 
 // The available-locale enumeration is searched for the test locale "xx".
 951         StringEnumeration
* avail 
= BreakIterator::getAvailableLocales(); 
 953         const UnicodeString
* p
; 
 954         while ((p 
= avail
->snext(status
))) { 
 955             if (p
->compare("xx") == 0) { 
 962             errln("did not find test locale"); 
 // Remove the registration again using the key returned by registerInstance().
 967         UBool unreg 
= BreakIterator::unregister(key
, status
); 
 969             errln("unable to unregister"); 
 // The en_US word iterator is compared against the root word iterator.
 974         BreakIterator
* result 
= BreakIterator::createWordInstance("en_US", status
); 
 975         BreakIterator
* root 
= BreakIterator::createWordInstance("", status
); 
 978           fail 
= *root 
!= *result
; 
 983             errln("did not get root break"); 
 // After unregistering, the enumeration is searched for "xx" again.
 988         StringEnumeration
* avail 
= BreakIterator::getAvailableLocales(); 
 990         const UnicodeString
* p
; 
 991         while ((p 
= avail
->snext(status
))) { 
 992             if (p
->compare("xx") == 0) { 
 999             errln("found test locale"); 
 // The array form of getAvailableLocales(count) must include English.
1005         UBool   foundLocale 
= FALSE
; 
1006         const Locale 
*avail 
= BreakIterator::getAvailableLocales(count
); 
1007         for (int i
=0; i
<count
; i
++) { 
1008             if (avail
[i
] == Locale::getEnglish()) { 
1013         if (foundLocale 
== FALSE
) { 
1014             errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); 
1019     // ja_word was adopted by factory 
// Round-trip check for one prebuilt break-rule data file: opens the "brk" data item named
// `dataFile`, extracts the rule source stored in it, rebuilds a RuleBasedBreakIterator
// from that source, and compares the rebuilt binary rules with the prebuilt ones.
//
// dataFile: name of the break-iterator data item passed to udata_open().
// NOTE(review): the declaration of `length` and the early returns are not visible in
// this listing.
1026 void RBBIAPITest::RoundtripRule(const char *dataFile
) { 
1027     UErrorCode status 
= U_ZERO_ERROR
; 
1028     UParseError parseError
; 
1029     parseError
.line 
= 0; 
1030     parseError
.offset 
= 0; 
1031     LocalUDataMemoryPointer 
data(udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
)); 
1033     const UChar 
*builtSource
; 
1034     const uint8_t *rbbiRules
; 
1035     const uint8_t *builtRules
; 
1037     if (U_FAILURE(status
)) { 
1038         errcheckln(status
, "%s:%d Can't open \"%s\" - %s", __FILE__
, __LINE__
, dataFile
, u_errorName(status
)); 
// builtRules points at the prebuilt binary data; the rule source text lives at byte
// offset fRuleSource (read from the RBBIDataHeader at the start of that data).
1042     builtRules 
= (const uint8_t *)udata_getMemory(data
.getAlias()); 
1043     builtSource 
= (const UChar 
*)(builtRules 
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
); 
// Rebuild an iterator from the extracted source; on failure report the parse position
// and dump the source.
1044     LocalPointer
<RuleBasedBreakIterator
> brkItr (new RuleBasedBreakIterator(builtSource
, parseError
, status
)); 
1045     if (U_FAILURE(status
)) { 
1046         errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\"  at line %d, column %d\n", 
1047                 __FILE__
, __LINE__
, u_errorName(status
), parseError
.line
, parseError
.offset
); 
1048         errln(UnicodeString(builtSource
)); 
// Compare the first `length` bytes of the prebuilt and rebuilt binary rules.
1051     rbbiRules 
= brkItr
->getBinaryRules(length
); 
1052     logln("Comparing \"%s\" len=%d", dataFile
, length
); 
1053     if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) { 
1054         errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__
, __LINE__
, dataFile
); 
// Runs the RoundtripRule() check once for each named break-rule data item
// listed below.
1059 void RBBIAPITest::TestRoundtripRules() { 
1060     RoundtripRule("word"); 
1061     RoundtripRule("title"); 
1062     RoundtripRule("sent"); 
1063     RoundtripRule("line"); 
1064     RoundtripRule("char"); 
// NOTE(review): the deeper indentation of the next call suggests it sits
// inside a conditional block whose opening line is not shown in this
// listing - confirm against the full file.
1066         RoundtripRule("word_POSIX"); 
1071 // Check getBinaryRules() and construction of a break iterator from those rules. 
1073 void RBBIAPITest::TestGetBinaryRules() { 
1074     UErrorCode status
=U_ZERO_ERROR
; 
1075     LocalPointer
<BreakIterator
> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status
)); 
1076     if (U_FAILURE(status
)) { 
1077         dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status
)); 
1080     RuleBasedBreakIterator 
*rbbi 
= dynamic_cast<RuleBasedBreakIterator 
*>(bi
.getAlias()); 
1082         dataerrln("FAIL: RuleBasedBreakIterator is NULL"); 
1086     // Check that the new line break iterator is nominally functional. 
1087     UnicodeString 
helloWorld("Hello, World!"); 
1088     rbbi
->setText(helloWorld
); 
1090     while (bi
->next() != UBRK_DONE
) { 
1093     TEST_ASSERT(n 
== 2); 
1095     // Extract the binary rules as a uint8_t blob. 
1096     uint32_t ruleLength
; 
1097     const uint8_t *binRules 
= rbbi
->getBinaryRules(ruleLength
); 
1098     TEST_ASSERT(ruleLength 
> 0); 
1099     TEST_ASSERT(binRules 
!= NULL
); 
1101     // Clone the binary rules, and create a break iterator from that. 
1102     // The break iterator does not adopt the rules; we must delete when we are finished with the iterator. 
1103     uint8_t *clonedRules 
= new uint8_t[ruleLength
]; 
1104     memcpy(clonedRules
, binRules
, ruleLength
); 
1105     RuleBasedBreakIterator 
clonedBI(clonedRules
, ruleLength
, status
); 
1106     TEST_ASSERT_SUCCESS(status
); 
1108     // Check that the cloned line break iterator is nominally alive. 
1109     clonedBI
.setText(helloWorld
); 
1111     while (clonedBI
.next() != UBRK_DONE
) { 
1114     TEST_ASSERT(n 
== 2); 
1116     delete[] clonedRules
; 
1120 void RBBIAPITest::TestRefreshInputText() { 
1122      *  RefreshInput changes out the input of a Break Iterator without 
1123      *    changing anything else in the iterator's state.  Used with Java JNI, 
1124      *    when Java moves the underlying string storage.   This test 
1125      *    runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. 
1126      *    The right set of boundaries should still be found. 
1128     UChar testStr
[]  = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0};  /* = " A B C D"  */ 
1129     UChar movedStr
[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,  0}; 
1130     UErrorCode status 
= U_ZERO_ERROR
; 
1131     UText ut1 
= UTEXT_INITIALIZER
; 
1132     UText ut2 
= UTEXT_INITIALIZER
; 
1133     RuleBasedBreakIterator 
*bi 
= (RuleBasedBreakIterator 
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
); 
1134     TEST_ASSERT_SUCCESS(status
); 
1136     utext_openUChars(&ut1
, testStr
, -1, &status
); 
1137     TEST_ASSERT_SUCCESS(status
); 
1139     if (U_SUCCESS(status
)) { 
1140         bi
->setText(&ut1
, status
); 
1141         TEST_ASSERT_SUCCESS(status
); 
1143         /* Line boundaries will occur before each letter in the original string */ 
1144         TEST_ASSERT(1 == bi
->next()); 
1145         TEST_ASSERT(3 == bi
->next()); 
1147         /* Move the string, kill the original string.  */ 
1148         u_strcpy(movedStr
, testStr
); 
1149         u_memset(testStr
, 0x20, u_strlen(testStr
)); 
1150         utext_openUChars(&ut2
, movedStr
, -1, &status
); 
1151         TEST_ASSERT_SUCCESS(status
); 
1152         RuleBasedBreakIterator 
*returnedBI 
= &bi
->refreshInputText(&ut2
, status
); 
1153         TEST_ASSERT_SUCCESS(status
); 
1154         TEST_ASSERT(bi 
== returnedBI
); 
1156         /* Find the following matches, now working in the moved string. */ 
1157         TEST_ASSERT(5 == bi
->next()); 
1158         TEST_ASSERT(7 == bi
->next()); 
1159         TEST_ASSERT(8 == bi
->next()); 
1160         TEST_ASSERT(UBRK_DONE 
== bi
->next()); 
1169 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 
1170 static void prtbrks(BreakIterator
* brk
, const UnicodeString 
&ustr
, IntlTest 
&it
) { 
1171   static const UChar PILCROW
=0x00B6, CHSTR
=0x3010, CHEND
=0x3011; // lenticular brackets 
1172   it
.logln(UnicodeString("String:'")+ustr
+UnicodeString("'")); 
1174   int32_t *pos 
= new int32_t[ustr
.length()]; 
1175   int32_t posCount 
= 0; 
1177   // calculate breaks up front, so we can print out 
1178   // sans any debugging 
1179   for(int32_t n 
= 0; (n
=brk
->next())!=UBRK_DONE
; ) { 
1180     pos
[posCount
++] = n
; 
1181     if(posCount
>=ustr
.length()) { 
1182       it
.errln("brk count exceeds string length!"); 
1187   out
.append((UChar
)CHSTR
); 
1189   for(int32_t i
=0;i
<posCount
;i
++) { 
1191     out
.append(ustr
.tempSubString(prev
,n
-prev
)); 
1192     out
.append((UChar
)PILCROW
); 
1195   out
.append(ustr
.tempSubString(prev
,ustr
.length()-prev
)); 
1196   out
.append((UChar
)CHEND
); 
1200   for(int32_t i
=0;i
<posCount
;i
++) { 
1202     sprintf(tmp
,"%d ",pos
[i
]); 
1203     out
.append(UnicodeString(tmp
)); 
// Exercises FilteredBreakIteratorBuilder with sentence break iterators.
// A fixed English text is broken with several suppression lists and the
// offsets returned by next() are compared with expected values; a French
// builder is then checked against a short French string.
// After each set of checks the iterator is reset with first() and handed to
// prtbrks() together with the text.
// NOTE(review): the embedded line numbers skip in places, so some source lines
// (for example the #else/#endif that pair with the #if below) are not shown
// in this listing - consult the full file.
1210 void RBBIAPITest::TestFilteredBreakIteratorBuilder() { 
1211 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 
1212   UErrorCode status 
= U_ZERO_ERROR
; 
1213   LocalPointer
<FilteredBreakIteratorBuilder
> builder
; 
1214   LocalPointer
<BreakIterator
> baseBI
; 
1215   LocalPointer
<BreakIterator
> filteredBI
; 
1216   LocalPointer
<BreakIterator
> frenchBI
; 
1218   const UnicodeString 
text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited. 
1219   const UnicodeString 
ABBR_MR("Mr."); 
1220   const UnicodeString 
ABBR_CAPT("Capt."); 
// Case 1: builder created without a locale and with nothing suppressed.
// Expected offsets: 20, 84, 90, 181, 278.
1223     logln("Constructing empty builder\n"); 
1224     builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
)); 
1225     TEST_ASSERT_SUCCESS(status
); 
1227     logln("Constructing base BI\n"); 
1228     baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
)); 
1229     TEST_ASSERT_SUCCESS(status
); 
1231         logln("Building new BI\n"); 
// baseBI releases its pointer with orphan() as it is passed to build().
1232     filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
)); 
1233     TEST_ASSERT_SUCCESS(status
); 
1235         if (U_SUCCESS(status
)) { 
1237         filteredBI
->setText(text
); 
1238         TEST_ASSERT(20 == filteredBI
->next()); // Mr. 
1239         TEST_ASSERT(84 == filteredBI
->next()); // recovered. 
1240         TEST_ASSERT(90 == filteredBI
->next()); // Capt. 
1241         TEST_ASSERT(181 == filteredBI
->next()); // Mr. 
1242         TEST_ASSERT(278 == filteredBI
->next()); // charge. 
1243         filteredBI
->first(); 
1244         prtbrks(filteredBI
.getAlias(), text
, *this); 
// Case 2: fresh builder with only "Mr." suppressed.
// Also checks the return values of repeated suppress/unsuppress calls.
// Expected offsets: 84, 90, 278.
1249     logln("Constructing empty builder\n"); 
1250     builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
)); 
1251     TEST_ASSERT_SUCCESS(status
); 
1253     if (U_SUCCESS(status
)) { 
1254         logln("Adding Mr. as an exception\n"); 
1255         TEST_ASSERT(TRUE 
== builder
->suppressBreakAfter(ABBR_MR
, status
)); 
1256         TEST_ASSERT(FALSE 
== builder
->suppressBreakAfter(ABBR_MR
, status
)); // already have it 
1257         TEST_ASSERT(TRUE 
== builder
->unsuppressBreakAfter(ABBR_MR
, status
)); 
1258         TEST_ASSERT(FALSE 
== builder
->unsuppressBreakAfter(ABBR_MR
, status
)); // already removed it 
1259         TEST_ASSERT(TRUE 
== builder
->suppressBreakAfter(ABBR_MR
, status
)); 
1260         TEST_ASSERT_SUCCESS(status
); 
1262         logln("Constructing base BI\n"); 
1263         baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
)); 
1264         TEST_ASSERT_SUCCESS(status
); 
1266         logln("Building new BI\n"); 
1267         filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
)); 
1268         TEST_ASSERT_SUCCESS(status
); 
1271         filteredBI
->setText(text
); 
1272         TEST_ASSERT(84 == filteredBI
->next()); 
1273         TEST_ASSERT(90 == filteredBI
->next());// Capt. 
1274         TEST_ASSERT(278 == filteredBI
->next()); 
1275         filteredBI
->first(); 
1276         prtbrks(filteredBI
.getAlias(), text
, *this); 
// Case 3: fresh builder with both "Mr." and "Capt." suppressed.
// Expected offsets: 84, 278.
1282     logln("Constructing empty builder\n"); 
1283     builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status
)); 
1284     TEST_ASSERT_SUCCESS(status
); 
1286     if (U_SUCCESS(status
)) { 
1287         logln("Adding Mr. and Capt as an exception\n"); 
1288         TEST_ASSERT(TRUE 
== builder
->suppressBreakAfter(ABBR_MR
, status
)); 
1289         TEST_ASSERT(TRUE 
== builder
->suppressBreakAfter(ABBR_CAPT
, status
)); 
1290         TEST_ASSERT_SUCCESS(status
); 
1292         logln("Constructing base BI\n"); 
1293         baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
)); 
1294         TEST_ASSERT_SUCCESS(status
); 
1296         logln("Building new BI\n"); 
1297         filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
)); 
1298         TEST_ASSERT_SUCCESS(status
); 
1301         filteredBI
->setText(text
); 
1302         TEST_ASSERT(84 == filteredBI
->next()); 
1303         TEST_ASSERT(278 == filteredBI
->next()); 
1304         filteredBI
->first(); 
1305         prtbrks(filteredBI
.getAlias(), text
, *this); 
// Case 4: builder created for the English locale, with "Capt." unsuppressed.
// The unsuppress call is expected to return TRUE.
// Expected offsets: 84, 90, 278.
1311     logln("Constructing English builder\n"); 
1312     builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
)); 
1313     TEST_ASSERT_SUCCESS(status
); 
1315     logln("Constructing base BI\n"); 
1316     baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
)); 
1317     TEST_ASSERT_SUCCESS(status
); 
1319     if (U_SUCCESS(status
)) { 
1320         logln("unsuppressing 'Capt'"); 
1321         TEST_ASSERT(TRUE 
== builder
->unsuppressBreakAfter(ABBR_CAPT
, status
)); 
1323         logln("Building new BI\n"); 
1324         filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
)); 
1325         TEST_ASSERT_SUCCESS(status
); 
1327         if(filteredBI
.isValid()) { 
1329           filteredBI
->setText(text
); 
1330           TEST_ASSERT(84 == filteredBI
->next()); 
1331           TEST_ASSERT(90 == filteredBI
->next()); 
1332           TEST_ASSERT(278 == filteredBI
->next()); 
1333           filteredBI
->first(); 
1334           prtbrks(filteredBI
.getAlias(), text
, *this); 
// Case 5: builder created for the English locale and used unmodified.
// Expected offsets: 84, 278.
1341     logln("Constructing English builder\n"); 
1342     builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status
)); 
1343     TEST_ASSERT_SUCCESS(status
); 
1345     logln("Constructing base BI\n"); 
1346     baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status
)); 
1347     TEST_ASSERT_SUCCESS(status
); 
1349     if (U_SUCCESS(status
)) { 
1350         logln("Building new BI\n"); 
1351         filteredBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
)); 
1352         TEST_ASSERT_SUCCESS(status
); 
1354         if(filteredBI
.isValid()) { 
1356           filteredBI
->setText(text
); 
1357           TEST_ASSERT(84 == filteredBI
->next()); 
1358           TEST_ASSERT(278 == filteredBI
->next()); 
1359           filteredBI
->first(); 
1360           prtbrks(filteredBI
.getAlias(), text
, *this); 
// Case 6: builder and base iterator created for the French locale; the result
// is stored in frenchBI, so filteredBI keeps the iterator from the previous case.
1365   // reenable once french is in 
1367     logln("Constructing French builder"); 
1368     builder
.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status
)); 
1369     TEST_ASSERT_SUCCESS(status
); 
1371     logln("Constructing base BI\n"); 
1372     baseBI
.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status
)); 
1373     TEST_ASSERT_SUCCESS(status
); 
1375     if (U_SUCCESS(status
)) { 
1376         logln("Building new BI\n"); 
1377         frenchBI
.adoptInstead(builder
->build(baseBI
.orphan(), status
)); 
1378         TEST_ASSERT_SUCCESS(status
); 
1381     if(frenchBI
.isValid()) { 
// With the French iterator the only expected offset is 16.
1383       UnicodeString 
frText("C'est MM. Duval."); 
1384       frenchBI
->setText(frText
); 
1385       TEST_ASSERT(16 == frenchBI
->next()); 
1386       TEST_ASSERT(BreakIterator::DONE 
== frenchBI
->next()); 
1388       prtbrks(frenchBI
.getAlias(), frText
, *this); 
// The same French text run through filteredBI (the English iterator) is
// expected to give an extra offset at 10.
1389       logln("Testing against English:"); 
1390       filteredBI
->setText(frText
); 
1391       TEST_ASSERT(10 == filteredBI
->next()); // wrong for french, but filterBI is english. 
1392       TEST_ASSERT(16 == filteredBI
->next()); 
1393       TEST_ASSERT(BreakIterator::DONE 
== filteredBI
->next()); 
1394       filteredBI
->first(); 
1395       prtbrks(filteredBI
.getAlias(), frText
, *this); 
// Equality operators: an iterator equals itself, and the French and English
// iterators compare unequal in both directions.
1398       assertTrue(WHERE
, *frenchBI   
== *frenchBI
); 
1399       assertTrue(WHERE
, *filteredBI 
!= *frenchBI
); 
1400       assertTrue(WHERE
, *frenchBI   
!= *filteredBI
); 
1402       dataerrln("French BI: not valid."); 
// Logged when the configuration guard at the top of the function is not met.
1407   logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION"); 
1411 //--------------------------------------------- 
1413 //--------------------------------------------- 
// Test dispatcher for this suite: logs the suite banner when exec is set and
// lists the test methods through the TESTCASE_AUTO macros.
// NOTE(review): TESTCASE_AUTO presumably assigns test indices in listing
// order, so the order of the entries should be kept - confirm in intltest.h.
// NOTE(review): the embedded line numbers skip in places; the #endif lines
// that pair with the #if lines below are not shown in this listing.
1415 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ ) 
1417     if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API "); 
1418     TESTCASE_AUTO_BEGIN
; 
// Tests registered only when file I/O is configured in.
1419 #if !UCONFIG_NO_FILE_IO 
1420     TESTCASE_AUTO(TestCloneEquals
); 
1421     TESTCASE_AUTO(TestgetRules
); 
1422     TESTCASE_AUTO(TestHashCode
); 
1423     TESTCASE_AUTO(TestGetSetAdoptText
); 
1424     TESTCASE_AUTO(TestIteration
); 
1426     TESTCASE_AUTO(TestBuilder
); 
1427     TESTCASE_AUTO(TestQuoteGrouping
); 
1428     TESTCASE_AUTO(TestRuleStatusVec
); 
1429     TESTCASE_AUTO(TestBug2190
); 
// Second group of tests registered only when file I/O is configured in.
1430 #if !UCONFIG_NO_FILE_IO 
1431     TESTCASE_AUTO(TestRegistration
); 
1432     TESTCASE_AUTO(TestBoilerPlate
); 
1433     TESTCASE_AUTO(TestRuleStatus
); 
1434     TESTCASE_AUTO(TestRoundtripRules
); 
1435     TESTCASE_AUTO(TestGetBinaryRules
); 
1437     TESTCASE_AUTO(TestRefreshInputText
); 
// NOTE(review): the file as a whole is already wrapped in
// #if !UCONFIG_NO_BREAK_ITERATION, so this inner guard looks redundant - confirm.
1438 #if !UCONFIG_NO_BREAK_ITERATION 
1439     TESTCASE_AUTO(TestFilteredBreakIteratorBuilder
); 
1445 //--------------------------------------------- 
1446 //Internal subroutines 
1447 //--------------------------------------------- 
1449 void RBBIAPITest::doBoundaryTest(BreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){ 
1450      logln((UnicodeString
)"testIsBoundary():"); 
1453         for (int32_t i 
= 0; i 
< text
.length(); i
++) { 
1454             isB 
= bi
.isBoundary(i
); 
1455             logln((UnicodeString
)"bi.isBoundary(" + i 
+ ") -> " + isB
); 
1457             if (i 
== boundaries
[p
]) { 
1459                     errln((UnicodeString
)"Wrong result from isBoundary() for " + i 
+ (UnicodeString
)": expected true, got false"); 
1464                     errln((UnicodeString
)"Wrong result from isBoundary() for " + i 
+ (UnicodeString
)": expected false, got true"); 
1468 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){ 
1469     UnicodeString selected
; 
1470     UnicodeString expected
=CharsToUnicodeString(expectedString
); 
1472     if(gotoffset 
!= expectedOffset
) 
1473          errln((UnicodeString
)"ERROR:****returned #" + gotoffset 
+ (UnicodeString
)" instead of #" + expectedOffset
); 
1474     if(start 
<= gotoffset
){ 
1475         testString
.extractBetween(start
, gotoffset
, selected
); 
1478         testString
.extractBetween(gotoffset
, start
, selected
); 
1480     if(selected
.compare(expected
) != 0) 
1481          errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected 
+ "\" instead of \"" + expected 
+ "\"")); 
1483         logln(prettify("****selected \"" + selected 
+ "\"")); 
1486 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */