1 /********************************************************************
2 * Copyright (c) 1999-2012, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
10 #include "unicode/utypes.h"
12 #if !UCONFIG_NO_BREAK_ITERATION
14 #include "unicode/uchar.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
22 #include "unicode/locid.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utext.h"
28 * API Test the RuleBasedBreakIterator class
32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
33 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}
35 #define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
36 errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
38 void RBBIAPITest::TestCloneEquals()
41 UErrorCode status
=U_ZERO_ERROR
;
42 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
43 RuleBasedBreakIterator
* biequal
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
44 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
45 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
46 if(U_FAILURE(status
)){
47 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
52 UnicodeString testString
="Testing word break iterators's clone() and equals()";
53 bi1
->setText(testString
);
54 bi2
->setText(testString
);
55 biequal
->setText(testString
);
57 bi3
->setText("hello");
59 logln((UnicodeString
)"Testing equals()");
61 logln((UnicodeString
)"Testing == and !=");
62 UBool b
= (*bi1
!= *biequal
);
66 errln((UnicodeString
)"ERROR:1 RBBI's == and != operator failed.");
69 if(*bi2
== *biequal
|| *bi2
== *bi1
|| *biequal
== *bi3
)
70 errln((UnicodeString
)"ERROR:2 RBBI's == and != operator failed.");
73 // Quick test of RuleBasedBreakIterator assignment -
75 // two different iterators are !=
76 // they are == after assignment
77 // source and dest iterator produce the same next() after assignment.
78 // deleting one doesn't disable the other.
79 logln("Testing assignment");
80 RuleBasedBreakIterator
*bix
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getDefault(), status
);
81 if(U_FAILURE(status
)){
82 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
86 RuleBasedBreakIterator biDefault
, biDefault2
;
87 if(U_FAILURE(status
)){
88 errln((UnicodeString
)"FAIL : in construction of default iterator");
91 if (biDefault
== *bix
) {
92 errln((UnicodeString
)"ERROR: iterators should not compare ==");
95 if (biDefault
!= biDefault2
) {
96 errln((UnicodeString
)"ERROR: iterators should compare ==");
101 UnicodeString
HelloString("Hello Kitty");
102 bix
->setText(HelloString
);
104 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
108 errln(UnicodeString("ERROR: strings should be equal before assignment."));
111 int bixnext
= bix
->next();
112 int bi2next
= bi2
->next();
113 if (! (bixnext
== bi2next
&& bixnext
== 7)) {
114 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
117 if (bi2
->next() != 8) {
118 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
123 logln((UnicodeString
)"Testing clone()");
124 RuleBasedBreakIterator
* bi1clone
=(RuleBasedBreakIterator
*)bi1
->clone();
125 RuleBasedBreakIterator
* bi2clone
=(RuleBasedBreakIterator
*)bi2
->clone();
127 if(*bi1clone
!= *bi1
|| *bi1clone
!= *biequal
||
128 *bi1clone
== *bi3
|| *bi1clone
== *bi2
)
129 errln((UnicodeString
)"ERROR:1 RBBI's clone() method failed");
131 if(*bi2clone
== *bi1
|| *bi2clone
== *biequal
||
132 *bi2clone
== *bi3
|| *bi2clone
!= *bi2
)
133 errln((UnicodeString
)"ERROR:2 RBBI's clone() method failed");
135 if(bi1
->getText() != bi1clone
->getText() ||
136 bi2clone
->getText() != bi2
->getText() ||
137 *bi2clone
== *bi1clone
)
138 errln((UnicodeString
)"ERROR: RBBI's clone() method failed");
148 void RBBIAPITest::TestBoilerPlate()
150 UErrorCode status
= U_ZERO_ERROR
;
151 BreakIterator
* a
= BreakIterator::createWordInstance(Locale("hi"), status
);
152 BreakIterator
* b
= BreakIterator::createWordInstance(Locale("hi_IN"),status
);
153 if (U_FAILURE(status
)) {
154 errcheckln(status
, "Creation of break iterator failed %s", u_errorName(status
));
158 errln("Failed: boilerplate method operator!= does not return correct results");
160 // Japanese word break iterators are identical to root with
161 // a dictionary-based break iterator
162 BreakIterator
* c
= BreakIterator::createCharacterInstance(Locale("ja"),status
);
163 BreakIterator
* d
= BreakIterator::createCharacterInstance(Locale("root"),status
);
166 errln("Failed: boilerplate method operator== does not return correct results");
169 errln("creation of break iterator failed");
177 void RBBIAPITest::TestgetRules()
179 UErrorCode status
=U_ZERO_ERROR
;
181 RuleBasedBreakIterator
* bi1
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
182 RuleBasedBreakIterator
* bi2
=(RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
183 if(U_FAILURE(status
)){
184 errcheckln(status
, "FAIL: in construction - %s", u_errorName(status
));
192 logln((UnicodeString
)"Testing toString()");
194 bi1
->setText((UnicodeString
)"Hello there");
196 RuleBasedBreakIterator
* bi3
=(RuleBasedBreakIterator
*)bi1
->clone();
198 UnicodeString temp
=bi1
->getRules();
199 UnicodeString temp2
=bi2
->getRules();
200 UnicodeString temp3
=bi3
->getRules();
201 if( temp2
.compare(temp3
) ==0 || temp
.compare(temp2
) == 0 || temp
.compare(temp3
) != 0)
202 errln((UnicodeString
)"ERROR: error in getRules() method");
208 void RBBIAPITest::TestHashCode()
210 UErrorCode status
=U_ZERO_ERROR
;
211 RuleBasedBreakIterator
* bi1
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
212 RuleBasedBreakIterator
* bi3
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
213 RuleBasedBreakIterator
* bi2
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
214 if(U_FAILURE(status
)){
215 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
223 logln((UnicodeString
)"Testing hashCode()");
225 bi1
->setText((UnicodeString
)"Hash code");
226 bi2
->setText((UnicodeString
)"Hash code");
227 bi3
->setText((UnicodeString
)"Hash code");
229 RuleBasedBreakIterator
* bi1clone
= (RuleBasedBreakIterator
*)bi1
->clone();
230 RuleBasedBreakIterator
* bi2clone
= (RuleBasedBreakIterator
*)bi2
->clone();
232 if(bi1
->hashCode() != bi1clone
->hashCode() || bi1
->hashCode() != bi3
->hashCode() ||
233 bi1clone
->hashCode() != bi3
->hashCode() || bi2
->hashCode() != bi2clone
->hashCode())
234 errln((UnicodeString
)"ERROR: identical objects have different hashcodes");
236 if(bi1
->hashCode() == bi2
->hashCode() || bi2
->hashCode() == bi3
->hashCode() ||
237 bi1clone
->hashCode() == bi2clone
->hashCode() || bi1clone
->hashCode() == bi2
->hashCode())
238 errln((UnicodeString
)"ERROR: different objects have same hashcodes");
247 void RBBIAPITest::TestGetSetAdoptText()
249 logln((UnicodeString
)"Testing getText setText ");
250 IcuTestErrorCode
status(*this, "TestGetSetAdoptText");
251 UnicodeString str1
="first string.";
252 UnicodeString str2
="Second string.";
253 LocalPointer
<RuleBasedBreakIterator
> charIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
));
254 LocalPointer
<RuleBasedBreakIterator
> wordIter1((RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
));
255 if(status
.isFailure()){
256 errcheckln(status
, "Fail : in construction - %s", status
.errorName());
261 CharacterIterator
* text1
= new StringCharacterIterator(str1
);
262 CharacterIterator
* text1Clone
= text1
->clone();
263 CharacterIterator
* text2
= new StringCharacterIterator(str2
);
264 CharacterIterator
* text3
= new StringCharacterIterator(str2
, 3, 10, 3); // "ond str"
266 wordIter1
->setText(str1
);
267 CharacterIterator
*tci
= &wordIter1
->getText();
270 TEST_ASSERT(tstr
== str1
);
271 if(wordIter1
->current() != 0)
272 errln((UnicodeString
)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
276 wordIter1
->setText(str2
);
277 if(wordIter1
->current() != 0)
278 errln((UnicodeString
)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1
->current() + (UnicodeString
)"\n");
281 charIter1
->adoptText(text1Clone
);
282 TEST_ASSERT(wordIter1
->getText() != charIter1
->getText());
283 tci
= &wordIter1
->getText();
285 TEST_ASSERT(tstr
== str2
);
286 tci
= &charIter1
->getText();
288 TEST_ASSERT(tstr
== str1
);
291 LocalPointer
<RuleBasedBreakIterator
> rb((RuleBasedBreakIterator
*)wordIter1
->clone());
292 rb
->adoptText(text1
);
293 if(rb
->getText() != *text1
)
294 errln((UnicodeString
)"ERROR:1 error in adoptText ");
295 rb
->adoptText(text2
);
296 if(rb
->getText() != *text2
)
297 errln((UnicodeString
)"ERROR:2 error in adoptText ");
299 // Adopt where iterator range is less than the entire original source string.
300 // (With the change of the break engine to working with UText internally,
301 // CharacterIterators starting at positions other than zero are not supported)
302 rb
->adoptText(text3
);
303 TEST_ASSERT(rb
->preceding(2) == 0);
304 TEST_ASSERT(rb
->following(11) == BreakIterator::DONE
);
305 //if(rb->preceding(2) != 3) {
306 // errln((UnicodeString)"ERROR:3 error in adoptText ");
308 //if(rb->following(11) != BreakIterator::DONE) {
309 // errln((UnicodeString)"ERROR:4 error in adoptText ");
314 // Quick test to see if UText is working at all.
316 const char *s1
= "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
317 const char *s2
= "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
321 LocalUTextPointer
ut(utext_openUTF8(NULL
, s1
, -1, status
));
322 wordIter1
->setText(ut
.getAlias(), status
);
323 TEST_ASSERT_SUCCESS(status
);
326 pos
= wordIter1
->first();
328 pos
= wordIter1
->next();
330 pos
= wordIter1
->next();
332 pos
= wordIter1
->next();
333 TEST_ASSERT(pos
==11);
334 pos
= wordIter1
->next();
335 TEST_ASSERT(pos
==UBRK_DONE
);
338 LocalUTextPointer
ut2(utext_openUTF8(NULL
, s2
, -1, status
));
339 TEST_ASSERT_SUCCESS(status
);
340 wordIter1
->setText(ut2
.getAlias(), status
);
341 TEST_ASSERT_SUCCESS(status
);
343 pos
= wordIter1
->first();
345 pos
= wordIter1
->next();
347 pos
= wordIter1
->next();
350 pos
= wordIter1
->last();
352 pos
= wordIter1
->previous();
354 pos
= wordIter1
->previous();
356 pos
= wordIter1
->previous();
358 pos
= wordIter1
->previous();
359 TEST_ASSERT(pos
==UBRK_DONE
);
362 UnicodeString sEmpty
;
363 LocalUTextPointer
gut2(utext_openUnicodeString(NULL
, &sEmpty
, status
));
364 wordIter1
->getUText(gut2
.getAlias(), status
);
365 TEST_ASSERT_SUCCESS(status
);
370 void RBBIAPITest::TestIteration()
372 // This test just verifies that the API is present.
373 // Testing for correct operation of the break rules happens elsewhere.
375 UErrorCode status
=U_ZERO_ERROR
;
376 RuleBasedBreakIterator
* bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
377 if (U_FAILURE(status
) || bi
== NULL
) {
378 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
383 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status
);
384 if (U_FAILURE(status
) || bi
== NULL
) {
385 errcheckln(status
, "Failure creating Word break iterator. Status = %s", u_errorName(status
));
390 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status
);
391 if (U_FAILURE(status
) || bi
== NULL
) {
392 errcheckln(status
, "Failure creating Line break iterator. Status = %s", u_errorName(status
));
397 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status
);
398 if (U_FAILURE(status
) || bi
== NULL
) {
399 errcheckln(status
, "Failure creating Sentence break iterator. Status = %s", u_errorName(status
));
404 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status
);
405 if (U_FAILURE(status
) || bi
== NULL
) {
406 errcheckln(status
, "Failure creating Title break iterator. Status = %s", u_errorName(status
));
411 bi
= (RuleBasedBreakIterator
*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status
);
412 if (U_FAILURE(status
) || bi
== NULL
) {
413 errcheckln(status
, "Failure creating character break iterator. Status = %s", u_errorName(status
));
414 return; // Skip the rest of these tests.
418 UnicodeString testString
="0123456789";
419 bi
->setText(testString
);
424 errln("Incorrect value from bi->first(). Expected 0, got %d.", i
);
429 errln("Incorrect value from bi->last(). Expected 10, got %d", i
);
438 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__
, i
);
444 if (i
!= BreakIterator::DONE
) {
445 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__
, i
);
454 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__
, i
);
459 if (i
!= BreakIterator::DONE
) {
460 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__
, i
);
470 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
476 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__
, i
);
483 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__
, i
);
490 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__
, i
);
497 i
= bi
->following(4);
499 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__
, i
);
502 i
= bi
->following(9);
504 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__
, i
);
507 i
= bi
->following(10);
508 if (i
!= BreakIterator::DONE
) {
509 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__
, i
);
516 i
= bi
->preceding(4);
518 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__
, i
);
521 i
= bi
->preceding(10);
523 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__
, i
);
526 i
= bi
->preceding(1);
528 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__
, i
);
531 i
= bi
->preceding(0);
532 if (i
!= BreakIterator::DONE
) {
533 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__
, i
);
541 if (bi
->isBoundary(3) != TRUE
) {
542 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__
, i
);
546 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__
, i
);
550 if (bi
->isBoundary(11) != FALSE
) {
551 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__
, i
);
555 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__
, i
);
564 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__
, i
);
569 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__
, i
);
574 if (i
!= BreakIterator::DONE
) {
575 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__
, i
);
587 void RBBIAPITest::TestBuilder() {
588 UnicodeString rulesString1
= "$Letters = [:L:];\n"
589 "$Numbers = [:N:];\n"
592 "[^$Letters $Numbers];\n"
594 UnicodeString testString1
= "abc123..abc";
596 int32_t bounds1
[] = {0, 3, 6, 7, 8, 11};
597 UErrorCode status
=U_ZERO_ERROR
;
598 UParseError parseError
;
600 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
601 if(U_FAILURE(status
)) {
602 dataerrln("Fail : in construction - %s", u_errorName(status
));
604 bi
->setText(testString1
);
605 doBoundaryTest(*bi
, testString1
, bounds1
);
613 // Single quotes within rules imply a grouping, so that a modifier
614 // following the quoted text (* or +) applies to all of the quoted chars.
616 void RBBIAPITest::TestQuoteGrouping() {
617 UnicodeString rulesString1
= "#Here comes the rule...\n"
618 "'$@!'*;\n" // (\$\@\!)*
621 UnicodeString testString1
= "$@!$@!X$@!!X";
623 int32_t bounds1
[] = {0, 6, 7, 10, 11, 12};
624 UErrorCode status
=U_ZERO_ERROR
;
625 UParseError parseError
;
627 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
628 if(U_FAILURE(status
)) {
629 dataerrln("Fail : in construction - %s", u_errorName(status
));
631 bi
->setText(testString1
);
632 doBoundaryTest(*bi
, testString1
, bounds1
);
639 // Test word break rule status constants.
641 void RBBIAPITest::TestRuleStatus() {
643 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
644 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
645 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
646 // 012345678901234567 8 9 0
649 UnicodeString
testString1(str
);
650 int32_t bounds1
[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
651 int32_t tag_lo
[] = {UBRK_WORD_NONE
, UBRK_WORD_LETTER
, UBRK_WORD_NONE
, UBRK_WORD_LETTER
,
652 UBRK_WORD_NONE
, UBRK_WORD_NUMBER
, UBRK_WORD_NONE
,
653 UBRK_WORD_IDEO
, UBRK_WORD_NONE
};
655 int32_t tag_hi
[] = {UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
, UBRK_WORD_NONE_LIMIT
, UBRK_WORD_LETTER_LIMIT
,
656 UBRK_WORD_NONE_LIMIT
, UBRK_WORD_NUMBER_LIMIT
, UBRK_WORD_NONE_LIMIT
,
657 UBRK_WORD_IDEO_LIMIT
, UBRK_WORD_NONE_LIMIT
};
659 UErrorCode status
=U_ZERO_ERROR
;
661 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createWordInstance(Locale::getEnglish(), status
);
662 if(U_FAILURE(status
)) {
663 errcheckln(status
, "Fail : in construction - %s", u_errorName(status
));
665 bi
->setText(testString1
);
666 // First test that the breaks are in the right spots.
667 doBoundaryTest(*bi
, testString1
, bounds1
);
669 // Then go back and check tag values
672 for (pos
= bi
->first(); pos
!= BreakIterator::DONE
; pos
= bi
->next(), i
++) {
673 if (pos
!= bounds1
[i
]) {
674 errln("FAIL: unexpected word break at postion %d", pos
);
677 tag
= bi
->getRuleStatus();
678 if (tag
< tag_lo
[i
] || tag
>= tag_hi
[i
]) {
679 errln("FAIL: incorrect tag value %d at position %d", tag
, pos
);
683 // Check that we get the same tag values from getRuleStatusVec()
685 int t
= bi
->getRuleStatusVec(vec
, 10, status
);
686 TEST_ASSERT_SUCCESS(status
);
688 TEST_ASSERT(vec
[0] == tag
);
693 // Now test line break status. This test mostly is to confirm that the status constants
694 // are correctly declared in the header.
695 testString1
= "test line. \n";
698 bi
= (RuleBasedBreakIterator
*)
699 BreakIterator::createLineInstance(Locale::getEnglish(), status
);
700 if(U_FAILURE(status
)) {
701 errcheckln(status
, "failed to create word break iterator. - %s", u_errorName(status
));
707 bi
->setText(testString1
);
709 tag
= bi
->getRuleStatus();
710 for (i
=0; i
<3; i
++) {
713 success
= pos
==0 && tag
==UBRK_LINE_SOFT
; break;
715 success
= pos
==5 && tag
==UBRK_LINE_SOFT
; break;
717 success
= pos
==12 && tag
==UBRK_LINE_HARD
; break;
719 success
= FALSE
; break;
721 if (success
== FALSE
) {
722 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
727 tag
= bi
->getRuleStatus();
729 if (UBRK_LINE_SOFT
>= UBRK_LINE_SOFT_LIMIT
||
730 UBRK_LINE_HARD
>= UBRK_LINE_HARD_LIMIT
||
731 (UBRK_LINE_HARD
> UBRK_LINE_SOFT
&& UBRK_LINE_HARD
< UBRK_LINE_SOFT_LIMIT
)) {
732 errln("UBRK_LINE_* constants from header are inconsistent.");
742 // Test the vector form of break rule status.
744 void RBBIAPITest::TestRuleStatusVec() {
745 UnicodeString
rulesString( "[A-N]{100}; \n"
750 "!.*;\n", -1, US_INV
);
751 UnicodeString testString1
= "Aapz5?";
752 int32_t statusVals
[10];
756 UErrorCode status
=U_ZERO_ERROR
;
757 UParseError parseError
;
759 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString
, parseError
, status
);
760 if (U_FAILURE(status
)) {
761 dataerrln("Failure at file %s, line %d, error = %s", __FILE__
, __LINE__
, u_errorName(status
));
763 bi
->setText(testString1
);
768 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
769 TEST_ASSERT_SUCCESS(status
);
770 TEST_ASSERT(numStatuses
== 2);
771 TEST_ASSERT(statusVals
[0] == 100);
772 TEST_ASSERT(statusVals
[1] == 300);
777 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
778 TEST_ASSERT_SUCCESS(status
);
779 TEST_ASSERT(numStatuses
== 2);
780 TEST_ASSERT(statusVals
[0] == 200);
781 TEST_ASSERT(statusVals
[1] == 300);
786 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
787 TEST_ASSERT_SUCCESS(status
);
788 TEST_ASSERT(numStatuses
== 2);
789 TEST_ASSERT(statusVals
[0] == 200);
790 TEST_ASSERT(statusVals
[1] == 300);
795 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
796 TEST_ASSERT_SUCCESS(status
);
797 TEST_ASSERT(numStatuses
== 1);
798 TEST_ASSERT(statusVals
[0] == 300);
803 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
804 TEST_ASSERT_SUCCESS(status
);
805 TEST_ASSERT(numStatuses
== 2);
806 TEST_ASSERT(statusVals
[0] == 400);
807 TEST_ASSERT(statusVals
[1] == 500);
812 numStatuses
= bi
->getRuleStatusVec(statusVals
, 10, status
);
813 TEST_ASSERT_SUCCESS(status
);
814 TEST_ASSERT(numStatuses
== 1);
815 TEST_ASSERT(statusVals
[0] == 0);
818 // Check buffer overflow error handling. Char == A
823 memset(statusVals
, -1, sizeof(statusVals
));
824 numStatuses
= bi
->getRuleStatusVec(statusVals
, 0, status
);
825 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
826 TEST_ASSERT(numStatuses
== 2);
827 TEST_ASSERT(statusVals
[0] == -1);
829 status
= U_ZERO_ERROR
;
830 memset(statusVals
, -1, sizeof(statusVals
));
831 numStatuses
= bi
->getRuleStatusVec(statusVals
, 1, status
);
832 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
833 TEST_ASSERT(numStatuses
== 2);
834 TEST_ASSERT(statusVals
[0] == 100);
835 TEST_ASSERT(statusVals
[1] == -1);
837 status
= U_ZERO_ERROR
;
838 memset(statusVals
, -1, sizeof(statusVals
));
839 numStatuses
= bi
->getRuleStatusVec(statusVals
, 2, status
);
840 TEST_ASSERT_SUCCESS(status
);
841 TEST_ASSERT(numStatuses
== 2);
842 TEST_ASSERT(statusVals
[0] == 100);
843 TEST_ASSERT(statusVals
[1] == 300);
844 TEST_ASSERT(statusVals
[2] == -1);
851 // Bug 2190 Regression test. Builder crash on rule consisting of only a
852 // $variable reference
853 void RBBIAPITest::TestBug2190() {
854 UnicodeString rulesString1
= "$aaa = abcd;\n"
857 UnicodeString testString1
= "abcdabcd";
859 int32_t bounds1
[] = {0, 4, 8};
860 UErrorCode status
=U_ZERO_ERROR
;
861 UParseError parseError
;
863 RuleBasedBreakIterator
*bi
= new RuleBasedBreakIterator(rulesString1
, parseError
, status
);
864 if(U_FAILURE(status
)) {
865 dataerrln("Fail : in construction - %s", u_errorName(status
));
867 bi
->setText(testString1
);
868 doBoundaryTest(*bi
, testString1
, bounds1
);
874 void RBBIAPITest::TestRegistration() {
875 #if !UCONFIG_NO_SERVICE
876 UErrorCode status
= U_ZERO_ERROR
;
877 BreakIterator
* ja_word
= BreakIterator::createWordInstance("ja_JP", status
);
878 // ok to not delete these if we exit because of error?
879 BreakIterator
* ja_char
= BreakIterator::createCharacterInstance("ja_JP", status
);
880 BreakIterator
* root_word
= BreakIterator::createWordInstance("", status
);
881 BreakIterator
* root_char
= BreakIterator::createCharacterInstance("", status
);
883 if (status
== U_MISSING_RESOURCE_ERROR
|| status
== U_FILE_ACCESS_ERROR
) {
884 dataerrln("Error creating instances of break interactors - %s", u_errorName(status
));
894 URegistryKey key
= BreakIterator::registerInstance(ja_word
, "xx", UBRK_WORD
, status
);
896 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
897 if (ja_word
&& *ja_word
== *root_word
) {
898 errln("japan not different from root");
904 BreakIterator
* result
= BreakIterator::createWordInstance("xx_XX", status
);
907 fail
= *result
!= *ja_word
;
911 errln("bad result for xx_XX/word");
916 BreakIterator
* result
= BreakIterator::createCharacterInstance("ja_JP", status
);
919 fail
= *result
!= *ja_char
;
923 errln("bad result for ja_JP/char");
928 BreakIterator
* result
= BreakIterator::createCharacterInstance("xx_XX", status
);
931 fail
= *result
!= *root_char
;
935 errln("bad result for xx_XX/char");
940 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
942 const UnicodeString
* p
;
943 while ((p
= avail
->snext(status
))) {
944 if (p
->compare("xx") == 0) {
951 errln("did not find test locale");
956 UBool unreg
= BreakIterator::unregister(key
, status
);
958 errln("unable to unregister");
963 BreakIterator
* result
= BreakIterator::createWordInstance("en_US", status
);
964 BreakIterator
* root
= BreakIterator::createWordInstance("", status
);
967 fail
= *root
!= *result
;
972 errln("did not get root break");
977 StringEnumeration
* avail
= BreakIterator::getAvailableLocales();
979 const UnicodeString
* p
;
980 while ((p
= avail
->snext(status
))) {
981 if (p
->compare("xx") == 0) {
988 errln("found test locale");
994 UBool foundLocale
= FALSE
;
995 const Locale
*avail
= BreakIterator::getAvailableLocales(count
);
996 for (int i
=0; i
<count
; i
++) {
997 if (avail
[i
] == Locale::getEnglish()) {
1002 if (foundLocale
== FALSE
) {
1003 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1008 // ja_word was adopted by factory
1015 void RBBIAPITest::RoundtripRule(const char *dataFile
) {
1016 UErrorCode status
= U_ZERO_ERROR
;
1017 UParseError parseError
;
1018 parseError
.line
= 0;
1019 parseError
.offset
= 0;
1020 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", dataFile
, &status
));
1022 const UChar
*builtSource
;
1023 const uint8_t *rbbiRules
;
1024 const uint8_t *builtRules
;
1026 if (U_FAILURE(status
)) {
1027 errcheckln(status
, "Can't open \"%s\" - %s", dataFile
, u_errorName(status
));
1031 builtRules
= (const uint8_t *)udata_getMemory(data
.getAlias());
1032 builtSource
= (const UChar
*)(builtRules
+ ((RBBIDataHeader
*)builtRules
)->fRuleSource
);
1033 RuleBasedBreakIterator
*brkItr
= new RuleBasedBreakIterator(builtSource
, parseError
, status
);
1034 if (U_FAILURE(status
)) {
1035 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1036 u_errorName(status
), parseError
.line
, parseError
.offset
);
1039 rbbiRules
= brkItr
->getBinaryRules(length
);
1040 logln("Comparing \"%s\" len=%d", dataFile
, length
);
1041 if (memcmp(builtRules
, rbbiRules
, (int32_t)length
) != 0) {
1042 errln("Built rules and rebuilt rules are different %s", dataFile
);
1048 void RBBIAPITest::TestRoundtripRules() {
1049 RoundtripRule("word");
1050 RoundtripRule("title");
1051 RoundtripRule("sent");
1052 RoundtripRule("line");
1053 RoundtripRule("char");
1055 RoundtripRule("word_POSIX");
1059 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1060 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1061 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1062 // first delete actually frees rulesCopy).
1063 void RBBIAPITest::TestCreateFromRBBIData() {
1064 // Get some handy RBBIData
1065 const char *brkName
= "word"; // or "sent", "line", "char", etc.
1066 UErrorCode status
= U_ZERO_ERROR
;
1067 LocalUDataMemoryPointer
data(udata_open(U_ICUDATA_BRKITR
, "brk", brkName
, &status
));
1068 if ( U_SUCCESS(status
) ) {
1069 const RBBIDataHeader
* builtRules
= (const RBBIDataHeader
*)udata_getMemory(data
.getAlias());
1070 uint32_t length
= builtRules
->fLength
;
1071 RBBIWithProtectedFunctions
* brkItr
;
1073 // Try the memory-adopting constructor, need to copy the data first
1074 RBBIDataHeader
* rulesCopy
= (RBBIDataHeader
*) uprv_malloc(length
);
1076 uprv_memcpy( rulesCopy
, builtRules
, length
);
1078 brkItr
= new RBBIWithProtectedFunctions(rulesCopy
, status
);
1079 if ( U_SUCCESS(status
) ) {
1080 delete brkItr
; // this should free rulesCopy
1082 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status
) );
1083 status
= U_ZERO_ERROR
;// reset for the next test
1084 uprv_free( rulesCopy
);
1088 // Now try the non-adopting constructor
1089 brkItr
= new RBBIWithProtectedFunctions(builtRules
, RBBIWithProtectedFunctions::kDontAdopt
, status
);
1090 if ( U_SUCCESS(status
) ) {
1091 delete brkItr
; // this should NOT attempt to free builtRules
1092 if (builtRules
->fLength
!= length
) { // sanity check
1093 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1096 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status
) );
1100 // getBinaryRules() and RuleBasedBreakIterator(const uint8_t *binaryRules, ...)
1102 status
= U_ZERO_ERROR
;
1103 RuleBasedBreakIterator
*rb
= (RuleBasedBreakIterator
*)BreakIterator::createWordInstance(Locale::getEnglish(), status
);
1104 if (rb
== NULL
|| U_FAILURE(status
)) {
1105 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status
));
1108 const uint8_t *rules
= rb
->getBinaryRules(length
);
1109 RuleBasedBreakIterator
*rb2
= new RuleBasedBreakIterator(rules
, length
, status
);
1110 TEST_ASSERT_SUCCESS(status
);
1111 TEST_ASSERT(*rb
== *rb2
);
1112 UnicodeString words
= "one two three ";
1113 rb2
->setText(words
);
1114 int wordCounter
= 0;
1115 while (rb2
->next() != UBRK_DONE
) {
1118 TEST_ASSERT(wordCounter
== 6);
1120 status
= U_ZERO_ERROR
;
1121 RuleBasedBreakIterator
*rb3
= new RuleBasedBreakIterator(rules
, length
-1, status
);
1122 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1131 void RBBIAPITest::TestRefreshInputText() {
1133 * RefreshInput changes out the input of a Break Iterator without
1134 * changing anything else in the iterator's state. Used with Java JNI,
1135 * when Java moves the underlying string storage. This test
1136 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1137 * The right set of boundaries should still be found.
1139 UChar testStr
[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1140 UChar movedStr
[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1141 UErrorCode status
= U_ZERO_ERROR
;
1142 UText ut1
= UTEXT_INITIALIZER
;
1143 UText ut2
= UTEXT_INITIALIZER
;
1144 RuleBasedBreakIterator
*bi
= (RuleBasedBreakIterator
*)BreakIterator::createLineInstance(Locale::getEnglish(), status
);
1145 TEST_ASSERT_SUCCESS(status
);
1147 utext_openUChars(&ut1
, testStr
, -1, &status
);
1148 TEST_ASSERT_SUCCESS(status
);
1150 if (U_SUCCESS(status
)) {
1151 bi
->setText(&ut1
, status
);
1152 TEST_ASSERT_SUCCESS(status
);
1154 /* Line boundaries will occur before each letter in the original string */
1155 TEST_ASSERT(1 == bi
->next());
1156 TEST_ASSERT(3 == bi
->next());
1158 /* Move the string, kill the original string. */
1159 u_strcpy(movedStr
, testStr
);
1160 u_memset(testStr
, 0x20, u_strlen(testStr
));
1161 utext_openUChars(&ut2
, movedStr
, -1, &status
);
1162 TEST_ASSERT_SUCCESS(status
);
1163 RuleBasedBreakIterator
*returnedBI
= &bi
->refreshInputText(&ut2
, status
);
1164 TEST_ASSERT_SUCCESS(status
);
1165 TEST_ASSERT(bi
== returnedBI
);
1167 /* Find the following matches, now working in the moved string. */
1168 TEST_ASSERT(5 == bi
->next());
1169 TEST_ASSERT(7 == bi
->next());
1170 TEST_ASSERT(8 == bi
->next());
1171 TEST_ASSERT(UBRK_DONE
== bi
->next());
1181 //---------------------------------------------
1183 //---------------------------------------------
1185 void RBBIAPITest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
1187 if (exec
) logln((UnicodeString
)"TestSuite RuleBasedBreakIterator API ");
1189 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1190 #if !UCONFIG_NO_FILE_IO
1191 case 0: name
= "TestCloneEquals"; if (exec
) TestCloneEquals(); break;
1192 case 1: name
= "TestgetRules"; if (exec
) TestgetRules(); break;
1193 case 2: name
= "TestHashCode"; if (exec
) TestHashCode(); break;
1194 case 3: name
= "TestGetSetAdoptText"; if (exec
) TestGetSetAdoptText(); break;
1195 case 4: name
= "TestIteration"; if (exec
) TestIteration(); break;
1197 case 0: case 1: case 2: case 3: case 4: name
= "skip"; break;
1199 case 5: name
= "TestBuilder"; if (exec
) TestBuilder(); break;
1200 case 6: name
= "TestQuoteGrouping"; if (exec
) TestQuoteGrouping(); break;
1201 case 7: name
= "TestRuleStatusVec"; if (exec
) TestRuleStatusVec(); break;
1202 case 8: name
= "TestBug2190"; if (exec
) TestBug2190(); break;
1203 #if !UCONFIG_NO_FILE_IO
1204 case 9: name
= "TestRegistration"; if (exec
) TestRegistration(); break;
1205 case 10: name
= "TestBoilerPlate"; if (exec
) TestBoilerPlate(); break;
1206 case 11: name
= "TestRuleStatus"; if (exec
) TestRuleStatus(); break;
1207 case 12: name
= "TestRoundtripRules"; if (exec
) TestRoundtripRules(); break;
1208 case 13: name
= "TestCreateFromRBBIData"; if (exec
) TestCreateFromRBBIData(); break;
1210 case 9: case 10: case 11: case 12: case 13: name
= "skip"; break;
1212 case 14: name
= "TestRefreshInputText"; if (exec
) TestRefreshInputText(); break;
1214 default: name
= ""; break; // needed to end loop
1218 //---------------------------------------------
1219 //Internal subroutines
1220 //---------------------------------------------
1222 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator
& bi
, UnicodeString
& text
, int32_t *boundaries
){
1223 logln((UnicodeString
)"testIsBoundary():");
1226 for (int32_t i
= 0; i
< text
.length(); i
++) {
1227 isB
= bi
.isBoundary(i
);
1228 logln((UnicodeString
)"bi.isBoundary(" + i
+ ") -> " + isB
);
1230 if (i
== boundaries
[p
]) {
1232 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected true, got false");
1237 errln((UnicodeString
)"Wrong result from isBoundary() for " + i
+ (UnicodeString
)": expected false, got true");
1241 void RBBIAPITest::doTest(UnicodeString
& testString
, int32_t start
, int32_t gotoffset
, int32_t expectedOffset
, const char* expectedString
){
1242 UnicodeString selected
;
1243 UnicodeString expected
=CharsToUnicodeString(expectedString
);
1245 if(gotoffset
!= expectedOffset
)
1246 errln((UnicodeString
)"ERROR:****returned #" + gotoffset
+ (UnicodeString
)" instead of #" + expectedOffset
);
1247 if(start
<= gotoffset
){
1248 testString
.extractBetween(start
, gotoffset
, selected
);
1251 testString
.extractBetween(gotoffset
, start
, selected
);
1253 if(selected
.compare(expected
) != 0)
1254 errln(prettify((UnicodeString
)"ERROR:****selected \"" + selected
+ "\" instead of \"" + expected
+ "\""));
1256 logln(prettify("****selected \"" + selected
+ "\""));
1259 //---------------------------------------------
1260 //RBBIWithProtectedFunctions class functions
1261 //---------------------------------------------
1263 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader
* data
, UErrorCode
&status
)
1264 : RuleBasedBreakIterator(data
, status
)
1268 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader
* data
, enum EDontAdopt
, UErrorCode
&status
)
1269 : RuleBasedBreakIterator(data
, RuleBasedBreakIterator::kDontAdopt
, status
)
1273 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */