]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/rbbiapts.cpp
ICU-551.41.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbiapts.cpp
1 /********************************************************************
2 * Copyright (c) 1999-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/uchar.h"
15 #include "intltest.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
18 #include "rbbiapts.h"
19 #include "rbbidata.h"
20 #include "cstring.h"
21 #include "ubrkimpl.h"
22 #include "unicode/locid.h"
23 #include "unicode/ustring.h"
24 #include "unicode/utext.h"
25 #include "cmemory.h"
26 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
27 #include "unicode/filteredbrk.h"
28 #include <stdio.h> // for sprintf
29 #endif
30 /**
31 * API Test the RuleBasedBreakIterator class
32 */
33
34
// Reports a data error (through dataerrln) that names the current file, the
// current line and the ICU error name whenever `status` holds a failure code;
// does nothing otherwise. Expands to a brace-enclosed block.
#define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) {\
    dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));}}

// Reports a test failure (through errln) with the current file, the current
// line and the stringified expression whenever `expr` compares equal to FALSE.
// Because the expression text is part of the message, rewording an asserted
// expression changes the failure output.
#define TEST_ASSERT(expr) {if ((expr) == FALSE) { \
    errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr);};}
40
41 void RBBIAPITest::TestCloneEquals()
42 {
43
44 UErrorCode status=U_ZERO_ERROR;
45 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
46 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
47 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
48 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
49 if(U_FAILURE(status)){
50 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
51 return;
52 }
53
54
55 UnicodeString testString="Testing word break iterators's clone() and equals()";
56 bi1->setText(testString);
57 bi2->setText(testString);
58 biequal->setText(testString);
59
60 bi3->setText("hello");
61
62 logln((UnicodeString)"Testing equals()");
63
64 logln((UnicodeString)"Testing == and !=");
65 UBool b = (*bi1 != *biequal);
66 b |= *bi1 == *bi2;
67 b |= *bi1 == *bi3;
68 if (b) {
69 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
70 }
71
72 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
73 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
74
75
76 // Quick test of RulesBasedBreakIterator assignment -
77 // Check that
78 // two different iterators are !=
79 // they are == after assignment
80 // source and dest iterator produce the same next() after assignment.
81 // deleting one doesn't disable the other.
82 logln("Testing assignment");
83 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
84 if(U_FAILURE(status)){
85 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
86 return;
87 }
88
89 RuleBasedBreakIterator biDefault, biDefault2;
90 if(U_FAILURE(status)){
91 errln((UnicodeString)"FAIL : in construction of default iterator");
92 return;
93 }
94 if (biDefault == *bix) {
95 errln((UnicodeString)"ERROR: iterators should not compare ==");
96 return;
97 }
98 if (biDefault != biDefault2) {
99 errln((UnicodeString)"ERROR: iterators should compare ==");
100 return;
101 }
102
103
104 UnicodeString HelloString("Hello Kitty");
105 bix->setText(HelloString);
106 if (*bix == *bi2) {
107 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
108 }
109 *bix = *bi2;
110 if (*bix != *bi2) {
111 errln(UnicodeString("ERROR: strings should be equal before assignment."));
112 }
113
114 int bixnext = bix->next();
115 int bi2next = bi2->next();
116 if (! (bixnext == bi2next && bixnext == 7)) {
117 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
118 }
119 delete bix;
120 if (bi2->next() != 8) {
121 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
122 }
123
124
125
126 logln((UnicodeString)"Testing clone()");
127 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
128 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
129
130 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
131 *bi1clone == *bi3 || *bi1clone == *bi2)
132 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
133
134 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
135 *bi2clone == *bi3 || *bi2clone != *bi2)
136 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
137
138 if(bi1->getText() != bi1clone->getText() ||
139 bi2clone->getText() != bi2->getText() ||
140 *bi2clone == *bi1clone )
141 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
142
143 delete bi1clone;
144 delete bi2clone;
145 delete bi1;
146 delete bi3;
147 delete bi2;
148 delete biequal;
149 }
150
151 void RBBIAPITest::TestBoilerPlate()
152 {
153 UErrorCode status = U_ZERO_ERROR;
154 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
155 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
156 if (U_FAILURE(status)) {
157 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
158 return;
159 }
160 if(*a!=*b){
161 errln("Failed: boilerplate method operator!= does not return correct results");
162 }
163 // Japanese word break iterators are identical to root with
164 // a dictionary-based break iterator
165 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
166 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
167 if(c && d){
168 if(*c!=*d){
169 errln("Failed: boilerplate method operator== does not return correct results");
170 }
171 }else{
172 errln("creation of break iterator failed");
173 }
174 delete a;
175 delete b;
176 delete c;
177 delete d;
178 }
179
180 void RBBIAPITest::TestgetRules()
181 {
182 UErrorCode status=U_ZERO_ERROR;
183
184 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
185 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
186 if(U_FAILURE(status)){
187 errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
188 delete bi1;
189 delete bi2;
190 return;
191 }
192
193
194
195 logln((UnicodeString)"Testing toString()");
196
197 bi1->setText((UnicodeString)"Hello there");
198
199 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
200
201 UnicodeString temp=bi1->getRules();
202 UnicodeString temp2=bi2->getRules();
203 UnicodeString temp3=bi3->getRules();
204 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
205 errln((UnicodeString)"ERROR: error in getRules() method");
206
207 delete bi1;
208 delete bi2;
209 delete bi3;
210 }
211 void RBBIAPITest::TestHashCode()
212 {
213 UErrorCode status=U_ZERO_ERROR;
214 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
215 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
216 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
217 if(U_FAILURE(status)){
218 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
219 delete bi1;
220 delete bi2;
221 delete bi3;
222 return;
223 }
224
225
226 logln((UnicodeString)"Testing hashCode()");
227
228 bi1->setText((UnicodeString)"Hash code");
229 bi2->setText((UnicodeString)"Hash code");
230 bi3->setText((UnicodeString)"Hash code");
231
232 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
233 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
234
235 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
236 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
237 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
238
239 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
240 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
241 errln((UnicodeString)"ERROR: different objects have same hashcodes");
242
243 delete bi1clone;
244 delete bi2clone;
245 delete bi1;
246 delete bi2;
247 delete bi3;
248
249 }
250 void RBBIAPITest::TestGetSetAdoptText()
251 {
252 logln((UnicodeString)"Testing getText setText ");
253 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
254 UnicodeString str1="first string.";
255 UnicodeString str2="Second string.";
256 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
257 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
258 if(status.isFailure()){
259 errcheckln(status, "Fail : in construction - %s", status.errorName());
260 return;
261 }
262
263
264 CharacterIterator* text1= new StringCharacterIterator(str1);
265 CharacterIterator* text1Clone = text1->clone();
266 CharacterIterator* text2= new StringCharacterIterator(str2);
267 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
268
269 wordIter1->setText(str1);
270 CharacterIterator *tci = &wordIter1->getText();
271 UnicodeString tstr;
272 tci->getText(tstr);
273 TEST_ASSERT(tstr == str1);
274 if(wordIter1->current() != 0)
275 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
276
277 wordIter1->next(2);
278
279 wordIter1->setText(str2);
280 if(wordIter1->current() != 0)
281 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
282
283
284 charIter1->adoptText(text1Clone);
285 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
286 tci = &wordIter1->getText();
287 tci->getText(tstr);
288 TEST_ASSERT(tstr == str2);
289 tci = &charIter1->getText();
290 tci->getText(tstr);
291 TEST_ASSERT(tstr == str1);
292
293
294 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
295 rb->adoptText(text1);
296 if(rb->getText() != *text1)
297 errln((UnicodeString)"ERROR:1 error in adoptText ");
298 rb->adoptText(text2);
299 if(rb->getText() != *text2)
300 errln((UnicodeString)"ERROR:2 error in adoptText ");
301
302 // Adopt where iterator range is less than the entire orignal source string.
303 // (With the change of the break engine to working with UText internally,
304 // CharacterIterators starting at positions other than zero are not supported)
305 rb->adoptText(text3);
306 TEST_ASSERT(rb->preceding(2) == 0);
307 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
308 //if(rb->preceding(2) != 3) {
309 // errln((UnicodeString)"ERROR:3 error in adoptText ");
310 //}
311 //if(rb->following(11) != BreakIterator::DONE) {
312 // errln((UnicodeString)"ERROR:4 error in adoptText ");
313 //}
314
315 // UText API
316 //
317 // Quick test to see if UText is working at all.
318 //
319 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
320 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
321 // 012345678901
322
323 status.reset();
324 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
325 wordIter1->setText(ut.getAlias(), status);
326 TEST_ASSERT_SUCCESS(status);
327
328 int32_t pos;
329 pos = wordIter1->first();
330 TEST_ASSERT(pos==0);
331 pos = wordIter1->next();
332 TEST_ASSERT(pos==5);
333 pos = wordIter1->next();
334 TEST_ASSERT(pos==6);
335 pos = wordIter1->next();
336 TEST_ASSERT(pos==11);
337 pos = wordIter1->next();
338 TEST_ASSERT(pos==UBRK_DONE);
339
340 status.reset();
341 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
342 TEST_ASSERT_SUCCESS(status);
343 wordIter1->setText(ut2.getAlias(), status);
344 TEST_ASSERT_SUCCESS(status);
345
346 pos = wordIter1->first();
347 TEST_ASSERT(pos==0);
348 pos = wordIter1->next();
349 TEST_ASSERT(pos==3);
350 pos = wordIter1->next();
351 TEST_ASSERT(pos==4);
352
353 pos = wordIter1->last();
354 TEST_ASSERT(pos==6);
355 pos = wordIter1->previous();
356 TEST_ASSERT(pos==4);
357 pos = wordIter1->previous();
358 TEST_ASSERT(pos==3);
359 pos = wordIter1->previous();
360 TEST_ASSERT(pos==0);
361 pos = wordIter1->previous();
362 TEST_ASSERT(pos==UBRK_DONE);
363
364 status.reset();
365 UnicodeString sEmpty;
366 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
367 wordIter1->getUText(gut2.getAlias(), status);
368 TEST_ASSERT_SUCCESS(status);
369 status.reset();
370 }
371
372
373 void RBBIAPITest::TestIteration()
374 {
375 // This test just verifies that the API is present.
376 // Testing for correct operation of the break rules happens elsewhere.
377
378 UErrorCode status=U_ZERO_ERROR;
379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
380 if (U_FAILURE(status) || bi == NULL) {
381 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
382 }
383 delete bi;
384
385 status=U_ZERO_ERROR;
386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
387 if (U_FAILURE(status) || bi == NULL) {
388 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
389 }
390 delete bi;
391
392 status=U_ZERO_ERROR;
393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
394 if (U_FAILURE(status) || bi == NULL) {
395 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
396 }
397 delete bi;
398
399 status=U_ZERO_ERROR;
400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
401 if (U_FAILURE(status) || bi == NULL) {
402 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
403 }
404 delete bi;
405
406 status=U_ZERO_ERROR;
407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
408 if (U_FAILURE(status) || bi == NULL) {
409 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
410 }
411 delete bi;
412
413 status=U_ZERO_ERROR;
414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
415 if (U_FAILURE(status) || bi == NULL) {
416 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
417 return; // Skip the rest of these tests.
418 }
419
420
421 UnicodeString testString="0123456789";
422 bi->setText(testString);
423
424 int32_t i;
425 i = bi->first();
426 if (i != 0) {
427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
428 }
429
430 i = bi->last();
431 if (i != 10) {
432 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
433 }
434
435 //
436 // Previous
437 //
438 bi->last();
439 i = bi->previous();
440 if (i != 9) {
441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
442 }
443
444
445 bi->first();
446 i = bi->previous();
447 if (i != BreakIterator::DONE) {
448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i);
449 }
450
451 //
452 // next()
453 //
454 bi->first();
455 i = bi->next();
456 if (i != 1) {
457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
458 }
459
460 bi->last();
461 i = bi->next();
462 if (i != BreakIterator::DONE) {
463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i);
464 }
465
466
467 //
468 // current()
469 //
470 bi->first();
471 i = bi->current();
472 if (i != 0) {
473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
474 }
475
476 bi->next();
477 i = bi->current();
478 if (i != 1) {
479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
480 }
481
482 bi->last();
483 bi->next();
484 i = bi->current();
485 if (i != 10) {
486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
487 }
488
489 bi->first();
490 bi->previous();
491 i = bi->current();
492 if (i != 0) {
493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
494 }
495
496
497 //
498 // Following()
499 //
500 i = bi->following(4);
501 if (i != 5) {
502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
503 }
504
505 i = bi->following(9);
506 if (i != 10) {
507 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i);
508 }
509
510 i = bi->following(10);
511 if (i != BreakIterator::DONE) {
512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
513 }
514
515
516 //
517 // Preceding
518 //
519 i = bi->preceding(4);
520 if (i != 3) {
521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
522 }
523
524 i = bi->preceding(10);
525 if (i != 9) {
526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
527 }
528
529 i = bi->preceding(1);
530 if (i != 0) {
531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
532 }
533
534 i = bi->preceding(0);
535 if (i != BreakIterator::DONE) {
536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
537 }
538
539
540 //
541 // isBoundary()
542 //
543 bi->first();
544 if (bi->isBoundary(3) != TRUE) {
545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
546 }
547 i = bi->current();
548 if (i != 3) {
549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i);
550 }
551
552
553 if (bi->isBoundary(11) != FALSE) {
554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
555 }
556 i = bi->current();
557 if (i != 10) {
558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
559 }
560
561 //
562 // next(n)
563 //
564 bi->first();
565 i = bi->next(4);
566 if (i != 4) {
567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
568 }
569
570 i = bi->next(6);
571 if (i != 10) {
572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i);
573 }
574
575 bi->first();
576 i = bi->next(11);
577 if (i != BreakIterator::DONE) {
578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i);
579 }
580
581 delete bi;
582
583 }
584
585
586
587
588
589
590 void RBBIAPITest::TestBuilder() {
591 UnicodeString rulesString1 = "$Letters = [:L:];\n"
592 "$Numbers = [:N:];\n"
593 "$Letters+;\n"
594 "$Numbers+;\n"
595 "[^$Letters $Numbers];\n"
596 "!.*;\n";
597 UnicodeString testString1 = "abc123..abc";
598 // 01234567890
599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
600 UErrorCode status=U_ZERO_ERROR;
601 UParseError parseError;
602
603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
604 if(U_FAILURE(status)) {
605 dataerrln("Fail : in construction - %s", u_errorName(status));
606 } else {
607 bi->setText(testString1);
608 doBoundaryTest(*bi, testString1, bounds1);
609 }
610 delete bi;
611 }
612
613
614 //
615 // TestQuoteGrouping
616 // Single quotes within rules imply a grouping, so that a modifier
617 // following the quoted text (* or +) applies to all of the quoted chars.
618 //
619 void RBBIAPITest::TestQuoteGrouping() {
620 UnicodeString rulesString1 = "#Here comes the rule...\n"
621 "'$@!'*;\n" // (\$\@\!)*
622 ".;\n";
623
624 UnicodeString testString1 = "$@!$@!X$@!!X";
625 // 0123456789012
626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
627 UErrorCode status=U_ZERO_ERROR;
628 UParseError parseError;
629
630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
631 if(U_FAILURE(status)) {
632 dataerrln("Fail : in construction - %s", u_errorName(status));
633 } else {
634 bi->setText(testString1);
635 doBoundaryTest(*bi, testString1, bounds1);
636 }
637 delete bi;
638 }
639
640 //
641 // TestRuleStatus
642 // Test word break rule status constants.
643 //
644 void RBBIAPITest::TestRuleStatus() {
645 UChar str[30];
646 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
647 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
648 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
649 // 012345678901234567 8 9 0
650 // Katakana
651 str, 30);
652 UnicodeString testString1(str);
653 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
654 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
655 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
656 UBRK_WORD_IDEO, UBRK_WORD_NONE};
657
658 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
659 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
660 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
661
662 UErrorCode status=U_ZERO_ERROR;
663
664 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
665 if(U_FAILURE(status)) {
666 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
667 } else {
668 bi->setText(testString1);
669 // First test that the breaks are in the right spots.
670 doBoundaryTest(*bi, testString1, bounds1);
671
672 // Then go back and check tag values
673 int32_t i = 0;
674 int32_t pos, tag;
675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
676 if (pos != bounds1[i]) {
677 errln("FAIL: unexpected word break at postion %d", pos);
678 break;
679 }
680 tag = bi->getRuleStatus();
681 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
682 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
683 break;
684 }
685
686 // Check that we get the same tag values from getRuleStatusVec()
687 int32_t vec[10];
688 int t = bi->getRuleStatusVec(vec, 10, status);
689 TEST_ASSERT_SUCCESS(status);
690 TEST_ASSERT(t==1);
691 TEST_ASSERT(vec[0] == tag);
692 }
693 }
694 delete bi;
695
696 // Now test line break status. This test mostly is to confirm that the status constants
697 // are correctly declared in the header.
698 testString1 = "test line. \n";
699 // break type s s h
700
701 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
702 if(U_FAILURE(status)) {
703 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
704 } else {
705 int32_t i = 0;
706 int32_t pos, tag;
707 UBool success;
708
709 bi->setText(testString1);
710 pos = bi->current();
711 tag = bi->getRuleStatus();
712 for (i=0; i<3; i++) {
713 switch (i) {
714 case 0:
715 success = pos==0 && tag==UBRK_LINE_SOFT; break;
716 case 1:
717 success = pos==5 && tag==UBRK_LINE_SOFT; break;
718 case 2:
719 success = pos==12 && tag==UBRK_LINE_HARD; break;
720 default:
721 success = FALSE; break;
722 }
723 if (success == FALSE) {
724 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
725 i, pos, tag);
726 break;
727 }
728 pos = bi->next();
729 tag = bi->getRuleStatus();
730 }
731 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
732 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
733 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
734 errln("UBRK_LINE_* constants from header are inconsistent.");
735 }
736 }
737 delete bi;
738
739 }
740
741
742 //
743 // TestRuleStatusVec
744 // Test the vector form of break rule status.
745 //
746 void RBBIAPITest::TestRuleStatusVec() {
747 UnicodeString rulesString( "[A-N]{100}; \n"
748 "[a-w]{200}; \n"
749 "[\\p{L}]{300}; \n"
750 "[\\p{N}]{400}; \n"
751 "[0-5]{500}; \n"
752 "!.*;\n", -1, US_INV);
753 UnicodeString testString1 = "Aapz5?";
754 int32_t statusVals[10];
755 int32_t numStatuses;
756 int32_t pos;
757
758 UErrorCode status=U_ZERO_ERROR;
759 UParseError parseError;
760
761 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
762 if (U_FAILURE(status)) {
763 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
764 } else {
765 bi->setText(testString1);
766
767 // A
768 pos = bi->next();
769 TEST_ASSERT(pos==1);
770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
771 TEST_ASSERT_SUCCESS(status);
772 TEST_ASSERT(numStatuses == 2);
773 TEST_ASSERT(statusVals[0] == 100);
774 TEST_ASSERT(statusVals[1] == 300);
775
776 // a
777 pos = bi->next();
778 TEST_ASSERT(pos==2);
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780 TEST_ASSERT_SUCCESS(status);
781 TEST_ASSERT(numStatuses == 2);
782 TEST_ASSERT(statusVals[0] == 200);
783 TEST_ASSERT(statusVals[1] == 300);
784
785 // p
786 pos = bi->next();
787 TEST_ASSERT(pos==3);
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT(numStatuses == 2);
791 TEST_ASSERT(statusVals[0] == 200);
792 TEST_ASSERT(statusVals[1] == 300);
793
794 // z
795 pos = bi->next();
796 TEST_ASSERT(pos==4);
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798 TEST_ASSERT_SUCCESS(status);
799 TEST_ASSERT(numStatuses == 1);
800 TEST_ASSERT(statusVals[0] == 300);
801
802 // 5
803 pos = bi->next();
804 TEST_ASSERT(pos==5);
805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
806 TEST_ASSERT_SUCCESS(status);
807 TEST_ASSERT(numStatuses == 2);
808 TEST_ASSERT(statusVals[0] == 400);
809 TEST_ASSERT(statusVals[1] == 500);
810
811 // ?
812 pos = bi->next();
813 TEST_ASSERT(pos==6);
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815 TEST_ASSERT_SUCCESS(status);
816 TEST_ASSERT(numStatuses == 1);
817 TEST_ASSERT(statusVals[0] == 0);
818
819 //
820 // Check buffer overflow error handling. Char == A
821 //
822 bi->first();
823 pos = bi->next();
824 TEST_ASSERT(pos==1);
825 memset(statusVals, -1, sizeof(statusVals));
826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
828 TEST_ASSERT(numStatuses == 2);
829 TEST_ASSERT(statusVals[0] == -1);
830
831 status = U_ZERO_ERROR;
832 memset(statusVals, -1, sizeof(statusVals));
833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
835 TEST_ASSERT(numStatuses == 2);
836 TEST_ASSERT(statusVals[0] == 100);
837 TEST_ASSERT(statusVals[1] == -1);
838
839 status = U_ZERO_ERROR;
840 memset(statusVals, -1, sizeof(statusVals));
841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
842 TEST_ASSERT_SUCCESS(status);
843 TEST_ASSERT(numStatuses == 2);
844 TEST_ASSERT(statusVals[0] == 100);
845 TEST_ASSERT(statusVals[1] == 300);
846 TEST_ASSERT(statusVals[2] == -1);
847 }
848 delete bi;
849
850 }
851
852 //
853 // Bug 2190 Regression test. Builder crash on rule consisting of only a
854 // $variable reference
855 void RBBIAPITest::TestBug2190() {
856 UnicodeString rulesString1 = "$aaa = abcd;\n"
857 "$bbb = $aaa;\n"
858 "$bbb;\n";
859 UnicodeString testString1 = "abcdabcd";
860 // 01234567890
861 int32_t bounds1[] = {0, 4, 8};
862 UErrorCode status=U_ZERO_ERROR;
863 UParseError parseError;
864
865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
866 if(U_FAILURE(status)) {
867 dataerrln("Fail : in construction - %s", u_errorName(status));
868 } else {
869 bi->setText(testString1);
870 doBoundaryTest(*bi, testString1, bounds1);
871 }
872 delete bi;
873 }
874
875
876 void RBBIAPITest::TestRegistration() {
877 #if !UCONFIG_NO_SERVICE
878 UErrorCode status = U_ZERO_ERROR;
879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
880 // ok to not delete these if we exit because of error?
881 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
882 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
883 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
884
885 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
886 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
887
888 delete ja_word;
889 delete ja_char;
890 delete root_word;
891 delete root_char;
892
893 return;
894 }
895
896 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
897 {
898 #if 0 // With a dictionary based word breaking, ja_word is identical to root.
899 if (ja_word && *ja_word == *root_word) {
900 errln("japan not different from root");
901 }
902 #endif
903 }
904
905 {
906 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
907 UBool fail = TRUE;
908 if(result){
909 fail = *result != *ja_word;
910 }
911 delete result;
912 if (fail) {
913 errln("bad result for xx_XX/word");
914 }
915 }
916
917 {
918 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
919 UBool fail = TRUE;
920 if(result){
921 fail = *result != *ja_char;
922 }
923 delete result;
924 if (fail) {
925 errln("bad result for ja_JP/char");
926 }
927 }
928
929 {
930 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
931 UBool fail = TRUE;
932 if(result){
933 fail = *result != *root_char;
934 }
935 delete result;
936 if (fail) {
937 errln("bad result for xx_XX/char");
938 }
939 }
940
941 {
942 StringEnumeration* avail = BreakIterator::getAvailableLocales();
943 UBool found = FALSE;
944 const UnicodeString* p;
945 while ((p = avail->snext(status))) {
946 if (p->compare("xx") == 0) {
947 found = TRUE;
948 break;
949 }
950 }
951 delete avail;
952 if (!found) {
953 errln("did not find test locale");
954 }
955 }
956
957 {
958 UBool unreg = BreakIterator::unregister(key, status);
959 if (!unreg) {
960 errln("unable to unregister");
961 }
962 }
963
964 {
965 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
966 BreakIterator* root = BreakIterator::createWordInstance("", status);
967 UBool fail = TRUE;
968 if(root){
969 fail = *root != *result;
970 }
971 delete root;
972 delete result;
973 if (fail) {
974 errln("did not get root break");
975 }
976 }
977
978 {
979 StringEnumeration* avail = BreakIterator::getAvailableLocales();
980 UBool found = FALSE;
981 const UnicodeString* p;
982 while ((p = avail->snext(status))) {
983 if (p->compare("xx") == 0) {
984 found = TRUE;
985 break;
986 }
987 }
988 delete avail;
989 if (found) {
990 errln("found test locale");
991 }
992 }
993
994 {
995 int32_t count;
996 UBool foundLocale = FALSE;
997 const Locale *avail = BreakIterator::getAvailableLocales(count);
998 for (int i=0; i<count; i++) {
999 if (avail[i] == Locale::getEnglish()) {
1000 foundLocale = TRUE;
1001 break;
1002 }
1003 }
1004 if (foundLocale == FALSE) {
1005 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1006 }
1007 }
1008
1009
1010 // ja_word was adopted by factory
1011 delete ja_char;
1012 delete root_word;
1013 delete root_char;
1014 #endif
1015 }
1016
1017 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1018 UErrorCode status = U_ZERO_ERROR;
1019 UParseError parseError;
1020 parseError.line = 0;
1021 parseError.offset = 0;
1022 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1023 uint32_t length;
1024 const UChar *builtSource;
1025 const uint8_t *rbbiRules;
1026 const uint8_t *builtRules;
1027
1028 if (U_FAILURE(status)) {
1029 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1030 return;
1031 }
1032
1033 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1034 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1035 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1036 if (U_FAILURE(status)) {
1037 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1038 u_errorName(status), parseError.line, parseError.offset);
1039 return;
1040 };
1041 rbbiRules = brkItr->getBinaryRules(length);
1042 logln("Comparing \"%s\" len=%d", dataFile, length);
1043 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1044 errln("Built rules and rebuilt rules are different %s", dataFile);
1045 return;
1046 }
1047 delete brkItr;
1048 }
1049
1050 void RBBIAPITest::TestRoundtripRules() {
1051 RoundtripRule("word");
1052 RoundtripRule("title");
1053 RoundtripRule("sent");
1054 RoundtripRule("line");
1055 RoundtripRule("char");
1056 if (!quick) {
1057 RoundtripRule("word_POSIX");
1058 }
1059 }
1060
1061 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1062 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1063 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1064 // first delete actually frees rulesCopy).
1065 void RBBIAPITest::TestCreateFromRBBIData() {
1066 // Get some handy RBBIData
1067 const char *brkName = "word"; // or "sent", "line", "char", etc.
1068 UErrorCode status = U_ZERO_ERROR;
1069 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
1070 if ( U_SUCCESS(status) ) {
1071 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
1072 uint32_t length = builtRules->fLength;
1073 RBBIWithProtectedFunctions * brkItr;
1074
1075 // Try the memory-adopting constructor, need to copy the data first
1076 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1077 if ( rulesCopy ) {
1078 uprv_memcpy( rulesCopy, builtRules, length );
1079
1080 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1081 if ( U_SUCCESS(status) ) {
1082 delete brkItr; // this should free rulesCopy
1083 } else {
1084 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1085 status = U_ZERO_ERROR;// reset for the next test
1086 uprv_free( rulesCopy );
1087 }
1088 }
1089
1090 // Now try the non-adopting constructor
1091 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1092 if ( U_SUCCESS(status) ) {
1093 delete brkItr; // this should NOT attempt to free builtRules
1094 if (builtRules->fLength != length) { // sanity check
1095 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1096 }
1097 } else {
1098 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1099 }
1100 }
1101
1102 // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...)
1103 //
1104 status = U_ZERO_ERROR;
1105 RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
1106 if (rb == NULL || U_FAILURE(status)) {
1107 dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status));
1108 } else {
1109 uint32_t length;
1110 const uint8_t *rules = rb->getBinaryRules(length);
1111 RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status);
1112 TEST_ASSERT_SUCCESS(status);
1113 TEST_ASSERT(*rb == *rb2);
1114 UnicodeString words = "one two three ";
1115 rb2->setText(words);
1116 int wordCounter = 0;
1117 while (rb2->next() != UBRK_DONE) {
1118 wordCounter++;
1119 }
1120 TEST_ASSERT(wordCounter == 6);
1121
1122 status = U_ZERO_ERROR;
1123 RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status);
1124 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1125
1126 delete rb;
1127 delete rb2;
1128 delete rb3;
1129 }
1130 }
1131
1132
1133 void RBBIAPITest::TestRefreshInputText() {
1134 /*
1135 * RefreshInput changes out the input of a Break Iterator without
1136 * changing anything else in the iterator's state. Used with Java JNI,
1137 * when Java moves the underlying string storage. This test
1138 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1139 * The right set of boundaries should still be found.
1140 */
1141 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1142 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1143 UErrorCode status = U_ZERO_ERROR;
1144 UText ut1 = UTEXT_INITIALIZER;
1145 UText ut2 = UTEXT_INITIALIZER;
1146 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1147 TEST_ASSERT_SUCCESS(status);
1148
1149 utext_openUChars(&ut1, testStr, -1, &status);
1150 TEST_ASSERT_SUCCESS(status);
1151
1152 if (U_SUCCESS(status)) {
1153 bi->setText(&ut1, status);
1154 TEST_ASSERT_SUCCESS(status);
1155
1156 /* Line boundaries will occur before each letter in the original string */
1157 TEST_ASSERT(1 == bi->next());
1158 TEST_ASSERT(3 == bi->next());
1159
1160 /* Move the string, kill the original string. */
1161 u_strcpy(movedStr, testStr);
1162 u_memset(testStr, 0x20, u_strlen(testStr));
1163 utext_openUChars(&ut2, movedStr, -1, &status);
1164 TEST_ASSERT_SUCCESS(status);
1165 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1166 TEST_ASSERT_SUCCESS(status);
1167 TEST_ASSERT(bi == returnedBI);
1168
1169 /* Find the following matches, now working in the moved string. */
1170 TEST_ASSERT(5 == bi->next());
1171 TEST_ASSERT(7 == bi->next());
1172 TEST_ASSERT(8 == bi->next());
1173 TEST_ASSERT(UBRK_DONE == bi->next());
1174
1175 utext_close(&ut1);
1176 utext_close(&ut2);
1177 }
1178 delete bi;
1179
1180 }
1181
1182 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1183 static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1184 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1185 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1186
1187 int32_t *pos = new int32_t[ustr.length()];
1188 int32_t posCount = 0;
1189
1190 // calculate breaks up front, so we can print out
1191 // sans any debugging
1192 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1193 pos[posCount++] = n;
1194 if(posCount>=ustr.length()) {
1195 it.errln("brk count exceeds string length!");
1196 return;
1197 }
1198 }
1199 UnicodeString out;
1200 out.append((UChar)CHSTR);
1201 int32_t prev = 0;
1202 for(int32_t i=0;i<posCount;i++) {
1203 int32_t n=pos[i];
1204 out.append(ustr.tempSubString(prev,n-prev));
1205 out.append((UChar)PILCROW);
1206 prev=n;
1207 }
1208 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1209 out.append((UChar)CHEND);
1210 it.logln(out);
1211
1212 out.remove();
1213 for(int32_t i=0;i<posCount;i++) {
1214 char tmp[100];
1215 sprintf(tmp,"%d ",pos[i]);
1216 out.append(UnicodeString(tmp));
1217 }
1218 it.logln(out);
1219 delete [] pos;
1220 }
1221 #endif
1222
1223 void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
1224 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
1225 UErrorCode status = U_ZERO_ERROR;
1226 LocalPointer<FilteredBreakIteratorBuilder> builder;
1227 LocalPointer<BreakIterator> baseBI;
1228 LocalPointer<BreakIterator> filteredBI;
1229 LocalPointer<BreakIterator> frenchBI;
1230
1231 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1232 const UnicodeString ABBR_MR("Mr.");
1233 const UnicodeString ABBR_CAPT("Capt.");
1234
1235 {
1236 logln("Constructing empty builder\n");
1237 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1238 TEST_ASSERT_SUCCESS(status);
1239
1240 logln("Constructing base BI\n");
1241 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1242 TEST_ASSERT_SUCCESS(status);
1243
1244 logln("Building new BI\n");
1245 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1246 TEST_ASSERT_SUCCESS(status);
1247
1248 if (U_SUCCESS(status)) {
1249 logln("Testing:");
1250 filteredBI->setText(text);
1251 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1252 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1253 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1254 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1255 TEST_ASSERT(278 == filteredBI->next()); // charge.
1256 filteredBI->first();
1257 prtbrks(filteredBI.getAlias(), text, *this);
1258 }
1259 }
1260
1261 {
1262 logln("Constructing empty builder\n");
1263 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1264 TEST_ASSERT_SUCCESS(status);
1265
1266 if (U_SUCCESS(status)) {
1267 logln("Adding Mr. as an exception\n");
1268 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1269 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1270 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1271 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1272 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1273 TEST_ASSERT_SUCCESS(status);
1274
1275 logln("Constructing base BI\n");
1276 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1277 TEST_ASSERT_SUCCESS(status);
1278
1279 logln("Building new BI\n");
1280 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1281 TEST_ASSERT_SUCCESS(status);
1282
1283 logln("Testing:");
1284 filteredBI->setText(text);
1285 TEST_ASSERT(84 == filteredBI->next());
1286 TEST_ASSERT(90 == filteredBI->next());// Capt.
1287 TEST_ASSERT(278 == filteredBI->next());
1288 filteredBI->first();
1289 prtbrks(filteredBI.getAlias(), text, *this);
1290 }
1291 }
1292
1293
1294 {
1295 logln("Constructing empty builder\n");
1296 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1297 TEST_ASSERT_SUCCESS(status);
1298
1299 if (U_SUCCESS(status)) {
1300 logln("Adding Mr. and Capt as an exception\n");
1301 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1302 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1303 TEST_ASSERT_SUCCESS(status);
1304
1305 logln("Constructing base BI\n");
1306 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1307 TEST_ASSERT_SUCCESS(status);
1308
1309 logln("Building new BI\n");
1310 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1311 TEST_ASSERT_SUCCESS(status);
1312
1313 logln("Testing:");
1314 filteredBI->setText(text);
1315 TEST_ASSERT(84 == filteredBI->next());
1316 TEST_ASSERT(278 == filteredBI->next());
1317 filteredBI->first();
1318 prtbrks(filteredBI.getAlias(), text, *this);
1319 }
1320 }
1321
1322
1323 {
1324 logln("Constructing English builder\n");
1325 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1326 TEST_ASSERT_SUCCESS(status);
1327
1328 logln("Constructing base BI\n");
1329 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1330 TEST_ASSERT_SUCCESS(status);
1331
1332 if (U_SUCCESS(status)) {
1333 logln("unsuppressing 'Capt'");
1334 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
1335
1336 logln("Building new BI\n");
1337 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1338 TEST_ASSERT_SUCCESS(status);
1339
1340 if(filteredBI.isValid()) {
1341 logln("Testing:");
1342 filteredBI->setText(text);
1343 TEST_ASSERT(84 == filteredBI->next());
1344 TEST_ASSERT(90 == filteredBI->next());
1345 TEST_ASSERT(278 == filteredBI->next());
1346 filteredBI->first();
1347 prtbrks(filteredBI.getAlias(), text, *this);
1348 }
1349 }
1350 }
1351
1352
1353 {
1354 logln("Constructing English builder\n");
1355 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1356 TEST_ASSERT_SUCCESS(status);
1357
1358 logln("Constructing base BI\n");
1359 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1360 TEST_ASSERT_SUCCESS(status);
1361
1362 if (U_SUCCESS(status)) {
1363 logln("Building new BI\n");
1364 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1365 TEST_ASSERT_SUCCESS(status);
1366
1367 if(filteredBI.isValid()) {
1368 logln("Testing:");
1369 filteredBI->setText(text);
1370 TEST_ASSERT(84 == filteredBI->next());
1371 TEST_ASSERT(278 == filteredBI->next());
1372 filteredBI->first();
1373 prtbrks(filteredBI.getAlias(), text, *this);
1374 }
1375 }
1376 }
1377
1378 // reenable once french is in
1379 {
1380 logln("Constructing French builder");
1381 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1382 TEST_ASSERT_SUCCESS(status);
1383
1384 logln("Constructing base BI\n");
1385 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1386 TEST_ASSERT_SUCCESS(status);
1387
1388 if (U_SUCCESS(status)) {
1389 logln("Building new BI\n");
1390 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1391 TEST_ASSERT_SUCCESS(status);
1392 }
1393
1394 if(frenchBI.isValid()) {
1395 logln("Testing:");
1396 UnicodeString frText("C'est MM. Duval.");
1397 frenchBI->setText(frText);
1398 TEST_ASSERT(16 == frenchBI->next());
1399 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1400 frenchBI->first();
1401 prtbrks(frenchBI.getAlias(), frText, *this);
1402 logln("Testing against English:");
1403 filteredBI->setText(frText);
1404 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1405 TEST_ASSERT(16 == filteredBI->next());
1406 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
1407 filteredBI->first();
1408 prtbrks(filteredBI.getAlias(), frText, *this);
1409
1410 // Verify ==
1411 TEST_ASSERT_TRUE(*frenchBI == *frenchBI);
1412 TEST_ASSERT_TRUE(*filteredBI != *frenchBI);
1413 TEST_ASSERT_TRUE(*frenchBI != *filteredBI);
1414 } else {
1415 dataerrln("French BI: not valid.");
1416 }
1417 }
1418
1419 #else
1420 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
1421 #endif
1422 }
1423
1424 //---------------------------------------------
1425 // runIndexedTest
1426 //---------------------------------------------
1427
1428 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1429 {
1430 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1431 switch (index) {
1432 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1433 #if !UCONFIG_NO_FILE_IO
1434 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1435 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1436 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1437 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1438 case 4: name = "TestIteration"; if (exec) TestIteration(); break;
1439 #else
1440 case 0: case 1: case 2: case 3: case 4: name = "skip"; break;
1441 #endif
1442 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1443 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1444 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1445 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;
1446 #if !UCONFIG_NO_FILE_IO
1447 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;
1448 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1449 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1450 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1451 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1452 #else
1453 case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
1454 #endif
1455 case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break;
1456
1457 #if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING
1458 case 15: name = "TestFilteredBreakIteratorBuilder"; if(exec) TestFilteredBreakIteratorBuilder(); break;
1459 #else
1460 case 15: name="skip"; break;
1461 #endif
1462 default: name = ""; break; // needed to end loop
1463 }
1464 }
1465
1466 //---------------------------------------------
1467 //Internal subroutines
1468 //---------------------------------------------
1469
1470 void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1471 logln((UnicodeString)"testIsBoundary():");
1472 int32_t p = 0;
1473 UBool isB;
1474 for (int32_t i = 0; i < text.length(); i++) {
1475 isB = bi.isBoundary(i);
1476 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1477
1478 if (i == boundaries[p]) {
1479 if (!isB)
1480 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1481 p++;
1482 }
1483 else {
1484 if (isB)
1485 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1486 }
1487 }
1488 }
1489 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1490 UnicodeString selected;
1491 UnicodeString expected=CharsToUnicodeString(expectedString);
1492
1493 if(gotoffset != expectedOffset)
1494 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1495 if(start <= gotoffset){
1496 testString.extractBetween(start, gotoffset, selected);
1497 }
1498 else{
1499 testString.extractBetween(gotoffset, start, selected);
1500 }
1501 if(selected.compare(expected) != 0)
1502 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1503 else
1504 logln(prettify("****selected \"" + selected + "\""));
1505 }
1506
1507 //---------------------------------------------
1508 //RBBIWithProtectedFunctions class functions
1509 //---------------------------------------------
1510
1511 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1512 : RuleBasedBreakIterator(data, status)
1513 {
1514 }
1515
1516 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1517 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1518 {
1519 }
1520
1521 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */