]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/rbbiapts.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbiapts.cpp
1 /********************************************************************
2 * Copyright (c) 1999-2010, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/uchar.h"
15 #include "intltest.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
18 #include "rbbiapts.h"
19 #include "rbbidata.h"
20 #include "cstring.h"
21 #include "ubrkimpl.h"
22 #include "unicode/ustring.h"
23 #include "unicode/utext.h"
24 #include "cmemory.h"
25
26 /**
27 * API Test the RuleBasedBreakIterator class
28 */
29
30
// TEST_ASSERT_SUCCESS(status)
//   Logs an error (source file, line and ICU error name) through errln() when
//   `status` is a failing error code.  errln() must be callable at the point
//   of expansion.  Note that `status` is evaluated more than once.
#define TEST_ASSERT_SUCCESS(status) \
    { \
        if (U_FAILURE(status)) { \
            errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
        } \
    }

// TEST_ASSERT(expr)
//   Logs an error (source file and line) through errln() when `expr`
//   compares equal to FALSE.  `expr` is evaluated exactly once.
#define TEST_ASSERT(expr) \
    { \
        if ((expr)==FALSE) { \
            errln("Test Failure at file %s, line %d", __FILE__, __LINE__); \
        } \
    }
36
37 void RBBIAPITest::TestCloneEquals()
38 {
39
40 UErrorCode status=U_ZERO_ERROR;
41 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
44 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
45 if(U_FAILURE(status)){
46 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
47 return;
48 }
49
50
51 UnicodeString testString="Testing word break iterators's clone() and equals()";
52 bi1->setText(testString);
53 bi2->setText(testString);
54 biequal->setText(testString);
55
56 bi3->setText("hello");
57
58 logln((UnicodeString)"Testing equals()");
59
60 logln((UnicodeString)"Testing == and !=");
61 UBool b = (*bi1 != *biequal);
62 b |= *bi1 == *bi2;
63 b |= *bi1 == *bi3;
64 if (b) {
65 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
66 }
67
68 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
69 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
70
71
72 // Quick test of RulesBasedBreakIterator assignment -
73 // Check that
74 // two different iterators are !=
75 // they are == after assignment
76 // source and dest iterator produce the same next() after assignment.
77 // deleting one doesn't disable the other.
78 logln("Testing assignment");
79 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
80 if(U_FAILURE(status)){
81 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
82 return;
83 }
84
85 RuleBasedBreakIterator biDefault, biDefault2;
86 if(U_FAILURE(status)){
87 errln((UnicodeString)"FAIL : in construction of default iterator");
88 return;
89 }
90 if (biDefault == *bix) {
91 errln((UnicodeString)"ERROR: iterators should not compare ==");
92 return;
93 }
94 if (biDefault != biDefault2) {
95 errln((UnicodeString)"ERROR: iterators should compare ==");
96 return;
97 }
98
99
100 UnicodeString HelloString("Hello Kitty");
101 bix->setText(HelloString);
102 if (*bix == *bi2) {
103 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
104 }
105 *bix = *bi2;
106 if (*bix != *bi2) {
107 errln(UnicodeString("ERROR: strings should be equal before assignment."));
108 }
109
110 int bixnext = bix->next();
111 int bi2next = bi2->next();
112 if (! (bixnext == bi2next && bixnext == 7)) {
113 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
114 }
115 delete bix;
116 if (bi2->next() != 8) {
117 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
118 }
119
120
121
122 logln((UnicodeString)"Testing clone()");
123 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
124 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
125
126 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
127 *bi1clone == *bi3 || *bi1clone == *bi2)
128 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
129
130 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
131 *bi2clone == *bi3 || *bi2clone != *bi2)
132 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
133
134 if(bi1->getText() != bi1clone->getText() ||
135 bi2clone->getText() != bi2->getText() ||
136 *bi2clone == *bi1clone )
137 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
138
139 delete bi1clone;
140 delete bi2clone;
141 delete bi1;
142 delete bi3;
143 delete bi2;
144 delete biequal;
145 }
146
147 void RBBIAPITest::TestBoilerPlate()
148 {
149 UErrorCode status = U_ZERO_ERROR;
150 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
151 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
152 if (U_FAILURE(status)) {
153 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
154 return;
155 }
156 if(*a!=*b){
157 errln("Failed: boilerplate method operator!= does not return correct results");
158 }
159 BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
160 if(a && c){
161 if(*c==*a){
162 errln("Failed: boilerplate method opertator== does not return correct results");
163 }
164 }else{
165 errln("creation of break iterator failed");
166 }
167 delete a;
168 delete b;
169 delete c;
170 }
171
172 void RBBIAPITest::TestgetRules()
173 {
174 UErrorCode status=U_ZERO_ERROR;
175
176 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
177 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
178 if(U_FAILURE(status)){
179 errcheckln(status, "FAIL: in construction - %s", u_errorName(status));
180 delete bi1;
181 delete bi2;
182 return;
183 }
184
185
186
187 logln((UnicodeString)"Testing toString()");
188
189 bi1->setText((UnicodeString)"Hello there");
190
191 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
192
193 UnicodeString temp=bi1->getRules();
194 UnicodeString temp2=bi2->getRules();
195 UnicodeString temp3=bi3->getRules();
196 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
197 errln((UnicodeString)"ERROR: error in getRules() method");
198
199 delete bi1;
200 delete bi2;
201 delete bi3;
202 }
203 void RBBIAPITest::TestHashCode()
204 {
205 UErrorCode status=U_ZERO_ERROR;
206 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
207 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
208 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
209 if(U_FAILURE(status)){
210 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
211 delete bi1;
212 delete bi2;
213 delete bi3;
214 return;
215 }
216
217
218 logln((UnicodeString)"Testing hashCode()");
219
220 bi1->setText((UnicodeString)"Hash code");
221 bi2->setText((UnicodeString)"Hash code");
222 bi3->setText((UnicodeString)"Hash code");
223
224 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
225 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
226
227 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
228 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
229 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
230
231 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
232 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
233 errln((UnicodeString)"ERROR: different objects have same hashcodes");
234
235 delete bi1clone;
236 delete bi2clone;
237 delete bi1;
238 delete bi2;
239 delete bi3;
240
241 }
242 void RBBIAPITest::TestGetSetAdoptText()
243 {
244 logln((UnicodeString)"Testing getText setText ");
245 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
246 UnicodeString str1="first string.";
247 UnicodeString str2="Second string.";
248 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
249 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
250 if(status.isFailure()){
251 errcheckln(status, "Fail : in construction - %s", status.errorName());
252 return;
253 }
254
255
256 CharacterIterator* text1= new StringCharacterIterator(str1);
257 CharacterIterator* text1Clone = text1->clone();
258 CharacterIterator* text2= new StringCharacterIterator(str2);
259 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
260
261 wordIter1->setText(str1);
262 CharacterIterator *tci = &wordIter1->getText();
263 UnicodeString tstr;
264 tci->getText(tstr);
265 TEST_ASSERT(tstr == str1);
266 if(wordIter1->current() != 0)
267 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
268
269 wordIter1->next(2);
270
271 wordIter1->setText(str2);
272 if(wordIter1->current() != 0)
273 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
274
275
276 charIter1->adoptText(text1Clone);
277 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
278 tci = &wordIter1->getText();
279 tci->getText(tstr);
280 TEST_ASSERT(tstr == str2);
281 tci = &charIter1->getText();
282 tci->getText(tstr);
283 TEST_ASSERT(tstr == str1);
284
285
286 LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone());
287 rb->adoptText(text1);
288 if(rb->getText() != *text1)
289 errln((UnicodeString)"ERROR:1 error in adoptText ");
290 rb->adoptText(text2);
291 if(rb->getText() != *text2)
292 errln((UnicodeString)"ERROR:2 error in adoptText ");
293
294 // Adopt where iterator range is less than the entire orignal source string.
295 // (With the change of the break engine to working with UText internally,
296 // CharacterIterators starting at positions other than zero are not supported)
297 rb->adoptText(text3);
298 TEST_ASSERT(rb->preceding(2) == 0);
299 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
300 //if(rb->preceding(2) != 3) {
301 // errln((UnicodeString)"ERROR:3 error in adoptText ");
302 //}
303 //if(rb->following(11) != BreakIterator::DONE) {
304 // errln((UnicodeString)"ERROR:4 error in adoptText ");
305 //}
306
307 // UText API
308 //
309 // Quick test to see if UText is working at all.
310 //
311 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
312 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
313 // 012345678901
314
315 status.reset();
316 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
317 wordIter1->setText(ut.getAlias(), status);
318 TEST_ASSERT_SUCCESS(status);
319
320 int32_t pos;
321 pos = wordIter1->first();
322 TEST_ASSERT(pos==0);
323 pos = wordIter1->next();
324 TEST_ASSERT(pos==5);
325 pos = wordIter1->next();
326 TEST_ASSERT(pos==6);
327 pos = wordIter1->next();
328 TEST_ASSERT(pos==11);
329 pos = wordIter1->next();
330 TEST_ASSERT(pos==UBRK_DONE);
331
332 status.reset();
333 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
334 TEST_ASSERT_SUCCESS(status);
335 wordIter1->setText(ut2.getAlias(), status);
336 TEST_ASSERT_SUCCESS(status);
337
338 pos = wordIter1->first();
339 TEST_ASSERT(pos==0);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==3);
342 pos = wordIter1->next();
343 TEST_ASSERT(pos==4);
344
345 pos = wordIter1->last();
346 TEST_ASSERT(pos==6);
347 pos = wordIter1->previous();
348 TEST_ASSERT(pos==4);
349 pos = wordIter1->previous();
350 TEST_ASSERT(pos==3);
351 pos = wordIter1->previous();
352 TEST_ASSERT(pos==0);
353 pos = wordIter1->previous();
354 TEST_ASSERT(pos==UBRK_DONE);
355
356 status.reset();
357 UnicodeString sEmpty;
358 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
359 wordIter1->getUText(gut2.getAlias(), status);
360 TEST_ASSERT_SUCCESS(status);
361 status.reset();
362 }
363
364
365 void RBBIAPITest::TestIteration()
366 {
367 // This test just verifies that the API is present.
368 // Testing for correct operation of the break rules happens elsewhere.
369
370 UErrorCode status=U_ZERO_ERROR;
371 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
372 if (U_FAILURE(status) || bi == NULL) {
373 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
374 }
375 delete bi;
376
377 status=U_ZERO_ERROR;
378 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
379 if (U_FAILURE(status) || bi == NULL) {
380 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
381 }
382 delete bi;
383
384 status=U_ZERO_ERROR;
385 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
386 if (U_FAILURE(status) || bi == NULL) {
387 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
388 }
389 delete bi;
390
391 status=U_ZERO_ERROR;
392 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
393 if (U_FAILURE(status) || bi == NULL) {
394 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
395 }
396 delete bi;
397
398 status=U_ZERO_ERROR;
399 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
400 if (U_FAILURE(status) || bi == NULL) {
401 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
402 }
403 delete bi;
404
405 status=U_ZERO_ERROR;
406 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
407 if (U_FAILURE(status) || bi == NULL) {
408 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
409 return; // Skip the rest of these tests.
410 }
411
412
413 UnicodeString testString="0123456789";
414 bi->setText(testString);
415
416 int32_t i;
417 i = bi->first();
418 if (i != 0) {
419 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
420 }
421
422 i = bi->last();
423 if (i != 10) {
424 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
425 }
426
427 //
428 // Previous
429 //
430 bi->last();
431 i = bi->previous();
432 if (i != 9) {
433 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
434 }
435
436
437 bi->first();
438 i = bi->previous();
439 if (i != BreakIterator::DONE) {
440 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i);
441 }
442
443 //
444 // next()
445 //
446 bi->first();
447 i = bi->next();
448 if (i != 1) {
449 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
450 }
451
452 bi->last();
453 i = bi->next();
454 if (i != BreakIterator::DONE) {
455 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i);
456 }
457
458
459 //
460 // current()
461 //
462 bi->first();
463 i = bi->current();
464 if (i != 0) {
465 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
466 }
467
468 bi->next();
469 i = bi->current();
470 if (i != 1) {
471 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
472 }
473
474 bi->last();
475 bi->next();
476 i = bi->current();
477 if (i != 10) {
478 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
479 }
480
481 bi->first();
482 bi->previous();
483 i = bi->current();
484 if (i != 0) {
485 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
486 }
487
488
489 //
490 // Following()
491 //
492 i = bi->following(4);
493 if (i != 5) {
494 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
495 }
496
497 i = bi->following(9);
498 if (i != 10) {
499 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i);
500 }
501
502 i = bi->following(10);
503 if (i != BreakIterator::DONE) {
504 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
505 }
506
507
508 //
509 // Preceding
510 //
511 i = bi->preceding(4);
512 if (i != 3) {
513 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
514 }
515
516 i = bi->preceding(10);
517 if (i != 9) {
518 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
519 }
520
521 i = bi->preceding(1);
522 if (i != 0) {
523 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
524 }
525
526 i = bi->preceding(0);
527 if (i != BreakIterator::DONE) {
528 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
529 }
530
531
532 //
533 // isBoundary()
534 //
535 bi->first();
536 if (bi->isBoundary(3) != TRUE) {
537 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
538 }
539 i = bi->current();
540 if (i != 3) {
541 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i);
542 }
543
544
545 if (bi->isBoundary(11) != FALSE) {
546 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
547 }
548 i = bi->current();
549 if (i != 10) {
550 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
551 }
552
553 //
554 // next(n)
555 //
556 bi->first();
557 i = bi->next(4);
558 if (i != 4) {
559 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
560 }
561
562 i = bi->next(6);
563 if (i != 10) {
564 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i);
565 }
566
567 bi->first();
568 i = bi->next(11);
569 if (i != BreakIterator::DONE) {
570 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i);
571 }
572
573 delete bi;
574
575 }
576
577
578
579
580
581
582 void RBBIAPITest::TestBuilder() {
583 UnicodeString rulesString1 = "$Letters = [:L:];\n"
584 "$Numbers = [:N:];\n"
585 "$Letters+;\n"
586 "$Numbers+;\n"
587 "[^$Letters $Numbers];\n"
588 "!.*;\n";
589 UnicodeString testString1 = "abc123..abc";
590 // 01234567890
591 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
592 UErrorCode status=U_ZERO_ERROR;
593 UParseError parseError;
594
595 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
596 if(U_FAILURE(status)) {
597 dataerrln("Fail : in construction - %s", u_errorName(status));
598 } else {
599 bi->setText(testString1);
600 doBoundaryTest(*bi, testString1, bounds1);
601 }
602 delete bi;
603 }
604
605
606 //
607 // TestQuoteGrouping
608 // Single quotes within rules imply a grouping, so that a modifier
609 // following the quoted text (* or +) applies to all of the quoted chars.
610 //
611 void RBBIAPITest::TestQuoteGrouping() {
612 UnicodeString rulesString1 = "#Here comes the rule...\n"
613 "'$@!'*;\n" // (\$\@\!)*
614 ".;\n";
615
616 UnicodeString testString1 = "$@!$@!X$@!!X";
617 // 0123456789012
618 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
619 UErrorCode status=U_ZERO_ERROR;
620 UParseError parseError;
621
622 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
623 if(U_FAILURE(status)) {
624 dataerrln("Fail : in construction - %s", u_errorName(status));
625 } else {
626 bi->setText(testString1);
627 doBoundaryTest(*bi, testString1, bounds1);
628 }
629 delete bi;
630 }
631
632 //
633 // TestRuleStatus
634 // Test word break rule status constants.
635 //
636 void RBBIAPITest::TestRuleStatus() {
637 UChar str[30];
638 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
639 // 012345678901234567 8 9 0 1 2 3 4 5 6
640 // Ideographic Katakana Hiragana
641 str, 30);
642 UnicodeString testString1(str);
643 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
644 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
645 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
646 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
647 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA};
648
649 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
650 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
651 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
652 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
653
654 UErrorCode status=U_ZERO_ERROR;
655
656 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
657 if(U_FAILURE(status)) {
658 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
659 } else {
660 bi->setText(testString1);
661 // First test that the breaks are in the right spots.
662 doBoundaryTest(*bi, testString1, bounds1);
663
664 // Then go back and check tag values
665 int32_t i = 0;
666 int32_t pos, tag;
667 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
668 if (pos != bounds1[i]) {
669 errln("FAIL: unexpected word break at postion %d", pos);
670 break;
671 }
672 tag = bi->getRuleStatus();
673 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
674 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
675 break;
676 }
677
678 // Check that we get the same tag values from getRuleStatusVec()
679 int32_t vec[10];
680 int t = bi->getRuleStatusVec(vec, 10, status);
681 TEST_ASSERT_SUCCESS(status);
682 TEST_ASSERT(t==1);
683 TEST_ASSERT(vec[0] == tag);
684 }
685 }
686 delete bi;
687
688 // Now test line break status. This test mostly is to confirm that the status constants
689 // are correctly declared in the header.
690 testString1 = "test line. \n";
691 // break type s s h
692
693 bi = (RuleBasedBreakIterator *)
694 BreakIterator::createLineInstance(Locale::getEnglish(), status);
695 if(U_FAILURE(status)) {
696 errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
697 } else {
698 int32_t i = 0;
699 int32_t pos, tag;
700 UBool success;
701
702 bi->setText(testString1);
703 pos = bi->current();
704 tag = bi->getRuleStatus();
705 for (i=0; i<3; i++) {
706 switch (i) {
707 case 0:
708 success = pos==0 && tag==UBRK_LINE_SOFT; break;
709 case 1:
710 success = pos==5 && tag==UBRK_LINE_SOFT; break;
711 case 2:
712 success = pos==12 && tag==UBRK_LINE_HARD; break;
713 default:
714 success = FALSE; break;
715 }
716 if (success == FALSE) {
717 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
718 i, pos, tag);
719 break;
720 }
721 pos = bi->next();
722 tag = bi->getRuleStatus();
723 }
724 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
725 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
726 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
727 errln("UBRK_LINE_* constants from header are inconsistent.");
728 }
729 }
730 delete bi;
731
732 }
733
734
735 //
736 // TestRuleStatusVec
737 // Test the vector form of break rule status.
738 //
739 void RBBIAPITest::TestRuleStatusVec() {
740 UnicodeString rulesString( "[A-N]{100}; \n"
741 "[a-w]{200}; \n"
742 "[\\p{L}]{300}; \n"
743 "[\\p{N}]{400}; \n"
744 "[0-5]{500}; \n"
745 "!.*;\n", -1, US_INV);
746 UnicodeString testString1 = "Aapz5?";
747 int32_t statusVals[10];
748 int32_t numStatuses;
749 int32_t pos;
750
751 UErrorCode status=U_ZERO_ERROR;
752 UParseError parseError;
753
754 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
755 if (U_FAILURE(status)) {
756 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
757 } else {
758 bi->setText(testString1);
759
760 // A
761 pos = bi->next();
762 TEST_ASSERT(pos==1);
763 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
764 TEST_ASSERT_SUCCESS(status);
765 TEST_ASSERT(numStatuses == 2);
766 TEST_ASSERT(statusVals[0] == 100);
767 TEST_ASSERT(statusVals[1] == 300);
768
769 // a
770 pos = bi->next();
771 TEST_ASSERT(pos==2);
772 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
773 TEST_ASSERT_SUCCESS(status);
774 TEST_ASSERT(numStatuses == 2);
775 TEST_ASSERT(statusVals[0] == 200);
776 TEST_ASSERT(statusVals[1] == 300);
777
778 // p
779 pos = bi->next();
780 TEST_ASSERT(pos==3);
781 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
782 TEST_ASSERT_SUCCESS(status);
783 TEST_ASSERT(numStatuses == 2);
784 TEST_ASSERT(statusVals[0] == 200);
785 TEST_ASSERT(statusVals[1] == 300);
786
787 // z
788 pos = bi->next();
789 TEST_ASSERT(pos==4);
790 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
791 TEST_ASSERT_SUCCESS(status);
792 TEST_ASSERT(numStatuses == 1);
793 TEST_ASSERT(statusVals[0] == 300);
794
795 // 5
796 pos = bi->next();
797 TEST_ASSERT(pos==5);
798 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
799 TEST_ASSERT_SUCCESS(status);
800 TEST_ASSERT(numStatuses == 2);
801 TEST_ASSERT(statusVals[0] == 400);
802 TEST_ASSERT(statusVals[1] == 500);
803
804 // ?
805 pos = bi->next();
806 TEST_ASSERT(pos==6);
807 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
808 TEST_ASSERT_SUCCESS(status);
809 TEST_ASSERT(numStatuses == 1);
810 TEST_ASSERT(statusVals[0] == 0);
811
812 //
813 // Check buffer overflow error handling. Char == A
814 //
815 bi->first();
816 pos = bi->next();
817 TEST_ASSERT(pos==1);
818 memset(statusVals, -1, sizeof(statusVals));
819 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
820 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
821 TEST_ASSERT(numStatuses == 2);
822 TEST_ASSERT(statusVals[0] == -1);
823
824 status = U_ZERO_ERROR;
825 memset(statusVals, -1, sizeof(statusVals));
826 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
828 TEST_ASSERT(numStatuses == 2);
829 TEST_ASSERT(statusVals[0] == 100);
830 TEST_ASSERT(statusVals[1] == -1);
831
832 status = U_ZERO_ERROR;
833 memset(statusVals, -1, sizeof(statusVals));
834 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
835 TEST_ASSERT_SUCCESS(status);
836 TEST_ASSERT(numStatuses == 2);
837 TEST_ASSERT(statusVals[0] == 100);
838 TEST_ASSERT(statusVals[1] == 300);
839 TEST_ASSERT(statusVals[2] == -1);
840 }
841 delete bi;
842
843 }
844
845 //
846 // Bug 2190 Regression test. Builder crash on rule consisting of only a
847 // $variable reference
848 void RBBIAPITest::TestBug2190() {
849 UnicodeString rulesString1 = "$aaa = abcd;\n"
850 "$bbb = $aaa;\n"
851 "$bbb;\n";
852 UnicodeString testString1 = "abcdabcd";
853 // 01234567890
854 int32_t bounds1[] = {0, 4, 8};
855 UErrorCode status=U_ZERO_ERROR;
856 UParseError parseError;
857
858 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
859 if(U_FAILURE(status)) {
860 dataerrln("Fail : in construction - %s", u_errorName(status));
861 } else {
862 bi->setText(testString1);
863 doBoundaryTest(*bi, testString1, bounds1);
864 }
865 delete bi;
866 }
867
868
869 void RBBIAPITest::TestRegistration() {
870 #if !UCONFIG_NO_SERVICE
871 UErrorCode status = U_ZERO_ERROR;
872 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
873
874 // ok to not delete these if we exit because of error?
875 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
876 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
877 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
878
879 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
880 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
881 delete ja_word;
882 delete ja_char;
883 delete root_word;
884 delete root_char;
885
886 return;
887 }
888
889 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
890 {
891 if (ja_word && *ja_word == *root_word) {
892 errln("japan not different from root");
893 }
894 }
895
896 {
897 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
898 UBool fail = TRUE;
899 if(result){
900 fail = *result != *ja_word;
901 }
902 delete result;
903 if (fail) {
904 errln("bad result for xx_XX/word");
905 }
906 }
907
908 {
909 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
910 UBool fail = TRUE;
911 if(result){
912 fail = *result != *ja_char;
913 }
914 delete result;
915 if (fail) {
916 errln("bad result for ja_JP/char");
917 }
918 }
919
920 {
921 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
922 UBool fail = TRUE;
923 if(result){
924 fail = *result != *root_char;
925 }
926 delete result;
927 if (fail) {
928 errln("bad result for xx_XX/char");
929 }
930 }
931
932 {
933 StringEnumeration* avail = BreakIterator::getAvailableLocales();
934 UBool found = FALSE;
935 const UnicodeString* p;
936 while ((p = avail->snext(status))) {
937 if (p->compare("xx") == 0) {
938 found = TRUE;
939 break;
940 }
941 }
942 delete avail;
943 if (!found) {
944 errln("did not find test locale");
945 }
946 }
947
948 {
949 UBool unreg = BreakIterator::unregister(key, status);
950 if (!unreg) {
951 errln("unable to unregister");
952 }
953 }
954
955 {
956 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
957 BreakIterator* root = BreakIterator::createWordInstance("", status);
958 UBool fail = TRUE;
959 if(root){
960 fail = *root != *result;
961 }
962 delete root;
963 delete result;
964 if (fail) {
965 errln("did not get root break");
966 }
967 }
968
969 {
970 StringEnumeration* avail = BreakIterator::getAvailableLocales();
971 UBool found = FALSE;
972 const UnicodeString* p;
973 while ((p = avail->snext(status))) {
974 if (p->compare("xx") == 0) {
975 found = TRUE;
976 break;
977 }
978 }
979 delete avail;
980 if (found) {
981 errln("found test locale");
982 }
983 }
984
985 {
986 int32_t count;
987 UBool foundLocale = FALSE;
988 const Locale *avail = BreakIterator::getAvailableLocales(count);
989 for (int i=0; i<count; i++) {
990 if (avail[i] == Locale::getEnglish()) {
991 foundLocale = TRUE;
992 break;
993 }
994 }
995 if (foundLocale == FALSE) {
996 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
997 }
998 }
999
1000
1001 // ja_word was adopted by factory
1002 delete ja_char;
1003 delete root_word;
1004 delete root_char;
1005 #endif
1006 }
1007
1008 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1009 UErrorCode status = U_ZERO_ERROR;
1010 UParseError parseError;
1011 parseError.line = 0;
1012 parseError.offset = 0;
1013 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
1014 uint32_t length;
1015 const UChar *builtSource;
1016 const uint8_t *rbbiRules;
1017 const uint8_t *builtRules;
1018
1019 if (U_FAILURE(status)) {
1020 errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
1021 return;
1022 }
1023
1024 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
1025 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1026 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1027 if (U_FAILURE(status)) {
1028 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1029 u_errorName(status), parseError.line, parseError.offset);
1030 return;
1031 };
1032 rbbiRules = brkItr->getBinaryRules(length);
1033 logln("Comparing \"%s\" len=%d", dataFile, length);
1034 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1035 errln("Built rules and rebuilt rules are different %s", dataFile);
1036 return;
1037 }
1038 delete brkItr;
1039 }
1040
1041 void RBBIAPITest::TestRoundtripRules() {
1042 RoundtripRule("word");
1043 RoundtripRule("title");
1044 RoundtripRule("sent");
1045 RoundtripRule("line");
1046 RoundtripRule("char");
1047 if (!quick) {
1048 RoundtripRule("word_ja");
1049 RoundtripRule("word_POSIX");
1050 }
1051 }
1052
1053 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1054 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1055 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1056 // first delete actually frees rulesCopy).
1057 void RBBIAPITest::TestCreateFromRBBIData() {
1058 // Get some handy RBBIData
1059 const char *brkName = "word"; // or "sent", "line", "char", etc.
1060 UErrorCode status = U_ZERO_ERROR;
1061 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status));
1062 if ( U_SUCCESS(status) ) {
1063 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias());
1064 uint32_t length = builtRules->fLength;
1065 RBBIWithProtectedFunctions * brkItr;
1066
1067 // Try the memory-adopting constructor, need to copy the data first
1068 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1069 if ( rulesCopy ) {
1070 uprv_memcpy( rulesCopy, builtRules, length );
1071
1072 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1073 if ( U_SUCCESS(status) ) {
1074 delete brkItr; // this should free rulesCopy
1075 } else {
1076 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1077 status = U_ZERO_ERROR;// reset for the next test
1078 uprv_free( rulesCopy );
1079 }
1080 }
1081
1082 // Now try the non-adopting constructor
1083 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1084 if ( U_SUCCESS(status) ) {
1085 delete brkItr; // this should NOT attempt to free builtRules
1086 if (builtRules->fLength != length) { // sanity check
1087 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1088 }
1089 } else {
1090 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1091 }
1092 }
1093 }
1094
1095 //---------------------------------------------
1096 // runIndexedTest
1097 //---------------------------------------------
1098
1099 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1100 {
1101 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1102 switch (index) {
1103 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1104 #if !UCONFIG_NO_FILE_IO
1105 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1106 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1107 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1108 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1109 case 4: name = "TestIteration"; if (exec) TestIteration(); break;
1110 #else
1111 case 0: case 1: case 2: case 3: case 4: name = "skip"; break;
1112 #endif
1113 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1114 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1115 case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1116 case 8: name = "TestBug2190"; if (exec) TestBug2190(); break;
1117 #if !UCONFIG_NO_FILE_IO
1118 case 9: name = "TestRegistration"; if (exec) TestRegistration(); break;
1119 case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1120 case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1121 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1122 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1123 #else
1124 case 9: case 10: case 11: case 12: case 13: name = "skip"; break;
1125 #endif
1126
1127 default: name = ""; break; // needed to end loop
1128 }
1129 }
1130
1131 //---------------------------------------------
1132 //Internal subroutines
1133 //---------------------------------------------
1134
1135 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1136 logln((UnicodeString)"testIsBoundary():");
1137 int32_t p = 0;
1138 UBool isB;
1139 for (int32_t i = 0; i < text.length(); i++) {
1140 isB = bi.isBoundary(i);
1141 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1142
1143 if (i == boundaries[p]) {
1144 if (!isB)
1145 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1146 p++;
1147 }
1148 else {
1149 if (isB)
1150 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1151 }
1152 }
1153 }
1154 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1155 UnicodeString selected;
1156 UnicodeString expected=CharsToUnicodeString(expectedString);
1157
1158 if(gotoffset != expectedOffset)
1159 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1160 if(start <= gotoffset){
1161 testString.extractBetween(start, gotoffset, selected);
1162 }
1163 else{
1164 testString.extractBetween(gotoffset, start, selected);
1165 }
1166 if(selected.compare(expected) != 0)
1167 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1168 else
1169 logln(prettify("****selected \"" + selected + "\""));
1170 }
1171
1172 //---------------------------------------------
1173 //RBBIWithProtectedFunctions class functions
1174 //---------------------------------------------
1175
1176 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1177 : RuleBasedBreakIterator(data, status)
1178 {
1179 }
1180
1181 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1182 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1183 {
1184 }
1185
1186 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */