]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/rbbiapts.cpp
ICU-400.39.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbiapts.cpp
1 /********************************************************************
2 * Copyright (c) 1999-2008, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * Date Name Description
6 * 12/14/99 Madhu Creation.
7 * 01/12/2000 Madhu updated for changed API
8 ********************************************************************/
9
10 #include "unicode/utypes.h"
11
12 #if !UCONFIG_NO_BREAK_ITERATION
13
14 #include "unicode/uchar.h"
15 #include "intltest.h"
16 #include "unicode/rbbi.h"
17 #include "unicode/schriter.h"
18 #include "rbbiapts.h"
19 #include "rbbidata.h"
20 #include "cstring.h"
21 #include "ubrkimpl.h"
22 #include "unicode/ustring.h"
23 #include "unicode/utext.h"
24 #include "cmemory.h"
25
26 /**
27 * API Test the RuleBasedBreakIterator class
28 */
29
30
// Report a test error, tagged with the source file and line, when the given
// ICU status code signals a failure.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errln("Failure at file %s, line %d, error = %s", \
              __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Report a test error, tagged with the source file and line, when the given
// expression compares equal to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("Test Failure at file %s, line %d", __FILE__, __LINE__); \
    } \
}
36
37 void RBBIAPITest::TestCloneEquals()
38 {
39
40 UErrorCode status=U_ZERO_ERROR;
41 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
44 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
45 if(U_FAILURE(status)){
46 errln((UnicodeString)"FAIL : in construction");
47 return;
48 }
49
50
51 UnicodeString testString="Testing word break iterators's clone() and equals()";
52 bi1->setText(testString);
53 bi2->setText(testString);
54 biequal->setText(testString);
55
56 bi3->setText("hello");
57
58 logln((UnicodeString)"Testing equals()");
59
60 logln((UnicodeString)"Testing == and !=");
61 UBool b = (*bi1 != *biequal);
62 b |= *bi1 == *bi2;
63 b |= *bi1 == *bi3;
64 if (b) {
65 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
66 }
67
68 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
69 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
70
71
72 // Quick test of RulesBasedBreakIterator assignment -
73 // Check that
74 // two different iterators are !=
75 // they are == after assignment
76 // source and dest iterator produce the same next() after assignment.
77 // deleting one doesn't disable the other.
78 logln("Testing assignment");
79 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
80 if(U_FAILURE(status)){
81 errln((UnicodeString)"FAIL : in construction");
82 return;
83 }
84
85 RuleBasedBreakIterator biDefault, biDefault2;
86 if(U_FAILURE(status)){
87 errln((UnicodeString)"FAIL : in construction of default iterator");
88 return;
89 }
90 if (biDefault == *bix) {
91 errln((UnicodeString)"ERROR: iterators should not compare ==");
92 return;
93 }
94 if (biDefault != biDefault2) {
95 errln((UnicodeString)"ERROR: iterators should compare ==");
96 return;
97 }
98
99
100 UnicodeString HelloString("Hello Kitty");
101 bix->setText(HelloString);
102 if (*bix == *bi2) {
103 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
104 }
105 *bix = *bi2;
106 if (*bix != *bi2) {
107 errln(UnicodeString("ERROR: strings should be equal before assignment."));
108 }
109
110 int bixnext = bix->next();
111 int bi2next = bi2->next();
112 if (! (bixnext == bi2next && bixnext == 7)) {
113 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
114 }
115 delete bix;
116 if (bi2->next() != 8) {
117 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
118 }
119
120
121
122 logln((UnicodeString)"Testing clone()");
123 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
124 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
125
126 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
127 *bi1clone == *bi3 || *bi1clone == *bi2)
128 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
129
130 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
131 *bi2clone == *bi3 || *bi2clone != *bi2)
132 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
133
134 if(bi1->getText() != bi1clone->getText() ||
135 bi2clone->getText() != bi2->getText() ||
136 *bi2clone == *bi1clone )
137 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
138
139 delete bi1clone;
140 delete bi2clone;
141 delete bi1;
142 delete bi3;
143 delete bi2;
144 delete biequal;
145 }
146
147 void RBBIAPITest::TestBoilerPlate()
148 {
149 UErrorCode status = U_ZERO_ERROR;
150 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
151 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
152 if (U_FAILURE(status)) {
153 errln("Creation of break iterator failed %s", u_errorName(status));
154 return;
155 }
156 if(*a!=*b){
157 errln("Failed: boilerplate method operator!= does not return correct results");
158 }
159 BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status);
160 if(a && c){
161 if(*c==*a){
162 errln("Failed: boilerplate method opertator== does not return correct results");
163 }
164 }else{
165 errln("creation of break iterator failed");
166 }
167 delete a;
168 delete b;
169 delete c;
170 }
171
172 void RBBIAPITest::TestgetRules()
173 {
174 UErrorCode status=U_ZERO_ERROR;
175
176 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
177 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
178 if(U_FAILURE(status)){
179 errln((UnicodeString)"FAIL: in construction");
180 delete bi1;
181 delete bi2;
182 return;
183 }
184
185
186
187 logln((UnicodeString)"Testing toString()");
188
189 bi1->setText((UnicodeString)"Hello there");
190
191 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
192
193 UnicodeString temp=bi1->getRules();
194 UnicodeString temp2=bi2->getRules();
195 UnicodeString temp3=bi3->getRules();
196 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
197 errln((UnicodeString)"ERROR: error in getRules() method");
198
199 delete bi1;
200 delete bi2;
201 delete bi3;
202 }
203 void RBBIAPITest::TestHashCode()
204 {
205 UErrorCode status=U_ZERO_ERROR;
206 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
207 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
208 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
209 if(U_FAILURE(status)){
210 errln((UnicodeString)"FAIL : in construction");
211 delete bi1;
212 delete bi2;
213 delete bi3;
214 return;
215 }
216
217
218 logln((UnicodeString)"Testing hashCode()");
219
220 bi1->setText((UnicodeString)"Hash code");
221 bi2->setText((UnicodeString)"Hash code");
222 bi3->setText((UnicodeString)"Hash code");
223
224 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
225 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
226
227 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
228 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
229 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
230
231 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
232 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
233 errln((UnicodeString)"ERROR: different objects have same hashcodes");
234
235 delete bi1clone;
236 delete bi2clone;
237 delete bi1;
238 delete bi2;
239 delete bi3;
240
241 }
242 void RBBIAPITest::TestGetSetAdoptText()
243 {
244 logln((UnicodeString)"Testing getText setText ");
245 UErrorCode status=U_ZERO_ERROR;
246 UnicodeString str1="first string.";
247 UnicodeString str2="Second string.";
248 RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
249 RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
250 if(U_FAILURE(status)){
251 errln((UnicodeString)"FAIL : in construction");
252 return;
253 }
254
255
256 CharacterIterator* text1= new StringCharacterIterator(str1);
257 CharacterIterator* text1Clone = text1->clone();
258 CharacterIterator* text2= new StringCharacterIterator(str2);
259 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
260
261 wordIter1->setText(str1);
262 CharacterIterator *tci = &wordIter1->getText();
263 UnicodeString tstr;
264 tci->getText(tstr);
265 TEST_ASSERT(tstr == str1);
266 if(wordIter1->current() != 0)
267 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
268
269 wordIter1->next(2);
270
271 wordIter1->setText(str2);
272 if(wordIter1->current() != 0)
273 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
274
275
276 charIter1->adoptText(text1Clone);
277 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
278 tci = &wordIter1->getText();
279 tci->getText(tstr);
280 TEST_ASSERT(tstr == str2);
281 tci = &charIter1->getText();
282 tci->getText(tstr);
283 TEST_ASSERT(tstr == str1);
284
285
286 RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
287 rb->adoptText(text1);
288 if(rb->getText() != *text1)
289 errln((UnicodeString)"ERROR:1 error in adoptText ");
290 rb->adoptText(text2);
291 if(rb->getText() != *text2)
292 errln((UnicodeString)"ERROR:2 error in adoptText ");
293
294 // Adopt where iterator range is less than the entire orignal source string.
295 // (With the change of the break engine to working with UText internally,
296 // CharacterIterators starting at positions other than zero are not supported)
297 rb->adoptText(text3);
298 TEST_ASSERT(rb->preceding(2) == 0);
299 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
300 //if(rb->preceding(2) != 3) {
301 // errln((UnicodeString)"ERROR:3 error in adoptText ");
302 //}
303 //if(rb->following(11) != BreakIterator::DONE) {
304 // errln((UnicodeString)"ERROR:4 error in adoptText ");
305 //}
306
307 // UText API
308 //
309 // Quick test to see if UText is working at all.
310 //
311 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
312 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
313 // 012345678901
314
315 status = U_ZERO_ERROR;
316 UText *ut = utext_openUTF8(NULL, s1, -1, &status);
317 wordIter1->setText(ut, status);
318 TEST_ASSERT_SUCCESS(status);
319
320 int32_t pos;
321 pos = wordIter1->first();
322 TEST_ASSERT(pos==0);
323 pos = wordIter1->next();
324 TEST_ASSERT(pos==5);
325 pos = wordIter1->next();
326 TEST_ASSERT(pos==6);
327 pos = wordIter1->next();
328 TEST_ASSERT(pos==11);
329 pos = wordIter1->next();
330 TEST_ASSERT(pos==UBRK_DONE);
331
332 status = U_ZERO_ERROR;
333 UText *ut2 = utext_openUTF8(NULL, s2, -1, &status);
334 TEST_ASSERT_SUCCESS(status);
335 wordIter1->setText(ut2, status);
336 TEST_ASSERT_SUCCESS(status);
337
338 pos = wordIter1->first();
339 TEST_ASSERT(pos==0);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==3);
342 pos = wordIter1->next();
343 TEST_ASSERT(pos==4);
344
345 pos = wordIter1->last();
346 TEST_ASSERT(pos==6);
347 pos = wordIter1->previous();
348 TEST_ASSERT(pos==4);
349 pos = wordIter1->previous();
350 TEST_ASSERT(pos==3);
351 pos = wordIter1->previous();
352 TEST_ASSERT(pos==0);
353 pos = wordIter1->previous();
354 TEST_ASSERT(pos==UBRK_DONE);
355
356 status = U_ZERO_ERROR;
357 UnicodeString sEmpty;
358 UText *gut2 = utext_openUnicodeString(NULL, &sEmpty, &status);
359 wordIter1->getUText(gut2, status);
360 TEST_ASSERT_SUCCESS(status);
361 utext_close(gut2);
362
363 utext_close(ut);
364 utext_close(ut2);
365
366 delete wordIter1;
367 delete charIter1;
368 delete rb;
369
370 }
371
372
373 void RBBIAPITest::TestIteration()
374 {
375 // This test just verifies that the API is present.
376 // Testing for correct operation of the break rules happens elsewhere.
377
378 UErrorCode status=U_ZERO_ERROR;
379 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
380 if (U_FAILURE(status) || bi == NULL) {
381 errln("Failure creating character break iterator. Status = %s", u_errorName(status));
382 }
383 delete bi;
384
385 status=U_ZERO_ERROR;
386 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
387 if (U_FAILURE(status) || bi == NULL) {
388 errln("Failure creating Word break iterator. Status = %s", u_errorName(status));
389 }
390 delete bi;
391
392 status=U_ZERO_ERROR;
393 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
394 if (U_FAILURE(status) || bi == NULL) {
395 errln("Failure creating Line break iterator. Status = %s", u_errorName(status));
396 }
397 delete bi;
398
399 status=U_ZERO_ERROR;
400 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
401 if (U_FAILURE(status) || bi == NULL) {
402 errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status));
403 }
404 delete bi;
405
406 status=U_ZERO_ERROR;
407 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
408 if (U_FAILURE(status) || bi == NULL) {
409 errln("Failure creating Title break iterator. Status = %s", u_errorName(status));
410 }
411 delete bi;
412
413 status=U_ZERO_ERROR;
414 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
415 if (U_FAILURE(status) || bi == NULL) {
416 errln("Failure creating character break iterator. Status = %s", u_errorName(status));
417 return; // Skip the rest of these tests.
418 }
419
420
421 UnicodeString testString="0123456789";
422 bi->setText(testString);
423
424 int32_t i;
425 i = bi->first();
426 if (i != 0) {
427 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
428 }
429
430 i = bi->last();
431 if (i != 10) {
432 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
433 }
434
435 //
436 // Previous
437 //
438 bi->last();
439 i = bi->previous();
440 if (i != 9) {
441 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
442 }
443
444
445 bi->first();
446 i = bi->previous();
447 if (i != BreakIterator::DONE) {
448 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i);
449 }
450
451 //
452 // next()
453 //
454 bi->first();
455 i = bi->next();
456 if (i != 1) {
457 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
458 }
459
460 bi->last();
461 i = bi->next();
462 if (i != BreakIterator::DONE) {
463 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i);
464 }
465
466
467 //
468 // current()
469 //
470 bi->first();
471 i = bi->current();
472 if (i != 0) {
473 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
474 }
475
476 bi->next();
477 i = bi->current();
478 if (i != 1) {
479 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
480 }
481
482 bi->last();
483 bi->next();
484 i = bi->current();
485 if (i != 10) {
486 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
487 }
488
489 bi->first();
490 bi->previous();
491 i = bi->current();
492 if (i != 0) {
493 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
494 }
495
496
497 //
498 // Following()
499 //
500 i = bi->following(4);
501 if (i != 5) {
502 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
503 }
504
505 i = bi->following(9);
506 if (i != 10) {
507 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i);
508 }
509
510 i = bi->following(10);
511 if (i != BreakIterator::DONE) {
512 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
513 }
514
515
516 //
517 // Preceding
518 //
519 i = bi->preceding(4);
520 if (i != 3) {
521 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
522 }
523
524 i = bi->preceding(10);
525 if (i != 9) {
526 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
527 }
528
529 i = bi->preceding(1);
530 if (i != 0) {
531 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
532 }
533
534 i = bi->preceding(0);
535 if (i != BreakIterator::DONE) {
536 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
537 }
538
539
540 //
541 // isBoundary()
542 //
543 bi->first();
544 if (bi->isBoundary(3) != TRUE) {
545 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
546 }
547 i = bi->current();
548 if (i != 3) {
549 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i);
550 }
551
552
553 if (bi->isBoundary(11) != FALSE) {
554 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
555 }
556 i = bi->current();
557 if (i != 10) {
558 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
559 }
560
561 //
562 // next(n)
563 //
564 bi->first();
565 i = bi->next(4);
566 if (i != 4) {
567 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
568 }
569
570 i = bi->next(6);
571 if (i != 10) {
572 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i);
573 }
574
575 bi->first();
576 i = bi->next(11);
577 if (i != BreakIterator::DONE) {
578 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i);
579 }
580
581 delete bi;
582
583 }
584
585
586
587
588
589
590 void RBBIAPITest::TestBuilder() {
591 UnicodeString rulesString1 = "$Letters = [:L:];\n"
592 "$Numbers = [:N:];\n"
593 "$Letters+;\n"
594 "$Numbers+;\n"
595 "[^$Letters $Numbers];\n"
596 "!.*;\n";
597 UnicodeString testString1 = "abc123..abc";
598 // 01234567890
599 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
600 UErrorCode status=U_ZERO_ERROR;
601 UParseError parseError;
602
603 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
604 if(U_FAILURE(status)) {
605 errln("FAIL : in construction");
606 } else {
607 bi->setText(testString1);
608 doBoundaryTest(*bi, testString1, bounds1);
609 }
610 delete bi;
611 }
612
613
614 //
615 // TestQuoteGrouping
616 // Single quotes within rules imply a grouping, so that a modifier
617 // following the quoted text (* or +) applies to all of the quoted chars.
618 //
619 void RBBIAPITest::TestQuoteGrouping() {
620 UnicodeString rulesString1 = "#Here comes the rule...\n"
621 "'$@!'*;\n" // (\$\@\!)*
622 ".;\n";
623
624 UnicodeString testString1 = "$@!$@!X$@!!X";
625 // 0123456789012
626 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
627 UErrorCode status=U_ZERO_ERROR;
628 UParseError parseError;
629
630 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
631 if(U_FAILURE(status)) {
632 errln("FAIL : in construction");
633 } else {
634 bi->setText(testString1);
635 doBoundaryTest(*bi, testString1, bounds1);
636 }
637 delete bi;
638 }
639
640 //
641 // TestRuleStatus
642 // Test word break rule status constants.
643 //
644 void RBBIAPITest::TestRuleStatus() {
645 UChar str[30];
646 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
647 // 012345678901234567 8 9 0 1 2 3 4 5 6
648 // Ideographic Katakana Hiragana
649 str, 30);
650 UnicodeString testString1(str);
651 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
652 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
653 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
654 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
655 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA};
656
657 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
658 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
659 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
660 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
661
662 UErrorCode status=U_ZERO_ERROR;
663
664 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
665 if(U_FAILURE(status)) {
666 errln("FAIL : in construction");
667 } else {
668 bi->setText(testString1);
669 // First test that the breaks are in the right spots.
670 doBoundaryTest(*bi, testString1, bounds1);
671
672 // Then go back and check tag values
673 int32_t i = 0;
674 int32_t pos, tag;
675 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
676 if (pos != bounds1[i]) {
677 errln("FAIL: unexpected word break at postion %d", pos);
678 break;
679 }
680 tag = bi->getRuleStatus();
681 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
682 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
683 break;
684 }
685
686 // Check that we get the same tag values from getRuleStatusVec()
687 int32_t vec[10];
688 int t = bi->getRuleStatusVec(vec, 10, status);
689 TEST_ASSERT_SUCCESS(status);
690 TEST_ASSERT(t==1);
691 TEST_ASSERT(vec[0] == tag);
692 }
693 }
694 delete bi;
695
696 // Now test line break status. This test mostly is to confirm that the status constants
697 // are correctly declared in the header.
698 testString1 = "test line. \n";
699 // break type s s h
700
701 bi = (RuleBasedBreakIterator *)
702 BreakIterator::createLineInstance(Locale::getEnglish(), status);
703 if(U_FAILURE(status)) {
704 errln("failed to create word break iterator.");
705 } else {
706 int32_t i = 0;
707 int32_t pos, tag;
708 UBool success;
709
710 bi->setText(testString1);
711 pos = bi->current();
712 tag = bi->getRuleStatus();
713 for (i=0; i<3; i++) {
714 switch (i) {
715 case 0:
716 success = pos==0 && tag==UBRK_LINE_SOFT; break;
717 case 1:
718 success = pos==5 && tag==UBRK_LINE_SOFT; break;
719 case 2:
720 success = pos==12 && tag==UBRK_LINE_HARD; break;
721 default:
722 success = FALSE; break;
723 }
724 if (success == FALSE) {
725 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
726 i, pos, tag);
727 break;
728 }
729 pos = bi->next();
730 tag = bi->getRuleStatus();
731 }
732 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
733 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
734 UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
735 errln("UBRK_LINE_* constants from header are inconsistent.");
736 }
737 }
738 delete bi;
739
740 }
741
742
743 //
744 // TestRuleStatusVec
745 // Test the vector form of break rule status.
746 //
747 void RBBIAPITest::TestRuleStatusVec() {
748 UnicodeString rulesString( "[A-N]{100}; \n"
749 "[a-w]{200}; \n"
750 "[\\p{L}]{300}; \n"
751 "[\\p{N}]{400}; \n"
752 "[0-5]{500}; \n"
753 "!.*;\n", -1, US_INV);
754 UnicodeString testString1 = "Aapz5?";
755 int32_t statusVals[10];
756 int32_t numStatuses;
757 int32_t pos;
758
759 UErrorCode status=U_ZERO_ERROR;
760 UParseError parseError;
761
762 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
763 TEST_ASSERT_SUCCESS(status);
764 if (U_SUCCESS(status)) {
765 bi->setText(testString1);
766
767 // A
768 pos = bi->next();
769 TEST_ASSERT(pos==1);
770 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
771 TEST_ASSERT_SUCCESS(status);
772 TEST_ASSERT(numStatuses == 2);
773 TEST_ASSERT(statusVals[0] == 100);
774 TEST_ASSERT(statusVals[1] == 300);
775
776 // a
777 pos = bi->next();
778 TEST_ASSERT(pos==2);
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780 TEST_ASSERT_SUCCESS(status);
781 TEST_ASSERT(numStatuses == 2);
782 TEST_ASSERT(statusVals[0] == 200);
783 TEST_ASSERT(statusVals[1] == 300);
784
785 // p
786 pos = bi->next();
787 TEST_ASSERT(pos==3);
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT(numStatuses == 2);
791 TEST_ASSERT(statusVals[0] == 200);
792 TEST_ASSERT(statusVals[1] == 300);
793
794 // z
795 pos = bi->next();
796 TEST_ASSERT(pos==4);
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798 TEST_ASSERT_SUCCESS(status);
799 TEST_ASSERT(numStatuses == 1);
800 TEST_ASSERT(statusVals[0] == 300);
801
802 // 5
803 pos = bi->next();
804 TEST_ASSERT(pos==5);
805 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
806 TEST_ASSERT_SUCCESS(status);
807 TEST_ASSERT(numStatuses == 2);
808 TEST_ASSERT(statusVals[0] == 400);
809 TEST_ASSERT(statusVals[1] == 500);
810
811 // ?
812 pos = bi->next();
813 TEST_ASSERT(pos==6);
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815 TEST_ASSERT_SUCCESS(status);
816 TEST_ASSERT(numStatuses == 1);
817 TEST_ASSERT(statusVals[0] == 0);
818
819 //
820 // Check buffer overflow error handling. Char == A
821 //
822 bi->first();
823 pos = bi->next();
824 TEST_ASSERT(pos==1);
825 memset(statusVals, -1, sizeof(statusVals));
826 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
827 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
828 TEST_ASSERT(numStatuses == 2);
829 TEST_ASSERT(statusVals[0] == -1);
830
831 status = U_ZERO_ERROR;
832 memset(statusVals, -1, sizeof(statusVals));
833 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
834 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
835 TEST_ASSERT(numStatuses == 2);
836 TEST_ASSERT(statusVals[0] == 100);
837 TEST_ASSERT(statusVals[1] == -1);
838
839 status = U_ZERO_ERROR;
840 memset(statusVals, -1, sizeof(statusVals));
841 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
842 TEST_ASSERT_SUCCESS(status);
843 TEST_ASSERT(numStatuses == 2);
844 TEST_ASSERT(statusVals[0] == 100);
845 TEST_ASSERT(statusVals[1] == 300);
846 TEST_ASSERT(statusVals[2] == -1);
847 }
848 delete bi;
849
850 }
851
852 //
853 // Bug 2190 Regression test. Builder crash on rule consisting of only a
854 // $variable reference
855 void RBBIAPITest::TestBug2190() {
856 UnicodeString rulesString1 = "$aaa = abcd;\n"
857 "$bbb = $aaa;\n"
858 "$bbb;\n";
859 UnicodeString testString1 = "abcdabcd";
860 // 01234567890
861 int32_t bounds1[] = {0, 4, 8};
862 UErrorCode status=U_ZERO_ERROR;
863 UParseError parseError;
864
865 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
866 if(U_FAILURE(status)) {
867 errln("FAIL : in construction");
868 } else {
869 bi->setText(testString1);
870 doBoundaryTest(*bi, testString1, bounds1);
871 }
872 delete bi;
873 }
874
875
876 void RBBIAPITest::TestRegistration() {
877 #if !UCONFIG_NO_SERVICE
878 UErrorCode status = U_ZERO_ERROR;
879 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
880
881 // ok to not delete these if we exit because of error?
882 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
883 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
884 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
885
886 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
887 {
888 if (ja_word && *ja_word == *root_word) {
889 errln("japan not different from root");
890 }
891 }
892
893 {
894 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
895 UBool fail = TRUE;
896 if(result){
897 fail = *result != *ja_word;
898 }
899 delete result;
900 if (fail) {
901 errln("bad result for xx_XX/word");
902 }
903 }
904
905 {
906 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
907 UBool fail = TRUE;
908 if(result){
909 fail = *result != *ja_char;
910 }
911 delete result;
912 if (fail) {
913 errln("bad result for ja_JP/char");
914 }
915 }
916
917 {
918 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
919 UBool fail = TRUE;
920 if(result){
921 fail = *result != *root_char;
922 }
923 delete result;
924 if (fail) {
925 errln("bad result for xx_XX/char");
926 }
927 }
928
929 {
930 StringEnumeration* avail = BreakIterator::getAvailableLocales();
931 UBool found = FALSE;
932 const UnicodeString* p;
933 while ((p = avail->snext(status))) {
934 if (p->compare("xx") == 0) {
935 found = TRUE;
936 break;
937 }
938 }
939 delete avail;
940 if (!found) {
941 errln("did not find test locale");
942 }
943 }
944
945 {
946 UBool unreg = BreakIterator::unregister(key, status);
947 if (!unreg) {
948 errln("unable to unregister");
949 }
950 }
951
952 {
953 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
954 BreakIterator* root = BreakIterator::createWordInstance("", status);
955 UBool fail = TRUE;
956 if(root){
957 fail = *root != *result;
958 }
959 delete root;
960 delete result;
961 if (fail) {
962 errln("did not get root break");
963 }
964 }
965
966 {
967 StringEnumeration* avail = BreakIterator::getAvailableLocales();
968 UBool found = FALSE;
969 const UnicodeString* p;
970 while ((p = avail->snext(status))) {
971 if (p->compare("xx") == 0) {
972 found = TRUE;
973 break;
974 }
975 }
976 delete avail;
977 if (found) {
978 errln("found test locale");
979 }
980 }
981
982 {
983 int32_t count;
984 UBool foundLocale = FALSE;
985 const Locale *avail = BreakIterator::getAvailableLocales(count);
986 for (int i=0; i<count; i++) {
987 if (avail[i] == Locale::getEnglish()) {
988 foundLocale = TRUE;
989 break;
990 }
991 }
992 if (foundLocale == FALSE) {
993 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
994 }
995 }
996
997
998 // ja_word was adopted by factory
999 delete ja_char;
1000 delete root_word;
1001 delete root_char;
1002 #endif
1003 }
1004
1005 void RBBIAPITest::RoundtripRule(const char *dataFile) {
1006 UErrorCode status = U_ZERO_ERROR;
1007 UParseError parseError;
1008 parseError.line = 0;
1009 parseError.offset = 0;
1010 UDataMemory *data = udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status);
1011 uint32_t length;
1012 const UChar *builtSource;
1013 const uint8_t *rbbiRules;
1014 const uint8_t *builtRules;
1015
1016 if (U_FAILURE(status)) {
1017 errln("Can't open \"%s\"", dataFile);
1018 return;
1019 }
1020
1021 builtRules = (const uint8_t *)udata_getMemory(data);
1022 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
1023 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
1024 if (U_FAILURE(status)) {
1025 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1026 u_errorName(status), parseError.line, parseError.offset);
1027 return;
1028 };
1029 rbbiRules = brkItr->getBinaryRules(length);
1030 logln("Comparing \"%s\" len=%d", dataFile, length);
1031 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
1032 errln("Built rules and rebuilt rules are different %s", dataFile);
1033 return;
1034 }
1035 delete brkItr;
1036 udata_close(data);
1037 }
1038
1039 void RBBIAPITest::TestRoundtripRules() {
1040 RoundtripRule("word");
1041 RoundtripRule("title");
1042 RoundtripRule("sent");
1043 RoundtripRule("line");
1044 RoundtripRule("char");
1045 if (!quick) {
1046 RoundtripRule("word_ja");
1047 RoundtripRule("word_POSIX");
1048 }
1049 }
1050
1051 // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader*
1052 // (these are protected so we access them via a local class RBBIWithProtectedFunctions).
1053 // This is just a sanity check, not a thorough test (e.g. we don't check that the
1054 // first delete actually frees rulesCopy).
1055 void RBBIAPITest::TestCreateFromRBBIData() {
1056 // Get some handy RBBIData
1057 const char *brkName = "word"; // or "sent", "line", "char", etc.
1058 UErrorCode status = U_ZERO_ERROR;
1059 UDataMemory * data = udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status);
1060 if ( U_SUCCESS(status) ) {
1061 const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data);
1062 uint32_t length = builtRules->fLength;
1063 RBBIWithProtectedFunctions * brkItr;
1064
1065 // Try the memory-adopting constructor, need to copy the data first
1066 RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length);
1067 if ( rulesCopy ) {
1068 uprv_memcpy( rulesCopy, builtRules, length );
1069
1070 brkItr = new RBBIWithProtectedFunctions(rulesCopy, status);
1071 if ( U_SUCCESS(status) ) {
1072 delete brkItr; // this should free rulesCopy
1073 } else {
1074 errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) );
1075 status = U_ZERO_ERROR;// reset for the next test
1076 uprv_free( rulesCopy );
1077 }
1078 }
1079
1080 // Now try the non-adopting constructor
1081 brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status);
1082 if ( U_SUCCESS(status) ) {
1083 delete brkItr; // this should NOT attempt to free builtRules
1084 if (builtRules->fLength != length) { // sanity check
1085 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" );
1086 }
1087 } else {
1088 errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) );
1089 }
1090
1091 udata_close(data);
1092 }
1093 }
1094
1095 //---------------------------------------------
1096 // runIndexedTest
1097 //---------------------------------------------
1098
1099 void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1100 {
1101 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
1102 switch (index) {
1103 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
1104 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
1105 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
1106 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
1107 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
1108 case 4: name = "TestIteration"; if (exec) TestIteration(); break;
1109 case 5: name = "TestBuilder"; if (exec) TestBuilder(); break;
1110 case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
1111 case 7: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
1112 case 8: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1113 case 9: name = "TestBug2190"; if (exec) TestBug2190(); break;
1114 case 10: name = "TestRegistration"; if (exec) TestRegistration(); break;
1115 case 11: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1116 case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1117 case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break;
1118
1119 default: name = ""; break; // needed to end loop
1120 }
1121 }
1122
1123 //---------------------------------------------
1124 //Internal subroutines
1125 //---------------------------------------------
1126
1127 void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1128 logln((UnicodeString)"testIsBoundary():");
1129 int32_t p = 0;
1130 UBool isB;
1131 for (int32_t i = 0; i < text.length(); i++) {
1132 isB = bi.isBoundary(i);
1133 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1134
1135 if (i == boundaries[p]) {
1136 if (!isB)
1137 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1138 p++;
1139 }
1140 else {
1141 if (isB)
1142 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1143 }
1144 }
1145 }
1146 void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1147 UnicodeString selected;
1148 UnicodeString expected=CharsToUnicodeString(expectedString);
1149
1150 if(gotoffset != expectedOffset)
1151 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1152 if(start <= gotoffset){
1153 testString.extractBetween(start, gotoffset, selected);
1154 }
1155 else{
1156 testString.extractBetween(gotoffset, start, selected);
1157 }
1158 if(selected.compare(expected) != 0)
1159 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1160 else
1161 logln(prettify("****selected \"" + selected + "\""));
1162 }
1163
1164 //---------------------------------------------
1165 //RBBIWithProtectedFunctions class functions
1166 //---------------------------------------------
1167
1168 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status)
1169 : RuleBasedBreakIterator(data, status)
1170 {
1171 }
1172
1173 RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status)
1174 : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status)
1175 {
1176 }
1177
1178 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */