]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/rbbiapts.cpp
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbiapts.cpp
... / ...
CommitLineData
1/********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1999-2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6/************************************************************************
7* Date Name Description
8* 12/14/99 Madhu Creation.
9* 01/12/2000 Madhu updated for changed API
10************************************************************************/
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/uchar.h"
17#include "intltest.h"
18#include "unicode/rbbi.h"
19#include "unicode/schriter.h"
20#include "rbbiapts.h"
21#include "rbbidata.h"
22#include "cstring.h"
23#include "unicode/ustring.h"
24
25/**
26 * API Test the RuleBasedBreakIterator class
27 */
28
29
// Reports a test error, tagged with the current file and line, when the given
// ICU UErrorCode indicates failure. Expands to a braced block that calls errln().
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        errln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Reports a test error, tagged with the current file and line, when the given
// expression compares equal to FALSE. Expands to a braced block that calls errln().
#define TEST_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        errln("Test Failure at file %s, line %d", __FILE__, __LINE__); \
    } \
}
35
36void RBBIAPITest::TestCloneEquals()
37{
38
39 UErrorCode status=U_ZERO_ERROR;
40 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
41 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
42 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
43 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
44 if(U_FAILURE(status)){
45 errln((UnicodeString)"FAIL : in construction");
46 return;
47 }
48
49
50 UnicodeString testString="Testing word break iterators's clone() and equals()";
51 bi1->setText(testString);
52 bi2->setText(testString);
53 biequal->setText(testString);
54
55 bi3->setText("hello");
56
57 logln((UnicodeString)"Testing equals()");
58
59 logln((UnicodeString)"Testing == and !=");
60 UBool b = (*bi1 != *biequal);
61 b |= *bi1 == *bi2;
62 b |= *bi1 == *bi3;
63 if (b) {
64 errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
65 }
66
67 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
68 errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
69
70
71 // Quick test of RulesBasedBreakIterator assignment -
72 // Check that
73 // two different iterators are !=
74 // they are == after assignment
75 // source and dest iterator produce the same next() after assignment.
76 // deleting one doesn't disable the other.
77 logln("Testing assignment");
78 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
79 if(U_FAILURE(status)){
80 errln((UnicodeString)"FAIL : in construction");
81 return;
82 }
83
84 RuleBasedBreakIterator biDefault, biDefault2;
85 if(U_FAILURE(status)){
86 errln((UnicodeString)"FAIL : in construction of default iterator");
87 return;
88 }
89 if (biDefault == *bix) {
90 errln((UnicodeString)"ERROR: iterators should not compare ==");
91 return;
92 }
93 if (biDefault != biDefault2) {
94 errln((UnicodeString)"ERROR: iterators should compare ==");
95 return;
96 }
97
98
99 UnicodeString HelloString("Hello Kitty");
100 bix->setText(HelloString);
101 if (*bix == *bi2) {
102 errln(UnicodeString("ERROR: strings should not be equal before assignment."));
103 }
104 *bix = *bi2;
105 if (*bix != *bi2) {
106 errln(UnicodeString("ERROR: strings should be equal before assignment."));
107 }
108
109 int bixnext = bix->next();
110 int bi2next = bi2->next();
111 if (! (bixnext == bi2next && bixnext == 7)) {
112 errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
113 }
114 delete bix;
115 if (bi2->next() != 8) {
116 errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
117 }
118
119
120
121 logln((UnicodeString)"Testing clone()");
122 RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
123 RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
124
125 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
126 *bi1clone == *bi3 || *bi1clone == *bi2)
127 errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
128
129 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
130 *bi2clone == *bi3 || *bi2clone != *bi2)
131 errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
132
133 if(bi1->getText() != bi1clone->getText() ||
134 bi2clone->getText() != bi2->getText() ||
135 *bi2clone == *bi1clone )
136 errln((UnicodeString)"ERROR: RBBI's clone() method failed");
137
138 delete bi1clone;
139 delete bi2clone;
140 delete bi1;
141 delete bi3;
142 delete bi2;
143 delete biequal;
144}
145
146void RBBIAPITest::TestBoilerPlate()
147{
148 UErrorCode status = U_ZERO_ERROR;
149 BreakIterator* a = BreakIterator::createLineInstance(Locale("hi"), status);
150 BreakIterator* b = BreakIterator::createLineInstance(Locale("hi_IN"),status);
151 if (U_FAILURE(status)) {
152 errln("Creation of break iterator failed %s", u_errorName(status));
153 return;
154 }
155 if(*a!=*b){
156 errln("Failed: boilerplate method operator!= does not return correct results");
157 }
158 BreakIterator* c = BreakIterator::createLineInstance(Locale("th"),status);
159 if(a && c){
160 if(*c==*a){
161 errln("Failed: boilerplate method opertator== does not return correct results");
162 }
163 }else{
164 errln("creation of break iterator failed");
165 }
166 delete a;
167 delete b;
168 delete c;
169}
170
171void RBBIAPITest::TestgetRules()
172{
173 UErrorCode status=U_ZERO_ERROR;
174
175 RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
176 RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
177 if(U_FAILURE(status)){
178 errln((UnicodeString)"FAIL: in construction");
179 delete bi1;
180 delete bi2;
181 return;
182 }
183
184
185
186 logln((UnicodeString)"Testing toString()");
187
188 bi1->setText((UnicodeString)"Hello there");
189
190 RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone();
191
192 UnicodeString temp=bi1->getRules();
193 UnicodeString temp2=bi2->getRules();
194 UnicodeString temp3=bi3->getRules();
195 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
196 errln((UnicodeString)"ERROR: error in getRules() method");
197
198 delete bi1;
199 delete bi2;
200 delete bi3;
201}
202void RBBIAPITest::TestHashCode()
203{
204 UErrorCode status=U_ZERO_ERROR;
205 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
206 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
207 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
208 if(U_FAILURE(status)){
209 errln((UnicodeString)"FAIL : in construction");
210 delete bi1;
211 delete bi2;
212 delete bi3;
213 return;
214 }
215
216
217 logln((UnicodeString)"Testing hashCode()");
218
219 bi1->setText((UnicodeString)"Hash code");
220 bi2->setText((UnicodeString)"Hash code");
221 bi3->setText((UnicodeString)"Hash code");
222
223 RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone();
224 RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone();
225
226 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
227 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
228 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
229
230 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
231 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
232 errln((UnicodeString)"ERROR: different objects have same hashcodes");
233
234 delete bi1clone;
235 delete bi2clone;
236 delete bi1;
237 delete bi2;
238 delete bi3;
239
240}
241void RBBIAPITest::TestGetSetAdoptText()
242{
243 logln((UnicodeString)"Testing getText setText ");
244 UErrorCode status=U_ZERO_ERROR;
245 UnicodeString str1="first string.";
246 UnicodeString str2="Second string.";
247 RuleBasedBreakIterator* charIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
248 RuleBasedBreakIterator* wordIter1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
249 if(U_FAILURE(status)){
250 errln((UnicodeString)"FAIL : in construction");
251 return;
252 }
253
254
255 CharacterIterator* text1= new StringCharacterIterator(str1);
256 CharacterIterator* text1Clone = text1->clone();
257 CharacterIterator* text2= new StringCharacterIterator(str2);
258 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
259
260 wordIter1->setText(str1);
261 if(wordIter1->getText() != *text1)
262 errln((UnicodeString)"ERROR:1 error in setText or getText ");
263 if(wordIter1->current() != 0)
264 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
265
266 wordIter1->next(2);
267
268 wordIter1->setText(str2);
269 if(wordIter1->current() != 0)
270 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
271
272
273 charIter1->adoptText(text1Clone);
274 if( wordIter1->getText() == charIter1->getText() ||
275 wordIter1->getText() != *text2 || charIter1->getText() != *text1 )
276 errln((UnicodeString)"ERROR:2 error is getText or setText()");
277
278 RuleBasedBreakIterator* rb=(RuleBasedBreakIterator*)wordIter1->clone();
279 rb->adoptText(text1);
280 if(rb->getText() != *text1)
281 errln((UnicodeString)"ERROR:1 error in adoptText ");
282 rb->adoptText(text2);
283 if(rb->getText() != *text2)
284 errln((UnicodeString)"ERROR:2 error in adoptText ");
285
286 // Adopt where iterator range is less than the entire orignal source string.
287 rb->adoptText(text3);
288 if(rb->preceding(2) != 3) {
289 errln((UnicodeString)"ERROR:3 error in adoptText ");
290 }
291 if(rb->following(11) != BreakIterator::DONE) {
292 errln((UnicodeString)"ERROR:4 error in adoptText ");
293 }
294
295 delete wordIter1;
296 delete charIter1;
297 delete rb;
298
299 }
300
301
302void RBBIAPITest::TestIteration()
303{
304 // This test just verifies that the API is present.
305 // Testing for correct operation of the break rules happens elsewhere.
306
307 UErrorCode status=U_ZERO_ERROR;
308 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
309 if (U_FAILURE(status) || bi == NULL) {
310 errln("Failure creating character break iterator. Status = %s", u_errorName(status));
311 }
312 delete bi;
313
314 status=U_ZERO_ERROR;
315 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
316 if (U_FAILURE(status) || bi == NULL) {
317 errln("Failure creating Word break iterator. Status = %s", u_errorName(status));
318 }
319 delete bi;
320
321 status=U_ZERO_ERROR;
322 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
323 if (U_FAILURE(status) || bi == NULL) {
324 errln("Failure creating Line break iterator. Status = %s", u_errorName(status));
325 }
326 delete bi;
327
328 status=U_ZERO_ERROR;
329 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
330 if (U_FAILURE(status) || bi == NULL) {
331 errln("Failure creating Sentence break iterator. Status = %s", u_errorName(status));
332 }
333 delete bi;
334
335 status=U_ZERO_ERROR;
336 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
337 if (U_FAILURE(status) || bi == NULL) {
338 errln("Failure creating Title break iterator. Status = %s", u_errorName(status));
339 }
340 delete bi;
341
342 status=U_ZERO_ERROR;
343 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
344 if (U_FAILURE(status) || bi == NULL) {
345 errln("Failure creating character break iterator. Status = %s", u_errorName(status));
346 return; // Skip the rest of these tests.
347 }
348
349
350 UnicodeString testString="0123456789";
351 bi->setText(testString);
352
353 int32_t i;
354 i = bi->first();
355 if (i != 0) {
356 errln("Incorrect value from bi->first(). Expected 0, got %d.", i);
357 }
358
359 i = bi->last();
360 if (i != 10) {
361 errln("Incorrect value from bi->last(). Expected 10, got %d", i);
362 }
363
364 //
365 // Previous
366 //
367 bi->last();
368 i = bi->previous();
369 if (i != 9) {
370 errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i);
371 }
372
373
374 bi->first();
375 i = bi->previous();
376 if (i != BreakIterator::DONE) {
377 errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i);
378 }
379
380 //
381 // next()
382 //
383 bi->first();
384 i = bi->next();
385 if (i != 1) {
386 errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i);
387 }
388
389 bi->last();
390 i = bi->next();
391 if (i != BreakIterator::DONE) {
392 errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i);
393 }
394
395
396 //
397 // current()
398 //
399 bi->first();
400 i = bi->current();
401 if (i != 0) {
402 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
403 }
404
405 bi->next();
406 i = bi->current();
407 if (i != 1) {
408 errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i);
409 }
410
411 bi->last();
412 bi->next();
413 i = bi->current();
414 if (i != 10) {
415 errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i);
416 }
417
418 bi->first();
419 bi->previous();
420 i = bi->current();
421 if (i != 0) {
422 errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i);
423 }
424
425
426 //
427 // Following()
428 //
429 i = bi->following(4);
430 if (i != 5) {
431 errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i);
432 }
433
434 i = bi->following(9);
435 if (i != 10) {
436 errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i);
437 }
438
439 i = bi->following(10);
440 if (i != BreakIterator::DONE) {
441 errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i);
442 }
443
444
445 //
446 // Preceding
447 //
448 i = bi->preceding(4);
449 if (i != 3) {
450 errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i);
451 }
452
453 i = bi->preceding(10);
454 if (i != 9) {
455 errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i);
456 }
457
458 i = bi->preceding(1);
459 if (i != 0) {
460 errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i);
461 }
462
463 i = bi->preceding(0);
464 if (i != BreakIterator::DONE) {
465 errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i);
466 }
467
468
469 //
470 // isBoundary()
471 //
472 bi->first();
473 if (bi->isBoundary(3) != TRUE) {
474 errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i);
475 }
476 i = bi->current();
477 if (i != 3) {
478 errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i);
479 }
480
481
482 if (bi->isBoundary(11) != FALSE) {
483 errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i);
484 }
485 i = bi->current();
486 if (i != 10) {
487 errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i);
488 }
489
490 //
491 // next(n)
492 //
493 bi->first();
494 i = bi->next(4);
495 if (i != 4) {
496 errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i);
497 }
498
499 i = bi->next(6);
500 if (i != 10) {
501 errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i);
502 }
503
504 bi->first();
505 i = bi->next(11);
506 if (i != BreakIterator::DONE) {
507 errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i);
508 }
509
510 delete bi;
511
512}
513
514
515
516
517
518
519void RBBIAPITest::TestBuilder() {
520 UnicodeString rulesString1 = "$Letters = [:L:];\n"
521 "$Numbers = [:N:];\n"
522 "$Letters+;\n"
523 "$Numbers+;\n"
524 "[^$Letters $Numbers];\n"
525 "!.*;\n";
526 UnicodeString testString1 = "abc123..abc";
527 // 01234567890
528 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
529 UErrorCode status=U_ZERO_ERROR;
530 UParseError parseError;
531
532 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
533 if(U_FAILURE(status)) {
534 errln("FAIL : in construction");
535 } else {
536 bi->setText(testString1);
537 doBoundaryTest(*bi, testString1, bounds1);
538 }
539 delete bi;
540}
541
542
543//
544// TestQuoteGrouping
545// Single quotes within rules imply a grouping, so that a modifier
546// following the quoted text (* or +) applies to all of the quoted chars.
547//
548void RBBIAPITest::TestQuoteGrouping() {
549 UnicodeString rulesString1 = "#Here comes the rule...\n"
550 "'$@!'*;\n" // (\$\@\!)*
551 ".;\n";
552
553 UnicodeString testString1 = "$@!$@!X$@!!X";
554 // 0123456789012
555 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
556 UErrorCode status=U_ZERO_ERROR;
557 UParseError parseError;
558
559 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
560 if(U_FAILURE(status)) {
561 errln("FAIL : in construction");
562 } else {
563 bi->setText(testString1);
564 doBoundaryTest(*bi, testString1, bounds1);
565 }
566 delete bi;
567}
568
569//
570// TestRuleStatus
571// Test word break rule status constants.
572//
573void RBBIAPITest::TestRuleStatus() {
574 UChar str[30];
575 u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
576 // 012345678901234567 8 9 0 1 2 3 4 5 6
577 // Ideographic Katakana Hiragana
578 str, 30);
579 UnicodeString testString1(str);
580 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
581 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
582 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
583 UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
584 UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA};
585
586 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
587 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
588 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
589 UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
590
591 UErrorCode status=U_ZERO_ERROR;
592
593 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getDefault(), status);
594 if(U_FAILURE(status)) {
595 errln("FAIL : in construction");
596 } else {
597 bi->setText(testString1);
598 // First test that the breaks are in the right spots.
599 doBoundaryTest(*bi, testString1, bounds1);
600
601 // Then go back and check tag values
602 int32_t i = 0;
603 int32_t pos, tag;
604 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
605 if (pos != bounds1[i]) {
606 errln("FAIL: unexpected word break at postion %d", pos);
607 break;
608 }
609 tag = bi->getRuleStatus();
610 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
611 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
612 break;
613 }
614
615 // Check that we get the same tag values from getRuleStatusVec()
616 int32_t vec[10];
617 int t = bi->getRuleStatusVec(vec, 10, status);
618 TEST_ASSERT_SUCCESS(status);
619 TEST_ASSERT(t==1);
620 TEST_ASSERT(vec[0] == tag);
621 }
622 }
623 delete bi;
624
625 // Now test line break status. This test mostly is to confirm that the status constants
626 // are correctly declared in the header.
627 testString1 = "test line. \n";
628 // break type s s h
629
630 bi = (RuleBasedBreakIterator *)
631 BreakIterator::createLineInstance(Locale::getEnglish(), status);
632 if(U_FAILURE(status)) {
633 errln("failed to create word break iterator.");
634 } else {
635 int32_t i = 0;
636 int32_t pos, tag;
637 UBool success;
638
639 bi->setText(testString1);
640 pos = bi->current();
641 tag = bi->getRuleStatus();
642 for (i=0; i<3; i++) {
643 switch (i) {
644 case 0:
645 success = pos==0 && tag==UBRK_LINE_SOFT; break;
646 case 1:
647 success = pos==5 && tag==UBRK_LINE_SOFT; break;
648 case 2:
649 success = pos==12 && tag==UBRK_LINE_HARD; break;
650 default:
651 success = FALSE; break;
652 }
653 if (success == FALSE) {
654 errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
655 i, pos, tag);
656 break;
657 }
658 pos = bi->next();
659 tag = bi->getRuleStatus();
660 }
661 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
662 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
663 UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT ) {
664 errln("UBRK_LINE_* constants from header are inconsistent.");
665 }
666 }
667 delete bi;
668
669}
670
671
672//
673// TestRuleStatusVec
674// Test the vector form of break rule status.
675//
676void RBBIAPITest::TestRuleStatusVec() {
677 UnicodeString rulesString = "[A-N]{100}; \n"
678 "[a-w]{200}; \n"
679 "[\\p{L}]{300}; \n"
680 "[\\p{N}]{400}; \n"
681 "[0-5]{500}; \n"
682 "!.*;\n";
683 UnicodeString testString1 = "Aapz5?";
684 int32_t statusVals[10];
685 int32_t numStatuses;
686 int32_t pos;
687
688 UErrorCode status=U_ZERO_ERROR;
689 UParseError parseError;
690
691 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
692 TEST_ASSERT_SUCCESS(status);
693 if (U_SUCCESS(status)) {
694 bi->setText(testString1);
695
696 // A
697 pos = bi->next();
698 TEST_ASSERT(pos==1);
699 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
700 TEST_ASSERT_SUCCESS(status);
701 TEST_ASSERT(numStatuses == 2);
702 TEST_ASSERT(statusVals[0] == 100);
703 TEST_ASSERT(statusVals[1] == 300);
704
705 // a
706 pos = bi->next();
707 TEST_ASSERT(pos==2);
708 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
709 TEST_ASSERT_SUCCESS(status);
710 TEST_ASSERT(numStatuses == 2);
711 TEST_ASSERT(statusVals[0] == 200);
712 TEST_ASSERT(statusVals[1] == 300);
713
714 // p
715 pos = bi->next();
716 TEST_ASSERT(pos==3);
717 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
718 TEST_ASSERT_SUCCESS(status);
719 TEST_ASSERT(numStatuses == 2);
720 TEST_ASSERT(statusVals[0] == 200);
721 TEST_ASSERT(statusVals[1] == 300);
722
723 // z
724 pos = bi->next();
725 TEST_ASSERT(pos==4);
726 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
727 TEST_ASSERT_SUCCESS(status);
728 TEST_ASSERT(numStatuses == 1);
729 TEST_ASSERT(statusVals[0] == 300);
730
731 // 5
732 pos = bi->next();
733 TEST_ASSERT(pos==5);
734 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
735 TEST_ASSERT_SUCCESS(status);
736 TEST_ASSERT(numStatuses == 2);
737 TEST_ASSERT(statusVals[0] == 400);
738 TEST_ASSERT(statusVals[1] == 500);
739
740 // ?
741 pos = bi->next();
742 TEST_ASSERT(pos==6);
743 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
744 TEST_ASSERT_SUCCESS(status);
745 TEST_ASSERT(numStatuses == 1);
746 TEST_ASSERT(statusVals[0] == 0);
747
748 //
749 // Check buffer overflow error handling. Char == A
750 //
751 bi->first();
752 pos = bi->next();
753 TEST_ASSERT(pos==1);
754 memset(statusVals, -1, sizeof(statusVals));
755 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
756 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
757 TEST_ASSERT(numStatuses == 2);
758 TEST_ASSERT(statusVals[0] == -1);
759
760 status = U_ZERO_ERROR;
761 memset(statusVals, -1, sizeof(statusVals));
762 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
763 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
764 TEST_ASSERT(numStatuses == 2);
765 TEST_ASSERT(statusVals[0] == 100);
766 TEST_ASSERT(statusVals[1] == -1);
767
768 status = U_ZERO_ERROR;
769 memset(statusVals, -1, sizeof(statusVals));
770 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
771 TEST_ASSERT_SUCCESS(status);
772 TEST_ASSERT(numStatuses == 2);
773 TEST_ASSERT(statusVals[0] == 100);
774 TEST_ASSERT(statusVals[1] == 300);
775 TEST_ASSERT(statusVals[2] == -1);
776 }
777 delete bi;
778
779}
780
781//
782// Bug 2190 Regression test. Builder crash on rule consisting of only a
783// $variable reference
784void RBBIAPITest::TestBug2190() {
785 UnicodeString rulesString1 = "$aaa = abcd;\n"
786 "$bbb = $aaa;\n"
787 "$bbb;\n";
788 UnicodeString testString1 = "abcdabcd";
789 // 01234567890
790 int32_t bounds1[] = {0, 4, 8};
791 UErrorCode status=U_ZERO_ERROR;
792 UParseError parseError;
793
794 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
795 if(U_FAILURE(status)) {
796 errln("FAIL : in construction");
797 } else {
798 bi->setText(testString1);
799 doBoundaryTest(*bi, testString1, bounds1);
800 }
801 delete bi;
802}
803
804
805void RBBIAPITest::TestRegistration() {
806#if !UCONFIG_NO_SERVICE
807 UErrorCode status = U_ZERO_ERROR;
808 BreakIterator* thai_word = BreakIterator::createWordInstance("th_TH", status);
809
810 // ok to not delete these if we exit because of error?
811 BreakIterator* thai_char = BreakIterator::createCharacterInstance("th_TH", status);
812 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
813 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
814
815 URegistryKey key = BreakIterator::registerInstance(thai_word, "xx", UBRK_WORD, status);
816 {
817 if (thai_word && *thai_word == *root_word) {
818 errln("thai not different from root");
819 }
820 }
821
822 {
823 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
824 UBool fail = TRUE;
825 if(result){
826 fail = *result != *thai_word;
827 }
828 delete result;
829 if (fail) {
830 errln("bad result for xx_XX/word");
831 }
832 }
833
834 {
835 BreakIterator* result = BreakIterator::createCharacterInstance("th_TH", status);
836 UBool fail = TRUE;
837 if(result){
838 fail = *result != *thai_char;
839 }
840 delete result;
841 if (fail) {
842 errln("bad result for th_TH/char");
843 }
844 }
845
846 {
847 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
848 UBool fail = TRUE;
849 if(result){
850 fail = *result != *root_char;
851 }
852 delete result;
853 if (fail) {
854 errln("bad result for xx_XX/char");
855 }
856 }
857
858 {
859 StringEnumeration* avail = BreakIterator::getAvailableLocales();
860 UBool found = FALSE;
861 const UnicodeString* p;
862 while ((p = avail->snext(status))) {
863 if (p->compare("xx") == 0) {
864 found = TRUE;
865 break;
866 }
867 }
868 delete avail;
869 if (!found) {
870 errln("did not find test locale");
871 }
872 }
873
874 {
875 UBool unreg = BreakIterator::unregister(key, status);
876 if (!unreg) {
877 errln("unable to unregister");
878 }
879 }
880
881 {
882 BreakIterator* result = BreakIterator::createWordInstance("xx", status);
883 BreakIterator* root = BreakIterator::createWordInstance("", status);
884 UBool fail = TRUE;
885 if(root){
886 fail = *root != *result;
887 }
888 delete root;
889 delete result;
890 if (fail) {
891 errln("did not get root break");
892 }
893 }
894
895 {
896 StringEnumeration* avail = BreakIterator::getAvailableLocales();
897 UBool found = FALSE;
898 const UnicodeString* p;
899 while ((p = avail->snext(status))) {
900 if (p->compare("xx") == 0) {
901 found = TRUE;
902 break;
903 }
904 }
905 delete avail;
906 if (found) {
907 errln("found test locale");
908 }
909 }
910
911 {
912 int32_t count;
913 UBool foundLocale = FALSE;
914 const Locale *avail = BreakIterator::getAvailableLocales(count);
915 for (int i=0; i<count; i++) {
916 if (avail[i] == Locale::getEnglish()) {
917 foundLocale = TRUE;
918 break;
919 }
920 }
921 if (foundLocale == FALSE) {
922 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
923 }
924 }
925
926
927 // that_word was adopted by factory
928 delete thai_char;
929 delete root_word;
930 delete root_char;
931#endif
932}
933
934void RBBIAPITest::RoundtripRule(const char *dataFile) {
935 UErrorCode status = U_ZERO_ERROR;
936 UParseError parseError;
937 parseError.line = 0;
938 parseError.offset = 0;
939 UDataMemory *data = udata_open(NULL, "brk", dataFile, &status);
940 uint32_t length;
941 const UChar *builtSource;
942 const uint8_t *rbbiRules;
943 const uint8_t *builtRules;
944
945 if (U_FAILURE(status)) {
946 errln("Can't open \"%s\"", dataFile);
947 return;
948 }
949
950 builtRules = (const uint8_t *)udata_getMemory(data);
951 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
952 RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
953 if (U_FAILURE(status)) {
954 errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
955 u_errorName(status), parseError.line, parseError.offset);
956 return;
957 };
958 rbbiRules = brkItr->getBinaryRules(length);
959 logln("Comparing \"%s\" len=%d", dataFile, length);
960 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
961 errln("Built rules and rebuilt rules are different %s", dataFile);
962 return;
963 }
964 delete brkItr;
965 udata_close(data);
966}
967
968void RBBIAPITest::TestRoundtripRules() {
969 RoundtripRule("word");
970 RoundtripRule("title");
971 RoundtripRule("sent");
972 RoundtripRule("line");
973 RoundtripRule("char");
974 if (!quick) {
975 RoundtripRule("word_th");
976 RoundtripRule("line_th");
977 }
978}
979
980//---------------------------------------------
981// runIndexedTest
982//---------------------------------------------
983
984void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
985{
986 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
987 switch (index) {
988 // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break;
989 case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break;
990 case 1: name = "TestgetRules"; if (exec) TestgetRules(); break;
991 case 2: name = "TestHashCode"; if (exec) TestHashCode(); break;
992 case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break;
993 case 4: name = "TestIteration"; if (exec) TestIteration(); break;
994 case 5: name = "extra"; break; // Extra
995 case 6: name = "extra"; break; // Extra
996 case 7: name = "TestBuilder"; if (exec) TestBuilder(); break;
997 case 8: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break;
998 case 9: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break;
999 case 10: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break;
1000 case 11: name = "TestBug2190"; if (exec) TestBug2190(); break;
1001 case 12: name = "TestRegistration"; if (exec) TestRegistration(); break;
1002 case 13: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break;
1003 case 14: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break;
1004
1005 default: name = ""; break; // needed to end loop
1006 }
1007}
1008
1009//---------------------------------------------
1010//Internal subroutines
1011//---------------------------------------------
1012
1013void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
1014 logln((UnicodeString)"testIsBoundary():");
1015 int32_t p = 0;
1016 UBool isB;
1017 for (int32_t i = 0; i < text.length(); i++) {
1018 isB = bi.isBoundary(i);
1019 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1020
1021 if (i == boundaries[p]) {
1022 if (!isB)
1023 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1024 p++;
1025 }
1026 else {
1027 if (isB)
1028 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1029 }
1030 }
1031}
1032void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1033 UnicodeString selected;
1034 UnicodeString expected=CharsToUnicodeString(expectedString);
1035
1036 if(gotoffset != expectedOffset)
1037 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1038 if(start <= gotoffset){
1039 testString.extractBetween(start, gotoffset, selected);
1040 }
1041 else{
1042 testString.extractBetween(gotoffset, start, selected);
1043 }
1044 if(selected.compare(expected) != 0)
1045 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1046 else
1047 logln(prettify("****selected \"" + selected + "\""));
1048}
1049
1050#endif /* #if !UCONFIG_NO_BREAK_ITERATION */