]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/rbbiapts.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbiapts.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3/********************************************************************
f3c0d7a5 4 * Copyright (c) 1999-2016, International Business Machines
46f4442e
A
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * Date Name Description
8 * 12/14/99 Madhu Creation.
9 * 01/12/2000 Madhu updated for changed API
b75a7d8f 10 ********************************************************************/
b75a7d8f
A
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/uchar.h"
17#include "intltest.h"
18#include "unicode/rbbi.h"
19#include "unicode/schriter.h"
20#include "rbbiapts.h"
21#include "rbbidata.h"
22#include "cstring.h"
73c04bcf 23#include "ubrkimpl.h"
4388f060 24#include "unicode/locid.h"
374ca955 25#include "unicode/ustring.h"
73c04bcf 26#include "unicode/utext.h"
46f4442e 27#include "cmemory.h"
f3c0d7a5 28#if !UCONFIG_NO_BREAK_ITERATION
57a6839d
A
29#include "unicode/filteredbrk.h"
30#include <stdio.h> // for sprintf
31#endif
b75a7d8f
A
32/**
33 * API Test the RuleBasedBreakIterator class
34 */
35
36
340931cb
A
// TEST_ASSERT_SUCCESS(status)
//   When `status` holds a failure code, reports it through dataerrln()
//   together with the file and line of the macro invocation.
//   Does not return from the enclosing function.
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END

// TEST_ASSERT(expr)
//   When `expr` evaluates to false, reports a test failure through errln().
//   The text of `expr` is stringified into the message, so the spelling of
//   the argument is visible in the test output.
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
b75a7d8f
A
48
49void RBBIAPITest::TestCloneEquals()
50{
51
52 UErrorCode status=U_ZERO_ERROR;
53 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
54 RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
55 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
56 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
57 if(U_FAILURE(status)){
729e4ab9 58 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
b75a7d8f
A
59 return;
60 }
61
62
63 UnicodeString testString="Testing word break iterators's clone() and equals()";
64 bi1->setText(testString);
65 bi2->setText(testString);
66 biequal->setText(testString);
67
68 bi3->setText("hello");
69
70 logln((UnicodeString)"Testing equals()");
71
72 logln((UnicodeString)"Testing == and !=");
73 UBool b = (*bi1 != *biequal);
74 b |= *bi1 == *bi2;
75 b |= *bi1 == *bi3;
76 if (b) {
0f5d89e8 77 errln("%s:%d ERROR:1 RBBI's == and != operator failed.", __FILE__, __LINE__);
b75a7d8f
A
78 }
79
80 if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
0f5d89e8 81 errln("%s:%d ERROR:2 RBBI's == and != operator failed.", __FILE__, __LINE__);
b75a7d8f
A
82
83
46f4442e 84 // Quick test of RulesBasedBreakIterator assignment -
b75a7d8f
A
85 // Check that
86 // two different iterators are !=
87 // they are == after assignment
88 // source and dest iterator produce the same next() after assignment.
89 // deleting one doesn't disable the other.
90 logln("Testing assignment");
73c04bcf 91 RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
b75a7d8f 92 if(U_FAILURE(status)){
729e4ab9 93 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
b75a7d8f
A
94 return;
95 }
96
97 RuleBasedBreakIterator biDefault, biDefault2;
98 if(U_FAILURE(status)){
0f5d89e8 99 errln("%s:%d FAIL : in construction of default iterator", __FILE__, __LINE__);
b75a7d8f
A
100 return;
101 }
102 if (biDefault == *bix) {
0f5d89e8 103 errln("%s:%d ERROR: iterators should not compare ==", __FILE__, __LINE__);
b75a7d8f
A
104 return;
105 }
106 if (biDefault != biDefault2) {
0f5d89e8 107 errln("%s:%d ERROR: iterators should compare ==", __FILE__, __LINE__);
b75a7d8f
A
108 return;
109 }
110
111
112 UnicodeString HelloString("Hello Kitty");
113 bix->setText(HelloString);
114 if (*bix == *bi2) {
0f5d89e8 115 errln("%s:%d ERROR: strings should not be equal before assignment.", __FILE__, __LINE__);
b75a7d8f
A
116 }
117 *bix = *bi2;
118 if (*bix != *bi2) {
0f5d89e8 119 errln("%s:%d ERROR: strings should be equal before assignment.", __FILE__, __LINE__);
b75a7d8f
A
120 }
121
122 int bixnext = bix->next();
123 int bi2next = bi2->next();
124 if (! (bixnext == bi2next && bixnext == 7)) {
0f5d89e8 125 errln("%s:%d ERROR: iterators behaved differently after assignment.", __FILE__, __LINE__);
b75a7d8f
A
126 }
127 delete bix;
128 if (bi2->next() != 8) {
0f5d89e8 129 errln("%s:%d ERROR: iterator.next() failed after deleting copy.", __FILE__, __LINE__);
b75a7d8f
A
130 }
131
132
133
134 logln((UnicodeString)"Testing clone()");
340931cb
A
135 RuleBasedBreakIterator* bi1clone = bi1->clone();
136 RuleBasedBreakIterator* bi2clone = bi2->clone();
b75a7d8f 137
46f4442e 138 if(*bi1clone != *bi1 || *bi1clone != *biequal ||
b75a7d8f 139 *bi1clone == *bi3 || *bi1clone == *bi2)
0f5d89e8 140 errln("%s:%d ERROR:1 RBBI's clone() method failed", __FILE__, __LINE__);
b75a7d8f 141
46f4442e 142 if(*bi2clone == *bi1 || *bi2clone == *biequal ||
b75a7d8f 143 *bi2clone == *bi3 || *bi2clone != *bi2)
0f5d89e8 144 errln("%s:%d ERROR:2 RBBI's clone() method failed", __FILE__, __LINE__);
b75a7d8f
A
145
146 if(bi1->getText() != bi1clone->getText() ||
46f4442e 147 bi2clone->getText() != bi2->getText() ||
b75a7d8f 148 *bi2clone == *bi1clone )
0f5d89e8 149 errln("%s:%d ERROR: RBBI's clone() method failed", __FILE__, __LINE__);
b75a7d8f
A
150
151 delete bi1clone;
152 delete bi2clone;
153 delete bi1;
154 delete bi3;
155 delete bi2;
156 delete biequal;
157}
158
159void RBBIAPITest::TestBoilerPlate()
160{
161 UErrorCode status = U_ZERO_ERROR;
73c04bcf
A
162 BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status);
163 BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status);
374ca955 164 if (U_FAILURE(status)) {
729e4ab9 165 errcheckln(status, "Creation of break iterator failed %s", u_errorName(status));
374ca955
A
166 return;
167 }
b75a7d8f
A
168 if(*a!=*b){
169 errln("Failed: boilerplate method operator!= does not return correct results");
170 }
51004dcb
A
171 // Japanese word break iterators are identical to root with
172 // a dictionary-based break iterator
173 BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status);
174 BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status);
175 if(c && d){
176 if(*c!=*d){
177 errln("Failed: boilerplate method operator== does not return correct results");
374ca955
A
178 }
179 }else{
180 errln("creation of break iterator failed");
b75a7d8f
A
181 }
182 delete a;
183 delete b;
184 delete c;
51004dcb 185 delete d;
b75a7d8f
A
186}
187
188void RBBIAPITest::TestgetRules()
189{
190 UErrorCode status=U_ZERO_ERROR;
191
0f5d89e8
A
192 LocalPointer<RuleBasedBreakIterator> bi1(
193 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status), status);
194 LocalPointer<RuleBasedBreakIterator> bi2(
195 (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status), status);
b75a7d8f 196 if(U_FAILURE(status)){
0f5d89e8 197 errcheckln(status, "%s:%d, FAIL: in construction - %s", __FILE__, __LINE__, u_errorName(status));
b75a7d8f
A
198 return;
199 }
200
0f5d89e8 201 logln((UnicodeString)"Testing getRules()");
b75a7d8f 202
0f5d89e8
A
203 UnicodeString text(u"Hello there");
204 bi1->setText(text);
b75a7d8f 205
340931cb 206 LocalPointer <RuleBasedBreakIterator> bi3(bi1->clone());
b75a7d8f
A
207
208 UnicodeString temp=bi1->getRules();
209 UnicodeString temp2=bi2->getRules();
210 UnicodeString temp3=bi3->getRules();
211 if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0)
0f5d89e8 212 errln("%s:%d ERROR: error in getRules() method", __FILE__, __LINE__);
b75a7d8f 213
0f5d89e8
A
214 RuleBasedBreakIterator bi4; // Default RuleBasedBreakIterator constructor gives empty shell with empty rules.
215 if (!bi4.getRules().isEmpty()) {
216 errln("%s:%d Empty string expected.", __FILE__, __LINE__);
217 }
b75a7d8f 218}
0f5d89e8 219
b75a7d8f
A
220void RBBIAPITest::TestHashCode()
221{
222 UErrorCode status=U_ZERO_ERROR;
223 RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
224 RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
225 RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
226 if(U_FAILURE(status)){
729e4ab9 227 errcheckln(status, "Fail : in construction - %s", u_errorName(status));
b75a7d8f
A
228 delete bi1;
229 delete bi2;
230 delete bi3;
231 return;
232 }
233
234
235 logln((UnicodeString)"Testing hashCode()");
236
237 bi1->setText((UnicodeString)"Hash code");
238 bi2->setText((UnicodeString)"Hash code");
239 bi3->setText((UnicodeString)"Hash code");
240
340931cb
A
241 RuleBasedBreakIterator* bi1clone= bi1->clone();
242 RuleBasedBreakIterator* bi2clone= bi2->clone();
b75a7d8f
A
243
244 if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() ||
245 bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode())
246 errln((UnicodeString)"ERROR: identical objects have different hashcodes");
247
248 if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() ||
249 bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode())
250 errln((UnicodeString)"ERROR: different objects have same hashcodes");
251
252 delete bi1clone;
46f4442e 253 delete bi2clone;
b75a7d8f
A
254 delete bi1;
255 delete bi2;
256 delete bi3;
257
258}
259void RBBIAPITest::TestGetSetAdoptText()
260{
261 logln((UnicodeString)"Testing getText setText ");
729e4ab9 262 IcuTestErrorCode status(*this, "TestGetSetAdoptText");
b75a7d8f
A
263 UnicodeString str1="first string.";
264 UnicodeString str2="Second string.";
729e4ab9
A
265 LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status));
266 LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status));
267 if(status.isFailure()){
268 errcheckln(status, "Fail : in construction - %s", status.errorName());
b75a7d8f
A
269 return;
270 }
271
272
273 CharacterIterator* text1= new StringCharacterIterator(str1);
274 CharacterIterator* text1Clone = text1->clone();
275 CharacterIterator* text2= new StringCharacterIterator(str2);
276 CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str"
46f4442e 277
b75a7d8f 278 wordIter1->setText(str1);
73c04bcf
A
279 CharacterIterator *tci = &wordIter1->getText();
280 UnicodeString tstr;
281 tci->getText(tstr);
282 TEST_ASSERT(tstr == str1);
b75a7d8f
A
283 if(wordIter1->current() != 0)
284 errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
285
286 wordIter1->next(2);
287
288 wordIter1->setText(str2);
289 if(wordIter1->current() != 0)
290 errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n");
291
292
293 charIter1->adoptText(text1Clone);
73c04bcf
A
294 TEST_ASSERT(wordIter1->getText() != charIter1->getText());
295 tci = &wordIter1->getText();
296 tci->getText(tstr);
297 TEST_ASSERT(tstr == str2);
298 tci = &charIter1->getText();
299 tci->getText(tstr);
300 TEST_ASSERT(tstr == str1);
301
b75a7d8f 302
340931cb 303 LocalPointer<RuleBasedBreakIterator> rb(wordIter1->clone());
b75a7d8f
A
304 rb->adoptText(text1);
305 if(rb->getText() != *text1)
306 errln((UnicodeString)"ERROR:1 error in adoptText ");
307 rb->adoptText(text2);
308 if(rb->getText() != *text2)
309 errln((UnicodeString)"ERROR:2 error in adoptText ");
310
311 // Adopt where iterator range is less than the entire orignal source string.
73c04bcf
A
312 // (With the change of the break engine to working with UText internally,
313 // CharacterIterators starting at positions other than zero are not supported)
b75a7d8f 314 rb->adoptText(text3);
73c04bcf
A
315 TEST_ASSERT(rb->preceding(2) == 0);
316 TEST_ASSERT(rb->following(11) == BreakIterator::DONE);
317 //if(rb->preceding(2) != 3) {
318 // errln((UnicodeString)"ERROR:3 error in adoptText ");
319 //}
320 //if(rb->following(11) != BreakIterator::DONE) {
321 // errln((UnicodeString)"ERROR:4 error in adoptText ");
322 //}
323
324 // UText API
325 //
326 // Quick test to see if UText is working at all.
327 //
328 const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */
329 const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */
330 // 012345678901
331
729e4ab9
A
332 status.reset();
333 LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status));
334 wordIter1->setText(ut.getAlias(), status);
73c04bcf
A
335 TEST_ASSERT_SUCCESS(status);
336
337 int32_t pos;
338 pos = wordIter1->first();
339 TEST_ASSERT(pos==0);
340 pos = wordIter1->next();
341 TEST_ASSERT(pos==5);
342 pos = wordIter1->next();
343 TEST_ASSERT(pos==6);
344 pos = wordIter1->next();
345 TEST_ASSERT(pos==11);
346 pos = wordIter1->next();
347 TEST_ASSERT(pos==UBRK_DONE);
348
729e4ab9
A
349 status.reset();
350 LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status));
73c04bcf 351 TEST_ASSERT_SUCCESS(status);
729e4ab9 352 wordIter1->setText(ut2.getAlias(), status);
73c04bcf
A
353 TEST_ASSERT_SUCCESS(status);
354
355 pos = wordIter1->first();
356 TEST_ASSERT(pos==0);
357 pos = wordIter1->next();
358 TEST_ASSERT(pos==3);
359 pos = wordIter1->next();
360 TEST_ASSERT(pos==4);
361
362 pos = wordIter1->last();
363 TEST_ASSERT(pos==6);
364 pos = wordIter1->previous();
365 TEST_ASSERT(pos==4);
366 pos = wordIter1->previous();
367 TEST_ASSERT(pos==3);
368 pos = wordIter1->previous();
369 TEST_ASSERT(pos==0);
370 pos = wordIter1->previous();
371 TEST_ASSERT(pos==UBRK_DONE);
372
729e4ab9 373 status.reset();
73c04bcf 374 UnicodeString sEmpty;
729e4ab9
A
375 LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status));
376 wordIter1->getUText(gut2.getAlias(), status);
73c04bcf 377 TEST_ASSERT_SUCCESS(status);
729e4ab9
A
378 status.reset();
379}
46f4442e 380
b75a7d8f 381
b75a7d8f
A
382void RBBIAPITest::TestIteration()
383{
384 // This test just verifies that the API is present.
385 // Testing for correct operation of the break rules happens elsewhere.
386
387 UErrorCode status=U_ZERO_ERROR;
388 RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
389 if (U_FAILURE(status) || bi == NULL) {
729e4ab9 390 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
b75a7d8f
A
391 }
392 delete bi;
393
394 status=U_ZERO_ERROR;
395 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
396 if (U_FAILURE(status) || bi == NULL) {
729e4ab9 397 errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status));
b75a7d8f
A
398 }
399 delete bi;
400
401 status=U_ZERO_ERROR;
402 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status);
403 if (U_FAILURE(status) || bi == NULL) {
729e4ab9 404 errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status));
b75a7d8f
A
405 }
406 delete bi;
407
408 status=U_ZERO_ERROR;
409 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status);
410 if (U_FAILURE(status) || bi == NULL) {
729e4ab9 411 errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status));
b75a7d8f
A
412 }
413 delete bi;
414
415 status=U_ZERO_ERROR;
416 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status);
417 if (U_FAILURE(status) || bi == NULL) {
729e4ab9 418 errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status));
b75a7d8f
A
419 }
420 delete bi;
421
422 status=U_ZERO_ERROR;
423 bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
424 if (U_FAILURE(status) || bi == NULL) {
729e4ab9 425 errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status));
b75a7d8f
A
426 return; // Skip the rest of these tests.
427 }
428
429
430 UnicodeString testString="0123456789";
431 bi->setText(testString);
432
433 int32_t i;
434 i = bi->first();
435 if (i != 0) {
0f5d89e8 436 errln("%s:%d Incorrect value from bi->first(). Expected 0, got %d.", __FILE__, __LINE__, i);
b75a7d8f
A
437 }
438
439 i = bi->last();
440 if (i != 10) {
0f5d89e8 441 errln("%s:%d Incorrect value from bi->last(). Expected 10, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
442 }
443
444 //
445 // Previous
446 //
447 bi->last();
448 i = bi->previous();
449 if (i != 9) {
0f5d89e8 450 errln("%s:%d Incorrect value from bi->last(). Expected 9, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
451 }
452
453
454 bi->first();
455 i = bi->previous();
456 if (i != BreakIterator::DONE) {
0f5d89e8 457 errln("%s:%d Incorrect value from bi->previous(). Expected DONE, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
458 }
459
460 //
461 // next()
462 //
463 bi->first();
464 i = bi->next();
465 if (i != 1) {
0f5d89e8 466 errln("%s:%d Incorrect value from bi->next(). Expected 1, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
467 }
468
469 bi->last();
470 i = bi->next();
471 if (i != BreakIterator::DONE) {
0f5d89e8 472 errln("%s:%d Incorrect value from bi->next(). Expected DONE, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
473 }
474
475
476 //
477 // current()
478 //
479 bi->first();
480 i = bi->current();
481 if (i != 0) {
0f5d89e8 482 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
483 }
484
485 bi->next();
486 i = bi->current();
487 if (i != 1) {
0f5d89e8 488 errln("%s:%d Incorrect value from bi->current(). Expected 1, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
489 }
490
491 bi->last();
492 bi->next();
493 i = bi->current();
494 if (i != 10) {
0f5d89e8 495 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
496 }
497
498 bi->first();
499 bi->previous();
500 i = bi->current();
501 if (i != 0) {
0f5d89e8 502 errln("%s:%d Incorrect value from bi->current(). Expected 0, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
503 }
504
505
506 //
507 // Following()
508 //
509 i = bi->following(4);
510 if (i != 5) {
0f5d89e8 511 errln("%s:%d Incorrect value from bi->following(). Expected 5, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
512 }
513
514 i = bi->following(9);
515 if (i != 10) {
0f5d89e8 516 errln("%s:%d Incorrect value from bi->following(). Expected 10, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
517 }
518
519 i = bi->following(10);
520 if (i != BreakIterator::DONE) {
0f5d89e8 521 errln("%s:%d Incorrect value from bi->following(). Expected DONE, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
522 }
523
524
525 //
526 // Preceding
527 //
528 i = bi->preceding(4);
529 if (i != 3) {
0f5d89e8 530 errln("%s:%d Incorrect value from bi->preceding(). Expected 3, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
531 }
532
533 i = bi->preceding(10);
534 if (i != 9) {
0f5d89e8 535 errln("%s:%d Incorrect value from bi->preceding(). Expected 9, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
536 }
537
538 i = bi->preceding(1);
539 if (i != 0) {
0f5d89e8 540 errln("%s:%d Incorrect value from bi->preceding(). Expected 0, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
541 }
542
543 i = bi->preceding(0);
544 if (i != BreakIterator::DONE) {
0f5d89e8 545 errln("%s:%d Incorrect value from bi->preceding(). Expected DONE, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
546 }
547
548
549 //
550 // isBoundary()
551 //
552 bi->first();
553 if (bi->isBoundary(3) != TRUE) {
0f5d89e8 554 errln("%s:%d Incorrect value from bi->isBoudary(). Expected TRUE, got FALSE", __FILE__, __LINE__, i);
b75a7d8f
A
555 }
556 i = bi->current();
557 if (i != 3) {
0f5d89e8 558 errln("%s:%d Incorrect value from bi->current(). Expected 3, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
559 }
560
561
562 if (bi->isBoundary(11) != FALSE) {
0f5d89e8 563 errln("%s:%d Incorrect value from bi->isBoudary(). Expected FALSE, got TRUE", __FILE__, __LINE__, i);
b75a7d8f
A
564 }
565 i = bi->current();
566 if (i != 10) {
0f5d89e8 567 errln("%s:%d Incorrect value from bi->current(). Expected 10, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
568 }
569
570 //
571 // next(n)
572 //
573 bi->first();
574 i = bi->next(4);
575 if (i != 4) {
0f5d89e8 576 errln("%s:%d Incorrect value from bi->next(). Expected 4, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
577 }
578
579 i = bi->next(6);
580 if (i != 10) {
0f5d89e8 581 errln("%s:%d Incorrect value from bi->next(). Expected 10, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
582 }
583
584 bi->first();
585 i = bi->next(11);
586 if (i != BreakIterator::DONE) {
0f5d89e8 587 errln("%s:%d Incorrect value from bi->next(). Expected BreakIterator::DONE, got %d", __FILE__, __LINE__, i);
b75a7d8f
A
588 }
589
590 delete bi;
591
592}
593
594
595
596
597
598
599void RBBIAPITest::TestBuilder() {
600 UnicodeString rulesString1 = "$Letters = [:L:];\n"
601 "$Numbers = [:N:];\n"
602 "$Letters+;\n"
603 "$Numbers+;\n"
604 "[^$Letters $Numbers];\n"
605 "!.*;\n";
606 UnicodeString testString1 = "abc123..abc";
607 // 01234567890
608 int32_t bounds1[] = {0, 3, 6, 7, 8, 11};
609 UErrorCode status=U_ZERO_ERROR;
610 UParseError parseError;
46f4442e 611
b75a7d8f
A
612 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
613 if(U_FAILURE(status)) {
729e4ab9 614 dataerrln("Fail : in construction - %s", u_errorName(status));
b75a7d8f
A
615 } else {
616 bi->setText(testString1);
617 doBoundaryTest(*bi, testString1, bounds1);
618 }
619 delete bi;
620}
621
622
623//
624// TestQuoteGrouping
625// Single quotes within rules imply a grouping, so that a modifier
626// following the quoted text (* or +) applies to all of the quoted chars.
627//
628void RBBIAPITest::TestQuoteGrouping() {
629 UnicodeString rulesString1 = "#Here comes the rule...\n"
630 "'$@!'*;\n" // (\$\@\!)*
631 ".;\n";
632
633 UnicodeString testString1 = "$@!$@!X$@!!X";
634 // 0123456789012
635 int32_t bounds1[] = {0, 6, 7, 10, 11, 12};
636 UErrorCode status=U_ZERO_ERROR;
637 UParseError parseError;
46f4442e 638
b75a7d8f
A
639 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
640 if(U_FAILURE(status)) {
729e4ab9 641 dataerrln("Fail : in construction - %s", u_errorName(status));
b75a7d8f
A
642 } else {
643 bi->setText(testString1);
644 doBoundaryTest(*bi, testString1, bounds1);
645 }
646 delete bi;
647}
648
649//
374ca955 650// TestRuleStatus
b75a7d8f
A
651// Test word break rule status constants.
652//
374ca955 653void RBBIAPITest::TestRuleStatus() {
46f4442e 654 UChar str[30];
51004dcb
A
655 //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
656 // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
657 u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
57a6839d
A
658 // 012345678901234567 8 9 0
659 // Katakana
374ca955
A
660 str, 30);
661 UnicodeString testString1(str);
51004dcb 662 int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
b75a7d8f
A
663 int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
664 UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
51004dcb 665 UBRK_WORD_IDEO, UBRK_WORD_NONE};
b75a7d8f
A
666
667 int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
668 UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
51004dcb 669 UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};
b75a7d8f
A
670
671 UErrorCode status=U_ZERO_ERROR;
46f4442e 672
57a6839d 673 BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
b75a7d8f 674 if(U_FAILURE(status)) {
0f5d89e8 675 errcheckln(status, "%s:%d Fail in construction - %s", __FILE__, __LINE__, u_errorName(status));
b75a7d8f
A
676 } else {
677 bi->setText(testString1);
678 // First test that the breaks are in the right spots.
679 doBoundaryTest(*bi, testString1, bounds1);
680
681 // Then go back and check tag values
682 int32_t i = 0;
683 int32_t pos, tag;
684 for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
685 if (pos != bounds1[i]) {
0f5d89e8 686 errln("%s:%d FAIL: unexpected word break at postion %d", __FILE__, __LINE__, pos);
b75a7d8f
A
687 break;
688 }
689 tag = bi->getRuleStatus();
690 if (tag < tag_lo[i] || tag >= tag_hi[i]) {
0f5d89e8 691 errln("%s:%d FAIL: incorrect tag value %d at position %d", __FILE__, __LINE__, tag, pos);
b75a7d8f
A
692 break;
693 }
46f4442e 694
374ca955
A
695 // Check that we get the same tag values from getRuleStatusVec()
696 int32_t vec[10];
697 int t = bi->getRuleStatusVec(vec, 10, status);
698 TEST_ASSERT_SUCCESS(status);
699 TEST_ASSERT(t==1);
700 TEST_ASSERT(vec[0] == tag);
b75a7d8f
A
701 }
702 }
703 delete bi;
374ca955
A
704
705 // Now test line break status. This test mostly is to confirm that the status constants
706 // are correctly declared in the header.
707 testString1 = "test line. \n";
708 // break type s s h
709
57a6839d 710 bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
374ca955 711 if(U_FAILURE(status)) {
0f5d89e8 712 errcheckln(status, "%s:%d failed to create line break iterator. - %s", __FILE__, __LINE__, u_errorName(status));
374ca955
A
713 } else {
714 int32_t i = 0;
715 int32_t pos, tag;
716 UBool success;
717
718 bi->setText(testString1);
719 pos = bi->current();
720 tag = bi->getRuleStatus();
721 for (i=0; i<3; i++) {
722 switch (i) {
723 case 0:
724 success = pos==0 && tag==UBRK_LINE_SOFT; break;
725 case 1:
726 success = pos==5 && tag==UBRK_LINE_SOFT; break;
727 case 2:
728 success = pos==12 && tag==UBRK_LINE_HARD; break;
729 default:
730 success = FALSE; break;
731 }
732 if (success == FALSE) {
0f5d89e8
A
733 errln("%s:%d: incorrect line break status or position. i=%d, pos=%d, tag=%d",
734 __FILE__, __LINE__, i, pos, tag);
374ca955
A
735 break;
736 }
737 pos = bi->next();
738 tag = bi->getRuleStatus();
739 }
740 if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
741 UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
729e4ab9 742 (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
0f5d89e8 743 errln("%s:%d UBRK_LINE_* constants from header are inconsistent.", __FILE__, __LINE__);
374ca955
A
744 }
745 }
746 delete bi;
747
b75a7d8f
A
748}
749
750
374ca955
A
751//
752// TestRuleStatusVec
753// Test the vector form of break rule status.
754//
755void RBBIAPITest::TestRuleStatusVec() {
46f4442e 756 UnicodeString rulesString( "[A-N]{100}; \n"
374ca955
A
757 "[a-w]{200}; \n"
758 "[\\p{L}]{300}; \n"
759 "[\\p{N}]{400}; \n"
760 "[0-5]{500}; \n"
46f4442e 761 "!.*;\n", -1, US_INV);
374ca955
A
762 UnicodeString testString1 = "Aapz5?";
763 int32_t statusVals[10];
764 int32_t numStatuses;
765 int32_t pos;
766
767 UErrorCode status=U_ZERO_ERROR;
768 UParseError parseError;
46f4442e 769
374ca955 770 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
729e4ab9
A
771 if (U_FAILURE(status)) {
772 dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
773 } else {
374ca955
A
774 bi->setText(testString1);
775
776 // A
777 pos = bi->next();
778 TEST_ASSERT(pos==1);
779 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
780 TEST_ASSERT_SUCCESS(status);
781 TEST_ASSERT(numStatuses == 2);
782 TEST_ASSERT(statusVals[0] == 100);
783 TEST_ASSERT(statusVals[1] == 300);
784
785 // a
786 pos = bi->next();
787 TEST_ASSERT(pos==2);
788 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT(numStatuses == 2);
791 TEST_ASSERT(statusVals[0] == 200);
792 TEST_ASSERT(statusVals[1] == 300);
793
794 // p
795 pos = bi->next();
796 TEST_ASSERT(pos==3);
797 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
798 TEST_ASSERT_SUCCESS(status);
799 TEST_ASSERT(numStatuses == 2);
800 TEST_ASSERT(statusVals[0] == 200);
801 TEST_ASSERT(statusVals[1] == 300);
802
803 // z
804 pos = bi->next();
805 TEST_ASSERT(pos==4);
806 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
807 TEST_ASSERT_SUCCESS(status);
808 TEST_ASSERT(numStatuses == 1);
809 TEST_ASSERT(statusVals[0] == 300);
810
811 // 5
812 pos = bi->next();
813 TEST_ASSERT(pos==5);
814 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
815 TEST_ASSERT_SUCCESS(status);
816 TEST_ASSERT(numStatuses == 2);
817 TEST_ASSERT(statusVals[0] == 400);
818 TEST_ASSERT(statusVals[1] == 500);
819
820 // ?
821 pos = bi->next();
822 TEST_ASSERT(pos==6);
823 numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
824 TEST_ASSERT_SUCCESS(status);
825 TEST_ASSERT(numStatuses == 1);
826 TEST_ASSERT(statusVals[0] == 0);
827
828 //
46f4442e 829 // Check buffer overflow error handling. Char == A
374ca955
A
830 //
831 bi->first();
832 pos = bi->next();
833 TEST_ASSERT(pos==1);
834 memset(statusVals, -1, sizeof(statusVals));
835 numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
836 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
837 TEST_ASSERT(numStatuses == 2);
838 TEST_ASSERT(statusVals[0] == -1);
839
840 status = U_ZERO_ERROR;
841 memset(statusVals, -1, sizeof(statusVals));
842 numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
843 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
844 TEST_ASSERT(numStatuses == 2);
845 TEST_ASSERT(statusVals[0] == 100);
846 TEST_ASSERT(statusVals[1] == -1);
847
848 status = U_ZERO_ERROR;
849 memset(statusVals, -1, sizeof(statusVals));
850 numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
851 TEST_ASSERT_SUCCESS(status);
852 TEST_ASSERT(numStatuses == 2);
853 TEST_ASSERT(statusVals[0] == 100);
854 TEST_ASSERT(statusVals[1] == 300);
855 TEST_ASSERT(statusVals[2] == -1);
856 }
857 delete bi;
858
859}
860
b75a7d8f
A
861//
862// Bug 2190 Regression test. Builder crash on rule consisting of only a
863// $variable reference
864void RBBIAPITest::TestBug2190() {
865 UnicodeString rulesString1 = "$aaa = abcd;\n"
866 "$bbb = $aaa;\n"
867 "$bbb;\n";
868 UnicodeString testString1 = "abcdabcd";
869 // 01234567890
870 int32_t bounds1[] = {0, 4, 8};
871 UErrorCode status=U_ZERO_ERROR;
872 UParseError parseError;
46f4442e 873
b75a7d8f
A
874 RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
875 if(U_FAILURE(status)) {
729e4ab9 876 dataerrln("Fail : in construction - %s", u_errorName(status));
b75a7d8f
A
877 } else {
878 bi->setText(testString1);
879 doBoundaryTest(*bi, testString1, bounds1);
880 }
881 delete bi;
882}
883
884
885void RBBIAPITest::TestRegistration() {
374ca955 886#if !UCONFIG_NO_SERVICE
b75a7d8f 887 UErrorCode status = U_ZERO_ERROR;
73c04bcf 888 BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status);
b75a7d8f 889 // ok to not delete these if we exit because of error?
73c04bcf 890 BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status);
b75a7d8f
A
891 BreakIterator* root_word = BreakIterator::createWordInstance("", status);
892 BreakIterator* root_char = BreakIterator::createCharacterInstance("", status);
57a6839d 893
729e4ab9
A
894 if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) {
895 dataerrln("Error creating instances of break interactors - %s", u_errorName(status));
51004dcb 896
729e4ab9
A
897 delete ja_word;
898 delete ja_char;
899 delete root_word;
900 delete root_char;
57a6839d 901
729e4ab9
A
902 return;
903 }
46f4442e 904
73c04bcf 905 URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status);
b75a7d8f 906 {
51004dcb 907#if 0 // With a dictionary based word breaking, ja_word is identical to root.
73c04bcf
A
908 if (ja_word && *ja_word == *root_word) {
909 errln("japan not different from root");
b75a7d8f 910 }
51004dcb 911#endif
b75a7d8f 912 }
46f4442e 913
b75a7d8f
A
914 {
915 BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status);
374ca955
A
916 UBool fail = TRUE;
917 if(result){
73c04bcf 918 fail = *result != *ja_word;
374ca955 919 }
b75a7d8f
A
920 delete result;
921 if (fail) {
922 errln("bad result for xx_XX/word");
923 }
924 }
46f4442e 925
b75a7d8f 926 {
73c04bcf 927 BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status);
374ca955
A
928 UBool fail = TRUE;
929 if(result){
73c04bcf 930 fail = *result != *ja_char;
374ca955 931 }
b75a7d8f
A
932 delete result;
933 if (fail) {
73c04bcf 934 errln("bad result for ja_JP/char");
b75a7d8f
A
935 }
936 }
46f4442e 937
b75a7d8f
A
938 {
939 BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status);
374ca955
A
940 UBool fail = TRUE;
941 if(result){
942 fail = *result != *root_char;
943 }
b75a7d8f
A
944 delete result;
945 if (fail) {
946 errln("bad result for xx_XX/char");
947 }
948 }
46f4442e 949
b75a7d8f
A
950 {
951 StringEnumeration* avail = BreakIterator::getAvailableLocales();
952 UBool found = FALSE;
953 const UnicodeString* p;
954 while ((p = avail->snext(status))) {
955 if (p->compare("xx") == 0) {
956 found = TRUE;
957 break;
958 }
959 }
960 delete avail;
961 if (!found) {
962 errln("did not find test locale");
963 }
964 }
46f4442e 965
b75a7d8f
A
966 {
967 UBool unreg = BreakIterator::unregister(key, status);
968 if (!unreg) {
969 errln("unable to unregister");
970 }
971 }
46f4442e 972
b75a7d8f 973 {
73c04bcf 974 BreakIterator* result = BreakIterator::createWordInstance("en_US", status);
b75a7d8f 975 BreakIterator* root = BreakIterator::createWordInstance("", status);
374ca955
A
976 UBool fail = TRUE;
977 if(root){
978 fail = *root != *result;
979 }
b75a7d8f
A
980 delete root;
981 delete result;
982 if (fail) {
983 errln("did not get root break");
984 }
985 }
46f4442e 986
b75a7d8f
A
987 {
988 StringEnumeration* avail = BreakIterator::getAvailableLocales();
989 UBool found = FALSE;
990 const UnicodeString* p;
991 while ((p = avail->snext(status))) {
992 if (p->compare("xx") == 0) {
993 found = TRUE;
994 break;
995 }
996 }
997 delete avail;
998 if (found) {
999 errln("found test locale");
1000 }
1001 }
46f4442e 1002
b75a7d8f
A
1003 {
1004 int32_t count;
1005 UBool foundLocale = FALSE;
1006 const Locale *avail = BreakIterator::getAvailableLocales(count);
1007 for (int i=0; i<count; i++) {
1008 if (avail[i] == Locale::getEnglish()) {
1009 foundLocale = TRUE;
1010 break;
1011 }
1012 }
1013 if (foundLocale == FALSE) {
1014 errln("BreakIterator::getAvailableLocales(&count), failed to find EN.");
1015 }
1016 }
46f4442e
A
1017
1018
73c04bcf
A
1019 // ja_word was adopted by factory
1020 delete ja_char;
b75a7d8f
A
1021 delete root_word;
1022 delete root_char;
374ca955 1023#endif
b75a7d8f
A
1024}
1025
1026void RBBIAPITest::RoundtripRule(const char *dataFile) {
1027 UErrorCode status = U_ZERO_ERROR;
1028 UParseError parseError;
374ca955
A
1029 parseError.line = 0;
1030 parseError.offset = 0;
729e4ab9 1031 LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
b75a7d8f
A
1032 uint32_t length;
1033 const UChar *builtSource;
1034 const uint8_t *rbbiRules;
1035 const uint8_t *builtRules;
1036
1037 if (U_FAILURE(status)) {
f3c0d7a5 1038 errcheckln(status, "%s:%d Can't open \"%s\" - %s", __FILE__, __LINE__, dataFile, u_errorName(status));
b75a7d8f
A
1039 return;
1040 }
1041
729e4ab9 1042 builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
b75a7d8f 1043 builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
0f5d89e8 1044 LocalPointer<RuleBasedBreakIterator> brkItr (new RuleBasedBreakIterator(builtSource, parseError, status));
b75a7d8f 1045 if (U_FAILURE(status)) {
f3c0d7a5
A
1046 errln("%s:%d createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
1047 __FILE__, __LINE__, u_errorName(status), parseError.line, parseError.offset);
1048 errln(UnicodeString(builtSource));
b75a7d8f 1049 return;
340931cb 1050 }
b75a7d8f
A
1051 rbbiRules = brkItr->getBinaryRules(length);
1052 logln("Comparing \"%s\" len=%d", dataFile, length);
1053 if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
f3c0d7a5 1054 errln("%s:%d Built rules and rebuilt rules are different %s", __FILE__, __LINE__, dataFile);
b75a7d8f
A
1055 return;
1056 }
b75a7d8f
A
1057}
1058
1059void RBBIAPITest::TestRoundtripRules() {
1060 RoundtripRule("word");
1061 RoundtripRule("title");
1062 RoundtripRule("sent");
1063 RoundtripRule("line");
1064 RoundtripRule("char");
1065 if (!quick) {
73c04bcf 1066 RoundtripRule("word_POSIX");
b75a7d8f
A
1067 }
1068}
1069
57a6839d 1070
f3c0d7a5
A
1071// Check getBinaryRules() and construction of a break iterator from those rules.
1072
1073void RBBIAPITest::TestGetBinaryRules() {
1074 UErrorCode status=U_ZERO_ERROR;
1075 LocalPointer<BreakIterator> bi(BreakIterator::createLineInstance(Locale::getEnglish(), status));
1076 if (U_FAILURE(status)) {
1077 dataerrln("FAIL: BreakIterator::createLineInstance for Locale::getEnglish(): %s", u_errorName(status));
1078 return;
1079 }
1080 RuleBasedBreakIterator *rbbi = dynamic_cast<RuleBasedBreakIterator *>(bi.getAlias());
1081 if (rbbi == NULL) {
1082 dataerrln("FAIL: RuleBasedBreakIterator is NULL");
1083 return;
46f4442e 1084 }
4388f060 1085
f3c0d7a5
A
1086 // Check that the new line break iterator is nominally functional.
1087 UnicodeString helloWorld("Hello, World!");
1088 rbbi->setText(helloWorld);
1089 int n = 0;
1090 while (bi->next() != UBRK_DONE) {
1091 ++n;
1092 }
1093 TEST_ASSERT(n == 2);
4388f060 1094
f3c0d7a5
A
1095 // Extract the binary rules as a uint8_t blob.
1096 uint32_t ruleLength;
1097 const uint8_t *binRules = rbbi->getBinaryRules(ruleLength);
1098 TEST_ASSERT(ruleLength > 0);
1099 TEST_ASSERT(binRules != NULL);
4388f060 1100
f3c0d7a5
A
1101 // Clone the binary rules, and create a break iterator from that.
1102 // The break iterator does not adopt the rules; we must delete when we are finished with the iterator.
1103 uint8_t *clonedRules = new uint8_t[ruleLength];
1104 memcpy(clonedRules, binRules, ruleLength);
1105 RuleBasedBreakIterator clonedBI(clonedRules, ruleLength, status);
1106 TEST_ASSERT_SUCCESS(status);
1107
1108 // Check that the cloned line break iterator is nominally alive.
1109 clonedBI.setText(helloWorld);
1110 n = 0;
1111 while (clonedBI.next() != UBRK_DONE) {
1112 ++n;
4388f060 1113 }
f3c0d7a5
A
1114 TEST_ASSERT(n == 2);
1115
1116 delete[] clonedRules;
46f4442e
A
1117}
1118
4388f060
A
1119
1120void RBBIAPITest::TestRefreshInputText() {
1121 /*
1122 * RefreshInput changes out the input of a Break Iterator without
1123 * changing anything else in the iterator's state. Used with Java JNI,
1124 * when Java moves the underlying string storage. This test
1125 * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
1126 * The right set of boundaries should still be found.
1127 */
1128 UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
1129 UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
1130 UErrorCode status = U_ZERO_ERROR;
1131 UText ut1 = UTEXT_INITIALIZER;
1132 UText ut2 = UTEXT_INITIALIZER;
1133 RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
1134 TEST_ASSERT_SUCCESS(status);
1135
1136 utext_openUChars(&ut1, testStr, -1, &status);
1137 TEST_ASSERT_SUCCESS(status);
1138
1139 if (U_SUCCESS(status)) {
1140 bi->setText(&ut1, status);
1141 TEST_ASSERT_SUCCESS(status);
1142
1143 /* Line boundaries will occur before each letter in the original string */
1144 TEST_ASSERT(1 == bi->next());
1145 TEST_ASSERT(3 == bi->next());
1146
1147 /* Move the string, kill the original string. */
1148 u_strcpy(movedStr, testStr);
1149 u_memset(testStr, 0x20, u_strlen(testStr));
1150 utext_openUChars(&ut2, movedStr, -1, &status);
1151 TEST_ASSERT_SUCCESS(status);
1152 RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
1153 TEST_ASSERT_SUCCESS(status);
1154 TEST_ASSERT(bi == returnedBI);
1155
1156 /* Find the following matches, now working in the moved string. */
1157 TEST_ASSERT(5 == bi->next());
1158 TEST_ASSERT(7 == bi->next());
1159 TEST_ASSERT(8 == bi->next());
1160 TEST_ASSERT(UBRK_DONE == bi->next());
57a6839d 1161
4388f060
A
1162 utext_close(&ut1);
1163 utext_close(&ut2);
1164 }
1165 delete bi;
1166
1167}
1168
f3c0d7a5 1169#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
57a6839d
A
1170static void prtbrks(BreakIterator* brk, const UnicodeString &ustr, IntlTest &it) {
1171 static const UChar PILCROW=0x00B6, CHSTR=0x3010, CHEND=0x3011; // lenticular brackets
1172 it.logln(UnicodeString("String:'")+ustr+UnicodeString("'"));
1173
1174 int32_t *pos = new int32_t[ustr.length()];
1175 int32_t posCount = 0;
1176
1177 // calculate breaks up front, so we can print out
1178 // sans any debugging
1179 for(int32_t n = 0; (n=brk->next())!=UBRK_DONE; ) {
1180 pos[posCount++] = n;
1181 if(posCount>=ustr.length()) {
1182 it.errln("brk count exceeds string length!");
1183 return;
1184 }
1185 }
1186 UnicodeString out;
1187 out.append((UChar)CHSTR);
1188 int32_t prev = 0;
1189 for(int32_t i=0;i<posCount;i++) {
1190 int32_t n=pos[i];
1191 out.append(ustr.tempSubString(prev,n-prev));
1192 out.append((UChar)PILCROW);
1193 prev=n;
1194 }
1195 out.append(ustr.tempSubString(prev,ustr.length()-prev));
1196 out.append((UChar)CHEND);
1197 it.logln(out);
1198
1199 out.remove();
1200 for(int32_t i=0;i<posCount;i++) {
1201 char tmp[100];
1202 sprintf(tmp,"%d ",pos[i]);
1203 out.append(UnicodeString(tmp));
1204 }
1205 it.logln(out);
1206 delete [] pos;
1207}
1208#endif
1209
1210void RBBIAPITest::TestFilteredBreakIteratorBuilder() {
f3c0d7a5 1211#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
57a6839d
A
1212 UErrorCode status = U_ZERO_ERROR;
1213 LocalPointer<FilteredBreakIteratorBuilder> builder;
1214 LocalPointer<BreakIterator> baseBI;
1215 LocalPointer<BreakIterator> filteredBI;
b331163b 1216 LocalPointer<BreakIterator> frenchBI;
57a6839d
A
1217
1218 const UnicodeString text("In the meantime Mr. Weston arrived with his small ship, which he had now recovered. Capt. Gorges, who informed the Sgt. here that one purpose of his going east was to meet with Mr. Weston, took this opportunity to call him to account for some abuses he had to lay to his charge."); // (William Bradford, public domain. http://catalog.hathitrust.org/Record/008651224 ) - edited.
1219 const UnicodeString ABBR_MR("Mr.");
1220 const UnicodeString ABBR_CAPT("Capt.");
1221
1222 {
1223 logln("Constructing empty builder\n");
1224 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1225 TEST_ASSERT_SUCCESS(status);
1226
1227 logln("Constructing base BI\n");
1228 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1229 TEST_ASSERT_SUCCESS(status);
1230
b331163b 1231 logln("Building new BI\n");
57a6839d
A
1232 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1233 TEST_ASSERT_SUCCESS(status);
1234
b331163b
A
1235 if (U_SUCCESS(status)) {
1236 logln("Testing:");
1237 filteredBI->setText(text);
1238 TEST_ASSERT(20 == filteredBI->next()); // Mr.
1239 TEST_ASSERT(84 == filteredBI->next()); // recovered.
1240 TEST_ASSERT(90 == filteredBI->next()); // Capt.
1241 TEST_ASSERT(181 == filteredBI->next()); // Mr.
1242 TEST_ASSERT(278 == filteredBI->next()); // charge.
1243 filteredBI->first();
1244 prtbrks(filteredBI.getAlias(), text, *this);
1245 }
57a6839d
A
1246 }
1247
1248 {
1249 logln("Constructing empty builder\n");
1250 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1251 TEST_ASSERT_SUCCESS(status);
1252
b331163b
A
1253 if (U_SUCCESS(status)) {
1254 logln("Adding Mr. as an exception\n");
1255 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1256 TEST_ASSERT(FALSE == builder->suppressBreakAfter(ABBR_MR, status)); // already have it
1257 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_MR, status));
1258 TEST_ASSERT(FALSE == builder->unsuppressBreakAfter(ABBR_MR, status)); // already removed it
1259 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1260 TEST_ASSERT_SUCCESS(status);
57a6839d 1261
b331163b
A
1262 logln("Constructing base BI\n");
1263 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1264 TEST_ASSERT_SUCCESS(status);
57a6839d 1265
b331163b
A
1266 logln("Building new BI\n");
1267 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1268 TEST_ASSERT_SUCCESS(status);
57a6839d 1269
b331163b
A
1270 logln("Testing:");
1271 filteredBI->setText(text);
1272 TEST_ASSERT(84 == filteredBI->next());
1273 TEST_ASSERT(90 == filteredBI->next());// Capt.
1274 TEST_ASSERT(278 == filteredBI->next());
1275 filteredBI->first();
1276 prtbrks(filteredBI.getAlias(), text, *this);
1277 }
57a6839d
A
1278 }
1279
1280
1281 {
1282 logln("Constructing empty builder\n");
1283 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(status));
1284 TEST_ASSERT_SUCCESS(status);
1285
b331163b
A
1286 if (U_SUCCESS(status)) {
1287 logln("Adding Mr. and Capt as an exception\n");
1288 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_MR, status));
1289 TEST_ASSERT(TRUE == builder->suppressBreakAfter(ABBR_CAPT, status));
1290 TEST_ASSERT_SUCCESS(status);
57a6839d 1291
b331163b
A
1292 logln("Constructing base BI\n");
1293 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1294 TEST_ASSERT_SUCCESS(status);
57a6839d 1295
b331163b
A
1296 logln("Building new BI\n");
1297 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1298 TEST_ASSERT_SUCCESS(status);
57a6839d 1299
b331163b
A
1300 logln("Testing:");
1301 filteredBI->setText(text);
1302 TEST_ASSERT(84 == filteredBI->next());
1303 TEST_ASSERT(278 == filteredBI->next());
1304 filteredBI->first();
1305 prtbrks(filteredBI.getAlias(), text, *this);
1306 }
57a6839d
A
1307 }
1308
1309
1310 {
1311 logln("Constructing English builder\n");
1312 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1313 TEST_ASSERT_SUCCESS(status);
1314
1315 logln("Constructing base BI\n");
1316 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1317 TEST_ASSERT_SUCCESS(status);
1318
b331163b
A
1319 if (U_SUCCESS(status)) {
1320 logln("unsuppressing 'Capt'");
1321 TEST_ASSERT(TRUE == builder->unsuppressBreakAfter(ABBR_CAPT, status));
57a6839d 1322
b331163b
A
1323 logln("Building new BI\n");
1324 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1325 TEST_ASSERT_SUCCESS(status);
57a6839d 1326
b331163b
A
1327 if(filteredBI.isValid()) {
1328 logln("Testing:");
1329 filteredBI->setText(text);
1330 TEST_ASSERT(84 == filteredBI->next());
1331 TEST_ASSERT(90 == filteredBI->next());
1332 TEST_ASSERT(278 == filteredBI->next());
1333 filteredBI->first();
1334 prtbrks(filteredBI.getAlias(), text, *this);
1335 }
57a6839d
A
1336 }
1337 }
1338
1339
1340 {
1341 logln("Constructing English builder\n");
1342 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getEnglish(), status));
1343 TEST_ASSERT_SUCCESS(status);
1344
1345 logln("Constructing base BI\n");
1346 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getEnglish(), status));
1347 TEST_ASSERT_SUCCESS(status);
1348
b331163b
A
1349 if (U_SUCCESS(status)) {
1350 logln("Building new BI\n");
1351 filteredBI.adoptInstead(builder->build(baseBI.orphan(), status));
1352 TEST_ASSERT_SUCCESS(status);
57a6839d 1353
b331163b
A
1354 if(filteredBI.isValid()) {
1355 logln("Testing:");
1356 filteredBI->setText(text);
1357 TEST_ASSERT(84 == filteredBI->next());
1358 TEST_ASSERT(278 == filteredBI->next());
1359 filteredBI->first();
1360 prtbrks(filteredBI.getAlias(), text, *this);
1361 }
57a6839d
A
1362 }
1363 }
1364
57a6839d
A
1365 // reenable once french is in
1366 {
1367 logln("Constructing French builder");
1368 builder.adoptInstead(FilteredBreakIteratorBuilder::createInstance(Locale::getFrench(), status));
1369 TEST_ASSERT_SUCCESS(status);
1370
1371 logln("Constructing base BI\n");
1372 baseBI.adoptInstead(BreakIterator::createSentenceInstance(Locale::getFrench(), status));
1373 TEST_ASSERT_SUCCESS(status);
1374
b331163b
A
1375 if (U_SUCCESS(status)) {
1376 logln("Building new BI\n");
1377 frenchBI.adoptInstead(builder->build(baseBI.orphan(), status));
1378 TEST_ASSERT_SUCCESS(status);
1379 }
57a6839d 1380
b331163b 1381 if(frenchBI.isValid()) {
57a6839d 1382 logln("Testing:");
b331163b
A
1383 UnicodeString frText("C'est MM. Duval.");
1384 frenchBI->setText(frText);
1385 TEST_ASSERT(16 == frenchBI->next());
1386 TEST_ASSERT(BreakIterator::DONE == frenchBI->next());
1387 frenchBI->first();
1388 prtbrks(frenchBI.getAlias(), frText, *this);
1389 logln("Testing against English:");
1390 filteredBI->setText(frText);
1391 TEST_ASSERT(10 == filteredBI->next()); // wrong for french, but filterBI is english.
1392 TEST_ASSERT(16 == filteredBI->next());
1393 TEST_ASSERT(BreakIterator::DONE == filteredBI->next());
57a6839d 1394 filteredBI->first();
b331163b
A
1395 prtbrks(filteredBI.getAlias(), frText, *this);
1396
1397 // Verify ==
3d1f044b
A
1398 assertTrue(WHERE, *frenchBI == *frenchBI);
1399 assertTrue(WHERE, *filteredBI != *frenchBI);
1400 assertTrue(WHERE, *frenchBI != *filteredBI);
b331163b
A
1401 } else {
1402 dataerrln("French BI: not valid.");
1403 }
57a6839d 1404 }
57a6839d
A
1405
1406#else
f3c0d7a5 1407 logln("Skipped- not: !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION");
57a6839d
A
1408#endif
1409}
4388f060 1410
b75a7d8f
A
1411//---------------------------------------------
1412// runIndexedTest
1413//---------------------------------------------
1414
1415void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
1416{
1417 if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API ");
f3c0d7a5 1418 TESTCASE_AUTO_BEGIN;
729e4ab9 1419#if !UCONFIG_NO_FILE_IO
f3c0d7a5
A
1420 TESTCASE_AUTO(TestCloneEquals);
1421 TESTCASE_AUTO(TestgetRules);
1422 TESTCASE_AUTO(TestHashCode);
1423 TESTCASE_AUTO(TestGetSetAdoptText);
1424 TESTCASE_AUTO(TestIteration);
729e4ab9 1425#endif
f3c0d7a5
A
1426 TESTCASE_AUTO(TestBuilder);
1427 TESTCASE_AUTO(TestQuoteGrouping);
1428 TESTCASE_AUTO(TestRuleStatusVec);
1429 TESTCASE_AUTO(TestBug2190);
729e4ab9 1430#if !UCONFIG_NO_FILE_IO
f3c0d7a5
A
1431 TESTCASE_AUTO(TestRegistration);
1432 TESTCASE_AUTO(TestBoilerPlate);
1433 TESTCASE_AUTO(TestRuleStatus);
1434 TESTCASE_AUTO(TestRoundtripRules);
1435 TESTCASE_AUTO(TestGetBinaryRules);
729e4ab9 1436#endif
f3c0d7a5
A
1437 TESTCASE_AUTO(TestRefreshInputText);
1438#if !UCONFIG_NO_BREAK_ITERATION
1439 TESTCASE_AUTO(TestFilteredBreakIteratorBuilder);
57a6839d 1440#endif
f3c0d7a5 1441 TESTCASE_AUTO_END;
b75a7d8f
A
1442}
1443
f3c0d7a5 1444
b75a7d8f
A
1445//---------------------------------------------
1446//Internal subroutines
1447//---------------------------------------------
1448
57a6839d 1449void RBBIAPITest::doBoundaryTest(BreakIterator& bi, UnicodeString& text, int32_t *boundaries){
b75a7d8f
A
1450 logln((UnicodeString)"testIsBoundary():");
1451 int32_t p = 0;
1452 UBool isB;
1453 for (int32_t i = 0; i < text.length(); i++) {
1454 isB = bi.isBoundary(i);
1455 logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB);
1456
1457 if (i == boundaries[p]) {
1458 if (!isB)
1459 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false");
1460 p++;
1461 }
1462 else {
1463 if (isB)
1464 errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true");
1465 }
1466 }
1467}
1468void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){
1469 UnicodeString selected;
1470 UnicodeString expected=CharsToUnicodeString(expectedString);
1471
1472 if(gotoffset != expectedOffset)
1473 errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset);
1474 if(start <= gotoffset){
46f4442e 1475 testString.extractBetween(start, gotoffset, selected);
b75a7d8f
A
1476 }
1477 else{
1478 testString.extractBetween(gotoffset, start, selected);
1479 }
1480 if(selected.compare(expected) != 0)
1481 errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\""));
1482 else
1483 logln(prettify("****selected \"" + selected + "\""));
1484}
1485
1486#endif /* #if !UCONFIG_NO_BREAK_ITERATION */