]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /******************************************************************** |
4388f060 | 2 | * Copyright (c) 1999-2011, International Business Machines |
46f4442e A |
3 | * Corporation and others. All Rights Reserved. |
4 | ******************************************************************** | |
5 | * Date Name Description | |
6 | * 12/14/99 Madhu Creation. | |
7 | * 01/12/2000 Madhu updated for changed API | |
b75a7d8f | 8 | ********************************************************************/ |
b75a7d8f A |
9 | |
10 | #include "unicode/utypes.h" | |
11 | ||
12 | #if !UCONFIG_NO_BREAK_ITERATION | |
13 | ||
14 | #include "unicode/uchar.h" | |
15 | #include "intltest.h" | |
16 | #include "unicode/rbbi.h" | |
17 | #include "unicode/schriter.h" | |
18 | #include "rbbiapts.h" | |
19 | #include "rbbidata.h" | |
20 | #include "cstring.h" | |
73c04bcf | 21 | #include "ubrkimpl.h" |
4388f060 | 22 | #include "unicode/locid.h" |
374ca955 | 23 | #include "unicode/ustring.h" |
73c04bcf | 24 | #include "unicode/utext.h" |
46f4442e | 25 | #include "cmemory.h" |
b75a7d8f A |
26 | |
27 | /** | |
28 | * API Test the RuleBasedBreakIterator class | |
29 | */ | |
30 | ||
31 | ||
// Logs through dataerrln(), tagged with the current file and line, when
// `status` holds a failing UErrorCode. Does nothing on success.
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); \
    } \
}

// Logs through errln(), quoting the text of `expr` together with the current
// file and line, when `expr` compares equal to FALSE.
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", __FILE__, __LINE__, #expr); \
    }; \
}
b75a7d8f A |
37 | |
38 | void RBBIAPITest::TestCloneEquals() | |
39 | { | |
40 | ||
41 | UErrorCode status=U_ZERO_ERROR; | |
42 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
43 | RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
44 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
45 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
46 | if(U_FAILURE(status)){ | |
729e4ab9 | 47 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
48 | return; |
49 | } | |
50 | ||
51 | ||
52 | UnicodeString testString="Testing word break iterators's clone() and equals()"; | |
53 | bi1->setText(testString); | |
54 | bi2->setText(testString); | |
55 | biequal->setText(testString); | |
56 | ||
57 | bi3->setText("hello"); | |
58 | ||
59 | logln((UnicodeString)"Testing equals()"); | |
60 | ||
61 | logln((UnicodeString)"Testing == and !="); | |
62 | UBool b = (*bi1 != *biequal); | |
63 | b |= *bi1 == *bi2; | |
64 | b |= *bi1 == *bi3; | |
65 | if (b) { | |
66 | errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); | |
67 | } | |
68 | ||
69 | if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) | |
70 | errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); | |
71 | ||
72 | ||
46f4442e | 73 | // Quick test of RulesBasedBreakIterator assignment - |
b75a7d8f A |
74 | // Check that |
75 | // two different iterators are != | |
76 | // they are == after assignment | |
77 | // source and dest iterator produce the same next() after assignment. | |
78 | // deleting one doesn't disable the other. | |
79 | logln("Testing assignment"); | |
73c04bcf | 80 | RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); |
b75a7d8f | 81 | if(U_FAILURE(status)){ |
729e4ab9 | 82 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
83 | return; |
84 | } | |
85 | ||
86 | RuleBasedBreakIterator biDefault, biDefault2; | |
87 | if(U_FAILURE(status)){ | |
88 | errln((UnicodeString)"FAIL : in construction of default iterator"); | |
89 | return; | |
90 | } | |
91 | if (biDefault == *bix) { | |
92 | errln((UnicodeString)"ERROR: iterators should not compare =="); | |
93 | return; | |
94 | } | |
95 | if (biDefault != biDefault2) { | |
96 | errln((UnicodeString)"ERROR: iterators should compare =="); | |
97 | return; | |
98 | } | |
99 | ||
100 | ||
101 | UnicodeString HelloString("Hello Kitty"); | |
102 | bix->setText(HelloString); | |
103 | if (*bix == *bi2) { | |
104 | errln(UnicodeString("ERROR: strings should not be equal before assignment.")); | |
105 | } | |
106 | *bix = *bi2; | |
107 | if (*bix != *bi2) { | |
108 | errln(UnicodeString("ERROR: strings should be equal before assignment.")); | |
109 | } | |
110 | ||
111 | int bixnext = bix->next(); | |
112 | int bi2next = bi2->next(); | |
113 | if (! (bixnext == bi2next && bixnext == 7)) { | |
114 | errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); | |
115 | } | |
116 | delete bix; | |
117 | if (bi2->next() != 8) { | |
118 | errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); | |
119 | } | |
120 | ||
121 | ||
122 | ||
123 | logln((UnicodeString)"Testing clone()"); | |
124 | RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); | |
125 | RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); | |
126 | ||
46f4442e | 127 | if(*bi1clone != *bi1 || *bi1clone != *biequal || |
b75a7d8f A |
128 | *bi1clone == *bi3 || *bi1clone == *bi2) |
129 | errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); | |
130 | ||
46f4442e | 131 | if(*bi2clone == *bi1 || *bi2clone == *biequal || |
b75a7d8f A |
132 | *bi2clone == *bi3 || *bi2clone != *bi2) |
133 | errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); | |
134 | ||
135 | if(bi1->getText() != bi1clone->getText() || | |
46f4442e | 136 | bi2clone->getText() != bi2->getText() || |
b75a7d8f A |
137 | *bi2clone == *bi1clone ) |
138 | errln((UnicodeString)"ERROR: RBBI's clone() method failed"); | |
139 | ||
140 | delete bi1clone; | |
141 | delete bi2clone; | |
142 | delete bi1; | |
143 | delete bi3; | |
144 | delete bi2; | |
145 | delete biequal; | |
146 | } | |
147 | ||
148 | void RBBIAPITest::TestBoilerPlate() | |
149 | { | |
150 | UErrorCode status = U_ZERO_ERROR; | |
73c04bcf A |
151 | BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); |
152 | BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); | |
374ca955 | 153 | if (U_FAILURE(status)) { |
729e4ab9 | 154 | errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); |
374ca955 A |
155 | return; |
156 | } | |
b75a7d8f A |
157 | if(*a!=*b){ |
158 | errln("Failed: boilerplate method operator!= does not return correct results"); | |
159 | } | |
73c04bcf | 160 | BreakIterator* c = BreakIterator::createWordInstance(Locale("ja"),status); |
374ca955 A |
161 | if(a && c){ |
162 | if(*c==*a){ | |
163 | errln("Failed: boilerplate method opertator== does not return correct results"); | |
164 | } | |
165 | }else{ | |
166 | errln("creation of break iterator failed"); | |
b75a7d8f A |
167 | } |
168 | delete a; | |
169 | delete b; | |
170 | delete c; | |
171 | } | |
172 | ||
173 | void RBBIAPITest::TestgetRules() | |
174 | { | |
175 | UErrorCode status=U_ZERO_ERROR; | |
176 | ||
177 | RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
178 | RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
179 | if(U_FAILURE(status)){ | |
729e4ab9 | 180 | errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); |
b75a7d8f A |
181 | delete bi1; |
182 | delete bi2; | |
183 | return; | |
184 | } | |
185 | ||
186 | ||
187 | ||
188 | logln((UnicodeString)"Testing toString()"); | |
189 | ||
190 | bi1->setText((UnicodeString)"Hello there"); | |
191 | ||
192 | RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); | |
193 | ||
194 | UnicodeString temp=bi1->getRules(); | |
195 | UnicodeString temp2=bi2->getRules(); | |
196 | UnicodeString temp3=bi3->getRules(); | |
197 | if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) | |
198 | errln((UnicodeString)"ERROR: error in getRules() method"); | |
199 | ||
200 | delete bi1; | |
201 | delete bi2; | |
202 | delete bi3; | |
203 | } | |
204 | void RBBIAPITest::TestHashCode() | |
205 | { | |
206 | UErrorCode status=U_ZERO_ERROR; | |
207 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
208 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
209 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
210 | if(U_FAILURE(status)){ | |
729e4ab9 | 211 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
212 | delete bi1; |
213 | delete bi2; | |
214 | delete bi3; | |
215 | return; | |
216 | } | |
217 | ||
218 | ||
219 | logln((UnicodeString)"Testing hashCode()"); | |
220 | ||
221 | bi1->setText((UnicodeString)"Hash code"); | |
222 | bi2->setText((UnicodeString)"Hash code"); | |
223 | bi3->setText((UnicodeString)"Hash code"); | |
224 | ||
225 | RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); | |
226 | RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); | |
227 | ||
228 | if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || | |
229 | bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) | |
230 | errln((UnicodeString)"ERROR: identical objects have different hashcodes"); | |
231 | ||
232 | if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || | |
233 | bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) | |
234 | errln((UnicodeString)"ERROR: different objects have same hashcodes"); | |
235 | ||
236 | delete bi1clone; | |
46f4442e | 237 | delete bi2clone; |
b75a7d8f A |
238 | delete bi1; |
239 | delete bi2; | |
240 | delete bi3; | |
241 | ||
242 | } | |
243 | void RBBIAPITest::TestGetSetAdoptText() | |
244 | { | |
245 | logln((UnicodeString)"Testing getText setText "); | |
729e4ab9 | 246 | IcuTestErrorCode status(*this, "TestGetSetAdoptText"); |
b75a7d8f A |
247 | UnicodeString str1="first string."; |
248 | UnicodeString str2="Second string."; | |
729e4ab9 A |
249 | LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); |
250 | LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); | |
251 | if(status.isFailure()){ | |
252 | errcheckln(status, "Fail : in construction - %s", status.errorName()); | |
b75a7d8f A |
253 | return; |
254 | } | |
255 | ||
256 | ||
257 | CharacterIterator* text1= new StringCharacterIterator(str1); | |
258 | CharacterIterator* text1Clone = text1->clone(); | |
259 | CharacterIterator* text2= new StringCharacterIterator(str2); | |
260 | CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" | |
46f4442e | 261 | |
b75a7d8f | 262 | wordIter1->setText(str1); |
73c04bcf A |
263 | CharacterIterator *tci = &wordIter1->getText(); |
264 | UnicodeString tstr; | |
265 | tci->getText(tstr); | |
266 | TEST_ASSERT(tstr == str1); | |
b75a7d8f A |
267 | if(wordIter1->current() != 0) |
268 | errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
269 | ||
270 | wordIter1->next(2); | |
271 | ||
272 | wordIter1->setText(str2); | |
273 | if(wordIter1->current() != 0) | |
274 | errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
275 | ||
276 | ||
277 | charIter1->adoptText(text1Clone); | |
73c04bcf A |
278 | TEST_ASSERT(wordIter1->getText() != charIter1->getText()); |
279 | tci = &wordIter1->getText(); | |
280 | tci->getText(tstr); | |
281 | TEST_ASSERT(tstr == str2); | |
282 | tci = &charIter1->getText(); | |
283 | tci->getText(tstr); | |
284 | TEST_ASSERT(tstr == str1); | |
285 | ||
b75a7d8f | 286 | |
729e4ab9 | 287 | LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); |
b75a7d8f A |
288 | rb->adoptText(text1); |
289 | if(rb->getText() != *text1) | |
290 | errln((UnicodeString)"ERROR:1 error in adoptText "); | |
291 | rb->adoptText(text2); | |
292 | if(rb->getText() != *text2) | |
293 | errln((UnicodeString)"ERROR:2 error in adoptText "); | |
294 | ||
295 | // Adopt where iterator range is less than the entire orignal source string. | |
73c04bcf A |
296 | // (With the change of the break engine to working with UText internally, |
297 | // CharacterIterators starting at positions other than zero are not supported) | |
b75a7d8f | 298 | rb->adoptText(text3); |
73c04bcf A |
299 | TEST_ASSERT(rb->preceding(2) == 0); |
300 | TEST_ASSERT(rb->following(11) == BreakIterator::DONE); | |
301 | //if(rb->preceding(2) != 3) { | |
302 | // errln((UnicodeString)"ERROR:3 error in adoptText "); | |
303 | //} | |
304 | //if(rb->following(11) != BreakIterator::DONE) { | |
305 | // errln((UnicodeString)"ERROR:4 error in adoptText "); | |
306 | //} | |
307 | ||
308 | // UText API | |
309 | // | |
310 | // Quick test to see if UText is working at all. | |
311 | // | |
312 | const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ | |
313 | const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ | |
314 | // 012345678901 | |
315 | ||
729e4ab9 A |
316 | status.reset(); |
317 | LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); | |
318 | wordIter1->setText(ut.getAlias(), status); | |
73c04bcf A |
319 | TEST_ASSERT_SUCCESS(status); |
320 | ||
321 | int32_t pos; | |
322 | pos = wordIter1->first(); | |
323 | TEST_ASSERT(pos==0); | |
324 | pos = wordIter1->next(); | |
325 | TEST_ASSERT(pos==5); | |
326 | pos = wordIter1->next(); | |
327 | TEST_ASSERT(pos==6); | |
328 | pos = wordIter1->next(); | |
329 | TEST_ASSERT(pos==11); | |
330 | pos = wordIter1->next(); | |
331 | TEST_ASSERT(pos==UBRK_DONE); | |
332 | ||
729e4ab9 A |
333 | status.reset(); |
334 | LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); | |
73c04bcf | 335 | TEST_ASSERT_SUCCESS(status); |
729e4ab9 | 336 | wordIter1->setText(ut2.getAlias(), status); |
73c04bcf A |
337 | TEST_ASSERT_SUCCESS(status); |
338 | ||
339 | pos = wordIter1->first(); | |
340 | TEST_ASSERT(pos==0); | |
341 | pos = wordIter1->next(); | |
342 | TEST_ASSERT(pos==3); | |
343 | pos = wordIter1->next(); | |
344 | TEST_ASSERT(pos==4); | |
345 | ||
346 | pos = wordIter1->last(); | |
347 | TEST_ASSERT(pos==6); | |
348 | pos = wordIter1->previous(); | |
349 | TEST_ASSERT(pos==4); | |
350 | pos = wordIter1->previous(); | |
351 | TEST_ASSERT(pos==3); | |
352 | pos = wordIter1->previous(); | |
353 | TEST_ASSERT(pos==0); | |
354 | pos = wordIter1->previous(); | |
355 | TEST_ASSERT(pos==UBRK_DONE); | |
356 | ||
729e4ab9 | 357 | status.reset(); |
73c04bcf | 358 | UnicodeString sEmpty; |
729e4ab9 A |
359 | LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); |
360 | wordIter1->getUText(gut2.getAlias(), status); | |
73c04bcf | 361 | TEST_ASSERT_SUCCESS(status); |
729e4ab9 A |
362 | status.reset(); |
363 | } | |
46f4442e | 364 | |
b75a7d8f | 365 | |
b75a7d8f A |
366 | void RBBIAPITest::TestIteration() |
367 | { | |
368 | // This test just verifies that the API is present. | |
369 | // Testing for correct operation of the break rules happens elsewhere. | |
370 | ||
371 | UErrorCode status=U_ZERO_ERROR; | |
372 | RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
373 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 374 | errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
375 | } |
376 | delete bi; | |
377 | ||
378 | status=U_ZERO_ERROR; | |
379 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
380 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 381 | errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
382 | } |
383 | delete bi; | |
384 | ||
385 | status=U_ZERO_ERROR; | |
386 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); | |
387 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 388 | errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
389 | } |
390 | delete bi; | |
391 | ||
392 | status=U_ZERO_ERROR; | |
393 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); | |
394 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 395 | errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
396 | } |
397 | delete bi; | |
398 | ||
399 | status=U_ZERO_ERROR; | |
400 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); | |
401 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 402 | errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
403 | } |
404 | delete bi; | |
405 | ||
406 | status=U_ZERO_ERROR; | |
407 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
408 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 409 | errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
410 | return; // Skip the rest of these tests. |
411 | } | |
412 | ||
413 | ||
414 | UnicodeString testString="0123456789"; | |
415 | bi->setText(testString); | |
416 | ||
417 | int32_t i; | |
418 | i = bi->first(); | |
419 | if (i != 0) { | |
420 | errln("Incorrect value from bi->first(). Expected 0, got %d.", i); | |
421 | } | |
422 | ||
423 | i = bi->last(); | |
424 | if (i != 10) { | |
425 | errln("Incorrect value from bi->last(). Expected 10, got %d", i); | |
426 | } | |
427 | ||
428 | // | |
429 | // Previous | |
430 | // | |
431 | bi->last(); | |
432 | i = bi->previous(); | |
433 | if (i != 9) { | |
434 | errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); | |
435 | } | |
436 | ||
437 | ||
438 | bi->first(); | |
439 | i = bi->previous(); | |
440 | if (i != BreakIterator::DONE) { | |
441 | errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); | |
442 | } | |
443 | ||
444 | // | |
445 | // next() | |
446 | // | |
447 | bi->first(); | |
448 | i = bi->next(); | |
449 | if (i != 1) { | |
450 | errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); | |
451 | } | |
452 | ||
453 | bi->last(); | |
454 | i = bi->next(); | |
455 | if (i != BreakIterator::DONE) { | |
456 | errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); | |
457 | } | |
458 | ||
459 | ||
460 | // | |
461 | // current() | |
462 | // | |
463 | bi->first(); | |
464 | i = bi->current(); | |
465 | if (i != 0) { | |
466 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
467 | } | |
468 | ||
469 | bi->next(); | |
470 | i = bi->current(); | |
471 | if (i != 1) { | |
472 | errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); | |
473 | } | |
474 | ||
475 | bi->last(); | |
476 | bi->next(); | |
477 | i = bi->current(); | |
478 | if (i != 10) { | |
479 | errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); | |
480 | } | |
481 | ||
482 | bi->first(); | |
483 | bi->previous(); | |
484 | i = bi->current(); | |
485 | if (i != 0) { | |
486 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
487 | } | |
488 | ||
489 | ||
490 | // | |
491 | // Following() | |
492 | // | |
493 | i = bi->following(4); | |
494 | if (i != 5) { | |
495 | errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); | |
496 | } | |
497 | ||
498 | i = bi->following(9); | |
499 | if (i != 10) { | |
500 | errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); | |
501 | } | |
502 | ||
503 | i = bi->following(10); | |
504 | if (i != BreakIterator::DONE) { | |
505 | errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); | |
506 | } | |
507 | ||
508 | ||
509 | // | |
510 | // Preceding | |
511 | // | |
512 | i = bi->preceding(4); | |
513 | if (i != 3) { | |
514 | errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); | |
515 | } | |
516 | ||
517 | i = bi->preceding(10); | |
518 | if (i != 9) { | |
519 | errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); | |
520 | } | |
521 | ||
522 | i = bi->preceding(1); | |
523 | if (i != 0) { | |
524 | errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); | |
525 | } | |
526 | ||
527 | i = bi->preceding(0); | |
528 | if (i != BreakIterator::DONE) { | |
529 | errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); | |
530 | } | |
531 | ||
532 | ||
533 | // | |
534 | // isBoundary() | |
535 | // | |
536 | bi->first(); | |
537 | if (bi->isBoundary(3) != TRUE) { | |
538 | errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); | |
539 | } | |
540 | i = bi->current(); | |
541 | if (i != 3) { | |
542 | errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); | |
543 | } | |
544 | ||
545 | ||
546 | if (bi->isBoundary(11) != FALSE) { | |
547 | errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); | |
548 | } | |
549 | i = bi->current(); | |
550 | if (i != 10) { | |
551 | errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); | |
552 | } | |
553 | ||
554 | // | |
555 | // next(n) | |
556 | // | |
557 | bi->first(); | |
558 | i = bi->next(4); | |
559 | if (i != 4) { | |
560 | errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); | |
561 | } | |
562 | ||
563 | i = bi->next(6); | |
564 | if (i != 10) { | |
565 | errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); | |
566 | } | |
567 | ||
568 | bi->first(); | |
569 | i = bi->next(11); | |
570 | if (i != BreakIterator::DONE) { | |
571 | errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); | |
572 | } | |
573 | ||
574 | delete bi; | |
575 | ||
576 | } | |
577 | ||
578 | ||
579 | ||
580 | ||
581 | ||
582 | ||
583 | void RBBIAPITest::TestBuilder() { | |
584 | UnicodeString rulesString1 = "$Letters = [:L:];\n" | |
585 | "$Numbers = [:N:];\n" | |
586 | "$Letters+;\n" | |
587 | "$Numbers+;\n" | |
588 | "[^$Letters $Numbers];\n" | |
589 | "!.*;\n"; | |
590 | UnicodeString testString1 = "abc123..abc"; | |
591 | // 01234567890 | |
592 | int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; | |
593 | UErrorCode status=U_ZERO_ERROR; | |
594 | UParseError parseError; | |
46f4442e | 595 | |
b75a7d8f A |
596 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); |
597 | if(U_FAILURE(status)) { | |
729e4ab9 | 598 | dataerrln("Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
599 | } else { |
600 | bi->setText(testString1); | |
601 | doBoundaryTest(*bi, testString1, bounds1); | |
602 | } | |
603 | delete bi; | |
604 | } | |
605 | ||
606 | ||
607 | // | |
608 | // TestQuoteGrouping | |
609 | // Single quotes within rules imply a grouping, so that a modifier | |
610 | // following the quoted text (* or +) applies to all of the quoted chars. | |
611 | // | |
612 | void RBBIAPITest::TestQuoteGrouping() { | |
613 | UnicodeString rulesString1 = "#Here comes the rule...\n" | |
614 | "'$@!'*;\n" // (\$\@\!)* | |
615 | ".;\n"; | |
616 | ||
617 | UnicodeString testString1 = "$@!$@!X$@!!X"; | |
618 | // 0123456789012 | |
619 | int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; | |
620 | UErrorCode status=U_ZERO_ERROR; | |
621 | UParseError parseError; | |
46f4442e | 622 | |
b75a7d8f A |
623 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); |
624 | if(U_FAILURE(status)) { | |
729e4ab9 | 625 | dataerrln("Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
626 | } else { |
627 | bi->setText(testString1); | |
628 | doBoundaryTest(*bi, testString1, bounds1); | |
629 | } | |
630 | delete bi; | |
631 | } | |
632 | ||
633 | // | |
374ca955 | 634 | // TestRuleStatus |
b75a7d8f A |
635 | // Test word break rule status constants. |
636 | // | |
374ca955 | 637 | void RBBIAPITest::TestRuleStatus() { |
46f4442e | 638 | UChar str[30]; |
374ca955 A |
639 | u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094", |
640 | // 012345678901234567 8 9 0 1 2 3 4 5 6 | |
641 | // Ideographic Katakana Hiragana | |
642 | str, 30); | |
643 | UnicodeString testString1(str); | |
644 | int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26}; | |
b75a7d8f A |
645 | int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, |
646 | UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, | |
647 | UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE, | |
648 | UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA}; | |
649 | ||
650 | int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, | |
651 | UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, | |
652 | UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT, | |
653 | UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT}; | |
654 | ||
655 | UErrorCode status=U_ZERO_ERROR; | |
46f4442e | 656 | |
73c04bcf | 657 | RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); |
b75a7d8f | 658 | if(U_FAILURE(status)) { |
729e4ab9 | 659 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
660 | } else { |
661 | bi->setText(testString1); | |
662 | // First test that the breaks are in the right spots. | |
663 | doBoundaryTest(*bi, testString1, bounds1); | |
664 | ||
665 | // Then go back and check tag values | |
666 | int32_t i = 0; | |
667 | int32_t pos, tag; | |
668 | for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { | |
669 | if (pos != bounds1[i]) { | |
670 | errln("FAIL: unexpected word break at postion %d", pos); | |
671 | break; | |
672 | } | |
673 | tag = bi->getRuleStatus(); | |
674 | if (tag < tag_lo[i] || tag >= tag_hi[i]) { | |
675 | errln("FAIL: incorrect tag value %d at position %d", tag, pos); | |
676 | break; | |
677 | } | |
46f4442e | 678 | |
374ca955 A |
679 | // Check that we get the same tag values from getRuleStatusVec() |
680 | int32_t vec[10]; | |
681 | int t = bi->getRuleStatusVec(vec, 10, status); | |
682 | TEST_ASSERT_SUCCESS(status); | |
683 | TEST_ASSERT(t==1); | |
684 | TEST_ASSERT(vec[0] == tag); | |
b75a7d8f A |
685 | } |
686 | } | |
687 | delete bi; | |
374ca955 A |
688 | |
689 | // Now test line break status. This test mostly is to confirm that the status constants | |
690 | // are correctly declared in the header. | |
691 | testString1 = "test line. \n"; | |
692 | // break type s s h | |
693 | ||
694 | bi = (RuleBasedBreakIterator *) | |
695 | BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
696 | if(U_FAILURE(status)) { | |
729e4ab9 | 697 | errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); |
374ca955 A |
698 | } else { |
699 | int32_t i = 0; | |
700 | int32_t pos, tag; | |
701 | UBool success; | |
702 | ||
703 | bi->setText(testString1); | |
704 | pos = bi->current(); | |
705 | tag = bi->getRuleStatus(); | |
706 | for (i=0; i<3; i++) { | |
707 | switch (i) { | |
708 | case 0: | |
709 | success = pos==0 && tag==UBRK_LINE_SOFT; break; | |
710 | case 1: | |
711 | success = pos==5 && tag==UBRK_LINE_SOFT; break; | |
712 | case 2: | |
713 | success = pos==12 && tag==UBRK_LINE_HARD; break; | |
714 | default: | |
715 | success = FALSE; break; | |
716 | } | |
717 | if (success == FALSE) { | |
718 | errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", | |
719 | i, pos, tag); | |
720 | break; | |
721 | } | |
722 | pos = bi->next(); | |
723 | tag = bi->getRuleStatus(); | |
724 | } | |
725 | if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || | |
726 | UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || | |
729e4ab9 | 727 | (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { |
374ca955 A |
728 | errln("UBRK_LINE_* constants from header are inconsistent."); |
729 | } | |
730 | } | |
731 | delete bi; | |
732 | ||
b75a7d8f A |
733 | } |
734 | ||
735 | ||
374ca955 A |
736 | // |
737 | // TestRuleStatusVec | |
738 | // Test the vector form of break rule status. | |
739 | // | |
740 | void RBBIAPITest::TestRuleStatusVec() { | |
46f4442e | 741 | UnicodeString rulesString( "[A-N]{100}; \n" |
374ca955 A |
742 | "[a-w]{200}; \n" |
743 | "[\\p{L}]{300}; \n" | |
744 | "[\\p{N}]{400}; \n" | |
745 | "[0-5]{500}; \n" | |
46f4442e | 746 | "!.*;\n", -1, US_INV); |
374ca955 A |
747 | UnicodeString testString1 = "Aapz5?"; |
748 | int32_t statusVals[10]; | |
749 | int32_t numStatuses; | |
750 | int32_t pos; | |
751 | ||
752 | UErrorCode status=U_ZERO_ERROR; | |
753 | UParseError parseError; | |
46f4442e | 754 | |
374ca955 | 755 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); |
729e4ab9 A |
756 | if (U_FAILURE(status)) { |
757 | dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); | |
758 | } else { | |
374ca955 A |
759 | bi->setText(testString1); |
760 | ||
761 | // A | |
762 | pos = bi->next(); | |
763 | TEST_ASSERT(pos==1); | |
764 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
765 | TEST_ASSERT_SUCCESS(status); | |
766 | TEST_ASSERT(numStatuses == 2); | |
767 | TEST_ASSERT(statusVals[0] == 100); | |
768 | TEST_ASSERT(statusVals[1] == 300); | |
769 | ||
770 | // a | |
771 | pos = bi->next(); | |
772 | TEST_ASSERT(pos==2); | |
773 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
774 | TEST_ASSERT_SUCCESS(status); | |
775 | TEST_ASSERT(numStatuses == 2); | |
776 | TEST_ASSERT(statusVals[0] == 200); | |
777 | TEST_ASSERT(statusVals[1] == 300); | |
778 | ||
779 | // p | |
780 | pos = bi->next(); | |
781 | TEST_ASSERT(pos==3); | |
782 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
783 | TEST_ASSERT_SUCCESS(status); | |
784 | TEST_ASSERT(numStatuses == 2); | |
785 | TEST_ASSERT(statusVals[0] == 200); | |
786 | TEST_ASSERT(statusVals[1] == 300); | |
787 | ||
788 | // z | |
789 | pos = bi->next(); | |
790 | TEST_ASSERT(pos==4); | |
791 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
792 | TEST_ASSERT_SUCCESS(status); | |
793 | TEST_ASSERT(numStatuses == 1); | |
794 | TEST_ASSERT(statusVals[0] == 300); | |
795 | ||
796 | // 5 | |
797 | pos = bi->next(); | |
798 | TEST_ASSERT(pos==5); | |
799 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
800 | TEST_ASSERT_SUCCESS(status); | |
801 | TEST_ASSERT(numStatuses == 2); | |
802 | TEST_ASSERT(statusVals[0] == 400); | |
803 | TEST_ASSERT(statusVals[1] == 500); | |
804 | ||
805 | // ? | |
806 | pos = bi->next(); | |
807 | TEST_ASSERT(pos==6); | |
808 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
809 | TEST_ASSERT_SUCCESS(status); | |
810 | TEST_ASSERT(numStatuses == 1); | |
811 | TEST_ASSERT(statusVals[0] == 0); | |
812 | ||
813 | // | |
46f4442e | 814 | // Check buffer overflow error handling. Char == A |
374ca955 A |
815 | // |
816 | bi->first(); | |
817 | pos = bi->next(); | |
818 | TEST_ASSERT(pos==1); | |
819 | memset(statusVals, -1, sizeof(statusVals)); | |
820 | numStatuses = bi->getRuleStatusVec(statusVals, 0, status); | |
821 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
822 | TEST_ASSERT(numStatuses == 2); | |
823 | TEST_ASSERT(statusVals[0] == -1); | |
824 | ||
825 | status = U_ZERO_ERROR; | |
826 | memset(statusVals, -1, sizeof(statusVals)); | |
827 | numStatuses = bi->getRuleStatusVec(statusVals, 1, status); | |
828 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
829 | TEST_ASSERT(numStatuses == 2); | |
830 | TEST_ASSERT(statusVals[0] == 100); | |
831 | TEST_ASSERT(statusVals[1] == -1); | |
832 | ||
833 | status = U_ZERO_ERROR; | |
834 | memset(statusVals, -1, sizeof(statusVals)); | |
835 | numStatuses = bi->getRuleStatusVec(statusVals, 2, status); | |
836 | TEST_ASSERT_SUCCESS(status); | |
837 | TEST_ASSERT(numStatuses == 2); | |
838 | TEST_ASSERT(statusVals[0] == 100); | |
839 | TEST_ASSERT(statusVals[1] == 300); | |
840 | TEST_ASSERT(statusVals[2] == -1); | |
841 | } | |
842 | delete bi; | |
843 | ||
844 | } | |
845 | ||
b75a7d8f A |
846 | // |
847 | // Bug 2190 Regression test. Builder crash on rule consisting of only a | |
848 | // $variable reference | |
849 | void RBBIAPITest::TestBug2190() { | |
850 | UnicodeString rulesString1 = "$aaa = abcd;\n" | |
851 | "$bbb = $aaa;\n" | |
852 | "$bbb;\n"; | |
853 | UnicodeString testString1 = "abcdabcd"; | |
854 | // 01234567890 | |
855 | int32_t bounds1[] = {0, 4, 8}; | |
856 | UErrorCode status=U_ZERO_ERROR; | |
857 | UParseError parseError; | |
46f4442e | 858 | |
b75a7d8f A |
859 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); |
860 | if(U_FAILURE(status)) { | |
729e4ab9 | 861 | dataerrln("Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
862 | } else { |
863 | bi->setText(testString1); | |
864 | doBoundaryTest(*bi, testString1, bounds1); | |
865 | } | |
866 | delete bi; | |
867 | } | |
868 | ||
869 | ||
870 | void RBBIAPITest::TestRegistration() { | |
374ca955 | 871 | #if !UCONFIG_NO_SERVICE |
b75a7d8f | 872 | UErrorCode status = U_ZERO_ERROR; |
73c04bcf | 873 | BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); |
46f4442e | 874 | |
b75a7d8f | 875 | // ok to not delete these if we exit because of error? |
73c04bcf | 876 | BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); |
b75a7d8f A |
877 | BreakIterator* root_word = BreakIterator::createWordInstance("", status); |
878 | BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); | |
729e4ab9 A |
879 | |
880 | if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { | |
881 | dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); | |
882 | delete ja_word; | |
883 | delete ja_char; | |
884 | delete root_word; | |
885 | delete root_char; | |
886 | ||
887 | return; | |
888 | } | |
46f4442e | 889 | |
73c04bcf | 890 | URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); |
b75a7d8f | 891 | { |
73c04bcf A |
892 | if (ja_word && *ja_word == *root_word) { |
893 | errln("japan not different from root"); | |
b75a7d8f A |
894 | } |
895 | } | |
46f4442e | 896 | |
b75a7d8f A |
897 | { |
898 | BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); | |
374ca955 A |
899 | UBool fail = TRUE; |
900 | if(result){ | |
73c04bcf | 901 | fail = *result != *ja_word; |
374ca955 | 902 | } |
b75a7d8f A |
903 | delete result; |
904 | if (fail) { | |
905 | errln("bad result for xx_XX/word"); | |
906 | } | |
907 | } | |
46f4442e | 908 | |
b75a7d8f | 909 | { |
73c04bcf | 910 | BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); |
374ca955 A |
911 | UBool fail = TRUE; |
912 | if(result){ | |
73c04bcf | 913 | fail = *result != *ja_char; |
374ca955 | 914 | } |
b75a7d8f A |
915 | delete result; |
916 | if (fail) { | |
73c04bcf | 917 | errln("bad result for ja_JP/char"); |
b75a7d8f A |
918 | } |
919 | } | |
46f4442e | 920 | |
b75a7d8f A |
921 | { |
922 | BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); | |
374ca955 A |
923 | UBool fail = TRUE; |
924 | if(result){ | |
925 | fail = *result != *root_char; | |
926 | } | |
b75a7d8f A |
927 | delete result; |
928 | if (fail) { | |
929 | errln("bad result for xx_XX/char"); | |
930 | } | |
931 | } | |
46f4442e | 932 | |
b75a7d8f A |
933 | { |
934 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
935 | UBool found = FALSE; | |
936 | const UnicodeString* p; | |
937 | while ((p = avail->snext(status))) { | |
938 | if (p->compare("xx") == 0) { | |
939 | found = TRUE; | |
940 | break; | |
941 | } | |
942 | } | |
943 | delete avail; | |
944 | if (!found) { | |
945 | errln("did not find test locale"); | |
946 | } | |
947 | } | |
46f4442e | 948 | |
b75a7d8f A |
949 | { |
950 | UBool unreg = BreakIterator::unregister(key, status); | |
951 | if (!unreg) { | |
952 | errln("unable to unregister"); | |
953 | } | |
954 | } | |
46f4442e | 955 | |
b75a7d8f | 956 | { |
73c04bcf | 957 | BreakIterator* result = BreakIterator::createWordInstance("en_US", status); |
b75a7d8f | 958 | BreakIterator* root = BreakIterator::createWordInstance("", status); |
374ca955 A |
959 | UBool fail = TRUE; |
960 | if(root){ | |
961 | fail = *root != *result; | |
962 | } | |
b75a7d8f A |
963 | delete root; |
964 | delete result; | |
965 | if (fail) { | |
966 | errln("did not get root break"); | |
967 | } | |
968 | } | |
46f4442e | 969 | |
b75a7d8f A |
970 | { |
971 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
972 | UBool found = FALSE; | |
973 | const UnicodeString* p; | |
974 | while ((p = avail->snext(status))) { | |
975 | if (p->compare("xx") == 0) { | |
976 | found = TRUE; | |
977 | break; | |
978 | } | |
979 | } | |
980 | delete avail; | |
981 | if (found) { | |
982 | errln("found test locale"); | |
983 | } | |
984 | } | |
46f4442e | 985 | |
b75a7d8f A |
986 | { |
987 | int32_t count; | |
988 | UBool foundLocale = FALSE; | |
989 | const Locale *avail = BreakIterator::getAvailableLocales(count); | |
990 | for (int i=0; i<count; i++) { | |
991 | if (avail[i] == Locale::getEnglish()) { | |
992 | foundLocale = TRUE; | |
993 | break; | |
994 | } | |
995 | } | |
996 | if (foundLocale == FALSE) { | |
997 | errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); | |
998 | } | |
999 | } | |
46f4442e A |
1000 | |
1001 | ||
73c04bcf A |
1002 | // ja_word was adopted by factory |
1003 | delete ja_char; | |
b75a7d8f A |
1004 | delete root_word; |
1005 | delete root_char; | |
374ca955 | 1006 | #endif |
b75a7d8f A |
1007 | } |
1008 | ||
1009 | void RBBIAPITest::RoundtripRule(const char *dataFile) { | |
1010 | UErrorCode status = U_ZERO_ERROR; | |
1011 | UParseError parseError; | |
374ca955 A |
1012 | parseError.line = 0; |
1013 | parseError.offset = 0; | |
729e4ab9 | 1014 | LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); |
b75a7d8f A |
1015 | uint32_t length; |
1016 | const UChar *builtSource; | |
1017 | const uint8_t *rbbiRules; | |
1018 | const uint8_t *builtRules; | |
1019 | ||
1020 | if (U_FAILURE(status)) { | |
729e4ab9 | 1021 | errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); |
b75a7d8f A |
1022 | return; |
1023 | } | |
1024 | ||
729e4ab9 | 1025 | builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); |
b75a7d8f A |
1026 | builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); |
1027 | RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); | |
1028 | if (U_FAILURE(status)) { | |
1029 | errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", | |
1030 | u_errorName(status), parseError.line, parseError.offset); | |
1031 | return; | |
1032 | }; | |
1033 | rbbiRules = brkItr->getBinaryRules(length); | |
1034 | logln("Comparing \"%s\" len=%d", dataFile, length); | |
1035 | if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { | |
1036 | errln("Built rules and rebuilt rules are different %s", dataFile); | |
1037 | return; | |
1038 | } | |
1039 | delete brkItr; | |
b75a7d8f A |
1040 | } |
1041 | ||
1042 | void RBBIAPITest::TestRoundtripRules() { | |
1043 | RoundtripRule("word"); | |
1044 | RoundtripRule("title"); | |
1045 | RoundtripRule("sent"); | |
1046 | RoundtripRule("line"); | |
1047 | RoundtripRule("char"); | |
1048 | if (!quick) { | |
73c04bcf A |
1049 | RoundtripRule("word_ja"); |
1050 | RoundtripRule("word_POSIX"); | |
b75a7d8f A |
1051 | } |
1052 | } | |
1053 | ||
46f4442e A |
1054 | // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* |
1055 | // (these are protected so we access them via a local class RBBIWithProtectedFunctions). | |
1056 | // This is just a sanity check, not a thorough test (e.g. we don't check that the | |
1057 | // first delete actually frees rulesCopy). | |
1058 | void RBBIAPITest::TestCreateFromRBBIData() { | |
1059 | // Get some handy RBBIData | |
1060 | const char *brkName = "word"; // or "sent", "line", "char", etc. | |
1061 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 | 1062 | LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); |
46f4442e | 1063 | if ( U_SUCCESS(status) ) { |
729e4ab9 | 1064 | const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); |
46f4442e A |
1065 | uint32_t length = builtRules->fLength; |
1066 | RBBIWithProtectedFunctions * brkItr; | |
1067 | ||
1068 | // Try the memory-adopting constructor, need to copy the data first | |
1069 | RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); | |
1070 | if ( rulesCopy ) { | |
1071 | uprv_memcpy( rulesCopy, builtRules, length ); | |
1072 | ||
1073 | brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); | |
1074 | if ( U_SUCCESS(status) ) { | |
1075 | delete brkItr; // this should free rulesCopy | |
1076 | } else { | |
1077 | errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); | |
1078 | status = U_ZERO_ERROR;// reset for the next test | |
1079 | uprv_free( rulesCopy ); | |
1080 | } | |
1081 | } | |
1082 | ||
1083 | // Now try the non-adopting constructor | |
1084 | brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); | |
1085 | if ( U_SUCCESS(status) ) { | |
1086 | delete brkItr; // this should NOT attempt to free builtRules | |
1087 | if (builtRules->fLength != length) { // sanity check | |
1088 | errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); | |
1089 | } | |
1090 | } else { | |
1091 | errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); | |
1092 | } | |
46f4442e | 1093 | } |
4388f060 A |
1094 | |
1095 | // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) | |
1096 | // | |
1097 | status = U_ZERO_ERROR; | |
1098 | RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); | |
1099 | if (rb == NULL || U_FAILURE(status)) { | |
1100 | dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status)); | |
1101 | } else { | |
1102 | uint32_t length; | |
1103 | const uint8_t *rules = rb->getBinaryRules(length); | |
1104 | RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); | |
1105 | TEST_ASSERT_SUCCESS(status); | |
1106 | TEST_ASSERT(*rb == *rb2); | |
1107 | UnicodeString words = "one two three "; | |
1108 | rb2->setText(words); | |
1109 | int wordCounter = 0; | |
1110 | while (rb2->next() != UBRK_DONE) { | |
1111 | wordCounter++; | |
1112 | } | |
1113 | TEST_ASSERT(wordCounter == 6); | |
1114 | ||
1115 | status = U_ZERO_ERROR; | |
1116 | RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); | |
1117 | TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | |
1118 | ||
1119 | delete rb; | |
1120 | delete rb2; | |
1121 | delete rb3; | |
1122 | } | |
46f4442e A |
1123 | } |
1124 | ||
4388f060 A |
1125 | |
1126 | void RBBIAPITest::TestRefreshInputText() { | |
1127 | /* | |
1128 | * RefreshInput changes out the input of a Break Iterator without | |
1129 | * changing anything else in the iterator's state. Used with Java JNI, | |
1130 | * when Java moves the underlying string storage. This test | |
1131 | * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. | |
1132 | * The right set of boundaries should still be found. | |
1133 | */ | |
1134 | UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ | |
1135 | UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; | |
1136 | UErrorCode status = U_ZERO_ERROR; | |
1137 | UText ut1 = UTEXT_INITIALIZER; | |
1138 | UText ut2 = UTEXT_INITIALIZER; | |
1139 | RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
1140 | TEST_ASSERT_SUCCESS(status); | |
1141 | ||
1142 | utext_openUChars(&ut1, testStr, -1, &status); | |
1143 | TEST_ASSERT_SUCCESS(status); | |
1144 | ||
1145 | if (U_SUCCESS(status)) { | |
1146 | bi->setText(&ut1, status); | |
1147 | TEST_ASSERT_SUCCESS(status); | |
1148 | ||
1149 | /* Line boundaries will occur before each letter in the original string */ | |
1150 | TEST_ASSERT(1 == bi->next()); | |
1151 | TEST_ASSERT(3 == bi->next()); | |
1152 | ||
1153 | /* Move the string, kill the original string. */ | |
1154 | u_strcpy(movedStr, testStr); | |
1155 | u_memset(testStr, 0x20, u_strlen(testStr)); | |
1156 | utext_openUChars(&ut2, movedStr, -1, &status); | |
1157 | TEST_ASSERT_SUCCESS(status); | |
1158 | RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); | |
1159 | TEST_ASSERT_SUCCESS(status); | |
1160 | TEST_ASSERT(bi == returnedBI); | |
1161 | ||
1162 | /* Find the following matches, now working in the moved string. */ | |
1163 | TEST_ASSERT(5 == bi->next()); | |
1164 | TEST_ASSERT(7 == bi->next()); | |
1165 | TEST_ASSERT(8 == bi->next()); | |
1166 | TEST_ASSERT(UBRK_DONE == bi->next()); | |
1167 | ||
1168 | utext_close(&ut1); | |
1169 | utext_close(&ut2); | |
1170 | } | |
1171 | delete bi; | |
1172 | ||
1173 | } | |
1174 | ||
1175 | ||
b75a7d8f A |
1176 | //--------------------------------------------- |
1177 | // runIndexedTest | |
1178 | //--------------------------------------------- | |
1179 | ||
1180 | void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
1181 | { | |
1182 | if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); | |
1183 | switch (index) { | |
1184 | // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; | |
729e4ab9 | 1185 | #if !UCONFIG_NO_FILE_IO |
b75a7d8f A |
1186 | case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; |
1187 | case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; | |
1188 | case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; | |
1189 | case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; | |
1190 | case 4: name = "TestIteration"; if (exec) TestIteration(); break; | |
729e4ab9 A |
1191 | #else |
1192 | case 0: case 1: case 2: case 3: case 4: name = "skip"; break; | |
1193 | #endif | |
73c04bcf A |
1194 | case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; |
1195 | case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; | |
729e4ab9 A |
1196 | case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; |
1197 | case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; | |
1198 | #if !UCONFIG_NO_FILE_IO | |
1199 | case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; | |
1200 | case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; | |
1201 | case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; | |
73c04bcf | 1202 | case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; |
46f4442e | 1203 | case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; |
729e4ab9 A |
1204 | #else |
1205 | case 9: case 10: case 11: case 12: case 13: name = "skip"; break; | |
1206 | #endif | |
4388f060 | 1207 | case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; |
374ca955 A |
1208 | |
1209 | default: name = ""; break; // needed to end loop | |
b75a7d8f A |
1210 | } |
1211 | } | |
1212 | ||
1213 | //--------------------------------------------- | |
1214 | //Internal subroutines | |
1215 | //--------------------------------------------- | |
1216 | ||
1217 | void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ | |
1218 | logln((UnicodeString)"testIsBoundary():"); | |
1219 | int32_t p = 0; | |
1220 | UBool isB; | |
1221 | for (int32_t i = 0; i < text.length(); i++) { | |
1222 | isB = bi.isBoundary(i); | |
1223 | logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); | |
1224 | ||
1225 | if (i == boundaries[p]) { | |
1226 | if (!isB) | |
1227 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); | |
1228 | p++; | |
1229 | } | |
1230 | else { | |
1231 | if (isB) | |
1232 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); | |
1233 | } | |
1234 | } | |
1235 | } | |
1236 | void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ | |
1237 | UnicodeString selected; | |
1238 | UnicodeString expected=CharsToUnicodeString(expectedString); | |
1239 | ||
1240 | if(gotoffset != expectedOffset) | |
1241 | errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); | |
1242 | if(start <= gotoffset){ | |
46f4442e | 1243 | testString.extractBetween(start, gotoffset, selected); |
b75a7d8f A |
1244 | } |
1245 | else{ | |
1246 | testString.extractBetween(gotoffset, start, selected); | |
1247 | } | |
1248 | if(selected.compare(expected) != 0) | |
1249 | errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); | |
1250 | else | |
1251 | logln(prettify("****selected \"" + selected + "\"")); | |
1252 | } | |
1253 | ||
46f4442e A |
1254 | //--------------------------------------------- |
1255 | //RBBIWithProtectedFunctions class functions | |
1256 | //--------------------------------------------- | |
1257 | ||
1258 | RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) | |
1259 | : RuleBasedBreakIterator(data, status) | |
1260 | { | |
1261 | } | |
1262 | ||
1263 | RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) | |
1264 | : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) | |
1265 | { | |
1266 | } | |
1267 | ||
b75a7d8f | 1268 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |