]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /******************************************************************** |
51004dcb | 2 | * Copyright (c) 1999-2012, International Business Machines |
46f4442e A |
3 | * Corporation and others. All Rights Reserved. |
4 | ******************************************************************** | |
5 | * Date Name Description | |
6 | * 12/14/99 Madhu Creation. | |
7 | * 01/12/2000 Madhu updated for changed API | |
b75a7d8f | 8 | ********************************************************************/ |
b75a7d8f A |
9 | |
10 | #include "unicode/utypes.h" | |
11 | ||
12 | #if !UCONFIG_NO_BREAK_ITERATION | |
13 | ||
14 | #include "unicode/uchar.h" | |
15 | #include "intltest.h" | |
16 | #include "unicode/rbbi.h" | |
17 | #include "unicode/schriter.h" | |
18 | #include "rbbiapts.h" | |
19 | #include "rbbidata.h" | |
20 | #include "cstring.h" | |
73c04bcf | 21 | #include "ubrkimpl.h" |
4388f060 | 22 | #include "unicode/locid.h" |
374ca955 | 23 | #include "unicode/ustring.h" |
73c04bcf | 24 | #include "unicode/utext.h" |
46f4442e | 25 | #include "cmemory.h" |
b75a7d8f A |
26 | |
27 | /** | |
28 | * API Test the RuleBasedBreakIterator class | |
29 | */ | |
30 | ||
31 | ||
/**
 * If `status` holds an ICU failure code, calls dataerrln() with the current
 * file name, line number and the symbolic name of the error.
 * Does nothing when `status` is not a failure.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        dataerrln("Failure at file %s, line %d, error = %s", \
                  __FILE__, __LINE__, u_errorName(status)); \
    } \
}

/**
 * If `expr` compares equal to FALSE, calls errln() with the current file
 * name, line number and the stringified text of `expr`.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        errln("Test Failure at file %s, line %d: \"%s\" is false.\n", \
              __FILE__, __LINE__, #expr); \
    } \
}
b75a7d8f A |
37 | |
38 | void RBBIAPITest::TestCloneEquals() | |
39 | { | |
40 | ||
41 | UErrorCode status=U_ZERO_ERROR; | |
42 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
43 | RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
44 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
45 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
46 | if(U_FAILURE(status)){ | |
729e4ab9 | 47 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
48 | return; |
49 | } | |
50 | ||
51 | ||
52 | UnicodeString testString="Testing word break iterators's clone() and equals()"; | |
53 | bi1->setText(testString); | |
54 | bi2->setText(testString); | |
55 | biequal->setText(testString); | |
56 | ||
57 | bi3->setText("hello"); | |
58 | ||
59 | logln((UnicodeString)"Testing equals()"); | |
60 | ||
61 | logln((UnicodeString)"Testing == and !="); | |
62 | UBool b = (*bi1 != *biequal); | |
63 | b |= *bi1 == *bi2; | |
64 | b |= *bi1 == *bi3; | |
65 | if (b) { | |
66 | errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed."); | |
67 | } | |
68 | ||
69 | if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3) | |
70 | errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed."); | |
71 | ||
72 | ||
46f4442e | 73 | // Quick test of RulesBasedBreakIterator assignment - |
b75a7d8f A |
74 | // Check that |
75 | // two different iterators are != | |
76 | // they are == after assignment | |
77 | // source and dest iterator produce the same next() after assignment. | |
78 | // deleting one doesn't disable the other. | |
79 | logln("Testing assignment"); | |
73c04bcf | 80 | RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status); |
b75a7d8f | 81 | if(U_FAILURE(status)){ |
729e4ab9 | 82 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
83 | return; |
84 | } | |
85 | ||
86 | RuleBasedBreakIterator biDefault, biDefault2; | |
87 | if(U_FAILURE(status)){ | |
88 | errln((UnicodeString)"FAIL : in construction of default iterator"); | |
89 | return; | |
90 | } | |
91 | if (biDefault == *bix) { | |
92 | errln((UnicodeString)"ERROR: iterators should not compare =="); | |
93 | return; | |
94 | } | |
95 | if (biDefault != biDefault2) { | |
96 | errln((UnicodeString)"ERROR: iterators should compare =="); | |
97 | return; | |
98 | } | |
99 | ||
100 | ||
101 | UnicodeString HelloString("Hello Kitty"); | |
102 | bix->setText(HelloString); | |
103 | if (*bix == *bi2) { | |
104 | errln(UnicodeString("ERROR: strings should not be equal before assignment.")); | |
105 | } | |
106 | *bix = *bi2; | |
107 | if (*bix != *bi2) { | |
108 | errln(UnicodeString("ERROR: strings should be equal before assignment.")); | |
109 | } | |
110 | ||
111 | int bixnext = bix->next(); | |
112 | int bi2next = bi2->next(); | |
113 | if (! (bixnext == bi2next && bixnext == 7)) { | |
114 | errln(UnicodeString("ERROR: iterators behaved differently after assignment.")); | |
115 | } | |
116 | delete bix; | |
117 | if (bi2->next() != 8) { | |
118 | errln(UnicodeString("ERROR: iterator.next() failed after deleting copy.")); | |
119 | } | |
120 | ||
121 | ||
122 | ||
123 | logln((UnicodeString)"Testing clone()"); | |
124 | RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone(); | |
125 | RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone(); | |
126 | ||
46f4442e | 127 | if(*bi1clone != *bi1 || *bi1clone != *biequal || |
b75a7d8f A |
128 | *bi1clone == *bi3 || *bi1clone == *bi2) |
129 | errln((UnicodeString)"ERROR:1 RBBI's clone() method failed"); | |
130 | ||
46f4442e | 131 | if(*bi2clone == *bi1 || *bi2clone == *biequal || |
b75a7d8f A |
132 | *bi2clone == *bi3 || *bi2clone != *bi2) |
133 | errln((UnicodeString)"ERROR:2 RBBI's clone() method failed"); | |
134 | ||
135 | if(bi1->getText() != bi1clone->getText() || | |
46f4442e | 136 | bi2clone->getText() != bi2->getText() || |
b75a7d8f A |
137 | *bi2clone == *bi1clone ) |
138 | errln((UnicodeString)"ERROR: RBBI's clone() method failed"); | |
139 | ||
140 | delete bi1clone; | |
141 | delete bi2clone; | |
142 | delete bi1; | |
143 | delete bi3; | |
144 | delete bi2; | |
145 | delete biequal; | |
146 | } | |
147 | ||
148 | void RBBIAPITest::TestBoilerPlate() | |
149 | { | |
150 | UErrorCode status = U_ZERO_ERROR; | |
73c04bcf A |
151 | BreakIterator* a = BreakIterator::createWordInstance(Locale("hi"), status); |
152 | BreakIterator* b = BreakIterator::createWordInstance(Locale("hi_IN"),status); | |
374ca955 | 153 | if (U_FAILURE(status)) { |
729e4ab9 | 154 | errcheckln(status, "Creation of break iterator failed %s", u_errorName(status)); |
374ca955 A |
155 | return; |
156 | } | |
b75a7d8f A |
157 | if(*a!=*b){ |
158 | errln("Failed: boilerplate method operator!= does not return correct results"); | |
159 | } | |
51004dcb A |
160 | // Japanese word break iterators are identical to root with |
161 | // a dictionary-based break iterator | |
162 | BreakIterator* c = BreakIterator::createCharacterInstance(Locale("ja"),status); | |
163 | BreakIterator* d = BreakIterator::createCharacterInstance(Locale("root"),status); | |
164 | if(c && d){ | |
165 | if(*c!=*d){ | |
166 | errln("Failed: boilerplate method operator== does not return correct results"); | |
374ca955 A |
167 | } |
168 | }else{ | |
169 | errln("creation of break iterator failed"); | |
b75a7d8f A |
170 | } |
171 | delete a; | |
172 | delete b; | |
173 | delete c; | |
51004dcb | 174 | delete d; |
b75a7d8f A |
175 | } |
176 | ||
177 | void RBBIAPITest::TestgetRules() | |
178 | { | |
179 | UErrorCode status=U_ZERO_ERROR; | |
180 | ||
181 | RuleBasedBreakIterator* bi1=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
182 | RuleBasedBreakIterator* bi2=(RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
183 | if(U_FAILURE(status)){ | |
729e4ab9 | 184 | errcheckln(status, "FAIL: in construction - %s", u_errorName(status)); |
b75a7d8f A |
185 | delete bi1; |
186 | delete bi2; | |
187 | return; | |
188 | } | |
189 | ||
190 | ||
191 | ||
192 | logln((UnicodeString)"Testing toString()"); | |
193 | ||
194 | bi1->setText((UnicodeString)"Hello there"); | |
195 | ||
196 | RuleBasedBreakIterator* bi3 =(RuleBasedBreakIterator*)bi1->clone(); | |
197 | ||
198 | UnicodeString temp=bi1->getRules(); | |
199 | UnicodeString temp2=bi2->getRules(); | |
200 | UnicodeString temp3=bi3->getRules(); | |
201 | if( temp2.compare(temp3) ==0 || temp.compare(temp2) == 0 || temp.compare(temp3) != 0) | |
202 | errln((UnicodeString)"ERROR: error in getRules() method"); | |
203 | ||
204 | delete bi1; | |
205 | delete bi2; | |
206 | delete bi3; | |
207 | } | |
208 | void RBBIAPITest::TestHashCode() | |
209 | { | |
210 | UErrorCode status=U_ZERO_ERROR; | |
211 | RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
212 | RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
213 | RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
214 | if(U_FAILURE(status)){ | |
729e4ab9 | 215 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
216 | delete bi1; |
217 | delete bi2; | |
218 | delete bi3; | |
219 | return; | |
220 | } | |
221 | ||
222 | ||
223 | logln((UnicodeString)"Testing hashCode()"); | |
224 | ||
225 | bi1->setText((UnicodeString)"Hash code"); | |
226 | bi2->setText((UnicodeString)"Hash code"); | |
227 | bi3->setText((UnicodeString)"Hash code"); | |
228 | ||
229 | RuleBasedBreakIterator* bi1clone= (RuleBasedBreakIterator*)bi1->clone(); | |
230 | RuleBasedBreakIterator* bi2clone= (RuleBasedBreakIterator*)bi2->clone(); | |
231 | ||
232 | if(bi1->hashCode() != bi1clone->hashCode() || bi1->hashCode() != bi3->hashCode() || | |
233 | bi1clone->hashCode() != bi3->hashCode() || bi2->hashCode() != bi2clone->hashCode()) | |
234 | errln((UnicodeString)"ERROR: identical objects have different hashcodes"); | |
235 | ||
236 | if(bi1->hashCode() == bi2->hashCode() || bi2->hashCode() == bi3->hashCode() || | |
237 | bi1clone->hashCode() == bi2clone->hashCode() || bi1clone->hashCode() == bi2->hashCode()) | |
238 | errln((UnicodeString)"ERROR: different objects have same hashcodes"); | |
239 | ||
240 | delete bi1clone; | |
46f4442e | 241 | delete bi2clone; |
b75a7d8f A |
242 | delete bi1; |
243 | delete bi2; | |
244 | delete bi3; | |
245 | ||
246 | } | |
247 | void RBBIAPITest::TestGetSetAdoptText() | |
248 | { | |
249 | logln((UnicodeString)"Testing getText setText "); | |
729e4ab9 | 250 | IcuTestErrorCode status(*this, "TestGetSetAdoptText"); |
b75a7d8f A |
251 | UnicodeString str1="first string."; |
252 | UnicodeString str2="Second string."; | |
729e4ab9 A |
253 | LocalPointer<RuleBasedBreakIterator> charIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status)); |
254 | LocalPointer<RuleBasedBreakIterator> wordIter1((RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status)); | |
255 | if(status.isFailure()){ | |
256 | errcheckln(status, "Fail : in construction - %s", status.errorName()); | |
b75a7d8f A |
257 | return; |
258 | } | |
259 | ||
260 | ||
261 | CharacterIterator* text1= new StringCharacterIterator(str1); | |
262 | CharacterIterator* text1Clone = text1->clone(); | |
263 | CharacterIterator* text2= new StringCharacterIterator(str2); | |
264 | CharacterIterator* text3= new StringCharacterIterator(str2, 3, 10, 3); // "ond str" | |
46f4442e | 265 | |
b75a7d8f | 266 | wordIter1->setText(str1); |
73c04bcf A |
267 | CharacterIterator *tci = &wordIter1->getText(); |
268 | UnicodeString tstr; | |
269 | tci->getText(tstr); | |
270 | TEST_ASSERT(tstr == str1); | |
b75a7d8f A |
271 | if(wordIter1->current() != 0) |
272 | errln((UnicodeString)"ERROR:1 setText did not set the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
273 | ||
274 | wordIter1->next(2); | |
275 | ||
276 | wordIter1->setText(str2); | |
277 | if(wordIter1->current() != 0) | |
278 | errln((UnicodeString)"ERROR:2 setText did not reset the iteration position to the beginning of the text, it is" + wordIter1->current() + (UnicodeString)"\n"); | |
279 | ||
280 | ||
281 | charIter1->adoptText(text1Clone); | |
73c04bcf A |
282 | TEST_ASSERT(wordIter1->getText() != charIter1->getText()); |
283 | tci = &wordIter1->getText(); | |
284 | tci->getText(tstr); | |
285 | TEST_ASSERT(tstr == str2); | |
286 | tci = &charIter1->getText(); | |
287 | tci->getText(tstr); | |
288 | TEST_ASSERT(tstr == str1); | |
289 | ||
b75a7d8f | 290 | |
729e4ab9 | 291 | LocalPointer<RuleBasedBreakIterator> rb((RuleBasedBreakIterator*)wordIter1->clone()); |
b75a7d8f A |
292 | rb->adoptText(text1); |
293 | if(rb->getText() != *text1) | |
294 | errln((UnicodeString)"ERROR:1 error in adoptText "); | |
295 | rb->adoptText(text2); | |
296 | if(rb->getText() != *text2) | |
297 | errln((UnicodeString)"ERROR:2 error in adoptText "); | |
298 | ||
299 | // Adopt where iterator range is less than the entire orignal source string. | |
73c04bcf A |
300 | // (With the change of the break engine to working with UText internally, |
301 | // CharacterIterators starting at positions other than zero are not supported) | |
b75a7d8f | 302 | rb->adoptText(text3); |
73c04bcf A |
303 | TEST_ASSERT(rb->preceding(2) == 0); |
304 | TEST_ASSERT(rb->following(11) == BreakIterator::DONE); | |
305 | //if(rb->preceding(2) != 3) { | |
306 | // errln((UnicodeString)"ERROR:3 error in adoptText "); | |
307 | //} | |
308 | //if(rb->following(11) != BreakIterator::DONE) { | |
309 | // errln((UnicodeString)"ERROR:4 error in adoptText "); | |
310 | //} | |
311 | ||
312 | // UText API | |
313 | // | |
314 | // Quick test to see if UText is working at all. | |
315 | // | |
316 | const char *s1 = "\x68\x65\x6C\x6C\x6F\x20\x77\x6F\x72\x6C\x64"; /* "hello world" in UTF-8 */ | |
317 | const char *s2 = "\x73\x65\x65\x20\x79\x61"; /* "see ya" in UTF-8 */ | |
318 | // 012345678901 | |
319 | ||
729e4ab9 A |
320 | status.reset(); |
321 | LocalUTextPointer ut(utext_openUTF8(NULL, s1, -1, status)); | |
322 | wordIter1->setText(ut.getAlias(), status); | |
73c04bcf A |
323 | TEST_ASSERT_SUCCESS(status); |
324 | ||
325 | int32_t pos; | |
326 | pos = wordIter1->first(); | |
327 | TEST_ASSERT(pos==0); | |
328 | pos = wordIter1->next(); | |
329 | TEST_ASSERT(pos==5); | |
330 | pos = wordIter1->next(); | |
331 | TEST_ASSERT(pos==6); | |
332 | pos = wordIter1->next(); | |
333 | TEST_ASSERT(pos==11); | |
334 | pos = wordIter1->next(); | |
335 | TEST_ASSERT(pos==UBRK_DONE); | |
336 | ||
729e4ab9 A |
337 | status.reset(); |
338 | LocalUTextPointer ut2(utext_openUTF8(NULL, s2, -1, status)); | |
73c04bcf | 339 | TEST_ASSERT_SUCCESS(status); |
729e4ab9 | 340 | wordIter1->setText(ut2.getAlias(), status); |
73c04bcf A |
341 | TEST_ASSERT_SUCCESS(status); |
342 | ||
343 | pos = wordIter1->first(); | |
344 | TEST_ASSERT(pos==0); | |
345 | pos = wordIter1->next(); | |
346 | TEST_ASSERT(pos==3); | |
347 | pos = wordIter1->next(); | |
348 | TEST_ASSERT(pos==4); | |
349 | ||
350 | pos = wordIter1->last(); | |
351 | TEST_ASSERT(pos==6); | |
352 | pos = wordIter1->previous(); | |
353 | TEST_ASSERT(pos==4); | |
354 | pos = wordIter1->previous(); | |
355 | TEST_ASSERT(pos==3); | |
356 | pos = wordIter1->previous(); | |
357 | TEST_ASSERT(pos==0); | |
358 | pos = wordIter1->previous(); | |
359 | TEST_ASSERT(pos==UBRK_DONE); | |
360 | ||
729e4ab9 | 361 | status.reset(); |
73c04bcf | 362 | UnicodeString sEmpty; |
729e4ab9 A |
363 | LocalUTextPointer gut2(utext_openUnicodeString(NULL, &sEmpty, status)); |
364 | wordIter1->getUText(gut2.getAlias(), status); | |
73c04bcf | 365 | TEST_ASSERT_SUCCESS(status); |
729e4ab9 A |
366 | status.reset(); |
367 | } | |
46f4442e | 368 | |
b75a7d8f | 369 | |
b75a7d8f A |
370 | void RBBIAPITest::TestIteration() |
371 | { | |
372 | // This test just verifies that the API is present. | |
373 | // Testing for correct operation of the break rules happens elsewhere. | |
374 | ||
375 | UErrorCode status=U_ZERO_ERROR; | |
376 | RuleBasedBreakIterator* bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
377 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 378 | errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
379 | } |
380 | delete bi; | |
381 | ||
382 | status=U_ZERO_ERROR; | |
383 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status); | |
384 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 385 | errcheckln(status, "Failure creating Word break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
386 | } |
387 | delete bi; | |
388 | ||
389 | status=U_ZERO_ERROR; | |
390 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createLineInstance(Locale::getDefault(), status); | |
391 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 392 | errcheckln(status, "Failure creating Line break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
393 | } |
394 | delete bi; | |
395 | ||
396 | status=U_ZERO_ERROR; | |
397 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createSentenceInstance(Locale::getDefault(), status); | |
398 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 399 | errcheckln(status, "Failure creating Sentence break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
400 | } |
401 | delete bi; | |
402 | ||
403 | status=U_ZERO_ERROR; | |
404 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createTitleInstance(Locale::getDefault(), status); | |
405 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 406 | errcheckln(status, "Failure creating Title break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
407 | } |
408 | delete bi; | |
409 | ||
410 | status=U_ZERO_ERROR; | |
411 | bi = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status); | |
412 | if (U_FAILURE(status) || bi == NULL) { | |
729e4ab9 | 413 | errcheckln(status, "Failure creating character break iterator. Status = %s", u_errorName(status)); |
b75a7d8f A |
414 | return; // Skip the rest of these tests. |
415 | } | |
416 | ||
417 | ||
418 | UnicodeString testString="0123456789"; | |
419 | bi->setText(testString); | |
420 | ||
421 | int32_t i; | |
422 | i = bi->first(); | |
423 | if (i != 0) { | |
424 | errln("Incorrect value from bi->first(). Expected 0, got %d.", i); | |
425 | } | |
426 | ||
427 | i = bi->last(); | |
428 | if (i != 10) { | |
429 | errln("Incorrect value from bi->last(). Expected 10, got %d", i); | |
430 | } | |
431 | ||
432 | // | |
433 | // Previous | |
434 | // | |
435 | bi->last(); | |
436 | i = bi->previous(); | |
437 | if (i != 9) { | |
438 | errln("Incorrect value from bi->last() at line %d. Expected 9, got %d", __LINE__, i); | |
439 | } | |
440 | ||
441 | ||
442 | bi->first(); | |
443 | i = bi->previous(); | |
444 | if (i != BreakIterator::DONE) { | |
445 | errln("Incorrect value from bi->previous() at line %d. Expected DONE, got %d", __LINE__, i); | |
446 | } | |
447 | ||
448 | // | |
449 | // next() | |
450 | // | |
451 | bi->first(); | |
452 | i = bi->next(); | |
453 | if (i != 1) { | |
454 | errln("Incorrect value from bi->next() at line %d. Expected 1, got %d", __LINE__, i); | |
455 | } | |
456 | ||
457 | bi->last(); | |
458 | i = bi->next(); | |
459 | if (i != BreakIterator::DONE) { | |
460 | errln("Incorrect value from bi->next() at line %d. Expected DONE, got %d", __LINE__, i); | |
461 | } | |
462 | ||
463 | ||
464 | // | |
465 | // current() | |
466 | // | |
467 | bi->first(); | |
468 | i = bi->current(); | |
469 | if (i != 0) { | |
470 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
471 | } | |
472 | ||
473 | bi->next(); | |
474 | i = bi->current(); | |
475 | if (i != 1) { | |
476 | errln("Incorrect value from bi->previous() at line %d. Expected 1, got %d", __LINE__, i); | |
477 | } | |
478 | ||
479 | bi->last(); | |
480 | bi->next(); | |
481 | i = bi->current(); | |
482 | if (i != 10) { | |
483 | errln("Incorrect value from bi->previous() at line %d. Expected 10, got %d", __LINE__, i); | |
484 | } | |
485 | ||
486 | bi->first(); | |
487 | bi->previous(); | |
488 | i = bi->current(); | |
489 | if (i != 0) { | |
490 | errln("Incorrect value from bi->previous() at line %d. Expected 0, got %d", __LINE__, i); | |
491 | } | |
492 | ||
493 | ||
494 | // | |
495 | // Following() | |
496 | // | |
497 | i = bi->following(4); | |
498 | if (i != 5) { | |
499 | errln("Incorrect value from bi->following() at line %d. Expected 5, got %d", __LINE__, i); | |
500 | } | |
501 | ||
502 | i = bi->following(9); | |
503 | if (i != 10) { | |
504 | errln("Incorrect value from bi->following() at line %d. Expected 10, got %d", __LINE__, i); | |
505 | } | |
506 | ||
507 | i = bi->following(10); | |
508 | if (i != BreakIterator::DONE) { | |
509 | errln("Incorrect value from bi->following() at line %d. Expected DONE, got %d", __LINE__, i); | |
510 | } | |
511 | ||
512 | ||
513 | // | |
514 | // Preceding | |
515 | // | |
516 | i = bi->preceding(4); | |
517 | if (i != 3) { | |
518 | errln("Incorrect value from bi->preceding() at line %d. Expected 3, got %d", __LINE__, i); | |
519 | } | |
520 | ||
521 | i = bi->preceding(10); | |
522 | if (i != 9) { | |
523 | errln("Incorrect value from bi->preceding() at line %d. Expected 9, got %d", __LINE__, i); | |
524 | } | |
525 | ||
526 | i = bi->preceding(1); | |
527 | if (i != 0) { | |
528 | errln("Incorrect value from bi->preceding() at line %d. Expected 0, got %d", __LINE__, i); | |
529 | } | |
530 | ||
531 | i = bi->preceding(0); | |
532 | if (i != BreakIterator::DONE) { | |
533 | errln("Incorrect value from bi->preceding() at line %d. Expected DONE, got %d", __LINE__, i); | |
534 | } | |
535 | ||
536 | ||
537 | // | |
538 | // isBoundary() | |
539 | // | |
540 | bi->first(); | |
541 | if (bi->isBoundary(3) != TRUE) { | |
542 | errln("Incorrect value from bi->isBoudary() at line %d. Expected TRUE, got FALSE", __LINE__, i); | |
543 | } | |
544 | i = bi->current(); | |
545 | if (i != 3) { | |
546 | errln("Incorrect value from bi->current() at line %d. Expected 3, got %d", __LINE__, i); | |
547 | } | |
548 | ||
549 | ||
550 | if (bi->isBoundary(11) != FALSE) { | |
551 | errln("Incorrect value from bi->isBoudary() at line %d. Expected FALSE, got TRUE", __LINE__, i); | |
552 | } | |
553 | i = bi->current(); | |
554 | if (i != 10) { | |
555 | errln("Incorrect value from bi->current() at line %d. Expected 10, got %d", __LINE__, i); | |
556 | } | |
557 | ||
558 | // | |
559 | // next(n) | |
560 | // | |
561 | bi->first(); | |
562 | i = bi->next(4); | |
563 | if (i != 4) { | |
564 | errln("Incorrect value from bi->next() at line %d. Expected 4, got %d", __LINE__, i); | |
565 | } | |
566 | ||
567 | i = bi->next(6); | |
568 | if (i != 10) { | |
569 | errln("Incorrect value from bi->next() at line %d. Expected 10, got %d", __LINE__, i); | |
570 | } | |
571 | ||
572 | bi->first(); | |
573 | i = bi->next(11); | |
574 | if (i != BreakIterator::DONE) { | |
575 | errln("Incorrect value from bi->next() at line %d. Expected BreakIterator::DONE, got %d", __LINE__, i); | |
576 | } | |
577 | ||
578 | delete bi; | |
579 | ||
580 | } | |
581 | ||
582 | ||
583 | ||
584 | ||
585 | ||
586 | ||
587 | void RBBIAPITest::TestBuilder() { | |
588 | UnicodeString rulesString1 = "$Letters = [:L:];\n" | |
589 | "$Numbers = [:N:];\n" | |
590 | "$Letters+;\n" | |
591 | "$Numbers+;\n" | |
592 | "[^$Letters $Numbers];\n" | |
593 | "!.*;\n"; | |
594 | UnicodeString testString1 = "abc123..abc"; | |
595 | // 01234567890 | |
596 | int32_t bounds1[] = {0, 3, 6, 7, 8, 11}; | |
597 | UErrorCode status=U_ZERO_ERROR; | |
598 | UParseError parseError; | |
46f4442e | 599 | |
b75a7d8f A |
600 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); |
601 | if(U_FAILURE(status)) { | |
729e4ab9 | 602 | dataerrln("Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
603 | } else { |
604 | bi->setText(testString1); | |
605 | doBoundaryTest(*bi, testString1, bounds1); | |
606 | } | |
607 | delete bi; | |
608 | } | |
609 | ||
610 | ||
611 | // | |
612 | // TestQuoteGrouping | |
613 | // Single quotes within rules imply a grouping, so that a modifier | |
614 | // following the quoted text (* or +) applies to all of the quoted chars. | |
615 | // | |
616 | void RBBIAPITest::TestQuoteGrouping() { | |
617 | UnicodeString rulesString1 = "#Here comes the rule...\n" | |
618 | "'$@!'*;\n" // (\$\@\!)* | |
619 | ".;\n"; | |
620 | ||
621 | UnicodeString testString1 = "$@!$@!X$@!!X"; | |
622 | // 0123456789012 | |
623 | int32_t bounds1[] = {0, 6, 7, 10, 11, 12}; | |
624 | UErrorCode status=U_ZERO_ERROR; | |
625 | UParseError parseError; | |
46f4442e | 626 | |
b75a7d8f A |
627 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); |
628 | if(U_FAILURE(status)) { | |
729e4ab9 | 629 | dataerrln("Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
630 | } else { |
631 | bi->setText(testString1); | |
632 | doBoundaryTest(*bi, testString1, bounds1); | |
633 | } | |
634 | delete bi; | |
635 | } | |
636 | ||
637 | // | |
374ca955 | 638 | // TestRuleStatus |
b75a7d8f A |
639 | // Test word break rule status constants. |
640 | // | |
374ca955 | 641 | void RBBIAPITest::TestRuleStatus() { |
46f4442e | 642 | UChar str[30]; |
51004dcb A |
643 | //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing |
644 | // changed UBRK_WORD_KANA to UBRK_WORD_IDEO | |
645 | u_unescape("plain word 123.45 \\u30a1\\u30a2 ", | |
646 | // 012345678901234567 8 9 0 | |
647 | // Katakana | |
374ca955 A |
648 | str, 30); |
649 | UnicodeString testString1(str); | |
51004dcb | 650 | int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21}; |
b75a7d8f A |
651 | int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER, |
652 | UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE, | |
51004dcb | 653 | UBRK_WORD_IDEO, UBRK_WORD_NONE}; |
b75a7d8f A |
654 | |
655 | int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, | |
656 | UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT, | |
51004dcb | 657 | UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT}; |
b75a7d8f A |
658 | |
659 | UErrorCode status=U_ZERO_ERROR; | |
46f4442e | 660 | |
73c04bcf | 661 | RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); |
b75a7d8f | 662 | if(U_FAILURE(status)) { |
729e4ab9 | 663 | errcheckln(status, "Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
664 | } else { |
665 | bi->setText(testString1); | |
666 | // First test that the breaks are in the right spots. | |
667 | doBoundaryTest(*bi, testString1, bounds1); | |
668 | ||
669 | // Then go back and check tag values | |
670 | int32_t i = 0; | |
671 | int32_t pos, tag; | |
672 | for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) { | |
673 | if (pos != bounds1[i]) { | |
674 | errln("FAIL: unexpected word break at postion %d", pos); | |
675 | break; | |
676 | } | |
677 | tag = bi->getRuleStatus(); | |
678 | if (tag < tag_lo[i] || tag >= tag_hi[i]) { | |
679 | errln("FAIL: incorrect tag value %d at position %d", tag, pos); | |
680 | break; | |
681 | } | |
46f4442e | 682 | |
374ca955 A |
683 | // Check that we get the same tag values from getRuleStatusVec() |
684 | int32_t vec[10]; | |
685 | int t = bi->getRuleStatusVec(vec, 10, status); | |
686 | TEST_ASSERT_SUCCESS(status); | |
687 | TEST_ASSERT(t==1); | |
688 | TEST_ASSERT(vec[0] == tag); | |
b75a7d8f A |
689 | } |
690 | } | |
691 | delete bi; | |
374ca955 A |
692 | |
693 | // Now test line break status. This test mostly is to confirm that the status constants | |
694 | // are correctly declared in the header. | |
695 | testString1 = "test line. \n"; | |
696 | // break type s s h | |
697 | ||
698 | bi = (RuleBasedBreakIterator *) | |
699 | BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
700 | if(U_FAILURE(status)) { | |
729e4ab9 | 701 | errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status)); |
374ca955 A |
702 | } else { |
703 | int32_t i = 0; | |
704 | int32_t pos, tag; | |
705 | UBool success; | |
706 | ||
707 | bi->setText(testString1); | |
708 | pos = bi->current(); | |
709 | tag = bi->getRuleStatus(); | |
710 | for (i=0; i<3; i++) { | |
711 | switch (i) { | |
712 | case 0: | |
713 | success = pos==0 && tag==UBRK_LINE_SOFT; break; | |
714 | case 1: | |
715 | success = pos==5 && tag==UBRK_LINE_SOFT; break; | |
716 | case 2: | |
717 | success = pos==12 && tag==UBRK_LINE_HARD; break; | |
718 | default: | |
719 | success = FALSE; break; | |
720 | } | |
721 | if (success == FALSE) { | |
722 | errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d", | |
723 | i, pos, tag); | |
724 | break; | |
725 | } | |
726 | pos = bi->next(); | |
727 | tag = bi->getRuleStatus(); | |
728 | } | |
729 | if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT || | |
730 | UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT || | |
729e4ab9 | 731 | (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) { |
374ca955 A |
732 | errln("UBRK_LINE_* constants from header are inconsistent."); |
733 | } | |
734 | } | |
735 | delete bi; | |
736 | ||
b75a7d8f A |
737 | } |
738 | ||
739 | ||
374ca955 A |
740 | // |
741 | // TestRuleStatusVec | |
742 | // Test the vector form of break rule status. | |
743 | // | |
744 | void RBBIAPITest::TestRuleStatusVec() { | |
46f4442e | 745 | UnicodeString rulesString( "[A-N]{100}; \n" |
374ca955 A |
746 | "[a-w]{200}; \n" |
747 | "[\\p{L}]{300}; \n" | |
748 | "[\\p{N}]{400}; \n" | |
749 | "[0-5]{500}; \n" | |
46f4442e | 750 | "!.*;\n", -1, US_INV); |
374ca955 A |
751 | UnicodeString testString1 = "Aapz5?"; |
752 | int32_t statusVals[10]; | |
753 | int32_t numStatuses; | |
754 | int32_t pos; | |
755 | ||
756 | UErrorCode status=U_ZERO_ERROR; | |
757 | UParseError parseError; | |
46f4442e | 758 | |
374ca955 | 759 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status); |
729e4ab9 A |
760 | if (U_FAILURE(status)) { |
761 | dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status)); | |
762 | } else { | |
374ca955 A |
763 | bi->setText(testString1); |
764 | ||
765 | // A | |
766 | pos = bi->next(); | |
767 | TEST_ASSERT(pos==1); | |
768 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
769 | TEST_ASSERT_SUCCESS(status); | |
770 | TEST_ASSERT(numStatuses == 2); | |
771 | TEST_ASSERT(statusVals[0] == 100); | |
772 | TEST_ASSERT(statusVals[1] == 300); | |
773 | ||
774 | // a | |
775 | pos = bi->next(); | |
776 | TEST_ASSERT(pos==2); | |
777 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
778 | TEST_ASSERT_SUCCESS(status); | |
779 | TEST_ASSERT(numStatuses == 2); | |
780 | TEST_ASSERT(statusVals[0] == 200); | |
781 | TEST_ASSERT(statusVals[1] == 300); | |
782 | ||
783 | // p | |
784 | pos = bi->next(); | |
785 | TEST_ASSERT(pos==3); | |
786 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
787 | TEST_ASSERT_SUCCESS(status); | |
788 | TEST_ASSERT(numStatuses == 2); | |
789 | TEST_ASSERT(statusVals[0] == 200); | |
790 | TEST_ASSERT(statusVals[1] == 300); | |
791 | ||
792 | // z | |
793 | pos = bi->next(); | |
794 | TEST_ASSERT(pos==4); | |
795 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
796 | TEST_ASSERT_SUCCESS(status); | |
797 | TEST_ASSERT(numStatuses == 1); | |
798 | TEST_ASSERT(statusVals[0] == 300); | |
799 | ||
800 | // 5 | |
801 | pos = bi->next(); | |
802 | TEST_ASSERT(pos==5); | |
803 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
804 | TEST_ASSERT_SUCCESS(status); | |
805 | TEST_ASSERT(numStatuses == 2); | |
806 | TEST_ASSERT(statusVals[0] == 400); | |
807 | TEST_ASSERT(statusVals[1] == 500); | |
808 | ||
809 | // ? | |
810 | pos = bi->next(); | |
811 | TEST_ASSERT(pos==6); | |
812 | numStatuses = bi->getRuleStatusVec(statusVals, 10, status); | |
813 | TEST_ASSERT_SUCCESS(status); | |
814 | TEST_ASSERT(numStatuses == 1); | |
815 | TEST_ASSERT(statusVals[0] == 0); | |
816 | ||
817 | // | |
46f4442e | 818 | // Check buffer overflow error handling. Char == A |
374ca955 A |
819 | // |
820 | bi->first(); | |
821 | pos = bi->next(); | |
822 | TEST_ASSERT(pos==1); | |
823 | memset(statusVals, -1, sizeof(statusVals)); | |
824 | numStatuses = bi->getRuleStatusVec(statusVals, 0, status); | |
825 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
826 | TEST_ASSERT(numStatuses == 2); | |
827 | TEST_ASSERT(statusVals[0] == -1); | |
828 | ||
829 | status = U_ZERO_ERROR; | |
830 | memset(statusVals, -1, sizeof(statusVals)); | |
831 | numStatuses = bi->getRuleStatusVec(statusVals, 1, status); | |
832 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
833 | TEST_ASSERT(numStatuses == 2); | |
834 | TEST_ASSERT(statusVals[0] == 100); | |
835 | TEST_ASSERT(statusVals[1] == -1); | |
836 | ||
837 | status = U_ZERO_ERROR; | |
838 | memset(statusVals, -1, sizeof(statusVals)); | |
839 | numStatuses = bi->getRuleStatusVec(statusVals, 2, status); | |
840 | TEST_ASSERT_SUCCESS(status); | |
841 | TEST_ASSERT(numStatuses == 2); | |
842 | TEST_ASSERT(statusVals[0] == 100); | |
843 | TEST_ASSERT(statusVals[1] == 300); | |
844 | TEST_ASSERT(statusVals[2] == -1); | |
845 | } | |
846 | delete bi; | |
847 | ||
848 | } | |
849 | ||
b75a7d8f A |
850 | // |
851 | // Bug 2190 Regression test. Builder crash on rule consisting of only a | |
852 | // $variable reference | |
853 | void RBBIAPITest::TestBug2190() { | |
854 | UnicodeString rulesString1 = "$aaa = abcd;\n" | |
855 | "$bbb = $aaa;\n" | |
856 | "$bbb;\n"; | |
857 | UnicodeString testString1 = "abcdabcd"; | |
858 | // 01234567890 | |
859 | int32_t bounds1[] = {0, 4, 8}; | |
860 | UErrorCode status=U_ZERO_ERROR; | |
861 | UParseError parseError; | |
46f4442e | 862 | |
b75a7d8f A |
863 | RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status); |
864 | if(U_FAILURE(status)) { | |
729e4ab9 | 865 | dataerrln("Fail : in construction - %s", u_errorName(status)); |
b75a7d8f A |
866 | } else { |
867 | bi->setText(testString1); | |
868 | doBoundaryTest(*bi, testString1, bounds1); | |
869 | } | |
870 | delete bi; | |
871 | } | |
872 | ||
873 | ||
874 | void RBBIAPITest::TestRegistration() { | |
374ca955 | 875 | #if !UCONFIG_NO_SERVICE |
b75a7d8f | 876 | UErrorCode status = U_ZERO_ERROR; |
73c04bcf | 877 | BreakIterator* ja_word = BreakIterator::createWordInstance("ja_JP", status); |
b75a7d8f | 878 | // ok to not delete these if we exit because of error? |
73c04bcf | 879 | BreakIterator* ja_char = BreakIterator::createCharacterInstance("ja_JP", status); |
b75a7d8f A |
880 | BreakIterator* root_word = BreakIterator::createWordInstance("", status); |
881 | BreakIterator* root_char = BreakIterator::createCharacterInstance("", status); | |
729e4ab9 A |
882 | |
883 | if (status == U_MISSING_RESOURCE_ERROR || status == U_FILE_ACCESS_ERROR) { | |
884 | dataerrln("Error creating instances of break interactors - %s", u_errorName(status)); | |
51004dcb | 885 | |
729e4ab9 A |
886 | delete ja_word; |
887 | delete ja_char; | |
888 | delete root_word; | |
889 | delete root_char; | |
890 | ||
891 | return; | |
892 | } | |
46f4442e | 893 | |
73c04bcf | 894 | URegistryKey key = BreakIterator::registerInstance(ja_word, "xx", UBRK_WORD, status); |
b75a7d8f | 895 | { |
51004dcb | 896 | #if 0 // With a dictionary based word breaking, ja_word is identical to root. |
73c04bcf A |
897 | if (ja_word && *ja_word == *root_word) { |
898 | errln("japan not different from root"); | |
b75a7d8f | 899 | } |
51004dcb | 900 | #endif |
b75a7d8f | 901 | } |
46f4442e | 902 | |
b75a7d8f A |
903 | { |
904 | BreakIterator* result = BreakIterator::createWordInstance("xx_XX", status); | |
374ca955 A |
905 | UBool fail = TRUE; |
906 | if(result){ | |
73c04bcf | 907 | fail = *result != *ja_word; |
374ca955 | 908 | } |
b75a7d8f A |
909 | delete result; |
910 | if (fail) { | |
911 | errln("bad result for xx_XX/word"); | |
912 | } | |
913 | } | |
46f4442e | 914 | |
b75a7d8f | 915 | { |
73c04bcf | 916 | BreakIterator* result = BreakIterator::createCharacterInstance("ja_JP", status); |
374ca955 A |
917 | UBool fail = TRUE; |
918 | if(result){ | |
73c04bcf | 919 | fail = *result != *ja_char; |
374ca955 | 920 | } |
b75a7d8f A |
921 | delete result; |
922 | if (fail) { | |
73c04bcf | 923 | errln("bad result for ja_JP/char"); |
b75a7d8f A |
924 | } |
925 | } | |
46f4442e | 926 | |
b75a7d8f A |
927 | { |
928 | BreakIterator* result = BreakIterator::createCharacterInstance("xx_XX", status); | |
374ca955 A |
929 | UBool fail = TRUE; |
930 | if(result){ | |
931 | fail = *result != *root_char; | |
932 | } | |
b75a7d8f A |
933 | delete result; |
934 | if (fail) { | |
935 | errln("bad result for xx_XX/char"); | |
936 | } | |
937 | } | |
46f4442e | 938 | |
b75a7d8f A |
939 | { |
940 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
941 | UBool found = FALSE; | |
942 | const UnicodeString* p; | |
943 | while ((p = avail->snext(status))) { | |
944 | if (p->compare("xx") == 0) { | |
945 | found = TRUE; | |
946 | break; | |
947 | } | |
948 | } | |
949 | delete avail; | |
950 | if (!found) { | |
951 | errln("did not find test locale"); | |
952 | } | |
953 | } | |
46f4442e | 954 | |
b75a7d8f A |
955 | { |
956 | UBool unreg = BreakIterator::unregister(key, status); | |
957 | if (!unreg) { | |
958 | errln("unable to unregister"); | |
959 | } | |
960 | } | |
46f4442e | 961 | |
b75a7d8f | 962 | { |
73c04bcf | 963 | BreakIterator* result = BreakIterator::createWordInstance("en_US", status); |
b75a7d8f | 964 | BreakIterator* root = BreakIterator::createWordInstance("", status); |
374ca955 A |
965 | UBool fail = TRUE; |
966 | if(root){ | |
967 | fail = *root != *result; | |
968 | } | |
b75a7d8f A |
969 | delete root; |
970 | delete result; | |
971 | if (fail) { | |
972 | errln("did not get root break"); | |
973 | } | |
974 | } | |
46f4442e | 975 | |
b75a7d8f A |
976 | { |
977 | StringEnumeration* avail = BreakIterator::getAvailableLocales(); | |
978 | UBool found = FALSE; | |
979 | const UnicodeString* p; | |
980 | while ((p = avail->snext(status))) { | |
981 | if (p->compare("xx") == 0) { | |
982 | found = TRUE; | |
983 | break; | |
984 | } | |
985 | } | |
986 | delete avail; | |
987 | if (found) { | |
988 | errln("found test locale"); | |
989 | } | |
990 | } | |
46f4442e | 991 | |
b75a7d8f A |
992 | { |
993 | int32_t count; | |
994 | UBool foundLocale = FALSE; | |
995 | const Locale *avail = BreakIterator::getAvailableLocales(count); | |
996 | for (int i=0; i<count; i++) { | |
997 | if (avail[i] == Locale::getEnglish()) { | |
998 | foundLocale = TRUE; | |
999 | break; | |
1000 | } | |
1001 | } | |
1002 | if (foundLocale == FALSE) { | |
1003 | errln("BreakIterator::getAvailableLocales(&count), failed to find EN."); | |
1004 | } | |
1005 | } | |
46f4442e A |
1006 | |
1007 | ||
73c04bcf A |
1008 | // ja_word was adopted by factory |
1009 | delete ja_char; | |
b75a7d8f A |
1010 | delete root_word; |
1011 | delete root_char; | |
374ca955 | 1012 | #endif |
b75a7d8f A |
1013 | } |
1014 | ||
1015 | void RBBIAPITest::RoundtripRule(const char *dataFile) { | |
1016 | UErrorCode status = U_ZERO_ERROR; | |
1017 | UParseError parseError; | |
374ca955 A |
1018 | parseError.line = 0; |
1019 | parseError.offset = 0; | |
729e4ab9 | 1020 | LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status)); |
b75a7d8f A |
1021 | uint32_t length; |
1022 | const UChar *builtSource; | |
1023 | const uint8_t *rbbiRules; | |
1024 | const uint8_t *builtRules; | |
1025 | ||
1026 | if (U_FAILURE(status)) { | |
729e4ab9 | 1027 | errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status)); |
b75a7d8f A |
1028 | return; |
1029 | } | |
1030 | ||
729e4ab9 | 1031 | builtRules = (const uint8_t *)udata_getMemory(data.getAlias()); |
b75a7d8f A |
1032 | builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource); |
1033 | RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status); | |
1034 | if (U_FAILURE(status)) { | |
1035 | errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n", | |
1036 | u_errorName(status), parseError.line, parseError.offset); | |
1037 | return; | |
1038 | }; | |
1039 | rbbiRules = brkItr->getBinaryRules(length); | |
1040 | logln("Comparing \"%s\" len=%d", dataFile, length); | |
1041 | if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) { | |
1042 | errln("Built rules and rebuilt rules are different %s", dataFile); | |
1043 | return; | |
1044 | } | |
1045 | delete brkItr; | |
b75a7d8f A |
1046 | } |
1047 | ||
1048 | void RBBIAPITest::TestRoundtripRules() { | |
1049 | RoundtripRule("word"); | |
1050 | RoundtripRule("title"); | |
1051 | RoundtripRule("sent"); | |
1052 | RoundtripRule("line"); | |
1053 | RoundtripRule("char"); | |
1054 | if (!quick) { | |
73c04bcf | 1055 | RoundtripRule("word_POSIX"); |
b75a7d8f A |
1056 | } |
1057 | } | |
1058 | ||
46f4442e A |
1059 | // Try out the RuleBasedBreakIterator constructors that take RBBIDataHeader* |
1060 | // (these are protected so we access them via a local class RBBIWithProtectedFunctions). | |
1061 | // This is just a sanity check, not a thorough test (e.g. we don't check that the | |
1062 | // first delete actually frees rulesCopy). | |
1063 | void RBBIAPITest::TestCreateFromRBBIData() { | |
1064 | // Get some handy RBBIData | |
1065 | const char *brkName = "word"; // or "sent", "line", "char", etc. | |
1066 | UErrorCode status = U_ZERO_ERROR; | |
729e4ab9 | 1067 | LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", brkName, &status)); |
46f4442e | 1068 | if ( U_SUCCESS(status) ) { |
729e4ab9 | 1069 | const RBBIDataHeader * builtRules = (const RBBIDataHeader *)udata_getMemory(data.getAlias()); |
46f4442e A |
1070 | uint32_t length = builtRules->fLength; |
1071 | RBBIWithProtectedFunctions * brkItr; | |
1072 | ||
1073 | // Try the memory-adopting constructor, need to copy the data first | |
1074 | RBBIDataHeader * rulesCopy = (RBBIDataHeader *) uprv_malloc(length); | |
1075 | if ( rulesCopy ) { | |
1076 | uprv_memcpy( rulesCopy, builtRules, length ); | |
1077 | ||
1078 | brkItr = new RBBIWithProtectedFunctions(rulesCopy, status); | |
1079 | if ( U_SUCCESS(status) ) { | |
1080 | delete brkItr; // this should free rulesCopy | |
1081 | } else { | |
1082 | errln("create RuleBasedBreakIterator from RBBIData (adopted): ICU Error \"%s\"\n", u_errorName(status) ); | |
1083 | status = U_ZERO_ERROR;// reset for the next test | |
1084 | uprv_free( rulesCopy ); | |
1085 | } | |
1086 | } | |
1087 | ||
1088 | // Now try the non-adopting constructor | |
1089 | brkItr = new RBBIWithProtectedFunctions(builtRules, RBBIWithProtectedFunctions::kDontAdopt, status); | |
1090 | if ( U_SUCCESS(status) ) { | |
1091 | delete brkItr; // this should NOT attempt to free builtRules | |
1092 | if (builtRules->fLength != length) { // sanity check | |
1093 | errln("create RuleBasedBreakIterator from RBBIData (non-adopted): delete affects data\n" ); | |
1094 | } | |
1095 | } else { | |
1096 | errln("create RuleBasedBreakIterator from RBBIData (non-adopted): ICU Error \"%s\"\n", u_errorName(status) ); | |
1097 | } | |
46f4442e | 1098 | } |
4388f060 A |
1099 | |
1100 | // getBinaryRules() and RuleBasedBreakIterator(uint8_t binaryRules, ...) | |
1101 | // | |
1102 | status = U_ZERO_ERROR; | |
1103 | RuleBasedBreakIterator *rb = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status); | |
1104 | if (rb == NULL || U_FAILURE(status)) { | |
1105 | dataerrln("Unable to create BreakIterator::createWordInstance (Locale::getEnglish) - %s", u_errorName(status)); | |
1106 | } else { | |
1107 | uint32_t length; | |
1108 | const uint8_t *rules = rb->getBinaryRules(length); | |
1109 | RuleBasedBreakIterator *rb2 = new RuleBasedBreakIterator(rules, length, status); | |
1110 | TEST_ASSERT_SUCCESS(status); | |
1111 | TEST_ASSERT(*rb == *rb2); | |
1112 | UnicodeString words = "one two three "; | |
1113 | rb2->setText(words); | |
1114 | int wordCounter = 0; | |
1115 | while (rb2->next() != UBRK_DONE) { | |
1116 | wordCounter++; | |
1117 | } | |
1118 | TEST_ASSERT(wordCounter == 6); | |
1119 | ||
1120 | status = U_ZERO_ERROR; | |
1121 | RuleBasedBreakIterator *rb3 = new RuleBasedBreakIterator(rules, length-1, status); | |
1122 | TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR); | |
1123 | ||
1124 | delete rb; | |
1125 | delete rb2; | |
1126 | delete rb3; | |
1127 | } | |
46f4442e A |
1128 | } |
1129 | ||
4388f060 A |
1130 | |
1131 | void RBBIAPITest::TestRefreshInputText() { | |
1132 | /* | |
1133 | * RefreshInput changes out the input of a Break Iterator without | |
1134 | * changing anything else in the iterator's state. Used with Java JNI, | |
1135 | * when Java moves the underlying string storage. This test | |
1136 | * runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence. | |
1137 | * The right set of boundaries should still be found. | |
1138 | */ | |
1139 | UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ | |
1140 | UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; | |
1141 | UErrorCode status = U_ZERO_ERROR; | |
1142 | UText ut1 = UTEXT_INITIALIZER; | |
1143 | UText ut2 = UTEXT_INITIALIZER; | |
1144 | RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status); | |
1145 | TEST_ASSERT_SUCCESS(status); | |
1146 | ||
1147 | utext_openUChars(&ut1, testStr, -1, &status); | |
1148 | TEST_ASSERT_SUCCESS(status); | |
1149 | ||
1150 | if (U_SUCCESS(status)) { | |
1151 | bi->setText(&ut1, status); | |
1152 | TEST_ASSERT_SUCCESS(status); | |
1153 | ||
1154 | /* Line boundaries will occur before each letter in the original string */ | |
1155 | TEST_ASSERT(1 == bi->next()); | |
1156 | TEST_ASSERT(3 == bi->next()); | |
1157 | ||
1158 | /* Move the string, kill the original string. */ | |
1159 | u_strcpy(movedStr, testStr); | |
1160 | u_memset(testStr, 0x20, u_strlen(testStr)); | |
1161 | utext_openUChars(&ut2, movedStr, -1, &status); | |
1162 | TEST_ASSERT_SUCCESS(status); | |
1163 | RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status); | |
1164 | TEST_ASSERT_SUCCESS(status); | |
1165 | TEST_ASSERT(bi == returnedBI); | |
1166 | ||
1167 | /* Find the following matches, now working in the moved string. */ | |
1168 | TEST_ASSERT(5 == bi->next()); | |
1169 | TEST_ASSERT(7 == bi->next()); | |
1170 | TEST_ASSERT(8 == bi->next()); | |
1171 | TEST_ASSERT(UBRK_DONE == bi->next()); | |
1172 | ||
1173 | utext_close(&ut1); | |
1174 | utext_close(&ut2); | |
1175 | } | |
1176 | delete bi; | |
1177 | ||
1178 | } | |
1179 | ||
1180 | ||
b75a7d8f A |
1181 | //--------------------------------------------- |
1182 | // runIndexedTest | |
1183 | //--------------------------------------------- | |
1184 | ||
1185 | void RBBIAPITest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ ) | |
1186 | { | |
1187 | if (exec) logln((UnicodeString)"TestSuite RuleBasedBreakIterator API "); | |
1188 | switch (index) { | |
1189 | // case 0: name = "TestConstruction"; if (exec) TestConstruction(); break; | |
729e4ab9 | 1190 | #if !UCONFIG_NO_FILE_IO |
b75a7d8f A |
1191 | case 0: name = "TestCloneEquals"; if (exec) TestCloneEquals(); break; |
1192 | case 1: name = "TestgetRules"; if (exec) TestgetRules(); break; | |
1193 | case 2: name = "TestHashCode"; if (exec) TestHashCode(); break; | |
1194 | case 3: name = "TestGetSetAdoptText"; if (exec) TestGetSetAdoptText(); break; | |
1195 | case 4: name = "TestIteration"; if (exec) TestIteration(); break; | |
729e4ab9 A |
1196 | #else |
1197 | case 0: case 1: case 2: case 3: case 4: name = "skip"; break; | |
1198 | #endif | |
73c04bcf A |
1199 | case 5: name = "TestBuilder"; if (exec) TestBuilder(); break; |
1200 | case 6: name = "TestQuoteGrouping"; if (exec) TestQuoteGrouping(); break; | |
729e4ab9 A |
1201 | case 7: name = "TestRuleStatusVec"; if (exec) TestRuleStatusVec(); break; |
1202 | case 8: name = "TestBug2190"; if (exec) TestBug2190(); break; | |
1203 | #if !UCONFIG_NO_FILE_IO | |
1204 | case 9: name = "TestRegistration"; if (exec) TestRegistration(); break; | |
1205 | case 10: name = "TestBoilerPlate"; if (exec) TestBoilerPlate(); break; | |
1206 | case 11: name = "TestRuleStatus"; if (exec) TestRuleStatus(); break; | |
73c04bcf | 1207 | case 12: name = "TestRoundtripRules"; if (exec) TestRoundtripRules(); break; |
46f4442e | 1208 | case 13: name = "TestCreateFromRBBIData"; if (exec) TestCreateFromRBBIData(); break; |
729e4ab9 A |
1209 | #else |
1210 | case 9: case 10: case 11: case 12: case 13: name = "skip"; break; | |
1211 | #endif | |
4388f060 | 1212 | case 14: name = "TestRefreshInputText"; if (exec) TestRefreshInputText(); break; |
374ca955 A |
1213 | |
1214 | default: name = ""; break; // needed to end loop | |
b75a7d8f A |
1215 | } |
1216 | } | |
1217 | ||
1218 | //--------------------------------------------- | |
1219 | //Internal subroutines | |
1220 | //--------------------------------------------- | |
1221 | ||
1222 | void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){ | |
1223 | logln((UnicodeString)"testIsBoundary():"); | |
1224 | int32_t p = 0; | |
1225 | UBool isB; | |
1226 | for (int32_t i = 0; i < text.length(); i++) { | |
1227 | isB = bi.isBoundary(i); | |
1228 | logln((UnicodeString)"bi.isBoundary(" + i + ") -> " + isB); | |
1229 | ||
1230 | if (i == boundaries[p]) { | |
1231 | if (!isB) | |
1232 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected true, got false"); | |
1233 | p++; | |
1234 | } | |
1235 | else { | |
1236 | if (isB) | |
1237 | errln((UnicodeString)"Wrong result from isBoundary() for " + i + (UnicodeString)": expected false, got true"); | |
1238 | } | |
1239 | } | |
1240 | } | |
1241 | void RBBIAPITest::doTest(UnicodeString& testString, int32_t start, int32_t gotoffset, int32_t expectedOffset, const char* expectedString){ | |
1242 | UnicodeString selected; | |
1243 | UnicodeString expected=CharsToUnicodeString(expectedString); | |
1244 | ||
1245 | if(gotoffset != expectedOffset) | |
1246 | errln((UnicodeString)"ERROR:****returned #" + gotoffset + (UnicodeString)" instead of #" + expectedOffset); | |
1247 | if(start <= gotoffset){ | |
46f4442e | 1248 | testString.extractBetween(start, gotoffset, selected); |
b75a7d8f A |
1249 | } |
1250 | else{ | |
1251 | testString.extractBetween(gotoffset, start, selected); | |
1252 | } | |
1253 | if(selected.compare(expected) != 0) | |
1254 | errln(prettify((UnicodeString)"ERROR:****selected \"" + selected + "\" instead of \"" + expected + "\"")); | |
1255 | else | |
1256 | logln(prettify("****selected \"" + selected + "\"")); | |
1257 | } | |
1258 | ||
46f4442e A |
1259 | //--------------------------------------------- |
1260 | //RBBIWithProtectedFunctions class functions | |
1261 | //--------------------------------------------- | |
1262 | ||
1263 | RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(RBBIDataHeader* data, UErrorCode &status) | |
1264 | : RuleBasedBreakIterator(data, status) | |
1265 | { | |
1266 | } | |
1267 | ||
1268 | RBBIWithProtectedFunctions::RBBIWithProtectedFunctions(const RBBIDataHeader* data, enum EDontAdopt, UErrorCode &status) | |
1269 | : RuleBasedBreakIterator(data, RuleBasedBreakIterator::kDontAdopt, status) | |
1270 | { | |
1271 | } | |
1272 | ||
b75a7d8f | 1273 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |