1 /********************************************************************
3 * Copyright (c) 1997-2001, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
12 * Madhu Katragadda Ported for C API, added extra functions and tests
13 *********************************************************************************
16 /* C FUNCTIONALITY AND REGRESSION TEST FOR BREAKITERATOR */
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_BREAK_ITERATION
24 #include "unicode/uloc.h"
25 #include "unicode/ubrk.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
33 /* -------------------------------------------------------------------------- */
35 * "Vector" structure for holding test tables
36 * (this structure is actually a linked list, but we use the name and API of the
37 * java.util.Vector class to keep as much of our test code as possible the same.)
/* Forward typedef: `Vector` names struct Vector1. The struct body is not
 * visible in this excerpt; the code below uses its `text` and `link` members. */
43 typedef struct Vector1 Vector
;
/* addElement (fragment): allocates a new Vector node `p` and a UChar buffer
 * large enough for `string` plus its terminator, then converts the char
 * string into p->text with u_uastrcpy().
 * The statements that attach `p` to the list headed by `q` are not visible
 * in this excerpt. Neither malloc() result is checked in the visible lines. */
45 void addElement(Vector
*q
, const char* string
)
50 p
=(Vector
*)malloc(sizeof(Vector
));
51 p
->text
=(UChar
*)malloc(sizeof(UChar
) * (uprv_strlen(string
)+1));
52 u_uastrcpy(p
->text
, string
);
/* addElement2 (fragment): UChar variant of addElement(). Allocates a new
 * Vector node `p` and a buffer of u_strlen(string)+1 UChars, then copies
 * `string` into p->text with u_strcpy().
 * Returns the caller's own `string` pointer with const cast away; several
 * callers in this file pass that return value straight to free().
 * The statements that attach `p` to the list headed by `q` are not visible
 * in this excerpt. */
59 UChar
* addElement2(Vector
*q
, const UChar
* string
)
63 p
=(Vector
*)malloc(sizeof(Vector
));
64 p
->text
=(UChar
*)malloc(sizeof(UChar
) * (u_strlen(string
)+1));
65 u_strcpy(p
->text
, string
);
71 return (UChar
*)string
;
/* cleanupVector: only the signature is visible in this excerpt. The test
 * functions below call it on a *SelectionData list once a test has run.
 * NOTE(review): presumably releases the list nodes and their text - confirm
 * against the full body. */
75 void cleanupVector(Vector
*q
) {
/* Count: only the signature is visible in this excerpt. Callers store the
 * int32_t result in `elems`, log it, and pass it to createTestData(). */
85 int32_t Count(Vector
*q
)
/* elementAt: only the signature is visible in this excerpt. createTestData()
 * passes the UChar* it returns for (select, i) to u_strlen() and u_strcat(). */
95 UChar
* elementAt(Vector
*q
, int32_t pos
)
104 /* Just to make it easier to use with UChar array.*/
/* UCharToUCharArray (fragment): allocates `buffer` with room for two
 * elements of sizeof(uchar), i.e. two UChars.
 * The statements that fill and return the buffer are not visible here.
 * NOTE(review): presumably stores `uchar` followed by a terminator, with the
 * caller owning the result - confirm against the full body. */
106 UChar
* UCharToUCharArray(const UChar uchar
)
110 buffer
=(UChar
*)malloc(sizeof(uchar
) * 2);
/* extractBetween (fragment): builds a NUL-terminated copy of the UChars in
 * text[start .. end).
 *   temp   - scratch copy of everything from text[start] to the end of text
 *   result - buffer of (end-start)+1 UChars receiving the first end-start
 *            units of temp, then an explicit terminator at result[end-start]
 * The release of `temp` and the return statement are not visible in this
 * excerpt. Neither malloc() result is checked in the visible lines. */
119 UChar
* extractBetween(int32_t start
, int32_t end
, UChar
* text
)
123 temp
=(UChar
*)malloc(sizeof(UChar
) * ((u_strlen(text
)-start
)+1));
124 result
=(UChar
*)malloc(sizeof(UChar
) * ((end
-start
)+1));
125 u_strcpy(temp
, &text
[start
]);
126 u_strncpy(result
, temp
, end
-start
);
127 result
[end
-start
] = 0;
131 /* -------------------------------------------------------------------------------------- */
133 * The BreakIterator Regression Test is the medium top-level test class for everything in the C BreakIterator API
134 * (ubrk.h and ubrk.c).
/* cannedTestArray: zero-terminated table of sample UChar code units (control
 * characters, ASCII, Latin-1 symbols, combining marks, Hebrew, Devanagari,
 * Tibetan, general punctuation, Roman numerals). AllocateTextBoundary()
 * appends it to cannedTestChars for the invariant tests.
 * The closing brace of the initializer is not visible in this excerpt. */
139 const UChar cannedTestArray
[] = {
140 0x0001, 0x0002, 0x0003, 0x0004, 0x0020, 0x0021, 0x005c, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, 0x0029,
141 0x002b, 0x002d, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x003c, 0x003d, 0x003e, 0x0041, 0x0042, 0x0043, 0x0044,
142 0x0045, 0x005B, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x007b, 0x007d, 0x007c,
143 0x002c, 0x00a0, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad, 0x00ae, 0x00af,
144 0x00b0, 0x00b2, 0x00b3, 0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd, 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x0300,
145 0x0301, 0x0302, 0x0303, 0x0304, 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x0903, 0x093e, 0x093f, 0x0940, 0x0949,
146 0x0f3a, 0x0f3b, 0x2000, 0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2028, 0x2029,
147 0x202a, 0x203e, 0x203f, 0x2040, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x0000
153 /*--------------------------------------------- */
155 /*--------------------------------------------- */
/* AllocateTextBoundary (fragment): allocates cannedTestChars with room for
 * u_strlen(cannedTestArray)+10 UChars, then fills it with the six-character
 * text "0x0000" followed by the contents of cannedTestArray.
 * NOTE(review): the first u_uastrcpy() of "" is overwritten at once by the
 * second, so it has no lasting effect.
 * NOTE(review): "0x0000" is copied as literal characters, not as a U+0000
 * code unit - confirm that this prefix is what the invariant tests want. */
157 void AllocateTextBoundary()
160 cannedTestChars
=(UChar
*)malloc(sizeof(UChar
) * (u_strlen(cannedTestArray
) + 10));
161 u_uastrcpy(cannedTestChars
,"");
162 u_uastrcpy(cannedTestChars
,"0x0000");
163 u_strcat(cannedTestChars
, cannedTestArray
);
/* FreeTextBoundary: releases the cannedTestChars buffer that
 * AllocateTextBoundary() allocated. */
167 void FreeTextBoundary()
169 free(cannedTestChars
);
/* addTestWordData (fragment): builds the expected word segments.
 *   1. Allocates the head node wordSelectionData by hand, with text "12,34"
 *      and a NULL link.
 *   2. Adds every further expected segment with addElement() for char
 *      literals, or addElement2() for UChar strings produced by
 *      UCharToUCharArray() / CharsToUChars(). Those temporaries are released
 *      by free()-ing addElement2()'s return value, which is the pointer that
 *      was passed in.
 *   3. Counts the elements, logs the count, and stores the text returned by
 *      createTestData() in testWordText.
 * Local declarations and braces are not visible in this excerpt. */
173 void addTestWordData()
178 wordSelectionData
=(Vector
*)malloc(sizeof(Vector
));
179 wordSelectionData
->text
=(UChar
*)malloc(sizeof(UChar
) * 6);
180 u_uastrcpy(wordSelectionData
->text
, "12,34");
181 wordSelectionData
->link
=NULL
;
183 addElement(wordSelectionData
, " ");
184 free(addElement2(wordSelectionData
, UCharToUCharArray((UChar
)(0x00A2)))); /*cent sign */
185 free(addElement2(wordSelectionData
, UCharToUCharArray((UChar
)(0x00A3)))); /*pound sign */
186 free(addElement2(wordSelectionData
, UCharToUCharArray((UChar
)(0x00A4)))); /*currency sign */
187 free(addElement2(wordSelectionData
, UCharToUCharArray((UChar
)(0x00A5)))); /*yen sign */
188 addElement(wordSelectionData
, "alpha-beta-gamma");
189 addElement(wordSelectionData
, ".");
190 addElement(wordSelectionData
, " ");
191 addElement(wordSelectionData
, "Badges");
192 addElement(wordSelectionData
, "?");
193 addElement(wordSelectionData
, " ");
194 addElement(wordSelectionData
, "BADGES");
195 addElement(wordSelectionData
, "!");
196 addElement(wordSelectionData
, "?");
197 addElement(wordSelectionData
, "!");
198 addElement(wordSelectionData
, " ");
199 addElement(wordSelectionData
, "We");
200 addElement(wordSelectionData
, " ");
201 addElement(wordSelectionData
, "don't");
202 addElement(wordSelectionData
, " ");
203 addElement(wordSelectionData
, "need");
204 addElement(wordSelectionData
, " ");
205 addElement(wordSelectionData
, "no");
206 addElement(wordSelectionData
, " ");
207 addElement(wordSelectionData
, "STINKING");
208 addElement(wordSelectionData
, " ");
209 addElement(wordSelectionData
, "BADGES");
210 addElement(wordSelectionData
, "!");
211 addElement(wordSelectionData
, "!");
212 addElement(wordSelectionData
, "!");
214 addElement(wordSelectionData
, "012.566,5");
215 addElement(wordSelectionData
, " ");
216 addElement(wordSelectionData
, "123.3434,900");
217 addElement(wordSelectionData
, " ");
218 addElement(wordSelectionData
, "1000,233,456.000");
219 addElement(wordSelectionData
, " ");
220 addElement(wordSelectionData
, "1,23.322%");
221 addElement(wordSelectionData
, " ");
222 addElement(wordSelectionData
, "123.1222");
224 addElement(wordSelectionData
, " ");
225 addElement(wordSelectionData
, "$123,000.20");
227 addElement(wordSelectionData
, " ");
228 addElement(wordSelectionData
, "179.01%");
230 addElement(wordSelectionData
, "Hello");
231 addElement(wordSelectionData
, ",");
232 addElement(wordSelectionData
, " ");
233 addElement(wordSelectionData
, "how");
234 addElement(wordSelectionData
, " ");
235 addElement(wordSelectionData
, "are");
236 addElement(wordSelectionData
, " ");
237 addElement(wordSelectionData
, "you");
238 addElement(wordSelectionData
, " ");
239 addElement(wordSelectionData
, "X");
240 addElement(wordSelectionData
, " ");
242 addElement(wordSelectionData
, "Now");
243 addElement(wordSelectionData
, "\r");
244 addElement(wordSelectionData
, "is");
245 addElement(wordSelectionData
, "\n");
246 addElement(wordSelectionData
, "the");
247 addElement(wordSelectionData
, "\r\n");
248 addElement(wordSelectionData
, "time");
249 addElement(wordSelectionData
, "\n");
250 addElement(wordSelectionData
, "\r");
251 addElement(wordSelectionData
, "for");
252 addElement(wordSelectionData
, "\r");
253 addElement(wordSelectionData
, "\r");
254 addElement(wordSelectionData
, "all");
255 addElement(wordSelectionData
, " ");
257 /* to test for bug #4097779 */
258 free(addElement2(wordSelectionData
, CharsToUChars("aa\\u0300a")));
259 addElement(wordSelectionData
, " ");
261 /* to test for bug #4098467
262 What follows is a string of Korean characters (I found it in the Yellow Pages
263 ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
264 it correctly), first as precomposed syllables, and then as conjoining jamo.
265 Both sequences should be semantically identical and break the same way.
266 precomposed syllables... */
267 free(addElement2(wordSelectionData
, CharsToUChars("\\uc0c1\\ud56d")));
268 addElement(wordSelectionData
, " ");
269 free(addElement2(wordSelectionData
, CharsToUChars("\\ud55c\\uc778")));
270 addElement(wordSelectionData
, " ");
271 free(addElement2(wordSelectionData
, CharsToUChars("\\uc5f0\\ud569")));
272 addElement(wordSelectionData
, " ");
273 free(addElement2(wordSelectionData
, CharsToUChars("\\uc7a5\\ub85c\\uad50\\ud68c")));
274 addElement(wordSelectionData
, " ");
275 /* conjoining jamo... */
276 free(addElement2(wordSelectionData
, CharsToUChars("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc")));
277 addElement(wordSelectionData
, " ");
278 free(addElement2(wordSelectionData
, CharsToUChars("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab")));
279 addElement(wordSelectionData
, " ");
280 free(addElement2(wordSelectionData
, CharsToUChars("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8")));
281 addElement(wordSelectionData
, " ");
282 free(addElement2(wordSelectionData
, CharsToUChars("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c")));
283 addElement(wordSelectionData
, " ");
285 /* this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
286 count as a Kanji character for the purposes of word breaking */
287 addElement(wordSelectionData
, "abc");
288 free(addElement2(wordSelectionData
, CharsToUChars("\\u4e01\\u4e02\\u3005\\u4e03\\u4e03")));
289 addElement(wordSelectionData
, "abc");
291 elems
= Count(wordSelectionData
);
292 log_verbose("In word, the no: of words are: %d\n", elems
);
293 testWordText
= createTestData(wordSelectionData
, elems
);
/* Unicode separators used when building test data: U+2029 PARAGRAPH
 * SEPARATOR and U+2028 LINE SEPARATOR. */
298 const UChar kParagraphSeparator
= 0x2029;
299 const UChar kLineSeparator
= 0x2028;
/* addTestSentenceData (fragment): builds the expected sentence segments.
 *   1. Allocates the head node sentenceSelectionData by hand, with the text
 *      "This is a simple sample sentence. " and a NULL link.
 *   2. Adds the remaining expected sentences with addElement() or
 *      addElement2().
 *   3. Counts the elements, logs the count, and stores the text returned by
 *      createTestData() in testSentenceText.
 * Several entries load an ASCII sentence into `temp`, obtain a separator
 * string in `td`, and then add `temp`. The statements between those steps
 * are not visible in this excerpt.
 * NOTE(review): presumably `td` is appended to `temp` and then freed -
 * confirm against the full body. */
304 void addTestSentenceData()
310 sentenceSelectionData
=(Vector
*)malloc(sizeof(Vector
));
311 sentenceSelectionData
->text
=(UChar
*)malloc(sizeof(UChar
) * (strlen("This is a simple sample sentence. ")+1));
312 u_uastrcpy(sentenceSelectionData
->text
, "This is a simple sample sentence. ");
313 sentenceSelectionData
->link
=NULL
;
315 /* addElement(sentenceSelectionData, "This is a simple sample sentence. "); */
316 addElement(sentenceSelectionData
, "(This is it.) ");
317 addElement(sentenceSelectionData
, "This is a simple sample sentence. ");
318 addElement(sentenceSelectionData
, "\"This isn\'t it.\" ");
319 addElement(sentenceSelectionData
, "Hi! ");
320 addElement(sentenceSelectionData
, "This is a simple sample sentence. ");
321 addElement(sentenceSelectionData
, "It does not have to make any sense as you can see. ");
322 addElement(sentenceSelectionData
, "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
323 addElement(sentenceSelectionData
, "Che la dritta via aveo smarrita. ");
324 addElement(sentenceSelectionData
, "He said, that I said, that you said!! ");
326 u_uastrcpy(temp
, "Don't rock the boat");
327 td
= UCharToUCharArray(kParagraphSeparator
);
330 addElement2(sentenceSelectionData
, temp
);
332 addElement(sentenceSelectionData
, "Because I am the daddy, that is why. ");
333 addElement(sentenceSelectionData
, "Not on my time (el timo.)! ");
335 u_uastrcpy(temp
, "So what!!");
336 td
= UCharToUCharArray(kParagraphSeparator
);
339 addElement2(sentenceSelectionData
, temp
);
341 addElement(sentenceSelectionData
, "\"But now,\" he said, \"I know!\" ");
342 addElement(sentenceSelectionData
, "Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). ");
343 addElement(sentenceSelectionData
, "One species, B. anthracis, is highly virulent.\n");
344 addElement(sentenceSelectionData
, "Wolf said about Sounder:\"Beautifully thought-out and directed.\" ");
345 addElement(sentenceSelectionData
, "Have you ever said, \"This is where\tI shall live\"? ");
346 addElement(sentenceSelectionData
, "He answered, \"You may not!\" ");
347 addElement(sentenceSelectionData
, "Another popular saying is: \"How do you do?\". ");
348 addElement(sentenceSelectionData
, "Yet another popular saying is: \'I\'m fine thanks.\' ");
349 addElement(sentenceSelectionData
, "What is the proper use of the abbreviation pp.? ");
350 addElement(sentenceSelectionData
, "Yes, I am definatelly 12\" tall!!");
352 /* test for bug #4113835: \n and \r count as spaces, not as paragraph breaks */
353 u_uastrcpy(temp
, "Now\ris\nthe\r\ntime\n\rfor\r\rall");
354 td
= UCharToUCharArray(kParagraphSeparator
);
357 addElement2(sentenceSelectionData
, temp
);
359 /* test for bug #4117554: Treat fullwidth variants of .!? the same as their
360 normal counterparts */
361 free(addElement2(sentenceSelectionData
, CharsToUChars("I know I'm right\\uff0e ")));
362 free(addElement2(sentenceSelectionData
, CharsToUChars("Right\\uff1f ")));
363 free(addElement2(sentenceSelectionData
, CharsToUChars("Right\\uff01 ")));
365 /* test for bug #4117554: Break sentence between a sentence terminator and
366 opening punctuation */
367 addElement(sentenceSelectionData
, "no?");
368 u_uastrcpy(temp
, "(yes)");
369 td
= CharsToUChars("\\u2029");
372 addElement2(sentenceSelectionData
, temp
);
374 /* test for bug #4158381: Don't break sentence after period if it isn't
375 followed by a space */
376 addElement(sentenceSelectionData
, "Test <code>Flags.Flag</code> class. ");
377 u_uastrcpy(temp
, "Another test.");
378 td
= CharsToUChars("\\u2029");
381 addElement2(sentenceSelectionData
, temp
);
383 /* test for bug #4158381: No breaks when there are no terminators around */
384 addElement(sentenceSelectionData
, "<P>Provides a set of "lightweight" (all-java<FONT SIZE=\"-2\"><SUP>TM</SUP></FONT> language) components that, to the maximum degree possible, work the same on all platforms. ");
385 u_uastrcpy(temp
, "Another test.");
386 td
= CharsToUChars("\\u2029");
389 addElement2(sentenceSelectionData
, temp
);
391 /* test for bug #4143071: Make sure sentences that end with digits work right */
392 addElement(sentenceSelectionData
, "Today is the 27th of May, 1998. ");
393 addElement(sentenceSelectionData
, "Tomorrow with be 28 May 1998. ");
394 u_uastrcpy(temp
, "The day after will be the 30th.");
395 td
= CharsToUChars("\\u2029");
398 addElement2(sentenceSelectionData
, temp
);
400 /* test for bug #4152416: Make sure sentences ending with a capital
401 letter are treated correctly */
402 addElement(sentenceSelectionData
, "The type of all primitive <code>boolean</code> values accessed in the target VM. ");
403 u_uastrcpy(temp
, "Calls to xxx will return an implementor of this interface.");
404 td
= CharsToUChars("\\u2029");
407 addElement2(sentenceSelectionData
, temp
);
410 /* test for bug #4152117: Make sure sentence breaking is handling
411 punctuation correctly */
412 addElement(sentenceSelectionData
, "Constructs a randomly generated BigInteger, uniformly distributed over the range <tt>0</tt> to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
413 addElement(sentenceSelectionData
, "The uniformity of the distribution assumes that a fair source of random bits is provided in <tt>rnd</tt>. ");
414 u_uastrcpy(temp
, "Note that this constructor always constructs a non-negative BigInteger.");
415 td
= CharsToUChars("\\u2029");
418 addElement2(sentenceSelectionData
, temp
);
420 elems
= Count(sentenceSelectionData
);
421 log_verbose("In sentence: the no: of sentences are %d\n", elems
);
422 testSentenceText
= createTestData(sentenceSelectionData
, elems
);
/* addTestLineData (fragment): builds the expected line-break segments.
 *   1. Allocates the head node lineSelectionData by hand, with text "Multi-"
 *      and a NULL link.
 *   2. Adds every further expected segment with addElement() for char
 *      literals, or addElement2() for UChar strings from CharsToUChars().
 *      Those temporaries are released by free()-ing addElement2()'s return
 *      value, which is the pointer that was passed in.
 *   3. Counts the elements, logs the count, and stores the text returned by
 *      createTestData() in testLineText.
 * Local declarations and braces are not visible in this excerpt. */
431 void addTestLineData()
435 lineSelectionData
=(Vector
*)malloc(sizeof(Vector
));
436 lineSelectionData
->text
=(UChar
*)malloc(sizeof(UChar
) * 7);
437 u_uastrcpy(lineSelectionData
->text
, "Multi-");
438 lineSelectionData
->link
=NULL
;
440 /* lineSelectionData->addElement("Multi-"); */
441 addElement(lineSelectionData
, "Level ");
442 addElement(lineSelectionData
, "example ");
443 addElement(lineSelectionData
, "of ");
444 addElement(lineSelectionData
, "a ");
445 addElement(lineSelectionData
, "semi-");
446 addElement(lineSelectionData
, "idiotic ");
447 addElement(lineSelectionData
, "non-");
448 addElement(lineSelectionData
, "sensical ");
449 addElement(lineSelectionData
, "(non-");
450 addElement(lineSelectionData
, "important) ");
451 addElement(lineSelectionData
, "sentence. ");
453 addElement(lineSelectionData
, "Hi ");
454 addElement(lineSelectionData
, "Hello ");
455 addElement(lineSelectionData
, "How\n");
456 addElement(lineSelectionData
, "are\r");
459 free(addElement2(lineSelectionData
, CharsToUChars("you\\u2028"))); /* line separator */
461 addElement(lineSelectionData
, "fine.\t");
462 addElement(lineSelectionData
, "good. ");
464 addElement(lineSelectionData
, "Now\r");
465 addElement(lineSelectionData
, "is\n");
466 addElement(lineSelectionData
, "the\r\n");
467 addElement(lineSelectionData
, "time\n");
468 addElement(lineSelectionData
, "\r");
469 addElement(lineSelectionData
, "for\r");
470 addElement(lineSelectionData
, "\r");
471 addElement(lineSelectionData
, "all ");
473 /* to test for bug #4068133 */
474 free(addElement2(lineSelectionData
, CharsToUChars("\\u96f6")));
475 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e00\\u3002")));
476 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e8c\\u3001")));
477 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e09\\u3002\\u3001")));
478 free(addElement2(lineSelectionData
, CharsToUChars("\\u56db\\u3001\\u3002\\u3001")));
481 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e94,")));
483 free(addElement2(lineSelectionData
, CharsToUChars("\\u516d.")));
485 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e03.\\u3001,\\u3002")));
486 free(addElement2(lineSelectionData
, CharsToUChars("\\u516b")));
488 /* to test for bug #4086052 */
489 free(addElement2(lineSelectionData
, CharsToUChars("foo\\u00a0bar ")));
491 /* to test for bug #4097920 */
492 addElement(lineSelectionData
, "dog,");
493 addElement(lineSelectionData
, "cat,");
494 addElement(lineSelectionData
, "mouse ");
495 addElement(lineSelectionData
, "(one)");
496 addElement(lineSelectionData
, "(two)\n");
498 /* to test for bug #4035266 */
499 addElement(lineSelectionData
, "The ");
500 addElement(lineSelectionData
, "balance ");
501 addElement(lineSelectionData
, "is ");
502 addElement(lineSelectionData
, "$-23,456.78, ");
503 addElement(lineSelectionData
, "not ");
504 addElement(lineSelectionData
, "-$32,456.78!\n");
506 /* to test for bug #4098467
507 What follows is a string of Korean characters (I found it in the Yellow Pages
508 ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
509 it correctly), first as precomposed syllables, and then as conjoining jamo.
510 Both sequences should be semantically identical and break the same way.
511 precomposed syllables... */
512 free(addElement2(lineSelectionData
, CharsToUChars("\\uc0c1\\ud56d ")));
513 free(addElement2(lineSelectionData
, CharsToUChars("\\ud55c\\uc778 ")));
514 free(addElement2(lineSelectionData
, CharsToUChars("\\uc5f0\\ud569 ")));
515 free(addElement2(lineSelectionData
, CharsToUChars("\\uc7a5\\ub85c\\uad50\\ud68c ")));
516 /* conjoining jamo... */
517 free(addElement2(lineSelectionData
, CharsToUChars("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc ")));
518 free(addElement2(lineSelectionData
, CharsToUChars("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab ")));
519 free(addElement2(lineSelectionData
, CharsToUChars("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8 ")));
520 free(addElement2(lineSelectionData
, CharsToUChars("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c")));
522 /* to test for bug #4117554: Fullwidth .!? should be treated as postJwrd */
523 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e01\\uff0e")));
524 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e02\\uff01")));
525 free(addElement2(lineSelectionData
, CharsToUChars("\\u4e03\\uff1f")));
527 elems
= Count(lineSelectionData
);
528 log_verbose("In line: the no: of lines are %d\n", elems
);
529 testLineText
= createTestData(lineSelectionData
, elems
);
/* Named base-letter + combining-mark sequences.
 * NOTE(review): in C, adding an integer or pointer to a string literal is
 * pointer arithmetic, not concatenation, and `UniChar` is not declared in the
 * visible part of this file. These lines look like a sketch; presumably they
 * sit inside a comment in the full file - confirm before relying on them.
 * addTestCharacterData() builds the equivalent strings at run time. */
536 const UChar* graveS = "S" + (UniChar)0x0300;
537 const UChar* acuteBelowI = "i" + UCharToUCharArray(0x0317);
538 const UChar* acuteE = "e" + UCharToUCharArray(0x0301);
539 const UChar* circumflexA = "a" + UCharToUCharArray(0x0302);
540 const UChar* tildeE = "e" + UCharToUCharArray(0x0303);
/* addTestCharacterData (fragment): builds the expected character segments.
 *   1. Allocates the head node characterSelectionData by hand, with text "B"
 *      and a NULL link.
 *   2. Adds base letters, base + combining-mark pairs (a letter loaded into
 *      `temp`, a mark obtained in `td`), line terminators, and Korean
 *      syllables in precomposed and conjoining-jamo form.
 *   3. Counts the elements, logs the count, and stores the text returned by
 *      createTestData() in testCharacterText.
 * The statements between each `td = ...` and the following addElement2() are
 * not visible in this excerpt.
 * NOTE(review): presumably `td` is appended to `temp` and then freed -
 * confirm against the full body.
 * NOTE(review): the entry labelled "graveS" pairs "S" with 0x0317 and the one
 * labelled "acuteBelowI" pairs "i" with 0x0301, whereas the declarations
 * named graveS / acuteBelowI earlier in the file use 0x0300 / 0x0317. Confirm
 * which code points are intended.
 * NOTE(review): the final log_verbose() format has no trailing newline,
 * unlike the matching messages in the word, sentence and line builders. */
546 void addTestCharacterData()
552 characterSelectionData
=(Vector
*)malloc(sizeof(Vector
));
553 characterSelectionData
->text
=(UChar
*)malloc(sizeof(UChar
) * 2);
554 u_uastrcpy(characterSelectionData
->text
, "B");
555 characterSelectionData
->link
=NULL
;
557 u_uastrcpy(temp
, "S");
558 td
= UCharToUCharArray(0x0317);
561 /*u_strcat(temp, UCharToUCharArray(0x0317));*/
562 addElement2(characterSelectionData
, temp
); /* graveS */
564 u_uastrcpy(temp
, "i");
565 td
= UCharToUCharArray(0x0301);
568 /*u_strcat(temp, UCharToUCharArray(0x0301));*/
569 addElement2(characterSelectionData
, temp
); /* acuteBelowI */
571 addElement(characterSelectionData
, "m");
572 addElement(characterSelectionData
, "p");
573 addElement(characterSelectionData
, "l");
575 u_uastrcpy(temp
, "e");
576 td
= UCharToUCharArray(0x0301);
579 addElement2(characterSelectionData
, temp
);/* acuteE */
581 addElement(characterSelectionData
, " ");
582 addElement(characterSelectionData
, "s");
584 u_uastrcpy(temp
, "a");
585 td
= UCharToUCharArray(0x0302);
588 addElement2(characterSelectionData
, temp
);/* circumflexA */
590 addElement(characterSelectionData
, "m");
591 addElement(characterSelectionData
, "p");
592 addElement(characterSelectionData
, "l");
594 u_uastrcpy(temp
, "e");
595 td
= UCharToUCharArray(0x0303);
598 addElement2(characterSelectionData
, temp
); /* tildeE */
600 addElement(characterSelectionData
, ".");
601 addElement(characterSelectionData
, "w");
603 u_uastrcpy(temp
, "a");
604 td
= UCharToUCharArray(0x0302);
607 addElement2(characterSelectionData
, temp
);/* circumflexA */
609 addElement(characterSelectionData
, "w");
610 addElement(characterSelectionData
, "a");
611 addElement(characterSelectionData
, "f");
612 addElement(characterSelectionData
, "q");
613 addElement(characterSelectionData
, "\n");
614 addElement(characterSelectionData
, "\r");
615 addElement(characterSelectionData
, "\r\n");
616 addElement(characterSelectionData
, "\n");
617 addElement(characterSelectionData
, "E");
618 /* to test for bug #4098467
619 What follows is a string of Korean characters (I found it in the Yellow Pages
620 ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
621 it correctly), first as precomposed syllables, and then as conjoining jamo.
622 Both sequences should be semantically identical and break the same way.
623 precomposed syllables... */
624 free(addElement2(characterSelectionData
, CharsToUChars("\\uc0c1")));
625 free(addElement2(characterSelectionData
, CharsToUChars("\\ud56d")));
626 addElement(characterSelectionData
, " ");
627 free(addElement2(characterSelectionData
, CharsToUChars("\\ud55c")));
628 free(addElement2(characterSelectionData
, CharsToUChars("\\uc778")));
629 addElement(characterSelectionData
, " ");
630 free(addElement2(characterSelectionData
, CharsToUChars("\\uc5f0")));
631 free(addElement2(characterSelectionData
, CharsToUChars("\\ud569")));
632 addElement(characterSelectionData
, " ");
633 free(addElement2(characterSelectionData
, CharsToUChars("\\uc7a5")));
634 free(addElement2(characterSelectionData
, CharsToUChars("\\ub85c")));
635 free(addElement2(characterSelectionData
, CharsToUChars("\\uad50")));
636 free(addElement2(characterSelectionData
, CharsToUChars("\\ud68c")));
637 addElement(characterSelectionData
, " ");
638 /* conjoining jamo... */
639 free(addElement2(characterSelectionData
, CharsToUChars("\\u1109\\u1161\\u11bc")));
640 free(addElement2(characterSelectionData
, CharsToUChars("\\u1112\\u1161\\u11bc")));
641 addElement(characterSelectionData
, " ");
642 free(addElement2(characterSelectionData
, CharsToUChars("\\u1112\\u1161\\u11ab")));
643 free(addElement2(characterSelectionData
, CharsToUChars("\\u110b\\u1175\\u11ab")));
644 addElement(characterSelectionData
, " ");
645 free(addElement2(characterSelectionData
, CharsToUChars("\\u110b\\u1167\\u11ab")));
646 free(addElement2(characterSelectionData
, CharsToUChars("\\u1112\\u1161\\u11b8")));
647 addElement(characterSelectionData
, " ");
648 free(addElement2(characterSelectionData
, CharsToUChars("\\u110c\\u1161\\u11bc")));
649 free(addElement2(characterSelectionData
, CharsToUChars("\\u1105\\u1169")));
650 free(addElement2(characterSelectionData
, CharsToUChars("\\u1100\\u116d")));
651 free(addElement2(characterSelectionData
, CharsToUChars("\\u1112\\u116c")));
653 elems
= Count(characterSelectionData
);
654 log_verbose("In character: the no: of characters are %d", elems
);
655 testCharacterText
= createTestData(characterSelectionData
, elems
);
/* createTestData (fragment): concatenates the texts of a Vector list into
 * one heap-allocated UChar string.
 * Starts from an empty string in a two-UChar buffer. For element `i` it
 * grows `result` with realloc() to (current length + 1 + element length)
 * UChars and appends elementAt(select, i) with u_strcat().
 * The loop header that drives `i` (presumably bounded by `e`) and the return
 * statement are not visible in this excerpt.
 * NOTE(review): realloc()'s return value is assigned straight back to
 * `result`, so a failed reallocation would lose the original pointer. */
658 UChar
* createTestData(Vector
*select
, int32_t e
)
662 result
=(UChar
*)malloc(sizeof(UChar
) * 2);
663 u_uastrcpy(result
, "");
666 len
=u_strlen(result
)+1;
667 result
=(UChar
*)realloc(result
, sizeof(UChar
) * (len
+ u_strlen(elementAt(select
,i
))));
668 u_strcat(result
, elementAt(select
,i
));
675 /*---------------------------------------------
677 --------------------------------------------- */
/* TestForwardSentenceSelection (fragment): builds the sentence test data,
 * opens a UBRK_SENTENCE iterator for "en_US" over testSentenceText, logs an
 * error if the open failed, runs doForwardSelectionTest(), then releases the
 * expected-segment list and the text.
 * NOTE(review): the failure message says "word break Iterator" although a
 * sentence iterator is opened here.
 * Closing of the iterator is not visible in this excerpt. */
679 void TestForwardSentenceSelection()
681 UErrorCode status
= U_ZERO_ERROR
;
683 addTestSentenceData();
684 e
= ubrk_open(UBRK_SENTENCE
, "en_US", testSentenceText
, u_strlen(testSentenceText
), &status
);
685 if(U_FAILURE(status
)){
686 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
689 /* sample(e, testSentenceText); */
690 log_verbose("Testing forward sentence selection.....\n");
691 doForwardSelectionTest(e
, testSentenceText
, sentenceSelectionData
);
693 cleanupVector(sentenceSelectionData
);
694 free(testSentenceText
);
695 /*free(sentenceSelectionData);*/
/* TestFirstSentenceSelection (fragment): builds the sentence test data,
 * opens a UBRK_SENTENCE iterator for "en_US" over testSentenceText, logs an
 * error if the open failed, runs doFirstSelectionTest(), then releases the
 * expected-segment list and the text.
 * NOTE(review): the failure message says "word break Iterator" although a
 * sentence iterator is opened here. */
698 void TestFirstSentenceSelection()
700 UErrorCode status
= U_ZERO_ERROR
;
702 addTestSentenceData();
703 e
= ubrk_open(UBRK_SENTENCE
, "en_US", testSentenceText
, u_strlen(testSentenceText
), &status
);
704 if(U_FAILURE(status
)){
705 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
708 log_verbose("Testing first sentence selection.....\n");
709 doFirstSelectionTest(e
, testSentenceText
, sentenceSelectionData
);
711 cleanupVector(sentenceSelectionData
);
712 free(testSentenceText
);
713 /*free(sentenceSelectionData);*/
/* TestLastSentenceSelection (fragment): builds the sentence test data,
 * opens a UBRK_SENTENCE iterator for "en_US" over testSentenceText, logs an
 * error if the open failed, runs doLastSelectionTest(), then releases the
 * expected-segment list and the text.
 * NOTE(review): the failure message says "word break Iterator" although a
 * sentence iterator is opened here. */
716 void TestLastSentenceSelection()
718 UErrorCode status
= U_ZERO_ERROR
;
720 addTestSentenceData();
721 e
= ubrk_open(UBRK_SENTENCE
, "en_US", testSentenceText
, u_strlen(testSentenceText
), &status
);
722 if(U_FAILURE(status
)){
723 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
726 log_verbose("Testing last sentence selection.....\n");
727 doLastSelectionTest(e
, testSentenceText
, sentenceSelectionData
);
729 cleanupVector(sentenceSelectionData
);
730 free(testSentenceText
);
731 /*free(sentenceSelectionData);*/
/* TestBackwardSentenceSelection (fragment): builds the sentence test data,
 * opens a UBRK_SENTENCE iterator for "en_US" over testSentenceText, logs an
 * error if the open failed, runs doBackwardSelectionTest(), then releases
 * the expected-segment list and the text.
 * NOTE(review): the failure message says "word break Iterator" although a
 * sentence iterator is opened here. */
734 void TestBackwardSentenceSelection()
736 UErrorCode status
= U_ZERO_ERROR
;
738 addTestSentenceData();
739 e
= ubrk_open(UBRK_SENTENCE
, "en_US", testSentenceText
, u_strlen(testSentenceText
), &status
);
740 if(U_FAILURE(status
)){
741 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
744 log_verbose("Testing backward sentence selection.....\n");
745 doBackwardSelectionTest(e
, testSentenceText
, sentenceSelectionData
);
747 cleanupVector(sentenceSelectionData
);
748 free(testSentenceText
);
749 /*free(sentenceSelectionData);*/
/* TestForwardSentenceIndexSelection (fragment): builds the sentence test
 * data, opens a UBRK_SENTENCE iterator for "en_US" over testSentenceText,
 * logs an error if the open failed, runs doForwardIndexSelectionTest(), then
 * releases the expected-segment list and the text.
 * NOTE(review): the failure message says "word break Iterator" although a
 * sentence iterator is opened here. */
752 void TestForwardSentenceIndexSelection()
754 UErrorCode status
= U_ZERO_ERROR
;
756 addTestSentenceData();
757 e
= ubrk_open(UBRK_SENTENCE
, "en_US", testSentenceText
, u_strlen(testSentenceText
), &status
);
758 if(U_FAILURE(status
)){
759 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
762 log_verbose("Testing sentence forward index selection.....\n");
763 doForwardIndexSelectionTest(e
, testSentenceText
, sentenceSelectionData
);
765 cleanupVector(sentenceSelectionData
);
766 free(testSentenceText
);
767 /*free(sentenceSelectionData);*/
/* TestBackwardSentenceIndexSelection (fragment): builds the sentence test
 * data, opens a UBRK_SENTENCE iterator for "en_US" over testSentenceText,
 * logs an error if the open failed, runs doBackwardIndexSelectionTest(),
 * then releases the expected-segment list and the text.
 * NOTE(review): the failure message says "word break Iterator" although a
 * sentence iterator is opened here. */
770 void TestBackwardSentenceIndexSelection()
772 UErrorCode status
= U_ZERO_ERROR
;
774 addTestSentenceData();
775 e
= ubrk_open(UBRK_SENTENCE
, "en_US", testSentenceText
, u_strlen(testSentenceText
), &status
);
776 if(U_FAILURE(status
)){
777 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
780 log_verbose("Testing sentence backward index selection.....\n");
781 doBackwardIndexSelectionTest(e
, testSentenceText
, sentenceSelectionData
);
783 cleanupVector(sentenceSelectionData
);
784 free(testSentenceText
);
785 /*free(sentenceSelectionData);*/
/* TestSentenceInvariants (fragment): builds the invariant-test input `s` as
 * cannedTestChars followed by eight extra characters from CharsToUChars()
 * (".", ",", CJK punctuation, hiragana, U+FEFF), then runs
 * doOtherInvariantTest() for UBRK_SENTENCE.
 * `s` has room for u_strlen(cannedTestChars)+15 UChars.
 * Release of `s`, `tempStr` and cannedTestChars is not visible in this
 * excerpt. */
789 void TestSentenceInvariants()
794 AllocateTextBoundary();
795 x
=u_strlen(cannedTestChars
);
796 s
=(UChar
*)malloc(sizeof(UChar
) * (x
+ 15));
797 u_strcpy(s
, cannedTestChars
);
798 tempStr
= CharsToUChars(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff");
799 u_strcat(s
, tempStr
);
801 log_verbose("Testing sentence Other invariants.....\n");
802 doOtherInvariantTest(UBRK_SENTENCE
, s
);
807 /*---------------------------------------------
809 --------------------------------------------- */
/* TestForwardWordSelection (fragment): opens a UBRK_WORD iterator for
 * "en_US" over testWordText, logs an error if the open failed, runs
 * doForwardSelectionTest(), then releases the expected-segment list.
 * The call that builds testWordText / wordSelectionData is not visible in
 * this excerpt.
 * NOTE(review): doForwardSelectionTest() is invoked twice in a row with the
 * same arguments - confirm whether the repetition is intentional. */
811 void TestForwardWordSelection()
813 UErrorCode status
= U_ZERO_ERROR
;
816 e
= ubrk_open(UBRK_WORD
, "en_US", testWordText
, u_strlen(testWordText
), &status
);
817 if(U_FAILURE(status
)){
818 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
821 /* sample(e, testWordText); */
822 log_verbose("Testing forward word selection.....\n");
823 doForwardSelectionTest(e
, testWordText
, wordSelectionData
);
824 doForwardSelectionTest(e
, testWordText
, wordSelectionData
);
826 cleanupVector(wordSelectionData
);
828 /*free(wordSelectionData);*/
/* TestFirstWordSelection (fragment): opens a UBRK_WORD iterator for "en_US"
 * over testWordText, logs an error if the open failed, runs
 * doFirstSelectionTest(), then releases the expected-segment list.
 * The call that builds testWordText / wordSelectionData is not visible in
 * this excerpt. */
831 void TestFirstWordSelection()
833 UErrorCode status
= U_ZERO_ERROR
;
836 e
= ubrk_open(UBRK_WORD
, "en_US", testWordText
, u_strlen(testWordText
), &status
);
837 if(U_FAILURE(status
)){
838 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
841 log_verbose("Testing first word selection.....\n");
842 doFirstSelectionTest(e
, testWordText
, wordSelectionData
);
844 cleanupVector(wordSelectionData
);
846 /*free(wordSelectionData);*/
/* TestLastWordSelection (fragment): opens a UBRK_WORD iterator for "en_US"
 * over testWordText, logs an error if the open failed, runs
 * doLastSelectionTest(), then releases the expected-segment list.
 * The call that builds testWordText / wordSelectionData is not visible in
 * this excerpt. */
849 void TestLastWordSelection()
851 UErrorCode status
= U_ZERO_ERROR
;
854 e
= ubrk_open(UBRK_WORD
, "en_US", testWordText
, u_strlen(testWordText
), &status
);
855 if(U_FAILURE(status
)){
856 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
859 log_verbose("Testing last word selection.....\n");
860 doLastSelectionTest(e
, testWordText
, wordSelectionData
);
862 cleanupVector(wordSelectionData
);
864 /*free(wordSelectionData);*/
/* TestBackwardWordSelection (fragment): opens a UBRK_WORD iterator for
 * "en_US" over testWordText, logs an error if the open failed, runs
 * doBackwardSelectionTest(), then releases the expected-segment list.
 * The call that builds testWordText / wordSelectionData is not visible in
 * this excerpt. */
867 void TestBackwardWordSelection()
869 UErrorCode status
= U_ZERO_ERROR
;
872 e
= ubrk_open(UBRK_WORD
, "en_US", testWordText
, u_strlen(testWordText
), &status
);
873 if(U_FAILURE(status
)){
874 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
877 log_verbose("Testing backward word selection.....\n");
878 doBackwardSelectionTest(e
, testWordText
, wordSelectionData
);
880 cleanupVector(wordSelectionData
);
882 /*free(wordSelectionData);*/
/* TestForwardWordIndexSelection (fragment): opens a UBRK_WORD iterator for
 * "en_US" over testWordText, logs an error if the open failed, runs
 * doForwardIndexSelectionTest(), then releases the expected-segment list.
 * The call that builds testWordText / wordSelectionData is not visible in
 * this excerpt. */
885 void TestForwardWordIndexSelection()
887 UErrorCode status
= U_ZERO_ERROR
;
890 e
= ubrk_open(UBRK_WORD
, "en_US", testWordText
, u_strlen(testWordText
), &status
);
891 if(U_FAILURE(status
)){
892 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
895 log_verbose("Testing forward word index selection.....\n");
896 doForwardIndexSelectionTest(e
, testWordText
, wordSelectionData
);
898 cleanupVector(wordSelectionData
);
900 /*free(wordSelectionData);*/
/* TestBackwardWordIndexSelection (fragment): opens a UBRK_WORD iterator for
 * "en_US" over testWordText, logs an error if the open failed, runs
 * doBackwardIndexSelectionTest(), then releases the expected-segment list.
 * The call that builds testWordText / wordSelectionData is not visible in
 * this excerpt. */
903 void TestBackwardWordIndexSelection()
905 UErrorCode status
= U_ZERO_ERROR
;
908 e
= ubrk_open(UBRK_WORD
, "en_US", testWordText
, u_strlen(testWordText
), &status
);
909 if(U_FAILURE(status
)){
910 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
913 log_verbose("Testing backward word index selection.....\n");
914 doBackwardIndexSelectionTest(e
, testWordText
, wordSelectionData
);
916 cleanupVector(wordSelectionData
);
918 /*free(wordSelectionData);*/
/* TestWordInvariants (fragment): builds the invariant-test input `s` as
 * cannedTestChars followed by 14 extra characters from CharsToUChars()
 * (apostrophe, comma, period, kana, CJK ideographs) and runs
 * doBreakInvariantTest() for UBRK_WORD. It then rebuilds the same `s` and
 * runs doOtherInvariantTest().
 * `s` has room for u_strlen(cannedTestChars)+15 UChars, which fits the 14
 * appended characters plus the terminator exactly.
 * NOTE(review): `tempStr` is reassigned by the second CharsToUChars() call;
 * no free() of the first value is visible in this excerpt - confirm it is
 * released in the lines not shown. */
921 void TestWordInvariants()
926 AllocateTextBoundary();
927 x
=u_strlen(cannedTestChars
);
928 s
=(UChar
*)malloc(sizeof(UChar
) * (x
+ 15));
929 u_strcpy(s
, cannedTestChars
);
930 tempStr
= CharsToUChars("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");
931 u_strcat(s
, tempStr
);
933 log_verbose("Testing word break invariant.....\n");
934 doBreakInvariantTest(UBRK_WORD
, s
);
935 u_strcpy(s
, cannedTestChars
);
936 tempStr
= CharsToUChars("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");
937 u_strcat(s
, tempStr
);
939 doOtherInvariantTest(UBRK_WORD
, s
);
944 /*---------------------------------------------
946 --------------------------------------------- */
/*
 * Regression test: forward iteration over the line-break test text.
 * Opens a UBRK_LINE iterator for "en_US" over testLineText, logs an error
 * when ubrk_open() reports a failure status, runs doForwardSelectionTest()
 * against the expected pieces in lineSelectionData and then releases that
 * vector with cleanupVector().
 */
948 void TestForwardLineSelection()
950 UErrorCode status
= U_ZERO_ERROR
;
953 e
= ubrk_open(UBRK_LINE
, "en_US", testLineText
, u_strlen(testLineText
), &status
);
954 if(U_FAILURE(status
)){
955 log_err("FAIL: Error in opening the line break Iterator: %s\n", myErrorName(status
));
958 log_verbose("Testing forward line selection.....\n");
959 doForwardSelectionTest(e
, testLineText
, lineSelectionData
);
961 cleanupVector(lineSelectionData
);
/*
 * Regression test: first selection of the line-break test text.
 * Opens a UBRK_LINE iterator for "en_US" over testLineText, logs an error
 * when ubrk_open() reports a failure status, runs doFirstSelectionTest()
 * against lineSelectionData and then releases that vector with
 * cleanupVector().
 */
965 void TestFirstLineSelection()
967 UErrorCode status
= U_ZERO_ERROR
;
970 e
= ubrk_open(UBRK_LINE
, "en_US", testLineText
, u_strlen(testLineText
), &status
);
971 if(U_FAILURE(status
)){
972 log_err("FAIL: Error in opening the line break Iterator: %s\n", myErrorName(status
));
975 log_verbose("Testing first line selection.....\n");
976 doFirstSelectionTest(e
, testLineText
, lineSelectionData
);
978 cleanupVector(lineSelectionData
);
980 /*free(lineSelectionData);*/
/*
 * Regression test: last selection of the line-break test text.
 * Opens a UBRK_LINE iterator for "en_US" over testLineText, logs an error
 * when ubrk_open() reports a failure status, runs doLastSelectionTest()
 * against lineSelectionData and then releases that vector with
 * cleanupVector().
 */
983 void TestLastLineSelection()
985 UErrorCode status
= U_ZERO_ERROR
;
988 e
= ubrk_open(UBRK_LINE
, "en_US", testLineText
, u_strlen(testLineText
), &status
);
989 if(U_FAILURE(status
)){
990 log_err("FAIL: Error in opening the line break Iterator: %s\n", myErrorName(status
));
993 log_verbose("Testing last line selection.....\n");
994 doLastSelectionTest(e
, testLineText
, lineSelectionData
);
996 cleanupVector(lineSelectionData
);
998 /*free(lineSelectionData);*/
/*
 * Regression test: backward iteration over the line-break test text.
 * Opens a UBRK_LINE iterator for "en_US" over testLineText, logs an error
 * when ubrk_open() reports a failure status, runs doBackwardSelectionTest()
 * against lineSelectionData and then releases that vector with
 * cleanupVector().
 */
1001 void TestBackwardLineSelection()
1003 UErrorCode status
= U_ZERO_ERROR
;
1006 e
= ubrk_open(UBRK_LINE
, "en_US", testLineText
, u_strlen(testLineText
), &status
);
1007 if(U_FAILURE(status
)){
1008 log_err("FAIL: Error in opening the line break Iterator: %s\n", myErrorName(status
));
1011 log_verbose("Testing backward line selection.....\n");
1012 doBackwardSelectionTest(e
, testLineText
, lineSelectionData
);
1014 cleanupVector(lineSelectionData
);
1016 /*free(lineSelectionData);*/
/*
 * Regression test: preceding()/following() lookups by increasing offset on
 * the line-break test text.
 * Opens a UBRK_LINE iterator for "en_US" over testLineText, logs an error
 * when ubrk_open() reports a failure status, runs
 * doForwardIndexSelectionTest() against lineSelectionData and then releases
 * that vector with cleanupVector().
 */
1019 void TestForwardLineIndexSelection()
1021 UErrorCode status
= U_ZERO_ERROR
;
1024 e
= ubrk_open(UBRK_LINE
, "en_US", testLineText
, u_strlen(testLineText
), &status
);
1025 if(U_FAILURE(status
)){
1026 log_err("FAIL: Error in opening the line break Iterator: %s\n", myErrorName(status
));
1029 log_verbose("Testing forward line index selection.....\n");
1030 doForwardIndexSelectionTest(e
, testLineText
, lineSelectionData
);
1032 cleanupVector(lineSelectionData
);
1034 /*free(lineSelectionData);*/
/*
 * Regression test: preceding()/following() lookups by decreasing offset on
 * the line-break test text.
 * Opens a UBRK_LINE iterator for "en_US" over testLineText, logs an error
 * when ubrk_open() reports a failure status, runs
 * doBackwardIndexSelectionTest() against lineSelectionData and then releases
 * that vector with cleanupVector().
 */
1037 void TestBackwardLineIndexSelection()
1039 UErrorCode status
= U_ZERO_ERROR
;
1042 e
= ubrk_open(UBRK_LINE
, "en_US", testLineText
, u_strlen(testLineText
), &status
);
1043 if(U_FAILURE(status
)){
1044 log_err("FAIL: Error in opening the line break Iterator: %s\n", myErrorName(status
));
1047 log_verbose("Testing backward line index selection.....\n");
1048 doBackwardIndexSelectionTest(e
, testLineText
, lineSelectionData
);
1050 cleanupVector(lineSelectionData
);
1052 /*free(lineSelectionData);*/
/*
 * Invariant checks for the line-break iterator (UBRK_LINE).
 *
 * First runs the generic doBreakInvariantTest() and doOtherInvariantTest()
 * on a string that starts with cannedTestChars. It then checks two
 * line-specific rules using a short work string whose middle character is
 * replaced in turn:
 *   - no boundary is reported at offset 1 or 2 when the middle character is
 *     one of the non-breaking characters in noBreak;
 *   - a boundary is expected after each of the dash characters in dashes,
 *     except for the character classes listed in the long condition below.
 * Both loops compare errorCount against 75.
 */
1055 void TestLineInvariants()
1061 UErrorCode status
= U_ZERO_ERROR
;
/* 10-UChar buffers: noBreak receives 4 characters, dashes receives 6. */
1062 UChar noBreak
[10], dashes
[10];
1068 AllocateTextBoundary();
1069 s
=(UChar
*)malloc(sizeof(UChar
) * (u_strlen(cannedTestChars
) + 20));
1070 u_strcpy(s
, cannedTestChars
);
1071 ustr
= CharsToUChars(".,;:\\u3001\\u3002\\u3041\\u3042\\u3043\\u3044\\u3045\\u30a3\\u4e00\\u4e01\\u4e02");
1074 log_verbose("Testing line break Invariant.....\n");
1075 doBreakInvariantTest(UBRK_LINE
, s
);
1076 log_verbose("Testing line other Invariant....\n");
1077 doOtherInvariantTest(UBRK_LINE
, s
);
1081 u_uastrcpy(work
, "aaa");
1082 /* in addition to the other invariants, a line-break iterator should make sure that:
1083 it doesn't break around the non-breaking characters */
1084 e
= ubrk_open(UBRK_LINE
, "en_US", work
, u_strlen(work
), &status
);
1086 status
=U_ZERO_ERROR
;
/* U+00A0, U+2007, U+2011 and U+FEFF are the "non-breaking" characters under test. */
1087 ustr
= CharsToUChars("\\u00a0\\u2007\\u2011\\ufeff");
1088 u_strcpy(noBreak
, ustr
);
1091 for (i
= 0; i
< sLen
; i
++) {
/* Line/paragraph terminators and U+0003 get special treatment here. */
1093 if (c
== '\r' || c
== '\n' || c
== 0x2029 || c
== 0x2028 || c
== 0x0003)
1096 for (j
= 0; j
< u_strlen(noBreak
); j
++) {
1097 work
[1] = noBreak
[j
];
1098 for (k
= 0; k
< sLen
; k
++) {
1100 ubrk_setText(e
, work
, u_strlen(work
), &status
);
1101 if(U_FAILURE(status
)){
1102 log_err("FAIL: Error in opening the word break Iterator in testLineInvaiants:\n %s\n", myErrorName(status
));
/* Any boundary at offset 1 or 2 is adjacent to the non-breaking character. */
1105 for (l
= ubrk_first(e
); l
!= UBRK_DONE
; l
= ubrk_next(e
))
1106 if (l
== 1 || l
== 2) {
1107 log_err("Got break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work
[l
- 1])),
1108 austrdup(UCharToUCharArray(work
[l
])) );
1111 if (errorCount
>= 75)
1118 /* it does break after hyphens (unless they're followed by a digit, a non-spacing mark,
1119 a currency symbol, a non-breaking space, or a line or paragraph separator) */
1120 ustr
= CharsToUChars("-\\u00ad\\u2010\\u2012\\u2013\\u2014");
1121 u_strcpy(dashes
, ustr
);
1124 for (i
= 0; i
< sLen
; i
++) {
1126 for (j
= 0; j
< u_strlen(dashes
); j
++) {
1127 work
[1] = dashes
[j
];
1128 for (k
= 0; k
< sLen
; k
++) {
1130 if (u_charType(c
) == U_DECIMAL_DIGIT_NUMBER
||
1131 u_charType(c
) == U_OTHER_NUMBER
||
1132 u_charType(c
) == U_NON_SPACING_MARK
||
1133 u_charType(c
) == U_ENCLOSING_MARK
||
1134 u_charType(c
) == U_CURRENCY_SYMBOL
||
1135 u_charType(c
) == U_SPACE_SEPARATOR
||
1136 u_charType(c
) == U_DASH_PUNCTUATION
||
1137 u_charType(c
) == U_CONTROL_CHAR
||
1138 u_charType(c
) == U_FORMAT_CHAR
||
1139 c
== '\n' || c
== '\r' || c
== 0x2028 || c
== 0x2029 ||
1140 c
== 0x0003 || c
== 0x00a0 || c
== 0x2007 || c
== 0x2011 ||
1144 ubrk_setText(e
, work
, u_strlen(work
), &status
);
1145 if(U_FAILURE(status
)){
1146 log_err("FAIL: Error in setting text on the word break Iterator in testLineInvaiants:\n %s \n", myErrorName(status
));
1150 for (l
= ubrk_first(e
); l
!= UBRK_DONE
; l
= ubrk_next(e
))
1154 log_err("Didn't get break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work
[1])),
1155 austrdup(UCharToUCharArray(work
[2])) );
1157 if (errorCount
>= 75)
1167 /*---------------------------------------------
1168 CharacterBreak tests
1169 --------------------------------------------- */
/*
 * Regression test: forward iteration over the character-break test text.
 * Calls addTestCharacterData(), opens a UBRK_CHARACTER iterator for "en_US"
 * over testCharacterText, logs an error when ubrk_open() reports a failure
 * status, runs doForwardSelectionTest() against characterSelectionData and
 * then releases the vector (cleanupVector) and the text buffer (free).
 */
1171 void TestForwardCharacterSelection()
1173 UErrorCode status
= U_ZERO_ERROR
;
1175 addTestCharacterData();
1176 e
= ubrk_open(UBRK_CHARACTER
, "en_US", testCharacterText
, u_strlen(testCharacterText
), &status
);
1177 if(U_FAILURE(status
)){
1178 log_err("FAIL: Error in opening the character break Iterator: %s\n", myErrorName(status
));
1181 log_verbose("Testing forward character selection.....\n");
1182 doForwardSelectionTest(e
, testCharacterText
, characterSelectionData
);
1184 cleanupVector(characterSelectionData
);
1185 free(testCharacterText
);
1186 /*free(characterSelectionData);*/
/*
 * Regression test: first selection of the character-break test text.
 * Calls addTestCharacterData(), opens a UBRK_CHARACTER iterator for "en_US"
 * over testCharacterText, logs an error when ubrk_open() reports a failure
 * status, runs doFirstSelectionTest() against characterSelectionData and
 * then releases the vector (cleanupVector) and the text buffer (free).
 */
1189 void TestFirstCharacterSelection()
1191 UErrorCode status
= U_ZERO_ERROR
;
1193 addTestCharacterData();
1194 e
= ubrk_open(UBRK_CHARACTER
, "en_US", testCharacterText
, u_strlen(testCharacterText
), &status
);
1195 if(U_FAILURE(status
)){
1196 log_err("FAIL: Error in opening the character break Iterator: %s\n", myErrorName(status
));
1199 log_verbose("Testing first character selection.....\n");
1200 doFirstSelectionTest(e
, testCharacterText
, characterSelectionData
);
1202 cleanupVector(characterSelectionData
);
1203 free(testCharacterText
);
1204 /*free(characterSelectionData);*/
/*
 * Regression test: last selection of the character-break test text.
 * Calls addTestCharacterData(), opens a UBRK_CHARACTER iterator for "en_US"
 * over testCharacterText, logs an error when ubrk_open() reports a failure
 * status, runs doLastSelectionTest() against characterSelectionData and
 * then releases the vector (cleanupVector) and the text buffer (free).
 */
1207 void TestLastCharacterSelection()
1209 UErrorCode status
= U_ZERO_ERROR
;
1211 addTestCharacterData();
1212 e
= ubrk_open(UBRK_CHARACTER
, "en_US", testCharacterText
, u_strlen(testCharacterText
), &status
);
1213 if(U_FAILURE(status
)){
1214 log_err("FAIL: Error in opening the character break Iterator: %s\n", myErrorName(status
));
1217 log_verbose("Testing last character selection.....\n");
1218 doLastSelectionTest(e
, testCharacterText
, characterSelectionData
);
1220 cleanupVector(characterSelectionData
);
1221 free(testCharacterText
);
1222 /*free(characterSelectionData);*/
/*
 * Regression test: backward iteration over the character-break test text.
 * Calls addTestCharacterData(), opens a UBRK_CHARACTER iterator for "en_US"
 * over testCharacterText, logs an error when ubrk_open() reports a failure
 * status, runs doBackwardSelectionTest() against characterSelectionData and
 * then releases the vector (cleanupVector) and the text buffer (free).
 */
1225 void TestBackwardCharacterSelection()
1227 UErrorCode status
= U_ZERO_ERROR
;
1229 addTestCharacterData();
1230 e
= ubrk_open(UBRK_CHARACTER
, "en_US", testCharacterText
, u_strlen(testCharacterText
), &status
);
1231 if(U_FAILURE(status
)){
1232 log_err("FAIL: Error in opening the character break Iterator: %s\n", myErrorName(status
));
1235 log_verbose("Testing backward character selection.....\n");
1236 doBackwardSelectionTest(e
, testCharacterText
, characterSelectionData
);
1238 cleanupVector(characterSelectionData
);
1239 free(testCharacterText
);
1240 /*free(characterSelectionData);*/
/*
 * Regression test: preceding()/following() lookups by increasing offset on
 * the character-break test text.
 * Calls addTestCharacterData(), opens a UBRK_CHARACTER iterator for "en_US"
 * over testCharacterText, logs an error when ubrk_open() reports a failure
 * status, runs doForwardIndexSelectionTest() against characterSelectionData
 * and then releases the vector (cleanupVector) and the text buffer (free).
 */
1243 void TestForwardCharacterIndexSelection()
1245 UErrorCode status
= U_ZERO_ERROR
;
1247 addTestCharacterData();
1248 e
= ubrk_open(UBRK_CHARACTER
, "en_US", testCharacterText
, u_strlen(testCharacterText
), &status
);
1249 if(U_FAILURE(status
)){
1250 log_err("FAIL: Error in opening the character break Iterator: %s\n", myErrorName(status
));
1253 log_verbose("Testing forward index character selection.....\n");
1254 doForwardIndexSelectionTest(e
, testCharacterText
, characterSelectionData
);
1256 cleanupVector(characterSelectionData
);
1257 free(testCharacterText
);
1258 /*free(characterSelectionData);*/
/*
 * Regression test: preceding()/following() lookups by decreasing offset on
 * the character-break test text.
 * Calls addTestCharacterData(), opens a UBRK_CHARACTER iterator for "en_US"
 * over testCharacterText, logs an error when ubrk_open() reports a failure
 * status, runs doBackwardIndexSelectionTest() against
 * characterSelectionData and then releases the vector (cleanupVector) and
 * the text buffer (free).
 */
1261 void TestBackwardCharacterIndexSelection()
1263 UErrorCode status
= U_ZERO_ERROR
;
1265 addTestCharacterData();
1266 e
= ubrk_open(UBRK_CHARACTER
, "en_US", testCharacterText
, u_strlen(testCharacterText
), &status
);
1267 if(U_FAILURE(status
)){
1268 log_err("FAIL: Error in opening the character break Iterator: %s\n", myErrorName(status
));
1271 log_verbose("Testing backward character index selection.....\n");
/* Index variant, matching the word and line "BackwardIndexSelection" tests. */
1272 doBackwardIndexSelectionTest(e
, testCharacterText
, characterSelectionData
);
1274 cleanupVector(characterSelectionData
);
1275 free(testCharacterText
);
1276 /*free(characterSelectionData);*/
/*
 * Runs the generic break-iterator invariant checks for UBRK_CHARACTER.
 * The test string is cannedTestChars followed by nine code points in the
 * U+1100..U+11AA range (see the escapes below); the buffer is sized for the
 * canned text plus 15 UChars.
 */
1279 void TestCharacterInvariants()
1284 AllocateTextBoundary();
1285 s
=(UChar
*)malloc(sizeof(UChar
) * (u_strlen(cannedTestChars
) + 15));
1286 u_strcpy(s
, cannedTestChars
);
1287 tempStr
= CharsToUChars("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");
1288 u_strcat(s
, tempStr
);
1290 log_verbose("Testing character break invariant.....\n");
1291 doBreakInvariantTest(UBRK_CHARACTER
, s
);
/* The same string is rebuilt from scratch before the second invariant check. */
1292 u_strcpy(s
, cannedTestChars
);
1293 tempStr
= CharsToUChars("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");
1294 u_strcat(s
, tempStr
);
1296 log_verbose("Testing character other invariant.....\n");
1297 doOtherInvariantTest(UBRK_CHARACTER
, s
);
1301 /*---------------------------------------------
1303 --------------------------------------------- */
/*
 * Checks ubrk_following() and ubrk_preceding() on the text "aaa bbb ccc"
 * with a UBRK_WORD iterator for "en_US".
 * Both calls are made with the offset p2+1; following() is expected to
 * return p3 and preceding() is expected to return p2, and a mismatch is
 * reported through log_err.
 */
1306 void TestPreceding()
1308 int32_t p1
, p2
, p3
, p4
, f
, p
;
1311 UErrorCode status
= U_ZERO_ERROR
;
1312 u_uastrcpy(words3
, "aaa bbb ccc");
1313 log_verbose("Testting preceding...\n");
1314 e
= ubrk_open(UBRK_WORD
, "en_US", words3
, u_strlen(words3
), &status
);
1315 if(U_FAILURE(status
)){
1316 log_err("FAIL: Error in ubrk_open() for word breakiterator: %s\n", myErrorName(status
));
/* Query the boundary after and before the position just past p2. */
1324 f
= ubrk_following(e
, p2
+1);
1325 p
= ubrk_preceding(e
, p2
+1);
1327 if (f
!=p3
) log_err("Error in TestPreceding: %d!=%d\n", (int32_t)f
, (int32_t)p3
);
1328 if (p
!=p2
) log_err("Error in TestPreceding: %d!=%d\n", (int32_t)p
, (int32_t)p2
);
/*
 * Exercises a UBRK_WORD iterator for "en_US" on the three-character text
 * "boo" around the end of the text: it calls ubrk_previous() and logs, at
 * verbose level, the values of end and previous together with the result of
 * one more ubrk_previous() call.
 */
1336 void TestEndBehaviour()
1338 int32_t end
, previous
;
1339 UErrorCode status
= U_ZERO_ERROR
;
/* 5 UChars: room for "boo" plus the terminating NUL. */
1341 UChar testString
[5];
1342 u_uastrcpy(testString
, "boo");
1343 log_verbose("Testing end behaviour\n");
1344 wb
= ubrk_open(UBRK_WORD
, "en_US", testString
, u_strlen(testString
), &status
);
1345 if(U_FAILURE(status
)){
1346 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status
));
1352 previous
=ubrk_previous(wb
);
1353 log_verbose("end= %d and previous=%d %d\n", end
, previous
, ubrk_previous(wb
));
1360 /*---------------------------------------------
1361 Test implementation routines
1362 --------------------------------------------- */
/*
 * Walks `iterator` forward over `testText` and compares each boundary with
 * the expected pieces stored in `result`.
 *
 * iterator  break iterator already positioned on testText
 * testText  the text being iterated
 * result    vector of expected selections, in text order
 *
 * Starting from ubrk_first(), every ubrk_next() offset is checked against
 * ubrk_current() and against the running sum of the expected pieces'
 * lengths (forwardSelectionOffset). The loop stops at UBRK_DONE or once all
 * entries of `result` have been consumed; afterwards the function reports
 * selections that were not found and boundaries that should not exist.
 */
1364 void doForwardSelectionTest(UBreakIterator
* iterator
, UChar
* testText
, Vector
* result
)
1366 UChar
*expectedResult
, *selectionResult
;
1367 int32_t lastOffset
, offset
;
1368 int32_t forwardSelectionCounter
= 0;
1369 int32_t forwardSelectionOffset
= 0;
1371 log_verbose("doForwardSelectionTest text of length: %d\n", u_strlen(testText
));
1374 lastOffset
= ubrk_first(iterator
);
1375 offset
= ubrk_next(iterator
);
1376 while(offset
!=UBRK_DONE
&& forwardSelectionCounter
< Count(result
)) {
/* next() must leave the iterator positioned on the offset it returned. */
1378 if (offset
!= ubrk_current(iterator
)){
1379 log_err("current() failed: it returned %d and offset was %d\n", ubrk_current(iterator
), offset
);
/* Expected boundary = total length of all expected pieces so far. */
1381 expectedResult
=elementAt(result
, forwardSelectionCounter
);
1382 forwardSelectionOffset
+= u_strlen(expectedResult
);
1384 selectionResult
=extractBetween(lastOffset
, offset
, testText
);
1385 if (offset
!= forwardSelectionOffset
) {
1386 log_err("\n*** Selection #%d\n expected : %s - length %d\n\rselected : %s - length %d\n",
1387 forwardSelectionCounter
, austrdup(expectedResult
), u_strlen(expectedResult
),
1388 austrdup(selectionResult
), u_strlen(selectionResult
) );
1390 log_verbose("#%d [\"%d\",\"%d\"] : %s\n", forwardSelectionCounter
, lastOffset
, offset
,
1391 austrdup(selectionResult
));
1393 forwardSelectionCounter
++;
1394 lastOffset
= offset
;
1395 offset
= ubrk_next(iterator
);
1396 free(selectionResult
);
/* Post-conditions: all expected pieces seen, and no boundaries left over. */
1398 if (forwardSelectionCounter
< Count(result
) - 1){
1399 log_err("\n*** Selection #%d not found at offset %d !!!\n", forwardSelectionCounter
, offset
);
1401 else if (forwardSelectionCounter
>= Count(result
) && offset
!= UBRK_DONE
){
1402 log_err("\n*** Selection #%d should not exist at offset %d !!!\n", forwardSelectionCounter
, offset
);
/*
 * Walks `iterator` backward over `testText` and compares each boundary with
 * the expected pieces stored in `result`, taken from the last entry down.
 *
 * iterator  break iterator already positioned on testText
 * testText  the text being iterated
 * result    vector of expected selections, in text order
 *
 * Starting from ubrk_last(), every ubrk_previous() offset is compared with
 * neededOffset, which starts at the text length and is decreased by the
 * length of each expected piece. A final check reports a selection that was
 * not found.
 */
1405 void doBackwardSelectionTest(UBreakIterator
* iterator
, UChar
* testText
, Vector
* result
)
1407 UChar
* expectedResult
;
1408 UChar
* selectionResult
;
1409 int32_t backwardSelectionCounter
, neededOffset
, lastOffset
, offset
;
1410 backwardSelectionCounter
= (Count(result
) - 1);
1411 neededOffset
= u_strlen(testText
);
1412 lastOffset
= ubrk_last(iterator
);
1413 offset
= ubrk_previous(iterator
);
1415 log_verbose("doBackwardSelectionTest text of length: %d\n", u_strlen(testText
));
1416 while(offset
!= UBRK_DONE
)
1418 expectedResult
= elementAt(result
, backwardSelectionCounter
);
1419 neededOffset
-= u_strlen(expectedResult
);
1420 selectionResult
=extractBetween(offset
, lastOffset
, testText
);
1421 if(offset
!= neededOffset
) {
1422 log_err("\n*** Selection #%d\nExpected : %d > %s < \n\rSelected : %d > %s < \n",
1423 backwardSelectionCounter
, neededOffset
, austrdup(expectedResult
),
1424 offset
, austrdup(selectionResult
) );
/* %s needs a char string: convert the UChar selection before logging it. */
1427 log_verbose("#%d : %s\n", backwardSelectionCounter
, austrdup(selectionResult
));
1428 backwardSelectionCounter
--;
1429 lastOffset
= offset
;
1430 offset
= ubrk_previous(iterator
);
1431 free(selectionResult
);
1433 if (backwardSelectionCounter
>= 0 && offset
!= UBRK_DONE
){
1434 log_err("*** Selection #%d not found!!!\n", backwardSelectionCounter
);
/*
 * Checks that the first selection produced by `iterator` (the text between
 * ubrk_first() and the following ubrk_next()) equals element 0 of `result`.
 *
 * iterator  break iterator already positioned on testText
 * testText  the text being iterated
 * result    vector of expected selections, in text order
 *
 * When ubrk_next() returns UBRK_DONE an error is reported unless the start
 * offset is 0 and the text is empty.
 */
1438 void doFirstSelectionTest(UBreakIterator
* iterator
, UChar
* testText
, Vector
* result
)
1440 int32_t selectionStart
, selectionEnd
;
1441 UChar
* expectedFirstSelection
=NULL
;
1442 UChar
* tempFirst
= NULL
;
1443 UBool success
= TRUE
;
1445 log_verbose("doFirstSelectionTest.......\n");
1447 selectionStart
= ubrk_first(iterator
);
1448 selectionEnd
= ubrk_next(iterator
);
1449 if(selectionEnd
!= UBRK_DONE
) {
1451 tempFirst
=extractBetween(selectionStart
, selectionEnd
, testText
);
1452 expectedFirstSelection
= elementAt(result
,0);
1454 if(u_strcmp(tempFirst
,expectedFirstSelection
)!=0) {
1455 log_err("\n### Error in doFirstSelectionTest. First selection not equal to what expected\n");
1456 log_err("Expected: %s - length %d\n\rSelected: %s - length %d\n",
1457 austrdup(expectedFirstSelection
), u_strlen(expectedFirstSelection
),
1458 austrdup(tempFirst
), u_strlen(tempFirst
));
/* No second boundary: only acceptable for an empty text starting at 0. */
1462 else if (selectionStart
!= 0 || u_strlen(testText
)!= 0) {
1463 log_err("\n### Error in doFirstSelectionTest. Could not get first selection.\n\r start= %d end= %d\n",
1464 selectionStart
, selectionEnd
);
1469 log_verbose("doFirstSelectionTest\n\nExpexcted first selection: %s\nCalculated first selection: %s is correct\n",
1470 austrdup(expectedFirstSelection
), austrdup(tempFirst
) );
1473 if(tempFirst
!= NULL
) {
/*
 * Checks that the last selection produced by `iterator` (the text between
 * ubrk_previous() after ubrk_last(), and ubrk_last()) equals the final
 * element of `result`.
 *
 * iterator  break iterator already positioned on testText
 * testText  the text being iterated
 * result    vector of expected selections, in text order
 *
 * When ubrk_previous() returns UBRK_DONE an error is reported unless the end
 * offset is 0 and the text is empty.
 */
1479 void doLastSelectionTest(UBreakIterator
* iterator
, UChar
* testText
, Vector
* result
)
1481 int32_t selectionEnd
, selectionStart
;
1482 UChar
*expectedLastSelection
=NULL
;
1483 UChar
*tempLast
= NULL
;
1484 UBool success
= TRUE
;
1486 log_verbose("doLastSelectionTest.......\n");
1488 selectionEnd
= ubrk_last(iterator
);
1489 selectionStart
= ubrk_previous(iterator
);
1492 if(selectionStart
!= UBRK_DONE
) {
1493 tempLast
=extractBetween(selectionStart
, selectionEnd
, testText
);
1494 expectedLastSelection
= elementAt(result
,Count(result
)-1);
1495 if(u_strcmp(tempLast
,expectedLastSelection
)!=0) {
1496 log_err("\n\n### Error in doLastSelectionTest. Last selection not equal to what expected.\n");
1497 log_err("Expected: %s - length %d\n\r Selected: %s - length %d\n",
1498 austrdup(expectedLastSelection
), u_strlen(expectedLastSelection
),
1499 austrdup(tempLast
), u_strlen(tempLast
) );
/* No boundary before the end: only acceptable for an empty text. */
1504 else if (selectionEnd
!= 0 || u_strlen(testText
)!= 0) {
1505 log_err("\n### Error in doLastSelectionTest. Could not get last selection. [%d,%d]\n", selectionStart
,
1510 log_verbose("doLastSelectionTest\n\nExpected Last selection: %s \n", austrdup(expectedLastSelection
));
1511 log_verbose("Calculated last Selection: %s is correct\n", austrdup(tempLast
) );
1514 if(tempLast
!=NULL
) {
1520 * @bug 4052418 4068139
/*
 * For every offset from 0 up to (but not including) the length of
 * `testText`, asks `iterator` for ubrk_preceding(offset) and
 * ubrk_following(offset) and checks both against boundaries derived from
 * `result`.
 *
 * iterator  break iterator already positioned on testText
 * testText  the text being iterated
 * result    vector of expected selections, in text order
 *
 * The expected boundaries are obtained by accumulating the lengths of the
 * entries of `result` in `pos`: the sum must land exactly on selBegin, and
 * after adding one more entry it must equal selEnd.
 */
1522 void doForwardIndexSelectionTest(UBreakIterator
* iterator
, UChar
* testText
, Vector
* result
)
1524 int32_t arrayCount
, textLength
;
1525 int32_t selBegin
, selEnd
, current
, entry
, pos
;
1528 log_verbose("doForwardIndexSelectionTest text of length: %d\n", u_strlen(testText
));
1529 arrayCount
= Count(result
);
1530 textLength
= u_strlen(testText
);
1532 for(offset
= 0; offset
< textLength
; offset
++) {
1533 selBegin
= ubrk_preceding(iterator
, offset
);
1534 selEnd
= ubrk_following(iterator
, offset
);
1538 if (selBegin
!= UBRK_DONE
) {
/* Advance through the expected pieces until pos reaches selBegin. */
1539 while (pos
< selBegin
&& entry
< arrayCount
) {
1540 pos
+= u_strlen(elementAt(result
, entry
));
1543 if (pos
!= selBegin
) {
1544 log_err("With offset = %d, got back spurious %d from preceding\n", offset
, selBegin
);
1548 pos
+= u_strlen(elementAt(result
, entry
));
1552 current
=ubrk_current(iterator
);
1554 if (pos
!= selEnd
) {
1555 log_err("With offset = %d, got back erroneous %d from follwoing\n", offset
, selEnd
);
1563 * @bug 4052418 4068139
/*
 * Same check as the forward index test, but the offsets are visited from
 * textLength-1 down to 0: for each offset, ubrk_preceding(offset) and
 * ubrk_following(offset) are compared with boundaries derived from `result`.
 *
 * iterator  break iterator already positioned on testText
 * testText  the text being iterated
 * result    vector of expected selections, in text order
 *
 * The expected boundaries are obtained by accumulating the lengths of the
 * entries of `result` in `pos`: the sum must land exactly on selBegin, and
 * after adding one more entry it must equal selEnd.
 */
1565 void doBackwardIndexSelectionTest(UBreakIterator
* iterator
, UChar
* testText
, Vector
* result
)
1567 int32_t arrayCount
, textLength
;
1568 int32_t selBegin
, selEnd
, current
, entry
, pos
;
1571 log_verbose("doBackwardIndexSelectionTest text of length: %d\n", u_strlen(testText
));
1572 arrayCount
= Count(result
);
1573 textLength
= u_strlen(testText
);
1575 for(offset
= textLength
-1; offset
>= 0; offset
--) {
1576 selBegin
= ubrk_preceding(iterator
, offset
);
1577 selEnd
= ubrk_following(iterator
, offset
);
1581 if (selBegin
!= UBRK_DONE
) {
/* Advance through the expected pieces until pos reaches selBegin. */
1582 while (pos
< selBegin
&& entry
< arrayCount
) {
1583 pos
+= u_strlen(elementAt(result
, entry
));
1586 if (pos
!= selBegin
) {
1587 log_err("With offset = %d, got back spurious %d from preceding\n", offset
, selBegin
);
1591 pos
+= u_strlen(elementAt(result
, entry
));
1595 current
=ubrk_current(iterator
);
1597 if (pos
!= selEnd
) {
1598 log_err("With offset = %d, got back erroneous %d from following\n", offset
, selEnd
);
/*
 * Invariant shared by all iterator types: a break should always occur after
 * CR (unless followed by LF), LF, PS (U+2029) and LS (U+2028).
 *
 * type       kind of break iterator to open (passed to ubrk_open)
 * testChars  characters to place before and after the break character
 *
 * A three-character work string is used: work[1] is each break character in
 * turn, work[0] and work[2] are taken from testChars. The iterator text is
 * reset with ubrk_setText() for every combination and all boundaries are
 * enumerated; a missing break is reported through log_err. errorCount is
 * compared against 75.
 */
1607 void doBreakInvariantTest(UBreakIteratorType type
, UChar
* testChars
)
1612 UErrorCode status
= U_ZERO_ERROR
;
1619 status
=U_ZERO_ERROR
;
1621 u_uastrcpy(work
, "aaa");
1623 log_verbose("doBreakInvariantTest text of length: %d\n", u_strlen(testChars
));
1624 /* a break should always occur after CR (unless followed by LF), LF, PS, and LS */
1626 ustr
= CharsToUChars("\r\n\\u2029\\u2028");
1627 u_strcpy(breaks
, ustr
);
1630 tb
= ubrk_open(type
, "en_US", work
, u_strlen(work
), &status
);
1632 for (i
= 0; i
< u_strlen(breaks
); i
++) {
1633 work
[1] = breaks
[i
];
1634 for (j
= 0; j
< u_strlen(testChars
); j
++) {
1635 work
[0] = testChars
[j
];
1636 for (k
= 0; k
< u_strlen(testChars
); k
++) {
1639 /* if a cr is followed by lf, ps, ls or etx, don't do the check (that's
1640 not supposed to work) */
1641 if (work
[1] == '\r' && (c
== '\n' || c
== 0x2029
1642 || c
== 0x2028 || c
== 0x0003))
1645 work
[2] = testChars
[k
];
1646 ubrk_setText(tb
, work
, u_strlen(work
), &status
);
1647 if(U_FAILURE(status
)){
1648 log_err("ERROR in opening the breakIterator in doVariant Function: %s\n", myErrorName(status
));
1651 for (l
= ubrk_first(tb
); l
!= UBRK_DONE
; l
= ubrk_next(tb
)) {
1656 log_err("No break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work
[1])),
1657 austrdup(UCharToUCharArray(work
[2])) );
1659 if (errorCount
>= 75)
/*
 * Two further invariants shared by all iterator types:
 *   1. a break should never occur between CR and LF;
 *   2. a break should never occur before a non-spacing (or enclosing) mark,
 *      unless the preceding character is CR, LF, PS or LS.
 *
 * type       kind of break iterator to open (passed to ubrk_open)
 * testChars  characters substituted into the work string
 *
 * Part 1 uses the work string "a\r\na" and replaces work[0] and work[3] with
 * every pair of characters from testChars. Part 2 uses a four-character
 * work string initialised to "aaaa". In both parts the iterator text is
 * reset with ubrk_setText(), all boundaries are enumerated, violations are
 * reported through log_err and errorCount is compared against 75.
 */
1668 void doOtherInvariantTest(UBreakIteratorType type
, UChar
* testChars
)
1673 UErrorCode status
= U_ZERO_ERROR
;
1676 int32_t errorCount
= 0;
1677 status
=U_ZERO_ERROR
;
1679 u_uastrcpy(work
, "a\r\na");
1681 log_verbose("doOtherInvariantTest text of length: %d\n", u_strlen(testChars
));
1683 tb
= ubrk_open(type
, "en_us", work
, u_strlen(work
), &status
);
1685 /* a break should never occur between CR and LF */
1686 for (i
= 0; i
< u_strlen(testChars
); i
++) {
1687 work
[0] = testChars
[i
];
1688 for (j
= 0; j
< u_strlen(testChars
); j
++) {
1689 work
[3] = testChars
[j
];
1690 ubrk_setText(tb
, work
, u_strlen(work
), &status
);
1691 if(U_FAILURE(status
)){
1692 log_err("ERROR in opening the breakIterator in doVariant Function: %s\n", myErrorName(status
));
1694 for ( k
= ubrk_first(tb
); k
!= UBRK_DONE
; k
= ubrk_next(tb
))
1696 log_err("Break between CR and LF in string U+%s, U+d U+a U+%s\n",
1697 austrdup(UCharToUCharArray(work
[0])), austrdup(UCharToUCharArray(work
[3])) );
1699 if (errorCount
>= 75)
1705 /* a break should never occur before a non-spacing mark, unless the preceding
1706 character is CR, LF, PS, or LS */
1707 u_uastrcpy(work
,"aaaa");
1708 for (i
= 0; i
< u_strlen(testChars
); i
++) {
/* Line/paragraph terminators and U+0003 get special treatment as the preceding character. */
1710 if (c
== '\n' || c
== '\r' || c
== 0x2029 || c
== 0x2028 || c
== 0x0003)
1713 for (j
= 0; j
< u_strlen(testChars
); j
++) {
/* Characters that are neither non-spacing nor enclosing marks get special treatment here. */
1715 if ((u_charType(c
) != U_NON_SPACING_MARK
) &&
1716 (u_charType(c
) != U_ENCLOSING_MARK
))
1719 ubrk_setText(tb
, work
, u_strlen(work
), &status
);
1720 if(U_FAILURE(status
)){
1721 log_err("ERROR in opening the breakIterator in doOtherVariant Function %s\n", myErrorName(status
));
1723 for (k
= ubrk_first(tb
); k
!= UBRK_DONE
; k
= ubrk_next(tb
))
1725 log_err("Break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work
[1])),
1726 austrdup(UCharToUCharArray(work
[2])) );
1728 if (errorCount
>= 75)
/*
 * Debugging aid: dumps every selection that `tb` finds in `text`.
 *
 * tb    break iterator already positioned on text
 * text  the text being iterated
 *
 * Logs the text and its length at verbose level, then iterates from
 * ubrk_first() with ubrk_next() and prints each [start,end] range with the
 * extracted substring. Note that the per-selection output goes through
 * log_err, not log_verbose.
 */
1736 void sample(UBreakIterator
* tb
, UChar
* text
)
1741 log_verbose("-------------------------\n");
1742 log_verbose("%s of length %d\n", austrdup(text
), u_strlen(text
));
1744 start
= ubrk_first(tb
);
1745 for (end
= ubrk_next(tb
); end
!= UBRK_DONE
; end
= ubrk_next(tb
)) {
1746 substring
=extractBetween(start
, end
, text
);
1747 log_err("[%d,%d] \"%s\" \n", start
, end
, austrdup(substring
) );
/*
 * Registration hook for this file's tests: each addTest() call below
 * attaches one test function to the tree at `root` under the
 * "tstxtbd/cregrtst/" path.
 * NOTE(review): the comment inside the function says these tests are
 * removed; confirm how the addTest() calls are disabled before relying on
 * them being registered.
 */
1754 void addBrkIterRegrTest(TestNode
** root
);
1756 void addBrkIterRegrTest(TestNode
** root
)
1760 /* These tests are removed because
1761 * 1. The test data is completely redundant with that in the C++ break iterator tests
1762 * 2. The data here is stale, and I don't want to copy all of the changes from the C++ tests, and
1763 * 3. The C API is covered by the API tests.
1766 addTest(root
, &TestForwardWordSelection
, "tstxtbd/cregrtst/TestForwardWordSelection" );
1767 addTest(root
, &TestBackwardWordSelection
, "tstxtbd/cregrtst/TestBackwardWordSelection" );
1768 addTest(root
, &TestFirstWordSelection
, "tstxtbd/cregrtst/TestFirstWordSelection" );
1769 addTest(root
, &TestLastWordSelection
, "tstxtbd/cregrtst/TestLastWordSelection" );
1770 addTest(root
, &TestForwardWordIndexSelection
, "tstxtbd/cregrtst/TestForwardWordIndexSelection");
1771 addTest(root
, &TestBackwardWordIndexSelection
, "tstxtbd/cregrtst/TestBackwardWordIndexSelection");
1772 addTest(root
, &TestForwardSentenceSelection
, "tstxtbd/cregrtst/TestForwardSentenceSelection");
1773 addTest(root
, &TestBackwardSentenceSelection
, "tstxtbd/cregrtst/TestBackwardSentenceSelection");
1774 addTest(root
, &TestFirstSentenceSelection
, "tstxtbd/cregrtst/TestFirstSentenceSelection");
1775 addTest(root
, &TestLastSentenceSelection
, "tstxtbd/cregrtst/TestLastSentenceSelection");
1776 addTest(root
, &TestForwardSentenceIndexSelection
, "tstxtbd/cregrtst/TestForwardSentenceIndexSelection");
1777 addTest(root
, &TestBackwardSentenceIndexSelection
, "tstxtbd/cregrtst/TestBackwardSentenceIndexSelection");
1779 addTest(root
, &TestForwardLineSelection
, "tstxtbd/cregrtst/TestForwardLineSelection");
1780 addTest(root
, &TestBackwardLineSelection
, "tstxtbd/cregrtst/TestBackwardLineSelection");
1781 addTest(root
, &TestFirstLineSelection
, "tstxtbd/cregrtst/TestFirstLineSelection");
1782 addTest(root
, &TestLastLineSelection
, "tstxtbd/cregrtst/TestLastLineSelection");
1783 addTest(root
, &TestForwardLineIndexSelection
, "tstxtbd/cregrtst/TestForwardLineIndexSelection");
1784 addTest(root
, &TestBackwardLineIndexSelection
, "tstxtbd/cregrtst/TestBackwardLineIndexSelection");
1786 addTest(root
, &TestForwardCharacterSelection
, "tstxtbd/cregrtst/TestForwardCharacterSelection");
1787 addTest(root
, &TestBackwardCharacterSelection
, "tstxtbd/cregrtst/TestBackwardCharacterSelection");
1788 addTest(root
, &TestFirstCharacterSelection
, "tstxtbd/cregrtst/TestFirstCharacterSelection");
1789 addTest(root
, &TestLastCharacterSelection
, "tstxtbd/cregrtst/TestLastCharacterSelection");
1790 addTest(root
, &TestForwardCharacterIndexSelection
, "tstxtbd/cregrtst/TestForwardCharacterIndexSelection");
1791 addTest(root
, &TestBackwardCharacterIndexSelection
, "tstxtbd/cregrtst/TestBackwardCharacterIndexSelection");
1793 addTest(root
, &TestPreceding
, "tstxtbd/cregrtst/TestPreceding");
1794 addTest(root
, &TestEndBehaviour
, "tstxtbd/cregrtst/TestEndBehaviour");
1796 addTest(root
, &TestWordInvariants
, "tstxtbd/cregrtst/TestWordInvariants");
1797 addTest(root
, &TestSentenceInvariants
, "tstxtbd/cregrtst/TestSentenceInvariants");
1798 addTest(root
, &TestCharacterInvariants
, "tstxtbd/cregrtst/TestCharacterInvariants");
1799 addTest(root
, &TestLineInvariants
, "tstxtbd/cregrtst/TestLineInvariants");
1804 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */