]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/cregrtst.c
ICU-6.2.10.tar.gz
[apple/icu.git] / icuSources / test / cintltst / cregrtst.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2001, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File CREGRTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda Ported for C API, added extra functions and tests
13 *********************************************************************************
14 */
15
16 /* C FUNCTIONALITY AND REGRESSION TEST FOR BREAKITERATOR */
17
18 #include <stdlib.h>
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_BREAK_ITERATION
23
24 #include "unicode/uloc.h"
25 #include "unicode/ubrk.h"
26 #include "unicode/uchar.h"
27 #include "unicode/ustring.h"
28 #include "cintltst.h"
29 #include "cregrtst.h"
30 #include "ccolltst.h"
31 #include "cstring.h"
32
33 /* -------------------------------------------------------------------------- */
34 /**
35 * "Vector" structure for holding test tables
36 * (this structure is actually a linked list, but we use the name and API of the
37 * java.util.Vector class to keep as much of our test code as possible the same.)
38 */
39 struct Vector1 {
40 UChar *text;
41 struct Vector1 *link;
42 };
43 typedef struct Vector1 Vector;
44
45 void addElement(Vector *q, const char* string)
46 {
47
48 Vector *p;
49
50 p=(Vector*)malloc(sizeof(Vector));
51 p->text=(UChar*)malloc(sizeof(UChar) * (uprv_strlen(string)+1));
52 u_uastrcpy(p->text, string);
53 p->link=NULL;
54 while(q->link!=NULL)
55 q=q->link;
56 q->link=p;
57
58 }
59 UChar* addElement2(Vector *q, const UChar* string)
60 {
61 Vector *p;
62
63 p=(Vector*)malloc(sizeof(Vector));
64 p->text=(UChar*)malloc(sizeof(UChar) * (u_strlen(string)+1));
65 u_strcpy(p->text, string);
66 p->link=NULL;
67 while(q->link!=NULL)
68 q=q->link;
69 q->link=p;
70
71 return (UChar *)string;
72
73 }
74
75 void cleanupVector(Vector *q) {
76 Vector *p;
77 while(q != NULL) {
78 p = q->link;
79 free(q->text);
80 free(q);
81 q = p;
82 }
83 }
84
85 int32_t Count(Vector *q)
86 {
87 int32_t c=0;
88 while(q!=NULL){
89 q=q->link;
90 c++;
91 }
92 return c;
93 }
94
95 UChar* elementAt(Vector *q, int32_t pos)
96 {
97 int32_t i=0;
98 if(q==NULL)
99 return NULL;
100 for(i=0;i<pos;i++)
101 q=q->link;
102 return (q->text);
103 }
104 /* Just to make it easier to use with UChar array.*/
105
106 UChar* UCharToUCharArray(const UChar uchar)
107 {
108 UChar *buffer;
109 UChar *alias;
110 buffer=(UChar*)malloc(sizeof(uchar) * 2);
111 alias=buffer;
112 *alias=uchar;
113 alias++;
114 *alias=0x0000;
115 return buffer;
116 }
117
118
119 UChar* extractBetween(int32_t start, int32_t end, UChar* text)
120 {
121 UChar* result;
122 UChar* temp;
123 temp=(UChar*)malloc(sizeof(UChar) * ((u_strlen(text)-start)+1));
124 result=(UChar*)malloc(sizeof(UChar) * ((end-start)+1));
125 u_strcpy(temp, &text[start]);
126 u_strncpy(result, temp, end-start);
127 result[end-start] = 0;
128 free(temp);
129 return result;
130 }
131 /* -------------------------------------------------------------------------------------- */
132 /**
133 * BreakIterator Regression Test is a medium top-level test class for everything in the C BreakIterator API
134 * (ubrk.h and ubrk.c).
135 */
136
137
138
139 const UChar cannedTestArray[] = {
140 0x0001, 0x0002, 0x0003, 0x0004, 0x0020, 0x0021, 0x005c, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0028, 0x0029,
141 0x002b, 0x002d, 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x003c, 0x003d, 0x003e, 0x0041, 0x0042, 0x0043, 0x0044,
142 0x0045, 0x005B, 0x005d, 0x005e, 0x005f, 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x007b, 0x007d, 0x007c,
143 0x002c, 0x00a0, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00ab, 0x00ad, 0x00ae, 0x00af,
144 0x00b0, 0x00b2, 0x00b3, 0x00b4, 0x00b9, 0x00bb, 0x00bc, 0x00bd, 0x02b0, 0x02b1, 0x02b2, 0x02b3, 0x02b4, 0x0300,
145 0x0301, 0x0302, 0x0303, 0x0304, 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x0903, 0x093e, 0x093f, 0x0940, 0x0949,
146 0x0f3a, 0x0f3b, 0x2000, 0x2001, 0x2002, 0x200c, 0x200d, 0x200e, 0x200f, 0x2010, 0x2011, 0x2012, 0x2028, 0x2029,
147 0x202a, 0x203e, 0x203f, 0x2040, 0x20dd, 0x20de, 0x20df, 0x20e0, 0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x0000
148 };
149
150
151
152
153 /*--------------------------------------------- */
154 /* setup methods */
155 /*--------------------------------------------- */
156
157 void AllocateTextBoundary()
158 {
159
160 cannedTestChars=(UChar*)malloc(sizeof(UChar) * (u_strlen(cannedTestArray) + 10));
161 u_uastrcpy(cannedTestChars,"");
162 u_uastrcpy(cannedTestChars,"0x0000");
163 u_strcat(cannedTestChars, cannedTestArray);
164
165 }
166
167 void FreeTextBoundary()
168 {
169 free(cannedTestChars);
170 }
171
172 /*Add Word Data*/
173 void addTestWordData()
174 {
175 int32_t elems;
176
177
178 wordSelectionData=(Vector*)malloc(sizeof(Vector));
179 wordSelectionData->text=(UChar*)malloc(sizeof(UChar) * 6);
180 u_uastrcpy(wordSelectionData->text, "12,34");
181 wordSelectionData->link=NULL;
182
183 addElement(wordSelectionData, " ");
184 free(addElement2(wordSelectionData, UCharToUCharArray((UChar)(0x00A2)))); /*cent sign */
185 free(addElement2(wordSelectionData, UCharToUCharArray((UChar)(0x00A3)))); /*pound sign */
186 free(addElement2(wordSelectionData, UCharToUCharArray((UChar)(0x00A4)))); /*currency sign */
187 free(addElement2(wordSelectionData, UCharToUCharArray((UChar)(0x00A5)))); /*yen sign */
188 addElement(wordSelectionData, "alpha-beta-gamma");
189 addElement(wordSelectionData, ".");
190 addElement(wordSelectionData, " ");
191 addElement(wordSelectionData, "Badges");
192 addElement(wordSelectionData, "?");
193 addElement(wordSelectionData, " ");
194 addElement(wordSelectionData, "BADGES");
195 addElement(wordSelectionData, "!");
196 addElement(wordSelectionData, "?");
197 addElement(wordSelectionData, "!");
198 addElement(wordSelectionData, " ");
199 addElement(wordSelectionData, "We");
200 addElement(wordSelectionData, " ");
201 addElement(wordSelectionData, "don't");
202 addElement(wordSelectionData, " ");
203 addElement(wordSelectionData, "need");
204 addElement(wordSelectionData, " ");
205 addElement(wordSelectionData, "no");
206 addElement(wordSelectionData, " ");
207 addElement(wordSelectionData, "STINKING");
208 addElement(wordSelectionData, " ");
209 addElement(wordSelectionData, "BADGES");
210 addElement(wordSelectionData, "!");
211 addElement(wordSelectionData, "!");
212 addElement(wordSelectionData, "!");
213
214 addElement(wordSelectionData, "012.566,5");
215 addElement(wordSelectionData, " ");
216 addElement(wordSelectionData, "123.3434,900");
217 addElement(wordSelectionData, " ");
218 addElement(wordSelectionData, "1000,233,456.000");
219 addElement(wordSelectionData, " ");
220 addElement(wordSelectionData, "1,23.322%");
221 addElement(wordSelectionData, " ");
222 addElement(wordSelectionData, "123.1222");
223
224 addElement(wordSelectionData, " ");
225 addElement(wordSelectionData, "$123,000.20");
226
227 addElement(wordSelectionData, " ");
228 addElement(wordSelectionData, "179.01%");
229
230 addElement(wordSelectionData, "Hello");
231 addElement(wordSelectionData, ",");
232 addElement(wordSelectionData, " ");
233 addElement(wordSelectionData, "how");
234 addElement(wordSelectionData, " ");
235 addElement(wordSelectionData, "are");
236 addElement(wordSelectionData, " ");
237 addElement(wordSelectionData, "you");
238 addElement(wordSelectionData, " ");
239 addElement(wordSelectionData, "X");
240 addElement(wordSelectionData, " ");
241
242 addElement(wordSelectionData, "Now");
243 addElement(wordSelectionData, "\r");
244 addElement(wordSelectionData, "is");
245 addElement(wordSelectionData, "\n");
246 addElement(wordSelectionData, "the");
247 addElement(wordSelectionData, "\r\n");
248 addElement(wordSelectionData, "time");
249 addElement(wordSelectionData, "\n");
250 addElement(wordSelectionData, "\r");
251 addElement(wordSelectionData, "for");
252 addElement(wordSelectionData, "\r");
253 addElement(wordSelectionData, "\r");
254 addElement(wordSelectionData, "all");
255 addElement(wordSelectionData, " ");
256
257 /* to test for bug #4097779 */
258 free(addElement2(wordSelectionData, CharsToUChars("aa\\u0300a")));
259 addElement(wordSelectionData, " ");
260
261 /* to test for bug #4098467
262 What follows is a string of Korean characters (I found it in the Yellow Pages
263 ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
264 it correctly), first as precomposed syllables, and then as conjoining jamo.
265 Both sequences should be semantically identical and break the same way.
266 precomposed syllables... */
267 free(addElement2(wordSelectionData, CharsToUChars("\\uc0c1\\ud56d")));
268 addElement(wordSelectionData, " ");
269 free(addElement2(wordSelectionData, CharsToUChars("\\ud55c\\uc778")));
270 addElement(wordSelectionData, " ");
271 free(addElement2(wordSelectionData, CharsToUChars("\\uc5f0\\ud569")));
272 addElement(wordSelectionData, " ");
273 free(addElement2(wordSelectionData, CharsToUChars("\\uc7a5\\ub85c\\uad50\\ud68c")));
274 addElement(wordSelectionData, " ");
275 /* conjoining jamo... */
276 free(addElement2(wordSelectionData, CharsToUChars("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc")));
277 addElement(wordSelectionData, " ");
278 free(addElement2(wordSelectionData, CharsToUChars("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab")));
279 addElement(wordSelectionData, " ");
280 free(addElement2(wordSelectionData, CharsToUChars("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8")));
281 addElement(wordSelectionData, " ");
282 free(addElement2(wordSelectionData, CharsToUChars("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c")));
283 addElement(wordSelectionData, " ");
284
285 /* this is a test for bug #4117554: the ideographic iteration mark (U+3005) should
286 count as a Kanji character for the purposes of word breaking */
287 addElement(wordSelectionData, "abc");
288 free(addElement2(wordSelectionData, CharsToUChars("\\u4e01\\u4e02\\u3005\\u4e03\\u4e03")));
289 addElement(wordSelectionData, "abc");
290
291 elems= Count(wordSelectionData);
292 log_verbose("In word, the no: of words are: %d\n", elems);
293 testWordText = createTestData(wordSelectionData, elems);
294
295
296 }
297
298 const UChar kParagraphSeparator = 0x2029;
299 const UChar kLineSeparator = 0x2028;
300
301 /**
302 * Add Sentence Data
303 */
304 void addTestSentenceData()
305 {
306 int32_t elems;
307 UChar temp[100];
308 UChar *td;
309
310 sentenceSelectionData=(Vector*)malloc(sizeof(Vector));
311 sentenceSelectionData->text=(UChar*)malloc(sizeof(UChar) * (strlen("This is a simple sample sentence. ")+1));
312 u_uastrcpy(sentenceSelectionData->text, "This is a simple sample sentence. ");
313 sentenceSelectionData->link=NULL;
314
315 /* addElement(sentenceSelectionData, "This is a simple sample sentence. "); */
316 addElement(sentenceSelectionData, "(This is it.) ");
317 addElement(sentenceSelectionData, "This is a simple sample sentence. ");
318 addElement(sentenceSelectionData, "\"This isn\'t it.\" ");
319 addElement(sentenceSelectionData, "Hi! ");
320 addElement(sentenceSelectionData, "This is a simple sample sentence. ");
321 addElement(sentenceSelectionData, "It does not have to make any sense as you can see. ");
322 addElement(sentenceSelectionData, "Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. ");
323 addElement(sentenceSelectionData, "Che la dritta via aveo smarrita. ");
324 addElement(sentenceSelectionData, "He said, that I said, that you said!! ");
325
326 u_uastrcpy(temp, "Don't rock the boat");
327 td = UCharToUCharArray(kParagraphSeparator);
328 u_strcat(temp, td);
329 free(td);
330 addElement2(sentenceSelectionData, temp);
331
332 addElement(sentenceSelectionData, "Because I am the daddy, that is why. ");
333 addElement(sentenceSelectionData, "Not on my time (el timo.)! ");
334
335 u_uastrcpy(temp, "So what!!");
336 td = UCharToUCharArray(kParagraphSeparator);
337 u_strcat(temp, td);
338 free(td);
339 addElement2(sentenceSelectionData, temp);
340
341 addElement(sentenceSelectionData, "\"But now,\" he said, \"I know!\" ");
342 addElement(sentenceSelectionData, "Harris thumbed down several, including \"Away We Go\" (which became the huge success Oklahoma!). ");
343 addElement(sentenceSelectionData, "One species, B. anthracis, is highly virulent.\n");
344 addElement(sentenceSelectionData, "Wolf said about Sounder:\"Beautifully thought-out and directed.\" ");
345 addElement(sentenceSelectionData, "Have you ever said, \"This is where\tI shall live\"? ");
346 addElement(sentenceSelectionData, "He answered, \"You may not!\" ");
347 addElement(sentenceSelectionData, "Another popular saying is: \"How do you do?\". ");
348 addElement(sentenceSelectionData, "Yet another popular saying is: \'I\'m fine thanks.\' ");
349 addElement(sentenceSelectionData, "What is the proper use of the abbreviation pp.? ");
350 addElement(sentenceSelectionData, "Yes, I am definatelly 12\" tall!!");
351
352 /* test for bug #4113835: \n and \r count as spaces, not as paragraph breaks */
353 u_uastrcpy(temp, "Now\ris\nthe\r\ntime\n\rfor\r\rall");
354 td = UCharToUCharArray(kParagraphSeparator);
355 u_strcat(temp, td);
356 free(td);
357 addElement2(sentenceSelectionData, temp);
358
359 /* test for bug #4117554: Treat fullwidth variants of .!? the same as their
360 normal counterparts */
361 free(addElement2(sentenceSelectionData, CharsToUChars("I know I'm right\\uff0e ")));
362 free(addElement2(sentenceSelectionData, CharsToUChars("Right\\uff1f ")));
363 free(addElement2(sentenceSelectionData, CharsToUChars("Right\\uff01 ")));
364
365 /* test for bug #4117554: Break sentence between a sentence terminator and
366 opening punctuation */
367 addElement(sentenceSelectionData, "no?");
368 u_uastrcpy(temp, "(yes)");
369 td = CharsToUChars("\\u2029");
370 u_strcat(temp, td);
371 free(td);
372 addElement2(sentenceSelectionData, temp);
373
374 /* test for bug #4158381: Don't break sentence after period if it isn't
375 followed by a space */
376 addElement(sentenceSelectionData, "Test <code>Flags.Flag</code> class. ");
377 u_uastrcpy(temp, "Another test.");
378 td = CharsToUChars("\\u2029");
379 u_strcat(temp, td);
380 free(td);
381 addElement2(sentenceSelectionData, temp);
382
383 /* test for bug #4158381: No breaks when there are no terminators around */
384 addElement(sentenceSelectionData, "<P>Provides a set of &quot;lightweight&quot; (all-java<FONT SIZE=\"-2\"><SUP>TM</SUP></FONT> language) components that, to the maximum degree possible, work the same on all platforms. ");
385 u_uastrcpy(temp, "Another test.");
386 td = CharsToUChars("\\u2029");
387 u_strcat(temp, td);
388 free(td);
389 addElement2(sentenceSelectionData, temp);
390
391 /* test for bug #4143071: Make sure sentences that end with digits work right */
392 addElement(sentenceSelectionData, "Today is the 27th of May, 1998. ");
393 addElement(sentenceSelectionData, "Tomorrow with be 28 May 1998. ");
394 u_uastrcpy(temp, "The day after will be the 30th.");
395 td = CharsToUChars("\\u2029");
396 u_strcat(temp, td);
397 free(td);
398 addElement2(sentenceSelectionData, temp);
399
400 /* test for bug #4152416: Make sure sentences ending with a capital
401 letter are treated correctly */
402 addElement(sentenceSelectionData, "The type of all primitive <code>boolean</code> values accessed in the target VM. ");
403 u_uastrcpy(temp, "Calls to xxx will return an implementor of this interface.");
404 td = CharsToUChars("\\u2029");
405 u_strcat(temp, td);
406 free(td);
407 addElement2(sentenceSelectionData, temp);
408
409
410 /* test for bug #4152117: Make sure sentence breaking is handling
411 punctuation correctly */
412 addElement(sentenceSelectionData, "Constructs a randomly generated BigInteger, uniformly distributed over the range <tt>0</tt> to <tt>(2<sup>numBits</sup> - 1)</tt>, inclusive. ");
413 addElement(sentenceSelectionData, "The uniformity of the distribution assumes that a fair source of random bits is provided in <tt>rnd</tt>. ");
414 u_uastrcpy(temp, "Note that this constructor always constructs a non-negative BigInteger.");
415 td = CharsToUChars("\\u2029");
416 u_strcat(temp, td);
417 free(td);
418 addElement2(sentenceSelectionData, temp);
419
420 elems = Count(sentenceSelectionData);
421 log_verbose("In sentence: the no: of sentences are %d\n", elems);
422 testSentenceText = createTestData(sentenceSelectionData, elems);
423
424
425 }
426
427 /**
428 * Add Line Data
429 */
430
431 void addTestLineData()
432 {
433 int32_t elems;
434
435 lineSelectionData=(Vector*)malloc(sizeof(Vector));
436 lineSelectionData->text=(UChar*)malloc(sizeof(UChar) * 7);
437 u_uastrcpy(lineSelectionData->text, "Multi-");
438 lineSelectionData->link=NULL;
439
440 /* lineSelectionData->addElement("Multi-"); */
441 addElement(lineSelectionData, "Level ");
442 addElement(lineSelectionData, "example ");
443 addElement(lineSelectionData, "of ");
444 addElement(lineSelectionData, "a ");
445 addElement(lineSelectionData, "semi-");
446 addElement(lineSelectionData, "idiotic ");
447 addElement(lineSelectionData, "non-");
448 addElement(lineSelectionData, "sensical ");
449 addElement(lineSelectionData, "(non-");
450 addElement(lineSelectionData, "important) ");
451 addElement(lineSelectionData, "sentence. ");
452
453 addElement(lineSelectionData, "Hi ");
454 addElement(lineSelectionData, "Hello ");
455 addElement(lineSelectionData, "How\n");
456 addElement(lineSelectionData, "are\r");
457
458
459 free(addElement2(lineSelectionData, CharsToUChars("you\\u2028"))); /* lineSeperator */
460
461 addElement(lineSelectionData, "fine.\t");
462 addElement(lineSelectionData, "good. ");
463
464 addElement(lineSelectionData, "Now\r");
465 addElement(lineSelectionData, "is\n");
466 addElement(lineSelectionData, "the\r\n");
467 addElement(lineSelectionData, "time\n");
468 addElement(lineSelectionData, "\r");
469 addElement(lineSelectionData, "for\r");
470 addElement(lineSelectionData, "\r");
471 addElement(lineSelectionData, "all ");
472
473 /* to test for bug #4068133 */
474 free(addElement2(lineSelectionData, CharsToUChars("\\u96f6")));
475 free(addElement2(lineSelectionData, CharsToUChars("\\u4e00\\u3002")));
476 free(addElement2(lineSelectionData, CharsToUChars("\\u4e8c\\u3001")));
477 free(addElement2(lineSelectionData, CharsToUChars("\\u4e09\\u3002\\u3001")));
478 free(addElement2(lineSelectionData, CharsToUChars("\\u56db\\u3001\\u3002\\u3001")));
479
480
481 free(addElement2(lineSelectionData, CharsToUChars("\\u4e94,")));
482
483 free(addElement2(lineSelectionData, CharsToUChars("\\u516d.")));
484
485 free(addElement2(lineSelectionData, CharsToUChars("\\u4e03.\\u3001,\\u3002")));
486 free(addElement2(lineSelectionData, CharsToUChars("\\u516b")));
487
488 /* to test for bug #4086052 */
489 free(addElement2(lineSelectionData, CharsToUChars("foo\\u00a0bar ")));
490
491 /* to test for bug #4097920 */
492 addElement(lineSelectionData, "dog,");
493 addElement(lineSelectionData, "cat,");
494 addElement(lineSelectionData, "mouse ");
495 addElement(lineSelectionData, "(one)");
496 addElement(lineSelectionData, "(two)\n");
497
498 /* to test for bug #4035266 */
499 addElement(lineSelectionData, "The ");
500 addElement(lineSelectionData, "balance ");
501 addElement(lineSelectionData, "is ");
502 addElement(lineSelectionData, "$-23,456.78, ");
503 addElement(lineSelectionData, "not ");
504 addElement(lineSelectionData, "-$32,456.78!\n");
505
506 /* to test for bug #4098467
507 What follows is a string of Korean characters (I found it in the Yellow Pages
508 ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
509 it correctly), first as precomposed syllables, and then as conjoining jamo.
510 Both sequences should be semantically identical and break the same way.
511 precomposed syllables... */
512 free(addElement2(lineSelectionData, CharsToUChars("\\uc0c1\\ud56d ")));
513 free(addElement2(lineSelectionData, CharsToUChars("\\ud55c\\uc778 ")));
514 free(addElement2(lineSelectionData, CharsToUChars("\\uc5f0\\ud569 ")));
515 free(addElement2(lineSelectionData, CharsToUChars("\\uc7a5\\ub85c\\uad50\\ud68c ")));
516 /* conjoining jamo... */
517 free(addElement2(lineSelectionData, CharsToUChars("\\u1109\\u1161\\u11bc\\u1112\\u1161\\u11bc ")));
518 free(addElement2(lineSelectionData, CharsToUChars("\\u1112\\u1161\\u11ab\\u110b\\u1175\\u11ab ")));
519 free(addElement2(lineSelectionData, CharsToUChars("\\u110b\\u1167\\u11ab\\u1112\\u1161\\u11b8 ")));
520 free(addElement2(lineSelectionData, CharsToUChars("\\u110c\\u1161\\u11bc\\u1105\\u1169\\u1100\\u116d\\u1112\\u116c")));
521
522 /* to test for bug #4117554: Fullwidth .!? should be treated as postJwrd */
523 free(addElement2(lineSelectionData, CharsToUChars("\\u4e01\\uff0e")));
524 free(addElement2(lineSelectionData, CharsToUChars("\\u4e02\\uff01")));
525 free(addElement2(lineSelectionData, CharsToUChars("\\u4e03\\uff1f")));
526
527 elems = Count(lineSelectionData);
528 log_verbose("In line: the no: of lines are %d\n", elems);
529 testLineText = createTestData(lineSelectionData, elems);
530
531
532 }
533
534 /*
535
536 const UChar* graveS = "S" + (UniChar)0x0300;
537 const UChar* acuteBelowI = "i" + UCharToUCharArray(0x0317);
538 const UChar* acuteE = "e" + UCharToUCharArray(0x0301);
539 const UChar* circumflexA = "a" + UCharToUCharArray(0x0302);
540 const UChar* tildeE = "e" + UCharToUCharArray(0x0303);
541 */
542
543 /**
544 * Add Character Data
545 */
546 void addTestCharacterData()
547 {
548 int32_t elems;
549 UChar temp[10];
550 UChar *td;
551
552 characterSelectionData=(Vector*)malloc(sizeof(Vector));
553 characterSelectionData->text=(UChar*)malloc(sizeof(UChar) * 2);
554 u_uastrcpy(characterSelectionData->text, "B");
555 characterSelectionData->link=NULL;
556
557 u_uastrcpy(temp, "S");
558 td = UCharToUCharArray(0x0317);
559 u_strcat(temp, td);
560 free(td);
561 /*u_strcat(temp, UCharToUCharArray(0x0317));*/
562 addElement2(characterSelectionData, temp); /* graveS */
563
564 u_uastrcpy(temp, "i");
565 td = UCharToUCharArray(0x0301);
566 u_strcat(temp, td);
567 free(td);
568 /*u_strcat(temp, UCharToUCharArray(0x0301));*/
569 addElement2(characterSelectionData, temp); /* acuteBelowI */
570
571 addElement(characterSelectionData, "m");
572 addElement(characterSelectionData, "p");
573 addElement(characterSelectionData, "l");
574
575 u_uastrcpy(temp, "e");
576 td = UCharToUCharArray(0x0301);
577 u_strcat(temp, td);
578 free(td);
579 addElement2(characterSelectionData, temp);/* acuteE */
580
581 addElement(characterSelectionData, " ");
582 addElement(characterSelectionData, "s");
583
584 u_uastrcpy(temp, "a");
585 td = UCharToUCharArray(0x0302);
586 u_strcat(temp, td);
587 free(td);
588 addElement2(characterSelectionData, temp);/* circumflexA */
589
590 addElement(characterSelectionData, "m");
591 addElement(characterSelectionData, "p");
592 addElement(characterSelectionData, "l");
593
594 u_uastrcpy(temp, "e");
595 td = UCharToUCharArray(0x0303);
596 u_strcat(temp, td);
597 free(td);
598 addElement2(characterSelectionData, temp); /* tildeE */
599
600 addElement(characterSelectionData, ".");
601 addElement(characterSelectionData, "w");
602
603 u_uastrcpy(temp, "a");
604 td = UCharToUCharArray(0x0302);
605 u_strcat(temp, td);
606 free(td);
607 addElement2(characterSelectionData, temp);/* circumflexA */
608
609 addElement(characterSelectionData, "w");
610 addElement(characterSelectionData, "a");
611 addElement(characterSelectionData, "f");
612 addElement(characterSelectionData, "q");
613 addElement(characterSelectionData, "\n");
614 addElement(characterSelectionData, "\r");
615 addElement(characterSelectionData, "\r\n");
616 addElement(characterSelectionData, "\n");
617 addElement(characterSelectionData, "E");
618 /* to test for bug #4098467
619 What follows is a string of Korean characters (I found it in the Yellow Pages
620 ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed
621 it correctly), first as precomposed syllables, and then as conjoining jamo.
622 Both sequences should be semantically identical and break the same way.
623 precomposed syllables... */
624 free(addElement2(characterSelectionData, CharsToUChars("\\uc0c1")));
625 free(addElement2(characterSelectionData, CharsToUChars("\\ud56d")));
626 addElement(characterSelectionData, " ");
627 free(addElement2(characterSelectionData, CharsToUChars("\\ud55c")));
628 free(addElement2(characterSelectionData, CharsToUChars("\\uc778")));
629 addElement(characterSelectionData, " ");
630 free(addElement2(characterSelectionData, CharsToUChars("\\uc5f0")));
631 free(addElement2(characterSelectionData, CharsToUChars("\\ud569")));
632 addElement(characterSelectionData, " ");
633 free(addElement2(characterSelectionData, CharsToUChars("\\uc7a5")));
634 free(addElement2(characterSelectionData, CharsToUChars("\\ub85c")));
635 free(addElement2(characterSelectionData, CharsToUChars("\\uad50")));
636 free(addElement2(characterSelectionData, CharsToUChars("\\ud68c")));
637 addElement(characterSelectionData, " ");
638 /* conjoining jamo... */
639 free(addElement2(characterSelectionData, CharsToUChars("\\u1109\\u1161\\u11bc")));
640 free(addElement2(characterSelectionData, CharsToUChars("\\u1112\\u1161\\u11bc")));
641 addElement(characterSelectionData, " ");
642 free(addElement2(characterSelectionData, CharsToUChars("\\u1112\\u1161\\u11ab")));
643 free(addElement2(characterSelectionData, CharsToUChars("\\u110b\\u1175\\u11ab")));
644 addElement(characterSelectionData, " ");
645 free(addElement2(characterSelectionData, CharsToUChars("\\u110b\\u1167\\u11ab")));
646 free(addElement2(characterSelectionData, CharsToUChars("\\u1112\\u1161\\u11b8")));
647 addElement(characterSelectionData, " ");
648 free(addElement2(characterSelectionData, CharsToUChars("\\u110c\\u1161\\u11bc")));
649 free(addElement2(characterSelectionData, CharsToUChars("\\u1105\\u1169")));
650 free(addElement2(characterSelectionData, CharsToUChars("\\u1100\\u116d")));
651 free(addElement2(characterSelectionData, CharsToUChars("\\u1112\\u116c")));
652
653 elems = Count(characterSelectionData);
654 log_verbose("In character: the no: of characters are %d", elems);
655 testCharacterText = createTestData(characterSelectionData, elems);
656 }
657
658 UChar* createTestData(Vector *select, int32_t e)
659 {
660 int32_t i, len;
661 UChar* result;
662 result=(UChar*)malloc(sizeof(UChar) * 2);
663 u_uastrcpy(result, "");
664 i=0;
665 while (i<e) {
666 len=u_strlen(result)+1;
667 result=(UChar*)realloc(result, sizeof(UChar) * (len + u_strlen(elementAt(select,i))));
668 u_strcat(result, elementAt(select,i));
669 i++;
670 }
671
672 return result;
673 }
674
675 /*---------------------------------------------
676 SentenceBreak tests
677 --------------------------------------------- */
678
679 void TestForwardSentenceSelection()
680 {
681 UErrorCode status = U_ZERO_ERROR;
682 UBreakIterator *e;
683 addTestSentenceData();
684 e = ubrk_open(UBRK_SENTENCE, "en_US", testSentenceText, u_strlen(testSentenceText), &status);
685 if(U_FAILURE(status)){
686 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
687 return;
688 }
689 /* sample(e, testSentenceText); */
690 log_verbose("Testing forward sentence selection.....\n");
691 doForwardSelectionTest(e, testSentenceText, sentenceSelectionData);
692 ubrk_close(e);
693 cleanupVector(sentenceSelectionData);
694 free(testSentenceText);
695 /*free(sentenceSelectionData);*/
696 }
697
698 void TestFirstSentenceSelection()
699 {
700 UErrorCode status = U_ZERO_ERROR;
701 UBreakIterator *e;
702 addTestSentenceData();
703 e = ubrk_open(UBRK_SENTENCE, "en_US", testSentenceText, u_strlen(testSentenceText), &status);
704 if(U_FAILURE(status)){
705 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
706 return;
707 }
708 log_verbose("Testing first sentence selection.....\n");
709 doFirstSelectionTest(e, testSentenceText, sentenceSelectionData);
710 ubrk_close(e);
711 cleanupVector(sentenceSelectionData);
712 free(testSentenceText);
713 /*free(sentenceSelectionData);*/
714 }
715
716 void TestLastSentenceSelection()
717 {
718 UErrorCode status = U_ZERO_ERROR;
719 UBreakIterator *e;
720 addTestSentenceData();
721 e = ubrk_open(UBRK_SENTENCE, "en_US", testSentenceText, u_strlen(testSentenceText), &status);
722 if(U_FAILURE(status)){
723 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
724 return;
725 }
726 log_verbose("Testing last sentence selection.....\n");
727 doLastSelectionTest(e, testSentenceText, sentenceSelectionData);
728 ubrk_close(e);
729 cleanupVector(sentenceSelectionData);
730 free(testSentenceText);
731 /*free(sentenceSelectionData);*/
732 }
733
734 void TestBackwardSentenceSelection()
735 {
736 UErrorCode status = U_ZERO_ERROR;
737 UBreakIterator *e;
738 addTestSentenceData();
739 e = ubrk_open(UBRK_SENTENCE, "en_US", testSentenceText, u_strlen(testSentenceText), &status);
740 if(U_FAILURE(status)){
741 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
742 return;
743 }
744 log_verbose("Testing backward sentence selection.....\n");
745 doBackwardSelectionTest(e, testSentenceText, sentenceSelectionData);
746 ubrk_close(e);
747 cleanupVector(sentenceSelectionData);
748 free(testSentenceText);
749 /*free(sentenceSelectionData);*/
750 }
751
752 void TestForwardSentenceIndexSelection()
753 {
754 UErrorCode status = U_ZERO_ERROR;
755 UBreakIterator *e;
756 addTestSentenceData();
757 e = ubrk_open(UBRK_SENTENCE, "en_US", testSentenceText, u_strlen(testSentenceText), &status);
758 if(U_FAILURE(status)){
759 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
760 return;
761 }
762 log_verbose("Testing sentence forward index selection.....\n");
763 doForwardIndexSelectionTest(e, testSentenceText, sentenceSelectionData);
764 ubrk_close(e);
765 cleanupVector(sentenceSelectionData);
766 free(testSentenceText);
767 /*free(sentenceSelectionData);*/
768 }
769
770 void TestBackwardSentenceIndexSelection()
771 {
772 UErrorCode status = U_ZERO_ERROR;
773 UBreakIterator *e;
774 addTestSentenceData();
775 e = ubrk_open(UBRK_SENTENCE, "en_US", testSentenceText, u_strlen(testSentenceText), &status);
776 if(U_FAILURE(status)){
777 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
778 return;
779 }
780 log_verbose("Testing sentence backward index selection.....\n");
781 doBackwardIndexSelectionTest(e, testSentenceText, sentenceSelectionData);
782 ubrk_close(e);
783 cleanupVector(sentenceSelectionData);
784 free(testSentenceText);
785 /*free(sentenceSelectionData);*/
786 }
787
788
789 void TestSentenceInvariants()
790 {
791 int x;
792 UChar *s;
793 UChar *tempStr;
794 AllocateTextBoundary();
795 x=u_strlen(cannedTestChars);
796 s=(UChar*)malloc(sizeof(UChar) * (x + 15));
797 u_strcpy(s, cannedTestChars);
798 tempStr = CharsToUChars(".,\\u3001\\u3002\\u3041\\u3042\\u3043\\ufeff");
799 u_strcat(s, tempStr);
800 free(tempStr);
801 log_verbose("Testing sentence Other invariants.....\n");
802 doOtherInvariantTest(UBRK_SENTENCE, s);
803 free(s);
804 FreeTextBoundary();
805 }
806
807 /*---------------------------------------------
808 WordBreak tests
809 --------------------------------------------- */
810
811 void TestForwardWordSelection()
812 {
813 UErrorCode status = U_ZERO_ERROR;
814 UBreakIterator *e;
815 addTestWordData();
816 e = ubrk_open(UBRK_WORD, "en_US", testWordText, u_strlen(testWordText), &status);
817 if(U_FAILURE(status)){
818 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
819 return;
820 }
821 /* sample(e, testWordText); */
822 log_verbose("Testing forward word selection.....\n");
823 doForwardSelectionTest(e, testWordText, wordSelectionData);
824 doForwardSelectionTest(e, testWordText, wordSelectionData);
825 ubrk_close(e);
826 cleanupVector(wordSelectionData);
827 free(testWordText);
828 /*free(wordSelectionData);*/
829 }
830
831 void TestFirstWordSelection()
832 {
833 UErrorCode status = U_ZERO_ERROR;
834 UBreakIterator *e;
835 addTestWordData();
836 e = ubrk_open(UBRK_WORD, "en_US", testWordText, u_strlen(testWordText), &status);
837 if(U_FAILURE(status)){
838 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
839 return;
840 }
841 log_verbose("Testing first word selection.....\n");
842 doFirstSelectionTest(e, testWordText, wordSelectionData);
843 ubrk_close(e);
844 cleanupVector(wordSelectionData);
845 free(testWordText);
846 /*free(wordSelectionData);*/
847 }
848
849 void TestLastWordSelection()
850 {
851 UErrorCode status = U_ZERO_ERROR;
852 UBreakIterator *e;
853 addTestWordData();
854 e = ubrk_open(UBRK_WORD, "en_US", testWordText, u_strlen(testWordText), &status);
855 if(U_FAILURE(status)){
856 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
857 return;
858 }
859 log_verbose("Testing last word selection.....\n");
860 doLastSelectionTest(e, testWordText, wordSelectionData);
861 ubrk_close(e);
862 cleanupVector(wordSelectionData);
863 free(testWordText);
864 /*free(wordSelectionData);*/
865 }
866
867 void TestBackwardWordSelection()
868 {
869 UErrorCode status = U_ZERO_ERROR;
870 UBreakIterator *e;
871 addTestWordData();
872 e = ubrk_open(UBRK_WORD, "en_US", testWordText, u_strlen(testWordText), &status);
873 if(U_FAILURE(status)){
874 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
875 return;
876 }
877 log_verbose("Testing backward word selection.....\n");
878 doBackwardSelectionTest(e, testWordText, wordSelectionData);
879 ubrk_close(e);
880 cleanupVector(wordSelectionData);
881 free(testWordText);
882 /*free(wordSelectionData);*/
883 }
884
885 void TestForwardWordIndexSelection()
886 {
887 UErrorCode status = U_ZERO_ERROR;
888 UBreakIterator *e;
889 addTestWordData();
890 e = ubrk_open(UBRK_WORD, "en_US", testWordText, u_strlen(testWordText), &status);
891 if(U_FAILURE(status)){
892 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
893 return;
894 }
895 log_verbose("Testing forward word index selection.....\n");
896 doForwardIndexSelectionTest(e, testWordText, wordSelectionData);
897 ubrk_close(e);
898 cleanupVector(wordSelectionData);
899 free(testWordText);
900 /*free(wordSelectionData);*/
901 }
902
903 void TestBackwardWordIndexSelection()
904 {
905 UErrorCode status = U_ZERO_ERROR;
906 UBreakIterator *e;
907 addTestWordData();
908 e = ubrk_open(UBRK_WORD, "en_US", testWordText, u_strlen(testWordText), &status);
909 if(U_FAILURE(status)){
910 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
911 return;
912 }
913 log_verbose("Testing backward word index selection.....\n");
914 doBackwardIndexSelectionTest(e, testWordText, wordSelectionData);
915 ubrk_close(e);
916 cleanupVector(wordSelectionData);
917 free(testWordText);
918 /*free(wordSelectionData);*/
919 }
920
921 void TestWordInvariants()
922 {
923 UChar *s;
924 UChar *tempStr;
925 int x;
926 AllocateTextBoundary();
927 x=u_strlen(cannedTestChars);
928 s=(UChar*)malloc(sizeof(UChar) * (x + 15));
929 u_strcpy(s, cannedTestChars);
930 tempStr = CharsToUChars("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");
931 u_strcat(s, tempStr);
932 free(tempStr);
933 log_verbose("Testing word break invariant.....\n");
934 doBreakInvariantTest(UBRK_WORD, s);
935 u_strcpy(s, cannedTestChars);
936 tempStr = CharsToUChars("\',.\\u3041\\u3042\\u3043\\u309b\\u309c\\u30a1\\u30a2\\u30a3\\u4e00\\u4e01\\u4e02");
937 u_strcat(s, tempStr);
938 free(tempStr);
939 doOtherInvariantTest(UBRK_WORD, s);
940 free(s);
941 FreeTextBoundary();
942 }
943
944 /*---------------------------------------------
945 LineBreak tests
946 --------------------------------------------- */
947
948 void TestForwardLineSelection()
949 {
950 UErrorCode status = U_ZERO_ERROR;
951 UBreakIterator *e;
952 addTestLineData();
953 e = ubrk_open(UBRK_LINE, "en_US", testLineText, u_strlen(testLineText), &status);
954 if(U_FAILURE(status)){
955 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
956 return;
957 }
958 log_verbose("Testing forward line selection.....\n");
959 doForwardSelectionTest(e, testLineText, lineSelectionData);
960 ubrk_close(e);
961 cleanupVector(lineSelectionData);
962 free(testLineText);
963 }
964
965 void TestFirstLineSelection()
966 {
967 UErrorCode status = U_ZERO_ERROR;
968 UBreakIterator *e;
969 addTestLineData();
970 e = ubrk_open(UBRK_LINE, "en_US", testLineText, u_strlen(testLineText), &status);
971 if(U_FAILURE(status)){
972 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
973 return;
974 }
975 log_verbose("Testing first line selection.....\n");
976 doFirstSelectionTest(e, testLineText, lineSelectionData);
977 ubrk_close(e);
978 cleanupVector(lineSelectionData);
979 free(testLineText);
980 /*free(lineSelectionData);*/
981 }
982
983 void TestLastLineSelection()
984 {
985 UErrorCode status = U_ZERO_ERROR;
986 UBreakIterator *e;
987 addTestLineData();
988 e = ubrk_open(UBRK_LINE, "en_US", testLineText, u_strlen(testLineText), &status);
989 if(U_FAILURE(status)){
990 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
991 return;
992 }
993 log_verbose("Testing last line selection.....\n");
994 doLastSelectionTest(e, testLineText, lineSelectionData);
995 ubrk_close(e);
996 cleanupVector(lineSelectionData);
997 free(testLineText);
998 /*free(lineSelectionData);*/
999 }
1000
1001 void TestBackwardLineSelection()
1002 {
1003 UErrorCode status = U_ZERO_ERROR;
1004 UBreakIterator *e;
1005 addTestLineData();
1006 e = ubrk_open(UBRK_LINE, "en_US", testLineText, u_strlen(testLineText), &status);
1007 if(U_FAILURE(status)){
1008 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1009 return;
1010 }
1011 log_verbose("Testing backward line selection.....\n");
1012 doBackwardSelectionTest(e, testLineText, lineSelectionData);
1013 ubrk_close(e);
1014 cleanupVector(lineSelectionData);
1015 free(testLineText);
1016 /*free(lineSelectionData);*/
1017 }
1018
1019 void TestForwardLineIndexSelection()
1020 {
1021 UErrorCode status = U_ZERO_ERROR;
1022 UBreakIterator *e;
1023 addTestLineData();
1024 e = ubrk_open(UBRK_LINE, "en_US", testLineText, u_strlen(testLineText), &status);
1025 if(U_FAILURE(status)){
1026 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1027 return;
1028 }
1029 log_verbose("Testing forward line index selection.....\n");
1030 doForwardIndexSelectionTest(e, testLineText, lineSelectionData);
1031 ubrk_close(e);
1032 cleanupVector(lineSelectionData);
1033 free(testLineText);
1034 /*free(lineSelectionData);*/
1035 }
1036
1037 void TestBackwardLineIndexSelection()
1038 {
1039 UErrorCode status = U_ZERO_ERROR;
1040 UBreakIterator *e;
1041 addTestLineData();
1042 e = ubrk_open(UBRK_LINE, "en_US", testLineText, u_strlen(testLineText), &status);
1043 if(U_FAILURE(status)){
1044 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1045 return;
1046 }
1047 log_verbose("Testing backward line index selection.....\n");
1048 doBackwardIndexSelectionTest(e, testLineText, lineSelectionData);
1049 ubrk_close(e);
1050 cleanupVector(lineSelectionData);
1051 free(testLineText);
1052 /*free(lineSelectionData);*/
1053 }
1054
1055 void TestLineInvariants()
1056 {
1057 int errorCount,l;
1058 int32_t i, j, k;
1059 UChar c;
1060 UBreakIterator *e;
1061 UErrorCode status = U_ZERO_ERROR;
1062 UChar noBreak[10], dashes[10];
1063 UBool saw2;
1064 UChar work[5];
1065 UChar *s, *ustr;
1066 int32_t sLen;
1067
1068 AllocateTextBoundary();
1069 s=(UChar*)malloc(sizeof(UChar) * (u_strlen(cannedTestChars) + 20));
1070 u_strcpy(s, cannedTestChars);
1071 ustr = CharsToUChars(".,;:\\u3001\\u3002\\u3041\\u3042\\u3043\\u3044\\u3045\\u30a3\\u4e00\\u4e01\\u4e02");
1072 u_strcat(s, ustr);
1073 free(ustr);
1074 log_verbose("Testing line break Invariant.....\n");
1075 doBreakInvariantTest(UBRK_LINE, s);
1076 log_verbose("Testing line other Invariant....\n");
1077 doOtherInvariantTest(UBRK_LINE, s);
1078
1079
1080
1081 u_uastrcpy(work, "aaa");
1082 /* in addition to the other invariants, a line-break iterator should make sure that:
1083 it doesn't break around the non-breaking characters */
1084 e = ubrk_open(UBRK_LINE, "en_US", work, u_strlen(work), &status);
1085 errorCount=0;
1086 status=U_ZERO_ERROR;
1087 ustr = CharsToUChars("\\u00a0\\u2007\\u2011\\ufeff");
1088 u_strcpy(noBreak, ustr);
1089 free(ustr);
1090 sLen = u_strlen(s);
1091 for (i = 0; i < sLen; i++) {
1092 c = s[i];
1093 if (c == '\r' || c == '\n' || c == 0x2029 || c == 0x2028 || c == 0x0003)
1094 continue;
1095 work[0] = c;
1096 for (j = 0; j < u_strlen(noBreak); j++) {
1097 work[1] = noBreak[j];
1098 for (k = 0; k < sLen; k++) {
1099 work[2] = s[k];
1100 ubrk_setText(e, work, u_strlen(work), &status);
1101 if(U_FAILURE(status)){
1102 log_err("FAIL: Error in opening the word break Iterator in testLineInvaiants:\n %s\n", myErrorName(status));
1103 return;
1104 }
1105 for (l = ubrk_first(e); l != UBRK_DONE; l = ubrk_next(e))
1106 if (l == 1 || l == 2) {
1107 log_err("Got break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work[l - 1])),
1108 austrdup(UCharToUCharArray(work[l])) );
1109
1110 errorCount++;
1111 if (errorCount >= 75)
1112 return;
1113 }
1114 }
1115 }
1116 }
1117
1118 /* it does break after hyphens (unless they're followed by a digit, a non-spacing mark,
1119 a currency symbol, a non-breaking space, or a line or paragraph separator) */
1120 ustr = CharsToUChars("-\\u00ad\\u2010\\u2012\\u2013\\u2014");
1121 u_strcpy(dashes, ustr);
1122 free(ustr);
1123
1124 for (i = 0; i < sLen; i++) {
1125 work[0] = s[i];
1126 for (j = 0; j < u_strlen(dashes); j++) {
1127 work[1] = dashes[j];
1128 for (k = 0; k < sLen; k++) {
1129 c = s[k];
1130 if (u_charType(c) == U_DECIMAL_DIGIT_NUMBER ||
1131 u_charType(c) == U_OTHER_NUMBER ||
1132 u_charType(c) == U_NON_SPACING_MARK ||
1133 u_charType(c) == U_ENCLOSING_MARK ||
1134 u_charType(c) == U_CURRENCY_SYMBOL ||
1135 u_charType(c) == U_SPACE_SEPARATOR ||
1136 u_charType(c) == U_DASH_PUNCTUATION ||
1137 u_charType(c) == U_CONTROL_CHAR ||
1138 u_charType(c) == U_FORMAT_CHAR ||
1139 c == '\n' || c == '\r' || c == 0x2028 || c == 0x2029 ||
1140 c == 0x0003 || c == 0x00a0 || c == 0x2007 || c == 0x2011 ||
1141 c == 0xfeff)
1142 continue;
1143 work[2] = c;
1144 ubrk_setText(e, work, u_strlen(work), &status);
1145 if(U_FAILURE(status)){
1146 log_err("FAIL: Error in setting text on the word break Iterator in testLineInvaiants:\n %s \n", myErrorName(status));
1147 return;
1148 }
1149 saw2 = FALSE;
1150 for (l = ubrk_first(e); l != UBRK_DONE; l = ubrk_next(e))
1151 if (l == 2)
1152 saw2 = TRUE;
1153 if (!saw2) {
1154 log_err("Didn't get break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work[1])),
1155 austrdup(UCharToUCharArray(work[2])) );
1156 errorCount++;
1157 if (errorCount >= 75)
1158 return;
1159 }
1160 }
1161 }
1162 }
1163 ubrk_close(e);
1164 free(s);
1165 FreeTextBoundary();
1166 }
1167 /*---------------------------------------------
1168 CharacterBreak tests
1169 --------------------------------------------- */
1170
1171 void TestForwardCharacterSelection()
1172 {
1173 UErrorCode status = U_ZERO_ERROR;
1174 UBreakIterator *e;
1175 addTestCharacterData();
1176 e = ubrk_open(UBRK_CHARACTER, "en_US", testCharacterText, u_strlen(testCharacterText), &status);
1177 if(U_FAILURE(status)){
1178 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1179 return;
1180 }
1181 log_verbose("Testing forward character selection.....\n");
1182 doForwardSelectionTest(e, testCharacterText, characterSelectionData);
1183 ubrk_close(e);
1184 cleanupVector(characterSelectionData);
1185 free(testCharacterText);
1186 /*free(characterSelectionData);*/
1187 }
1188
1189 void TestFirstCharacterSelection()
1190 {
1191 UErrorCode status = U_ZERO_ERROR;
1192 UBreakIterator *e;
1193 addTestCharacterData();
1194 e = ubrk_open(UBRK_CHARACTER, "en_US", testCharacterText, u_strlen(testCharacterText), &status);
1195 if(U_FAILURE(status)){
1196 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1197 return;
1198 }
1199 log_verbose("Testing first character selection.....\n");
1200 doFirstSelectionTest(e, testCharacterText, characterSelectionData);
1201 ubrk_close(e);
1202 cleanupVector(characterSelectionData);
1203 free(testCharacterText);
1204 /*free(characterSelectionData);*/
1205 }
1206
1207 void TestLastCharacterSelection()
1208 {
1209 UErrorCode status = U_ZERO_ERROR;
1210 UBreakIterator *e;
1211 addTestCharacterData();
1212 e = ubrk_open(UBRK_CHARACTER, "en_US", testCharacterText, u_strlen(testCharacterText), &status);
1213 if(U_FAILURE(status)){
1214 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1215 return;
1216 }
1217 log_verbose("Testing last character selection.....\n");
1218 doLastSelectionTest(e, testCharacterText, characterSelectionData);
1219 ubrk_close(e);
1220 cleanupVector(characterSelectionData);
1221 free(testCharacterText);
1222 /*free(characterSelectionData);*/
1223 }
1224
1225 void TestBackwardCharacterSelection()
1226 {
1227 UErrorCode status = U_ZERO_ERROR;
1228 UBreakIterator *e;
1229 addTestCharacterData();
1230 e = ubrk_open(UBRK_CHARACTER, "en_US", testCharacterText, u_strlen(testCharacterText), &status);
1231 if(U_FAILURE(status)){
1232 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1233 return;
1234 }
1235 log_verbose("Testing backward character selection.....\n");
1236 doBackwardSelectionTest(e, testCharacterText, characterSelectionData);
1237 ubrk_close(e);
1238 cleanupVector(characterSelectionData);
1239 free(testCharacterText);
1240 /*free(characterSelectionData);*/
1241 }
1242
1243 void TestForwardCharacterIndexSelection()
1244 {
1245 UErrorCode status = U_ZERO_ERROR;
1246 UBreakIterator *e;
1247 addTestCharacterData();
1248 e = ubrk_open(UBRK_CHARACTER, "en_US", testCharacterText, u_strlen(testCharacterText), &status);
1249 if(U_FAILURE(status)){
1250 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1251 return;
1252 }
1253 log_verbose("Testing forward index character selection.....\n");
1254 doForwardIndexSelectionTest(e, testCharacterText, characterSelectionData);
1255 ubrk_close(e);
1256 cleanupVector(characterSelectionData);
1257 free(testCharacterText);
1258 /*free(characterSelectionData);*/
1259 }
1260
1261 void TestBackwardCharacterIndexSelection()
1262 {
1263 UErrorCode status = U_ZERO_ERROR;
1264 UBreakIterator *e;
1265 addTestCharacterData();
1266 e = ubrk_open(UBRK_CHARACTER, "en_US", testCharacterText, u_strlen(testCharacterText), &status);
1267 if(U_FAILURE(status)){
1268 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1269 return;
1270 }
1271 log_verbose("Testing backward character index selection.....\n");
1272 doBackwardSelectionTest(e, testCharacterText, characterSelectionData);
1273 ubrk_close(e);
1274 cleanupVector(characterSelectionData);
1275 free(testCharacterText);
1276 /*free(characterSelectionData);*/
1277 }
1278
1279 void TestCharacterInvariants()
1280 {
1281 UChar *s;
1282 UChar *tempStr;
1283
1284 AllocateTextBoundary();
1285 s=(UChar*)malloc(sizeof(UChar) * (u_strlen(cannedTestChars) + 15));
1286 u_strcpy(s, cannedTestChars);
1287 tempStr = CharsToUChars("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");
1288 u_strcat(s, tempStr);
1289 free(tempStr);
1290 log_verbose("Testing character break invariant.....\n");
1291 doBreakInvariantTest(UBRK_CHARACTER, s);
1292 u_strcpy(s, cannedTestChars);
1293 tempStr = CharsToUChars("\\u1100\\u1101\\u1102\\u1160\\u1161\\u1162\\u11a8\\u11a9\\u11aa");
1294 u_strcat(s, tempStr);
1295 free(tempStr);
1296 log_verbose("Testing character other invariant.....\n");
1297 doOtherInvariantTest(UBRK_CHARACTER, s);
1298 free(s);
1299 FreeTextBoundary();
1300 }
1301 /*---------------------------------------------
1302 other tests
1303 --------------------------------------------- */
1304
1305
1306 void TestPreceding()
1307 {
1308 int32_t p1, p2, p3, p4, f, p;
1309 UBreakIterator *e;
1310 UChar words3[15];
1311 UErrorCode status = U_ZERO_ERROR;
1312 u_uastrcpy(words3, "aaa bbb ccc");
1313 log_verbose("Testting preceding...\n");
1314 e = ubrk_open(UBRK_WORD, "en_US", words3, u_strlen(words3), &status);
1315 if(U_FAILURE(status)){
1316 log_err("FAIL: Error in ubrk_open() for word breakiterator: %s\n", myErrorName(status));
1317 }
1318
1319 ubrk_first(e);
1320 p1 = ubrk_next(e);
1321 p2 = ubrk_next(e);
1322 p3 = ubrk_next(e);
1323 p4 = ubrk_next(e);
1324 f = ubrk_following(e, p2+1);
1325 p = ubrk_preceding(e, p2+1);
1326
1327 if (f!=p3) log_err("Error in TestPreceding: %d!=%d\n", (int32_t)f, (int32_t)p3);
1328 if (p!=p2) log_err("Error in TestPreceding: %d!=%d\n", (int32_t)p, (int32_t)p2);
1329
1330 ubrk_close(e);
1331 }
1332
1333 /**
1334 * @bug 4068137
1335 */
1336 void TestEndBehaviour()
1337 {
1338 int32_t end, previous;
1339 UErrorCode status = U_ZERO_ERROR;
1340 UBreakIterator* wb;
1341 UChar testString[5];
1342 u_uastrcpy(testString, "boo");
1343 log_verbose("Testing end behaviour\n");
1344 wb = ubrk_open(UBRK_WORD, "en_US", testString, u_strlen(testString), &status);
1345 if(U_FAILURE(status)){
1346 log_err("FAIL: Error in opening the word break Iterator: %s\n", myErrorName(status));
1347 return;
1348 }
1349
1350
1351 end=ubrk_last(wb);
1352 previous=ubrk_previous(wb);
1353 log_verbose("end= %d and previous=%d %d\n", end, previous, ubrk_previous(wb));
1354
1355
1356 ubrk_close(wb);
1357 }
1358
1359
1360 /*---------------------------------------------
1361 Test implementation routines
1362 --------------------------------------------- */
1363
1364 void doForwardSelectionTest(UBreakIterator* iterator, UChar* testText, Vector* result)
1365 {
1366 UChar *expectedResult, *selectionResult;
1367 int32_t lastOffset, offset;
1368 int32_t forwardSelectionCounter = 0;
1369 int32_t forwardSelectionOffset = 0;
1370
1371 log_verbose("doForwardSelectionTest text of length: %d\n", u_strlen(testText));
1372
1373
1374 lastOffset = ubrk_first(iterator);
1375 offset = ubrk_next(iterator);
1376 while(offset!=UBRK_DONE && forwardSelectionCounter < Count(result)) {
1377
1378 if (offset != ubrk_current(iterator)){
1379 log_err("current() failed: it returned %d and offset was %d\n", ubrk_current(iterator), offset);
1380 }
1381 expectedResult =elementAt(result, forwardSelectionCounter);
1382 forwardSelectionOffset += u_strlen(expectedResult);
1383
1384 selectionResult=extractBetween(lastOffset, offset, testText);
1385 if (offset != forwardSelectionOffset) {
1386 log_err("\n*** Selection #%d\n expected : %s - length %d\n\rselected : %s - length %d\n",
1387 forwardSelectionCounter, austrdup(expectedResult), u_strlen(expectedResult),
1388 austrdup(selectionResult), u_strlen(selectionResult) );
1389 }
1390 log_verbose("#%d [\"%d\",\"%d\"] : %s\n", forwardSelectionCounter, lastOffset, offset,
1391 austrdup(selectionResult));
1392
1393 forwardSelectionCounter++;
1394 lastOffset = offset;
1395 offset = ubrk_next(iterator);
1396 free(selectionResult);
1397 }
1398 if (forwardSelectionCounter < Count(result) - 1){
1399 log_err("\n*** Selection #%d not found at offset %d !!!\n", forwardSelectionCounter, offset);
1400 }
1401 else if (forwardSelectionCounter >= Count(result) && offset != UBRK_DONE){
1402 log_err("\n*** Selection #%d should not exist at offset %d !!!\n", forwardSelectionCounter, offset);
1403 }
1404 }
1405 void doBackwardSelectionTest(UBreakIterator* iterator, UChar* testText, Vector* result)
1406 {
1407 UChar* expectedResult;
1408 UChar* selectionResult;
1409 int32_t backwardSelectionCounter, neededOffset, lastOffset, offset;
1410 backwardSelectionCounter = (Count(result) - 1);
1411 neededOffset = u_strlen(testText);
1412 lastOffset = ubrk_last(iterator);
1413 offset = ubrk_previous(iterator);
1414
1415 log_verbose("doBackwardSelectionTest text of length: %d\n", u_strlen(testText));
1416 while(offset != UBRK_DONE)
1417 {
1418 expectedResult = elementAt(result, backwardSelectionCounter);
1419 neededOffset -= u_strlen(expectedResult);
1420 selectionResult=extractBetween(offset, lastOffset, testText);
1421 if(offset != neededOffset) {
1422 log_err("\n*** Selection #%d\nExpected : %d > %s < \n\rSelected : %d > %s < \n",
1423 backwardSelectionCounter, neededOffset, austrdup(expectedResult),
1424 offset, austrdup(selectionResult) );
1425 }
1426
1427 log_verbose("#%d : %s\n", backwardSelectionCounter, selectionResult);
1428 backwardSelectionCounter--;
1429 lastOffset = offset;
1430 offset = ubrk_previous(iterator);
1431 free(selectionResult);
1432 }
1433 if (backwardSelectionCounter >= 0 && offset != UBRK_DONE){
1434 log_err("*** Selection #%d not found!!!\n", backwardSelectionCounter);
1435 }
1436 }
1437
1438 void doFirstSelectionTest(UBreakIterator* iterator, UChar* testText, Vector* result)
1439 {
1440 int32_t selectionStart, selectionEnd;
1441 UChar* expectedFirstSelection=NULL;
1442 UChar* tempFirst = NULL;
1443 UBool success = TRUE;
1444
1445 log_verbose("doFirstSelectionTest.......\n");
1446
1447 selectionStart = ubrk_first(iterator);
1448 selectionEnd = ubrk_next(iterator);
1449 if(selectionEnd != UBRK_DONE) {
1450
1451 tempFirst=extractBetween(selectionStart, selectionEnd, testText);
1452 expectedFirstSelection = elementAt(result,0);
1453
1454 if(u_strcmp(tempFirst,expectedFirstSelection)!=0) {
1455 log_err("\n### Error in doFirstSelectionTest. First selection not equal to what expected\n");
1456 log_err("Expected: %s - length %d\n\rSelected: %s - length %d\n",
1457 austrdup(expectedFirstSelection), u_strlen(expectedFirstSelection),
1458 austrdup(tempFirst), u_strlen(tempFirst));
1459 success = FALSE;
1460 }
1461 }
1462 else if (selectionStart != 0 || u_strlen(testText)!= 0) {
1463 log_err("\n### Error in doFirstSelectionTest. Could not get first selection.\n\r start= %d end= %d\n",
1464 selectionStart, selectionEnd);
1465 success = FALSE;
1466 }
1467
1468 if(success) {
1469 log_verbose("doFirstSelectionTest\n\nExpexcted first selection: %s\nCalculated first selection: %s is correct\n",
1470 austrdup(expectedFirstSelection), austrdup(tempFirst) );
1471
1472 }
1473 if(tempFirst!= NULL) {
1474 free(tempFirst);
1475 }
1476
1477 }
1478
1479 void doLastSelectionTest(UBreakIterator* iterator, UChar* testText, Vector* result)
1480 {
1481 int32_t selectionEnd, selectionStart;
1482 UChar *expectedLastSelection=NULL;
1483 UChar *tempLast = NULL;
1484 UBool success = TRUE;
1485
1486 log_verbose("doLastSelectionTest.......\n");
1487
1488 selectionEnd = ubrk_last(iterator);
1489 selectionStart = ubrk_previous(iterator);
1490
1491
1492 if(selectionStart != UBRK_DONE) {
1493 tempLast=extractBetween(selectionStart, selectionEnd, testText);
1494 expectedLastSelection = elementAt(result,Count(result)-1);
1495 if(u_strcmp(tempLast,expectedLastSelection)!=0) {
1496 log_err("\n\n### Error in doLastSelectionTest. Last selection not equal to what expected.\n");
1497 log_err("Expected: %s - length %d\n\r Selected: %s - length %d\n",
1498 austrdup(expectedLastSelection), u_strlen(expectedLastSelection),
1499 austrdup(tempLast), u_strlen(tempLast) );
1500 success = FALSE;
1501
1502 }
1503 }
1504 else if (selectionEnd != 0 || u_strlen(testText)!= 0) {
1505 log_err("\n### Error in doLastSelectionTest. Could not get last selection. [%d,%d]\n", selectionStart,
1506 selectionEnd);
1507 success = FALSE;
1508 }
1509 if(success) {
1510 log_verbose("doLastSelectionTest\n\nExpected Last selection: %s \n", austrdup(expectedLastSelection));
1511 log_verbose("Calculated last Selection: %s is correct\n", austrdup(tempLast) );
1512 }
1513
1514 if(tempLast!=NULL) {
1515 free(tempLast);
1516 }
1517 }
1518
1519 /**
1520 * @bug 4052418 4068139
1521 */
1522 void doForwardIndexSelectionTest(UBreakIterator* iterator, UChar* testText, Vector* result)
1523 {
1524 int32_t arrayCount, textLength;
1525 int32_t selBegin, selEnd, current, entry, pos;
1526 int32_t offset;
1527
1528 log_verbose("doForwardIndexSelectionTest text of length: %d\n", u_strlen(testText));
1529 arrayCount = Count(result);
1530 textLength = u_strlen(testText);
1531
1532 for(offset = 0; offset < textLength; offset++) {
1533 selBegin = ubrk_preceding(iterator, offset);
1534 selEnd = ubrk_following(iterator, offset);
1535
1536 entry = 0;
1537 pos = 0;
1538 if (selBegin != UBRK_DONE) {
1539 while (pos < selBegin && entry < arrayCount) {
1540 pos += u_strlen(elementAt(result, entry));
1541 ++entry;
1542 }
1543 if (pos != selBegin) {
1544 log_err("With offset = %d, got back spurious %d from preceding\n", offset, selBegin);
1545 continue;
1546 }
1547 else {
1548 pos += u_strlen(elementAt(result, entry));
1549 ++entry;
1550 }
1551 }
1552 current=ubrk_current(iterator);
1553 if(pos==current){
1554 if (pos != selEnd) {
1555 log_err("With offset = %d, got back erroneous %d from follwoing\n", offset, selEnd);
1556 continue;
1557 }
1558 }
1559 }
1560 }
1561
1562 /**
1563 * @bug 4052418 4068139
1564 */
1565 void doBackwardIndexSelectionTest(UBreakIterator* iterator, UChar* testText, Vector* result)
1566 {
1567 int32_t arrayCount, textLength;
1568 int32_t selBegin, selEnd, current, entry, pos;
1569 int32_t offset;
1570
1571 log_verbose("doBackwardIndexSelectionTest text of length: %d\n", u_strlen(testText));
1572 arrayCount = Count(result);
1573 textLength = u_strlen(testText);
1574
1575 for(offset = textLength-1; offset >= 0; offset--) {
1576 selBegin = ubrk_preceding(iterator, offset);
1577 selEnd = ubrk_following(iterator, offset);
1578
1579 entry = 0;
1580 pos = 0;
1581 if (selBegin != UBRK_DONE) {
1582 while (pos < selBegin && entry < arrayCount) {
1583 pos += u_strlen(elementAt(result, entry));
1584 ++entry;
1585 }
1586 if (pos != selBegin) {
1587 log_err("With offset = %d, got back spurious %d from preceding\n", offset, selBegin);
1588 continue;
1589 }
1590 else {
1591 pos += u_strlen(elementAt(result, entry));
1592 ++entry;
1593 }
1594 }
1595 current=ubrk_current(iterator);
1596 if(pos==current){
1597 if (pos != selEnd) {
1598 log_err("With offset = %d, got back erroneous %d from following\n", offset, selEnd);
1599 continue;
1600 }
1601 }
1602 }
1603 }
1604
1605
1606
1607 void doBreakInvariantTest(UBreakIteratorType type, UChar* testChars)
1608 {
1609 int l,k;
1610 UBreakIterator *tb;
1611 int32_t i, j;
1612 UErrorCode status = U_ZERO_ERROR;
1613 UChar work[4];
1614 UChar breaks[10];
1615 UChar c;
1616 UChar *ustr;
1617 UBool seen2;
1618 int errorCount = 0;
1619 status=U_ZERO_ERROR;
1620
1621 u_uastrcpy(work, "aaa");
1622
1623 log_verbose("doBreakInvariantTest text of length: %d\n", u_strlen(testChars));
1624 /* a break should always occur after CR (unless followed by LF), LF, PS, and LS */
1625
1626 ustr = CharsToUChars("\r\n\\u2029\\u2028");
1627 u_strcpy(breaks, ustr);
1628 free(ustr);
1629
1630 tb = ubrk_open(type, "en_US", work, u_strlen(work), &status);
1631
1632 for (i = 0; i < u_strlen(breaks); i++) {
1633 work[1] = breaks[i];
1634 for (j = 0; j < u_strlen(testChars); j++) {
1635 work[0] = testChars[j];
1636 for (k = 0; k < u_strlen(testChars); k++) {
1637 c = testChars[k];
1638
1639 /* if a cr is followed by lf, ps, ls or etx, don't do the check (that's
1640 not supposed to work) */
1641 if (work[1] == '\r' && (c == '\n' || c == 0x2029
1642 || c == 0x2028 || c == 0x0003))
1643 continue;
1644
1645 work[2] = testChars[k];
1646 ubrk_setText(tb, work, u_strlen(work), &status);
1647 if(U_FAILURE(status)){
1648 log_err("ERROR in opening the breakIterator in doVariant Function: %s\n", myErrorName(status));
1649 }
1650 seen2 = FALSE;
1651 for (l = ubrk_first(tb); l != UBRK_DONE; l = ubrk_next(tb)) {
1652 if (l == 2)
1653 seen2 = TRUE;
1654 }
1655 if (!seen2) {
1656 log_err("No break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work[1])),
1657 austrdup(UCharToUCharArray(work[2])) );
1658 errorCount++;
1659 if (errorCount >= 75)
1660 return;
1661 }
1662 }
1663 }
1664 }
1665 ubrk_close(tb);
1666 }
1667
1668 void doOtherInvariantTest(UBreakIteratorType type , UChar* testChars)
1669 {
1670 int32_t k;
1671 UBreakIterator *tb;
1672 int32_t i, j;
1673 UErrorCode status = U_ZERO_ERROR;
1674 UChar work[5];
1675 UChar c;
1676 int32_t errorCount = 0;
1677 status=U_ZERO_ERROR;
1678
1679 u_uastrcpy(work, "a\r\na");
1680
1681 log_verbose("doOtherInvariantTest text of length: %d\n", u_strlen(testChars));
1682
1683 tb = ubrk_open(type, "en_us", work, u_strlen(work), &status);
1684
1685 /* a break should never occur between CR and LF */
1686 for (i = 0; i < u_strlen(testChars); i++) {
1687 work[0] = testChars[i];
1688 for (j = 0; j < u_strlen(testChars); j++) {
1689 work[3] = testChars[j];
1690 ubrk_setText(tb, work, u_strlen(work), &status);
1691 if(U_FAILURE(status)){
1692 log_err("ERROR in opening the breakIterator in doVariant Function: %s\n", myErrorName(status));
1693 }
1694 for ( k = ubrk_first(tb); k != UBRK_DONE; k = ubrk_next(tb))
1695 if (k == 2) {
1696 log_err("Break between CR and LF in string U+%s, U+d U+a U+%s\n",
1697 austrdup(UCharToUCharArray(work[0])), austrdup(UCharToUCharArray(work[3])) );
1698 errorCount++;
1699 if (errorCount >= 75)
1700 return;
1701 }
1702 }
1703 }
1704
1705 /* a break should never occur before a non-spacing mark, unless the preceding
1706 character is CR, LF, PS, or LS */
1707 u_uastrcpy(work,"aaaa");
1708 for (i = 0; i < u_strlen(testChars); i++) {
1709 c = testChars[i];
1710 if (c == '\n' || c == '\r' || c == 0x2029 || c == 0x2028 || c == 0x0003)
1711 continue;
1712 work[1] = c;
1713 for (j = 0; j < u_strlen(testChars); j++) {
1714 c = testChars[j];
1715 if ((u_charType(c) != U_NON_SPACING_MARK) &&
1716 (u_charType(c) != U_ENCLOSING_MARK))
1717 continue;
1718 work[2] = c;
1719 ubrk_setText(tb, work, u_strlen(work), &status);
1720 if(U_FAILURE(status)){
1721 log_err("ERROR in opening the breakIterator in doOtherVariant Function %s\n", myErrorName(status));
1722 }
1723 for (k = ubrk_first(tb); k != UBRK_DONE; k = ubrk_next(tb))
1724 if (k == 2) {
1725 log_err("Break between U+%s and U+%s\n", austrdup(UCharToUCharArray(work[1])),
1726 austrdup(UCharToUCharArray(work[2])) );
1727 errorCount++;
1728 if (errorCount >= 75)
1729 return;
1730 }
1731 }
1732 }
1733 ubrk_close(tb);
1734 }
1735
1736 void sample(UBreakIterator* tb, UChar* text)
1737 {
1738
1739 int32_t start, end;
1740 UChar* substring;
1741 log_verbose("-------------------------\n");
1742 log_verbose("%s of length %d\n", austrdup(text), u_strlen(text));
1743
1744 start = ubrk_first(tb);
1745 for (end = ubrk_next(tb); end != UBRK_DONE; end = ubrk_next(tb)) {
1746 substring=extractBetween(start, end, text);
1747 log_err("[%d,%d] \"%s\" \n", start, end, austrdup(substring) );
1748 start = end;
1749 free(substring);
1750 }
1751
1752 }
1753
1754 void addBrkIterRegrTest(TestNode** root);
1755
1756 void addBrkIterRegrTest(TestNode** root)
1757 {
1758
1759 #if 0
1760 /* These tests are removed becaue
1761 * 1. The test data is completely redundant with that in the C++ break iterator tests
1762 * 2. The data here is stale, and I don't want to copy all of the changes from the C++ tests, and
1763 * 3. The C API is covered by the API tests.
1764 */
1765
1766 addTest(root, &TestForwardWordSelection, "tstxtbd/cregrtst/TestForwardWordSelection" );
1767 addTest(root, &TestBackwardWordSelection, "tstxtbd/cregrtst/TestBackwardWordSelection" );
1768 addTest(root, &TestFirstWordSelection, "tstxtbd/cregrtst/TestFirstWordSelection" );
1769 addTest(root, &TestLastWordSelection, "tstxtbd/cregrtst/TestLastWordSelection" );
1770 addTest(root, &TestForwardWordIndexSelection, "tstxtbd/cregrtst/TestForwardWordIndexSelection");
1771 addTest(root, &TestBackwardWordIndexSelection, "tstxtbd/cregrtst/TestBackwardWordIndexSelection");
1772 addTest(root, &TestForwardSentenceSelection, "tstxtbd/cregrtst/TestForwardSentenceSelection");
1773 addTest(root, &TestBackwardSentenceSelection, "tstxtbd/cregrtst/TestBackwardSentenceSelection");
1774 addTest(root, &TestFirstSentenceSelection, "tstxtbd/cregrtst/TestFirstSentenceSelection");
1775 addTest(root, &TestLastSentenceSelection, "tstxtbd/cregrtst/TestLastSentenceSelection");
1776 addTest(root, &TestForwardSentenceIndexSelection, "tstxtbd/cregrtst/TestForwardSentenceIndexSelection");
1777 addTest(root, &TestBackwardSentenceIndexSelection, "tstxtbd/cregrtst/TestBackwardSentenceIndexSelection");
1778
1779 addTest(root, &TestForwardLineSelection, "tstxtbd/cregrtst/TestForwardLineSelection");
1780 addTest(root, &TestBackwardLineSelection, "tstxtbd/cregrtst/TestBackwardLineSelection");
1781 addTest(root, &TestFirstLineSelection, "tstxtbd/cregrtst/TestFirstLineSelection");
1782 addTest(root, &TestLastLineSelection, "tstxtbd/cregrtst/TestLastLineSelection");
1783 addTest(root, &TestForwardLineIndexSelection, "tstxtbd/cregrtst/TestForwardLineIndexSelection");
1784 addTest(root, &TestBackwardLineIndexSelection, "tstxtbd/cregrtst/TestBackwardLineIndexSelection");
1785
1786 addTest(root, &TestForwardCharacterSelection, "tstxtbd/cregrtst/TestForwardCharacterSelection");
1787 addTest(root, &TestBackwardCharacterSelection, "tstxtbd/cregrtst/TestBackwardCharacterSelection");
1788 addTest(root, &TestFirstCharacterSelection, "tstxtbd/cregrtst/TestFirstCharacterSelection");
1789 addTest(root, &TestLastCharacterSelection, "tstxtbd/cregrtst/TestLastCharacterSelection");
1790 addTest(root, &TestForwardCharacterIndexSelection, "tstxtbd/cregrtst/TestForwardCharacterIndexSelection");
1791 addTest(root, &TestBackwardCharacterIndexSelection, "tstxtbd/cregrtst/TestBackwardCharacterIndexSelection");
1792
1793 addTest(root, &TestPreceding, "tstxtbd/cregrtst/TestPreceding");
1794 addTest(root, &TestEndBehaviour, "tstxtbd/cregrtst/TestEndBehaviour");
1795
1796 addTest(root, &TestWordInvariants, "tstxtbd/cregrtst/TestWordInvariants");
1797 addTest(root, &TestSentenceInvariants, "tstxtbd/cregrtst/TestSentenceInvariants");
1798 addTest(root, &TestCharacterInvariants, "tstxtbd/cregrtst/TestCharacterInvariants");
1799 addTest(root, &TestLineInvariants, "tstxtbd/cregrtst/TestLineInvariants");
1800 #endif
1801
1802 }
1803
1804 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */