]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/reapits.c
ICU-6.2.14.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
7 *
8 * File reapits.c
9 *
10 *********************************************************************************/
11 /*C API TEST FOR Regular Expressions */
12 /**
13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
16 *
17 * More complete testing of regular expression functionality is done with the C++ tests.
18 **/
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24 #include <stdlib.h>
25 #include <string.h>
26 #include "unicode/uloc.h"
27 #include "unicode/uregex.h"
28 #include "unicode/ustring.h"
29 #include "cintltst.h"
30
/*
 * TEST_ASSERT_SUCCESS(status)
 *   Logs a test error, including the symbolic name of the error code, when
 *   `status` is an ICU failure code.  Execution continues afterwards.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement and can be used safely in unbraced if/else arms.
 */
#define TEST_ASSERT_SUCCESS(status) do { if (U_FAILURE(status)) { \
    log_err("Failure at file %s, line %d, error = %s\n", __FILE__, __LINE__, u_errorName(status)); \
} } while (0)

/*
 * TEST_ASSERT(expr)
 *   Logs a test error with the file and line of the call site when `expr`
 *   evaluates to FALSE.  Execution continues afterwards.
 */
#define TEST_ASSERT(expr) do { if ((expr)==FALSE) { \
    log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__); \
} } while (0)
36
/*
 * TEST_ASSERT_STRING(expected, actual, nulTerm)
 *   Compares the UChar string `actual` with the C string `expected` and logs
 *   a test error showing both strings when they differ.
 *
 *   expected  char string holding the expected text.  It must fit, with its
 *             terminator, in the 120-char scratch buffer used below.
 *   actual    UChar string to check.
 *   nulTerm   TRUE:  `actual` must match `expected` exactly, including a NUL
 *                    terminator directly after the last expected character.
 *             FALSE: only the first strlen(expected) characters are compared;
 *                    whatever follows them in `actual` is ignored.
 *
 *   The scratch buffer is terminated explicitly after each conversion:
 *   u_austrncpy() is given an exact character count, so when `actual` is at
 *   least that long nothing else terminates the buffer, and the failure
 *   message prints it with %s.
 *
 *   The body is wrapped in do { } while (0) so that the macro behaves as a
 *   single statement.  Macro-local names carry an _inside_macro suffix so
 *   they cannot capture identifiers used in the arguments.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) do { \
    char     buf_inside_macro[120]; \
    int32_t  len_inside_macro = (int32_t)strlen(expected); \
    UBool    success_inside_macro; \
    if (nulTerm) { \
        u_austrncpy(buf_inside_macro, (actual), len_inside_macro+1); \
        buf_inside_macro[len_inside_macro+1] = 0; \
        success_inside_macro = (strcmp((expected), buf_inside_macro) == 0); \
    } else { \
        u_austrncpy(buf_inside_macro, (actual), len_inside_macro); \
        buf_inside_macro[len_inside_macro] = 0; \
        success_inside_macro = (strncmp((expected), buf_inside_macro, len_inside_macro) == 0); \
    } \
    if (success_inside_macro == FALSE) { \
        log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", \
                __FILE__, __LINE__, (expected), buf_inside_macro); \
    } \
} while (0)
53
54
55
56
57
58
59 static void TestRegexCAPI(void);
60
61 void addURegexTest(TestNode** root);
62
63 void addURegexTest(TestNode** root)
64 {
65 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
66 /* addTest(root, &TestBreakIteratorSafeClone, "tstxtbd/cbiapts/TestBreakIteratorSafeClone"); */
67 }
68
69
70 static void TestRegexCAPI(void) {
71 UErrorCode status = U_ZERO_ERROR;
72 URegularExpression *re;
73 UChar pat[200];
74 UChar *minus1;
75
76 memset(&minus1, -1, sizeof(minus1));
77
78 /* Mimimalist open/close */
79 u_uastrncpy(pat, "abc*", sizeof(pat)/2);
80 re = uregex_open(pat, -1, 0, 0, &status);
81 TEST_ASSERT_SUCCESS(status);
82 uregex_close(re);
83
84 /* Open with all flag values set */
85 status = U_ZERO_ERROR;
86 re = uregex_open(pat, -1,
87 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
88 0, &status);
89 TEST_ASSERT_SUCCESS(status);
90 uregex_close(re);
91
92 /* Open with an invalid flag */
93 status = U_ZERO_ERROR;
94 re = uregex_open(pat, -1, 0x40000000, 0, &status);
95 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
96 uregex_close(re);
97
98
99 /* openC open from a C string */
100 {
101 const UChar *p;
102 int32_t len;
103 status = U_ZERO_ERROR;
104 re = uregex_openC("abc*", 0, 0, &status);
105 TEST_ASSERT_SUCCESS(status);
106 p = uregex_pattern(re, &len, &status);
107 TEST_ASSERT_SUCCESS(status);
108 u_uastrncpy(pat, "abc*", sizeof(pat)/2);
109 TEST_ASSERT(u_strcmp(pat, p) == 0);
110 TEST_ASSERT(len==(int32_t)strlen("abc*"));
111
112 uregex_close(re);
113
114 /* TODO: Open with ParseError parameter */
115 }
116
117 /*
118 * clone
119 */
120 {
121 URegularExpression *clone1;
122 URegularExpression *clone2;
123 URegularExpression *clone3;
124 UChar testString1[30];
125 UChar testString2[30];
126 UBool result;
127
128
129 status = U_ZERO_ERROR;
130 re = uregex_openC("abc*", 0, 0, &status);
131 TEST_ASSERT_SUCCESS(status);
132 clone1 = uregex_clone(re, &status);
133 TEST_ASSERT_SUCCESS(status);
134 TEST_ASSERT(clone1 != NULL);
135
136 status = U_ZERO_ERROR;
137 clone2 = uregex_clone(re, &status);
138 TEST_ASSERT_SUCCESS(status);
139 TEST_ASSERT(clone2 != NULL);
140 uregex_close(re);
141
142 status = U_ZERO_ERROR;
143 clone3 = uregex_clone(clone2, &status);
144 TEST_ASSERT_SUCCESS(status);
145 TEST_ASSERT(clone3 != NULL);
146
147 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
148 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
149
150 status = U_ZERO_ERROR;
151 uregex_setText(clone1, testString1, -1, &status);
152 TEST_ASSERT_SUCCESS(status);
153 result = uregex_lookingAt(clone1, 0, &status);
154 TEST_ASSERT_SUCCESS(status);
155 TEST_ASSERT(result==TRUE);
156
157 status = U_ZERO_ERROR;
158 uregex_setText(clone2, testString2, -1, &status);
159 TEST_ASSERT_SUCCESS(status);
160 result = uregex_lookingAt(clone2, 0, &status);
161 TEST_ASSERT_SUCCESS(status);
162 TEST_ASSERT(result==FALSE);
163 result = uregex_find(clone2, 0, &status);
164 TEST_ASSERT_SUCCESS(status);
165 TEST_ASSERT(result==TRUE);
166
167 uregex_close(clone1);
168 uregex_close(clone2);
169 uregex_close(clone3);
170
171 }
172
173 /*
174 * pattern()
175 */
176 {
177 const UChar *resultPat;
178 int32_t resultLen;
179 u_uastrncpy(pat, "hello", sizeof(pat)/2);
180 status = U_ZERO_ERROR;
181 re = uregex_open(pat, -1, 0, NULL, &status);
182 resultPat = uregex_pattern(re, &resultLen, &status);
183 TEST_ASSERT_SUCCESS(status);
184 TEST_ASSERT(resultLen == -1);
185 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
186 uregex_close(re);
187
188 status = U_ZERO_ERROR;
189 re = uregex_open(pat, 3, 0, NULL, &status);
190 resultPat = uregex_pattern(re, &resultLen, &status);
191 TEST_ASSERT_SUCCESS(status);
192 TEST_ASSERT(resultLen == 3);
193 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
194 TEST_ASSERT(u_strlen(resultPat) == 3);
195 uregex_close(re);
196 }
197
198 /*
199 * flags()
200 */
201 {
202 int32_t t;
203
204 status = U_ZERO_ERROR;
205 re = uregex_open(pat, -1, 0, NULL, &status);
206 t = uregex_flags(re, &status);
207 TEST_ASSERT_SUCCESS(status);
208 TEST_ASSERT(t == 0);
209 uregex_close(re);
210
211 status = U_ZERO_ERROR;
212 re = uregex_open(pat, -1, 0, NULL, &status);
213 t = uregex_flags(re, &status);
214 TEST_ASSERT_SUCCESS(status);
215 TEST_ASSERT(t == 0);
216 uregex_close(re);
217
218 status = U_ZERO_ERROR;
219 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
220 t = uregex_flags(re, &status);
221 TEST_ASSERT_SUCCESS(status);
222 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
223 uregex_close(re);
224 }
225
226 /*
227 * setText() and lookingAt()
228 */
229 {
230 UChar text1[50];
231 UChar text2[50];
232 UBool result;
233
234 u_uastrncpy(text1, "abcccd", sizeof(text1)/2);
235 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
236 status = U_ZERO_ERROR;
237 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
238 re = uregex_open(pat, -1, 0, NULL, &status);
239 TEST_ASSERT_SUCCESS(status);
240
241 /* Operation before doing a setText should fail... */
242 status = U_ZERO_ERROR;
243 uregex_lookingAt(re, 0, &status);
244 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
245
246 status = U_ZERO_ERROR;
247 uregex_setText(re, text1, -1, &status);
248 result = uregex_lookingAt(re, 0, &status);
249 TEST_ASSERT(result == TRUE);
250 TEST_ASSERT_SUCCESS(status);
251
252 status = U_ZERO_ERROR;
253 uregex_setText(re, text2, -1, &status);
254 result = uregex_lookingAt(re, 0, &status);
255 TEST_ASSERT(result == FALSE);
256 TEST_ASSERT_SUCCESS(status);
257
258 status = U_ZERO_ERROR;
259 uregex_setText(re, text1, -1, &status);
260 result = uregex_lookingAt(re, 0, &status);
261 TEST_ASSERT(result == TRUE);
262 TEST_ASSERT_SUCCESS(status);
263
264 status = U_ZERO_ERROR;
265 uregex_setText(re, text1, 5, &status);
266 result = uregex_lookingAt(re, 0, &status);
267 TEST_ASSERT(result == FALSE);
268 TEST_ASSERT_SUCCESS(status);
269
270 status = U_ZERO_ERROR;
271 uregex_setText(re, text1, 6, &status);
272 result = uregex_lookingAt(re, 0, &status);
273 TEST_ASSERT(result == TRUE);
274 TEST_ASSERT_SUCCESS(status);
275
276 uregex_close(re);
277 }
278
279
280 /*
281 * getText()
282 */
283 {
284 UChar text1[50];
285 UChar text2[50];
286 const UChar *result;
287 int32_t textLength;
288
289 u_uastrncpy(text1, "abcccd", sizeof(text1)/2);
290 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
291 status = U_ZERO_ERROR;
292 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
293 re = uregex_open(pat, -1, 0, NULL, &status);
294
295 uregex_setText(re, text1, -1, &status);
296 result = uregex_getText(re, &textLength, &status);
297 TEST_ASSERT(result == text1);
298 TEST_ASSERT(textLength == -1);
299 TEST_ASSERT_SUCCESS(status);
300
301 status = U_ZERO_ERROR;
302 uregex_setText(re, text2, 7, &status);
303 result = uregex_getText(re, &textLength, &status);
304 TEST_ASSERT(result == text2);
305 TEST_ASSERT(textLength == 7);
306 TEST_ASSERT_SUCCESS(status);
307
308 status = U_ZERO_ERROR;
309 uregex_setText(re, text2, 4, &status);
310 result = uregex_getText(re, &textLength, &status);
311 TEST_ASSERT(result == text2);
312 TEST_ASSERT(textLength == 4);
313 TEST_ASSERT_SUCCESS(status);
314 uregex_close(re);
315 }
316
317 /*
318 * matches()
319 */
320 {
321 UChar text1[50];
322 UBool result;
323 int len;
324 UChar nullString[] = {0,0,0};
325
326 u_uastrncpy(text1, "abcccde", sizeof(text1)/2);
327 status = U_ZERO_ERROR;
328 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
329 re = uregex_open(pat, -1, 0, NULL, &status);
330
331 uregex_setText(re, text1, -1, &status);
332 result = uregex_matches(re, 0, &status);
333 TEST_ASSERT(result == FALSE);
334 TEST_ASSERT_SUCCESS(status);
335
336 status = U_ZERO_ERROR;
337 uregex_setText(re, text1, 6, &status);
338 result = uregex_matches(re, 0, &status);
339 TEST_ASSERT(result == TRUE);
340 TEST_ASSERT_SUCCESS(status);
341
342 status = U_ZERO_ERROR;
343 uregex_setText(re, text1, 6, &status);
344 result = uregex_matches(re, 1, &status);
345 TEST_ASSERT(result == FALSE);
346 TEST_ASSERT_SUCCESS(status);
347 uregex_close(re);
348
349 status = U_ZERO_ERROR;
350 re = uregex_openC(".?", 0, NULL, &status);
351 uregex_setText(re, text1, -1, &status);
352 len = u_strlen(text1);
353 result = uregex_matches(re, len, &status);
354 TEST_ASSERT(result == TRUE);
355 TEST_ASSERT_SUCCESS(status);
356
357 status = U_ZERO_ERROR;
358 uregex_setText(re, nullString, -1, &status);
359 TEST_ASSERT_SUCCESS(status);
360 result = uregex_matches(re, 0, &status);
361 TEST_ASSERT(result == TRUE);
362 TEST_ASSERT_SUCCESS(status);
363 uregex_close(re);
364 }
365
366
367 /*
368 * lookingAt() Used in setText test.
369 */
370
371
372 /*
373 * find(), findNext, start, end, reset
374 */
375 {
376 UChar text1[50];
377 UBool result;
378 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2);
379 status = U_ZERO_ERROR;
380 re = uregex_openC("rx", 0, NULL, &status);
381
382 uregex_setText(re, text1, -1, &status);
383 result = uregex_find(re, 0, &status);
384 TEST_ASSERT(result == TRUE);
385 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
386 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
387 TEST_ASSERT_SUCCESS(status);
388
389 result = uregex_find(re, 9, &status);
390 TEST_ASSERT(result == TRUE);
391 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
392 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
393 TEST_ASSERT_SUCCESS(status);
394
395 result = uregex_find(re, 14, &status);
396 TEST_ASSERT(result == FALSE);
397 TEST_ASSERT_SUCCESS(status);
398
399 status = U_ZERO_ERROR;
400 uregex_reset(re, 0, &status);
401
402 result = uregex_findNext(re, &status);
403 TEST_ASSERT(result == TRUE);
404 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
405 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
406 TEST_ASSERT_SUCCESS(status);
407
408 result = uregex_findNext(re, &status);
409 TEST_ASSERT(result == TRUE);
410 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
411 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
412 TEST_ASSERT_SUCCESS(status);
413
414 status = U_ZERO_ERROR;
415 uregex_reset(re, 12, &status);
416
417 result = uregex_findNext(re, &status);
418 TEST_ASSERT(result == TRUE);
419 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
420 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
421 TEST_ASSERT_SUCCESS(status);
422
423 result = uregex_findNext(re, &status);
424 TEST_ASSERT(result == FALSE);
425 TEST_ASSERT_SUCCESS(status);
426
427 uregex_close(re);
428 }
429
430 /*
431 * groupCount
432 */
433 {
434 int32_t result;
435
436 status = U_ZERO_ERROR;
437 re = uregex_openC("abc", 0, NULL, &status);
438 result = uregex_groupCount(re, &status);
439 TEST_ASSERT_SUCCESS(status);
440 TEST_ASSERT(result == 0);
441 uregex_close(re);
442
443 status = U_ZERO_ERROR;
444 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
445 result = uregex_groupCount(re, &status);
446 TEST_ASSERT_SUCCESS(status);
447 TEST_ASSERT(result == 3);
448 uregex_close(re);
449
450 }
451
452
453 /*
454 * group()
455 */
456 {
457 UChar text1[80];
458 UChar buf[80];
459 UBool result;
460 int32_t resultSz;
461 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
462
463 status = U_ZERO_ERROR;
464 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
465 TEST_ASSERT_SUCCESS(status);
466
467
468 uregex_setText(re, text1, -1, &status);
469 result = uregex_find(re, 0, &status);
470 TEST_ASSERT(result==TRUE);
471
472 /* Capture Group 0, the full match. Should succeed. */
473 status = U_ZERO_ERROR;
474 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
475 TEST_ASSERT_SUCCESS(status);
476 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
477 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
478
479 /* Capture group #1. Should succeed. */
480 status = U_ZERO_ERROR;
481 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
482 TEST_ASSERT_SUCCESS(status);
483 TEST_ASSERT_STRING(" interior ", buf, TRUE);
484 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
485
486 /* Capture group out of range. Error. */
487 status = U_ZERO_ERROR;
488 uregex_group(re, 2, buf, sizeof(buf)/2, &status);
489 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
490
491 /* NULL buffer, pure pre-flight */
492 status = U_ZERO_ERROR;
493 resultSz = uregex_group(re, 0, NULL, 0, &status);
494 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
495 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
496
497 /* Too small buffer, truncated string */
498 status = U_ZERO_ERROR;
499 memset(buf, -1, sizeof(buf));
500 resultSz = uregex_group(re, 0, buf, 5, &status);
501 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
502 TEST_ASSERT_STRING("abc i", buf, FALSE);
503 TEST_ASSERT(buf[5] == (UChar)0xffff);
504 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
505
506 /* Output string just fits buffer, no NUL term. */
507 status = U_ZERO_ERROR;
508 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
509 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
510 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
511 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
512 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
513
514 uregex_close(re);
515
516 }
517
518 /*
519 * replaceFirst()
520 */
521 {
522 UChar text1[80];
523 UChar text2[80];
524 UChar replText[80];
525 UChar buf[80];
526 int32_t resultSz;
527 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
528 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
529 u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
530
531 status = U_ZERO_ERROR;
532 re = uregex_openC("x(.*?)x", 0, NULL, &status);
533 TEST_ASSERT_SUCCESS(status);
534
535 /* Normal case, with match */
536 uregex_setText(re, text1, -1, &status);
537 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
538 TEST_ASSERT_SUCCESS(status);
539 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
540 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
541
542 /* No match. Text should copy to output with no changes. */
543 status = U_ZERO_ERROR;
544 uregex_setText(re, text2, -1, &status);
545 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
546 TEST_ASSERT_SUCCESS(status);
547 TEST_ASSERT_STRING("No match here.", buf, TRUE);
548 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
549
550 /* Match, output just fills buffer, no termination warning. */
551 status = U_ZERO_ERROR;
552 uregex_setText(re, text1, -1, &status);
553 memset(buf, -1, sizeof(buf));
554 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
555 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
556 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
557 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
558 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
559
560 /* Do the replaceFirst again, without first resetting anything.
561 * Should give the same results.
562 */
563 status = U_ZERO_ERROR;
564 memset(buf, -1, sizeof(buf));
565 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
566 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
567 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
568 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
569 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
570
571 /* NULL buffer, zero buffer length */
572 status = U_ZERO_ERROR;
573 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
574 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
575 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
576
577 /* Buffer too small by one */
578 status = U_ZERO_ERROR;
579 memset(buf, -1, sizeof(buf));
580 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
581 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
582 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
583 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
584 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
585
586 uregex_close(re);
587 }
588
589
590 /*
591 * replaceAll()
592 */
593 {
594 UChar text1[80];
595 UChar text2[80];
596 UChar replText[80];
597 UChar buf[80];
598 int32_t resultSz;
599 int32_t expectedResultSize;
600 int32_t i;
601
602 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
603 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
604 u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
605 expectedResultSize = u_strlen(text1);
606
607 status = U_ZERO_ERROR;
608 re = uregex_openC("x(.*?)x", 0, NULL, &status);
609 TEST_ASSERT_SUCCESS(status);
610
611 /* Normal case, with match */
612 uregex_setText(re, text1, -1, &status);
613 resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
614 TEST_ASSERT_SUCCESS(status);
615 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, TRUE);
616 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
617
618 /* No match. Text should copy to output with no changes. */
619 status = U_ZERO_ERROR;
620 uregex_setText(re, text2, -1, &status);
621 resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
622 TEST_ASSERT_SUCCESS(status);
623 TEST_ASSERT_STRING("No match here.", buf, TRUE);
624 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
625
626 /* Match, output just fills buffer, no termination warning. */
627 status = U_ZERO_ERROR;
628 uregex_setText(re, text1, -1, &status);
629 memset(buf, -1, sizeof(buf));
630 resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
631 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
632 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
633 TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
634 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
635
636 /* Do the replaceFirst again, without first resetting anything.
637 * Should give the same results.
638 */
639 status = U_ZERO_ERROR;
640 memset(buf, -1, sizeof(buf));
641 resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
642 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
643 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
644 TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
645 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
646
647 /* NULL buffer, zero buffer length */
648 status = U_ZERO_ERROR;
649 resultSz = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
650 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
651 TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>."));
652
653 /* Buffer too small. Try every size, which will tickle edge cases
654 * in uregex_appendReplacement (used by replaceAll) */
655 for (i=0; i<expectedResultSize; i++) {
656 char expected[80];
657 status = U_ZERO_ERROR;
658 memset(buf, -1, sizeof(buf));
659 resultSz = uregex_replaceAll(re, replText, -1, buf, i, &status);
660 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
661 strcpy(expected, "Replace <aa> <1> <...>.");
662 expected[i] = 0;
663 TEST_ASSERT_STRING(expected, buf, FALSE);
664 TEST_ASSERT(resultSz == expectedResultSize);
665 TEST_ASSERT(buf[i] == (UChar)0xffff);
666 }
667
668 uregex_close(re);
669 }
670
671
672 /*
673 * appendReplacement()
674 */
675 {
676 UChar text[100];
677 UChar repl[100];
678 UChar buf[100];
679 UChar *bufPtr;
680 int32_t bufCap;
681
682
683 status = U_ZERO_ERROR;
684 re = uregex_openC(".*", 0, 0, &status);
685 TEST_ASSERT_SUCCESS(status);
686
687 u_uastrncpy(text, "whatever", sizeof(text)/2);
688 u_uastrncpy(repl, "some other", sizeof(repl)/2);
689 uregex_setText(re, text, -1, &status);
690
691 /* match covers whole target string */
692 uregex_find(re, 0, &status);
693 TEST_ASSERT_SUCCESS(status);
694 bufPtr = buf;
695 bufCap = sizeof(buf) / 2;
696 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
697 TEST_ASSERT_SUCCESS(status);
698 TEST_ASSERT_STRING("some other", buf, TRUE);
699
700 /* Match has \u \U escapes */
701 uregex_find(re, 0, &status);
702 TEST_ASSERT_SUCCESS(status);
703 bufPtr = buf;
704 bufCap = sizeof(buf) / 2;
705 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
706 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
707 TEST_ASSERT_SUCCESS(status);
708 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
709
710 uregex_close(re);
711 }
712
713
714 /*
715 * appendTail(). Checked in ReplaceFirst(), replaceAll().
716 */
717
718 /*
719 * split()
720 */
721 {
722 UChar textToSplit[80];
723 UChar text2[80];
724 UChar buf[200];
725 UChar *fields[10];
726 int32_t numFields;
727 int32_t requiredCapacity;
728 int32_t spaceNeeded;
729 int32_t sz;
730
731 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2);
732 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
733
734 status = U_ZERO_ERROR;
735 re = uregex_openC(":", 0, NULL, &status);
736
737
738 /* Simple split */
739
740 uregex_setText(re, textToSplit, -1, &status);
741 TEST_ASSERT_SUCCESS(status);
742
743 memset(fields, -1, sizeof(fields));
744 numFields =
745 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
746 TEST_ASSERT_SUCCESS(status);
747 TEST_ASSERT(numFields == 3);
748 TEST_ASSERT_STRING("first ", fields[0], TRUE);
749 TEST_ASSERT_STRING(" second", fields[1], TRUE);
750 TEST_ASSERT_STRING(" third", fields[2], TRUE);
751 TEST_ASSERT(fields[3] == NULL);
752
753 spaceNeeded = u_strlen(textToSplit) -
754 (numFields - 1) + /* Field delimiters do not appear in output */
755 numFields; /* Each field gets a NUL terminator */
756
757 TEST_ASSERT(spaceNeeded == requiredCapacity);
758 uregex_close(re);
759
760
761 /* Split with too few output strings available */
762 status = U_ZERO_ERROR;
763 re = uregex_openC(":", 0, NULL, &status);
764 uregex_setText(re, textToSplit, -1, &status);
765 TEST_ASSERT_SUCCESS(status);
766
767 memset(fields, -1, sizeof(fields));
768 numFields =
769 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
770 TEST_ASSERT_SUCCESS(status);
771 TEST_ASSERT(numFields == 2);
772 TEST_ASSERT_STRING("first ", fields[0], TRUE);
773 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
774 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
775
776 spaceNeeded = u_strlen(textToSplit) -
777 (numFields - 1) + /* Field delimiters do not appear in output */
778 numFields; /* Each field gets a NUL terminator */
779
780 TEST_ASSERT(spaceNeeded == requiredCapacity);
781
782 /* Split with a range of output buffer sizes. */
783 spaceNeeded = u_strlen(textToSplit) -
784 (numFields - 1) + /* Field delimiters do not appear in output */
785 numFields; /* Each field gets a NUL terminator */
786
787 for (sz=0; sz < spaceNeeded+1; sz++) {
788 memset(fields, -1, sizeof(fields));
789 status = U_ZERO_ERROR;
790 numFields =
791 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
792 if (sz >= spaceNeeded) {
793 TEST_ASSERT_SUCCESS(status);
794 TEST_ASSERT_STRING("first ", fields[0], TRUE);
795 TEST_ASSERT_STRING(" second", fields[1], TRUE);
796 TEST_ASSERT_STRING(" third", fields[2], TRUE);
797 } else {
798 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
799 }
800 TEST_ASSERT(numFields == 3);
801 TEST_ASSERT(fields[3] == NULL);
802 TEST_ASSERT(spaceNeeded == requiredCapacity);
803 }
804 uregex_close(re);
805 }
806
807
808
809
810 /* Split(), part 2. Patterns with capture groups. The capture group text
811 * comes out as additional fields. */
812 {
813 UChar textToSplit[80];
814 UChar buf[200];
815 UChar *fields[10];
816 int32_t numFields;
817 int32_t requiredCapacity;
818 int32_t spaceNeeded;
819 int32_t sz;
820
821 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2);
822
823 status = U_ZERO_ERROR;
824 re = uregex_openC("<(.*?)>", 0, NULL, &status);
825
826 uregex_setText(re, textToSplit, -1, &status);
827 TEST_ASSERT_SUCCESS(status);
828
829 memset(fields, -1, sizeof(fields));
830 numFields =
831 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
832 TEST_ASSERT_SUCCESS(status);
833 TEST_ASSERT(numFields == 5);
834 TEST_ASSERT_STRING("first ", fields[0], TRUE);
835 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
836 TEST_ASSERT_STRING(" second", fields[2], TRUE);
837 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
838 TEST_ASSERT_STRING(" third", fields[4], TRUE);
839 TEST_ASSERT(fields[5] == NULL);
840 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
841 TEST_ASSERT(spaceNeeded == requiredCapacity);
842
843
844 /* Split with too few output strings available (2) */
845 status = U_ZERO_ERROR;
846 memset(fields, -1, sizeof(fields));
847 numFields =
848 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
849 TEST_ASSERT_SUCCESS(status);
850 TEST_ASSERT(numFields == 2);
851 TEST_ASSERT_STRING("first ", fields[0], TRUE);
852 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
853 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
854
855 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
856 TEST_ASSERT(spaceNeeded == requiredCapacity);
857
858 /* Split with too few output strings available (3) */
859 status = U_ZERO_ERROR;
860 memset(fields, -1, sizeof(fields));
861 numFields =
862 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
863 TEST_ASSERT_SUCCESS(status);
864 TEST_ASSERT(numFields == 3);
865 TEST_ASSERT_STRING("first ", fields[0], TRUE);
866 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
867 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
868 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
869
870 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
871 TEST_ASSERT(spaceNeeded == requiredCapacity);
872
873 /* Split with just enough output strings available (5) */
874 status = U_ZERO_ERROR;
875 memset(fields, -1, sizeof(fields));
876 numFields =
877 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
878 TEST_ASSERT_SUCCESS(status);
879 TEST_ASSERT(numFields == 5);
880 TEST_ASSERT_STRING("first ", fields[0], TRUE);
881 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
882 TEST_ASSERT_STRING(" second", fields[2], TRUE);
883 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
884 TEST_ASSERT_STRING(" third", fields[4], TRUE);
885 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
886
887 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
888 TEST_ASSERT(spaceNeeded == requiredCapacity);
889
890
891 /* Split, end of text is a field delimiter. */
892 status = U_ZERO_ERROR;
893 sz = strlen("first <tag-a> second<tag-b>");
894 uregex_setText(re, textToSplit, sz, &status);
895 TEST_ASSERT_SUCCESS(status);
896 memset(fields, -1, sizeof(fields));
897 numFields =
898 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
899 TEST_ASSERT_SUCCESS(status);
900 TEST_ASSERT(numFields == 4);
901 TEST_ASSERT_STRING("first ", fields[0], TRUE);
902 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
903 TEST_ASSERT_STRING(" second", fields[2], TRUE);
904 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
905 TEST_ASSERT(fields[4] == NULL);
906 TEST_ASSERT(fields[8] == NULL);
907 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
908 spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */
909 TEST_ASSERT(spaceNeeded == requiredCapacity);
910
911 uregex_close(re);
912 }
913
914 }
915
916 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */