1 /********************************************************************
3 * Copyright (c) 2004, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 *********************************************************************************/
11 /*C API TEST FOR Regular Expressions */
13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
17 * More complete testing of regular expression functionality is done with the C++ tests.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
26 #include "unicode/uloc.h"
27 #include "unicode/uregex.h"
28 #include "unicode/ustring.h"
/*
 * Logs a failure (source file, line, and the name returned by u_errorName)
 * when `status` satisfies U_FAILURE. Execution continues afterwards; the
 * macro only reports.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and can be used safely as the unbraced body of an if/else.
 * Note that `status` is evaluated a second time when the check fails.
 */
#define TEST_ASSERT_SUCCESS(status) do { \
    if (U_FAILURE(status)) { \
        log_err("Failure at file %s, line %d, error = %s\n", \
                __FILE__, __LINE__, u_errorName(status)); \
    } \
} while (0)
/*
 * Logs a failure (source file and line) when `expr` evaluates to false.
 * Execution continues afterwards; the macro only reports.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and can be used safely as the unbraced body of an if/else.
 */
#define TEST_ASSERT(expr) do { \
    if (!(expr)) { \
        log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__); \
    } \
} while (0)
/*
 * Compares a UChar string against an expected invariant-character C string
 * and logs a failure (file, line, expected and actual text) on mismatch.
 *
 *   expected  - NUL-terminated char string holding the expected text.
 *   actual    - UChar string to check; converted to char with u_austrncpy.
 *   nulTerm   - if true, `actual` must match `expected` exactly, including
 *               termination at the same length; if false, only the first
 *               strlen(expected) characters are compared.
 *
 * The scratch buffer is explicitly NUL-terminated after each copy:
 * u_austrncpy does not terminate the destination when the source is at least
 * as long as the bound, and both strcmp and the "%s" in the failure message
 * would otherwise read past the copied characters.
 *
 * `expected` must be shorter than sizeof(buf_inside_macro) - 2 characters.
 * All locals carry an _inside_macro suffix so they cannot capture
 * identifiers used in the caller's argument expressions.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) do { \
    char    buf_inside_macro[120]; \
    int32_t len_inside_macro = (int32_t)strlen(expected); \
    UBool   success_inside_macro; \
    if (nulTerm) { \
        /* Copy one extra unit so that a longer `actual` compares unequal. */ \
        u_austrncpy(buf_inside_macro, (actual), len_inside_macro+1); \
        buf_inside_macro[len_inside_macro+1] = 0; \
        success_inside_macro = (strcmp((expected), buf_inside_macro) == 0); \
    } else { \
        u_austrncpy(buf_inside_macro, (actual), len_inside_macro); \
        buf_inside_macro[len_inside_macro] = 0; \
        success_inside_macro = \
            (strncmp((expected), buf_inside_macro, len_inside_macro) == 0); \
    } \
    if (success_inside_macro == FALSE) { \
        log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", \
                __FILE__, __LINE__, (expected), buf_inside_macro); \
    } \
} while (0)
59 static void TestRegexCAPI(void);
61 void addURegexTest(TestNode
** root
);
/*
 * Registers the regular-expression C API test: passes `root`, the
 * TestRegexCAPI function and the path "regex/TestRegexCAPI" to addTest().
 */
63 void addURegexTest(TestNode
** root
)
65 addTest(root
, &TestRegexCAPI
, "regex/TestRegexCAPI");
66 /* addTest(root, &TestBreakIteratorSafeClone, "tstxtbd/cbiapts/TestBreakIteratorSafeClone"); */
70 static void TestRegexCAPI(void) {
71 UErrorCode status
= U_ZERO_ERROR
;
72 URegularExpression
*re
;
76 memset(&minus1
, -1, sizeof(minus1
));
78 /* Minimalist open/close */
79 u_uastrncpy(pat
, "abc*", sizeof(pat
)/2);
80 re
= uregex_open(pat
, -1, 0, 0, &status
);
81 TEST_ASSERT_SUCCESS(status
);
84 /* Open with all flag values set */
85 status
= U_ZERO_ERROR
;
86 re
= uregex_open(pat
, -1,
87 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
89 TEST_ASSERT_SUCCESS(status
);
92 /* Open with an invalid flag */
93 status
= U_ZERO_ERROR
;
94 re
= uregex_open(pat
, -1, 0x40000000, 0, &status
);
95 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
99 /* openC open from a C string */
103 status
= U_ZERO_ERROR
;
104 re
= uregex_openC("abc*", 0, 0, &status
);
105 TEST_ASSERT_SUCCESS(status
);
106 p
= uregex_pattern(re
, &len
, &status
);
107 TEST_ASSERT_SUCCESS(status
);
108 u_uastrncpy(pat
, "abc*", sizeof(pat
)/2);
109 TEST_ASSERT(u_strcmp(pat
, p
) == 0);
110 TEST_ASSERT(len
==(int32_t)strlen("abc*"));
114 /* TODO: Open with ParseError parameter */
121 URegularExpression
*clone1
;
122 URegularExpression
*clone2
;
123 URegularExpression
*clone3
;
124 UChar testString1
[30];
125 UChar testString2
[30];
129 status
= U_ZERO_ERROR
;
130 re
= uregex_openC("abc*", 0, 0, &status
);
131 TEST_ASSERT_SUCCESS(status
);
132 clone1
= uregex_clone(re
, &status
);
133 TEST_ASSERT_SUCCESS(status
);
134 TEST_ASSERT(clone1
!= NULL
);
136 status
= U_ZERO_ERROR
;
137 clone2
= uregex_clone(re
, &status
);
138 TEST_ASSERT_SUCCESS(status
);
139 TEST_ASSERT(clone2
!= NULL
);
142 status
= U_ZERO_ERROR
;
143 clone3
= uregex_clone(clone2
, &status
);
144 TEST_ASSERT_SUCCESS(status
);
145 TEST_ASSERT(clone3
!= NULL
);
147 u_uastrncpy(testString1
, "abcccd", sizeof(pat
)/2);
148 u_uastrncpy(testString2
, "xxxabcccd", sizeof(pat
)/2);
150 status
= U_ZERO_ERROR
;
151 uregex_setText(clone1
, testString1
, -1, &status
);
152 TEST_ASSERT_SUCCESS(status
);
153 result
= uregex_lookingAt(clone1
, 0, &status
);
154 TEST_ASSERT_SUCCESS(status
);
155 TEST_ASSERT(result
==TRUE
);
157 status
= U_ZERO_ERROR
;
158 uregex_setText(clone2
, testString2
, -1, &status
);
159 TEST_ASSERT_SUCCESS(status
);
160 result
= uregex_lookingAt(clone2
, 0, &status
);
161 TEST_ASSERT_SUCCESS(status
);
162 TEST_ASSERT(result
==FALSE
);
163 result
= uregex_find(clone2
, 0, &status
);
164 TEST_ASSERT_SUCCESS(status
);
165 TEST_ASSERT(result
==TRUE
);
167 uregex_close(clone1
);
168 uregex_close(clone2
);
169 uregex_close(clone3
);
177 const UChar
*resultPat
;
179 u_uastrncpy(pat
, "hello", sizeof(pat
)/2);
180 status
= U_ZERO_ERROR
;
181 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
182 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
183 TEST_ASSERT_SUCCESS(status
);
184 TEST_ASSERT(resultLen
== -1);
185 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
188 status
= U_ZERO_ERROR
;
189 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
190 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
191 TEST_ASSERT_SUCCESS(status
);
192 TEST_ASSERT(resultLen
== 3);
193 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
194 TEST_ASSERT(u_strlen(resultPat
) == 3);
204 status
= U_ZERO_ERROR
;
205 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
206 t
= uregex_flags(re
, &status
);
207 TEST_ASSERT_SUCCESS(status
);
211 status
= U_ZERO_ERROR
;
212 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
213 t
= uregex_flags(re
, &status
);
214 TEST_ASSERT_SUCCESS(status
);
218 status
= U_ZERO_ERROR
;
219 re
= uregex_open(pat
, -1, UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
, NULL
, &status
);
220 t
= uregex_flags(re
, &status
);
221 TEST_ASSERT_SUCCESS(status
);
222 TEST_ASSERT(t
== (UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
));
227 * setText() and lookingAt()
234 u_uastrncpy(text1
, "abcccd", sizeof(text1
)/2);
235 u_uastrncpy(text2
, "abcccxd", sizeof(text2
)/2);
236 status
= U_ZERO_ERROR
;
237 u_uastrncpy(pat
, "abc*d", sizeof(pat
)/2);
238 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
239 TEST_ASSERT_SUCCESS(status
);
241 /* Operation before doing a setText should fail... */
242 status
= U_ZERO_ERROR
;
243 uregex_lookingAt(re
, 0, &status
);
244 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
246 status
= U_ZERO_ERROR
;
247 uregex_setText(re
, text1
, -1, &status
);
248 result
= uregex_lookingAt(re
, 0, &status
);
249 TEST_ASSERT(result
== TRUE
);
250 TEST_ASSERT_SUCCESS(status
);
252 status
= U_ZERO_ERROR
;
253 uregex_setText(re
, text2
, -1, &status
);
254 result
= uregex_lookingAt(re
, 0, &status
);
255 TEST_ASSERT(result
== FALSE
);
256 TEST_ASSERT_SUCCESS(status
);
258 status
= U_ZERO_ERROR
;
259 uregex_setText(re
, text1
, -1, &status
);
260 result
= uregex_lookingAt(re
, 0, &status
);
261 TEST_ASSERT(result
== TRUE
);
262 TEST_ASSERT_SUCCESS(status
);
264 status
= U_ZERO_ERROR
;
265 uregex_setText(re
, text1
, 5, &status
);
266 result
= uregex_lookingAt(re
, 0, &status
);
267 TEST_ASSERT(result
== FALSE
);
268 TEST_ASSERT_SUCCESS(status
);
270 status
= U_ZERO_ERROR
;
271 uregex_setText(re
, text1
, 6, &status
);
272 result
= uregex_lookingAt(re
, 0, &status
);
273 TEST_ASSERT(result
== TRUE
);
274 TEST_ASSERT_SUCCESS(status
);
289 u_uastrncpy(text1
, "abcccd", sizeof(text1
)/2);
290 u_uastrncpy(text2
, "abcccxd", sizeof(text2
)/2);
291 status
= U_ZERO_ERROR
;
292 u_uastrncpy(pat
, "abc*d", sizeof(pat
)/2);
293 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
295 uregex_setText(re
, text1
, -1, &status
);
296 result
= uregex_getText(re
, &textLength
, &status
);
297 TEST_ASSERT(result
== text1
);
298 TEST_ASSERT(textLength
== -1);
299 TEST_ASSERT_SUCCESS(status
);
301 status
= U_ZERO_ERROR
;
302 uregex_setText(re
, text2
, 7, &status
);
303 result
= uregex_getText(re
, &textLength
, &status
);
304 TEST_ASSERT(result
== text2
);
305 TEST_ASSERT(textLength
== 7);
306 TEST_ASSERT_SUCCESS(status
);
308 status
= U_ZERO_ERROR
;
309 uregex_setText(re
, text2
, 4, &status
);
310 result
= uregex_getText(re
, &textLength
, &status
);
311 TEST_ASSERT(result
== text2
);
312 TEST_ASSERT(textLength
== 4);
313 TEST_ASSERT_SUCCESS(status
);
324 UChar nullString
[] = {0,0,0};
326 u_uastrncpy(text1
, "abcccde", sizeof(text1
)/2);
327 status
= U_ZERO_ERROR
;
328 u_uastrncpy(pat
, "abc*d", sizeof(pat
)/2);
329 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
331 uregex_setText(re
, text1
, -1, &status
);
332 result
= uregex_matches(re
, 0, &status
);
333 TEST_ASSERT(result
== FALSE
);
334 TEST_ASSERT_SUCCESS(status
);
336 status
= U_ZERO_ERROR
;
337 uregex_setText(re
, text1
, 6, &status
);
338 result
= uregex_matches(re
, 0, &status
);
339 TEST_ASSERT(result
== TRUE
);
340 TEST_ASSERT_SUCCESS(status
);
342 status
= U_ZERO_ERROR
;
343 uregex_setText(re
, text1
, 6, &status
);
344 result
= uregex_matches(re
, 1, &status
);
345 TEST_ASSERT(result
== FALSE
);
346 TEST_ASSERT_SUCCESS(status
);
349 status
= U_ZERO_ERROR
;
350 re
= uregex_openC(".?", 0, NULL
, &status
);
351 uregex_setText(re
, text1
, -1, &status
);
352 len
= u_strlen(text1
);
353 result
= uregex_matches(re
, len
, &status
);
354 TEST_ASSERT(result
== TRUE
);
355 TEST_ASSERT_SUCCESS(status
);
357 status
= U_ZERO_ERROR
;
358 uregex_setText(re
, nullString
, -1, &status
);
359 TEST_ASSERT_SUCCESS(status
);
360 result
= uregex_matches(re
, 0, &status
);
361 TEST_ASSERT(result
== TRUE
);
362 TEST_ASSERT_SUCCESS(status
);
368 * lookingAt() Used in setText test.
373 * find(), findNext, start, end, reset
378 u_uastrncpy(text1
, "012rx5rx890rxrx...", sizeof(text1
)/2);
379 status
= U_ZERO_ERROR
;
380 re
= uregex_openC("rx", 0, NULL
, &status
);
382 uregex_setText(re
, text1
, -1, &status
);
383 result
= uregex_find(re
, 0, &status
);
384 TEST_ASSERT(result
== TRUE
);
385 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
386 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
387 TEST_ASSERT_SUCCESS(status
);
389 result
= uregex_find(re
, 9, &status
);
390 TEST_ASSERT(result
== TRUE
);
391 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
392 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
393 TEST_ASSERT_SUCCESS(status
);
395 result
= uregex_find(re
, 14, &status
);
396 TEST_ASSERT(result
== FALSE
);
397 TEST_ASSERT_SUCCESS(status
);
399 status
= U_ZERO_ERROR
;
400 uregex_reset(re
, 0, &status
);
402 result
= uregex_findNext(re
, &status
);
403 TEST_ASSERT(result
== TRUE
);
404 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
405 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
406 TEST_ASSERT_SUCCESS(status
);
408 result
= uregex_findNext(re
, &status
);
409 TEST_ASSERT(result
== TRUE
);
410 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
411 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
412 TEST_ASSERT_SUCCESS(status
);
414 status
= U_ZERO_ERROR
;
415 uregex_reset(re
, 12, &status
);
417 result
= uregex_findNext(re
, &status
);
418 TEST_ASSERT(result
== TRUE
);
419 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
420 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
421 TEST_ASSERT_SUCCESS(status
);
423 result
= uregex_findNext(re
, &status
);
424 TEST_ASSERT(result
== FALSE
);
425 TEST_ASSERT_SUCCESS(status
);
436 status
= U_ZERO_ERROR
;
437 re
= uregex_openC("abc", 0, NULL
, &status
);
438 result
= uregex_groupCount(re
, &status
);
439 TEST_ASSERT_SUCCESS(status
);
440 TEST_ASSERT(result
== 0);
443 status
= U_ZERO_ERROR
;
444 re
= uregex_openC("abc(def)(ghi(j))", 0, NULL
, &status
);
445 result
= uregex_groupCount(re
, &status
);
446 TEST_ASSERT_SUCCESS(status
);
447 TEST_ASSERT(result
== 3);
461 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", sizeof(text1
)/2);
463 status
= U_ZERO_ERROR
;
464 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
465 TEST_ASSERT_SUCCESS(status
);
468 uregex_setText(re
, text1
, -1, &status
);
469 result
= uregex_find(re
, 0, &status
);
470 TEST_ASSERT(result
==TRUE
);
472 /* Capture Group 0, the full match. Should succeed. */
473 status
= U_ZERO_ERROR
;
474 resultSz
= uregex_group(re
, 0, buf
, sizeof(buf
)/2, &status
);
475 TEST_ASSERT_SUCCESS(status
);
476 TEST_ASSERT_STRING("abc interior def", buf
, TRUE
);
477 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
479 /* Capture group #1. Should succeed. */
480 status
= U_ZERO_ERROR
;
481 resultSz
= uregex_group(re
, 1, buf
, sizeof(buf
)/2, &status
);
482 TEST_ASSERT_SUCCESS(status
);
483 TEST_ASSERT_STRING(" interior ", buf
, TRUE
);
484 TEST_ASSERT(resultSz
== (int32_t)strlen(" interior "));
486 /* Capture group out of range. Error. */
487 status
= U_ZERO_ERROR
;
488 uregex_group(re
, 2, buf
, sizeof(buf
)/2, &status
);
489 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
491 /* NULL buffer, pure pre-flight */
492 status
= U_ZERO_ERROR
;
493 resultSz
= uregex_group(re
, 0, NULL
, 0, &status
);
494 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
495 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
497 /* Too small buffer, truncated string */
498 status
= U_ZERO_ERROR
;
499 memset(buf
, -1, sizeof(buf
));
500 resultSz
= uregex_group(re
, 0, buf
, 5, &status
);
501 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
502 TEST_ASSERT_STRING("abc i", buf
, FALSE
);
503 TEST_ASSERT(buf
[5] == (UChar
)0xffff);
504 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
506 /* Output string just fits buffer, no NUL term. */
507 status
= U_ZERO_ERROR
;
508 resultSz
= uregex_group(re
, 0, buf
, (int32_t)strlen("abc interior def"), &status
);
509 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
510 TEST_ASSERT_STRING("abc interior def", buf
, FALSE
);
511 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
512 TEST_ASSERT(buf
[strlen("abc interior def")] == (UChar
)0xffff);
527 u_uastrncpy(text1
, "Replace xaax x1x x...x.", sizeof(text1
)/2);
528 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
529 u_uastrncpy(replText
, "<$1>", sizeof(replText
)/2);
531 status
= U_ZERO_ERROR
;
532 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
533 TEST_ASSERT_SUCCESS(status
);
535 /* Normal case, with match */
536 uregex_setText(re
, text1
, -1, &status
);
537 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
538 TEST_ASSERT_SUCCESS(status
);
539 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, TRUE
);
540 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
542 /* No match. Text should copy to output with no changes. */
543 status
= U_ZERO_ERROR
;
544 uregex_setText(re
, text2
, -1, &status
);
545 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
546 TEST_ASSERT_SUCCESS(status
);
547 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
548 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
550 /* Match, output exactly fills the buffer: no room for a NUL, so expect a not-terminated warning. */
551 status
= U_ZERO_ERROR
;
552 uregex_setText(re
, text1
, -1, &status
);
553 memset(buf
, -1, sizeof(buf
));
554 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
555 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
556 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
557 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
558 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
560 /* Do the replaceFirst again, without first resetting anything.
561 * Should give the same results.
563 status
= U_ZERO_ERROR
;
564 memset(buf
, -1, sizeof(buf
));
565 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
566 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
567 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
568 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
569 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
571 /* NULL buffer, zero buffer length */
572 status
= U_ZERO_ERROR
;
573 resultSz
= uregex_replaceFirst(re
, replText
, -1, NULL
, 0, &status
);
574 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
575 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
577 /* Buffer too small by one */
578 status
= U_ZERO_ERROR
;
579 memset(buf
, -1, sizeof(buf
));
580 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x.")-1, &status
);
581 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
582 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf
, FALSE
);
583 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
584 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
599 int32_t expectedResultSize
;
602 u_uastrncpy(text1
, "Replace xaax x1x x...x.", sizeof(text1
)/2);
603 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
604 u_uastrncpy(replText
, "<$1>", sizeof(replText
)/2);
605 expectedResultSize
= u_strlen(text1
);
607 status
= U_ZERO_ERROR
;
608 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
609 TEST_ASSERT_SUCCESS(status
);
611 /* Normal case, with match */
612 uregex_setText(re
, text1
, -1, &status
);
613 resultSz
= uregex_replaceAll(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
614 TEST_ASSERT_SUCCESS(status
);
615 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, TRUE
);
616 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
618 /* No match. Text should copy to output with no changes. */
619 status
= U_ZERO_ERROR
;
620 uregex_setText(re
, text2
, -1, &status
);
621 resultSz
= uregex_replaceAll(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
622 TEST_ASSERT_SUCCESS(status
);
623 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
624 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
626 /* Match, output exactly fills the buffer: no room for a NUL, so expect a not-terminated warning. */
627 status
= U_ZERO_ERROR
;
628 uregex_setText(re
, text1
, -1, &status
);
629 memset(buf
, -1, sizeof(buf
));
630 resultSz
= uregex_replaceAll(re
, replText
, -1, buf
, strlen("Replace xaax x1x x...x."), &status
);
631 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
632 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
633 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace <aa> <1> <...>."));
634 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
636 /* Do the replaceAll again, without first resetting anything.
637 * Should give the same results.
639 status
= U_ZERO_ERROR
;
640 memset(buf
, -1, sizeof(buf
));
641 resultSz
= uregex_replaceAll(re
, replText
, -1, buf
, strlen("Replace xaax x1x x...x."), &status
);
642 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
643 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
644 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace <aa> <1> <...>."));
645 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
647 /* NULL buffer, zero buffer length */
648 status
= U_ZERO_ERROR
;
649 resultSz
= uregex_replaceAll(re
, replText
, -1, NULL
, 0, &status
);
650 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
651 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace <aa> <1> <...>."));
653 /* Buffer too small. Try every size, which will tickle edge cases
654 * in uregex_appendReplacement (used by replaceAll) */
655 for (i
=0; i
<expectedResultSize
; i
++) {
657 status
= U_ZERO_ERROR
;
658 memset(buf
, -1, sizeof(buf
));
659 resultSz
= uregex_replaceAll(re
, replText
, -1, buf
, i
, &status
);
660 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
661 strcpy(expected
, "Replace <aa> <1> <...>.");
663 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
664 TEST_ASSERT(resultSz
== expectedResultSize
);
665 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
673 * appendReplacement()
683 status
= U_ZERO_ERROR
;
684 re
= uregex_openC(".*", 0, 0, &status
);
685 TEST_ASSERT_SUCCESS(status
);
687 u_uastrncpy(text
, "whatever", sizeof(text
)/2);
688 u_uastrncpy(repl
, "some other", sizeof(repl
)/2);
689 uregex_setText(re
, text
, -1, &status
);
691 /* match covers whole target string */
692 uregex_find(re
, 0, &status
);
693 TEST_ASSERT_SUCCESS(status
);
695 bufCap
= sizeof(buf
) / 2;
696 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
697 TEST_ASSERT_SUCCESS(status
);
698 TEST_ASSERT_STRING("some other", buf
, TRUE
);
700 /* Match has \u \U escapes */
701 uregex_find(re
, 0, &status
);
702 TEST_ASSERT_SUCCESS(status
);
704 bufCap
= sizeof(buf
) / 2;
705 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl
)/2);
706 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
707 TEST_ASSERT_SUCCESS(status
);
708 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
715 * appendTail(). Checked in ReplaceFirst(), replaceAll().
722 UChar textToSplit
[80];
727 int32_t requiredCapacity
;
731 u_uastrncpy(textToSplit
, "first : second: third", sizeof(textToSplit
)/2);
732 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
734 status
= U_ZERO_ERROR
;
735 re
= uregex_openC(":", 0, NULL
, &status
);
740 uregex_setText(re
, textToSplit
, -1, &status
);
741 TEST_ASSERT_SUCCESS(status
);
743 memset(fields
, -1, sizeof(fields
));
745 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 10, &status
);
746 TEST_ASSERT_SUCCESS(status
);
747 TEST_ASSERT(numFields
== 3);
748 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
749 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
750 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
751 TEST_ASSERT(fields
[3] == NULL
);
753 spaceNeeded
= u_strlen(textToSplit
) -
754 (numFields
- 1) + /* Field delimiters do not appear in output */
755 numFields
; /* Each field gets a NUL terminator */
757 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
761 /* Split with too few output strings available */
762 status
= U_ZERO_ERROR
;
763 re
= uregex_openC(":", 0, NULL
, &status
);
764 uregex_setText(re
, textToSplit
, -1, &status
);
765 TEST_ASSERT_SUCCESS(status
);
767 memset(fields
, -1, sizeof(fields
));
769 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 2, &status
);
770 TEST_ASSERT_SUCCESS(status
);
771 TEST_ASSERT(numFields
== 2);
772 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
773 TEST_ASSERT_STRING(" second: third", fields
[1], TRUE
);
774 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
776 spaceNeeded
= u_strlen(textToSplit
) -
777 (numFields
- 1) + /* Field delimiters do not appear in output */
778 numFields
; /* Each field gets a NUL terminator */
780 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
782 /* Split with a range of output buffer sizes. */
783 spaceNeeded
= u_strlen(textToSplit
) -
784 (numFields
- 1) + /* Field delimiters do not appear in output */
785 numFields
; /* Each field gets a NUL terminator */
787 for (sz
=0; sz
< spaceNeeded
+1; sz
++) {
788 memset(fields
, -1, sizeof(fields
));
789 status
= U_ZERO_ERROR
;
791 uregex_split(re
, buf
, sz
, &requiredCapacity
, fields
, 10, &status
);
792 if (sz
>= spaceNeeded
) {
793 TEST_ASSERT_SUCCESS(status
);
794 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
795 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
796 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
798 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
800 TEST_ASSERT(numFields
== 3);
801 TEST_ASSERT(fields
[3] == NULL
);
802 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
810 /* Split(), part 2. Patterns with capture groups. The capture group text
811 * comes out as additional fields. */
813 UChar textToSplit
[80];
817 int32_t requiredCapacity
;
821 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", sizeof(textToSplit
)/2);
823 status
= U_ZERO_ERROR
;
824 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
826 uregex_setText(re
, textToSplit
, -1, &status
);
827 TEST_ASSERT_SUCCESS(status
);
829 memset(fields
, -1, sizeof(fields
));
831 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 10, &status
);
832 TEST_ASSERT_SUCCESS(status
);
833 TEST_ASSERT(numFields
== 5);
834 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
835 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
836 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
837 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
838 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
839 TEST_ASSERT(fields
[5] == NULL
);
840 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
841 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
844 /* Split with too few output strings available (2) */
845 status
= U_ZERO_ERROR
;
846 memset(fields
, -1, sizeof(fields
));
848 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 2, &status
);
849 TEST_ASSERT_SUCCESS(status
);
850 TEST_ASSERT(numFields
== 2);
851 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
852 TEST_ASSERT_STRING(" second<tag-b> third", fields
[1], TRUE
);
853 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
855 spaceNeeded
= strlen("first . second<tag-b> third."); /* "." at NUL positions */
856 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
858 /* Split with too few output strings available (3) */
859 status
= U_ZERO_ERROR
;
860 memset(fields
, -1, sizeof(fields
));
862 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 3, &status
);
863 TEST_ASSERT_SUCCESS(status
);
864 TEST_ASSERT(numFields
== 3);
865 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
866 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
867 TEST_ASSERT_STRING(" second<tag-b> third", fields
[2], TRUE
);
868 TEST_ASSERT(!memcmp(&fields
[3],&minus1
,sizeof(UChar
*)));
870 spaceNeeded
= strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
871 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
873 /* Split with just enough output strings available (5) */
874 status
= U_ZERO_ERROR
;
875 memset(fields
, -1, sizeof(fields
));
877 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 5, &status
);
878 TEST_ASSERT_SUCCESS(status
);
879 TEST_ASSERT(numFields
== 5);
880 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
881 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
882 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
883 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
884 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
885 TEST_ASSERT(!memcmp(&fields
[5],&minus1
,sizeof(UChar
*)));
887 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
888 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
891 /* Split, end of text is a field delimiter. */
892 status
= U_ZERO_ERROR
;
893 sz
= strlen("first <tag-a> second<tag-b>");
894 uregex_setText(re
, textToSplit
, sz
, &status
);
895 TEST_ASSERT_SUCCESS(status
);
896 memset(fields
, -1, sizeof(fields
));
898 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 9, &status
);
899 TEST_ASSERT_SUCCESS(status
);
900 TEST_ASSERT(numFields
== 4);
901 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
902 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
903 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
904 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
905 TEST_ASSERT(fields
[4] == NULL
);
906 TEST_ASSERT(fields
[8] == NULL
);
907 TEST_ASSERT(!memcmp(&fields
[9],&minus1
,sizeof(UChar
*)));
908 spaceNeeded
= strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */
909 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
916 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */