1 /********************************************************************
3 * Copyright (c) 2004-2010, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 *********************************************************************************/
11 /*C API TEST FOR Regular Expressions */
13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
17 * More complete testing of regular expression functionality is done with the C++ tests.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
26 #include "unicode/uloc.h"
27 #include "unicode/uregex.h"
28 #include "unicode/ustring.h"
29 #include "unicode/utext.h"
/*
 * Report a data error, tagged with the current file and line and the
 * symbolic name of the error code, when "status" holds a failure code.
 * Expands to a brace-enclosed block.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
/*
 * Report a test failure, tagged with the current file and line, when
 * "expr" evaluates to zero. Expands to a brace-enclosed block.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
/*
 * TEST_SETUP and TEST_TEARDOWN
 *     macros to handle the boilerplate around setting up regex test cases.
 *     parameters to setup:
 *         pattern:     The regex pattern, a (char *) null terminated C string.
 *         testString:  The string data, also a (char *) C string.
 *         flags:       Regex flags to set when compiling the pattern
 *
 *     Put arbitrary test code between SETUP and TEARDOWN.
 *     "re" is the compiled, ready-to-go regular expression.
 *
 *     TEST_SETUP assigns the "status" and "re" variables of the enclosing
 *     function, so both must be declared there. It declares a local
 *     "srcString" holding a malloc()ed UChar copy of testString, and leaves
 *     two scopes open: the outer block and an "if (U_SUCCESS(status))" block
 *     that guards the test code. A matching TEST_TEARDOWN is expected to
 *     close both.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        /* Out of memory: report it through status; keep any earlier error. */ \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {
60 #define TEST_TEARDOWN \
62 TEST_ASSERT_SUCCESS(status); \
69 * @param expected utf-8 array of bytes to be expected
71 static void test_assert_string(const char *expected
, const UChar
*actual
, UBool nulTerm
, const char *file
, int line
) {
72 char buf_inside_macro
[120];
73 int32_t len
= (int32_t)strlen(expected
);
76 u_austrncpy(buf_inside_macro
, (actual
), len
+1);
77 buf_inside_macro
[len
+2] = 0;
78 success
= (strcmp((expected
), buf_inside_macro
) == 0);
80 u_austrncpy(buf_inside_macro
, (actual
), len
);
81 buf_inside_macro
[len
+1] = 0;
82 success
= (strncmp((expected
), buf_inside_macro
, len
) == 0);
84 if (success
== FALSE
) {
85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
86 file
, line
, (expected
), buf_inside_macro
);
90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
93 static void test_assert_utext(const char *expected
, UText
*actual
, const char *file
, int line
) {
94 UErrorCode status
= U_ZERO_ERROR
;
95 UText expectedText
= UTEXT_INITIALIZER
;
96 utext_openUTF8(&expectedText
, expected
, -1, &status
);
97 utext_setNativeIndex(actual
, 0);
98 if (utext_compare(&expectedText
, -1, actual
, -1) != 0) {
100 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file
, line
, expected
);
101 c
= utext_next32From(actual
, 0);
102 while (c
!= U_SENTINEL
) {
103 if (0x20<c
&& c
<0x7e) {
108 c
= UTEXT_NEXT32(actual
);
112 utext_close(&expectedText
);
/* Call test_assert_utext() with the file and line of the call site. */
#define TEST_ASSERT_UTEXT(expected, actual) \
    test_assert_utext(expected, actual, __FILE__, __LINE__)
119 static void TestRegexCAPI(void);
120 static void TestBug4315(void);
121 static void TestUTextAPI(void);
123 void addURegexTest(TestNode
** root
);
125 void addURegexTest(TestNode
** root
)
127 addTest(root
, &TestRegexCAPI
, "regex/TestRegexCAPI");
128 addTest(root
, &TestBug4315
, "regex/TestBug4315");
129 addTest(root
, &TestUTextAPI
, "regex/TestUTextAPI");
133 * Call back function and context struct used for testing
134 * regular expression user callbacks. This test is mostly the same as
135 * the corresponding C++ test in intltest.
137 typedef struct callBackContext
{
143 static UBool U_EXPORT2 U_CALLCONV
144 TestCallbackFn(const void *context
, int32_t steps
) {
145 callBackContext
*info
= (callBackContext
*)context
;
146 if (info
->lastSteps
+1 != steps
) {
147 log_err("incorrect steps in callback. Expected %d, got %d\n", info
->lastSteps
+1, steps
);
149 info
->lastSteps
= steps
;
151 return (info
->numCalls
< info
->maxCalls
);
155 * Regular Expression C API Tests
157 static void TestRegexCAPI(void) {
158 UErrorCode status
= U_ZERO_ERROR
;
159 URegularExpression
*re
;
163 memset(&minus1
, -1, sizeof(minus1
));
165 /* Minimalist open/close */
166 u_uastrncpy(pat
, "abc*", sizeof(pat
)/2);
167 re
= uregex_open(pat
, -1, 0, 0, &status
);
168 if (U_FAILURE(status
)) {
169 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
174 /* Open with all flag values set */
175 status
= U_ZERO_ERROR
;
176 re
= uregex_open(pat
, -1,
177 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
179 TEST_ASSERT_SUCCESS(status
);
182 /* Open with an invalid flag */
183 status
= U_ZERO_ERROR
;
184 re
= uregex_open(pat
, -1, 0x40000000, 0, &status
);
185 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
188 /* Open with an unimplemented flag */
189 status
= U_ZERO_ERROR
;
190 re
= uregex_open(pat
, -1, UREGEX_LITERAL
, 0, &status
);
191 TEST_ASSERT(status
== U_REGEX_UNIMPLEMENTED
);
194 /* openC with an invalid parameter */
195 status
= U_ZERO_ERROR
;
196 re
= uregex_openC(NULL
,
197 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
198 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
200 /* openC with an invalid parameter */
201 status
= U_USELESS_COLLATOR_ERROR
;
202 re
= uregex_openC(NULL
,
203 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
204 TEST_ASSERT(status
== U_USELESS_COLLATOR_ERROR
&& re
== NULL
);
206 /* openC open from a C string */
210 status
= U_ZERO_ERROR
;
211 re
= uregex_openC("abc*", 0, 0, &status
);
212 TEST_ASSERT_SUCCESS(status
);
213 p
= uregex_pattern(re
, &len
, &status
);
214 TEST_ASSERT_SUCCESS(status
);
216 /* The TEST_ASSERT_SUCCESS above should change too... */
217 if(U_SUCCESS(status
)) {
218 u_uastrncpy(pat
, "abc*", sizeof(pat
)/2);
219 TEST_ASSERT(u_strcmp(pat
, p
) == 0);
220 TEST_ASSERT(len
==(int32_t)strlen("abc*"));
225 /* TODO: Open with ParseError parameter */
232 URegularExpression
*clone1
;
233 URegularExpression
*clone2
;
234 URegularExpression
*clone3
;
235 UChar testString1
[30];
236 UChar testString2
[30];
240 status
= U_ZERO_ERROR
;
241 re
= uregex_openC("abc*", 0, 0, &status
);
242 TEST_ASSERT_SUCCESS(status
);
243 clone1
= uregex_clone(re
, &status
);
244 TEST_ASSERT_SUCCESS(status
);
245 TEST_ASSERT(clone1
!= NULL
);
247 status
= U_ZERO_ERROR
;
248 clone2
= uregex_clone(re
, &status
);
249 TEST_ASSERT_SUCCESS(status
);
250 TEST_ASSERT(clone2
!= NULL
);
253 status
= U_ZERO_ERROR
;
254 clone3
= uregex_clone(clone2
, &status
);
255 TEST_ASSERT_SUCCESS(status
);
256 TEST_ASSERT(clone3
!= NULL
);
258 u_uastrncpy(testString1
, "abcccd", sizeof(pat
)/2);
259 u_uastrncpy(testString2
, "xxxabcccd", sizeof(pat
)/2);
261 status
= U_ZERO_ERROR
;
262 uregex_setText(clone1
, testString1
, -1, &status
);
263 TEST_ASSERT_SUCCESS(status
);
264 result
= uregex_lookingAt(clone1
, 0, &status
);
265 TEST_ASSERT_SUCCESS(status
);
266 TEST_ASSERT(result
==TRUE
);
268 status
= U_ZERO_ERROR
;
269 uregex_setText(clone2
, testString2
, -1, &status
);
270 TEST_ASSERT_SUCCESS(status
);
271 result
= uregex_lookingAt(clone2
, 0, &status
);
272 TEST_ASSERT_SUCCESS(status
);
273 TEST_ASSERT(result
==FALSE
);
274 result
= uregex_find(clone2
, 0, &status
);
275 TEST_ASSERT_SUCCESS(status
);
276 TEST_ASSERT(result
==TRUE
);
278 uregex_close(clone1
);
279 uregex_close(clone2
);
280 uregex_close(clone3
);
288 const UChar
*resultPat
;
290 u_uastrncpy(pat
, "hello", sizeof(pat
)/2);
291 status
= U_ZERO_ERROR
;
292 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
293 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
294 TEST_ASSERT_SUCCESS(status
);
296 /* The TEST_ASSERT_SUCCESS above should change too... */
297 if (U_SUCCESS(status
)) {
298 TEST_ASSERT(resultLen
== -1);
299 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
304 status
= U_ZERO_ERROR
;
305 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
306 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
307 TEST_ASSERT_SUCCESS(status
);
308 TEST_ASSERT_SUCCESS(status
);
310 /* The TEST_ASSERT_SUCCESS above should change too... */
311 if (U_SUCCESS(status
)) {
312 TEST_ASSERT(resultLen
== 3);
313 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
314 TEST_ASSERT(u_strlen(resultPat
) == 3);
326 status
= U_ZERO_ERROR
;
327 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
328 t
= uregex_flags(re
, &status
);
329 TEST_ASSERT_SUCCESS(status
);
333 status
= U_ZERO_ERROR
;
334 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
335 t
= uregex_flags(re
, &status
);
336 TEST_ASSERT_SUCCESS(status
);
340 status
= U_ZERO_ERROR
;
341 re
= uregex_open(pat
, -1, UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
, NULL
, &status
);
342 t
= uregex_flags(re
, &status
);
343 TEST_ASSERT_SUCCESS(status
);
344 TEST_ASSERT(t
== (UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
));
349 * setText() and lookingAt()
356 u_uastrncpy(text1
, "abcccd", sizeof(text1
)/2);
357 u_uastrncpy(text2
, "abcccxd", sizeof(text2
)/2);
358 status
= U_ZERO_ERROR
;
359 u_uastrncpy(pat
, "abc*d", sizeof(pat
)/2);
360 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
361 TEST_ASSERT_SUCCESS(status
);
363 /* Operation before doing a setText should fail... */
364 status
= U_ZERO_ERROR
;
365 uregex_lookingAt(re
, 0, &status
);
366 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
368 status
= U_ZERO_ERROR
;
369 uregex_setText(re
, text1
, -1, &status
);
370 result
= uregex_lookingAt(re
, 0, &status
);
371 TEST_ASSERT(result
== TRUE
);
372 TEST_ASSERT_SUCCESS(status
);
374 status
= U_ZERO_ERROR
;
375 uregex_setText(re
, text2
, -1, &status
);
376 result
= uregex_lookingAt(re
, 0, &status
);
377 TEST_ASSERT(result
== FALSE
);
378 TEST_ASSERT_SUCCESS(status
);
380 status
= U_ZERO_ERROR
;
381 uregex_setText(re
, text1
, -1, &status
);
382 result
= uregex_lookingAt(re
, 0, &status
);
383 TEST_ASSERT(result
== TRUE
);
384 TEST_ASSERT_SUCCESS(status
);
386 status
= U_ZERO_ERROR
;
387 uregex_setText(re
, text1
, 5, &status
);
388 result
= uregex_lookingAt(re
, 0, &status
);
389 TEST_ASSERT(result
== FALSE
);
390 TEST_ASSERT_SUCCESS(status
);
392 status
= U_ZERO_ERROR
;
393 uregex_setText(re
, text1
, 6, &status
);
394 result
= uregex_lookingAt(re
, 0, &status
);
395 TEST_ASSERT(result
== TRUE
);
396 TEST_ASSERT_SUCCESS(status
);
411 u_uastrncpy(text1
, "abcccd", sizeof(text1
)/2);
412 u_uastrncpy(text2
, "abcccxd", sizeof(text2
)/2);
413 status
= U_ZERO_ERROR
;
414 u_uastrncpy(pat
, "abc*d", sizeof(pat
)/2);
415 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
417 uregex_setText(re
, text1
, -1, &status
);
418 result
= uregex_getText(re
, &textLength
, &status
);
419 TEST_ASSERT(result
== text1
);
420 TEST_ASSERT(textLength
== -1);
421 TEST_ASSERT_SUCCESS(status
);
423 status
= U_ZERO_ERROR
;
424 uregex_setText(re
, text2
, 7, &status
);
425 result
= uregex_getText(re
, &textLength
, &status
);
426 TEST_ASSERT(result
== text2
);
427 TEST_ASSERT(textLength
== 7);
428 TEST_ASSERT_SUCCESS(status
);
430 status
= U_ZERO_ERROR
;
431 uregex_setText(re
, text2
, 4, &status
);
432 result
= uregex_getText(re
, &textLength
, &status
);
433 TEST_ASSERT(result
== text2
);
434 TEST_ASSERT(textLength
== 4);
435 TEST_ASSERT_SUCCESS(status
);
446 UChar nullString
[] = {0,0,0};
448 u_uastrncpy(text1
, "abcccde", sizeof(text1
)/2);
449 status
= U_ZERO_ERROR
;
450 u_uastrncpy(pat
, "abc*d", sizeof(pat
)/2);
451 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
453 uregex_setText(re
, text1
, -1, &status
);
454 result
= uregex_matches(re
, 0, &status
);
455 TEST_ASSERT(result
== FALSE
);
456 TEST_ASSERT_SUCCESS(status
);
458 status
= U_ZERO_ERROR
;
459 uregex_setText(re
, text1
, 6, &status
);
460 result
= uregex_matches(re
, 0, &status
);
461 TEST_ASSERT(result
== TRUE
);
462 TEST_ASSERT_SUCCESS(status
);
464 status
= U_ZERO_ERROR
;
465 uregex_setText(re
, text1
, 6, &status
);
466 result
= uregex_matches(re
, 1, &status
);
467 TEST_ASSERT(result
== FALSE
);
468 TEST_ASSERT_SUCCESS(status
);
471 status
= U_ZERO_ERROR
;
472 re
= uregex_openC(".?", 0, NULL
, &status
);
473 uregex_setText(re
, text1
, -1, &status
);
474 len
= u_strlen(text1
);
475 result
= uregex_matches(re
, len
, &status
);
476 TEST_ASSERT(result
== TRUE
);
477 TEST_ASSERT_SUCCESS(status
);
479 status
= U_ZERO_ERROR
;
480 uregex_setText(re
, nullString
, -1, &status
);
481 TEST_ASSERT_SUCCESS(status
);
482 result
= uregex_matches(re
, 0, &status
);
483 TEST_ASSERT(result
== TRUE
);
484 TEST_ASSERT_SUCCESS(status
);
490 * lookingAt() Used in setText test.
495 * find(), findNext, start, end, reset
500 u_uastrncpy(text1
, "012rx5rx890rxrx...", sizeof(text1
)/2);
501 status
= U_ZERO_ERROR
;
502 re
= uregex_openC("rx", 0, NULL
, &status
);
504 uregex_setText(re
, text1
, -1, &status
);
505 result
= uregex_find(re
, 0, &status
);
506 TEST_ASSERT(result
== TRUE
);
507 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
508 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
509 TEST_ASSERT_SUCCESS(status
);
511 result
= uregex_find(re
, 9, &status
);
512 TEST_ASSERT(result
== TRUE
);
513 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
514 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
515 TEST_ASSERT_SUCCESS(status
);
517 result
= uregex_find(re
, 14, &status
);
518 TEST_ASSERT(result
== FALSE
);
519 TEST_ASSERT_SUCCESS(status
);
521 status
= U_ZERO_ERROR
;
522 uregex_reset(re
, 0, &status
);
524 result
= uregex_findNext(re
, &status
);
525 TEST_ASSERT(result
== TRUE
);
526 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
527 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
528 TEST_ASSERT_SUCCESS(status
);
530 result
= uregex_findNext(re
, &status
);
531 TEST_ASSERT(result
== TRUE
);
532 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
533 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
534 TEST_ASSERT_SUCCESS(status
);
536 status
= U_ZERO_ERROR
;
537 uregex_reset(re
, 12, &status
);
539 result
= uregex_findNext(re
, &status
);
540 TEST_ASSERT(result
== TRUE
);
541 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
542 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
543 TEST_ASSERT_SUCCESS(status
);
545 result
= uregex_findNext(re
, &status
);
546 TEST_ASSERT(result
== FALSE
);
547 TEST_ASSERT_SUCCESS(status
);
558 status
= U_ZERO_ERROR
;
559 re
= uregex_openC("abc", 0, NULL
, &status
);
560 result
= uregex_groupCount(re
, &status
);
561 TEST_ASSERT_SUCCESS(status
);
562 TEST_ASSERT(result
== 0);
565 status
= U_ZERO_ERROR
;
566 re
= uregex_openC("abc(def)(ghi(j))", 0, NULL
, &status
);
567 result
= uregex_groupCount(re
, &status
);
568 TEST_ASSERT_SUCCESS(status
);
569 TEST_ASSERT(result
== 3);
583 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", sizeof(text1
)/2);
585 status
= U_ZERO_ERROR
;
586 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
587 TEST_ASSERT_SUCCESS(status
);
590 uregex_setText(re
, text1
, -1, &status
);
591 result
= uregex_find(re
, 0, &status
);
592 TEST_ASSERT(result
==TRUE
);
594 /* Capture Group 0, the full match. Should succeed. */
595 status
= U_ZERO_ERROR
;
596 resultSz
= uregex_group(re
, 0, buf
, sizeof(buf
)/2, &status
);
597 TEST_ASSERT_SUCCESS(status
);
598 TEST_ASSERT_STRING("abc interior def", buf
, TRUE
);
599 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
601 /* Capture group #1. Should succeed. */
602 status
= U_ZERO_ERROR
;
603 resultSz
= uregex_group(re
, 1, buf
, sizeof(buf
)/2, &status
);
604 TEST_ASSERT_SUCCESS(status
);
605 TEST_ASSERT_STRING(" interior ", buf
, TRUE
);
606 TEST_ASSERT(resultSz
== (int32_t)strlen(" interior "));
608 /* Capture group out of range. Error. */
609 status
= U_ZERO_ERROR
;
610 uregex_group(re
, 2, buf
, sizeof(buf
)/2, &status
);
611 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
613 /* NULL buffer, pure pre-flight */
614 status
= U_ZERO_ERROR
;
615 resultSz
= uregex_group(re
, 0, NULL
, 0, &status
);
616 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
617 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
619 /* Too small buffer, truncated string */
620 status
= U_ZERO_ERROR
;
621 memset(buf
, -1, sizeof(buf
));
622 resultSz
= uregex_group(re
, 0, buf
, 5, &status
);
623 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
624 TEST_ASSERT_STRING("abc i", buf
, FALSE
);
625 TEST_ASSERT(buf
[5] == (UChar
)0xffff);
626 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
628 /* Output string just fits buffer, no NUL term. */
629 status
= U_ZERO_ERROR
;
630 resultSz
= uregex_group(re
, 0, buf
, (int32_t)strlen("abc interior def"), &status
);
631 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
632 TEST_ASSERT_STRING("abc interior def", buf
, FALSE
);
633 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
634 TEST_ASSERT(buf
[strlen("abc interior def")] == (UChar
)0xffff);
645 /* SetRegion(), getRegion() do something */
646 TEST_SETUP(".*", "0123456789ABCDEF", 0)
647 UChar resultString
[40];
648 TEST_ASSERT(uregex_regionStart(re
, &status
) == 0);
649 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 16);
650 uregex_setRegion(re
, 3, 6, &status
);
651 TEST_ASSERT(uregex_regionStart(re
, &status
) == 3);
652 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 6);
653 TEST_ASSERT(uregex_findNext(re
, &status
));
654 TEST_ASSERT(uregex_group(re
, 0, resultString
, sizeof(resultString
)/2, &status
) == 3)
655 TEST_ASSERT_STRING("345", resultString
, TRUE
);
658 /* find(start=-1) uses regions */
659 TEST_SETUP(".*", "0123456789ABCDEF", 0);
660 uregex_setRegion(re
, 4, 6, &status
);
661 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
662 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
663 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
666 /* find (start >=0) does not use regions */
667 TEST_SETUP(".*", "0123456789ABCDEF", 0);
668 uregex_setRegion(re
, 4, 6, &status
);
669 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
670 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
671 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
674 /* findNext() obeys regions */
675 TEST_SETUP(".", "0123456789ABCDEF", 0);
676 uregex_setRegion(re
, 4, 6, &status
);
677 TEST_ASSERT(uregex_findNext(re
,&status
) == TRUE
);
678 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
679 TEST_ASSERT(uregex_findNext(re
, &status
) == TRUE
);
680 TEST_ASSERT(uregex_start(re
, 0, &status
) == 5);
681 TEST_ASSERT(uregex_findNext(re
, &status
) == FALSE
);
684 /* matches(start=-1) uses regions */
685 /* Also, verify that non-greedy *? succeeds in finding the full match. */
686 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
687 uregex_setRegion(re
, 4, 6, &status
);
688 TEST_ASSERT(uregex_matches(re
, -1, &status
) == TRUE
);
689 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
690 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
693 /* matches (start >=0) does not use regions */
694 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
695 uregex_setRegion(re
, 4, 6, &status
);
696 TEST_ASSERT(uregex_matches(re
, 0, &status
) == TRUE
);
697 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
698 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
701 /* lookingAt(start=-1) uses regions */
702 /* Also, verify that non-greedy *? finds the first (shortest) match. */
703 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
704 uregex_setRegion(re
, 4, 6, &status
);
705 TEST_ASSERT(uregex_lookingAt(re
, -1, &status
) == TRUE
);
706 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
707 TEST_ASSERT(uregex_end(re
, 0, &status
) == 4);
710 /* lookingAt (start >=0) does not use regions */
711 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
712 uregex_setRegion(re
, 4, 6, &status
);
713 TEST_ASSERT(uregex_lookingAt(re
, 0, &status
) == TRUE
);
714 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
715 TEST_ASSERT(uregex_end(re
, 0, &status
) == 0);
719 TEST_SETUP("[a-f]*", "abcdefghij", 0);
720 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
721 TEST_ASSERT(uregex_hitEnd(re
, &status
) == FALSE
);
724 TEST_SETUP("[a-f]*", "abcdef", 0);
725 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
726 TEST_ASSERT(uregex_hitEnd(re
, &status
) == TRUE
);
730 TEST_SETUP("abcd", "abcd", 0);
731 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
732 TEST_ASSERT(uregex_requireEnd(re
, &status
) == FALSE
);
735 TEST_SETUP("abcd$", "abcd", 0);
736 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
737 TEST_ASSERT(uregex_requireEnd(re
, &status
) == TRUE
);
740 /* anchoringBounds */
741 TEST_SETUP("abc$", "abcdef", 0);
742 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == TRUE
);
743 uregex_useAnchoringBounds(re
, FALSE
, &status
);
744 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == FALSE
);
746 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
);
747 uregex_useAnchoringBounds(re
, TRUE
, &status
);
748 uregex_setRegion(re
, 0, 3, &status
);
749 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
750 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
753 /* Transparent Bounds */
754 TEST_SETUP("abc(?=def)", "abcdef", 0);
755 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == FALSE
);
756 uregex_useTransparentBounds(re
, TRUE
, &status
);
757 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == TRUE
);
759 uregex_useTransparentBounds(re
, FALSE
, &status
);
760 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* No Region */
761 uregex_setRegion(re
, 0, 3, &status
);
762 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
); /* with region, opaque bounds */
763 uregex_useTransparentBounds(re
, TRUE
, &status
);
764 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* with region, transparent bounds */
765 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
778 u_uastrncpy(text1
, "Replace xaax x1x x...x.", sizeof(text1
)/2);
779 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
780 u_uastrncpy(replText
, "<$1>", sizeof(replText
)/2);
782 status
= U_ZERO_ERROR
;
783 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
784 TEST_ASSERT_SUCCESS(status
);
786 /* Normal case, with match */
787 uregex_setText(re
, text1
, -1, &status
);
788 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
789 TEST_ASSERT_SUCCESS(status
);
790 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, TRUE
);
791 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
793 /* No match. Text should copy to output with no changes. */
794 status
= U_ZERO_ERROR
;
795 uregex_setText(re
, text2
, -1, &status
);
796 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
797 TEST_ASSERT_SUCCESS(status
);
798 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
799 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
801 /* Match, output just fills buffer, no termination warning. */
802 status
= U_ZERO_ERROR
;
803 uregex_setText(re
, text1
, -1, &status
);
804 memset(buf
, -1, sizeof(buf
));
805 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
806 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
807 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
808 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
809 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
811 /* Do the replaceFirst again, without first resetting anything.
812 * Should give the same results.
814 status
= U_ZERO_ERROR
;
815 memset(buf
, -1, sizeof(buf
));
816 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
817 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
818 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
819 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
820 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
822 /* NULL buffer, zero buffer length */
823 status
= U_ZERO_ERROR
;
824 resultSz
= uregex_replaceFirst(re
, replText
, -1, NULL
, 0, &status
);
825 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
826 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
828 /* Buffer too small by one */
829 status
= U_ZERO_ERROR
;
830 memset(buf
, -1, sizeof(buf
));
831 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x.")-1, &status
);
832 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
833 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf
, FALSE
);
834 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
835 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
845 UChar text1
[80]; /* "Replace xaax x1x x...x." */
846 UChar text2
[80]; /* "No match Here" */
847 UChar replText
[80]; /* "<$1>" */
848 UChar replText2
[80]; /* "<<$1>>" */
849 const char * pattern
= "x(.*?)x";
850 const char * expectedResult
= "Replace <aa> <1> <...>.";
851 const char * expectedResult2
= "Replace <<aa>> <<1>> <<...>>.";
854 int32_t expectedResultSize
;
855 int32_t expectedResultSize2
;
858 u_uastrncpy(text1
, "Replace xaax x1x x...x.", sizeof(text1
)/2);
859 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
860 u_uastrncpy(replText
, "<$1>", sizeof(replText
)/2);
861 u_uastrncpy(replText2
, "<<$1>>", sizeof(replText2
)/2);
862 expectedResultSize
= strlen(expectedResult
);
863 expectedResultSize2
= strlen(expectedResult2
);
865 status
= U_ZERO_ERROR
;
866 re
= uregex_openC(pattern
, 0, NULL
, &status
);
867 TEST_ASSERT_SUCCESS(status
);
869 /* Normal case, with match */
870 uregex_setText(re
, text1
, -1, &status
);
871 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
872 TEST_ASSERT_SUCCESS(status
);
873 TEST_ASSERT_STRING(expectedResult
, buf
, TRUE
);
874 TEST_ASSERT(resultSize
== expectedResultSize
);
876 /* No match. Text should copy to output with no changes. */
877 status
= U_ZERO_ERROR
;
878 uregex_setText(re
, text2
, -1, &status
);
879 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, sizeof(buf
)/2, &status
);
880 TEST_ASSERT_SUCCESS(status
);
881 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
882 TEST_ASSERT(resultSize
== u_strlen(text2
));
884 /* Match, output just fills buffer, no termination warning. */
885 status
= U_ZERO_ERROR
;
886 uregex_setText(re
, text1
, -1, &status
);
887 memset(buf
, -1, sizeof(buf
));
888 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, expectedResultSize
, &status
);
889 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
890 TEST_ASSERT_STRING(expectedResult
, buf
, FALSE
);
891 TEST_ASSERT(resultSize
== expectedResultSize
);
892 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
894 /* Do the replaceAll again, without first resetting anything.
895 * Should give the same results.
897 status
= U_ZERO_ERROR
;
898 memset(buf
, -1, sizeof(buf
));
899 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, strlen("Replace xaax x1x x...x."), &status
);
900 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
901 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
902 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
903 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
905 /* NULL buffer, zero buffer length */
906 status
= U_ZERO_ERROR
;
907 resultSize
= uregex_replaceAll(re
, replText
, -1, NULL
, 0, &status
);
908 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
909 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
911 /* Buffer too small. Try every size, which will tickle edge cases
912 * in uregex_appendReplacement (used by replaceAll) */
913 for (i
=0; i
<expectedResultSize
; i
++) {
915 status
= U_ZERO_ERROR
;
916 memset(buf
, -1, sizeof(buf
));
917 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, i
, &status
);
918 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
919 strcpy(expected
, expectedResult
);
921 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
922 TEST_ASSERT(resultSize
== expectedResultSize
);
923 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
926 /* Buffer too small. Same as previous test, except this time the replacement
927 * text is longer than the match capture group, making the length of the complete
928 * replacement longer than the original string.
930 for (i
=0; i
<expectedResultSize2
; i
++) {
932 status
= U_ZERO_ERROR
;
933 memset(buf
, -1, sizeof(buf
));
934 resultSize
= uregex_replaceAll(re
, replText2
, -1, buf
, i
, &status
);
935 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
936 strcpy(expected
, expectedResult2
);
938 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
939 TEST_ASSERT(resultSize
== expectedResultSize2
);
940 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
949 * appendReplacement()
959 status
= U_ZERO_ERROR
;
960 re
= uregex_openC(".*", 0, 0, &status
);
961 TEST_ASSERT_SUCCESS(status
);
963 u_uastrncpy(text
, "whatever", sizeof(text
)/2);
964 u_uastrncpy(repl
, "some other", sizeof(repl
)/2);
965 uregex_setText(re
, text
, -1, &status
);
967 /* match covers whole target string */
968 uregex_find(re
, 0, &status
);
969 TEST_ASSERT_SUCCESS(status
);
971 bufCap
= sizeof(buf
) / 2;
972 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
973 TEST_ASSERT_SUCCESS(status
);
974 TEST_ASSERT_STRING("some other", buf
, TRUE
);
976 /* Match has \u \U escapes */
977 uregex_find(re
, 0, &status
);
978 TEST_ASSERT_SUCCESS(status
);
980 bufCap
= sizeof(buf
) / 2;
981 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl
)/2);
982 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
983 TEST_ASSERT_SUCCESS(status
);
984 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
986 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
987 status
= U_ZERO_ERROR
;
988 uregex_find(re
, 0, &status
);
989 TEST_ASSERT_SUCCESS(status
);
991 status
= U_BUFFER_OVERFLOW_ERROR
;
992 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, NULL
, &status
);
993 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1000 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1007 UChar textToSplit
[80];
1012 int32_t requiredCapacity
;
1013 int32_t spaceNeeded
;
1016 u_uastrncpy(textToSplit
, "first : second: third", sizeof(textToSplit
)/2);
1017 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
1019 status
= U_ZERO_ERROR
;
1020 re
= uregex_openC(":", 0, NULL
, &status
);
1025 uregex_setText(re
, textToSplit
, -1, &status
);
1026 TEST_ASSERT_SUCCESS(status
);
1028 /* The TEST_ASSERT_SUCCESS call above should change too... */
1029 if (U_SUCCESS(status
)) {
1030 memset(fields
, -1, sizeof(fields
));
1032 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 10, &status
);
1033 TEST_ASSERT_SUCCESS(status
);
1035 /* The TEST_ASSERT_SUCCESS call above should change too... */
1036 if(U_SUCCESS(status
)) {
1037 TEST_ASSERT(numFields
== 3);
1038 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1039 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1040 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1041 TEST_ASSERT(fields
[3] == NULL
);
1043 spaceNeeded
= u_strlen(textToSplit
) -
1044 (numFields
- 1) + /* Field delimiters do not appear in output */
1045 numFields
; /* Each field gets a NUL terminator */
1047 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1054 /* Split with too few output strings available */
1055 status
= U_ZERO_ERROR
;
1056 re
= uregex_openC(":", 0, NULL
, &status
);
1057 uregex_setText(re
, textToSplit
, -1, &status
);
1058 TEST_ASSERT_SUCCESS(status
);
1060 /* The TEST_ASSERT_SUCCESS call above should change too... */
1061 if(U_SUCCESS(status
)) {
1062 memset(fields
, -1, sizeof(fields
));
1064 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 2, &status
);
1065 TEST_ASSERT_SUCCESS(status
);
1067 /* The TEST_ASSERT_SUCCESS call above should change too... */
1068 if(U_SUCCESS(status
)) {
1069 TEST_ASSERT(numFields
== 2);
1070 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1071 TEST_ASSERT_STRING(" second: third", fields
[1], TRUE
);
1072 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1074 spaceNeeded
= u_strlen(textToSplit
) -
1075 (numFields
- 1) + /* Field delimiters do not appear in output */
1076 numFields
; /* Each field gets a NUL terminator */
1078 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1080 /* Split with a range of output buffer sizes. */
1081 spaceNeeded
= u_strlen(textToSplit
) -
1082 (numFields
- 1) + /* Field delimiters do not appear in output */
1083 numFields
; /* Each field gets a NUL terminator */
1085 for (sz
=0; sz
< spaceNeeded
+1; sz
++) {
1086 memset(fields
, -1, sizeof(fields
));
1087 status
= U_ZERO_ERROR
;
1089 uregex_split(re
, buf
, sz
, &requiredCapacity
, fields
, 10, &status
);
1090 if (sz
>= spaceNeeded
) {
1091 TEST_ASSERT_SUCCESS(status
);
1092 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1093 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1094 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1096 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1098 TEST_ASSERT(numFields
== 3);
1099 TEST_ASSERT(fields
[3] == NULL
);
1100 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1111 /* Split(), part 2. Patterns with capture groups. The capture group text
1112 * comes out as additional fields. */
1114 UChar textToSplit
[80];
1118 int32_t requiredCapacity
;
1119 int32_t spaceNeeded
;
1122 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", sizeof(textToSplit
)/2);
1124 status
= U_ZERO_ERROR
;
1125 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
1127 uregex_setText(re
, textToSplit
, -1, &status
);
1128 TEST_ASSERT_SUCCESS(status
);
1130 /* The TEST_ASSERT_SUCCESS call above should change too... */
1131 if(U_SUCCESS(status
)) {
1132 memset(fields
, -1, sizeof(fields
));
1134 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 10, &status
);
1135 TEST_ASSERT_SUCCESS(status
);
1137 /* The TEST_ASSERT_SUCCESS call above should change too... */
1138 if(U_SUCCESS(status
)) {
1139 TEST_ASSERT(numFields
== 5);
1140 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1141 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1142 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1143 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1144 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1145 TEST_ASSERT(fields
[5] == NULL
);
1146 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1147 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1151 /* Split with too few output strings available (2) */
1152 status
= U_ZERO_ERROR
;
1153 memset(fields
, -1, sizeof(fields
));
1155 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 2, &status
);
1156 TEST_ASSERT_SUCCESS(status
);
1158 /* The TEST_ASSERT_SUCCESS call above should change too... */
1159 if(U_SUCCESS(status
)) {
1160 TEST_ASSERT(numFields
== 2);
1161 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1162 TEST_ASSERT_STRING(" second<tag-b> third", fields
[1], TRUE
);
1163 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1165 spaceNeeded
= strlen("first . second<tag-b> third."); /* "." at NUL positions */
1166 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1169 /* Split with too few output strings available (3) */
1170 status
= U_ZERO_ERROR
;
1171 memset(fields
, -1, sizeof(fields
));
1173 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 3, &status
);
1174 TEST_ASSERT_SUCCESS(status
);
1176 /* The TEST_ASSERT_SUCCESS call above should change too... */
1177 if(U_SUCCESS(status
)) {
1178 TEST_ASSERT(numFields
== 3);
1179 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1180 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1181 TEST_ASSERT_STRING(" second<tag-b> third", fields
[2], TRUE
);
1182 TEST_ASSERT(!memcmp(&fields
[3],&minus1
,sizeof(UChar
*)));
1184 spaceNeeded
= strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1185 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1188 /* Split with just enough output strings available (5) */
1189 status
= U_ZERO_ERROR
;
1190 memset(fields
, -1, sizeof(fields
));
1192 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 5, &status
);
1193 TEST_ASSERT_SUCCESS(status
);
1195 /* The TEST_ASSERT_SUCCESS call above should change too... */
1196 if(U_SUCCESS(status
)) {
1197 TEST_ASSERT(numFields
== 5);
1198 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1199 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1200 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1201 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1202 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1203 TEST_ASSERT(!memcmp(&fields
[5],&minus1
,sizeof(UChar
*)));
1205 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1206 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1209 /* Split, end of text is a field delimiter. */
1210 status
= U_ZERO_ERROR
;
1211 sz
= strlen("first <tag-a> second<tag-b>");
1212 uregex_setText(re
, textToSplit
, sz
, &status
);
1213 TEST_ASSERT_SUCCESS(status
);
1215 /* The TEST_ASSERT_SUCCESS call above should change too... */
1216 if(U_SUCCESS(status
)) {
1217 memset(fields
, -1, sizeof(fields
));
1219 uregex_split(re
, buf
, sizeof(buf
)/2, &requiredCapacity
, fields
, 9, &status
);
1220 TEST_ASSERT_SUCCESS(status
);
1222 /* The TEST_ASSERT_SUCCESS call above should change too... */
1223 if(U_SUCCESS(status
)) {
1224 TEST_ASSERT(numFields
== 4);
1225 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1226 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1227 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1228 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1229 TEST_ASSERT(fields
[4] == NULL
);
1230 TEST_ASSERT(fields
[8] == NULL
);
1231 TEST_ASSERT(!memcmp(&fields
[9],&minus1
,sizeof(UChar
*)));
1232 spaceNeeded
= strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */
1233 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1243 TEST_SETUP("abc$", "abcdef", 0);
1244 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 0);
1245 uregex_setTimeLimit(re
, 1000, &status
);
1246 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1247 TEST_ASSERT_SUCCESS(status
);
1248 uregex_setTimeLimit(re
, -1, &status
);
1249 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1250 status
= U_ZERO_ERROR
;
1251 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1255 * set/get Stack Limit
1257 TEST_SETUP("abc$", "abcdef", 0);
1258 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 8000000);
1259 uregex_setStackLimit(re
, 40000, &status
);
1260 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1261 TEST_ASSERT_SUCCESS(status
);
1262 uregex_setStackLimit(re
, -1, &status
);
1263 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1264 status
= U_ZERO_ERROR
;
1265 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1270 * Get/Set callback functions
1271 * This test is copied from intltest regex/Callbacks
1272 * The pattern and test data will run long enough to cause the callback
1273 * to be invoked. The nested '+' operators give exponential time
1274 * behavior with increasing string length.
1276 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1277 callBackContext cbInfo
= {4, 0, 0};
1278 const void *pContext
= &cbInfo
;
1279 URegexMatchCallback
*returnedFn
= &TestCallbackFn
;
1281 /* Getting the callback fn when it hasn't been set must return NULL */
1282 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1283 TEST_ASSERT_SUCCESS(status
);
1284 TEST_ASSERT(returnedFn
== NULL
);
1285 TEST_ASSERT(pContext
== NULL
);
1287 /* Set the callback and do a match. */
1288 /* The callback function should record that it has been called. */
1289 uregex_setMatchCallback(re
, &TestCallbackFn
, &cbInfo
, &status
);
1290 TEST_ASSERT_SUCCESS(status
);
1291 TEST_ASSERT(cbInfo
.numCalls
== 0);
1292 TEST_ASSERT(uregex_matches(re
, -1, &status
) == FALSE
);
1293 TEST_ASSERT_SUCCESS(status
);
1294 TEST_ASSERT(cbInfo
.numCalls
> 0);
1296 /* Getting the callback should return the values that were set above. */
1297 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1298 TEST_ASSERT(returnedFn
== &TestCallbackFn
);
1299 TEST_ASSERT(pContext
== &cbInfo
);
1306 static void TestBug4315(void) {
1307 UErrorCode theICUError
= U_ZERO_ERROR
;
1308 URegularExpression
*theRegEx
;
1310 const char *thePattern
;
1311 UChar theString
[100];
1312 UChar
*destFields
[24];
1313 int32_t neededLength1
;
1314 int32_t neededLength2
;
1316 int32_t wordCount
= 0;
1317 int32_t destFieldsSize
= 24;
1320 u_uastrcpy(theString
, "The quick brown fox jumped over the slow black turtle.");
1323 theRegEx
= uregex_openC(thePattern
, 0, NULL
, &theICUError
);
1324 TEST_ASSERT_SUCCESS(theICUError
);
1326 /* set the input string */
1327 uregex_setText(theRegEx
, theString
, u_strlen(theString
), &theICUError
);
1328 TEST_ASSERT_SUCCESS(theICUError
);
1331 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1333 wordCount
= uregex_split(theRegEx
, NULL
, 0, &neededLength1
, destFields
,
1334 destFieldsSize
, &theICUError
);
1336 TEST_ASSERT(theICUError
== U_BUFFER_OVERFLOW_ERROR
);
1337 TEST_ASSERT(wordCount
==3);
1339 if(theICUError
== U_BUFFER_OVERFLOW_ERROR
)
1341 theICUError
= U_ZERO_ERROR
;
1342 textBuff
= (UChar
*) malloc(sizeof(UChar
) * (neededLength1
+ 1));
1343 wordCount
= uregex_split(theRegEx
, textBuff
, neededLength1
+1, &neededLength2
,
1344 destFields
, destFieldsSize
, &theICUError
);
1345 TEST_ASSERT(wordCount
==3);
1346 TEST_ASSERT_SUCCESS(theICUError
);
1347 TEST_ASSERT(neededLength1
== neededLength2
);
1348 TEST_ASSERT_STRING("The qui", destFields
[0], TRUE
);
1349 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields
[1], TRUE
);
1350 TEST_ASSERT_STRING("turtle.", destFields
[2], TRUE
);
1351 TEST_ASSERT(destFields
[3] == NULL
);
1354 uregex_close(theRegEx
);
1357 /* Based on TestRegexCAPI() */
1358 static void TestUTextAPI(void) {
1359 UErrorCode status
= U_ZERO_ERROR
;
1360 URegularExpression
*re
;
1361 UText patternText
= UTEXT_INITIALIZER
;
1363 const char patternTextUTF8
[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1365 /* Minimalist open/close */
1366 utext_openUTF8(&patternText
, patternTextUTF8
, -1, &status
);
1367 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1368 if (U_FAILURE(status
)) {
1369 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
1370 utext_close(&patternText
);
1375 /* Open with all flag values set */
1376 status
= U_ZERO_ERROR
;
1377 re
= uregex_openUText(&patternText
,
1378 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
1380 TEST_ASSERT_SUCCESS(status
);
1383 /* Open with an invalid flag */
1384 status
= U_ZERO_ERROR
;
1385 re
= uregex_openUText(&patternText
, 0x40000000, 0, &status
);
1386 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
1389 /* open with an invalid parameter */
1390 status
= U_ZERO_ERROR
;
1391 re
= uregex_openUText(NULL
,
1392 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
1393 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
1399 URegularExpression
*clone1
;
1400 URegularExpression
*clone2
;
1401 URegularExpression
*clone3
;
1402 UChar testString1
[30];
1403 UChar testString2
[30];
1407 status
= U_ZERO_ERROR
;
1408 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1409 TEST_ASSERT_SUCCESS(status
);
1410 clone1
= uregex_clone(re
, &status
);
1411 TEST_ASSERT_SUCCESS(status
);
1412 TEST_ASSERT(clone1
!= NULL
);
1414 status
= U_ZERO_ERROR
;
1415 clone2
= uregex_clone(re
, &status
);
1416 TEST_ASSERT_SUCCESS(status
);
1417 TEST_ASSERT(clone2
!= NULL
);
1420 status
= U_ZERO_ERROR
;
1421 clone3
= uregex_clone(clone2
, &status
);
1422 TEST_ASSERT_SUCCESS(status
);
1423 TEST_ASSERT(clone3
!= NULL
);
1425 u_uastrncpy(testString1
, "abcccd", sizeof(pat
)/2);
1426 u_uastrncpy(testString2
, "xxxabcccd", sizeof(pat
)/2);
1428 status
= U_ZERO_ERROR
;
1429 uregex_setText(clone1
, testString1
, -1, &status
);
1430 TEST_ASSERT_SUCCESS(status
);
1431 result
= uregex_lookingAt(clone1
, 0, &status
);
1432 TEST_ASSERT_SUCCESS(status
);
1433 TEST_ASSERT(result
==TRUE
);
1435 status
= U_ZERO_ERROR
;
1436 uregex_setText(clone2
, testString2
, -1, &status
);
1437 TEST_ASSERT_SUCCESS(status
);
1438 result
= uregex_lookingAt(clone2
, 0, &status
);
1439 TEST_ASSERT_SUCCESS(status
);
1440 TEST_ASSERT(result
==FALSE
);
1441 result
= uregex_find(clone2
, 0, &status
);
1442 TEST_ASSERT_SUCCESS(status
);
1443 TEST_ASSERT(result
==TRUE
);
1445 uregex_close(clone1
);
1446 uregex_close(clone2
);
1447 uregex_close(clone3
);
1452 * pattern() and patternText()
1455 const UChar
*resultPat
;
1458 const char str_hello
[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1459 const char str_hel
[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1460 u_uastrncpy(pat
, "hello", sizeof(pat
)/2); /* for comparison */
1461 status
= U_ZERO_ERROR
;
1463 utext_openUTF8(&patternText
, str_hello
, -1, &status
);
1464 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
1465 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1466 TEST_ASSERT_SUCCESS(status
);
1468 /* The TEST_ASSERT_SUCCESS above should change too... */
1469 if (U_SUCCESS(status
)) {
1470 TEST_ASSERT(resultLen
== -1);
1471 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
1474 resultText
= uregex_patternUText(re
, &status
);
1475 TEST_ASSERT_SUCCESS(status
);
1476 TEST_ASSERT_UTEXT(str_hello
, resultText
);
1480 status
= U_ZERO_ERROR
;
1481 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
1482 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1483 TEST_ASSERT_SUCCESS(status
);
1485 /* The TEST_ASSERT_SUCCESS above should change too... */
1486 if (U_SUCCESS(status
)) {
1487 TEST_ASSERT(resultLen
== 3);
1488 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
1489 TEST_ASSERT(u_strlen(resultPat
) == 3);
1492 resultText
= uregex_patternUText(re
, &status
);
1493 TEST_ASSERT_SUCCESS(status
);
1494 TEST_ASSERT_UTEXT(str_hel
, resultText
);
1500 * setUText() and lookingAt()
1503 UText text1
= UTEXT_INITIALIZER
;
1504 UText text2
= UTEXT_INITIALIZER
;
1506 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1507 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1508 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1509 status
= U_ZERO_ERROR
;
1510 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1511 utext_openUTF8(&text2
, str_abcccxd
, -1, &status
);
1513 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1514 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1515 TEST_ASSERT_SUCCESS(status
);
1517 /* Operation before doing a setText should fail... */
1518 status
= U_ZERO_ERROR
;
1519 uregex_lookingAt(re
, 0, &status
);
1520 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
1522 status
= U_ZERO_ERROR
;
1523 uregex_setUText(re
, &text1
, &status
);
1524 result
= uregex_lookingAt(re
, 0, &status
);
1525 TEST_ASSERT(result
== TRUE
);
1526 TEST_ASSERT_SUCCESS(status
);
1528 status
= U_ZERO_ERROR
;
1529 uregex_setUText(re
, &text2
, &status
);
1530 result
= uregex_lookingAt(re
, 0, &status
);
1531 TEST_ASSERT(result
== FALSE
);
1532 TEST_ASSERT_SUCCESS(status
);
1534 status
= U_ZERO_ERROR
;
1535 uregex_setUText(re
, &text1
, &status
);
1536 result
= uregex_lookingAt(re
, 0, &status
);
1537 TEST_ASSERT(result
== TRUE
);
1538 TEST_ASSERT_SUCCESS(status
);
1541 utext_close(&text1
);
1542 utext_close(&text2
);
1547 * getText() and getUText()
1550 UText text1
= UTEXT_INITIALIZER
;
1551 UText text2
= UTEXT_INITIALIZER
;
1552 UChar text2Chars
[20];
1554 const UChar
*result
;
1556 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1557 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1558 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1561 status
= U_ZERO_ERROR
;
1562 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1563 u_uastrncpy(text2Chars
, str_abcccxd
, sizeof(text2
)/2);
1564 utext_openUChars(&text2
, text2Chars
, -1, &status
);
1566 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1567 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1569 /* First set a UText */
1570 uregex_setUText(re
, &text1
, &status
);
1571 resultText
= uregex_getUText(re
, NULL
, &status
);
1572 TEST_ASSERT_SUCCESS(status
);
1573 TEST_ASSERT(resultText
!= &text1
);
1574 utext_setNativeIndex(resultText
, 0);
1575 utext_setNativeIndex(&text1
, 0);
1576 TEST_ASSERT(utext_compare(resultText
, -1, &text1
, -1) == 0);
1577 utext_close(resultText
);
1579 result
= uregex_getText(re
, &textLength
, &status
); /* flattens UText into buffer */
1580 TEST_ASSERT(textLength
== -1 || textLength
== 6);
1581 resultText
= uregex_getUText(re
, NULL
, &status
);
1582 TEST_ASSERT_SUCCESS(status
);
1583 TEST_ASSERT(resultText
!= &text1
);
1584 utext_setNativeIndex(resultText
, 0);
1585 utext_setNativeIndex(&text1
, 0);
1586 TEST_ASSERT(utext_compare(resultText
, -1, &text1
, -1) == 0);
1587 utext_close(resultText
);
1589 /* Then set a UChar * */
1590 uregex_setText(re
, text2Chars
, 7, &status
);
1591 resultText
= uregex_getUText(re
, NULL
, &status
);
1592 TEST_ASSERT_SUCCESS(status
);
1593 utext_setNativeIndex(resultText
, 0);
1594 utext_setNativeIndex(&text2
, 0);
1595 TEST_ASSERT(utext_compare(resultText
, -1, &text2
, -1) == 0);
1596 utext_close(resultText
);
1597 result
= uregex_getText(re
, &textLength
, &status
);
1598 TEST_ASSERT(textLength
== 7);
1601 utext_close(&text1
);
1602 utext_close(&text2
);
1609 UText text1
= UTEXT_INITIALIZER
;
1611 UText nullText
= UTEXT_INITIALIZER
;
1612 const char str_abcccde
[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1613 const char str_abcd
[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1615 status
= U_ZERO_ERROR
;
1616 utext_openUTF8(&text1
, str_abcccde
, -1, &status
);
1617 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1618 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1620 uregex_setUText(re
, &text1
, &status
);
1621 result
= uregex_matches(re
, 0, &status
);
1622 TEST_ASSERT(result
== FALSE
);
1623 TEST_ASSERT_SUCCESS(status
);
1626 status
= U_ZERO_ERROR
;
1627 re
= uregex_openC(".?", 0, NULL
, &status
);
1628 uregex_setUText(re
, &text1
, &status
);
1629 result
= uregex_matches(re
, 7, &status
);
1630 TEST_ASSERT(result
== TRUE
);
1631 TEST_ASSERT_SUCCESS(status
);
1633 status
= U_ZERO_ERROR
;
1634 utext_openUTF8(&nullText
, "", -1, &status
);
1635 uregex_setUText(re
, &nullText
, &status
);
1636 TEST_ASSERT_SUCCESS(status
);
1637 result
= uregex_matches(re
, 0, &status
);
1638 TEST_ASSERT(result
== TRUE
);
1639 TEST_ASSERT_SUCCESS(status
);
1642 utext_close(&text1
);
1643 utext_close(&nullText
);
1648 * lookingAt() Used in setText test.
1653 * find(), findNext, start, end, reset
1658 u_uastrncpy(text1
, "012rx5rx890rxrx...", sizeof(text1
)/2);
1659 status
= U_ZERO_ERROR
;
1660 re
= uregex_openC("rx", 0, NULL
, &status
);
1662 uregex_setText(re
, text1
, -1, &status
);
1663 result
= uregex_find(re
, 0, &status
);
1664 TEST_ASSERT(result
== TRUE
);
1665 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1666 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1667 TEST_ASSERT_SUCCESS(status
);
1669 result
= uregex_find(re
, 9, &status
);
1670 TEST_ASSERT(result
== TRUE
);
1671 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
1672 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
1673 TEST_ASSERT_SUCCESS(status
);
1675 result
= uregex_find(re
, 14, &status
);
1676 TEST_ASSERT(result
== FALSE
);
1677 TEST_ASSERT_SUCCESS(status
);
1679 status
= U_ZERO_ERROR
;
1680 uregex_reset(re
, 0, &status
);
1682 result
= uregex_findNext(re
, &status
);
1683 TEST_ASSERT(result
== TRUE
);
1684 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1685 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1686 TEST_ASSERT_SUCCESS(status
);
1688 result
= uregex_findNext(re
, &status
);
1689 TEST_ASSERT(result
== TRUE
);
1690 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
1691 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
1692 TEST_ASSERT_SUCCESS(status
);
1694 status
= U_ZERO_ERROR
;
1695 uregex_reset(re
, 12, &status
);
1697 result
= uregex_findNext(re
, &status
);
1698 TEST_ASSERT(result
== TRUE
);
1699 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
1700 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
1701 TEST_ASSERT_SUCCESS(status
);
1703 result
= uregex_findNext(re
, &status
);
1704 TEST_ASSERT(result
== FALSE
);
1705 TEST_ASSERT_SUCCESS(status
);
1718 const char str_abcinteriordef
[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1719 const char str_interior
[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1722 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", sizeof(text1
)/2);
1724 status
= U_ZERO_ERROR
;
1725 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
1726 TEST_ASSERT_SUCCESS(status
);
1728 uregex_setText(re
, text1
, -1, &status
);
1729 result
= uregex_find(re
, 0, &status
);
1730 TEST_ASSERT(result
==TRUE
);
1732 /* Capture Group 0, the full match. Should succeed. */
1733 status
= U_ZERO_ERROR
;
1734 actual
= uregex_groupUTextDeep(re
, 0, NULL
, &status
);
1735 TEST_ASSERT_SUCCESS(status
);
1736 TEST_ASSERT_UTEXT(str_abcinteriordef
, actual
);
1737 utext_close(actual
);
1739 /* Capture Group 0 with shallow clone API. Should succeed. */
1740 status
= U_ZERO_ERROR
;
1744 UErrorCode shallowStatus
= U_ZERO_ERROR
;
1745 int64_t nativeIndex
;
1747 UText groupText
= UTEXT_INITIALIZER
;
1749 actual
= uregex_groupUText(re
, 0, NULL
, &group_len
, &status
);
1750 TEST_ASSERT_SUCCESS(status
);
1752 nativeIndex
= utext_getNativeIndex(actual
);
1753 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */
1754 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */
1757 groupChars
= (UChar
*)malloc(sizeof(UChar
)*(len16
+1));
1758 utext_extract(actual
, nativeIndex
, nativeIndex
+ group_len
, groupChars
, len16
+1, &shallowStatus
);
1760 utext_openUChars(&groupText
, groupChars
, len16
, &shallowStatus
);
1762 TEST_ASSERT_UTEXT(str_abcinteriordef
, &groupText
);
1763 utext_close(&groupText
);
1766 utext_close(actual
);
1768 /* Capture group #1. Should succeed. */
1769 status
= U_ZERO_ERROR
;
1770 actual
= uregex_groupUTextDeep(re
, 1, NULL
, &status
);
1771 TEST_ASSERT_SUCCESS(status
);
1772 TEST_ASSERT_UTEXT(str_interior
, actual
);
1773 utext_close(actual
);
1775 /* Capture group out of range. Error. */
1776 status
= U_ZERO_ERROR
;
1777 actual
= uregex_groupUTextDeep(re
, 2, NULL
, &status
);
1778 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
1779 TEST_ASSERT(utext_nativeLength(actual
) == 0);
1780 utext_close(actual
);
1792 UText replText
= UTEXT_INITIALIZER
;
1794 const char str_Replxxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1795 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1796 const char str_u00411U00000042a
[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1797 const char str_1x
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1798 const char str_ReplaceAaaBax1xxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1799 status
= U_ZERO_ERROR
;
1800 u_uastrncpy(text1
, "Replace xaax x1x x...x.", sizeof(text1
)/2);
1801 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
1802 utext_openUTF8(&replText
, str_1x
, -1, &status
);
1804 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1805 TEST_ASSERT_SUCCESS(status
);
1807 /* Normal case, with match */
1808 uregex_setText(re
, text1
, -1, &status
);
1809 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1810 TEST_ASSERT_SUCCESS(status
);
1811 TEST_ASSERT_UTEXT(str_Replxxx
, result
);
1812 utext_close(result
);
1814 /* No match. Text should copy to output with no changes. */
1815 uregex_setText(re
, text2
, -1, &status
);
1816 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1817 TEST_ASSERT_SUCCESS(status
);
1818 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1819 utext_close(result
);
1821 /* Unicode escapes */
1822 uregex_setText(re
, text1
, -1, &status
);
1823 utext_openUTF8(&replText
, str_u00411U00000042a
, -1, &status
);
1824 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1825 TEST_ASSERT_SUCCESS(status
);
1826 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx
, result
);
1827 utext_close(result
);
1830 utext_close(&replText
);
1840 UText replText
= UTEXT_INITIALIZER
;
1842 const char str_1
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1843 const char str_Replaceaa1
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1844 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1845 status
= U_ZERO_ERROR
;
1846 u_uastrncpy(text1
, "Replace xaax x1x x...x.", sizeof(text1
)/2);
1847 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
1848 utext_openUTF8(&replText
, str_1
, -1, &status
);
1850 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1851 TEST_ASSERT_SUCCESS(status
);
1853 /* Normal case, with match */
1854 uregex_setText(re
, text1
, -1, &status
);
1855 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1856 TEST_ASSERT_SUCCESS(status
);
1857 TEST_ASSERT_UTEXT(str_Replaceaa1
, result
);
1858 utext_close(result
);
1860 /* No match. Text should copy to output with no changes. */
1861 uregex_setText(re
, text2
, -1, &status
);
1862 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1863 TEST_ASSERT_SUCCESS(status
);
1864 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1865 utext_close(result
);
1868 utext_close(&replText
);
1873 * appendReplacement()
1882 status
= U_ZERO_ERROR
;
1883 re
= uregex_openC(".*", 0, 0, &status
);
1884 TEST_ASSERT_SUCCESS(status
);
1886 u_uastrncpy(text
, "whatever", sizeof(text
)/2);
1887 u_uastrncpy(repl
, "some other", sizeof(repl
)/2);
1888 uregex_setText(re
, text
, -1, &status
);
1890 /* match covers whole target string */
1891 uregex_find(re
, 0, &status
);
1892 TEST_ASSERT_SUCCESS(status
);
1894 bufCap
= sizeof(buf
) / 2;
1895 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1896 TEST_ASSERT_SUCCESS(status
);
1897 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1899 /* Match has \u \U escapes */
1900 uregex_find(re
, 0, &status
);
1901 TEST_ASSERT_SUCCESS(status
);
1903 bufCap
= sizeof(buf
) / 2;
1904 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl
)/2);
1905 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1906 TEST_ASSERT_SUCCESS(status
);
1907 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1914 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1921 UChar textToSplit
[80];
1927 u_uastrncpy(textToSplit
, "first : second: third", sizeof(textToSplit
)/2);
1928 u_uastrncpy(text2
, "No match here.", sizeof(text2
)/2);
1930 status
= U_ZERO_ERROR
;
1931 re
= uregex_openC(":", 0, NULL
, &status
);
1936 uregex_setText(re
, textToSplit
, -1, &status
);
1937 TEST_ASSERT_SUCCESS(status
);
1939 /* The TEST_ASSERT_SUCCESS call above should change too... */
1940 if (U_SUCCESS(status
)) {
1941 memset(fields
, 0, sizeof(fields
));
1942 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
1943 TEST_ASSERT_SUCCESS(status
);
1945 /* The TEST_ASSERT_SUCCESS call above should change too... */
1946 if(U_SUCCESS(status
)) {
1947 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1948 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1949 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1950 TEST_ASSERT(numFields
== 3);
1951 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
1952 TEST_ASSERT_UTEXT(str_second
, fields
[1]);
1953 TEST_ASSERT_UTEXT(str_third
, fields
[2]);
1954 TEST_ASSERT(fields
[3] == NULL
);
1956 for(i
= 0; i
< numFields
; i
++) {
1957 utext_close(fields
[i
]);
1964 /* Split with too few output strings available */
1965 status
= U_ZERO_ERROR
;
1966 re
= uregex_openC(":", 0, NULL
, &status
);
1967 uregex_setText(re
, textToSplit
, -1, &status
);
1968 TEST_ASSERT_SUCCESS(status
);
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */
1971 if(U_SUCCESS(status
)) {
1974 fields
[2] = &patternText
;
1975 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
1976 TEST_ASSERT_SUCCESS(status
);
1978 /* The TEST_ASSERT_SUCCESS call above should change too... */
1979 if(U_SUCCESS(status
)) {
1980 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
1981 const char str_secondthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
1982 TEST_ASSERT(numFields
== 2);
1983 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
1984 TEST_ASSERT_UTEXT(str_secondthird
, fields
[1]);
1985 TEST_ASSERT(fields
[2] == &patternText
);
1987 for(i
= 0; i
< numFields
; i
++) {
1988 utext_close(fields
[i
]);
1995 /* splitUText(), part 2. Patterns with capture groups. The capture group text
1996 * comes out as additional fields. */
1998 UChar textToSplit
[80];
2003 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", sizeof(textToSplit
)/2);
2005 status
= U_ZERO_ERROR
;
2006 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
2008 uregex_setText(re
, textToSplit
, -1, &status
);
2009 TEST_ASSERT_SUCCESS(status
);
2011 /* The TEST_ASSERT_SUCCESS call above should change too... */
2012 if(U_SUCCESS(status
)) {
2013 memset(fields
, 0, sizeof(fields
));
2014 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
2015 TEST_ASSERT_SUCCESS(status
);
2017 /* The TEST_ASSERT_SUCCESS call above should change too... */
2018 if(U_SUCCESS(status
)) {
2019 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2020 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2021 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2022 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2023 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2025 TEST_ASSERT(numFields
== 5);
2026 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2027 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2028 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2029 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2030 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2031 TEST_ASSERT(fields
[5] == NULL
);
2033 for(i
= 0; i
< numFields
; i
++) {
2034 utext_close(fields
[i
]);
2038 /* Split with too few output strings available (2) */
2039 status
= U_ZERO_ERROR
;
2042 fields
[2] = &patternText
;
2043 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2044 TEST_ASSERT_SUCCESS(status
);
2046 /* The TEST_ASSERT_SUCCESS call above should change too... */
2047 if(U_SUCCESS(status
)) {
2048 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2049 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2050 TEST_ASSERT(numFields
== 2);
2051 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2052 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[1]);
2053 TEST_ASSERT(fields
[2] == &patternText
);
2055 for(i
= 0; i
< numFields
; i
++) {
2056 utext_close(fields
[i
]);
2060 /* Split with too few output strings available (3) */
2061 status
= U_ZERO_ERROR
;
2065 fields
[3] = &patternText
;
2066 numFields
= uregex_splitUText(re
, fields
, 3, &status
);
2067 TEST_ASSERT_SUCCESS(status
);
2069 /* The TEST_ASSERT_SUCCESS call above should change too... */
2070 if(U_SUCCESS(status
)) {
2071 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2072 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2073 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2074 TEST_ASSERT(numFields
== 3);
2075 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2076 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2077 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[2]);
2078 TEST_ASSERT(fields
[3] == &patternText
);
2080 for(i
= 0; i
< numFields
; i
++) {
2081 utext_close(fields
[i
]);
2084 /* Split with just enough output strings available (5) */
2085 status
= U_ZERO_ERROR
;
2091 fields
[5] = &patternText
;
2092 numFields
= uregex_splitUText(re
, fields
, 5, &status
);
2093 TEST_ASSERT_SUCCESS(status
);
2095 /* The TEST_ASSERT_SUCCESS call above should change too... */
2096 if(U_SUCCESS(status
)) {
2097 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2098 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2099 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2100 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2101 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2103 TEST_ASSERT(numFields
== 5);
2104 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2105 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2106 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2107 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2108 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2109 TEST_ASSERT(fields
[5] == &patternText
);
2111 for(i
= 0; i
< numFields
; i
++) {
2112 utext_close(fields
[i
]);
2115 /* Split, end of text is a field delimiter. */
2116 status
= U_ZERO_ERROR
;
2117 uregex_setText(re
, textToSplit
, strlen("first <tag-a> second<tag-b>"), &status
);
2118 TEST_ASSERT_SUCCESS(status
);
2120 /* The TEST_ASSERT_SUCCESS call above should change too... */
2121 if(U_SUCCESS(status
)) {
2122 memset(fields
, 0, sizeof(fields
));
2123 fields
[9] = &patternText
;
2124 numFields
= uregex_splitUText(re
, fields
, 9, &status
);
2125 TEST_ASSERT_SUCCESS(status
);
2127 /* The TEST_ASSERT_SUCCESS call above should change too... */
2128 if(U_SUCCESS(status
)) {
2129 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2130 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2131 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2132 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2134 TEST_ASSERT(numFields
== 4);
2135 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2136 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2137 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2138 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2139 TEST_ASSERT(fields
[4] == NULL
);
2140 TEST_ASSERT(fields
[8] == NULL
);
2141 TEST_ASSERT(fields
[9] == &patternText
);
2143 for(i
= 0; i
< numFields
; i
++) {
2144 utext_close(fields
[i
]);
2150 utext_close(&patternText
);
2153 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */