1 /********************************************************************
3 * Copyright (c) 2004-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 *********************************************************************************/
11 /*C API TEST FOR Regular Expressions */
13 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14 * try to test the full functionality. It just calls each function and verifies that it
15 * works on a basic level.
17 * More complete testing of regular expression functionality is done with the C++ tests.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
26 #include "unicode/uloc.h"
27 #include "unicode/uregex.h"
28 #include "unicode/ustring.h"
29 #include "unicode/utext.h"
/*
 * TEST_ASSERT_SUCCESS(status)
 *   If status is a failure code (U_FAILURE), report it through log_data_err()
 *   together with the source file, the line number and u_errorName(status).
 *   The macro only logs: it does not return from, or otherwise leave, the
 *   calling function, so the code that follows still runs after a failure.
 *   It expands to a brace-enclosed block rather than to an expression.
 */
33 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
34 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
/*
 * TEST_ASSERT(expr)
 *   Evaluate expr once; if it compares equal to FALSE, report a test failure
 *   through log_err() with the source file, the line number and the text of
 *   the expression (#expr).
 *   The macro only logs: execution continues with the next statement.
 *   It expands to a brace-enclosed block, so an invocation is a complete
 *   statement even when the trailing semicolon is omitted.
 */
36 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
37 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
40 * TEST_SETUP and TEST_TEARDOWN
41 * macros to handle the boilerplate around setting up regex test cases.
42 * parameters to setup:
43 * pattern: The regex pattern, a (char *) null terminated C string.
44 * testString: The string data, also a (char *) C string.
45 * flags: Regex flags to set when compiling the pattern
47 * Put arbitrary test code between SETUP and TEARDOWN.
48 * "re" is the compiled, ready-to-go regular expression.
/*
 * TEST_SETUP(pattern, testString, flags)
 *   Uses the variables `status` and `re` of the enclosing function; neither is
 *   declared by the macro itself.
 *   Steps, in order:
 *     - opens a new brace scope and declares the local `srcString`;
 *     - resets status to U_ZERO_ERROR and compiles pattern into `re` with
 *       uregex_openC(pattern, flags, NULL, &status);
 *     - allocates strlen(testString)+2 UChars with malloc() and converts
 *       testString into that buffer with u_uastrncpy();
 *     - installs the buffer as the match text with uregex_setText(..., -1, ...);
 *     - opens an `if (U_SUCCESS(status)) {` block.
 *   Both the outer brace scope and the `if` block are left open by this macro.
 *   Presumably TEST_TEARDOWN closes them and releases `re` and `srcString` --
 *   TODO confirm against the full TEST_TEARDOWN definition.
 *   NOTE(review): the malloc() result is used without a NULL check, and
 *   testString is expanded three times, so it should be free of side effects.
 */
50 #define TEST_SETUP(pattern, testString, flags) { \
51 UChar *srcString = NULL; \
52 status = U_ZERO_ERROR; \
53 re = uregex_openC(pattern, flags, NULL, &status); \
54 TEST_ASSERT_SUCCESS(status); \
55 srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
56 u_uastrncpy(srcString, testString, strlen(testString)+1); \
57 uregex_setText(re, srcString, -1, &status); \
58 TEST_ASSERT_SUCCESS(status); \
59 if (U_SUCCESS(status)) {
61 #define TEST_TEARDOWN \
63 TEST_ASSERT_SUCCESS(status); \
70 * @param expected utf-8 array of bytes to be expected
72 static void test_assert_string(const char *expected
, const UChar
*actual
, UBool nulTerm
, const char *file
, int line
) {
73 char buf_inside_macro
[120];
74 int32_t len
= (int32_t)strlen(expected
);
77 u_austrncpy(buf_inside_macro
, (actual
), len
+1);
78 buf_inside_macro
[len
+2] = 0;
79 success
= (strcmp((expected
), buf_inside_macro
) == 0);
81 u_austrncpy(buf_inside_macro
, (actual
), len
);
82 buf_inside_macro
[len
+1] = 0;
83 success
= (strncmp((expected
), buf_inside_macro
, len
) == 0);
85 if (success
== FALSE
) {
86 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
87 file
, line
, (expected
), buf_inside_macro
);
/*
 * TEST_ASSERT_STRING(expected, actual, nulTerm)
 *   Forwards its three arguments to test_assert_string(), adding the
 *   __FILE__ and __LINE__ of the call site.
 */
91 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
94 static UBool
equals_utf8_utext(const char *utf8
, UText
*utext
) {
98 UBool stringsEqual
= TRUE
;
99 utext_setNativeIndex(utext
, 0);
101 U8_NEXT_UNSAFE(utf8
, u8i
, u8c
);
102 utc
= utext_next32(utext
);
103 if (u8c
== 0 && utc
== U_SENTINEL
) {
106 if (u8c
!= utc
|| u8c
== 0) {
107 stringsEqual
= FALSE
;
115 static void test_assert_utext(const char *expected
, UText
*actual
, const char *file
, int line
) {
116 utext_setNativeIndex(actual
, 0);
117 if (!equals_utf8_utext(expected
, actual
)) {
119 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file
, line
, expected
);
120 c
= utext_next32From(actual
, 0);
121 while (c
!= U_SENTINEL
) {
122 if (0x20<c
&& c
<0x7e) {
127 c
= UTEXT_NEXT32(actual
);
134 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
135 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
/* Forwards to test_assert_utext(), adding the __FILE__ and __LINE__ of the call site. */
137 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
139 static UBool
testUTextEqual(UText
*uta
, UText
*utb
) {
142 utext_setNativeIndex(uta
, 0);
143 utext_setNativeIndex(utb
, 0);
145 ca
= utext_next32(uta
);
146 cb
= utext_next32(utb
);
150 } while (ca
!= U_SENTINEL
);
157 static void TestRegexCAPI(void);
158 static void TestBug4315(void);
159 static void TestUTextAPI(void);
160 static void TestRefreshInput(void);
161 static void TestBug8421(void);
162 static void TestBug10815(void);
164 void addURegexTest(TestNode
** root
);
166 void addURegexTest(TestNode
** root
)
168 addTest(root
, &TestRegexCAPI
, "regex/TestRegexCAPI");
169 addTest(root
, &TestBug4315
, "regex/TestBug4315");
170 addTest(root
, &TestUTextAPI
, "regex/TestUTextAPI");
171 addTest(root
, &TestRefreshInput
, "regex/TestRefreshInput");
172 addTest(root
, &TestBug8421
, "regex/TestBug8421");
173 addTest(root
, &TestBug10815
, "regex/TestBug10815");
177 * Call back function and context struct used for testing
178 * regular expression user callbacks. This test is mostly the same as
179 * the corresponding C++ test in intltest.
181 typedef struct callBackContext
{
187 static UBool U_EXPORT2 U_CALLCONV
188 TestCallbackFn(const void *context
, int32_t steps
) {
189 callBackContext
*info
= (callBackContext
*)context
;
190 if (info
->lastSteps
+1 != steps
) {
191 log_err("incorrect steps in callback. Expected %d, got %d\n", info
->lastSteps
+1, steps
);
193 info
->lastSteps
= steps
;
195 return (info
->numCalls
< info
->maxCalls
);
199 * Regular Expression C API Tests
201 static void TestRegexCAPI(void) {
202 UErrorCode status
= U_ZERO_ERROR
;
203 URegularExpression
*re
;
207 memset(&minus1
, -1, sizeof(minus1
));
209 /* Minimalist open/close */
210 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
211 re
= uregex_open(pat
, -1, 0, 0, &status
);
212 if (U_FAILURE(status
)) {
213 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
218 /* Open with all flag values set */
219 status
= U_ZERO_ERROR
;
220 re
= uregex_open(pat
, -1,
221 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
| UREGEX_LITERAL
,
223 TEST_ASSERT_SUCCESS(status
);
226 /* Open with an invalid flag */
227 status
= U_ZERO_ERROR
;
228 re
= uregex_open(pat
, -1, 0x40000000, 0, &status
);
229 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
232 /* Open with an unimplemented flag */
233 status
= U_ZERO_ERROR
;
234 re
= uregex_open(pat
, -1, UREGEX_CANON_EQ
, 0, &status
);
235 TEST_ASSERT(status
== U_REGEX_UNIMPLEMENTED
);
238 /* openC with an invalid parameter */
239 status
= U_ZERO_ERROR
;
240 re
= uregex_openC(NULL
,
241 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
242 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
244 /* openC with an invalid parameter and a failure status already set on entry: must return NULL and leave status unchanged */
245 status
= U_USELESS_COLLATOR_ERROR
;
246 re
= uregex_openC(NULL
,
247 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
248 TEST_ASSERT(status
== U_USELESS_COLLATOR_ERROR
&& re
== NULL
);
250 /* openC open from a C string */
254 status
= U_ZERO_ERROR
;
255 re
= uregex_openC("abc*", 0, 0, &status
);
256 TEST_ASSERT_SUCCESS(status
);
257 p
= uregex_pattern(re
, &len
, &status
);
258 TEST_ASSERT_SUCCESS(status
);
260 /* The TEST_ASSERT_SUCCESS above should change too... */
261 if(U_SUCCESS(status
)) {
262 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
263 TEST_ASSERT(u_strcmp(pat
, p
) == 0);
264 TEST_ASSERT(len
==(int32_t)strlen("abc*"));
269 /* TODO: Open with ParseError parameter */
276 URegularExpression
*clone1
;
277 URegularExpression
*clone2
;
278 URegularExpression
*clone3
;
279 UChar testString1
[30];
280 UChar testString2
[30];
284 status
= U_ZERO_ERROR
;
285 re
= uregex_openC("abc*", 0, 0, &status
);
286 TEST_ASSERT_SUCCESS(status
);
287 clone1
= uregex_clone(re
, &status
);
288 TEST_ASSERT_SUCCESS(status
);
289 TEST_ASSERT(clone1
!= NULL
);
291 status
= U_ZERO_ERROR
;
292 clone2
= uregex_clone(re
, &status
);
293 TEST_ASSERT_SUCCESS(status
);
294 TEST_ASSERT(clone2
!= NULL
);
297 status
= U_ZERO_ERROR
;
298 clone3
= uregex_clone(clone2
, &status
);
299 TEST_ASSERT_SUCCESS(status
);
300 TEST_ASSERT(clone3
!= NULL
);
302 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
303 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
305 status
= U_ZERO_ERROR
;
306 uregex_setText(clone1
, testString1
, -1, &status
);
307 TEST_ASSERT_SUCCESS(status
);
308 result
= uregex_lookingAt(clone1
, 0, &status
);
309 TEST_ASSERT_SUCCESS(status
);
310 TEST_ASSERT(result
==TRUE
);
312 status
= U_ZERO_ERROR
;
313 uregex_setText(clone2
, testString2
, -1, &status
);
314 TEST_ASSERT_SUCCESS(status
);
315 result
= uregex_lookingAt(clone2
, 0, &status
);
316 TEST_ASSERT_SUCCESS(status
);
317 TEST_ASSERT(result
==FALSE
);
318 result
= uregex_find(clone2
, 0, &status
);
319 TEST_ASSERT_SUCCESS(status
);
320 TEST_ASSERT(result
==TRUE
);
322 uregex_close(clone1
);
323 uregex_close(clone2
);
324 uregex_close(clone3
);
332 const UChar
*resultPat
;
334 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
));
335 status
= U_ZERO_ERROR
;
336 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
337 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
338 TEST_ASSERT_SUCCESS(status
);
340 /* The TEST_ASSERT_SUCCESS above should change too... */
341 if (U_SUCCESS(status
)) {
342 TEST_ASSERT(resultLen
== -1);
343 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
348 status
= U_ZERO_ERROR
;
349 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
350 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
351 TEST_ASSERT_SUCCESS(status
);
352 TEST_ASSERT_SUCCESS(status
);
354 /* The TEST_ASSERT_SUCCESS above should change too... */
355 if (U_SUCCESS(status
)) {
356 TEST_ASSERT(resultLen
== 3);
357 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
358 TEST_ASSERT(u_strlen(resultPat
) == 3);
370 status
= U_ZERO_ERROR
;
371 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
372 t
= uregex_flags(re
, &status
);
373 TEST_ASSERT_SUCCESS(status
);
377 status
= U_ZERO_ERROR
;
378 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
379 t
= uregex_flags(re
, &status
);
380 TEST_ASSERT_SUCCESS(status
);
384 status
= U_ZERO_ERROR
;
385 re
= uregex_open(pat
, -1, UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
, NULL
, &status
);
386 t
= uregex_flags(re
, &status
);
387 TEST_ASSERT_SUCCESS(status
);
388 TEST_ASSERT(t
== (UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
));
393 * setText() and lookingAt()
400 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
401 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
402 status
= U_ZERO_ERROR
;
403 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
404 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
405 TEST_ASSERT_SUCCESS(status
);
407 /* Operation before doing a setText should fail... */
408 status
= U_ZERO_ERROR
;
409 uregex_lookingAt(re
, 0, &status
);
410 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
412 status
= U_ZERO_ERROR
;
413 uregex_setText(re
, text1
, -1, &status
);
414 result
= uregex_lookingAt(re
, 0, &status
);
415 TEST_ASSERT(result
== TRUE
);
416 TEST_ASSERT_SUCCESS(status
);
418 status
= U_ZERO_ERROR
;
419 uregex_setText(re
, text2
, -1, &status
);
420 result
= uregex_lookingAt(re
, 0, &status
);
421 TEST_ASSERT(result
== FALSE
);
422 TEST_ASSERT_SUCCESS(status
);
424 status
= U_ZERO_ERROR
;
425 uregex_setText(re
, text1
, -1, &status
);
426 result
= uregex_lookingAt(re
, 0, &status
);
427 TEST_ASSERT(result
== TRUE
);
428 TEST_ASSERT_SUCCESS(status
);
430 status
= U_ZERO_ERROR
;
431 uregex_setText(re
, text1
, 5, &status
);
432 result
= uregex_lookingAt(re
, 0, &status
);
433 TEST_ASSERT(result
== FALSE
);
434 TEST_ASSERT_SUCCESS(status
);
436 status
= U_ZERO_ERROR
;
437 uregex_setText(re
, text1
, 6, &status
);
438 result
= uregex_lookingAt(re
, 0, &status
);
439 TEST_ASSERT(result
== TRUE
);
440 TEST_ASSERT_SUCCESS(status
);
455 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
456 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
457 status
= U_ZERO_ERROR
;
458 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
459 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
461 uregex_setText(re
, text1
, -1, &status
);
462 result
= uregex_getText(re
, &textLength
, &status
);
463 TEST_ASSERT(result
== text1
);
464 TEST_ASSERT(textLength
== -1);
465 TEST_ASSERT_SUCCESS(status
);
467 status
= U_ZERO_ERROR
;
468 uregex_setText(re
, text2
, 7, &status
);
469 result
= uregex_getText(re
, &textLength
, &status
);
470 TEST_ASSERT(result
== text2
);
471 TEST_ASSERT(textLength
== 7);
472 TEST_ASSERT_SUCCESS(status
);
474 status
= U_ZERO_ERROR
;
475 uregex_setText(re
, text2
, 4, &status
);
476 result
= uregex_getText(re
, &textLength
, &status
);
477 TEST_ASSERT(result
== text2
);
478 TEST_ASSERT(textLength
== 4);
479 TEST_ASSERT_SUCCESS(status
);
490 UChar nullString
[] = {0,0,0};
492 u_uastrncpy(text1
, "abcccde", UPRV_LENGTHOF(text1
));
493 status
= U_ZERO_ERROR
;
494 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
495 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
497 uregex_setText(re
, text1
, -1, &status
);
498 result
= uregex_matches(re
, 0, &status
);
499 TEST_ASSERT(result
== FALSE
);
500 TEST_ASSERT_SUCCESS(status
);
502 status
= U_ZERO_ERROR
;
503 uregex_setText(re
, text1
, 6, &status
);
504 result
= uregex_matches(re
, 0, &status
);
505 TEST_ASSERT(result
== TRUE
);
506 TEST_ASSERT_SUCCESS(status
);
508 status
= U_ZERO_ERROR
;
509 uregex_setText(re
, text1
, 6, &status
);
510 result
= uregex_matches(re
, 1, &status
);
511 TEST_ASSERT(result
== FALSE
);
512 TEST_ASSERT_SUCCESS(status
);
515 status
= U_ZERO_ERROR
;
516 re
= uregex_openC(".?", 0, NULL
, &status
);
517 uregex_setText(re
, text1
, -1, &status
);
518 len
= u_strlen(text1
);
519 result
= uregex_matches(re
, len
, &status
);
520 TEST_ASSERT(result
== TRUE
);
521 TEST_ASSERT_SUCCESS(status
);
523 status
= U_ZERO_ERROR
;
524 uregex_setText(re
, nullString
, -1, &status
);
525 TEST_ASSERT_SUCCESS(status
);
526 result
= uregex_matches(re
, 0, &status
);
527 TEST_ASSERT(result
== TRUE
);
528 TEST_ASSERT_SUCCESS(status
);
534 * lookingAt() Used in setText test.
539 * find(), findNext, start, end, reset
544 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
545 status
= U_ZERO_ERROR
;
546 re
= uregex_openC("rx", 0, NULL
, &status
);
548 uregex_setText(re
, text1
, -1, &status
);
549 result
= uregex_find(re
, 0, &status
);
550 TEST_ASSERT(result
== TRUE
);
551 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
552 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
553 TEST_ASSERT_SUCCESS(status
);
555 result
= uregex_find(re
, 9, &status
);
556 TEST_ASSERT(result
== TRUE
);
557 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
558 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
559 TEST_ASSERT_SUCCESS(status
);
561 result
= uregex_find(re
, 14, &status
);
562 TEST_ASSERT(result
== FALSE
);
563 TEST_ASSERT_SUCCESS(status
);
565 status
= U_ZERO_ERROR
;
566 uregex_reset(re
, 0, &status
);
568 result
= uregex_findNext(re
, &status
);
569 TEST_ASSERT(result
== TRUE
);
570 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
571 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
572 TEST_ASSERT_SUCCESS(status
);
574 result
= uregex_findNext(re
, &status
);
575 TEST_ASSERT(result
== TRUE
);
576 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
577 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
578 TEST_ASSERT_SUCCESS(status
);
580 status
= U_ZERO_ERROR
;
581 uregex_reset(re
, 12, &status
);
583 result
= uregex_findNext(re
, &status
);
584 TEST_ASSERT(result
== TRUE
);
585 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
586 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
587 TEST_ASSERT_SUCCESS(status
);
589 result
= uregex_findNext(re
, &status
);
590 TEST_ASSERT(result
== FALSE
);
591 TEST_ASSERT_SUCCESS(status
);
602 status
= U_ZERO_ERROR
;
603 re
= uregex_openC("abc", 0, NULL
, &status
);
604 result
= uregex_groupCount(re
, &status
);
605 TEST_ASSERT_SUCCESS(status
);
606 TEST_ASSERT(result
== 0);
609 status
= U_ZERO_ERROR
;
610 re
= uregex_openC("abc(def)(ghi(j))", 0, NULL
, &status
);
611 result
= uregex_groupCount(re
, &status
);
612 TEST_ASSERT_SUCCESS(status
);
613 TEST_ASSERT(result
== 3);
627 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
629 status
= U_ZERO_ERROR
;
630 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
631 TEST_ASSERT_SUCCESS(status
);
634 uregex_setText(re
, text1
, -1, &status
);
635 result
= uregex_find(re
, 0, &status
);
636 TEST_ASSERT(result
==TRUE
);
638 /* Capture Group 0, the full match. Should succeed. */
639 status
= U_ZERO_ERROR
;
640 resultSz
= uregex_group(re
, 0, buf
, UPRV_LENGTHOF(buf
), &status
);
641 TEST_ASSERT_SUCCESS(status
);
642 TEST_ASSERT_STRING("abc interior def", buf
, TRUE
);
643 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
645 /* Capture group #1. Should succeed. */
646 status
= U_ZERO_ERROR
;
647 resultSz
= uregex_group(re
, 1, buf
, UPRV_LENGTHOF(buf
), &status
);
648 TEST_ASSERT_SUCCESS(status
);
649 TEST_ASSERT_STRING(" interior ", buf
, TRUE
);
650 TEST_ASSERT(resultSz
== (int32_t)strlen(" interior "));
652 /* Capture group out of range. Error. */
653 status
= U_ZERO_ERROR
;
654 uregex_group(re
, 2, buf
, UPRV_LENGTHOF(buf
), &status
);
655 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
657 /* NULL buffer, pure pre-flight */
658 status
= U_ZERO_ERROR
;
659 resultSz
= uregex_group(re
, 0, NULL
, 0, &status
);
660 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
661 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
663 /* Too small buffer, truncated string */
664 status
= U_ZERO_ERROR
;
665 memset(buf
, -1, sizeof(buf
));
666 resultSz
= uregex_group(re
, 0, buf
, 5, &status
);
667 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
668 TEST_ASSERT_STRING("abc i", buf
, FALSE
);
669 TEST_ASSERT(buf
[5] == (UChar
)0xffff);
670 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
672 /* Output string just fits buffer, no NUL term. */
673 status
= U_ZERO_ERROR
;
674 resultSz
= uregex_group(re
, 0, buf
, (int32_t)strlen("abc interior def"), &status
);
675 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
676 TEST_ASSERT_STRING("abc interior def", buf
, FALSE
);
677 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
678 TEST_ASSERT(buf
[strlen("abc interior def")] == (UChar
)0xffff);
689 /* SetRegion(), getRegion() do something */
690 TEST_SETUP(".*", "0123456789ABCDEF", 0)
691 UChar resultString
[40];
692 TEST_ASSERT(uregex_regionStart(re
, &status
) == 0);
693 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 16);
694 uregex_setRegion(re
, 3, 6, &status
);
695 TEST_ASSERT(uregex_regionStart(re
, &status
) == 3);
696 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 6);
697 TEST_ASSERT(uregex_findNext(re
, &status
));
698 TEST_ASSERT(uregex_group(re
, 0, resultString
, UPRV_LENGTHOF(resultString
), &status
) == 3)
699 TEST_ASSERT_STRING("345", resultString
, TRUE
);
702 /* find(start=-1) uses regions */
703 TEST_SETUP(".*", "0123456789ABCDEF", 0);
704 uregex_setRegion(re
, 4, 6, &status
);
705 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
706 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
707 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
710 /* find (start >=0) does not use regions */
711 TEST_SETUP(".*", "0123456789ABCDEF", 0);
712 uregex_setRegion(re
, 4, 6, &status
);
713 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
714 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
715 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
718 /* findNext() obeys regions */
719 TEST_SETUP(".", "0123456789ABCDEF", 0);
720 uregex_setRegion(re
, 4, 6, &status
);
721 TEST_ASSERT(uregex_findNext(re
,&status
) == TRUE
);
722 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
723 TEST_ASSERT(uregex_findNext(re
, &status
) == TRUE
);
724 TEST_ASSERT(uregex_start(re
, 0, &status
) == 5);
725 TEST_ASSERT(uregex_findNext(re
, &status
) == FALSE
);
728 /* matches(start=-1) uses regions */
729 /* Also, verify that non-greedy *? succeeds in finding the full match. */
730 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
731 uregex_setRegion(re
, 4, 6, &status
);
732 TEST_ASSERT(uregex_matches(re
, -1, &status
) == TRUE
);
733 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
734 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
737 /* matches (start >=0) does not use regions */
738 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
739 uregex_setRegion(re
, 4, 6, &status
);
740 TEST_ASSERT(uregex_matches(re
, 0, &status
) == TRUE
);
741 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
742 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
745 /* lookingAt(start=-1) uses regions */
746 /* Also, verify that non-greedy *? finds the first (shortest) match. */
747 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
748 uregex_setRegion(re
, 4, 6, &status
);
749 TEST_ASSERT(uregex_lookingAt(re
, -1, &status
) == TRUE
);
750 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
751 TEST_ASSERT(uregex_end(re
, 0, &status
) == 4);
754 /* lookingAt (start >=0) does not use regions */
755 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
756 uregex_setRegion(re
, 4, 6, &status
);
757 TEST_ASSERT(uregex_lookingAt(re
, 0, &status
) == TRUE
);
758 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
759 TEST_ASSERT(uregex_end(re
, 0, &status
) == 0);
763 TEST_SETUP("[a-f]*", "abcdefghij", 0);
764 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
765 TEST_ASSERT(uregex_hitEnd(re
, &status
) == FALSE
);
768 TEST_SETUP("[a-f]*", "abcdef", 0);
769 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
770 TEST_ASSERT(uregex_hitEnd(re
, &status
) == TRUE
);
774 TEST_SETUP("abcd", "abcd", 0);
775 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
776 TEST_ASSERT(uregex_requireEnd(re
, &status
) == FALSE
);
779 TEST_SETUP("abcd$", "abcd", 0);
780 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
781 TEST_ASSERT(uregex_requireEnd(re
, &status
) == TRUE
);
784 /* anchoringBounds */
785 TEST_SETUP("abc$", "abcdef", 0);
786 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == TRUE
);
787 uregex_useAnchoringBounds(re
, FALSE
, &status
);
788 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == FALSE
);
790 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
);
791 uregex_useAnchoringBounds(re
, TRUE
, &status
);
792 uregex_setRegion(re
, 0, 3, &status
);
793 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
794 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
797 /* Transparent Bounds */
798 TEST_SETUP("abc(?=def)", "abcdef", 0);
799 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == FALSE
);
800 uregex_useTransparentBounds(re
, TRUE
, &status
);
801 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == TRUE
);
803 uregex_useTransparentBounds(re
, FALSE
, &status
);
804 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* No Region */
805 uregex_setRegion(re
, 0, 3, &status
);
806 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
); /* with region, opaque bounds */
807 uregex_useTransparentBounds(re
, TRUE
, &status
);
808 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* with region, transparent bounds */
809 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
822 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
823 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
824 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
826 status
= U_ZERO_ERROR
;
827 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
828 TEST_ASSERT_SUCCESS(status
);
830 /* Normal case, with match */
831 uregex_setText(re
, text1
, -1, &status
);
832 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
833 TEST_ASSERT_SUCCESS(status
);
834 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, TRUE
);
835 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
837 /* No match. Text should copy to output with no changes. */
838 status
= U_ZERO_ERROR
;
839 uregex_setText(re
, text2
, -1, &status
);
840 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
841 TEST_ASSERT_SUCCESS(status
);
842 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
843 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
845 /* Match, output just fills buffer, no NUL termination; expect a not-terminated warning. */
846 status
= U_ZERO_ERROR
;
847 uregex_setText(re
, text1
, -1, &status
);
848 memset(buf
, -1, sizeof(buf
));
849 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
850 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
851 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
852 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
853 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
855 /* Do the replaceFirst again, without first resetting anything.
856 * Should give the same results.
858 status
= U_ZERO_ERROR
;
859 memset(buf
, -1, sizeof(buf
));
860 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
861 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
862 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
863 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
864 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
866 /* NULL buffer, zero buffer length */
867 status
= U_ZERO_ERROR
;
868 resultSz
= uregex_replaceFirst(re
, replText
, -1, NULL
, 0, &status
);
869 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
870 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
872 /* Buffer too small by one */
873 status
= U_ZERO_ERROR
;
874 memset(buf
, -1, sizeof(buf
));
875 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x.")-1, &status
);
876 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
877 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf
, FALSE
);
878 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
879 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
889 UChar text1
[80]; /* "Replace xaax x1x x...x." */
890 UChar text2
[80]; /* "No match Here" */
891 UChar replText
[80]; /* "<$1>" */
892 UChar replText2
[80]; /* "<<$1>>" */
893 const char * pattern
= "x(.*?)x";
894 const char * expectedResult
= "Replace <aa> <1> <...>.";
895 const char * expectedResult2
= "Replace <<aa>> <<1>> <<...>>.";
898 int32_t expectedResultSize
;
899 int32_t expectedResultSize2
;
902 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
903 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
904 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
905 u_uastrncpy(replText2
, "<<$1>>", UPRV_LENGTHOF(replText2
));
906 expectedResultSize
= strlen(expectedResult
);
907 expectedResultSize2
= strlen(expectedResult2
);
909 status
= U_ZERO_ERROR
;
910 re
= uregex_openC(pattern
, 0, NULL
, &status
);
911 TEST_ASSERT_SUCCESS(status
);
913 /* Normal case, with match */
914 uregex_setText(re
, text1
, -1, &status
);
915 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
916 TEST_ASSERT_SUCCESS(status
);
917 TEST_ASSERT_STRING(expectedResult
, buf
, TRUE
);
918 TEST_ASSERT(resultSize
== expectedResultSize
);
920 /* No match. Text should copy to output with no changes. */
921 status
= U_ZERO_ERROR
;
922 uregex_setText(re
, text2
, -1, &status
);
923 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
924 TEST_ASSERT_SUCCESS(status
);
925 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
926 TEST_ASSERT(resultSize
== u_strlen(text2
));
928 /* Match, output just fills buffer, no NUL termination; expect a not-terminated warning. */
929 status
= U_ZERO_ERROR
;
930 uregex_setText(re
, text1
, -1, &status
);
931 memset(buf
, -1, sizeof(buf
));
932 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, expectedResultSize
, &status
);
933 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
934 TEST_ASSERT_STRING(expectedResult
, buf
, FALSE
);
935 TEST_ASSERT(resultSize
== expectedResultSize
);
936 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
938 /* Do the replaceAll again, without first resetting anything.
939 * Should give the same results.
941 status
= U_ZERO_ERROR
;
942 memset(buf
, -1, sizeof(buf
));
943 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, strlen("Replace xaax x1x x...x."), &status
);
944 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
945 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
946 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
947 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
949 /* NULL buffer, zero buffer length */
950 status
= U_ZERO_ERROR
;
951 resultSize
= uregex_replaceAll(re
, replText
, -1, NULL
, 0, &status
);
952 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
953 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
955 /* Buffer too small. Try every size, which will tickle edge cases
956 * in uregex_appendReplacement (used by replaceAll) */
957 for (i
=0; i
<expectedResultSize
; i
++) {
959 status
= U_ZERO_ERROR
;
960 memset(buf
, -1, sizeof(buf
));
961 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, i
, &status
);
962 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
963 strcpy(expected
, expectedResult
);
965 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
966 TEST_ASSERT(resultSize
== expectedResultSize
);
967 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
970 /* Buffer too small. Same as previous test, except this time the replacement
971 * text is longer than the match capture group, making the length of the complete
972 * replacement longer than the original string.
974 for (i
=0; i
<expectedResultSize2
; i
++) {
976 status
= U_ZERO_ERROR
;
977 memset(buf
, -1, sizeof(buf
));
978 resultSize
= uregex_replaceAll(re
, replText2
, -1, buf
, i
, &status
);
979 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
980 strcpy(expected
, expectedResult2
);
982 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
983 TEST_ASSERT(resultSize
== expectedResultSize2
);
984 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
993 * appendReplacement()
1003 status
= U_ZERO_ERROR
;
1004 re
= uregex_openC(".*", 0, 0, &status
);
1005 TEST_ASSERT_SUCCESS(status
);
1007 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1008 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1009 uregex_setText(re
, text
, -1, &status
);
1011 /* match covers whole target string */
1012 uregex_find(re
, 0, &status
);
1013 TEST_ASSERT_SUCCESS(status
);
1015 bufCap
= UPRV_LENGTHOF(buf
);
1016 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1017 TEST_ASSERT_SUCCESS(status
);
1018 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1020 /* Match has \u \U escapes */
1021 uregex_find(re
, 0, &status
);
1022 TEST_ASSERT_SUCCESS(status
);
1024 bufCap
= UPRV_LENGTHOF(buf
);
1025 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1026 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1027 TEST_ASSERT_SUCCESS(status
);
1028 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1030 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1031 status
= U_ZERO_ERROR
;
1032 uregex_find(re
, 0, &status
);
1033 TEST_ASSERT_SUCCESS(status
);
1035 status
= U_BUFFER_OVERFLOW_ERROR
;
1036 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, NULL
, &status
);
1037 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1044 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1051 UChar textToSplit
[80];
1056 int32_t requiredCapacity
;
1057 int32_t spaceNeeded
;
1060 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1061 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1063 status
= U_ZERO_ERROR
;
1064 re
= uregex_openC(":", 0, NULL
, &status
);
1069 uregex_setText(re
, textToSplit
, -1, &status
);
1070 TEST_ASSERT_SUCCESS(status
);
1072 /* The TEST_ASSERT_SUCCESS call above should change too... */
1073 if (U_SUCCESS(status
)) {
1074 memset(fields
, -1, sizeof(fields
));
1076 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1077 TEST_ASSERT_SUCCESS(status
);
1079 /* The TEST_ASSERT_SUCCESS call above should change too... */
1080 if(U_SUCCESS(status
)) {
1081 TEST_ASSERT(numFields
== 3);
1082 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1083 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1084 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1085 TEST_ASSERT(fields
[3] == NULL
);
1087 spaceNeeded
= u_strlen(textToSplit
) -
1088 (numFields
- 1) + /* Field delimiters do not appear in output */
1089 numFields
; /* Each field gets a NUL terminator */
1091 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1098 /* Split with too few output strings available */
1099 status
= U_ZERO_ERROR
;
1100 re
= uregex_openC(":", 0, NULL
, &status
);
1101 uregex_setText(re
, textToSplit
, -1, &status
);
1102 TEST_ASSERT_SUCCESS(status
);
1104 /* The TEST_ASSERT_SUCCESS call above should change too... */
1105 if(U_SUCCESS(status
)) {
1106 memset(fields
, -1, sizeof(fields
));
1108 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1109 TEST_ASSERT_SUCCESS(status
);
1111 /* The TEST_ASSERT_SUCCESS call above should change too... */
1112 if(U_SUCCESS(status
)) {
1113 TEST_ASSERT(numFields
== 2);
1114 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1115 TEST_ASSERT_STRING(" second: third", fields
[1], TRUE
);
1116 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1118 spaceNeeded
= u_strlen(textToSplit
) -
1119 (numFields
- 1) + /* Field delimiters do not appear in output */
1120 numFields
; /* Each field gets a NUL terminator */
1122 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1124 /* Split with a range of output buffer sizes. */
1125 spaceNeeded
= u_strlen(textToSplit
) -
1126 (numFields
- 1) + /* Field delimiters do not appear in output */
1127 numFields
; /* Each field gets a NUL terminator */
1129 for (sz
=0; sz
< spaceNeeded
+1; sz
++) {
1130 memset(fields
, -1, sizeof(fields
));
1131 status
= U_ZERO_ERROR
;
1133 uregex_split(re
, buf
, sz
, &requiredCapacity
, fields
, 10, &status
);
1134 if (sz
>= spaceNeeded
) {
1135 TEST_ASSERT_SUCCESS(status
);
1136 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1137 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1138 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1140 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1142 TEST_ASSERT(numFields
== 3);
1143 TEST_ASSERT(fields
[3] == NULL
);
1144 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1155 /* Split(), part 2. Patterns with capture groups. The capture group text
1156 * comes out as additional fields. */
1158 UChar textToSplit
[80];
1162 int32_t requiredCapacity
;
1163 int32_t spaceNeeded
;
1166 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
1168 status
= U_ZERO_ERROR
;
1169 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
1171 uregex_setText(re
, textToSplit
, -1, &status
);
1172 TEST_ASSERT_SUCCESS(status
);
1174 /* The TEST_ASSERT_SUCCESS call above should change too... */
1175 if(U_SUCCESS(status
)) {
1176 memset(fields
, -1, sizeof(fields
));
1178 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1179 TEST_ASSERT_SUCCESS(status
);
1181 /* The TEST_ASSERT_SUCCESS call above should change too... */
1182 if(U_SUCCESS(status
)) {
1183 TEST_ASSERT(numFields
== 5);
1184 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1185 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1186 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1187 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1188 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1189 TEST_ASSERT(fields
[5] == NULL
);
1190 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1191 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1195 /* Split with too few output strings available (2) */
1196 status
= U_ZERO_ERROR
;
1197 memset(fields
, -1, sizeof(fields
));
1199 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1200 TEST_ASSERT_SUCCESS(status
);
1202 /* The TEST_ASSERT_SUCCESS call above should change too... */
1203 if(U_SUCCESS(status
)) {
1204 TEST_ASSERT(numFields
== 2);
1205 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1206 TEST_ASSERT_STRING(" second<tag-b> third", fields
[1], TRUE
);
1207 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1209 spaceNeeded
= strlen("first . second<tag-b> third."); /* "." at NUL positions */
1210 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1213 /* Split with too few output strings available (3) */
1214 status
= U_ZERO_ERROR
;
1215 memset(fields
, -1, sizeof(fields
));
1217 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 3, &status
);
1218 TEST_ASSERT_SUCCESS(status
);
1220 /* The TEST_ASSERT_SUCCESS call above should change too... */
1221 if(U_SUCCESS(status
)) {
1222 TEST_ASSERT(numFields
== 3);
1223 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1224 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1225 TEST_ASSERT_STRING(" second<tag-b> third", fields
[2], TRUE
);
1226 TEST_ASSERT(!memcmp(&fields
[3],&minus1
,sizeof(UChar
*)));
1228 spaceNeeded
= strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1229 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1232 /* Split with just enough output strings available (5) */
1233 status
= U_ZERO_ERROR
;
1234 memset(fields
, -1, sizeof(fields
));
1236 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 5, &status
);
1237 TEST_ASSERT_SUCCESS(status
);
1239 /* The TEST_ASSERT_SUCCESS call above should change too... */
1240 if(U_SUCCESS(status
)) {
1241 TEST_ASSERT(numFields
== 5);
1242 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1243 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1244 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1245 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1246 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1247 TEST_ASSERT(!memcmp(&fields
[5],&minus1
,sizeof(UChar
*)));
1249 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1250 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1253 /* Split, end of text is a field delimiter. */
1254 status
= U_ZERO_ERROR
;
1255 sz
= strlen("first <tag-a> second<tag-b>");
1256 uregex_setText(re
, textToSplit
, sz
, &status
);
1257 TEST_ASSERT_SUCCESS(status
);
1259 /* The TEST_ASSERT_SUCCESS call above should change too... */
1260 if(U_SUCCESS(status
)) {
1261 memset(fields
, -1, sizeof(fields
));
1263 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 9, &status
);
1264 TEST_ASSERT_SUCCESS(status
);
1266 /* The TEST_ASSERT_SUCCESS call above should change too... */
1267 if(U_SUCCESS(status
)) {
1268 TEST_ASSERT(numFields
== 5);
1269 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1270 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1271 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1272 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1273 TEST_ASSERT_STRING("", fields
[4], TRUE
);
1274 TEST_ASSERT(fields
[5] == NULL
);
1275 TEST_ASSERT(fields
[8] == NULL
);
1276 TEST_ASSERT(!memcmp(&fields
[9],&minus1
,sizeof(UChar
*)));
1277 spaceNeeded
= strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1278 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1288 TEST_SETUP("abc$", "abcdef", 0);
1289 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 0);
1290 uregex_setTimeLimit(re
, 1000, &status
);
1291 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1292 TEST_ASSERT_SUCCESS(status
);
1293 uregex_setTimeLimit(re
, -1, &status
);
1294 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1295 status
= U_ZERO_ERROR
;
1296 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1300 * set/get Stack Limit
1302 TEST_SETUP("abc$", "abcdef", 0);
1303 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 8000000);
1304 uregex_setStackLimit(re
, 40000, &status
);
1305 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1306 TEST_ASSERT_SUCCESS(status
);
1307 uregex_setStackLimit(re
, -1, &status
);
1308 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1309 status
= U_ZERO_ERROR
;
1310 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1315 * Get/Set callback functions
1316 * This test is copied from intltest regex/Callbacks
1317 * The pattern and test data will run long enough to cause the callback
1318 * to be invoked. The nested '+' operators give exponential time
1319 * behavior with increasing string length.
1321 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1322 callBackContext cbInfo
= {4, 0, 0};
1323 const void *pContext
= &cbInfo
;
1324 URegexMatchCallback
*returnedFn
= &TestCallbackFn
;
1326 /* Getting the callback fn when it hasn't been set must return NULL */
1327 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1328 TEST_ASSERT_SUCCESS(status
);
1329 TEST_ASSERT(returnedFn
== NULL
);
1330 TEST_ASSERT(pContext
== NULL
);
1332 /* Set the callback and do a match. */
1333 /* The callback function should record that it has been called. */
1334 uregex_setMatchCallback(re
, &TestCallbackFn
, &cbInfo
, &status
);
1335 TEST_ASSERT_SUCCESS(status
);
1336 TEST_ASSERT(cbInfo
.numCalls
== 0);
1337 TEST_ASSERT(uregex_matches(re
, -1, &status
) == FALSE
);
1338 TEST_ASSERT_SUCCESS(status
);
1339 TEST_ASSERT(cbInfo
.numCalls
> 0);
1341 /* Getting the callback should return the values that were set above. */
1342 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1343 TEST_ASSERT(returnedFn
== &TestCallbackFn
);
1344 TEST_ASSERT(pContext
== &cbInfo
);
1351 static void TestBug4315(void) {
1352 UErrorCode theICUError
= U_ZERO_ERROR
;
1353 URegularExpression
*theRegEx
;
1355 const char *thePattern
;
1356 UChar theString
[100];
1357 UChar
*destFields
[24];
1358 int32_t neededLength1
;
1359 int32_t neededLength2
;
1361 int32_t wordCount
= 0;
1362 int32_t destFieldsSize
= 24;
1365 u_uastrcpy(theString
, "The quick brown fox jumped over the slow black turtle.");
1368 theRegEx
= uregex_openC(thePattern
, 0, NULL
, &theICUError
);
1369 TEST_ASSERT_SUCCESS(theICUError
);
1371 /* set the input string */
1372 uregex_setText(theRegEx
, theString
, u_strlen(theString
), &theICUError
);
1373 TEST_ASSERT_SUCCESS(theICUError
);
1376 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1378 wordCount
= uregex_split(theRegEx
, NULL
, 0, &neededLength1
, destFields
,
1379 destFieldsSize
, &theICUError
);
1381 TEST_ASSERT(theICUError
== U_BUFFER_OVERFLOW_ERROR
);
1382 TEST_ASSERT(wordCount
==3);
1384 if(theICUError
== U_BUFFER_OVERFLOW_ERROR
)
1386 theICUError
= U_ZERO_ERROR
;
1387 textBuff
= (UChar
*) malloc(sizeof(UChar
) * (neededLength1
+ 1));
1388 wordCount
= uregex_split(theRegEx
, textBuff
, neededLength1
+1, &neededLength2
,
1389 destFields
, destFieldsSize
, &theICUError
);
1390 TEST_ASSERT(wordCount
==3);
1391 TEST_ASSERT_SUCCESS(theICUError
);
1392 TEST_ASSERT(neededLength1
== neededLength2
);
1393 TEST_ASSERT_STRING("The qui", destFields
[0], TRUE
);
1394 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields
[1], TRUE
);
1395 TEST_ASSERT_STRING("turtle.", destFields
[2], TRUE
);
1396 TEST_ASSERT(destFields
[3] == NULL
);
1399 uregex_close(theRegEx
);
1402 /* Based on TestRegexCAPI() */
1403 static void TestUTextAPI(void) {
1404 UErrorCode status
= U_ZERO_ERROR
;
1405 URegularExpression
*re
;
1406 UText patternText
= UTEXT_INITIALIZER
;
1408 const char patternTextUTF8
[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1410 /* Minimalist open/close */
1411 utext_openUTF8(&patternText
, patternTextUTF8
, -1, &status
);
1412 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1413 if (U_FAILURE(status
)) {
1414 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
1415 utext_close(&patternText
);
1420 /* Open with all flag values set */
1421 status
= U_ZERO_ERROR
;
1422 re
= uregex_openUText(&patternText
,
1423 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
1425 TEST_ASSERT_SUCCESS(status
);
1428 /* Open with an invalid flag */
1429 status
= U_ZERO_ERROR
;
1430 re
= uregex_openUText(&patternText
, 0x40000000, 0, &status
);
1431 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
1434 /* open with an invalid parameter */
1435 status
= U_ZERO_ERROR
;
1436 re
= uregex_openUText(NULL
,
1437 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
1438 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
1444 URegularExpression
*clone1
;
1445 URegularExpression
*clone2
;
1446 URegularExpression
*clone3
;
1447 UChar testString1
[30];
1448 UChar testString2
[30];
1452 status
= U_ZERO_ERROR
;
1453 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1454 TEST_ASSERT_SUCCESS(status
);
1455 clone1
= uregex_clone(re
, &status
);
1456 TEST_ASSERT_SUCCESS(status
);
1457 TEST_ASSERT(clone1
!= NULL
);
1459 status
= U_ZERO_ERROR
;
1460 clone2
= uregex_clone(re
, &status
);
1461 TEST_ASSERT_SUCCESS(status
);
1462 TEST_ASSERT(clone2
!= NULL
);
1465 status
= U_ZERO_ERROR
;
1466 clone3
= uregex_clone(clone2
, &status
);
1467 TEST_ASSERT_SUCCESS(status
);
1468 TEST_ASSERT(clone3
!= NULL
);
1470 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
1471 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
1473 status
= U_ZERO_ERROR
;
1474 uregex_setText(clone1
, testString1
, -1, &status
);
1475 TEST_ASSERT_SUCCESS(status
);
1476 result
= uregex_lookingAt(clone1
, 0, &status
);
1477 TEST_ASSERT_SUCCESS(status
);
1478 TEST_ASSERT(result
==TRUE
);
1480 status
= U_ZERO_ERROR
;
1481 uregex_setText(clone2
, testString2
, -1, &status
);
1482 TEST_ASSERT_SUCCESS(status
);
1483 result
= uregex_lookingAt(clone2
, 0, &status
);
1484 TEST_ASSERT_SUCCESS(status
);
1485 TEST_ASSERT(result
==FALSE
);
1486 result
= uregex_find(clone2
, 0, &status
);
1487 TEST_ASSERT_SUCCESS(status
);
1488 TEST_ASSERT(result
==TRUE
);
1490 uregex_close(clone1
);
1491 uregex_close(clone2
);
1492 uregex_close(clone3
);
1497 * pattern() and patternText()
1500 const UChar
*resultPat
;
1503 const char str_hello
[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1504 const char str_hel
[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1505 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
)); /* for comparison */
1506 status
= U_ZERO_ERROR
;
1508 utext_openUTF8(&patternText
, str_hello
, -1, &status
);
1509 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
1510 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1511 TEST_ASSERT_SUCCESS(status
);
1513 /* The TEST_ASSERT_SUCCESS above should change too... */
1514 if (U_SUCCESS(status
)) {
1515 TEST_ASSERT(resultLen
== -1);
1516 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
1519 resultText
= uregex_patternUText(re
, &status
);
1520 TEST_ASSERT_SUCCESS(status
);
1521 TEST_ASSERT_UTEXT(str_hello
, resultText
);
1525 status
= U_ZERO_ERROR
;
1526 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
1527 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1528 TEST_ASSERT_SUCCESS(status
);
1530 /* The TEST_ASSERT_SUCCESS above should change too... */
1531 if (U_SUCCESS(status
)) {
1532 TEST_ASSERT(resultLen
== 3);
1533 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
1534 TEST_ASSERT(u_strlen(resultPat
) == 3);
1537 resultText
= uregex_patternUText(re
, &status
);
1538 TEST_ASSERT_SUCCESS(status
);
1539 TEST_ASSERT_UTEXT(str_hel
, resultText
);
1545 * setUText() and lookingAt()
1548 UText text1
= UTEXT_INITIALIZER
;
1549 UText text2
= UTEXT_INITIALIZER
;
1551 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1552 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1553 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1554 status
= U_ZERO_ERROR
;
1555 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1556 utext_openUTF8(&text2
, str_abcccxd
, -1, &status
);
1558 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1559 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1560 TEST_ASSERT_SUCCESS(status
);
1562 /* Operation before doing a setText should fail... */
1563 status
= U_ZERO_ERROR
;
1564 uregex_lookingAt(re
, 0, &status
);
1565 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
1567 status
= U_ZERO_ERROR
;
1568 uregex_setUText(re
, &text1
, &status
);
1569 result
= uregex_lookingAt(re
, 0, &status
);
1570 TEST_ASSERT(result
== TRUE
);
1571 TEST_ASSERT_SUCCESS(status
);
1573 status
= U_ZERO_ERROR
;
1574 uregex_setUText(re
, &text2
, &status
);
1575 result
= uregex_lookingAt(re
, 0, &status
);
1576 TEST_ASSERT(result
== FALSE
);
1577 TEST_ASSERT_SUCCESS(status
);
1579 status
= U_ZERO_ERROR
;
1580 uregex_setUText(re
, &text1
, &status
);
1581 result
= uregex_lookingAt(re
, 0, &status
);
1582 TEST_ASSERT(result
== TRUE
);
1583 TEST_ASSERT_SUCCESS(status
);
1586 utext_close(&text1
);
1587 utext_close(&text2
);
1592 * getText() and getUText()
1595 UText text1
= UTEXT_INITIALIZER
;
1596 UText text2
= UTEXT_INITIALIZER
;
1597 UChar text2Chars
[20];
1599 const UChar
*result
;
1601 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1602 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1603 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1606 status
= U_ZERO_ERROR
;
1607 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1608 u_uastrncpy(text2Chars
, str_abcccxd
, UPRV_LENGTHOF(text2Chars
));
1609 utext_openUChars(&text2
, text2Chars
, -1, &status
);
1611 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1612 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1614 /* First set a UText */
1615 uregex_setUText(re
, &text1
, &status
);
1616 resultText
= uregex_getUText(re
, NULL
, &status
);
1617 TEST_ASSERT_SUCCESS(status
);
1618 TEST_ASSERT(resultText
!= &text1
);
1619 utext_setNativeIndex(resultText
, 0);
1620 utext_setNativeIndex(&text1
, 0);
1621 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1622 utext_close(resultText
);
1624 result
= uregex_getText(re
, &textLength
, &status
); /* flattens UText into buffer */
1625 (void)result
; /* Suppress set but not used warning. */
1626 TEST_ASSERT(textLength
== -1 || textLength
== 6);
1627 resultText
= uregex_getUText(re
, NULL
, &status
);
1628 TEST_ASSERT_SUCCESS(status
);
1629 TEST_ASSERT(resultText
!= &text1
);
1630 utext_setNativeIndex(resultText
, 0);
1631 utext_setNativeIndex(&text1
, 0);
1632 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1633 utext_close(resultText
);
1635 /* Then set a UChar * */
1636 uregex_setText(re
, text2Chars
, 7, &status
);
1637 resultText
= uregex_getUText(re
, NULL
, &status
);
1638 TEST_ASSERT_SUCCESS(status
);
1639 utext_setNativeIndex(resultText
, 0);
1640 utext_setNativeIndex(&text2
, 0);
1641 TEST_ASSERT(testUTextEqual(resultText
, &text2
));
1642 utext_close(resultText
);
1643 result
= uregex_getText(re
, &textLength
, &status
);
1644 TEST_ASSERT(textLength
== 7);
1647 utext_close(&text1
);
1648 utext_close(&text2
);
1655 UText text1
= UTEXT_INITIALIZER
;
1657 UText nullText
= UTEXT_INITIALIZER
;
1658 const char str_abcccde
[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1659 const char str_abcd
[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1661 status
= U_ZERO_ERROR
;
1662 utext_openUTF8(&text1
, str_abcccde
, -1, &status
);
1663 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1664 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1666 uregex_setUText(re
, &text1
, &status
);
1667 result
= uregex_matches(re
, 0, &status
);
1668 TEST_ASSERT(result
== FALSE
);
1669 TEST_ASSERT_SUCCESS(status
);
1672 status
= U_ZERO_ERROR
;
1673 re
= uregex_openC(".?", 0, NULL
, &status
);
1674 uregex_setUText(re
, &text1
, &status
);
1675 result
= uregex_matches(re
, 7, &status
);
1676 TEST_ASSERT(result
== TRUE
);
1677 TEST_ASSERT_SUCCESS(status
);
1679 status
= U_ZERO_ERROR
;
1680 utext_openUTF8(&nullText
, "", -1, &status
);
1681 uregex_setUText(re
, &nullText
, &status
);
1682 TEST_ASSERT_SUCCESS(status
);
1683 result
= uregex_matches(re
, 0, &status
);
1684 TEST_ASSERT(result
== TRUE
);
1685 TEST_ASSERT_SUCCESS(status
);
1688 utext_close(&text1
);
1689 utext_close(&nullText
);
1694 * lookingAt() Used in setText test.
1699 * find(), findNext, start, end, reset
1704 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
1705 status
= U_ZERO_ERROR
;
1706 re
= uregex_openC("rx", 0, NULL
, &status
);
1708 uregex_setText(re
, text1
, -1, &status
);
1709 result
= uregex_find(re
, 0, &status
);
1710 TEST_ASSERT(result
== TRUE
);
1711 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1712 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1713 TEST_ASSERT_SUCCESS(status
);
1715 result
= uregex_find(re
, 9, &status
);
1716 TEST_ASSERT(result
== TRUE
);
1717 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
1718 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
1719 TEST_ASSERT_SUCCESS(status
);
1721 result
= uregex_find(re
, 14, &status
);
1722 TEST_ASSERT(result
== FALSE
);
1723 TEST_ASSERT_SUCCESS(status
);
1725 status
= U_ZERO_ERROR
;
1726 uregex_reset(re
, 0, &status
);
1728 result
= uregex_findNext(re
, &status
);
1729 TEST_ASSERT(result
== TRUE
);
1730 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1731 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1732 TEST_ASSERT_SUCCESS(status
);
1734 result
= uregex_findNext(re
, &status
);
1735 TEST_ASSERT(result
== TRUE
);
1736 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
1737 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
1738 TEST_ASSERT_SUCCESS(status
);
1740 status
= U_ZERO_ERROR
;
1741 uregex_reset(re
, 12, &status
);
1743 result
= uregex_findNext(re
, &status
);
1744 TEST_ASSERT(result
== TRUE
);
1745 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
1746 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
1747 TEST_ASSERT_SUCCESS(status
);
1749 result
= uregex_findNext(re
, &status
);
1750 TEST_ASSERT(result
== FALSE
);
1751 TEST_ASSERT_SUCCESS(status
);
1763 int64_t groupLen
= 0;
1766 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
1768 status
= U_ZERO_ERROR
;
1769 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
1770 TEST_ASSERT_SUCCESS(status
);
1772 uregex_setText(re
, text1
, -1, &status
);
1773 result
= uregex_find(re
, 0, &status
);
1774 TEST_ASSERT(result
==TRUE
);
1776 /* Capture Group 0 with shallow clone API. Should succeed. */
1777 status
= U_ZERO_ERROR
;
1778 actual
= uregex_groupUText(re
, 0, NULL
, &groupLen
, &status
);
1779 TEST_ASSERT_SUCCESS(status
);
1781 TEST_ASSERT(utext_getNativeIndex(actual
) == 6); /* index of "abc " within "noise abc ..." */
1782 TEST_ASSERT(groupLen
== 16); /* length of "abc interior def" */
1783 utext_extract(actual
, 6 /*start index */, 6+16 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1785 TEST_ASSERT_STRING("abc interior def", groupBuf
, TRUE
);
1786 utext_close(actual
);
1788 /* Capture group #1. Should succeed. */
1789 status
= U_ZERO_ERROR
;
1791 actual
= uregex_groupUText(re
, 1, NULL
, &groupLen
, &status
);
1792 TEST_ASSERT_SUCCESS(status
);
1793 TEST_ASSERT(9 == utext_getNativeIndex(actual
)); /* index of " interior " within "noise abc interior def ... " */
1794 /* (within the string text1) */
1795 TEST_ASSERT(10 == groupLen
); /* length of " interior " */
1796 utext_extract(actual
, 9 /*start index*/, 9+10 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1797 TEST_ASSERT_STRING(" interior ", groupBuf
, TRUE
);
1799 utext_close(actual
);
1801 /* Capture group out of range. Error. */
1802 status
= U_ZERO_ERROR
;
1803 actual
= uregex_groupUText(re
, 2, NULL
, &groupLen
, &status
);
1804 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
1805 utext_close(actual
);
1816 UText replText
= UTEXT_INITIALIZER
;
1818 const char str_Replxxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1819 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1820 const char str_u00411U00000042a
[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1821 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1822 const char str_1x
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1823 const char str_ReplaceAaaBax1xxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1824 status
= U_ZERO_ERROR
;
1825 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1826 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1827 utext_openUTF8(&replText
, str_1x
, -1, &status
);
1829 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1830 TEST_ASSERT_SUCCESS(status
);
1832 /* Normal case, with match */
1833 uregex_setText(re
, text1
, -1, &status
);
1834 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1835 TEST_ASSERT_SUCCESS(status
);
1836 TEST_ASSERT_UTEXT(str_Replxxx
, result
);
1837 utext_close(result
);
1839 /* No match. Text should copy to output with no changes. */
1840 uregex_setText(re
, text2
, -1, &status
);
1841 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1842 TEST_ASSERT_SUCCESS(status
);
1843 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1844 utext_close(result
);
1846 /* Unicode escapes */
1847 uregex_setText(re
, text1
, -1, &status
);
1848 utext_openUTF8(&replText
, str_u00411U00000042a
, -1, &status
);
1849 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1850 TEST_ASSERT_SUCCESS(status
);
1851 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx
, result
);
1852 utext_close(result
);
1855 utext_close(&replText
);
1865 UText replText
= UTEXT_INITIALIZER
;
1867 const char str_1
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1868 const char str_Replaceaa1
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1869 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1870 status
= U_ZERO_ERROR
;
1871 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1872 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1873 utext_openUTF8(&replText
, str_1
, -1, &status
);
1875 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1876 TEST_ASSERT_SUCCESS(status
);
1878 /* Normal case, with match */
1879 uregex_setText(re
, text1
, -1, &status
);
1880 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1881 TEST_ASSERT_SUCCESS(status
);
1882 TEST_ASSERT_UTEXT(str_Replaceaa1
, result
);
1883 utext_close(result
);
1885 /* No match. Text should copy to output with no changes. */
1886 uregex_setText(re
, text2
, -1, &status
);
1887 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1888 TEST_ASSERT_SUCCESS(status
);
1889 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1890 utext_close(result
);
1893 utext_close(&replText
);
1898 * appendReplacement()
1907 status
= U_ZERO_ERROR
;
1908 re
= uregex_openC(".*", 0, 0, &status
);
1909 TEST_ASSERT_SUCCESS(status
);
1911 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1912 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1913 uregex_setText(re
, text
, -1, &status
);
1915 /* match covers whole target string */
1916 uregex_find(re
, 0, &status
);
1917 TEST_ASSERT_SUCCESS(status
);
1919 bufCap
= UPRV_LENGTHOF(buf
);
1920 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1921 TEST_ASSERT_SUCCESS(status
);
1922 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1924 /* Match has \u \U escapes */
1925 uregex_find(re
, 0, &status
);
1926 TEST_ASSERT_SUCCESS(status
);
1928 bufCap
= UPRV_LENGTHOF(buf
);
1929 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1930 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1931 TEST_ASSERT_SUCCESS(status
);
1932 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1939 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1946 UChar textToSplit
[80];
1952 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1953 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1955 status
= U_ZERO_ERROR
;
1956 re
= uregex_openC(":", 0, NULL
, &status
);
1961 uregex_setText(re
, textToSplit
, -1, &status
);
1962 TEST_ASSERT_SUCCESS(status
);
1964 /* The TEST_ASSERT_SUCCESS call above should change too... */
1965 if (U_SUCCESS(status
)) {
1966 memset(fields
, 0, sizeof(fields
));
1967 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
1968 TEST_ASSERT_SUCCESS(status
);
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */
1971 if(U_SUCCESS(status
)) {
1972 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1973 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1974 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1975 TEST_ASSERT(numFields
== 3);
1976 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
1977 TEST_ASSERT_UTEXT(str_second
, fields
[1]);
1978 TEST_ASSERT_UTEXT(str_third
, fields
[2]);
1979 TEST_ASSERT(fields
[3] == NULL
);
1981 for(i
= 0; i
< numFields
; i
++) {
1982 utext_close(fields
[i
]);
1989 /* Split with too few output strings available */
1990 status
= U_ZERO_ERROR
;
1991 re
= uregex_openC(":", 0, NULL
, &status
);
1992 uregex_setText(re
, textToSplit
, -1, &status
);
1993 TEST_ASSERT_SUCCESS(status
);
1995 /* The TEST_ASSERT_SUCCESS call above should change too... */
1996 if(U_SUCCESS(status
)) {
1999 fields
[2] = &patternText
;
2000 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2001 TEST_ASSERT_SUCCESS(status
);
2003 /* The TEST_ASSERT_SUCCESS call above should change too... */
2004 if(U_SUCCESS(status
)) {
2005 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2006 const char str_secondthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2007 TEST_ASSERT(numFields
== 2);
2008 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2009 TEST_ASSERT_UTEXT(str_secondthird
, fields
[1]);
2010 TEST_ASSERT(fields
[2] == &patternText
);
2012 for(i
= 0; i
< numFields
; i
++) {
2013 utext_close(fields
[i
]);
2020 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2021 * comes out as additional fields. */
2023 UChar textToSplit
[80];
2028 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
2030 status
= U_ZERO_ERROR
;
2031 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
2033 uregex_setText(re
, textToSplit
, -1, &status
);
2034 TEST_ASSERT_SUCCESS(status
);
2036 /* The TEST_ASSERT_SUCCESS call above should change too... */
2037 if(U_SUCCESS(status
)) {
2038 memset(fields
, 0, sizeof(fields
));
2039 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
2040 TEST_ASSERT_SUCCESS(status
);
2042 /* The TEST_ASSERT_SUCCESS call above should change too... */
2043 if(U_SUCCESS(status
)) {
2044 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2045 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2046 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2047 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2048 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2050 TEST_ASSERT(numFields
== 5);
2051 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2052 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2053 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2054 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2055 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2056 TEST_ASSERT(fields
[5] == NULL
);
2058 for(i
= 0; i
< numFields
; i
++) {
2059 utext_close(fields
[i
]);
2063 /* Split with too few output strings available (2) */
2064 status
= U_ZERO_ERROR
;
2067 fields
[2] = &patternText
;
2068 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2069 TEST_ASSERT_SUCCESS(status
);
2071 /* The TEST_ASSERT_SUCCESS call above should change too... */
2072 if(U_SUCCESS(status
)) {
2073 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2074 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2075 TEST_ASSERT(numFields
== 2);
2076 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2077 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[1]);
2078 TEST_ASSERT(fields
[2] == &patternText
);
2080 for(i
= 0; i
< numFields
; i
++) {
2081 utext_close(fields
[i
]);
2085 /* Split with too few output strings available (3) */
2086 status
= U_ZERO_ERROR
;
2090 fields
[3] = &patternText
;
2091 numFields
= uregex_splitUText(re
, fields
, 3, &status
);
2092 TEST_ASSERT_SUCCESS(status
);
2094 /* The TEST_ASSERT_SUCCESS call above should change too... */
2095 if(U_SUCCESS(status
)) {
2096 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2097 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2098 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2099 TEST_ASSERT(numFields
== 3);
2100 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2101 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2102 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[2]);
2103 TEST_ASSERT(fields
[3] == &patternText
);
2105 for(i
= 0; i
< numFields
; i
++) {
2106 utext_close(fields
[i
]);
2109 /* Split with just enough output strings available (5) */
2110 status
= U_ZERO_ERROR
;
2116 fields
[5] = &patternText
;
2117 numFields
= uregex_splitUText(re
, fields
, 5, &status
);
2118 TEST_ASSERT_SUCCESS(status
);
2120 /* The TEST_ASSERT_SUCCESS call above should change too... */
2121 if(U_SUCCESS(status
)) {
2122 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2123 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2124 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2125 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2126 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2128 TEST_ASSERT(numFields
== 5);
2129 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2130 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2131 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2132 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2133 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2134 TEST_ASSERT(fields
[5] == &patternText
);
2136 for(i
= 0; i
< numFields
; i
++) {
2137 utext_close(fields
[i
]);
2140 /* Split, end of text is a field delimiter. */
2141 status
= U_ZERO_ERROR
;
2142 uregex_setText(re
, textToSplit
, strlen("first <tag-a> second<tag-b>"), &status
);
2143 TEST_ASSERT_SUCCESS(status
);
2145 /* The TEST_ASSERT_SUCCESS call above should change too... */
2146 if(U_SUCCESS(status
)) {
2147 memset(fields
, 0, sizeof(fields
));
2148 fields
[9] = &patternText
;
2149 numFields
= uregex_splitUText(re
, fields
, 9, &status
);
2150 TEST_ASSERT_SUCCESS(status
);
2152 /* The TEST_ASSERT_SUCCESS call above should change too... */
2153 if(U_SUCCESS(status
)) {
2154 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2155 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2156 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2157 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2158 const char str_empty
[] = { 0x00 };
2160 TEST_ASSERT(numFields
== 5);
2161 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2162 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2163 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2164 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2165 TEST_ASSERT_UTEXT(str_empty
, fields
[4]);
2166 TEST_ASSERT(fields
[5] == NULL
);
2167 TEST_ASSERT(fields
[8] == NULL
);
2168 TEST_ASSERT(fields
[9] == &patternText
);
2170 for(i
= 0; i
< numFields
; i
++) {
2171 utext_close(fields
[i
]);
2177 utext_close(&patternText
);
2181 static void TestRefreshInput(void) {
2183 * RefreshInput changes out the input of a URegularExpression without
2184 * changing anything else in the match state. Used with Java JNI,
2185 * when Java moves the underlying string storage. This test
2186 * runs a find() loop, moving the text after the first match.
2187 * The right number of matches should still be found.
2189 UChar testStr
[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2190 UChar movedStr
[] = { 0, 0, 0, 0, 0, 0};
2191 UErrorCode status
= U_ZERO_ERROR
;
2192 URegularExpression
*re
;
2193 UText ut1
= UTEXT_INITIALIZER
;
2194 UText ut2
= UTEXT_INITIALIZER
;
2196 re
= uregex_openC("[ABC]", 0, 0, &status
);
2197 TEST_ASSERT_SUCCESS(status
);
2199 utext_openUChars(&ut1
, testStr
, -1, &status
);
2200 TEST_ASSERT_SUCCESS(status
);
2201 uregex_setUText(re
, &ut1
, &status
);
2202 TEST_ASSERT_SUCCESS(status
);
2204 /* Find the first match "A" in the original string */
2205 TEST_ASSERT(uregex_findNext(re
, &status
));
2206 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
2208 /* Move the string, kill the original string. */
2209 u_strcpy(movedStr
, testStr
);
2210 u_memset(testStr
, 0, u_strlen(testStr
));
2211 utext_openUChars(&ut2
, movedStr
, -1, &status
);
2212 TEST_ASSERT_SUCCESS(status
);
2213 uregex_refreshUText(re
, &ut2
, &status
);
2214 TEST_ASSERT_SUCCESS(status
);
2216 /* Find the following two matches, now working in the moved string. */
2217 TEST_ASSERT(uregex_findNext(re
, &status
));
2218 TEST_ASSERT(uregex_start(re
, 0, &status
) == 2);
2219 TEST_ASSERT(uregex_findNext(re
, &status
));
2220 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
2221 TEST_ASSERT(FALSE
== uregex_findNext(re
, &status
));
2227 static void TestBug8421(void) {
2228 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2231 URegularExpression
*re
;
2232 UErrorCode status
= U_ZERO_ERROR
;
2235 re
= uregex_openC("abc", 0, 0, &status
);
2236 TEST_ASSERT_SUCCESS(status
);
2238 limit
= uregex_getTimeLimit(re
, &status
);
2239 TEST_ASSERT_SUCCESS(status
);
2240 TEST_ASSERT(limit
== 0);
2242 uregex_setTimeLimit(re
, 100, &status
);
2243 TEST_ASSERT_SUCCESS(status
);
2244 limit
= uregex_getTimeLimit(re
, &status
);
2245 TEST_ASSERT_SUCCESS(status
);
2246 TEST_ASSERT(limit
== 100);
2251 static UBool U_CALLCONV
FindCallback(const void* context
, int64_t matchIndex
) {
2255 static UBool U_CALLCONV
MatchCallback(const void *context
, int32_t steps
) {
2259 static void TestBug10815() {
2260 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2261 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2263 URegularExpression
*re
;
2264 UErrorCode status
= U_ZERO_ERROR
;
2268 // findNext() with a find progress callback function.
2270 re
= uregex_openC(".z", 0, 0, &status
);
2271 TEST_ASSERT_SUCCESS(status
);
2273 u_uastrncpy(text
, "Hello, World.", UPRV_LENGTHOF(text
));
2274 uregex_setText(re
, text
, -1, &status
);
2275 TEST_ASSERT_SUCCESS(status
);
2277 uregex_setFindProgressCallback(re
, FindCallback
, NULL
, &status
);
2278 TEST_ASSERT_SUCCESS(status
);
2280 uregex_findNext(re
, &status
);
2281 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2285 // findNext() with a match progress callback function.
2287 status
= U_ZERO_ERROR
;
2288 re
= uregex_openC("((xxx)*)*y", 0, 0, &status
);
2289 TEST_ASSERT_SUCCESS(status
);
2291 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2292 // it will appear to be stuck in a (near) infinite loop.
2293 u_uastrncpy(text
, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text
));
2294 uregex_setText(re
, text
, -1, &status
);
2295 TEST_ASSERT_SUCCESS(status
);
2297 uregex_setMatchCallback(re
, MatchCallback
, NULL
, &status
);
2298 TEST_ASSERT_SUCCESS(status
);
2300 uregex_findNext(re
, &status
);
2301 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2307 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */