1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
19 * More complete testing of regular expression functionality is done with the C++ tests.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
/* TEST_ASSERT_SUCCESS(status): when status is a failure code (U_FAILURE), report it
 * through log_data_err together with the file, the line and u_errorName(status).
 * Expands to a brace-enclosed block, not an expression. */
36 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
37 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
/* TEST_ASSERT(expr): report a test failure through log_err, quoting the stringized
 * expression, when expr compares equal to FALSE.
 * Expands to a brace-enclosed block, so a call site compiles with or without a
 * trailing semicolon. */
39 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
40 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
43 * TEST_SETUP and TEST_TEARDOWN
44 * macros to handle the boilerplate around setting up regex test cases.
45 * parameters to setup:
46 * pattern: The regex pattern, a (char *) null terminated C string.
47 * testString: The string data, also a (char *) C string.
48 * flags: Regex flags to set when compiling the pattern
50 * Put arbitrary test code between SETUP and TEARDOWN.
51 * 're' is the compiled, ready-to-go regular expression.
/* TEST_SETUP(pattern, testString, flags)
 *   Opens a brace scope, compiles `pattern` (a char * C string) into `re` with
 *   uregex_openC, converts `testString` into a freshly malloc'ed UChar buffer
 *   (`srcString`) and installs it as the text of `re`.  The caller's `status` and
 *   `re` variables are assigned.  The macro deliberately leaves two scopes open
 *   (the outer block and `if (U_SUCCESS(status)) {`); they are expected to be
 *   closed by the matching TEST_TEARDOWN.
 *   An allocation failure is reported as U_MEMORY_ALLOCATION_ERROR instead of
 *   passing a NULL buffer on to u_uastrncpy / uregex_setText.
 *   Note: testString is evaluated more than once; pass a side-effect-free expression.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {
64 #define TEST_TEARDOWN \
66 TEST_ASSERT_SUCCESS(status); \
73 * @param expected utf-8 array of bytes to be expected
75 static void test_assert_string(const char *expected
, const UChar
*actual
, UBool nulTerm
, const char *file
, int line
) {
76 char buf_inside_macro
[120];
77 int32_t len
= (int32_t)strlen(expected
);
80 u_austrncpy(buf_inside_macro
, (actual
), len
+1);
81 buf_inside_macro
[len
+2] = 0;
82 success
= (strcmp((expected
), buf_inside_macro
) == 0);
84 u_austrncpy(buf_inside_macro
, (actual
), len
);
85 buf_inside_macro
[len
+1] = 0;
86 success
= (strncmp((expected
), buf_inside_macro
, len
) == 0);
88 if (success
== FALSE
) {
89 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
90 file
, line
, (expected
), buf_inside_macro
);
/* TEST_ASSERT_STRING(expected, actual, nulTerm): forwards to test_assert_string(),
 * supplying the call site's __FILE__ and __LINE__ for the failure message. */
94 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
/* Compare a UTF-8 C string with the contents of a UText, one code point at a time.
 * The UText is rewound to native index 0 first, so its iteration position is changed.
 * NOTE(review): the loop header and the return statement are not visible in this
 * excerpt; presumably stringsEqual is the returned value - confirm. */
97 static UBool
equals_utf8_utext(const char *utf8
, UText
*utext
) {
101 UBool stringsEqual
= TRUE
;
102 utext_setNativeIndex(utext
, 0);
/* Fetch the next code point from each side: u8c from the UTF-8 bytes (advancing
 * u8i), utc from the UText. */
104 U8_NEXT_UNSAFE(utf8
, u8i
, u8c
);
105 utc
= utext_next32(utext
);
/* Both sides ended together: NUL in utf8 and U_SENTINEL from the UText. */
106 if (u8c
== 0 && utc
== U_SENTINEL
) {
/* Code points differ, or utf8 hit its NUL while the UText did not end: not equal. */
109 if (u8c
!= utc
|| u8c
== 0) {
110 stringsEqual
= FALSE
;
/* Check that the UText `actual` holds the same text as the UTF-8 string `expected`
 * (via equals_utf8_utext).  On mismatch a failure naming `file` and `line` is
 * logged, followed by a walk over the actual text one code point at a time.
 * The native index of `actual` is reset to 0, so its iteration position changes. */
118 static void test_assert_utext(const char *expected
, UText
*actual
, const char *file
, int line
) {
119 utext_setNativeIndex(actual
, 0);
120 if (!equals_utf8_utext(expected
, actual
)) {
/* The message is left open (no closing quote or newline here); the code points of
 * `actual` are handled by the loop below. */
122 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file
, line
, expected
);
123 c
= utext_next32From(actual
, 0);
124 while (c
!= U_SENTINEL
) {
/* Distinguishes code points strictly between 0x20 and 0x7e from all others.
 * NOTE(review): both branch bodies are missing from this excerpt; presumably the
 * first prints the character itself and the other a numeric form - confirm. */
125 if (0x20<c
&& c
<0x7e) {
130 c
= UTEXT_NEXT32(actual
);
137 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
138 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
/* Forwards to test_assert_utext(), adding the call site's __FILE__ and __LINE__. */
140 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
/* Walk two UTexts in parallel, starting each at native index 0 and reading one code
 * point from each per iteration until uta is exhausted (U_SENTINEL).
 * Both iteration positions are changed by the call.
 * NOTE(review): the per-iteration comparison and the return statement are not
 * visible in this excerpt; presumably the result says whether the two texts are
 * equal - confirm. */
142 static UBool
testUTextEqual(UText
*uta
, UText
*utb
) {
145 utext_setNativeIndex(uta
, 0);
146 utext_setNativeIndex(utb
, 0);
148 ca
= utext_next32(uta
);
149 cb
= utext_next32(utb
);
153 } while (ca
!= U_SENTINEL
);
/* Forward declarations of the file-local test functions that addURegexTest() registers. */
160 static void TestRegexCAPI(void);
161 static void TestBug4315(void);
162 static void TestUTextAPI(void);
163 static void TestRefreshInput(void);
164 static void TestBug8421(void);
165 static void TestBug10815(void);
166 static void TestMatchStartLineWithEmptyText(void);
/* Prototype for the registration entry point defined directly below. */
168 void addURegexTest(TestNode
** root
);
/* Register every regular-expression C API test in this file with addTest(), each
 * under a name beginning with "regex/".
 * @param root  passed through unchanged to each addTest() call. */
170 void addURegexTest(TestNode
** root
)
172 addTest(root
, &TestRegexCAPI
, "regex/TestRegexCAPI");
173 addTest(root
, &TestBug4315
, "regex/TestBug4315");
174 addTest(root
, &TestUTextAPI
, "regex/TestUTextAPI");
175 addTest(root
, &TestRefreshInput
, "regex/TestRefreshInput");
176 addTest(root
, &TestBug8421
, "regex/TestBug8421");
177 addTest(root
, &TestBug10815
, "regex/TestBug10815");
178 addTest(root
, &TestMatchStartLineWithEmptyText
, "regex/TestMatchStartLineWithEmptyText");
182 * Call back function and context struct used for testing
183 * regular expression user callbacks. This test is mostly the same as
184 * the corresponding C++ test in intltest.
186 typedef struct callBackContext
{
/* Callback used when testing regular expression user callbacks.
 * @param context  pointer to a callBackContext; it is cast and updated here.
 * @param steps    step count passed in by the caller; expected to be exactly one
 *                 more than the value recorded on the previous call.
 * @return TRUE while info->numCalls is still below info->maxCalls, FALSE otherwise.
 * NOTE(review): the statement that updates numCalls is not visible in this
 * excerpt - confirm where it is incremented. */
192 static UBool U_EXPORT2 U_CALLCONV
193 TestCallbackFn(const void *context
, int32_t steps
) {
194 callBackContext
*info
= (callBackContext
*)context
;
/* Steps must advance by exactly one per call; anything else is logged as an error. */
195 if (info
->lastSteps
+1 != steps
) {
196 log_err("incorrect steps in callback. Expected %d, got %d\n", info
->lastSteps
+1, steps
);
/* Remember this call's step count for the next check. */
198 info
->lastSteps
= steps
;
200 return (info
->numCalls
< info
->maxCalls
);
204 * Regular Expression C API Tests
206 static void TestRegexCAPI(void) {
207 UErrorCode status
= U_ZERO_ERROR
;
208 URegularExpression
*re
;
212 memset(&minus1
, -1, sizeof(minus1
));
214 /* Minimalist open/close */
215 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
216 re
= uregex_open(pat
, -1, 0, 0, &status
);
217 if (U_FAILURE(status
)) {
218 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
223 /* Open with all flag values set */
224 status
= U_ZERO_ERROR
;
225 re
= uregex_open(pat
, -1,
226 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
| UREGEX_LITERAL
,
228 TEST_ASSERT_SUCCESS(status
);
231 /* Open with an invalid flag */
232 status
= U_ZERO_ERROR
;
233 re
= uregex_open(pat
, -1, 0x40000000, 0, &status
);
234 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
237 /* Open with an unimplemented flag */
238 status
= U_ZERO_ERROR
;
239 re
= uregex_open(pat
, -1, UREGEX_CANON_EQ
, 0, &status
);
240 TEST_ASSERT(status
== U_REGEX_UNIMPLEMENTED
);
243 /* openC with an invalid parameter */
244 status
= U_ZERO_ERROR
;
245 re
= uregex_openC(NULL
,
246 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
247 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
249 /* openC with an invalid parameter */
250 status
= U_USELESS_COLLATOR_ERROR
;
251 re
= uregex_openC(NULL
,
252 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
253 TEST_ASSERT(status
== U_USELESS_COLLATOR_ERROR
&& re
== NULL
);
255 /* openC open from a C string */
259 status
= U_ZERO_ERROR
;
260 re
= uregex_openC("abc*", 0, 0, &status
);
261 TEST_ASSERT_SUCCESS(status
);
262 p
= uregex_pattern(re
, &len
, &status
);
263 TEST_ASSERT_SUCCESS(status
);
265 /* The TEST_ASSERT_SUCCESS above should change too... */
266 if(U_SUCCESS(status
)) {
267 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
268 TEST_ASSERT(u_strcmp(pat
, p
) == 0);
269 TEST_ASSERT(len
==(int32_t)strlen("abc*"));
274 /* TODO: Open with ParseError parameter */
281 URegularExpression
*clone1
;
282 URegularExpression
*clone2
;
283 URegularExpression
*clone3
;
284 UChar testString1
[30];
285 UChar testString2
[30];
289 status
= U_ZERO_ERROR
;
290 re
= uregex_openC("abc*", 0, 0, &status
);
291 TEST_ASSERT_SUCCESS(status
);
292 clone1
= uregex_clone(re
, &status
);
293 TEST_ASSERT_SUCCESS(status
);
294 TEST_ASSERT(clone1
!= NULL
);
296 status
= U_ZERO_ERROR
;
297 clone2
= uregex_clone(re
, &status
);
298 TEST_ASSERT_SUCCESS(status
);
299 TEST_ASSERT(clone2
!= NULL
);
302 status
= U_ZERO_ERROR
;
303 clone3
= uregex_clone(clone2
, &status
);
304 TEST_ASSERT_SUCCESS(status
);
305 TEST_ASSERT(clone3
!= NULL
);
307 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
308 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
310 status
= U_ZERO_ERROR
;
311 uregex_setText(clone1
, testString1
, -1, &status
);
312 TEST_ASSERT_SUCCESS(status
);
313 result
= uregex_lookingAt(clone1
, 0, &status
);
314 TEST_ASSERT_SUCCESS(status
);
315 TEST_ASSERT(result
==TRUE
);
317 status
= U_ZERO_ERROR
;
318 uregex_setText(clone2
, testString2
, -1, &status
);
319 TEST_ASSERT_SUCCESS(status
);
320 result
= uregex_lookingAt(clone2
, 0, &status
);
321 TEST_ASSERT_SUCCESS(status
);
322 TEST_ASSERT(result
==FALSE
);
323 result
= uregex_find(clone2
, 0, &status
);
324 TEST_ASSERT_SUCCESS(status
);
325 TEST_ASSERT(result
==TRUE
);
327 uregex_close(clone1
);
328 uregex_close(clone2
);
329 uregex_close(clone3
);
337 const UChar
*resultPat
;
339 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
));
340 status
= U_ZERO_ERROR
;
341 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
342 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
343 TEST_ASSERT_SUCCESS(status
);
345 /* The TEST_ASSERT_SUCCESS above should change too... */
346 if (U_SUCCESS(status
)) {
347 TEST_ASSERT(resultLen
== -1);
348 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
353 status
= U_ZERO_ERROR
;
354 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
355 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
356 TEST_ASSERT_SUCCESS(status
);
357 TEST_ASSERT_SUCCESS(status
);
359 /* The TEST_ASSERT_SUCCESS above should change too... */
360 if (U_SUCCESS(status
)) {
361 TEST_ASSERT(resultLen
== 3);
362 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
363 TEST_ASSERT(u_strlen(resultPat
) == 3);
375 status
= U_ZERO_ERROR
;
376 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
377 t
= uregex_flags(re
, &status
);
378 TEST_ASSERT_SUCCESS(status
);
382 status
= U_ZERO_ERROR
;
383 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
384 t
= uregex_flags(re
, &status
);
385 TEST_ASSERT_SUCCESS(status
);
389 status
= U_ZERO_ERROR
;
390 re
= uregex_open(pat
, -1, UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
, NULL
, &status
);
391 t
= uregex_flags(re
, &status
);
392 TEST_ASSERT_SUCCESS(status
);
393 TEST_ASSERT(t
== (UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
));
398 * setText() and lookingAt()
405 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
406 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
407 status
= U_ZERO_ERROR
;
408 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
409 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
410 TEST_ASSERT_SUCCESS(status
);
412 /* Operation before doing a setText should fail... */
413 status
= U_ZERO_ERROR
;
414 uregex_lookingAt(re
, 0, &status
);
415 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
417 status
= U_ZERO_ERROR
;
418 uregex_setText(re
, text1
, -1, &status
);
419 result
= uregex_lookingAt(re
, 0, &status
);
420 TEST_ASSERT(result
== TRUE
);
421 TEST_ASSERT_SUCCESS(status
);
423 status
= U_ZERO_ERROR
;
424 uregex_setText(re
, text2
, -1, &status
);
425 result
= uregex_lookingAt(re
, 0, &status
);
426 TEST_ASSERT(result
== FALSE
);
427 TEST_ASSERT_SUCCESS(status
);
429 status
= U_ZERO_ERROR
;
430 uregex_setText(re
, text1
, -1, &status
);
431 result
= uregex_lookingAt(re
, 0, &status
);
432 TEST_ASSERT(result
== TRUE
);
433 TEST_ASSERT_SUCCESS(status
);
435 status
= U_ZERO_ERROR
;
436 uregex_setText(re
, text1
, 5, &status
);
437 result
= uregex_lookingAt(re
, 0, &status
);
438 TEST_ASSERT(result
== FALSE
);
439 TEST_ASSERT_SUCCESS(status
);
441 status
= U_ZERO_ERROR
;
442 uregex_setText(re
, text1
, 6, &status
);
443 result
= uregex_lookingAt(re
, 0, &status
);
444 TEST_ASSERT(result
== TRUE
);
445 TEST_ASSERT_SUCCESS(status
);
460 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
461 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
462 status
= U_ZERO_ERROR
;
463 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
464 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
466 uregex_setText(re
, text1
, -1, &status
);
467 result
= uregex_getText(re
, &textLength
, &status
);
468 TEST_ASSERT(result
== text1
);
469 TEST_ASSERT(textLength
== -1);
470 TEST_ASSERT_SUCCESS(status
);
472 status
= U_ZERO_ERROR
;
473 uregex_setText(re
, text2
, 7, &status
);
474 result
= uregex_getText(re
, &textLength
, &status
);
475 TEST_ASSERT(result
== text2
);
476 TEST_ASSERT(textLength
== 7);
477 TEST_ASSERT_SUCCESS(status
);
479 status
= U_ZERO_ERROR
;
480 uregex_setText(re
, text2
, 4, &status
);
481 result
= uregex_getText(re
, &textLength
, &status
);
482 TEST_ASSERT(result
== text2
);
483 TEST_ASSERT(textLength
== 4);
484 TEST_ASSERT_SUCCESS(status
);
495 UChar nullString
[] = {0,0,0};
497 u_uastrncpy(text1
, "abcccde", UPRV_LENGTHOF(text1
));
498 status
= U_ZERO_ERROR
;
499 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
500 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
502 uregex_setText(re
, text1
, -1, &status
);
503 result
= uregex_matches(re
, 0, &status
);
504 TEST_ASSERT(result
== FALSE
);
505 TEST_ASSERT_SUCCESS(status
);
507 status
= U_ZERO_ERROR
;
508 uregex_setText(re
, text1
, 6, &status
);
509 result
= uregex_matches(re
, 0, &status
);
510 TEST_ASSERT(result
== TRUE
);
511 TEST_ASSERT_SUCCESS(status
);
513 status
= U_ZERO_ERROR
;
514 uregex_setText(re
, text1
, 6, &status
);
515 result
= uregex_matches(re
, 1, &status
);
516 TEST_ASSERT(result
== FALSE
);
517 TEST_ASSERT_SUCCESS(status
);
520 status
= U_ZERO_ERROR
;
521 re
= uregex_openC(".?", 0, NULL
, &status
);
522 uregex_setText(re
, text1
, -1, &status
);
523 len
= u_strlen(text1
);
524 result
= uregex_matches(re
, len
, &status
);
525 TEST_ASSERT(result
== TRUE
);
526 TEST_ASSERT_SUCCESS(status
);
528 status
= U_ZERO_ERROR
;
529 uregex_setText(re
, nullString
, -1, &status
);
530 TEST_ASSERT_SUCCESS(status
);
531 result
= uregex_matches(re
, 0, &status
);
532 TEST_ASSERT(result
== TRUE
);
533 TEST_ASSERT_SUCCESS(status
);
539 * lookingAt() Used in setText test.
544 * find(), findNext, start, end, reset
549 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
550 status
= U_ZERO_ERROR
;
551 re
= uregex_openC("rx", 0, NULL
, &status
);
553 uregex_setText(re
, text1
, -1, &status
);
554 result
= uregex_find(re
, 0, &status
);
555 TEST_ASSERT(result
== TRUE
);
556 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
557 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
558 TEST_ASSERT_SUCCESS(status
);
560 result
= uregex_find(re
, 9, &status
);
561 TEST_ASSERT(result
== TRUE
);
562 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
563 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
564 TEST_ASSERT_SUCCESS(status
);
566 result
= uregex_find(re
, 14, &status
);
567 TEST_ASSERT(result
== FALSE
);
568 TEST_ASSERT_SUCCESS(status
);
570 status
= U_ZERO_ERROR
;
571 uregex_reset(re
, 0, &status
);
573 result
= uregex_findNext(re
, &status
);
574 TEST_ASSERT(result
== TRUE
);
575 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
576 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
577 TEST_ASSERT_SUCCESS(status
);
579 result
= uregex_findNext(re
, &status
);
580 TEST_ASSERT(result
== TRUE
);
581 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
582 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
583 TEST_ASSERT_SUCCESS(status
);
585 status
= U_ZERO_ERROR
;
586 uregex_reset(re
, 12, &status
);
588 result
= uregex_findNext(re
, &status
);
589 TEST_ASSERT(result
== TRUE
);
590 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
591 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
592 TEST_ASSERT_SUCCESS(status
);
594 result
= uregex_findNext(re
, &status
);
595 TEST_ASSERT(result
== FALSE
);
596 TEST_ASSERT_SUCCESS(status
);
607 status
= U_ZERO_ERROR
;
608 re
= uregex_openC("abc", 0, NULL
, &status
);
609 result
= uregex_groupCount(re
, &status
);
610 TEST_ASSERT_SUCCESS(status
);
611 TEST_ASSERT(result
== 0);
614 status
= U_ZERO_ERROR
;
615 re
= uregex_openC("abc(def)(ghi(j))", 0, NULL
, &status
);
616 result
= uregex_groupCount(re
, &status
);
617 TEST_ASSERT_SUCCESS(status
);
618 TEST_ASSERT(result
== 3);
632 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
634 status
= U_ZERO_ERROR
;
635 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
636 TEST_ASSERT_SUCCESS(status
);
639 uregex_setText(re
, text1
, -1, &status
);
640 result
= uregex_find(re
, 0, &status
);
641 TEST_ASSERT(result
==TRUE
);
643 /* Capture Group 0, the full match. Should succeed. */
644 status
= U_ZERO_ERROR
;
645 resultSz
= uregex_group(re
, 0, buf
, UPRV_LENGTHOF(buf
), &status
);
646 TEST_ASSERT_SUCCESS(status
);
647 TEST_ASSERT_STRING("abc interior def", buf
, TRUE
);
648 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
650 /* Capture group #1. Should succeed. */
651 status
= U_ZERO_ERROR
;
652 resultSz
= uregex_group(re
, 1, buf
, UPRV_LENGTHOF(buf
), &status
);
653 TEST_ASSERT_SUCCESS(status
);
654 TEST_ASSERT_STRING(" interior ", buf
, TRUE
);
655 TEST_ASSERT(resultSz
== (int32_t)strlen(" interior "));
657 /* Capture group out of range. Error. */
658 status
= U_ZERO_ERROR
;
659 uregex_group(re
, 2, buf
, UPRV_LENGTHOF(buf
), &status
);
660 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
662 /* NULL buffer, pure pre-flight */
663 status
= U_ZERO_ERROR
;
664 resultSz
= uregex_group(re
, 0, NULL
, 0, &status
);
665 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
666 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
668 /* Too small buffer, truncated string */
669 status
= U_ZERO_ERROR
;
670 memset(buf
, -1, sizeof(buf
));
671 resultSz
= uregex_group(re
, 0, buf
, 5, &status
);
672 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
673 TEST_ASSERT_STRING("abc i", buf
, FALSE
);
674 TEST_ASSERT(buf
[5] == (UChar
)0xffff);
675 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
677 /* Output string just fits buffer, no NUL term. */
678 status
= U_ZERO_ERROR
;
679 resultSz
= uregex_group(re
, 0, buf
, (int32_t)strlen("abc interior def"), &status
);
680 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
681 TEST_ASSERT_STRING("abc interior def", buf
, FALSE
);
682 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
683 TEST_ASSERT(buf
[strlen("abc interior def")] == (UChar
)0xffff);
694 /* SetRegion(), getRegion() do something */
695 TEST_SETUP(".*", "0123456789ABCDEF", 0)
696 UChar resultString
[40];
697 TEST_ASSERT(uregex_regionStart(re
, &status
) == 0);
698 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 16);
699 uregex_setRegion(re
, 3, 6, &status
);
700 TEST_ASSERT(uregex_regionStart(re
, &status
) == 3);
701 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 6);
702 TEST_ASSERT(uregex_findNext(re
, &status
));
703 TEST_ASSERT(uregex_group(re
, 0, resultString
, UPRV_LENGTHOF(resultString
), &status
) == 3)
704 TEST_ASSERT_STRING("345", resultString
, TRUE
);
707 /* find(start=-1) uses regions */
708 TEST_SETUP(".*", "0123456789ABCDEF", 0);
709 uregex_setRegion(re
, 4, 6, &status
);
710 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
711 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
712 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
715 /* find (start >=0) does not use regions */
716 TEST_SETUP(".*", "0123456789ABCDEF", 0);
717 uregex_setRegion(re
, 4, 6, &status
);
718 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
719 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
720 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
723 /* findNext() obeys regions */
724 TEST_SETUP(".", "0123456789ABCDEF", 0);
725 uregex_setRegion(re
, 4, 6, &status
);
726 TEST_ASSERT(uregex_findNext(re
,&status
) == TRUE
);
727 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
728 TEST_ASSERT(uregex_findNext(re
, &status
) == TRUE
);
729 TEST_ASSERT(uregex_start(re
, 0, &status
) == 5);
730 TEST_ASSERT(uregex_findNext(re
, &status
) == FALSE
);
733 /* matches(start=-1) uses regions */
734 /* Also, verify that non-greedy *? succeeds in finding the full match. */
735 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
736 uregex_setRegion(re
, 4, 6, &status
);
737 TEST_ASSERT(uregex_matches(re
, -1, &status
) == TRUE
);
738 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
739 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
742 /* matches (start >=0) does not use regions */
743 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
744 uregex_setRegion(re
, 4, 6, &status
);
745 TEST_ASSERT(uregex_matches(re
, 0, &status
) == TRUE
);
746 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
747 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
750 /* lookingAt(start=-1) uses regions */
751 /* Also, verify that non-greedy *? finds the first (shortest) match. */
752 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
753 uregex_setRegion(re
, 4, 6, &status
);
754 TEST_ASSERT(uregex_lookingAt(re
, -1, &status
) == TRUE
);
755 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
756 TEST_ASSERT(uregex_end(re
, 0, &status
) == 4);
759 /* lookingAt (start >=0) does not use regions */
760 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
761 uregex_setRegion(re
, 4, 6, &status
);
762 TEST_ASSERT(uregex_lookingAt(re
, 0, &status
) == TRUE
);
763 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
764 TEST_ASSERT(uregex_end(re
, 0, &status
) == 0);
768 TEST_SETUP("[a-f]*", "abcdefghij", 0);
769 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
770 TEST_ASSERT(uregex_hitEnd(re
, &status
) == FALSE
);
773 TEST_SETUP("[a-f]*", "abcdef", 0);
774 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
775 TEST_ASSERT(uregex_hitEnd(re
, &status
) == TRUE
);
779 TEST_SETUP("abcd", "abcd", 0);
780 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
781 TEST_ASSERT(uregex_requireEnd(re
, &status
) == FALSE
);
784 TEST_SETUP("abcd$", "abcd", 0);
785 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
786 TEST_ASSERT(uregex_requireEnd(re
, &status
) == TRUE
);
789 /* anchoringBounds */
790 TEST_SETUP("abc$", "abcdef", 0);
791 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == TRUE
);
792 uregex_useAnchoringBounds(re
, FALSE
, &status
);
793 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == FALSE
);
795 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
);
796 uregex_useAnchoringBounds(re
, TRUE
, &status
);
797 uregex_setRegion(re
, 0, 3, &status
);
798 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
799 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
802 /* Transparent Bounds */
803 TEST_SETUP("abc(?=def)", "abcdef", 0);
804 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == FALSE
);
805 uregex_useTransparentBounds(re
, TRUE
, &status
);
806 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == TRUE
);
808 uregex_useTransparentBounds(re
, FALSE
, &status
);
809 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* No Region */
810 uregex_setRegion(re
, 0, 3, &status
);
811 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
); /* with region, opaque bounds */
812 uregex_useTransparentBounds(re
, TRUE
, &status
);
813 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* with region, transparent bounds */
814 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
827 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
828 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
829 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
831 status
= U_ZERO_ERROR
;
832 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
833 TEST_ASSERT_SUCCESS(status
);
835 /* Normal case, with match */
836 uregex_setText(re
, text1
, -1, &status
);
837 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
838 TEST_ASSERT_SUCCESS(status
);
839 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, TRUE
);
840 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
842 /* No match. Text should copy to output with no changes. */
843 status
= U_ZERO_ERROR
;
844 uregex_setText(re
, text2
, -1, &status
);
845 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
846 TEST_ASSERT_SUCCESS(status
);
847 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
848 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
850 /* Match, output just fills buffer, no termination warning. */
851 status
= U_ZERO_ERROR
;
852 uregex_setText(re
, text1
, -1, &status
);
853 memset(buf
, -1, sizeof(buf
));
854 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
855 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
856 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
857 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
858 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
860 /* Do the replaceFirst again, without first resetting anything.
861 * Should give the same results.
863 status
= U_ZERO_ERROR
;
864 memset(buf
, -1, sizeof(buf
));
865 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x."), &status
);
866 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
867 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
868 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
869 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
871 /* NULL buffer, zero buffer length */
872 status
= U_ZERO_ERROR
;
873 resultSz
= uregex_replaceFirst(re
, replText
, -1, NULL
, 0, &status
);
874 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
875 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
877 /* Buffer too small by one */
878 status
= U_ZERO_ERROR
;
879 memset(buf
, -1, sizeof(buf
));
880 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, strlen("Replace <aa> x1x x...x.")-1, &status
);
881 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
882 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf
, FALSE
);
883 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
884 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
894 UChar text1
[80]; /* "Replace xaax x1x x...x." */
895 UChar text2
[80]; /* "No match Here" */
896 UChar replText
[80]; /* "<$1>" */
897 UChar replText2
[80]; /* "<<$1>>" */
898 const char * pattern
= "x(.*?)x";
899 const char * expectedResult
= "Replace <aa> <1> <...>.";
900 const char * expectedResult2
= "Replace <<aa>> <<1>> <<...>>.";
903 int32_t expectedResultSize
;
904 int32_t expectedResultSize2
;
907 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
908 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
909 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
910 u_uastrncpy(replText2
, "<<$1>>", UPRV_LENGTHOF(replText2
));
911 expectedResultSize
= strlen(expectedResult
);
912 expectedResultSize2
= strlen(expectedResult2
);
914 status
= U_ZERO_ERROR
;
915 re
= uregex_openC(pattern
, 0, NULL
, &status
);
916 TEST_ASSERT_SUCCESS(status
);
918 /* Normal case, with match */
919 uregex_setText(re
, text1
, -1, &status
);
920 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
921 TEST_ASSERT_SUCCESS(status
);
922 TEST_ASSERT_STRING(expectedResult
, buf
, TRUE
);
923 TEST_ASSERT(resultSize
== expectedResultSize
);
925 /* No match. Text should copy to output with no changes. */
926 status
= U_ZERO_ERROR
;
927 uregex_setText(re
, text2
, -1, &status
);
928 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
929 TEST_ASSERT_SUCCESS(status
);
930 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
931 TEST_ASSERT(resultSize
== u_strlen(text2
));
933 /* Match, output just fills buffer, no termination warning. */
934 status
= U_ZERO_ERROR
;
935 uregex_setText(re
, text1
, -1, &status
);
936 memset(buf
, -1, sizeof(buf
));
937 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, expectedResultSize
, &status
);
938 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
939 TEST_ASSERT_STRING(expectedResult
, buf
, FALSE
);
940 TEST_ASSERT(resultSize
== expectedResultSize
);
941 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
943 /* Do the replaceAll again, without first resetting anything.
944 * Should give the same results.
946 status
= U_ZERO_ERROR
;
947 memset(buf
, -1, sizeof(buf
));
948 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, strlen("Replace xaax x1x x...x."), &status
);
949 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
950 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
951 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
952 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
954 /* NULL buffer, zero buffer length */
955 status
= U_ZERO_ERROR
;
956 resultSize
= uregex_replaceAll(re
, replText
, -1, NULL
, 0, &status
);
957 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
958 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
960 /* Buffer too small. Try every size, which will tickle edge cases
961 * in uregex_appendReplacement (used by replaceAll) */
962 for (i
=0; i
<expectedResultSize
; i
++) {
964 status
= U_ZERO_ERROR
;
965 memset(buf
, -1, sizeof(buf
));
966 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, i
, &status
);
967 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
968 strcpy(expected
, expectedResult
);
970 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
971 TEST_ASSERT(resultSize
== expectedResultSize
);
972 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
975 /* Buffer too small. Same as previous test, except this time the replacement
976 * text is longer than the match capture group, making the length of the complete
977 * replacement longer than the original string.
979 for (i
=0; i
<expectedResultSize2
; i
++) {
981 status
= U_ZERO_ERROR
;
982 memset(buf
, -1, sizeof(buf
));
983 resultSize
= uregex_replaceAll(re
, replText2
, -1, buf
, i
, &status
);
984 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
985 strcpy(expected
, expectedResult2
);
987 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
988 TEST_ASSERT(resultSize
== expectedResultSize2
);
989 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
998 * appendReplacement()
1008 status
= U_ZERO_ERROR
;
1009 re
= uregex_openC(".*", 0, 0, &status
);
1010 TEST_ASSERT_SUCCESS(status
);
1012 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1013 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1014 uregex_setText(re
, text
, -1, &status
);
1016 /* match covers whole target string */
1017 uregex_find(re
, 0, &status
);
1018 TEST_ASSERT_SUCCESS(status
);
1020 bufCap
= UPRV_LENGTHOF(buf
);
1021 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1022 TEST_ASSERT_SUCCESS(status
);
1023 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1025 /* Match has \u \U escapes */
1026 uregex_find(re
, 0, &status
);
1027 TEST_ASSERT_SUCCESS(status
);
1029 bufCap
= UPRV_LENGTHOF(buf
);
1030 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1031 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1032 TEST_ASSERT_SUCCESS(status
);
1033 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1035 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1036 status
= U_ZERO_ERROR
;
1037 uregex_find(re
, 0, &status
);
1038 TEST_ASSERT_SUCCESS(status
);
1040 status
= U_BUFFER_OVERFLOW_ERROR
;
1041 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, NULL
, &status
);
1042 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1049 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1056 UChar textToSplit
[80];
1061 int32_t requiredCapacity
;
1062 int32_t spaceNeeded
;
1065 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1066 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1068 status
= U_ZERO_ERROR
;
1069 re
= uregex_openC(":", 0, NULL
, &status
);
1074 uregex_setText(re
, textToSplit
, -1, &status
);
1075 TEST_ASSERT_SUCCESS(status
);
1077 /* The TEST_ASSERT_SUCCESS call above should change too... */
1078 if (U_SUCCESS(status
)) {
1079 memset(fields
, -1, sizeof(fields
));
1081 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1082 TEST_ASSERT_SUCCESS(status
);
1084 /* The TEST_ASSERT_SUCCESS call above should change too... */
1085 if(U_SUCCESS(status
)) {
1086 TEST_ASSERT(numFields
== 3);
1087 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1088 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1089 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1090 TEST_ASSERT(fields
[3] == NULL
);
1092 spaceNeeded
= u_strlen(textToSplit
) -
1093 (numFields
- 1) + /* Field delimiters do not appear in output */
1094 numFields
; /* Each field gets a NUL terminator */
1096 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1103 /* Split with too few output strings available */
1104 status
= U_ZERO_ERROR
;
1105 re
= uregex_openC(":", 0, NULL
, &status
);
1106 uregex_setText(re
, textToSplit
, -1, &status
);
1107 TEST_ASSERT_SUCCESS(status
);
1109 /* The TEST_ASSERT_SUCCESS call above should change too... */
1110 if(U_SUCCESS(status
)) {
1111 memset(fields
, -1, sizeof(fields
));
1113 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1114 TEST_ASSERT_SUCCESS(status
);
1116 /* The TEST_ASSERT_SUCCESS call above should change too... */
1117 if(U_SUCCESS(status
)) {
1118 TEST_ASSERT(numFields
== 2);
1119 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1120 TEST_ASSERT_STRING(" second: third", fields
[1], TRUE
);
1121 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1123 spaceNeeded
= u_strlen(textToSplit
) -
1124 (numFields
- 1) + /* Field delimiters do not appear in output */
1125 numFields
; /* Each field gets a NUL terminator */
1127 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1129 /* Split with a range of output buffer sizes. */
1130 spaceNeeded
= u_strlen(textToSplit
) -
1131 (numFields
- 1) + /* Field delimiters do not appear in output */
1132 numFields
; /* Each field gets a NUL terminator */
1134 for (sz
=0; sz
< spaceNeeded
+1; sz
++) {
1135 memset(fields
, -1, sizeof(fields
));
1136 status
= U_ZERO_ERROR
;
1138 uregex_split(re
, buf
, sz
, &requiredCapacity
, fields
, 10, &status
);
1139 if (sz
>= spaceNeeded
) {
1140 TEST_ASSERT_SUCCESS(status
);
1141 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1142 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1143 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1145 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1147 TEST_ASSERT(numFields
== 3);
1148 TEST_ASSERT(fields
[3] == NULL
);
1149 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1160 /* Split(), part 2. Patterns with capture groups. The capture group text
1161 * comes out as additional fields. */
1163 UChar textToSplit
[80];
1167 int32_t requiredCapacity
;
1168 int32_t spaceNeeded
;
1171 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
1173 status
= U_ZERO_ERROR
;
1174 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
1176 uregex_setText(re
, textToSplit
, -1, &status
);
1177 TEST_ASSERT_SUCCESS(status
);
1179 /* The TEST_ASSERT_SUCCESS call above should change too... */
1180 if(U_SUCCESS(status
)) {
1181 memset(fields
, -1, sizeof(fields
));
1183 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1184 TEST_ASSERT_SUCCESS(status
);
1186 /* The TEST_ASSERT_SUCCESS call above should change too... */
1187 if(U_SUCCESS(status
)) {
1188 TEST_ASSERT(numFields
== 5);
1189 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1190 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1191 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1192 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1193 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1194 TEST_ASSERT(fields
[5] == NULL
);
1195 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1196 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1200 /* Split with too few output strings available (2) */
1201 status
= U_ZERO_ERROR
;
1202 memset(fields
, -1, sizeof(fields
));
1204 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1205 TEST_ASSERT_SUCCESS(status
);
1207 /* The TEST_ASSERT_SUCCESS call above should change too... */
1208 if(U_SUCCESS(status
)) {
1209 TEST_ASSERT(numFields
== 2);
1210 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1211 TEST_ASSERT_STRING(" second<tag-b> third", fields
[1], TRUE
);
1212 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1214 spaceNeeded
= strlen("first . second<tag-b> third."); /* "." at NUL positions */
1215 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1218 /* Split with too few output strings available (3) */
1219 status
= U_ZERO_ERROR
;
1220 memset(fields
, -1, sizeof(fields
));
1222 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 3, &status
);
1223 TEST_ASSERT_SUCCESS(status
);
1225 /* The TEST_ASSERT_SUCCESS call above should change too... */
1226 if(U_SUCCESS(status
)) {
1227 TEST_ASSERT(numFields
== 3);
1228 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1229 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1230 TEST_ASSERT_STRING(" second<tag-b> third", fields
[2], TRUE
);
1231 TEST_ASSERT(!memcmp(&fields
[3],&minus1
,sizeof(UChar
*)));
1233 spaceNeeded
= strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1234 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1237 /* Split with just enough output strings available (5) */
1238 status
= U_ZERO_ERROR
;
1239 memset(fields
, -1, sizeof(fields
));
1241 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 5, &status
);
1242 TEST_ASSERT_SUCCESS(status
);
1244 /* The TEST_ASSERT_SUCCESS call above should change too... */
1245 if(U_SUCCESS(status
)) {
1246 TEST_ASSERT(numFields
== 5);
1247 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1248 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1249 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1250 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1251 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1252 TEST_ASSERT(!memcmp(&fields
[5],&minus1
,sizeof(UChar
*)));
1254 spaceNeeded
= strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1255 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1258 /* Split, end of text is a field delimiter. */
1259 status
= U_ZERO_ERROR
;
1260 sz
= strlen("first <tag-a> second<tag-b>");
1261 uregex_setText(re
, textToSplit
, sz
, &status
);
1262 TEST_ASSERT_SUCCESS(status
);
1264 /* The TEST_ASSERT_SUCCESS call above should change too... */
1265 if(U_SUCCESS(status
)) {
1266 memset(fields
, -1, sizeof(fields
));
1268 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 9, &status
);
1269 TEST_ASSERT_SUCCESS(status
);
1271 /* The TEST_ASSERT_SUCCESS call above should change too... */
1272 if(U_SUCCESS(status
)) {
1273 TEST_ASSERT(numFields
== 5);
1274 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1275 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1276 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1277 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1278 TEST_ASSERT_STRING("", fields
[4], TRUE
);
1279 TEST_ASSERT(fields
[5] == NULL
);
1280 TEST_ASSERT(fields
[8] == NULL
);
1281 TEST_ASSERT(!memcmp(&fields
[9],&minus1
,sizeof(UChar
*)));
1282 spaceNeeded
= strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1283 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1293 TEST_SETUP("abc$", "abcdef", 0);
1294 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 0);
1295 uregex_setTimeLimit(re
, 1000, &status
);
1296 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1297 TEST_ASSERT_SUCCESS(status
);
1298 uregex_setTimeLimit(re
, -1, &status
);
1299 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1300 status
= U_ZERO_ERROR
;
1301 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1305 * set/get Stack Limit
1307 TEST_SETUP("abc$", "abcdef", 0);
1308 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 8000000);
1309 uregex_setStackLimit(re
, 40000, &status
);
1310 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1311 TEST_ASSERT_SUCCESS(status
);
1312 uregex_setStackLimit(re
, -1, &status
);
1313 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1314 status
= U_ZERO_ERROR
;
1315 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1320 * Get/Set callback functions
1321 * This test is copied from intltest regex/Callbacks
1322 * The pattern and test data will run long enough to cause the callback
1323 * to be invoked. The nested '+' operators give exponential time
1324 * behavior with increasing string length.
1326 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1327 callBackContext cbInfo
= {4, 0, 0};
1328 const void *pContext
= &cbInfo
;
1329 URegexMatchCallback
*returnedFn
= &TestCallbackFn
;
1331 /* Getting the callback fn when it hasn't been set must return NULL */
1332 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1333 TEST_ASSERT_SUCCESS(status
);
1334 TEST_ASSERT(returnedFn
== NULL
);
1335 TEST_ASSERT(pContext
== NULL
);
1337 /* Set the callback and do a match. */
1338 /* The callback function should record that it has been called. */
1339 uregex_setMatchCallback(re
, &TestCallbackFn
, &cbInfo
, &status
);
1340 TEST_ASSERT_SUCCESS(status
);
1341 TEST_ASSERT(cbInfo
.numCalls
== 0);
1342 TEST_ASSERT(uregex_matches(re
, -1, &status
) == FALSE
);
1343 TEST_ASSERT_SUCCESS(status
);
1344 TEST_ASSERT(cbInfo
.numCalls
> 0);
1346 /* Getting the callback should return the values that were set above. */
1347 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1348 TEST_ASSERT(returnedFn
== &TestCallbackFn
);
1349 TEST_ASSERT(pContext
== &cbInfo
);
/*
 * TestBug4315: exercises the two-pass (preflight, then fill) use of uregex_split().
 *
 * Pass 1 calls uregex_split() with a NULL destination buffer and a capacity of 0.
 * The test expects U_BUFFER_OVERFLOW_ERROR, a field count of 3, and the required
 * capacity reported through neededLength1.
 *
 * Pass 2 allocates neededLength1 + 1 UChars, splits again into that buffer, and
 * checks that the call succeeds, that the field count is still 3, that the
 * required capacity reported the second time equals the first, that the three
 * fields have the expected text, and that destFields[3] is NULL.
 *
 * NOTE(review): the assignment to thePattern and the declaration and release of
 * textBuff are not visible in this listing. The expected fields ("The qui",
 * "brown fox jumped over the slow bla", "turtle.") are consistent with a
 * delimiter pattern of "ck " -- confirm against the complete file.
 */
1356 static void TestBug4315(void) {
1357 UErrorCode theICUError
= U_ZERO_ERROR
;
1358 URegularExpression
*theRegEx
;
1360 const char *thePattern
;
1361 UChar theString
[100];
1362 UChar
*destFields
[24];
1363 int32_t neededLength1
;
1364 int32_t neededLength2
;
1366 int32_t wordCount
= 0;
/* Number of slots passed to uregex_split(); matches the size of destFields. */
1367 int32_t destFieldsSize
= 24;
1370 u_uastrcpy(theString
, "The quick brown fox jumped over the slow black turtle.");
/* Compile thePattern with flags 0 and a NULL parse-error argument. */
1373 theRegEx
= uregex_openC(thePattern
, 0, NULL
, &theICUError
);
1374 TEST_ASSERT_SUCCESS(theICUError
);
1376 /* set the input string */
1377 uregex_setText(theRegEx
, theString
, u_strlen(theString
), &theICUError
);
1378 TEST_ASSERT_SUCCESS(theICUError
);
1381 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1383 wordCount
= uregex_split(theRegEx
, NULL
, 0, &neededLength1
, destFields
,
1384 destFieldsSize
, &theICUError
);
1386 TEST_ASSERT(theICUError
== U_BUFFER_OVERFLOW_ERROR
);
1387 TEST_ASSERT(wordCount
==3);
1389 if(theICUError
== U_BUFFER_OVERFLOW_ERROR
)
1391 theICUError
= U_ZERO_ERROR
;
1392 textBuff
= (UChar
*) malloc(sizeof(UChar
) * (neededLength1
+ 1));
1393 wordCount
= uregex_split(theRegEx
, textBuff
, neededLength1
+1, &neededLength2
,
1394 destFields
, destFieldsSize
, &theICUError
);
1395 TEST_ASSERT(wordCount
==3);
1396 TEST_ASSERT_SUCCESS(theICUError
);
1397 TEST_ASSERT(neededLength1
== neededLength2
);
1398 TEST_ASSERT_STRING("The qui", destFields
[0], TRUE
);
1399 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields
[1], TRUE
);
1400 TEST_ASSERT_STRING("turtle.", destFields
[2], TRUE
);
1401 TEST_ASSERT(destFields
[3] == NULL
);
1404 uregex_close(theRegEx
);
1407 /* Based on TestRegexCAPI() */
1408 static void TestUTextAPI(void) {
1409 UErrorCode status
= U_ZERO_ERROR
;
1410 URegularExpression
*re
;
1411 UText patternText
= UTEXT_INITIALIZER
;
1413 const char patternTextUTF8
[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1415 /* Minimalist open/close */
1416 utext_openUTF8(&patternText
, patternTextUTF8
, -1, &status
);
1417 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1418 if (U_FAILURE(status
)) {
1419 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
1420 utext_close(&patternText
);
1425 /* Open with all flag values set */
1426 status
= U_ZERO_ERROR
;
1427 re
= uregex_openUText(&patternText
,
1428 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
1430 TEST_ASSERT_SUCCESS(status
);
1433 /* Open with an invalid flag */
1434 status
= U_ZERO_ERROR
;
1435 re
= uregex_openUText(&patternText
, 0x40000000, 0, &status
);
1436 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
1439 /* open with an invalid parameter */
1440 status
= U_ZERO_ERROR
;
1441 re
= uregex_openUText(NULL
,
1442 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
1443 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
1449 URegularExpression
*clone1
;
1450 URegularExpression
*clone2
;
1451 URegularExpression
*clone3
;
1452 UChar testString1
[30];
1453 UChar testString2
[30];
1457 status
= U_ZERO_ERROR
;
1458 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1459 TEST_ASSERT_SUCCESS(status
);
1460 clone1
= uregex_clone(re
, &status
);
1461 TEST_ASSERT_SUCCESS(status
);
1462 TEST_ASSERT(clone1
!= NULL
);
1464 status
= U_ZERO_ERROR
;
1465 clone2
= uregex_clone(re
, &status
);
1466 TEST_ASSERT_SUCCESS(status
);
1467 TEST_ASSERT(clone2
!= NULL
);
1470 status
= U_ZERO_ERROR
;
1471 clone3
= uregex_clone(clone2
, &status
);
1472 TEST_ASSERT_SUCCESS(status
);
1473 TEST_ASSERT(clone3
!= NULL
);
1475 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
1476 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
1478 status
= U_ZERO_ERROR
;
1479 uregex_setText(clone1
, testString1
, -1, &status
);
1480 TEST_ASSERT_SUCCESS(status
);
1481 result
= uregex_lookingAt(clone1
, 0, &status
);
1482 TEST_ASSERT_SUCCESS(status
);
1483 TEST_ASSERT(result
==TRUE
);
1485 status
= U_ZERO_ERROR
;
1486 uregex_setText(clone2
, testString2
, -1, &status
);
1487 TEST_ASSERT_SUCCESS(status
);
1488 result
= uregex_lookingAt(clone2
, 0, &status
);
1489 TEST_ASSERT_SUCCESS(status
);
1490 TEST_ASSERT(result
==FALSE
);
1491 result
= uregex_find(clone2
, 0, &status
);
1492 TEST_ASSERT_SUCCESS(status
);
1493 TEST_ASSERT(result
==TRUE
);
1495 uregex_close(clone1
);
1496 uregex_close(clone2
);
1497 uregex_close(clone3
);
1502 * pattern() and patternText()
1505 const UChar
*resultPat
;
1508 const char str_hello
[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1509 const char str_hel
[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1510 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
)); /* for comparison */
1511 status
= U_ZERO_ERROR
;
1513 utext_openUTF8(&patternText
, str_hello
, -1, &status
);
1514 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
1515 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1516 TEST_ASSERT_SUCCESS(status
);
1518 /* The TEST_ASSERT_SUCCESS above should change too... */
1519 if (U_SUCCESS(status
)) {
1520 TEST_ASSERT(resultLen
== -1);
1521 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
1524 resultText
= uregex_patternUText(re
, &status
);
1525 TEST_ASSERT_SUCCESS(status
);
1526 TEST_ASSERT_UTEXT(str_hello
, resultText
);
1530 status
= U_ZERO_ERROR
;
1531 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
1532 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1533 TEST_ASSERT_SUCCESS(status
);
1535 /* The TEST_ASSERT_SUCCESS above should change too... */
1536 if (U_SUCCESS(status
)) {
1537 TEST_ASSERT(resultLen
== 3);
1538 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
1539 TEST_ASSERT(u_strlen(resultPat
) == 3);
1542 resultText
= uregex_patternUText(re
, &status
);
1543 TEST_ASSERT_SUCCESS(status
);
1544 TEST_ASSERT_UTEXT(str_hel
, resultText
);
1550 * setUText() and lookingAt()
1553 UText text1
= UTEXT_INITIALIZER
;
1554 UText text2
= UTEXT_INITIALIZER
;
1556 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1557 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1558 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1559 status
= U_ZERO_ERROR
;
1560 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1561 utext_openUTF8(&text2
, str_abcccxd
, -1, &status
);
1563 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1564 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1565 TEST_ASSERT_SUCCESS(status
);
1567 /* Operation before doing a setText should fail... */
1568 status
= U_ZERO_ERROR
;
1569 uregex_lookingAt(re
, 0, &status
);
1570 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
1572 status
= U_ZERO_ERROR
;
1573 uregex_setUText(re
, &text1
, &status
);
1574 result
= uregex_lookingAt(re
, 0, &status
);
1575 TEST_ASSERT(result
== TRUE
);
1576 TEST_ASSERT_SUCCESS(status
);
1578 status
= U_ZERO_ERROR
;
1579 uregex_setUText(re
, &text2
, &status
);
1580 result
= uregex_lookingAt(re
, 0, &status
);
1581 TEST_ASSERT(result
== FALSE
);
1582 TEST_ASSERT_SUCCESS(status
);
1584 status
= U_ZERO_ERROR
;
1585 uregex_setUText(re
, &text1
, &status
);
1586 result
= uregex_lookingAt(re
, 0, &status
);
1587 TEST_ASSERT(result
== TRUE
);
1588 TEST_ASSERT_SUCCESS(status
);
1591 utext_close(&text1
);
1592 utext_close(&text2
);
1597 * getText() and getUText()
1600 UText text1
= UTEXT_INITIALIZER
;
1601 UText text2
= UTEXT_INITIALIZER
;
1602 UChar text2Chars
[20];
1604 const UChar
*result
;
1606 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1607 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1608 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1611 status
= U_ZERO_ERROR
;
1612 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1613 u_uastrncpy(text2Chars
, str_abcccxd
, UPRV_LENGTHOF(text2Chars
));
1614 utext_openUChars(&text2
, text2Chars
, -1, &status
);
1616 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1617 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1619 /* First set a UText */
1620 uregex_setUText(re
, &text1
, &status
);
1621 resultText
= uregex_getUText(re
, NULL
, &status
);
1622 TEST_ASSERT_SUCCESS(status
);
1623 TEST_ASSERT(resultText
!= &text1
);
1624 utext_setNativeIndex(resultText
, 0);
1625 utext_setNativeIndex(&text1
, 0);
1626 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1627 utext_close(resultText
);
1629 result
= uregex_getText(re
, &textLength
, &status
); /* flattens UText into buffer */
1630 (void)result
; /* Suppress set but not used warning. */
1631 TEST_ASSERT(textLength
== -1 || textLength
== 6);
1632 resultText
= uregex_getUText(re
, NULL
, &status
);
1633 TEST_ASSERT_SUCCESS(status
);
1634 TEST_ASSERT(resultText
!= &text1
);
1635 utext_setNativeIndex(resultText
, 0);
1636 utext_setNativeIndex(&text1
, 0);
1637 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1638 utext_close(resultText
);
1640 /* Then set a UChar * */
1641 uregex_setText(re
, text2Chars
, 7, &status
);
1642 resultText
= uregex_getUText(re
, NULL
, &status
);
1643 TEST_ASSERT_SUCCESS(status
);
1644 utext_setNativeIndex(resultText
, 0);
1645 utext_setNativeIndex(&text2
, 0);
1646 TEST_ASSERT(testUTextEqual(resultText
, &text2
));
1647 utext_close(resultText
);
1648 result
= uregex_getText(re
, &textLength
, &status
);
1649 TEST_ASSERT(textLength
== 7);
1652 utext_close(&text1
);
1653 utext_close(&text2
);
1660 UText text1
= UTEXT_INITIALIZER
;
1662 UText nullText
= UTEXT_INITIALIZER
;
1663 const char str_abcccde
[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1664 const char str_abcd
[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1666 status
= U_ZERO_ERROR
;
1667 utext_openUTF8(&text1
, str_abcccde
, -1, &status
);
1668 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1669 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1671 uregex_setUText(re
, &text1
, &status
);
1672 result
= uregex_matches(re
, 0, &status
);
1673 TEST_ASSERT(result
== FALSE
);
1674 TEST_ASSERT_SUCCESS(status
);
1677 status
= U_ZERO_ERROR
;
1678 re
= uregex_openC(".?", 0, NULL
, &status
);
1679 uregex_setUText(re
, &text1
, &status
);
1680 result
= uregex_matches(re
, 7, &status
);
1681 TEST_ASSERT(result
== TRUE
);
1682 TEST_ASSERT_SUCCESS(status
);
1684 status
= U_ZERO_ERROR
;
1685 utext_openUTF8(&nullText
, "", -1, &status
);
1686 uregex_setUText(re
, &nullText
, &status
);
1687 TEST_ASSERT_SUCCESS(status
);
1688 result
= uregex_matches(re
, 0, &status
);
1689 TEST_ASSERT(result
== TRUE
);
1690 TEST_ASSERT_SUCCESS(status
);
1693 utext_close(&text1
);
1694 utext_close(&nullText
);
1699 * lookingAt() Used in setText test.
1704 * find(), findNext, start, end, reset
1709 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
1710 status
= U_ZERO_ERROR
;
1711 re
= uregex_openC("rx", 0, NULL
, &status
);
1713 uregex_setText(re
, text1
, -1, &status
);
1714 result
= uregex_find(re
, 0, &status
);
1715 TEST_ASSERT(result
== TRUE
);
1716 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1717 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1718 TEST_ASSERT_SUCCESS(status
);
1720 result
= uregex_find(re
, 9, &status
);
1721 TEST_ASSERT(result
== TRUE
);
1722 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
1723 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
1724 TEST_ASSERT_SUCCESS(status
);
1726 result
= uregex_find(re
, 14, &status
);
1727 TEST_ASSERT(result
== FALSE
);
1728 TEST_ASSERT_SUCCESS(status
);
1730 status
= U_ZERO_ERROR
;
1731 uregex_reset(re
, 0, &status
);
1733 result
= uregex_findNext(re
, &status
);
1734 TEST_ASSERT(result
== TRUE
);
1735 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1736 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1737 TEST_ASSERT_SUCCESS(status
);
1739 result
= uregex_findNext(re
, &status
);
1740 TEST_ASSERT(result
== TRUE
);
1741 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
1742 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
1743 TEST_ASSERT_SUCCESS(status
);
1745 status
= U_ZERO_ERROR
;
1746 uregex_reset(re
, 12, &status
);
1748 result
= uregex_findNext(re
, &status
);
1749 TEST_ASSERT(result
== TRUE
);
1750 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
1751 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
1752 TEST_ASSERT_SUCCESS(status
);
1754 result
= uregex_findNext(re
, &status
);
1755 TEST_ASSERT(result
== FALSE
);
1756 TEST_ASSERT_SUCCESS(status
);
1768 int64_t groupLen
= 0;
1771 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
1773 status
= U_ZERO_ERROR
;
1774 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
1775 TEST_ASSERT_SUCCESS(status
);
1777 uregex_setText(re
, text1
, -1, &status
);
1778 result
= uregex_find(re
, 0, &status
);
1779 TEST_ASSERT(result
==TRUE
);
1781 /* Capture Group 0 with shallow clone API. Should succeed. */
1782 status
= U_ZERO_ERROR
;
1783 actual
= uregex_groupUText(re
, 0, NULL
, &groupLen
, &status
);
1784 TEST_ASSERT_SUCCESS(status
);
1786 TEST_ASSERT(utext_getNativeIndex(actual
) == 6); /* index of "abc " within "noise abc ..." */
1787 TEST_ASSERT(groupLen
== 16); /* length of "abc interior def" */
1788 utext_extract(actual
, 6 /*start index */, 6+16 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1790 TEST_ASSERT_STRING("abc interior def", groupBuf
, TRUE
);
1791 utext_close(actual
);
1793 /* Capture group #1. Should succeed. */
1794 status
= U_ZERO_ERROR
;
1796 actual
= uregex_groupUText(re
, 1, NULL
, &groupLen
, &status
);
1797 TEST_ASSERT_SUCCESS(status
);
1798 TEST_ASSERT(9 == utext_getNativeIndex(actual
)); /* index of " interior " within "noise abc interior def ... " */
1799 /* (within the string text1) */
1800 TEST_ASSERT(10 == groupLen
); /* length of " interior " */
1801 utext_extract(actual
, 9 /*start index*/, 9+10 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1802 TEST_ASSERT_STRING(" interior ", groupBuf
, TRUE
);
1804 utext_close(actual
);
1806 /* Capture group out of range. Error. */
1807 status
= U_ZERO_ERROR
;
1808 actual
= uregex_groupUText(re
, 2, NULL
, &groupLen
, &status
);
1809 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
1810 utext_close(actual
);
1821 UText replText
= UTEXT_INITIALIZER
;
1823 const char str_Replxxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1824 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1825 const char str_u00411U00000042a
[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1826 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1827 const char str_1x
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1828 const char str_ReplaceAaaBax1xxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1829 status
= U_ZERO_ERROR
;
1830 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1831 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1832 utext_openUTF8(&replText
, str_1x
, -1, &status
);
1834 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1835 TEST_ASSERT_SUCCESS(status
);
1837 /* Normal case, with match */
1838 uregex_setText(re
, text1
, -1, &status
);
1839 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1840 TEST_ASSERT_SUCCESS(status
);
1841 TEST_ASSERT_UTEXT(str_Replxxx
, result
);
1842 utext_close(result
);
1844 /* No match. Text should copy to output with no changes. */
1845 uregex_setText(re
, text2
, -1, &status
);
1846 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1847 TEST_ASSERT_SUCCESS(status
);
1848 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1849 utext_close(result
);
1851 /* Unicode escapes */
1852 uregex_setText(re
, text1
, -1, &status
);
1853 utext_openUTF8(&replText
, str_u00411U00000042a
, -1, &status
);
1854 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1855 TEST_ASSERT_SUCCESS(status
);
1856 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx
, result
);
1857 utext_close(result
);
1860 utext_close(&replText
);
1870 UText replText
= UTEXT_INITIALIZER
;
1872 const char str_1
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1873 const char str_Replaceaa1
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1874 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1875 status
= U_ZERO_ERROR
;
1876 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1877 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1878 utext_openUTF8(&replText
, str_1
, -1, &status
);
1880 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1881 TEST_ASSERT_SUCCESS(status
);
1883 /* Normal case, with match */
1884 uregex_setText(re
, text1
, -1, &status
);
1885 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1886 TEST_ASSERT_SUCCESS(status
);
1887 TEST_ASSERT_UTEXT(str_Replaceaa1
, result
);
1888 utext_close(result
);
1890 /* No match. Text should copy to output with no changes. */
1891 uregex_setText(re
, text2
, -1, &status
);
1892 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1893 TEST_ASSERT_SUCCESS(status
);
1894 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1895 utext_close(result
);
1898 utext_close(&replText
);
1903 * appendReplacement()
1912 status
= U_ZERO_ERROR
;
1913 re
= uregex_openC(".*", 0, 0, &status
);
1914 TEST_ASSERT_SUCCESS(status
);
1916 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1917 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1918 uregex_setText(re
, text
, -1, &status
);
1920 /* match covers whole target string */
1921 uregex_find(re
, 0, &status
);
1922 TEST_ASSERT_SUCCESS(status
);
1924 bufCap
= UPRV_LENGTHOF(buf
);
1925 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1926 TEST_ASSERT_SUCCESS(status
);
1927 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1929 /* Match has \u \U escapes */
1930 uregex_find(re
, 0, &status
);
1931 TEST_ASSERT_SUCCESS(status
);
1933 bufCap
= UPRV_LENGTHOF(buf
);
1934 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1935 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1936 TEST_ASSERT_SUCCESS(status
);
1937 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1944 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1951 UChar textToSplit
[80];
1957 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1958 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1960 status
= U_ZERO_ERROR
;
1961 re
= uregex_openC(":", 0, NULL
, &status
);
1966 uregex_setText(re
, textToSplit
, -1, &status
);
1967 TEST_ASSERT_SUCCESS(status
);
1969 /* The TEST_ASSERT_SUCCESS call above should change too... */
1970 if (U_SUCCESS(status
)) {
1971 memset(fields
, 0, sizeof(fields
));
1972 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
1973 TEST_ASSERT_SUCCESS(status
);
1975 /* The TEST_ASSERT_SUCCESS call above should change too... */
1976 if(U_SUCCESS(status
)) {
1977 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1978 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1979 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1980 TEST_ASSERT(numFields
== 3);
1981 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
1982 TEST_ASSERT_UTEXT(str_second
, fields
[1]);
1983 TEST_ASSERT_UTEXT(str_third
, fields
[2]);
1984 TEST_ASSERT(fields
[3] == NULL
);
1986 for(i
= 0; i
< numFields
; i
++) {
1987 utext_close(fields
[i
]);
1994 /* Split with too few output strings available */
1995 status
= U_ZERO_ERROR
;
1996 re
= uregex_openC(":", 0, NULL
, &status
);
1997 uregex_setText(re
, textToSplit
, -1, &status
);
1998 TEST_ASSERT_SUCCESS(status
);
2000 /* The TEST_ASSERT_SUCCESS call above should change too... */
2001 if(U_SUCCESS(status
)) {
2004 fields
[2] = &patternText
;
2005 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2006 TEST_ASSERT_SUCCESS(status
);
2008 /* The TEST_ASSERT_SUCCESS call above should change too... */
2009 if(U_SUCCESS(status
)) {
2010 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2011 const char str_secondthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2012 TEST_ASSERT(numFields
== 2);
2013 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2014 TEST_ASSERT_UTEXT(str_secondthird
, fields
[1]);
2015 TEST_ASSERT(fields
[2] == &patternText
);
2017 for(i
= 0; i
< numFields
; i
++) {
2018 utext_close(fields
[i
]);
2025 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2026 * comes out as additional fields. */
2028 UChar textToSplit
[80];
2033 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
2035 status
= U_ZERO_ERROR
;
2036 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
2038 uregex_setText(re
, textToSplit
, -1, &status
);
2039 TEST_ASSERT_SUCCESS(status
);
2041 /* The TEST_ASSERT_SUCCESS call above should change too... */
2042 if(U_SUCCESS(status
)) {
2043 memset(fields
, 0, sizeof(fields
));
2044 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
2045 TEST_ASSERT_SUCCESS(status
);
2047 /* The TEST_ASSERT_SUCCESS call above should change too... */
2048 if(U_SUCCESS(status
)) {
2049 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2050 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2051 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2052 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2053 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2055 TEST_ASSERT(numFields
== 5);
2056 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2057 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2058 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2059 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2060 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2061 TEST_ASSERT(fields
[5] == NULL
);
2063 for(i
= 0; i
< numFields
; i
++) {
2064 utext_close(fields
[i
]);
2068 /* Split with too few output strings available (2) */
2069 status
= U_ZERO_ERROR
;
2072 fields
[2] = &patternText
;
2073 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2074 TEST_ASSERT_SUCCESS(status
);
2076 /* The TEST_ASSERT_SUCCESS call above should change too... */
2077 if(U_SUCCESS(status
)) {
2078 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2079 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2080 TEST_ASSERT(numFields
== 2);
2081 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2082 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[1]);
2083 TEST_ASSERT(fields
[2] == &patternText
);
2085 for(i
= 0; i
< numFields
; i
++) {
2086 utext_close(fields
[i
]);
2090 /* Split with too few output strings available (3) */
2091 status
= U_ZERO_ERROR
;
2095 fields
[3] = &patternText
;
2096 numFields
= uregex_splitUText(re
, fields
, 3, &status
);
2097 TEST_ASSERT_SUCCESS(status
);
2099 /* The TEST_ASSERT_SUCCESS call above should change too... */
2100 if(U_SUCCESS(status
)) {
2101 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2102 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2103 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2104 TEST_ASSERT(numFields
== 3);
2105 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2106 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2107 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[2]);
2108 TEST_ASSERT(fields
[3] == &patternText
);
2110 for(i
= 0; i
< numFields
; i
++) {
2111 utext_close(fields
[i
]);
2114 /* Split with just enough output strings available (5) */
2115 status
= U_ZERO_ERROR
;
2121 fields
[5] = &patternText
;
2122 numFields
= uregex_splitUText(re
, fields
, 5, &status
);
2123 TEST_ASSERT_SUCCESS(status
);
2125 /* The TEST_ASSERT_SUCCESS call above should change too... */
2126 if(U_SUCCESS(status
)) {
2127 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2128 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2129 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2130 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2131 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2133 TEST_ASSERT(numFields
== 5);
2134 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2135 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2136 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2137 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2138 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2139 TEST_ASSERT(fields
[5] == &patternText
);
2141 for(i
= 0; i
< numFields
; i
++) {
2142 utext_close(fields
[i
]);
2145 /* Split, end of text is a field delimiter. */
2146 status
= U_ZERO_ERROR
;
2147 uregex_setText(re
, textToSplit
, strlen("first <tag-a> second<tag-b>"), &status
);
2148 TEST_ASSERT_SUCCESS(status
);
2150 /* The TEST_ASSERT_SUCCESS call above should change too... */
2151 if(U_SUCCESS(status
)) {
2152 memset(fields
, 0, sizeof(fields
));
2153 fields
[9] = &patternText
;
2154 numFields
= uregex_splitUText(re
, fields
, 9, &status
);
2155 TEST_ASSERT_SUCCESS(status
);
2157 /* The TEST_ASSERT_SUCCESS call above should change too... */
2158 if(U_SUCCESS(status
)) {
2159 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2160 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2161 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2162 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2163 const char str_empty
[] = { 0x00 };
2165 TEST_ASSERT(numFields
== 5);
2166 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2167 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2168 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2169 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2170 TEST_ASSERT_UTEXT(str_empty
, fields
[4]);
2171 TEST_ASSERT(fields
[5] == NULL
);
2172 TEST_ASSERT(fields
[8] == NULL
);
2173 TEST_ASSERT(fields
[9] == &patternText
);
2175 for(i
= 0; i
< numFields
; i
++) {
2176 utext_close(fields
[i
]);
2182 utext_close(&patternText
);
2186 static void TestRefreshInput(void) {
2188 * RefreshInput changes out the input of a URegularExpression without
2189 * changing anything else in the match state. Used with Java JNI,
2190 * when Java moves the underlying string storage. This test
2191 * runs a find() loop, moving the text after the first match.
2192 * The right number of matches should still be found.
2194 UChar testStr
[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2195 UChar movedStr
[] = { 0, 0, 0, 0, 0, 0};
2196 UErrorCode status
= U_ZERO_ERROR
;
2197 URegularExpression
*re
;
2198 UText ut1
= UTEXT_INITIALIZER
;
2199 UText ut2
= UTEXT_INITIALIZER
;
2201 re
= uregex_openC("[ABC]", 0, 0, &status
);
2202 TEST_ASSERT_SUCCESS(status
);
2204 utext_openUChars(&ut1
, testStr
, -1, &status
);
2205 TEST_ASSERT_SUCCESS(status
);
2206 uregex_setUText(re
, &ut1
, &status
);
2207 TEST_ASSERT_SUCCESS(status
);
2209 /* Find the first match "A" in the original string */
2210 TEST_ASSERT(uregex_findNext(re
, &status
));
2211 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
2213 /* Move the string, kill the original string. */
2214 u_strcpy(movedStr
, testStr
);
2215 u_memset(testStr
, 0, u_strlen(testStr
));
2216 utext_openUChars(&ut2
, movedStr
, -1, &status
);
2217 TEST_ASSERT_SUCCESS(status
);
2218 uregex_refreshUText(re
, &ut2
, &status
);
2219 TEST_ASSERT_SUCCESS(status
);
2221 /* Find the following two matches, now working in the moved string. */
2222 TEST_ASSERT(uregex_findNext(re
, &status
));
2223 TEST_ASSERT(uregex_start(re
, 0, &status
) == 2);
2224 TEST_ASSERT(uregex_findNext(re
, &status
));
2225 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
2226 TEST_ASSERT(FALSE
== uregex_findNext(re
, &status
));
2232 static void TestBug8421(void) {
2233 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2236 URegularExpression
*re
;
2237 UErrorCode status
= U_ZERO_ERROR
;
2240 re
= uregex_openC("abc", 0, 0, &status
);
2241 TEST_ASSERT_SUCCESS(status
);
2243 limit
= uregex_getTimeLimit(re
, &status
);
2244 TEST_ASSERT_SUCCESS(status
);
2245 TEST_ASSERT(limit
== 0);
2247 uregex_setTimeLimit(re
, 100, &status
);
2248 TEST_ASSERT_SUCCESS(status
);
2249 limit
= uregex_getTimeLimit(re
, &status
);
2250 TEST_ASSERT_SUCCESS(status
);
2251 TEST_ASSERT(limit
== 100);
2256 static UBool U_CALLCONV
FindCallback(const void* context
, int64_t matchIndex
) {
2260 static UBool U_CALLCONV
MatchCallback(const void *context
, int32_t steps
) {
2264 static void TestBug10815() {
2265 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2266 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2268 URegularExpression
*re
;
2269 UErrorCode status
= U_ZERO_ERROR
;
2273 // findNext() with a find progress callback function.
2275 re
= uregex_openC(".z", 0, 0, &status
);
2276 TEST_ASSERT_SUCCESS(status
);
2278 u_uastrncpy(text
, "Hello, World.", UPRV_LENGTHOF(text
));
2279 uregex_setText(re
, text
, -1, &status
);
2280 TEST_ASSERT_SUCCESS(status
);
2282 uregex_setFindProgressCallback(re
, FindCallback
, NULL
, &status
);
2283 TEST_ASSERT_SUCCESS(status
);
2285 uregex_findNext(re
, &status
);
2286 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2290 // findNext() with a match progress callback function.
2292 status
= U_ZERO_ERROR
;
2293 re
= uregex_openC("((xxx)*)*y", 0, 0, &status
);
2294 TEST_ASSERT_SUCCESS(status
);
2296 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2297 // it will appear to be stuck in a (near) infinite loop.
2298 u_uastrncpy(text
, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text
));
2299 uregex_setText(re
, text
, -1, &status
);
2300 TEST_ASSERT_SUCCESS(status
);
2302 uregex_setMatchCallback(re
, MatchCallback
, NULL
, &status
);
2303 TEST_ASSERT_SUCCESS(status
);
2305 uregex_findNext(re
, &status
);
2306 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2311 static const UChar startLinePattern
[] = { 0x5E, 0x78, 0 }; // "^x"
2313 static void TestMatchStartLineWithEmptyText() {
2314 UErrorCode status
= U_ZERO_ERROR
;
2315 UText
* ut
= utext_openUChars(NULL
, NULL
, 0, &status
);
2316 TEST_ASSERT_SUCCESS(status
);
2317 if (U_SUCCESS(status
)) {
2318 URegularExpression
*re
= uregex_open(startLinePattern
, -1, 0, NULL
, &status
);
2319 TEST_ASSERT_SUCCESS(status
);
2320 if (U_SUCCESS(status
)) {
2321 uregex_setUText(re
, ut
, &status
);
2322 TEST_ASSERT(U_SUCCESS(status
));
2323 if (U_SUCCESS(status
)) {
2324 UBool found
= uregex_findNext(re
, &status
);
2325 TEST_ASSERT(U_SUCCESS(status
) && !found
);
2333 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */