1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
/*
 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
 *   try to test the full functionality.  It just calls each function and verifies that it
 *   works on a basic level.
 *
 *   More complete testing of regular expression functionality is done with the C++ tests.
 */
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
/*
 * TEST_ASSERT_SUCCESS(status)
 *   Reports a (data) error through log_data_err, with file, line and the
 *   u_errorName() text, when U_FAILURE(status) is true.  It only reports;
 *   control continues with the statement after the macro.
 */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
/*
 * TEST_ASSERT(expr)
 *   Reports a test failure through log_err, with file, line and the
 *   stringized expression, when (expr) compares equal to FALSE.
 *   It only reports; control continues with the statement after the macro.
 */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if ((expr)==FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         TEST_SETUP assigns to variables named `status` and `re`, which must
 *         already exist in the enclosing scope.  It deliberately leaves two
 *         blocks open (the macro block and the `if (U_SUCCESS(status))` body);
 *         TEST_TEARDOWN has to follow to close them.  The trailing empty
 *         block macro gives the `;` written after TEST_SETUP(...) a complete
 *         statement to terminate.
 *
 *         If the text buffer cannot be allocated, status is set to
 *         U_MEMORY_ALLOCATION_ERROR and the test body is skipped rather than
 *         writing through a NULL pointer.
 */
#define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re     = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    int32_t testStringLen = (int32_t)strlen(testString); \
    srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
    if (srcString == NULL) { \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString, testStringLen + 1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) { \
        UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END
/*
 * TEST_TEARDOWN
 *   Counterpart of TEST_SETUP: closes the `if (U_SUCCESS(status))` body that
 *   TEST_SETUP opened, checks the final status, releases what TEST_SETUP
 *   acquired, and closes the macro block.
 *
 *   NOTE(review): the inner closing brace, uregex_close(re) and
 *   free(srcString) are missing from the damaged listing this was recovered
 *   from.  They are restored here because TEST_SETUP opens `re` with
 *   uregex_openC and malloc()s `srcString`; confirm against the upstream file.
 */
#define TEST_TEARDOWN  \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
} UPRV_BLOCK_MACRO_END
81 * @param expected utf-8 array of bytes to be expected
83 static void test_assert_string(const char *expected
, const UChar
*actual
, UBool nulTerm
, const char *file
, int line
) {
84 char buf_inside_macro
[120];
85 int32_t len
= (int32_t)strlen(expected
);
88 u_austrncpy(buf_inside_macro
, (actual
), len
+1);
89 buf_inside_macro
[len
+2] = 0;
90 success
= (strcmp((expected
), buf_inside_macro
) == 0);
92 u_austrncpy(buf_inside_macro
, (actual
), len
);
93 buf_inside_macro
[len
+1] = 0;
94 success
= (strncmp((expected
), buf_inside_macro
, len
) == 0);
96 if (success
== FALSE
) {
97 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
98 file
, line
, (expected
), buf_inside_macro
);
102 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
105 static UBool
equals_utf8_utext(const char *utf8
, UText
*utext
) {
109 UBool stringsEqual
= TRUE
;
110 utext_setNativeIndex(utext
, 0);
112 U8_NEXT_UNSAFE(utf8
, u8i
, u8c
);
113 utc
= utext_next32(utext
);
114 if (u8c
== 0 && utc
== U_SENTINEL
) {
117 if (u8c
!= utc
|| u8c
== 0) {
118 stringsEqual
= FALSE
;
126 static void test_assert_utext(const char *expected
, UText
*actual
, const char *file
, int line
) {
127 utext_setNativeIndex(actual
, 0);
128 if (!equals_utf8_utext(expected
, actual
)) {
130 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file
, line
, expected
);
131 c
= utext_next32From(actual
, 0);
132 while (c
!= U_SENTINEL
) {
133 if (0x20<c
&& c
<0x7e) {
138 c
= UTEXT_NEXT32(actual
);
145 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
146 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
148 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
150 static UBool
testUTextEqual(UText
*uta
, UText
*utb
) {
153 utext_setNativeIndex(uta
, 0);
154 utext_setNativeIndex(utb
, 0);
156 ca
= utext_next32(uta
);
157 cb
= utext_next32(utb
);
161 } while (ca
!= U_SENTINEL
);
168 static void TestRegexCAPI(void);
169 static void TestBug4315(void);
170 static void TestUTextAPI(void);
171 static void TestRefreshInput(void);
172 static void TestBug8421(void);
173 static void TestBug10815(void);
174 static void TestMatchStartLineWithEmptyText(void);
176 void addURegexTest(TestNode
** root
);
178 void addURegexTest(TestNode
** root
)
180 addTest(root
, &TestRegexCAPI
, "regex/TestRegexCAPI");
181 addTest(root
, &TestBug4315
, "regex/TestBug4315");
182 addTest(root
, &TestUTextAPI
, "regex/TestUTextAPI");
183 addTest(root
, &TestRefreshInput
, "regex/TestRefreshInput");
184 addTest(root
, &TestBug8421
, "regex/TestBug8421");
185 addTest(root
, &TestBug10815
, "regex/TestBug10815");
186 addTest(root
, &TestMatchStartLineWithEmptyText
, "regex/TestMatchStartLineWithEmptyText");
190 * Call back function and context struct used for testing
191 * regular expression user callbacks. This test is mostly the same as
192 * the corresponding C++ test in intltest.
194 typedef struct callBackContext
{
200 static UBool U_EXPORT2 U_CALLCONV
201 TestCallbackFn(const void *context
, int32_t steps
) {
202 callBackContext
*info
= (callBackContext
*)context
;
203 if (info
->lastSteps
+1 != steps
) {
204 log_err("incorrect steps in callback. Expected %d, got %d\n", info
->lastSteps
+1, steps
);
206 info
->lastSteps
= steps
;
208 return (info
->numCalls
< info
->maxCalls
);
212 * Regular Expression C API Tests
214 static void TestRegexCAPI(void) {
215 UErrorCode status
= U_ZERO_ERROR
;
216 URegularExpression
*re
;
220 memset(&minus1
, -1, sizeof(minus1
));
    /* Minimalist open/close */
223 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
224 re
= uregex_open(pat
, -1, 0, 0, &status
);
225 if (U_FAILURE(status
)) {
226 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
231 /* Open with all flag values set */
232 status
= U_ZERO_ERROR
;
233 re
= uregex_open(pat
, -1,
234 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
| UREGEX_LITERAL
,
236 TEST_ASSERT_SUCCESS(status
);
239 /* Open with an invalid flag */
240 status
= U_ZERO_ERROR
;
241 re
= uregex_open(pat
, -1, 0x40000000, 0, &status
);
242 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
245 /* Open with an unimplemented flag */
246 status
= U_ZERO_ERROR
;
247 re
= uregex_open(pat
, -1, UREGEX_CANON_EQ
, 0, &status
);
248 TEST_ASSERT(status
== U_REGEX_UNIMPLEMENTED
);
251 /* openC with an invalid parameter */
252 status
= U_ZERO_ERROR
;
253 re
= uregex_openC(NULL
,
254 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
255 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
257 /* openC with an invalid parameter */
258 status
= U_USELESS_COLLATOR_ERROR
;
259 re
= uregex_openC(NULL
,
260 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
261 TEST_ASSERT(status
== U_USELESS_COLLATOR_ERROR
&& re
== NULL
);
263 /* openC open from a C string */
267 status
= U_ZERO_ERROR
;
268 re
= uregex_openC("abc*", 0, 0, &status
);
269 TEST_ASSERT_SUCCESS(status
);
270 p
= uregex_pattern(re
, &len
, &status
);
271 TEST_ASSERT_SUCCESS(status
);
273 /* The TEST_ASSERT_SUCCESS above should change too... */
274 if(U_SUCCESS(status
)) {
275 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
276 TEST_ASSERT(u_strcmp(pat
, p
) == 0);
277 TEST_ASSERT(len
==(int32_t)strlen("abc*"));
282 /* TODO: Open with ParseError parameter */
289 URegularExpression
*clone1
;
290 URegularExpression
*clone2
;
291 URegularExpression
*clone3
;
292 UChar testString1
[30];
293 UChar testString2
[30];
297 status
= U_ZERO_ERROR
;
298 re
= uregex_openC("abc*", 0, 0, &status
);
299 TEST_ASSERT_SUCCESS(status
);
300 clone1
= uregex_clone(re
, &status
);
301 TEST_ASSERT_SUCCESS(status
);
302 TEST_ASSERT(clone1
!= NULL
);
304 status
= U_ZERO_ERROR
;
305 clone2
= uregex_clone(re
, &status
);
306 TEST_ASSERT_SUCCESS(status
);
307 TEST_ASSERT(clone2
!= NULL
);
310 status
= U_ZERO_ERROR
;
311 clone3
= uregex_clone(clone2
, &status
);
312 TEST_ASSERT_SUCCESS(status
);
313 TEST_ASSERT(clone3
!= NULL
);
315 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
316 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
318 status
= U_ZERO_ERROR
;
319 uregex_setText(clone1
, testString1
, -1, &status
);
320 TEST_ASSERT_SUCCESS(status
);
321 result
= uregex_lookingAt(clone1
, 0, &status
);
322 TEST_ASSERT_SUCCESS(status
);
323 TEST_ASSERT(result
==TRUE
);
325 status
= U_ZERO_ERROR
;
326 uregex_setText(clone2
, testString2
, -1, &status
);
327 TEST_ASSERT_SUCCESS(status
);
328 result
= uregex_lookingAt(clone2
, 0, &status
);
329 TEST_ASSERT_SUCCESS(status
);
330 TEST_ASSERT(result
==FALSE
);
331 result
= uregex_find(clone2
, 0, &status
);
332 TEST_ASSERT_SUCCESS(status
);
333 TEST_ASSERT(result
==TRUE
);
335 uregex_close(clone1
);
336 uregex_close(clone2
);
337 uregex_close(clone3
);
345 const UChar
*resultPat
;
347 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
));
348 status
= U_ZERO_ERROR
;
349 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
350 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
351 TEST_ASSERT_SUCCESS(status
);
353 /* The TEST_ASSERT_SUCCESS above should change too... */
354 if (U_SUCCESS(status
)) {
355 TEST_ASSERT(resultLen
== -1);
356 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
361 status
= U_ZERO_ERROR
;
362 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
363 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
364 TEST_ASSERT_SUCCESS(status
);
365 TEST_ASSERT_SUCCESS(status
);
367 /* The TEST_ASSERT_SUCCESS above should change too... */
368 if (U_SUCCESS(status
)) {
369 TEST_ASSERT(resultLen
== 3);
370 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
371 TEST_ASSERT(u_strlen(resultPat
) == 3);
383 status
= U_ZERO_ERROR
;
384 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
385 t
= uregex_flags(re
, &status
);
386 TEST_ASSERT_SUCCESS(status
);
390 status
= U_ZERO_ERROR
;
391 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
392 t
= uregex_flags(re
, &status
);
393 TEST_ASSERT_SUCCESS(status
);
397 status
= U_ZERO_ERROR
;
398 re
= uregex_open(pat
, -1, UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
, NULL
, &status
);
399 t
= uregex_flags(re
, &status
);
400 TEST_ASSERT_SUCCESS(status
);
401 TEST_ASSERT(t
== (UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
));
406 * setText() and lookingAt()
413 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
414 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
415 status
= U_ZERO_ERROR
;
416 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
417 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
418 TEST_ASSERT_SUCCESS(status
);
420 /* Operation before doing a setText should fail... */
421 status
= U_ZERO_ERROR
;
422 uregex_lookingAt(re
, 0, &status
);
423 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
425 status
= U_ZERO_ERROR
;
426 uregex_setText(re
, text1
, -1, &status
);
427 result
= uregex_lookingAt(re
, 0, &status
);
428 TEST_ASSERT(result
== TRUE
);
429 TEST_ASSERT_SUCCESS(status
);
431 status
= U_ZERO_ERROR
;
432 uregex_setText(re
, text2
, -1, &status
);
433 result
= uregex_lookingAt(re
, 0, &status
);
434 TEST_ASSERT(result
== FALSE
);
435 TEST_ASSERT_SUCCESS(status
);
437 status
= U_ZERO_ERROR
;
438 uregex_setText(re
, text1
, -1, &status
);
439 result
= uregex_lookingAt(re
, 0, &status
);
440 TEST_ASSERT(result
== TRUE
);
441 TEST_ASSERT_SUCCESS(status
);
443 status
= U_ZERO_ERROR
;
444 uregex_setText(re
, text1
, 5, &status
);
445 result
= uregex_lookingAt(re
, 0, &status
);
446 TEST_ASSERT(result
== FALSE
);
447 TEST_ASSERT_SUCCESS(status
);
449 status
= U_ZERO_ERROR
;
450 uregex_setText(re
, text1
, 6, &status
);
451 result
= uregex_lookingAt(re
, 0, &status
);
452 TEST_ASSERT(result
== TRUE
);
453 TEST_ASSERT_SUCCESS(status
);
468 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
469 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
470 status
= U_ZERO_ERROR
;
471 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
472 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
474 uregex_setText(re
, text1
, -1, &status
);
475 result
= uregex_getText(re
, &textLength
, &status
);
476 TEST_ASSERT(result
== text1
);
477 TEST_ASSERT(textLength
== -1);
478 TEST_ASSERT_SUCCESS(status
);
480 status
= U_ZERO_ERROR
;
481 uregex_setText(re
, text2
, 7, &status
);
482 result
= uregex_getText(re
, &textLength
, &status
);
483 TEST_ASSERT(result
== text2
);
484 TEST_ASSERT(textLength
== 7);
485 TEST_ASSERT_SUCCESS(status
);
487 status
= U_ZERO_ERROR
;
488 uregex_setText(re
, text2
, 4, &status
);
489 result
= uregex_getText(re
, &textLength
, &status
);
490 TEST_ASSERT(result
== text2
);
491 TEST_ASSERT(textLength
== 4);
492 TEST_ASSERT_SUCCESS(status
);
503 UChar nullString
[] = {0,0,0};
505 u_uastrncpy(text1
, "abcccde", UPRV_LENGTHOF(text1
));
506 status
= U_ZERO_ERROR
;
507 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
508 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
510 uregex_setText(re
, text1
, -1, &status
);
511 result
= uregex_matches(re
, 0, &status
);
512 TEST_ASSERT(result
== FALSE
);
513 TEST_ASSERT_SUCCESS(status
);
515 status
= U_ZERO_ERROR
;
516 uregex_setText(re
, text1
, 6, &status
);
517 result
= uregex_matches(re
, 0, &status
);
518 TEST_ASSERT(result
== TRUE
);
519 TEST_ASSERT_SUCCESS(status
);
521 status
= U_ZERO_ERROR
;
522 uregex_setText(re
, text1
, 6, &status
);
523 result
= uregex_matches(re
, 1, &status
);
524 TEST_ASSERT(result
== FALSE
);
525 TEST_ASSERT_SUCCESS(status
);
528 status
= U_ZERO_ERROR
;
529 re
= uregex_openC(".?", 0, NULL
, &status
);
530 uregex_setText(re
, text1
, -1, &status
);
531 len
= u_strlen(text1
);
532 result
= uregex_matches(re
, len
, &status
);
533 TEST_ASSERT(result
== TRUE
);
534 TEST_ASSERT_SUCCESS(status
);
536 status
= U_ZERO_ERROR
;
537 uregex_setText(re
, nullString
, -1, &status
);
538 TEST_ASSERT_SUCCESS(status
);
539 result
= uregex_matches(re
, 0, &status
);
540 TEST_ASSERT(result
== TRUE
);
541 TEST_ASSERT_SUCCESS(status
);
547 * lookingAt() Used in setText test.
552 * find(), findNext, start, end, reset
557 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
558 status
= U_ZERO_ERROR
;
559 re
= uregex_openC("rx", 0, NULL
, &status
);
561 uregex_setText(re
, text1
, -1, &status
);
562 result
= uregex_find(re
, 0, &status
);
563 TEST_ASSERT(result
== TRUE
);
564 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
565 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
566 TEST_ASSERT_SUCCESS(status
);
568 result
= uregex_find(re
, 9, &status
);
569 TEST_ASSERT(result
== TRUE
);
570 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
571 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
572 TEST_ASSERT_SUCCESS(status
);
574 result
= uregex_find(re
, 14, &status
);
575 TEST_ASSERT(result
== FALSE
);
576 TEST_ASSERT_SUCCESS(status
);
578 status
= U_ZERO_ERROR
;
579 uregex_reset(re
, 0, &status
);
581 result
= uregex_findNext(re
, &status
);
582 TEST_ASSERT(result
== TRUE
);
583 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
584 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
585 TEST_ASSERT_SUCCESS(status
);
587 result
= uregex_findNext(re
, &status
);
588 TEST_ASSERT(result
== TRUE
);
589 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
590 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
591 TEST_ASSERT_SUCCESS(status
);
593 status
= U_ZERO_ERROR
;
594 uregex_reset(re
, 12, &status
);
596 result
= uregex_findNext(re
, &status
);
597 TEST_ASSERT(result
== TRUE
);
598 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
599 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
600 TEST_ASSERT_SUCCESS(status
);
602 result
= uregex_findNext(re
, &status
);
603 TEST_ASSERT(result
== FALSE
);
604 TEST_ASSERT_SUCCESS(status
);
615 status
= U_ZERO_ERROR
;
616 re
= uregex_openC("abc", 0, NULL
, &status
);
617 result
= uregex_groupCount(re
, &status
);
618 TEST_ASSERT_SUCCESS(status
);
619 TEST_ASSERT(result
== 0);
622 status
= U_ZERO_ERROR
;
623 re
= uregex_openC("abc(def)(ghi(j))", 0, NULL
, &status
);
624 result
= uregex_groupCount(re
, &status
);
625 TEST_ASSERT_SUCCESS(status
);
626 TEST_ASSERT(result
== 3);
640 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
642 status
= U_ZERO_ERROR
;
643 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
644 TEST_ASSERT_SUCCESS(status
);
647 uregex_setText(re
, text1
, -1, &status
);
648 result
= uregex_find(re
, 0, &status
);
649 TEST_ASSERT(result
==TRUE
);
651 /* Capture Group 0, the full match. Should succeed. */
652 status
= U_ZERO_ERROR
;
653 resultSz
= uregex_group(re
, 0, buf
, UPRV_LENGTHOF(buf
), &status
);
654 TEST_ASSERT_SUCCESS(status
);
655 TEST_ASSERT_STRING("abc interior def", buf
, TRUE
);
656 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
658 /* Capture group #1. Should succeed. */
659 status
= U_ZERO_ERROR
;
660 resultSz
= uregex_group(re
, 1, buf
, UPRV_LENGTHOF(buf
), &status
);
661 TEST_ASSERT_SUCCESS(status
);
662 TEST_ASSERT_STRING(" interior ", buf
, TRUE
);
663 TEST_ASSERT(resultSz
== (int32_t)strlen(" interior "));
665 /* Capture group out of range. Error. */
666 status
= U_ZERO_ERROR
;
667 uregex_group(re
, 2, buf
, UPRV_LENGTHOF(buf
), &status
);
668 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
670 /* NULL buffer, pure pre-flight */
671 status
= U_ZERO_ERROR
;
672 resultSz
= uregex_group(re
, 0, NULL
, 0, &status
);
673 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
674 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
676 /* Too small buffer, truncated string */
677 status
= U_ZERO_ERROR
;
678 memset(buf
, -1, sizeof(buf
));
679 resultSz
= uregex_group(re
, 0, buf
, 5, &status
);
680 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
681 TEST_ASSERT_STRING("abc i", buf
, FALSE
);
682 TEST_ASSERT(buf
[5] == (UChar
)0xffff);
683 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
685 /* Output string just fits buffer, no NUL term. */
686 status
= U_ZERO_ERROR
;
687 resultSz
= uregex_group(re
, 0, buf
, (int32_t)strlen("abc interior def"), &status
);
688 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
689 TEST_ASSERT_STRING("abc interior def", buf
, FALSE
);
690 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
691 TEST_ASSERT(buf
[strlen("abc interior def")] == (UChar
)0xffff);
702 /* SetRegion(), getRegion() do something */
703 TEST_SETUP(".*", "0123456789ABCDEF", 0);
704 UChar resultString
[40];
705 TEST_ASSERT(uregex_regionStart(re
, &status
) == 0);
706 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 16);
707 uregex_setRegion(re
, 3, 6, &status
);
708 TEST_ASSERT(uregex_regionStart(re
, &status
) == 3);
709 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 6);
710 TEST_ASSERT(uregex_findNext(re
, &status
));
711 TEST_ASSERT(uregex_group(re
, 0, resultString
, UPRV_LENGTHOF(resultString
), &status
) == 3);
712 TEST_ASSERT_STRING("345", resultString
, TRUE
);
715 /* find(start=-1) uses regions */
716 TEST_SETUP(".*", "0123456789ABCDEF", 0);
717 uregex_setRegion(re
, 4, 6, &status
);
718 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
719 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
720 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
723 /* find (start >=0) does not use regions */
724 TEST_SETUP(".*", "0123456789ABCDEF", 0);
725 uregex_setRegion(re
, 4, 6, &status
);
726 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
727 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
728 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
731 /* findNext() obeys regions */
732 TEST_SETUP(".", "0123456789ABCDEF", 0);
733 uregex_setRegion(re
, 4, 6, &status
);
734 TEST_ASSERT(uregex_findNext(re
,&status
) == TRUE
);
735 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
736 TEST_ASSERT(uregex_findNext(re
, &status
) == TRUE
);
737 TEST_ASSERT(uregex_start(re
, 0, &status
) == 5);
738 TEST_ASSERT(uregex_findNext(re
, &status
) == FALSE
);
741 /* matches(start=-1) uses regions */
742 /* Also, verify that non-greedy *? succeeds in finding the full match. */
743 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
744 uregex_setRegion(re
, 4, 6, &status
);
745 TEST_ASSERT(uregex_matches(re
, -1, &status
) == TRUE
);
746 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
747 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
750 /* matches (start >=0) does not use regions */
751 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
752 uregex_setRegion(re
, 4, 6, &status
);
753 TEST_ASSERT(uregex_matches(re
, 0, &status
) == TRUE
);
754 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
755 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
758 /* lookingAt(start=-1) uses regions */
759 /* Also, verify that non-greedy *? finds the first (shortest) match. */
760 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
761 uregex_setRegion(re
, 4, 6, &status
);
762 TEST_ASSERT(uregex_lookingAt(re
, -1, &status
) == TRUE
);
763 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
764 TEST_ASSERT(uregex_end(re
, 0, &status
) == 4);
767 /* lookingAt (start >=0) does not use regions */
768 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
769 uregex_setRegion(re
, 4, 6, &status
);
770 TEST_ASSERT(uregex_lookingAt(re
, 0, &status
) == TRUE
);
771 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
772 TEST_ASSERT(uregex_end(re
, 0, &status
) == 0);
776 TEST_SETUP("[a-f]*", "abcdefghij", 0);
777 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
778 TEST_ASSERT(uregex_hitEnd(re
, &status
) == FALSE
);
781 TEST_SETUP("[a-f]*", "abcdef", 0);
782 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
783 TEST_ASSERT(uregex_hitEnd(re
, &status
) == TRUE
);
787 TEST_SETUP("abcd", "abcd", 0);
788 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
789 TEST_ASSERT(uregex_requireEnd(re
, &status
) == FALSE
);
792 TEST_SETUP("abcd$", "abcd", 0);
793 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
794 TEST_ASSERT(uregex_requireEnd(re
, &status
) == TRUE
);
797 /* anchoringBounds */
798 TEST_SETUP("abc$", "abcdef", 0);
799 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == TRUE
);
800 uregex_useAnchoringBounds(re
, FALSE
, &status
);
801 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == FALSE
);
803 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
);
804 uregex_useAnchoringBounds(re
, TRUE
, &status
);
805 uregex_setRegion(re
, 0, 3, &status
);
806 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
807 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
810 /* Transparent Bounds */
811 TEST_SETUP("abc(?=def)", "abcdef", 0);
812 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == FALSE
);
813 uregex_useTransparentBounds(re
, TRUE
, &status
);
814 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == TRUE
);
816 uregex_useTransparentBounds(re
, FALSE
, &status
);
817 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* No Region */
818 uregex_setRegion(re
, 0, 3, &status
);
819 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
); /* with region, opaque bounds */
820 uregex_useTransparentBounds(re
, TRUE
, &status
);
821 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* with region, transparent bounds */
822 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
835 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
836 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
837 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
839 status
= U_ZERO_ERROR
;
840 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
841 TEST_ASSERT_SUCCESS(status
);
843 /* Normal case, with match */
844 uregex_setText(re
, text1
, -1, &status
);
845 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
846 TEST_ASSERT_SUCCESS(status
);
847 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, TRUE
);
848 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
850 /* No match. Text should copy to output with no changes. */
851 status
= U_ZERO_ERROR
;
852 uregex_setText(re
, text2
, -1, &status
);
853 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
854 TEST_ASSERT_SUCCESS(status
);
855 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
856 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
858 /* Match, output just fills buffer, no termination warning. */
859 status
= U_ZERO_ERROR
;
860 uregex_setText(re
, text1
, -1, &status
);
861 memset(buf
, -1, sizeof(buf
));
862 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, (int32_t)strlen("Replace <aa> x1x x...x."), &status
);
863 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
864 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
865 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
866 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
868 /* Do the replaceFirst again, without first resetting anything.
869 * Should give the same results.
871 status
= U_ZERO_ERROR
;
872 memset(buf
, -1, sizeof(buf
));
873 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, (int32_t)strlen("Replace <aa> x1x x...x."), &status
);
874 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
875 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
876 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
877 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
879 /* NULL buffer, zero buffer length */
880 status
= U_ZERO_ERROR
;
881 resultSz
= uregex_replaceFirst(re
, replText
, -1, NULL
, 0, &status
);
882 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
883 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
885 /* Buffer too small by one */
886 status
= U_ZERO_ERROR
;
887 memset(buf
, -1, sizeof(buf
));
888 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status
);
889 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
890 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf
, FALSE
);
891 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
892 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
902 UChar text1
[80]; /* "Replace xaax x1x x...x." */
903 UChar text2
[80]; /* "No match Here" */
904 UChar replText
[80]; /* "<$1>" */
905 UChar replText2
[80]; /* "<<$1>>" */
906 const char * pattern
= "x(.*?)x";
907 const char * expectedResult
= "Replace <aa> <1> <...>.";
908 const char * expectedResult2
= "Replace <<aa>> <<1>> <<...>>.";
911 int32_t expectedResultSize
;
912 int32_t expectedResultSize2
;
915 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
916 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
917 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
918 u_uastrncpy(replText2
, "<<$1>>", UPRV_LENGTHOF(replText2
));
919 expectedResultSize
= (int32_t)strlen(expectedResult
);
920 expectedResultSize2
= (int32_t)strlen(expectedResult2
);
922 status
= U_ZERO_ERROR
;
923 re
= uregex_openC(pattern
, 0, NULL
, &status
);
924 TEST_ASSERT_SUCCESS(status
);
926 /* Normal case, with match */
927 uregex_setText(re
, text1
, -1, &status
);
928 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
929 TEST_ASSERT_SUCCESS(status
);
930 TEST_ASSERT_STRING(expectedResult
, buf
, TRUE
);
931 TEST_ASSERT(resultSize
== expectedResultSize
);
933 /* No match. Text should copy to output with no changes. */
934 status
= U_ZERO_ERROR
;
935 uregex_setText(re
, text2
, -1, &status
);
936 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
937 TEST_ASSERT_SUCCESS(status
);
938 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
939 TEST_ASSERT(resultSize
== u_strlen(text2
));
941 /* Match, output just fills buffer, no termination warning. */
942 status
= U_ZERO_ERROR
;
943 uregex_setText(re
, text1
, -1, &status
);
944 memset(buf
, -1, sizeof(buf
));
945 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, expectedResultSize
, &status
);
946 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
947 TEST_ASSERT_STRING(expectedResult
, buf
, FALSE
);
948 TEST_ASSERT(resultSize
== expectedResultSize
);
949 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
        /* Do the replaceAll again, without first resetting anything.
         *  Should give the same results.
         */
954 status
= U_ZERO_ERROR
;
955 memset(buf
, -1, sizeof(buf
));
956 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, (int32_t)strlen("Replace xaax x1x x...x."), &status
);
957 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
958 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
959 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
960 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
962 /* NULL buffer, zero buffer length */
963 status
= U_ZERO_ERROR
;
964 resultSize
= uregex_replaceAll(re
, replText
, -1, NULL
, 0, &status
);
965 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
966 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
968 /* Buffer too small. Try every size, which will tickle edge cases
969 * in uregex_appendReplacement (used by replaceAll) */
970 for (i
=0; i
<expectedResultSize
; i
++) {
972 status
= U_ZERO_ERROR
;
973 memset(buf
, -1, sizeof(buf
));
974 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, i
, &status
);
975 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
976 strcpy(expected
, expectedResult
);
978 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
979 TEST_ASSERT(resultSize
== expectedResultSize
);
980 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
983 /* Buffer too small. Same as previous test, except this time the replacement
984 * text is longer than the match capture group, making the length of the complete
985 * replacement longer than the original string.
987 for (i
=0; i
<expectedResultSize2
; i
++) {
989 status
= U_ZERO_ERROR
;
990 memset(buf
, -1, sizeof(buf
));
991 resultSize
= uregex_replaceAll(re
, replText2
, -1, buf
, i
, &status
);
992 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
993 strcpy(expected
, expectedResult2
);
995 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
996 TEST_ASSERT(resultSize
== expectedResultSize2
);
997 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
1006 * appendReplacement()
1016 status
= U_ZERO_ERROR
;
1017 re
= uregex_openC(".*", 0, 0, &status
);
1018 TEST_ASSERT_SUCCESS(status
);
1020 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1021 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1022 uregex_setText(re
, text
, -1, &status
);
1024 /* match covers whole target string */
1025 uregex_find(re
, 0, &status
);
1026 TEST_ASSERT_SUCCESS(status
);
1028 bufCap
= UPRV_LENGTHOF(buf
);
1029 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1030 TEST_ASSERT_SUCCESS(status
);
1031 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1033 /* Match has \u \U escapes */
1034 uregex_find(re
, 0, &status
);
1035 TEST_ASSERT_SUCCESS(status
);
1037 bufCap
= UPRV_LENGTHOF(buf
);
1038 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1039 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1040 TEST_ASSERT_SUCCESS(status
);
1041 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1043 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1044 status
= U_ZERO_ERROR
;
1045 uregex_find(re
, 0, &status
);
1046 TEST_ASSERT_SUCCESS(status
);
1048 status
= U_BUFFER_OVERFLOW_ERROR
;
1049 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, NULL
, &status
);
1050 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1057 /* appendTail(). Checked in replaceFirst(), replaceAll(). */
1064 UChar textToSplit
[80];
1069 int32_t requiredCapacity
;
1070 int32_t spaceNeeded
;
1073 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1074 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1076 status
= U_ZERO_ERROR
;
1077 re
= uregex_openC(":", 0, NULL
, &status
);
1082 uregex_setText(re
, textToSplit
, -1, &status
);
1083 TEST_ASSERT_SUCCESS(status
);
1085 /* The TEST_ASSERT_SUCCESS call above should change too... */
1086 if (U_SUCCESS(status
)) {
1087 memset(fields
, -1, sizeof(fields
));
1089 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1090 TEST_ASSERT_SUCCESS(status
);
1092 /* The TEST_ASSERT_SUCCESS call above should change too... */
1093 if(U_SUCCESS(status
)) {
1094 TEST_ASSERT(numFields
== 3);
1095 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1096 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1097 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1098 TEST_ASSERT(fields
[3] == NULL
);
1100 spaceNeeded
= u_strlen(textToSplit
) -
1101 (numFields
- 1) + /* Field delimiters do not appear in output */
1102 numFields
; /* Each field gets a NUL terminator */
1104 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1111 /* Split with too few output strings available */
1112 status
= U_ZERO_ERROR
;
1113 re
= uregex_openC(":", 0, NULL
, &status
);
1114 uregex_setText(re
, textToSplit
, -1, &status
);
1115 TEST_ASSERT_SUCCESS(status
);
1117 /* The TEST_ASSERT_SUCCESS call above should change too... */
1118 if(U_SUCCESS(status
)) {
1119 memset(fields
, -1, sizeof(fields
));
1121 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1122 TEST_ASSERT_SUCCESS(status
);
1124 /* The TEST_ASSERT_SUCCESS call above should change too... */
1125 if(U_SUCCESS(status
)) {
1126 TEST_ASSERT(numFields
== 2);
1127 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1128 TEST_ASSERT_STRING(" second: third", fields
[1], TRUE
);
1129 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1131 spaceNeeded
= u_strlen(textToSplit
) -
1132 (numFields
- 1) + /* Field delimiters do not appear in output */
1133 numFields
; /* Each field gets a NUL terminator */
1135 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1137 /* Split with a range of output buffer sizes. */
1138 spaceNeeded
= u_strlen(textToSplit
) -
1139 (numFields
- 1) + /* Field delimiters do not appear in output */
1140 numFields
; /* Each field gets a NUL terminator */
1142 for (sz
=0; sz
< spaceNeeded
+1; sz
++) {
1143 memset(fields
, -1, sizeof(fields
));
1144 status
= U_ZERO_ERROR
;
1146 uregex_split(re
, buf
, sz
, &requiredCapacity
, fields
, 10, &status
);
1147 if (sz
>= spaceNeeded
) {
1148 TEST_ASSERT_SUCCESS(status
);
1149 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1150 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1151 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1153 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1155 TEST_ASSERT(numFields
== 3);
1156 TEST_ASSERT(fields
[3] == NULL
);
1157 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1168 /* Split(), part 2. Patterns with capture groups. The capture group text
1169 * comes out as additional fields. */
1171 UChar textToSplit
[80];
1175 int32_t requiredCapacity
;
1176 int32_t spaceNeeded
;
1179 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
1181 status
= U_ZERO_ERROR
;
1182 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
1184 uregex_setText(re
, textToSplit
, -1, &status
);
1185 TEST_ASSERT_SUCCESS(status
);
1187 /* The TEST_ASSERT_SUCCESS call above should change too... */
1188 if(U_SUCCESS(status
)) {
1189 memset(fields
, -1, sizeof(fields
));
1191 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1192 TEST_ASSERT_SUCCESS(status
);
1194 /* The TEST_ASSERT_SUCCESS call above should change too... */
1195 if(U_SUCCESS(status
)) {
1196 TEST_ASSERT(numFields
== 5);
1197 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1198 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1199 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1200 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1201 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1202 TEST_ASSERT(fields
[5] == NULL
);
1203 spaceNeeded
= (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1204 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1208 /* Split with too few output strings available (2) */
1209 status
= U_ZERO_ERROR
;
1210 memset(fields
, -1, sizeof(fields
));
1212 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1213 TEST_ASSERT_SUCCESS(status
);
1215 /* The TEST_ASSERT_SUCCESS call above should change too... */
1216 if(U_SUCCESS(status
)) {
1217 TEST_ASSERT(numFields
== 2);
1218 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1219 TEST_ASSERT_STRING(" second<tag-b> third", fields
[1], TRUE
);
1220 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1222 spaceNeeded
= (int32_t)strlen("first . second<tag-b> third."); /* "." at NUL positions */
1223 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1226 /* Split with too few output strings available (3) */
1227 status
= U_ZERO_ERROR
;
1228 memset(fields
, -1, sizeof(fields
));
1230 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 3, &status
);
1231 TEST_ASSERT_SUCCESS(status
);
1233 /* The TEST_ASSERT_SUCCESS call above should change too... */
1234 if(U_SUCCESS(status
)) {
1235 TEST_ASSERT(numFields
== 3);
1236 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1237 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1238 TEST_ASSERT_STRING(" second<tag-b> third", fields
[2], TRUE
);
1239 TEST_ASSERT(!memcmp(&fields
[3],&minus1
,sizeof(UChar
*)));
1241 spaceNeeded
= (int32_t)strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1242 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1245 /* Split with just enough output strings available (5) */
1246 status
= U_ZERO_ERROR
;
1247 memset(fields
, -1, sizeof(fields
));
1249 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 5, &status
);
1250 TEST_ASSERT_SUCCESS(status
);
1252 /* The TEST_ASSERT_SUCCESS call above should change too... */
1253 if(U_SUCCESS(status
)) {
1254 TEST_ASSERT(numFields
== 5);
1255 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1256 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1257 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1258 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1259 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1260 TEST_ASSERT(!memcmp(&fields
[5],&minus1
,sizeof(UChar
*)));
1262 spaceNeeded
= (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1263 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1266 /* Split, end of text is a field delimiter. */
1267 status
= U_ZERO_ERROR
;
1268 sz
= (int32_t)strlen("first <tag-a> second<tag-b>");
1269 uregex_setText(re
, textToSplit
, sz
, &status
);
1270 TEST_ASSERT_SUCCESS(status
);
1272 /* The TEST_ASSERT_SUCCESS call above should change too... */
1273 if(U_SUCCESS(status
)) {
1274 memset(fields
, -1, sizeof(fields
));
1276 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 9, &status
);
1277 TEST_ASSERT_SUCCESS(status
);
1279 /* The TEST_ASSERT_SUCCESS call above should change too... */
1280 if(U_SUCCESS(status
)) {
1281 TEST_ASSERT(numFields
== 5);
1282 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1283 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1284 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1285 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1286 TEST_ASSERT_STRING("", fields
[4], TRUE
);
1287 TEST_ASSERT(fields
[5] == NULL
);
1288 TEST_ASSERT(fields
[8] == NULL
);
1289 TEST_ASSERT(!memcmp(&fields
[9],&minus1
,sizeof(UChar
*)));
1290 spaceNeeded
= (int32_t)strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1291 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1301 TEST_SETUP("abc$", "abcdef", 0);
1302 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 0);
1303 uregex_setTimeLimit(re
, 1000, &status
);
1304 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1305 TEST_ASSERT_SUCCESS(status
);
1306 uregex_setTimeLimit(re
, -1, &status
);
1307 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1308 status
= U_ZERO_ERROR
;
1309 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1313 /* set/get Stack Limit */
1315 TEST_SETUP("abc$", "abcdef", 0);
1316 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 8000000);
1317 uregex_setStackLimit(re
, 40000, &status
);
1318 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1319 TEST_ASSERT_SUCCESS(status
);
1320 uregex_setStackLimit(re
, -1, &status
);
1321 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1322 status
= U_ZERO_ERROR
;
1323 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1328 /* Get/Set callback functions
1329 * This test is copied from intltest regex/Callbacks
1330 * The pattern and test data will run long enough to cause the callback
1331 * to be invoked. The nested '+' operators give exponential time
1332 * behavior with increasing string length. */
1334 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1335 callBackContext cbInfo
= {4, 0, 0};
1336 const void *pContext
= &cbInfo
;
1337 URegexMatchCallback
*returnedFn
= &TestCallbackFn
;
1339 /* Getting the callback fn when it hasn't been set must return NULL */
1340 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1341 TEST_ASSERT_SUCCESS(status
);
1342 TEST_ASSERT(returnedFn
== NULL
);
1343 TEST_ASSERT(pContext
== NULL
);
1345 /* Set the callback and do a match. */
1346 /* The callback function should record that it has been called. */
1347 uregex_setMatchCallback(re
, &TestCallbackFn
, &cbInfo
, &status
);
1348 TEST_ASSERT_SUCCESS(status
);
1349 TEST_ASSERT(cbInfo
.numCalls
== 0);
1350 TEST_ASSERT(uregex_matches(re
, -1, &status
) == FALSE
);
1351 TEST_ASSERT_SUCCESS(status
);
1352 TEST_ASSERT(cbInfo
.numCalls
> 0);
1354 /* Getting the callback should return the values that were set above. */
1355 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1356 TEST_ASSERT(returnedFn
== &TestCallbackFn
);
1357 TEST_ASSERT(pContext
== &cbInfo
);
1364 static void TestBug4315(void) {
1365 UErrorCode theICUError
= U_ZERO_ERROR
;
1366 URegularExpression
*theRegEx
;
1368 const char *thePattern
;
1369 UChar theString
[100];
1370 UChar
*destFields
[24];
1371 int32_t neededLength1
;
1372 int32_t neededLength2
;
1374 int32_t wordCount
= 0;
1375 int32_t destFieldsSize
= 24;
1378 u_uastrcpy(theString
, "The quick brown fox jumped over the slow black turtle.");
1381 theRegEx
= uregex_openC(thePattern
, 0, NULL
, &theICUError
);
1382 TEST_ASSERT_SUCCESS(theICUError
);
1384 /* set the input string */
1385 uregex_setText(theRegEx
, theString
, u_strlen(theString
), &theICUError
);
1386 TEST_ASSERT_SUCCESS(theICUError
);
1389 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1391 wordCount
= uregex_split(theRegEx
, NULL
, 0, &neededLength1
, destFields
,
1392 destFieldsSize
, &theICUError
);
1394 TEST_ASSERT(theICUError
== U_BUFFER_OVERFLOW_ERROR
);
1395 TEST_ASSERT(wordCount
==3);
1397 if(theICUError
== U_BUFFER_OVERFLOW_ERROR
)
1399 theICUError
= U_ZERO_ERROR
;
1400 textBuff
= (UChar
*) malloc(sizeof(UChar
) * (neededLength1
+ 1));
1401 wordCount
= uregex_split(theRegEx
, textBuff
, neededLength1
+1, &neededLength2
,
1402 destFields
, destFieldsSize
, &theICUError
);
1403 TEST_ASSERT(wordCount
==3);
1404 TEST_ASSERT_SUCCESS(theICUError
);
1405 TEST_ASSERT(neededLength1
== neededLength2
);
1406 TEST_ASSERT_STRING("The qui", destFields
[0], TRUE
);
1407 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields
[1], TRUE
);
1408 TEST_ASSERT_STRING("turtle.", destFields
[2], TRUE
);
1409 TEST_ASSERT(destFields
[3] == NULL
);
1412 uregex_close(theRegEx
);
1415 /* Based on TestRegexCAPI() */
1416 static void TestUTextAPI(void) {
1417 UErrorCode status
= U_ZERO_ERROR
;
1418 URegularExpression
*re
;
1419 UText patternText
= UTEXT_INITIALIZER
;
1421 const char patternTextUTF8
[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1423 /* Minimalist open/close */
1424 utext_openUTF8(&patternText
, patternTextUTF8
, -1, &status
);
1425 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1426 if (U_FAILURE(status
)) {
1427 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
1428 utext_close(&patternText
);
1433 /* Open with all flag values set */
1434 status
= U_ZERO_ERROR
;
1435 re
= uregex_openUText(&patternText
,
1436 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
1438 TEST_ASSERT_SUCCESS(status
);
1441 /* Open with an invalid flag */
1442 status
= U_ZERO_ERROR
;
1443 re
= uregex_openUText(&patternText
, 0x40000000, 0, &status
);
1444 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
1447 /* open with an invalid parameter */
1448 status
= U_ZERO_ERROR
;
1449 re
= uregex_openUText(NULL
,
1450 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
1451 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
1457 URegularExpression
*clone1
;
1458 URegularExpression
*clone2
;
1459 URegularExpression
*clone3
;
1460 UChar testString1
[30];
1461 UChar testString2
[30];
1465 status
= U_ZERO_ERROR
;
1466 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1467 TEST_ASSERT_SUCCESS(status
);
1468 clone1
= uregex_clone(re
, &status
);
1469 TEST_ASSERT_SUCCESS(status
);
1470 TEST_ASSERT(clone1
!= NULL
);
1472 status
= U_ZERO_ERROR
;
1473 clone2
= uregex_clone(re
, &status
);
1474 TEST_ASSERT_SUCCESS(status
);
1475 TEST_ASSERT(clone2
!= NULL
);
1478 status
= U_ZERO_ERROR
;
1479 clone3
= uregex_clone(clone2
, &status
);
1480 TEST_ASSERT_SUCCESS(status
);
1481 TEST_ASSERT(clone3
!= NULL
);
1483 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
1484 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
1486 status
= U_ZERO_ERROR
;
1487 uregex_setText(clone1
, testString1
, -1, &status
);
1488 TEST_ASSERT_SUCCESS(status
);
1489 result
= uregex_lookingAt(clone1
, 0, &status
);
1490 TEST_ASSERT_SUCCESS(status
);
1491 TEST_ASSERT(result
==TRUE
);
1493 status
= U_ZERO_ERROR
;
1494 uregex_setText(clone2
, testString2
, -1, &status
);
1495 TEST_ASSERT_SUCCESS(status
);
1496 result
= uregex_lookingAt(clone2
, 0, &status
);
1497 TEST_ASSERT_SUCCESS(status
);
1498 TEST_ASSERT(result
==FALSE
);
1499 result
= uregex_find(clone2
, 0, &status
);
1500 TEST_ASSERT_SUCCESS(status
);
1501 TEST_ASSERT(result
==TRUE
);
1503 uregex_close(clone1
);
1504 uregex_close(clone2
);
1505 uregex_close(clone3
);
1510 /* pattern() and patternText() */
1513 const UChar
*resultPat
;
1516 const char str_hello
[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1517 const char str_hel
[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1518 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
)); /* for comparison */
1519 status
= U_ZERO_ERROR
;
1521 utext_openUTF8(&patternText
, str_hello
, -1, &status
);
1522 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
1523 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1524 TEST_ASSERT_SUCCESS(status
);
1526 /* The TEST_ASSERT_SUCCESS above should change too... */
1527 if (U_SUCCESS(status
)) {
1528 TEST_ASSERT(resultLen
== -1);
1529 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
1532 resultText
= uregex_patternUText(re
, &status
);
1533 TEST_ASSERT_SUCCESS(status
);
1534 TEST_ASSERT_UTEXT(str_hello
, resultText
);
1538 status
= U_ZERO_ERROR
;
1539 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
1540 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1541 TEST_ASSERT_SUCCESS(status
);
1543 /* The TEST_ASSERT_SUCCESS above should change too... */
1544 if (U_SUCCESS(status
)) {
1545 TEST_ASSERT(resultLen
== 3);
1546 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
1547 TEST_ASSERT(u_strlen(resultPat
) == 3);
1550 resultText
= uregex_patternUText(re
, &status
);
1551 TEST_ASSERT_SUCCESS(status
);
1552 TEST_ASSERT_UTEXT(str_hel
, resultText
);
1558 /* setUText() and lookingAt() */
1561 UText text1
= UTEXT_INITIALIZER
;
1562 UText text2
= UTEXT_INITIALIZER
;
1564 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1565 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1566 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1567 status
= U_ZERO_ERROR
;
1568 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1569 utext_openUTF8(&text2
, str_abcccxd
, -1, &status
);
1571 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1572 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1573 TEST_ASSERT_SUCCESS(status
);
1575 /* Operation before doing a setText should fail... */
1576 status
= U_ZERO_ERROR
;
1577 uregex_lookingAt(re
, 0, &status
);
1578 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
1580 status
= U_ZERO_ERROR
;
1581 uregex_setUText(re
, &text1
, &status
);
1582 result
= uregex_lookingAt(re
, 0, &status
);
1583 TEST_ASSERT(result
== TRUE
);
1584 TEST_ASSERT_SUCCESS(status
);
1586 status
= U_ZERO_ERROR
;
1587 uregex_setUText(re
, &text2
, &status
);
1588 result
= uregex_lookingAt(re
, 0, &status
);
1589 TEST_ASSERT(result
== FALSE
);
1590 TEST_ASSERT_SUCCESS(status
);
1592 status
= U_ZERO_ERROR
;
1593 uregex_setUText(re
, &text1
, &status
);
1594 result
= uregex_lookingAt(re
, 0, &status
);
1595 TEST_ASSERT(result
== TRUE
);
1596 TEST_ASSERT_SUCCESS(status
);
1599 utext_close(&text1
);
1600 utext_close(&text2
);
1605 /* getText() and getUText() */
1608 UText text1
= UTEXT_INITIALIZER
;
1609 UText text2
= UTEXT_INITIALIZER
;
1610 UChar text2Chars
[20];
1612 const UChar
*result
;
1614 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1615 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1616 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1619 status
= U_ZERO_ERROR
;
1620 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1621 u_uastrncpy(text2Chars
, str_abcccxd
, UPRV_LENGTHOF(text2Chars
));
1622 utext_openUChars(&text2
, text2Chars
, -1, &status
);
1624 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1625 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1627 /* First set a UText */
1628 uregex_setUText(re
, &text1
, &status
);
1629 resultText
= uregex_getUText(re
, NULL
, &status
);
1630 TEST_ASSERT_SUCCESS(status
);
1631 TEST_ASSERT(resultText
!= &text1
);
1632 utext_setNativeIndex(resultText
, 0);
1633 utext_setNativeIndex(&text1
, 0);
1634 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1635 utext_close(resultText
);
1637 result
= uregex_getText(re
, &textLength
, &status
); /* flattens UText into buffer */
1638 (void)result
; /* Suppress set but not used warning. */
1639 TEST_ASSERT(textLength
== -1 || textLength
== 6);
1640 resultText
= uregex_getUText(re
, NULL
, &status
);
1641 TEST_ASSERT_SUCCESS(status
);
1642 TEST_ASSERT(resultText
!= &text1
);
1643 utext_setNativeIndex(resultText
, 0);
1644 utext_setNativeIndex(&text1
, 0);
1645 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1646 utext_close(resultText
);
1648 /* Then set a UChar * */
1649 uregex_setText(re
, text2Chars
, 7, &status
);
1650 resultText
= uregex_getUText(re
, NULL
, &status
);
1651 TEST_ASSERT_SUCCESS(status
);
1652 utext_setNativeIndex(resultText
, 0);
1653 utext_setNativeIndex(&text2
, 0);
1654 TEST_ASSERT(testUTextEqual(resultText
, &text2
));
1655 utext_close(resultText
);
1656 result
= uregex_getText(re
, &textLength
, &status
);
1657 TEST_ASSERT(textLength
== 7);
1660 utext_close(&text1
);
1661 utext_close(&text2
);
1668 UText text1
= UTEXT_INITIALIZER
;
1670 UText nullText
= UTEXT_INITIALIZER
;
1671 const char str_abcccde
[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1672 const char str_abcd
[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1674 status
= U_ZERO_ERROR
;
1675 utext_openUTF8(&text1
, str_abcccde
, -1, &status
);
1676 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1677 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1679 uregex_setUText(re
, &text1
, &status
);
1680 result
= uregex_matches(re
, 0, &status
);
1681 TEST_ASSERT(result
== FALSE
);
1682 TEST_ASSERT_SUCCESS(status
);
1685 status
= U_ZERO_ERROR
;
1686 re
= uregex_openC(".?", 0, NULL
, &status
);
1687 uregex_setUText(re
, &text1
, &status
);
1688 result
= uregex_matches(re
, 7, &status
);
1689 TEST_ASSERT(result
== TRUE
);
1690 TEST_ASSERT_SUCCESS(status
);
1692 status
= U_ZERO_ERROR
;
1693 utext_openUTF8(&nullText
, "", -1, &status
);
1694 uregex_setUText(re
, &nullText
, &status
);
1695 TEST_ASSERT_SUCCESS(status
);
1696 result
= uregex_matches(re
, 0, &status
);
1697 TEST_ASSERT(result
== TRUE
);
1698 TEST_ASSERT_SUCCESS(status
);
1701 utext_close(&text1
);
1702 utext_close(&nullText
);
1707 /* lookingAt() Used in setText test. */
1712 /* find(), findNext, start, end, reset */
1717 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
1718 status
= U_ZERO_ERROR
;
1719 re
= uregex_openC("rx", 0, NULL
, &status
);
1721 uregex_setText(re
, text1
, -1, &status
);
1722 result
= uregex_find(re
, 0, &status
);
1723 TEST_ASSERT(result
== TRUE
);
1724 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1725 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1726 TEST_ASSERT_SUCCESS(status
);
1728 result
= uregex_find(re
, 9, &status
);
1729 TEST_ASSERT(result
== TRUE
);
1730 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
1731 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
1732 TEST_ASSERT_SUCCESS(status
);
1734 result
= uregex_find(re
, 14, &status
);
1735 TEST_ASSERT(result
== FALSE
);
1736 TEST_ASSERT_SUCCESS(status
);
1738 status
= U_ZERO_ERROR
;
1739 uregex_reset(re
, 0, &status
);
1741 result
= uregex_findNext(re
, &status
);
1742 TEST_ASSERT(result
== TRUE
);
1743 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1744 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1745 TEST_ASSERT_SUCCESS(status
);
1747 result
= uregex_findNext(re
, &status
);
1748 TEST_ASSERT(result
== TRUE
);
1749 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
1750 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
1751 TEST_ASSERT_SUCCESS(status
);
1753 status
= U_ZERO_ERROR
;
1754 uregex_reset(re
, 12, &status
);
1756 result
= uregex_findNext(re
, &status
);
1757 TEST_ASSERT(result
== TRUE
);
1758 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
1759 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
1760 TEST_ASSERT_SUCCESS(status
);
1762 result
= uregex_findNext(re
, &status
);
1763 TEST_ASSERT(result
== FALSE
);
1764 TEST_ASSERT_SUCCESS(status
);
1776 int64_t groupLen
= 0;
1779 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
1781 status
= U_ZERO_ERROR
;
1782 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
1783 TEST_ASSERT_SUCCESS(status
);
1785 uregex_setText(re
, text1
, -1, &status
);
1786 result
= uregex_find(re
, 0, &status
);
1787 TEST_ASSERT(result
==TRUE
);
1789 /* Capture Group 0 with shallow clone API. Should succeed. */
1790 status
= U_ZERO_ERROR
;
1791 actual
= uregex_groupUText(re
, 0, NULL
, &groupLen
, &status
);
1792 TEST_ASSERT_SUCCESS(status
);
1794 TEST_ASSERT(utext_getNativeIndex(actual
) == 6); /* index of "abc " within "noise abc ..." */
1795 TEST_ASSERT(groupLen
== 16); /* length of "abc interior def" */
1796 utext_extract(actual
, 6 /*start index */, 6+16 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1798 TEST_ASSERT_STRING("abc interior def", groupBuf
, TRUE
);
1799 utext_close(actual
);
1801 /* Capture group #1. Should succeed. */
1802 status
= U_ZERO_ERROR
;
1804 actual
= uregex_groupUText(re
, 1, NULL
, &groupLen
, &status
);
1805 TEST_ASSERT_SUCCESS(status
);
1806 TEST_ASSERT(9 == utext_getNativeIndex(actual
)); /* index of " interior " within "noise abc interior def ... " */
1807 /* (within the string text1) */
1808 TEST_ASSERT(10 == groupLen
); /* length of " interior " */
1809 utext_extract(actual
, 9 /*start index*/, 9+10 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1810 TEST_ASSERT_STRING(" interior ", groupBuf
, TRUE
);
1812 utext_close(actual
);
1814 /* Capture group out of range. Error. */
1815 status
= U_ZERO_ERROR
;
1816 actual
= uregex_groupUText(re
, 2, NULL
, &groupLen
, &status
);
1817 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
1818 utext_close(actual
);
1829 UText replText
= UTEXT_INITIALIZER
;
1831 const char str_Replxxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1832 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1833 const char str_u00411U00000042a
[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1834 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1835 const char str_1x
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1836 const char str_ReplaceAaaBax1xxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1837 status
= U_ZERO_ERROR
;
1838 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1839 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1840 utext_openUTF8(&replText
, str_1x
, -1, &status
);
1842 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1843 TEST_ASSERT_SUCCESS(status
);
1845 /* Normal case, with match */
1846 uregex_setText(re
, text1
, -1, &status
);
1847 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1848 TEST_ASSERT_SUCCESS(status
);
1849 TEST_ASSERT_UTEXT(str_Replxxx
, result
);
1850 utext_close(result
);
1852 /* No match. Text should copy to output with no changes. */
1853 uregex_setText(re
, text2
, -1, &status
);
1854 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1855 TEST_ASSERT_SUCCESS(status
);
1856 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1857 utext_close(result
);
1859 /* Unicode escapes */
1860 uregex_setText(re
, text1
, -1, &status
);
1861 utext_openUTF8(&replText
, str_u00411U00000042a
, -1, &status
);
1862 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1863 TEST_ASSERT_SUCCESS(status
);
1864 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx
, result
);
1865 utext_close(result
);
1868 utext_close(&replText
);
1878 UText replText
= UTEXT_INITIALIZER
;
1880 const char str_1
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1881 const char str_Replaceaa1
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1882 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1883 status
= U_ZERO_ERROR
;
1884 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1885 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1886 utext_openUTF8(&replText
, str_1
, -1, &status
);
1888 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1889 TEST_ASSERT_SUCCESS(status
);
1891 /* Normal case, with match */
1892 uregex_setText(re
, text1
, -1, &status
);
1893 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1894 TEST_ASSERT_SUCCESS(status
);
1895 TEST_ASSERT_UTEXT(str_Replaceaa1
, result
);
1896 utext_close(result
);
1898 /* No match. Text should copy to output with no changes. */
1899 uregex_setText(re
, text2
, -1, &status
);
1900 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1901 TEST_ASSERT_SUCCESS(status
);
1902 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1903 utext_close(result
);
1906 utext_close(&replText
);
1911 /* appendReplacement() */
1920 status
= U_ZERO_ERROR
;
1921 re
= uregex_openC(".*", 0, 0, &status
);
1922 TEST_ASSERT_SUCCESS(status
);
1924 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1925 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1926 uregex_setText(re
, text
, -1, &status
);
1928 /* match covers whole target string */
1929 uregex_find(re
, 0, &status
);
1930 TEST_ASSERT_SUCCESS(status
);
1932 bufCap
= UPRV_LENGTHOF(buf
);
1933 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1934 TEST_ASSERT_SUCCESS(status
);
1935 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1937 /* Match has \u \U escapes */
1938 uregex_find(re
, 0, &status
);
1939 TEST_ASSERT_SUCCESS(status
);
1941 bufCap
= UPRV_LENGTHOF(buf
);
1942 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1943 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1944 TEST_ASSERT_SUCCESS(status
);
1945 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1952 /* appendReplacement(), appendTail() checked in replaceFirst(), replaceAll(). */
1959 UChar textToSplit
[80];
1965 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1966 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1968 status
= U_ZERO_ERROR
;
1969 re
= uregex_openC(":", 0, NULL
, &status
);
1974 uregex_setText(re
, textToSplit
, -1, &status
);
1975 TEST_ASSERT_SUCCESS(status
);
1977 /* The TEST_ASSERT_SUCCESS call above should change too... */
1978 if (U_SUCCESS(status
)) {
1979 memset(fields
, 0, sizeof(fields
));
1980 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
1981 TEST_ASSERT_SUCCESS(status
);
1983 /* The TEST_ASSERT_SUCCESS call above should change too... */
1984 if(U_SUCCESS(status
)) {
1985 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1986 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1987 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1988 TEST_ASSERT(numFields
== 3);
1989 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
1990 TEST_ASSERT_UTEXT(str_second
, fields
[1]);
1991 TEST_ASSERT_UTEXT(str_third
, fields
[2]);
1992 TEST_ASSERT(fields
[3] == NULL
);
1994 for(i
= 0; i
< numFields
; i
++) {
1995 utext_close(fields
[i
]);
2002 /* Split with too few output strings available */
2003 status
= U_ZERO_ERROR
;
2004 re
= uregex_openC(":", 0, NULL
, &status
);
2005 uregex_setText(re
, textToSplit
, -1, &status
);
2006 TEST_ASSERT_SUCCESS(status
);
2008 /* The TEST_ASSERT_SUCCESS call above should change too... */
2009 if(U_SUCCESS(status
)) {
2012 fields
[2] = &patternText
;
2013 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2014 TEST_ASSERT_SUCCESS(status
);
2016 /* The TEST_ASSERT_SUCCESS call above should change too... */
2017 if(U_SUCCESS(status
)) {
2018 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2019 const char str_secondthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2020 TEST_ASSERT(numFields
== 2);
2021 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2022 TEST_ASSERT_UTEXT(str_secondthird
, fields
[1]);
2023 TEST_ASSERT(fields
[2] == &patternText
);
2025 for(i
= 0; i
< numFields
; i
++) {
2026 utext_close(fields
[i
]);
2033 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2034 * comes out as additional fields. */
2036 UChar textToSplit
[80];
2041 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
2043 status
= U_ZERO_ERROR
;
2044 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
2046 uregex_setText(re
, textToSplit
, -1, &status
);
2047 TEST_ASSERT_SUCCESS(status
);
2049 /* The TEST_ASSERT_SUCCESS call above should change too... */
2050 if(U_SUCCESS(status
)) {
2051 memset(fields
, 0, sizeof(fields
));
2052 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
2053 TEST_ASSERT_SUCCESS(status
);
2055 /* The TEST_ASSERT_SUCCESS call above should change too... */
2056 if(U_SUCCESS(status
)) {
2057 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2058 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2059 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2060 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2061 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2063 TEST_ASSERT(numFields
== 5);
2064 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2065 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2066 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2067 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2068 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2069 TEST_ASSERT(fields
[5] == NULL
);
2071 for(i
= 0; i
< numFields
; i
++) {
2072 utext_close(fields
[i
]);
2076 /* Split with too few output strings available (2) */
2077 status
= U_ZERO_ERROR
;
2080 fields
[2] = &patternText
;
2081 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2082 TEST_ASSERT_SUCCESS(status
);
2084 /* The TEST_ASSERT_SUCCESS call above should change too... */
2085 if(U_SUCCESS(status
)) {
2086 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2087 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2088 TEST_ASSERT(numFields
== 2);
2089 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2090 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[1]);
2091 TEST_ASSERT(fields
[2] == &patternText
);
2093 for(i
= 0; i
< numFields
; i
++) {
2094 utext_close(fields
[i
]);
2098 /* Split with too few output strings available (3) */
2099 status
= U_ZERO_ERROR
;
2103 fields
[3] = &patternText
;
2104 numFields
= uregex_splitUText(re
, fields
, 3, &status
);
2105 TEST_ASSERT_SUCCESS(status
);
2107 /* The TEST_ASSERT_SUCCESS call above should change too... */
2108 if(U_SUCCESS(status
)) {
2109 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2110 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2111 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2112 TEST_ASSERT(numFields
== 3);
2113 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2114 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2115 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[2]);
2116 TEST_ASSERT(fields
[3] == &patternText
);
2118 for(i
= 0; i
< numFields
; i
++) {
2119 utext_close(fields
[i
]);
2122 /* Split with just enough output strings available (5) */
2123 status
= U_ZERO_ERROR
;
2129 fields
[5] = &patternText
;
2130 numFields
= uregex_splitUText(re
, fields
, 5, &status
);
2131 TEST_ASSERT_SUCCESS(status
);
2133 /* The TEST_ASSERT_SUCCESS call above should change too... */
2134 if(U_SUCCESS(status
)) {
2135 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2136 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2137 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2138 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2139 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2141 TEST_ASSERT(numFields
== 5);
2142 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2143 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2144 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2145 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2146 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2147 TEST_ASSERT(fields
[5] == &patternText
);
2149 for(i
= 0; i
< numFields
; i
++) {
2150 utext_close(fields
[i
]);
2153 /* Split, end of text is a field delimiter. */
2154 status
= U_ZERO_ERROR
;
2155 uregex_setText(re
, textToSplit
, (int32_t)strlen("first <tag-a> second<tag-b>"), &status
);
2156 TEST_ASSERT_SUCCESS(status
);
2158 /* The TEST_ASSERT_SUCCESS call above should change too... */
2159 if(U_SUCCESS(status
)) {
2160 memset(fields
, 0, sizeof(fields
));
2161 fields
[9] = &patternText
;
2162 numFields
= uregex_splitUText(re
, fields
, 9, &status
);
2163 TEST_ASSERT_SUCCESS(status
);
2165 /* The TEST_ASSERT_SUCCESS call above should change too... */
2166 if(U_SUCCESS(status
)) {
2167 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2168 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2169 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2170 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2171 const char str_empty
[] = { 0x00 };
2173 TEST_ASSERT(numFields
== 5);
2174 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2175 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2176 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2177 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2178 TEST_ASSERT_UTEXT(str_empty
, fields
[4]);
2179 TEST_ASSERT(fields
[5] == NULL
);
2180 TEST_ASSERT(fields
[8] == NULL
);
2181 TEST_ASSERT(fields
[9] == &patternText
);
2183 for(i
= 0; i
< numFields
; i
++) {
2184 utext_close(fields
[i
]);
2190 utext_close(&patternText
);
2194 static void TestRefreshInput(void) {
2196 * RefreshInput changes out the input of a URegularExpression without
2197 * changing anything else in the match state. Used with Java JNI,
2198 * when Java moves the underlying string storage. This test
2199 * runs a find() loop, moving the text after the first match.
2200 * The right number of matches should still be found.
2202 UChar testStr
[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2203 UChar movedStr
[] = { 0, 0, 0, 0, 0, 0};
2204 UErrorCode status
= U_ZERO_ERROR
;
2205 URegularExpression
*re
;
2206 UText ut1
= UTEXT_INITIALIZER
;
2207 UText ut2
= UTEXT_INITIALIZER
;
2209 re
= uregex_openC("[ABC]", 0, 0, &status
);
2210 TEST_ASSERT_SUCCESS(status
);
2212 utext_openUChars(&ut1
, testStr
, -1, &status
);
2213 TEST_ASSERT_SUCCESS(status
);
2214 uregex_setUText(re
, &ut1
, &status
);
2215 TEST_ASSERT_SUCCESS(status
);
2217 /* Find the first match "A" in the original string */
2218 TEST_ASSERT(uregex_findNext(re
, &status
));
2219 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
2221 /* Move the string, kill the original string. */
2222 u_strcpy(movedStr
, testStr
);
2223 u_memset(testStr
, 0, u_strlen(testStr
));
2224 utext_openUChars(&ut2
, movedStr
, -1, &status
);
2225 TEST_ASSERT_SUCCESS(status
);
2226 uregex_refreshUText(re
, &ut2
, &status
);
2227 TEST_ASSERT_SUCCESS(status
);
2229 /* Find the following two matches, now working in the moved string. */
2230 TEST_ASSERT(uregex_findNext(re
, &status
));
2231 TEST_ASSERT(uregex_start(re
, 0, &status
) == 2);
2232 TEST_ASSERT(uregex_findNext(re
, &status
));
2233 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
2234 TEST_ASSERT(FALSE
== uregex_findNext(re
, &status
));
2240 static void TestBug8421(void) {
2241 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2244 URegularExpression
*re
;
2245 UErrorCode status
= U_ZERO_ERROR
;
2248 re
= uregex_openC("abc", 0, 0, &status
);
2249 TEST_ASSERT_SUCCESS(status
);
2251 limit
= uregex_getTimeLimit(re
, &status
);
2252 TEST_ASSERT_SUCCESS(status
);
2253 TEST_ASSERT(limit
== 0);
2255 uregex_setTimeLimit(re
, 100, &status
);
2256 TEST_ASSERT_SUCCESS(status
);
2257 limit
= uregex_getTimeLimit(re
, &status
);
2258 TEST_ASSERT_SUCCESS(status
);
2259 TEST_ASSERT(limit
== 100);
2264 static UBool U_CALLCONV
FindCallback(const void* context
, int64_t matchIndex
) {
2265 // suppress compiler warnings about unused variables
2271 static UBool U_CALLCONV
MatchCallback(const void *context
, int32_t steps
) {
2272 // suppress compiler warnings about unused variables
2278 static void TestBug10815() {
2279 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2280 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2282 URegularExpression
*re
;
2283 UErrorCode status
= U_ZERO_ERROR
;
2287 // findNext() with a find progress callback function.
2289 re
= uregex_openC(".z", 0, 0, &status
);
2290 TEST_ASSERT_SUCCESS(status
);
2292 u_uastrncpy(text
, "Hello, World.", UPRV_LENGTHOF(text
));
2293 uregex_setText(re
, text
, -1, &status
);
2294 TEST_ASSERT_SUCCESS(status
);
2296 uregex_setFindProgressCallback(re
, FindCallback
, NULL
, &status
);
2297 TEST_ASSERT_SUCCESS(status
);
2299 uregex_findNext(re
, &status
);
2300 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2304 // findNext() with a match progress callback function.
2306 status
= U_ZERO_ERROR
;
2307 re
= uregex_openC("((xxx)*)*y", 0, 0, &status
);
2308 TEST_ASSERT_SUCCESS(status
);
2310 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2311 // it will appear to be stuck in a (near) infinite loop.
2312 u_uastrncpy(text
, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text
));
2313 uregex_setText(re
, text
, -1, &status
);
2314 TEST_ASSERT_SUCCESS(status
);
2316 uregex_setMatchCallback(re
, MatchCallback
, NULL
, &status
);
2317 TEST_ASSERT_SUCCESS(status
);
2319 uregex_findNext(re
, &status
);
2320 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2325 static const UChar startLinePattern
[] = { 0x5E, 0x78, 0 }; // "^x"
2327 static void TestMatchStartLineWithEmptyText() {
2328 UErrorCode status
= U_ZERO_ERROR
;
2329 UText
* ut
= utext_openUChars(NULL
, NULL
, 0, &status
);
2330 TEST_ASSERT_SUCCESS(status
);
2331 if (U_SUCCESS(status
)) {
2332 URegularExpression
*re
= uregex_open(startLinePattern
, -1, 0, NULL
, &status
);
2333 TEST_ASSERT_SUCCESS(status
);
2334 if (U_SUCCESS(status
)) {
2335 uregex_setUText(re
, ut
, &status
);
2336 TEST_ASSERT(U_SUCCESS(status
));
2337 if (U_SUCCESS(status
)) {
2338 UBool found
= uregex_findNext(re
, &status
);
2339 TEST_ASSERT(U_SUCCESS(status
) && !found
);
2347 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */