1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
19 * More complete testing of regular expression functionality is done with the C++ tests.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
/* Log a data error (file, line and ICU error name) when `status` holds a failure code.
 * The macro only logs; execution continues with the statement after it. */
36 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
37 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
/* Log a test error quoting the text of `expr` when `expr` compares equal to FALSE.
 * The expansion is a brace block, so a call site compiles with or without a trailing ';'. */
39 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
40 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
43 * TEST_SETUP and TEST_TEARDOWN
44 * macros to handle the boilerplate around setting up regex test cases.
45 * parameters to setup:
46 * pattern: The regex pattern, a (char *) null terminated C string.
47 * testString: The string data, also a (char *) C string.
48 * flags: Regex flags to set when compiling the pattern
50 * Put arbitrary test code between SETUP and TEARDOWN.
51 * 're' is the compiled, ready-to-go regular expression.
/*
 * TEST_SETUP opens a brace block, resets `status`, compiles `pattern` into `re`
 * with uregex_openC(), converts `testString` into a malloc'ed UChar buffer
 * (`srcString`) and installs that buffer with uregex_setText(). It ends with an
 * unclosed `if (U_SUCCESS(status)) {`, so the test code written after the macro
 * runs only when the setup succeeded. `re` and `status` are assigned but not
 * declared here, so the enclosing function must declare them.
 * NOTE(review): the malloc() result is passed to u_uastrncpy() without a NULL check.
 */
53 #define TEST_SETUP(pattern, testString, flags) { \
54 UChar *srcString = NULL; \
55 status = U_ZERO_ERROR; \
56 re = uregex_openC(pattern, flags, NULL, &status); \
57 TEST_ASSERT_SUCCESS(status); \
58 int32_t testStringLen = (int32_t)strlen(testString); \
59 srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
60 u_uastrncpy(srcString, testString, testStringLen + 1); \
61 uregex_setText(re, srcString, -1, &status); \
62 TEST_ASSERT_SUCCESS(status); \
63 if (U_SUCCESS(status)) {
/*
 * TEST_TEARDOWN: only its TEST_ASSERT_SUCCESS(status) line is visible in this
 * excerpt. Presumably it also closes the blocks opened by TEST_SETUP and releases
 * `re` and `srcString` -- confirm against the full file.
 */
65 #define TEST_TEARDOWN \
67 TEST_ASSERT_SUCCESS(status); \
74 * @param expected utf-8 array of bytes to be expected
76 static void test_assert_string(const char *expected
, const UChar
*actual
, UBool nulTerm
, const char *file
, int line
) {
77 char buf_inside_macro
[120];
78 int32_t len
= (int32_t)strlen(expected
);
81 u_austrncpy(buf_inside_macro
, (actual
), len
+1);
82 buf_inside_macro
[len
+2] = 0;
83 success
= (strcmp((expected
), buf_inside_macro
) == 0);
85 u_austrncpy(buf_inside_macro
, (actual
), len
);
86 buf_inside_macro
[len
+1] = 0;
87 success
= (strncmp((expected
), buf_inside_macro
, len
) == 0);
89 if (success
== FALSE
) {
90 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
91 file
, line
, (expected
), buf_inside_macro
);
/* Forward to test_assert_string(), supplying the call site's __FILE__ and __LINE__. */
95 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
/*
 * Compare a UTF-8 C string with the contents of a UText, one code point at a time.
 * The UText is rewound to native index 0 first. `stringsEqual` starts out TRUE and
 * is cleared on the first mismatch.
 * NOTE(review): the loop header, the break statements and the return statement are
 * not part of this excerpt; presumably the function returns stringsEqual -- confirm
 * against the full file.
 */
98 static UBool
equals_utf8_utext(const char *utf8
, UText
*utext
) {
102 UBool stringsEqual
= TRUE
;
103 utext_setNativeIndex(utext
, 0);
/* Fetch the next code point from each side. */
105 U8_NEXT_UNSAFE(utf8
, u8i
, u8c
);
106 utc
= utext_next32(utext
);
/* Both inputs ended together: NUL on the UTF-8 side, U_SENTINEL on the UText side. */
107 if (u8c
== 0 && utc
== U_SENTINEL
) {
/* Mismatch: the code points differ, or the UTF-8 string has ended (NUL) while the UText has not. */
110 if (u8c
!= utc
|| u8c
== 0) {
111 stringsEqual
= FALSE
;
/*
 * Check that the UText `actual` holds the same text as the UTF-8 string `expected`
 * (via equals_utf8_utext()). On a mismatch, log the caller's file and line together
 * with the expected string, then walk `actual` from index 0 to U_SENTINEL to report
 * what it really contains.
 * NOTE(review): the bodies of the two branches inside the loop are not visible in
 * this excerpt; presumably code points strictly between 0x20 and 0x7e are logged as
 * characters and all others numerically -- confirm against the full file.
 */
119 static void test_assert_utext(const char *expected
, UText
*actual
, const char *file
, int line
) {
120 utext_setNativeIndex(actual
, 0);
121 if (!equals_utf8_utext(expected
, actual
)) {
/* The message is left open (ends with an opening quote); the loop below supplies the actual text. */
123 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file
, line
, expected
);
124 c
= utext_next32From(actual
, 0);
125 while (c
!= U_SENTINEL
) {
126 if (0x20<c
&& c
<0x7e) {
131 c
= UTEXT_NEXT32(actual
);
138 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
139 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
/* Forward to test_assert_utext(), supplying the call site's __FILE__ and __LINE__. */
141 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
/*
 * Walk two UTexts in lock step from native index 0, reading one code point from
 * each per iteration, until `uta` is exhausted (U_SENTINEL).
 * NOTE(review): the comparison inside the loop and the return statement are not
 * visible in this excerpt; presumably the function reports whether both texts
 * yielded identical code points -- confirm against the full file.
 */
143 static UBool
testUTextEqual(UText
*uta
, UText
*utb
) {
/* Rewind both inputs so the comparison always starts at the beginning. */
146 utext_setNativeIndex(uta
, 0);
147 utext_setNativeIndex(utb
, 0);
149 ca
= utext_next32(uta
);
150 cb
= utext_next32(utb
);
154 } while (ca
!= U_SENTINEL
);
/* Forward declarations of the test functions registered by addURegexTest() below. */
161 static void TestRegexCAPI(void);
162 static void TestBug4315(void);
163 static void TestUTextAPI(void);
164 static void TestRefreshInput(void);
165 static void TestBug8421(void);
166 static void TestBug10815(void);
167 static void TestMatchStartLineWithEmptyText(void);
/* Prototype for the non-static registration entry point defined right below. */
169 void addURegexTest(TestNode
** root
);
/*
 * Register every regex C API test in this file with the test tree at `root`.
 * Each test is added under the path "regex/<function name>".
 */
171 void addURegexTest(TestNode
** root
)
173 addTest(root
, &TestRegexCAPI
, "regex/TestRegexCAPI");
174 addTest(root
, &TestBug4315
, "regex/TestBug4315");
175 addTest(root
, &TestUTextAPI
, "regex/TestUTextAPI");
176 addTest(root
, &TestRefreshInput
, "regex/TestRefreshInput");
177 addTest(root
, &TestBug8421
, "regex/TestBug8421");
178 addTest(root
, &TestBug10815
, "regex/TestBug10815");
179 addTest(root
, &TestMatchStartLineWithEmptyText
, "regex/TestMatchStartLineWithEmptyText");
183 * Call back function and context struct used for testing
184 * regular expression user callbacks. This test is mostly the same as
185 * the corresponding C++ test in intltest.
/* Context handed to TestCallbackFn(). The field list is not visible in this excerpt;
 * TestCallbackFn() reads or writes the members lastSteps, numCalls and maxCalls. */
187 typedef struct callBackContext
{
/*
 * Callback used to test regular expression user callbacks.
 *
 * `context` is cast to a callBackContext. The callback verifies that `steps` is
 * exactly one more than the value recorded on the previous call (logging an error
 * otherwise), records `steps` in lastSteps, and returns TRUE while numCalls is
 * still below maxCalls.
 * NOTE(review): no update of numCalls is visible in this excerpt; presumably it is
 * incremented on a line that is missing here -- confirm against the full file.
 */
193 static UBool U_EXPORT2 U_CALLCONV
194 TestCallbackFn(const void *context
, int32_t steps
) {
195 callBackContext
*info
= (callBackContext
*)context
;
/* The step count is expected to grow by exactly one between consecutive calls. */
196 if (info
->lastSteps
+1 != steps
) {
197 log_err("incorrect steps in callback. Expected %d, got %d\n", info
->lastSteps
+1, steps
);
199 info
->lastSteps
= steps
;
201 return (info
->numCalls
< info
->maxCalls
);
205 * Regular Expression C API Tests
207 static void TestRegexCAPI(void) {
208 UErrorCode status
= U_ZERO_ERROR
;
209 URegularExpression
*re
;
213 memset(&minus1
, -1, sizeof(minus1
));
215 /* Minimalist open/close */
216 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
217 re
= uregex_open(pat
, -1, 0, 0, &status
);
218 if (U_FAILURE(status
)) {
219 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
224 /* Open with all flag values set */
225 status
= U_ZERO_ERROR
;
226 re
= uregex_open(pat
, -1,
227 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
| UREGEX_LITERAL
,
229 TEST_ASSERT_SUCCESS(status
);
232 /* Open with an invalid flag */
233 status
= U_ZERO_ERROR
;
234 re
= uregex_open(pat
, -1, 0x40000000, 0, &status
);
235 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
238 /* Open with an unimplemented flag */
239 status
= U_ZERO_ERROR
;
240 re
= uregex_open(pat
, -1, UREGEX_CANON_EQ
, 0, &status
);
241 TEST_ASSERT(status
== U_REGEX_UNIMPLEMENTED
);
244 /* openC with an invalid parameter */
245 status
= U_ZERO_ERROR
;
246 re
= uregex_openC(NULL
,
247 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
248 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
250 /* openC called with a failure already in status: must return NULL and leave status unchanged */
251 status
= U_USELESS_COLLATOR_ERROR
;
252 re
= uregex_openC(NULL
,
253 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
254 TEST_ASSERT(status
== U_USELESS_COLLATOR_ERROR
&& re
== NULL
);
256 /* openC open from a C string */
260 status
= U_ZERO_ERROR
;
261 re
= uregex_openC("abc*", 0, 0, &status
);
262 TEST_ASSERT_SUCCESS(status
);
263 p
= uregex_pattern(re
, &len
, &status
);
264 TEST_ASSERT_SUCCESS(status
);
266 /* The TEST_ASSERT_SUCCESS above should change too... */
267 if(U_SUCCESS(status
)) {
268 u_uastrncpy(pat
, "abc*", UPRV_LENGTHOF(pat
));
269 TEST_ASSERT(u_strcmp(pat
, p
) == 0);
270 TEST_ASSERT(len
==(int32_t)strlen("abc*"));
275 /* TODO: Open with ParseError parameter */
282 URegularExpression
*clone1
;
283 URegularExpression
*clone2
;
284 URegularExpression
*clone3
;
285 UChar testString1
[30];
286 UChar testString2
[30];
290 status
= U_ZERO_ERROR
;
291 re
= uregex_openC("abc*", 0, 0, &status
);
292 TEST_ASSERT_SUCCESS(status
);
293 clone1
= uregex_clone(re
, &status
);
294 TEST_ASSERT_SUCCESS(status
);
295 TEST_ASSERT(clone1
!= NULL
);
297 status
= U_ZERO_ERROR
;
298 clone2
= uregex_clone(re
, &status
);
299 TEST_ASSERT_SUCCESS(status
);
300 TEST_ASSERT(clone2
!= NULL
);
303 status
= U_ZERO_ERROR
;
304 clone3
= uregex_clone(clone2
, &status
);
305 TEST_ASSERT_SUCCESS(status
);
306 TEST_ASSERT(clone3
!= NULL
);
308 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
309 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
311 status
= U_ZERO_ERROR
;
312 uregex_setText(clone1
, testString1
, -1, &status
);
313 TEST_ASSERT_SUCCESS(status
);
314 result
= uregex_lookingAt(clone1
, 0, &status
);
315 TEST_ASSERT_SUCCESS(status
);
316 TEST_ASSERT(result
==TRUE
);
318 status
= U_ZERO_ERROR
;
319 uregex_setText(clone2
, testString2
, -1, &status
);
320 TEST_ASSERT_SUCCESS(status
);
321 result
= uregex_lookingAt(clone2
, 0, &status
);
322 TEST_ASSERT_SUCCESS(status
);
323 TEST_ASSERT(result
==FALSE
);
324 result
= uregex_find(clone2
, 0, &status
);
325 TEST_ASSERT_SUCCESS(status
);
326 TEST_ASSERT(result
==TRUE
);
328 uregex_close(clone1
);
329 uregex_close(clone2
);
330 uregex_close(clone3
);
338 const UChar
*resultPat
;
340 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
));
341 status
= U_ZERO_ERROR
;
342 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
343 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
344 TEST_ASSERT_SUCCESS(status
);
346 /* The TEST_ASSERT_SUCCESS above should change too... */
347 if (U_SUCCESS(status
)) {
348 TEST_ASSERT(resultLen
== -1);
349 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
354 status
= U_ZERO_ERROR
;
355 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
356 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
357 TEST_ASSERT_SUCCESS(status
);
358 TEST_ASSERT_SUCCESS(status
);
360 /* The TEST_ASSERT_SUCCESS above should change too... */
361 if (U_SUCCESS(status
)) {
362 TEST_ASSERT(resultLen
== 3);
363 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
364 TEST_ASSERT(u_strlen(resultPat
) == 3);
376 status
= U_ZERO_ERROR
;
377 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
378 t
= uregex_flags(re
, &status
);
379 TEST_ASSERT_SUCCESS(status
);
383 status
= U_ZERO_ERROR
;
384 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
385 t
= uregex_flags(re
, &status
);
386 TEST_ASSERT_SUCCESS(status
);
390 status
= U_ZERO_ERROR
;
391 re
= uregex_open(pat
, -1, UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
, NULL
, &status
);
392 t
= uregex_flags(re
, &status
);
393 TEST_ASSERT_SUCCESS(status
);
394 TEST_ASSERT(t
== (UREGEX_CASE_INSENSITIVE
| UREGEX_DOTALL
));
399 * setText() and lookingAt()
406 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
407 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
408 status
= U_ZERO_ERROR
;
409 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
410 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
411 TEST_ASSERT_SUCCESS(status
);
413 /* Operation before doing a setText should fail... */
414 status
= U_ZERO_ERROR
;
415 uregex_lookingAt(re
, 0, &status
);
416 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
418 status
= U_ZERO_ERROR
;
419 uregex_setText(re
, text1
, -1, &status
);
420 result
= uregex_lookingAt(re
, 0, &status
);
421 TEST_ASSERT(result
== TRUE
);
422 TEST_ASSERT_SUCCESS(status
);
424 status
= U_ZERO_ERROR
;
425 uregex_setText(re
, text2
, -1, &status
);
426 result
= uregex_lookingAt(re
, 0, &status
);
427 TEST_ASSERT(result
== FALSE
);
428 TEST_ASSERT_SUCCESS(status
);
430 status
= U_ZERO_ERROR
;
431 uregex_setText(re
, text1
, -1, &status
);
432 result
= uregex_lookingAt(re
, 0, &status
);
433 TEST_ASSERT(result
== TRUE
);
434 TEST_ASSERT_SUCCESS(status
);
436 status
= U_ZERO_ERROR
;
437 uregex_setText(re
, text1
, 5, &status
);
438 result
= uregex_lookingAt(re
, 0, &status
);
439 TEST_ASSERT(result
== FALSE
);
440 TEST_ASSERT_SUCCESS(status
);
442 status
= U_ZERO_ERROR
;
443 uregex_setText(re
, text1
, 6, &status
);
444 result
= uregex_lookingAt(re
, 0, &status
);
445 TEST_ASSERT(result
== TRUE
);
446 TEST_ASSERT_SUCCESS(status
);
461 u_uastrncpy(text1
, "abcccd", UPRV_LENGTHOF(text1
));
462 u_uastrncpy(text2
, "abcccxd", UPRV_LENGTHOF(text2
));
463 status
= U_ZERO_ERROR
;
464 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
465 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
467 uregex_setText(re
, text1
, -1, &status
);
468 result
= uregex_getText(re
, &textLength
, &status
);
469 TEST_ASSERT(result
== text1
);
470 TEST_ASSERT(textLength
== -1);
471 TEST_ASSERT_SUCCESS(status
);
473 status
= U_ZERO_ERROR
;
474 uregex_setText(re
, text2
, 7, &status
);
475 result
= uregex_getText(re
, &textLength
, &status
);
476 TEST_ASSERT(result
== text2
);
477 TEST_ASSERT(textLength
== 7);
478 TEST_ASSERT_SUCCESS(status
);
480 status
= U_ZERO_ERROR
;
481 uregex_setText(re
, text2
, 4, &status
);
482 result
= uregex_getText(re
, &textLength
, &status
);
483 TEST_ASSERT(result
== text2
);
484 TEST_ASSERT(textLength
== 4);
485 TEST_ASSERT_SUCCESS(status
);
496 UChar nullString
[] = {0,0,0};
498 u_uastrncpy(text1
, "abcccde", UPRV_LENGTHOF(text1
));
499 status
= U_ZERO_ERROR
;
500 u_uastrncpy(pat
, "abc*d", UPRV_LENGTHOF(pat
));
501 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
503 uregex_setText(re
, text1
, -1, &status
);
504 result
= uregex_matches(re
, 0, &status
);
505 TEST_ASSERT(result
== FALSE
);
506 TEST_ASSERT_SUCCESS(status
);
508 status
= U_ZERO_ERROR
;
509 uregex_setText(re
, text1
, 6, &status
);
510 result
= uregex_matches(re
, 0, &status
);
511 TEST_ASSERT(result
== TRUE
);
512 TEST_ASSERT_SUCCESS(status
);
514 status
= U_ZERO_ERROR
;
515 uregex_setText(re
, text1
, 6, &status
);
516 result
= uregex_matches(re
, 1, &status
);
517 TEST_ASSERT(result
== FALSE
);
518 TEST_ASSERT_SUCCESS(status
);
521 status
= U_ZERO_ERROR
;
522 re
= uregex_openC(".?", 0, NULL
, &status
);
523 uregex_setText(re
, text1
, -1, &status
);
524 len
= u_strlen(text1
);
525 result
= uregex_matches(re
, len
, &status
);
526 TEST_ASSERT(result
== TRUE
);
527 TEST_ASSERT_SUCCESS(status
);
529 status
= U_ZERO_ERROR
;
530 uregex_setText(re
, nullString
, -1, &status
);
531 TEST_ASSERT_SUCCESS(status
);
532 result
= uregex_matches(re
, 0, &status
);
533 TEST_ASSERT(result
== TRUE
);
534 TEST_ASSERT_SUCCESS(status
);
540 * lookingAt() Used in setText test.
545 * find(), findNext, start, end, reset
550 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
551 status
= U_ZERO_ERROR
;
552 re
= uregex_openC("rx", 0, NULL
, &status
);
554 uregex_setText(re
, text1
, -1, &status
);
555 result
= uregex_find(re
, 0, &status
);
556 TEST_ASSERT(result
== TRUE
);
557 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
558 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
559 TEST_ASSERT_SUCCESS(status
);
561 result
= uregex_find(re
, 9, &status
);
562 TEST_ASSERT(result
== TRUE
);
563 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
564 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
565 TEST_ASSERT_SUCCESS(status
);
567 result
= uregex_find(re
, 14, &status
);
568 TEST_ASSERT(result
== FALSE
);
569 TEST_ASSERT_SUCCESS(status
);
571 status
= U_ZERO_ERROR
;
572 uregex_reset(re
, 0, &status
);
574 result
= uregex_findNext(re
, &status
);
575 TEST_ASSERT(result
== TRUE
);
576 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
577 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
578 TEST_ASSERT_SUCCESS(status
);
580 result
= uregex_findNext(re
, &status
);
581 TEST_ASSERT(result
== TRUE
);
582 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
583 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
584 TEST_ASSERT_SUCCESS(status
);
586 status
= U_ZERO_ERROR
;
587 uregex_reset(re
, 12, &status
);
589 result
= uregex_findNext(re
, &status
);
590 TEST_ASSERT(result
== TRUE
);
591 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
592 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
593 TEST_ASSERT_SUCCESS(status
);
595 result
= uregex_findNext(re
, &status
);
596 TEST_ASSERT(result
== FALSE
);
597 TEST_ASSERT_SUCCESS(status
);
608 status
= U_ZERO_ERROR
;
609 re
= uregex_openC("abc", 0, NULL
, &status
);
610 result
= uregex_groupCount(re
, &status
);
611 TEST_ASSERT_SUCCESS(status
);
612 TEST_ASSERT(result
== 0);
615 status
= U_ZERO_ERROR
;
616 re
= uregex_openC("abc(def)(ghi(j))", 0, NULL
, &status
);
617 result
= uregex_groupCount(re
, &status
);
618 TEST_ASSERT_SUCCESS(status
);
619 TEST_ASSERT(result
== 3);
633 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
635 status
= U_ZERO_ERROR
;
636 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
637 TEST_ASSERT_SUCCESS(status
);
640 uregex_setText(re
, text1
, -1, &status
);
641 result
= uregex_find(re
, 0, &status
);
642 TEST_ASSERT(result
==TRUE
);
644 /* Capture Group 0, the full match. Should succeed. */
645 status
= U_ZERO_ERROR
;
646 resultSz
= uregex_group(re
, 0, buf
, UPRV_LENGTHOF(buf
), &status
);
647 TEST_ASSERT_SUCCESS(status
);
648 TEST_ASSERT_STRING("abc interior def", buf
, TRUE
);
649 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
651 /* Capture group #1. Should succeed. */
652 status
= U_ZERO_ERROR
;
653 resultSz
= uregex_group(re
, 1, buf
, UPRV_LENGTHOF(buf
), &status
);
654 TEST_ASSERT_SUCCESS(status
);
655 TEST_ASSERT_STRING(" interior ", buf
, TRUE
);
656 TEST_ASSERT(resultSz
== (int32_t)strlen(" interior "));
658 /* Capture group out of range. Error. */
659 status
= U_ZERO_ERROR
;
660 uregex_group(re
, 2, buf
, UPRV_LENGTHOF(buf
), &status
);
661 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
663 /* NULL buffer, pure pre-flight */
664 status
= U_ZERO_ERROR
;
665 resultSz
= uregex_group(re
, 0, NULL
, 0, &status
);
666 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
667 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
669 /* Too small buffer, truncated string */
670 status
= U_ZERO_ERROR
;
671 memset(buf
, -1, sizeof(buf
));
672 resultSz
= uregex_group(re
, 0, buf
, 5, &status
);
673 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
674 TEST_ASSERT_STRING("abc i", buf
, FALSE
);
675 TEST_ASSERT(buf
[5] == (UChar
)0xffff);
676 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
678 /* Output string just fits buffer, no NUL term. */
679 status
= U_ZERO_ERROR
;
680 resultSz
= uregex_group(re
, 0, buf
, (int32_t)strlen("abc interior def"), &status
);
681 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
682 TEST_ASSERT_STRING("abc interior def", buf
, FALSE
);
683 TEST_ASSERT(resultSz
== (int32_t)strlen("abc interior def"));
684 TEST_ASSERT(buf
[strlen("abc interior def")] == (UChar
)0xffff);
695 /* SetRegion(), getRegion() do something */
696 TEST_SETUP(".*", "0123456789ABCDEF", 0)
697 UChar resultString
[40];
698 TEST_ASSERT(uregex_regionStart(re
, &status
) == 0);
699 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 16);
700 uregex_setRegion(re
, 3, 6, &status
);
701 TEST_ASSERT(uregex_regionStart(re
, &status
) == 3);
702 TEST_ASSERT(uregex_regionEnd(re
, &status
) == 6);
703 TEST_ASSERT(uregex_findNext(re
, &status
));
704 TEST_ASSERT(uregex_group(re
, 0, resultString
, UPRV_LENGTHOF(resultString
), &status
) == 3)
705 TEST_ASSERT_STRING("345", resultString
, TRUE
);
708 /* find(start=-1) uses regions */
709 TEST_SETUP(".*", "0123456789ABCDEF", 0);
710 uregex_setRegion(re
, 4, 6, &status
);
711 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
712 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
713 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
716 /* find (start >=0) does not use regions */
717 TEST_SETUP(".*", "0123456789ABCDEF", 0);
718 uregex_setRegion(re
, 4, 6, &status
);
719 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
720 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
721 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
724 /* findNext() obeys regions */
725 TEST_SETUP(".", "0123456789ABCDEF", 0);
726 uregex_setRegion(re
, 4, 6, &status
);
727 TEST_ASSERT(uregex_findNext(re
,&status
) == TRUE
);
728 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
729 TEST_ASSERT(uregex_findNext(re
, &status
) == TRUE
);
730 TEST_ASSERT(uregex_start(re
, 0, &status
) == 5);
731 TEST_ASSERT(uregex_findNext(re
, &status
) == FALSE
);
734 /* matches(start=-1) uses regions */
735 /* Also, verify that non-greedy *? succeeds in finding the full match. */
736 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
737 uregex_setRegion(re
, 4, 6, &status
);
738 TEST_ASSERT(uregex_matches(re
, -1, &status
) == TRUE
);
739 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
740 TEST_ASSERT(uregex_end(re
, 0, &status
) == 6);
743 /* matches (start >=0) does not use regions */
744 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
745 uregex_setRegion(re
, 4, 6, &status
);
746 TEST_ASSERT(uregex_matches(re
, 0, &status
) == TRUE
);
747 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
748 TEST_ASSERT(uregex_end(re
, 0, &status
) == 16);
751 /* lookingAt(start=-1) uses regions */
752 /* Also, verify that non-greedy *? finds the first (shortest) match. */
753 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
754 uregex_setRegion(re
, 4, 6, &status
);
755 TEST_ASSERT(uregex_lookingAt(re
, -1, &status
) == TRUE
);
756 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
757 TEST_ASSERT(uregex_end(re
, 0, &status
) == 4);
760 /* lookingAt (start >=0) does not use regions */
761 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
762 uregex_setRegion(re
, 4, 6, &status
);
763 TEST_ASSERT(uregex_lookingAt(re
, 0, &status
) == TRUE
);
764 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
765 TEST_ASSERT(uregex_end(re
, 0, &status
) == 0);
769 TEST_SETUP("[a-f]*", "abcdefghij", 0);
770 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
771 TEST_ASSERT(uregex_hitEnd(re
, &status
) == FALSE
);
774 TEST_SETUP("[a-f]*", "abcdef", 0);
775 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
776 TEST_ASSERT(uregex_hitEnd(re
, &status
) == TRUE
);
780 TEST_SETUP("abcd", "abcd", 0);
781 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
782 TEST_ASSERT(uregex_requireEnd(re
, &status
) == FALSE
);
785 TEST_SETUP("abcd$", "abcd", 0);
786 TEST_ASSERT(uregex_find(re
, 0, &status
) == TRUE
);
787 TEST_ASSERT(uregex_requireEnd(re
, &status
) == TRUE
);
790 /* anchoringBounds */
791 TEST_SETUP("abc$", "abcdef", 0);
792 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == TRUE
);
793 uregex_useAnchoringBounds(re
, FALSE
, &status
);
794 TEST_ASSERT(uregex_hasAnchoringBounds(re
, &status
) == FALSE
);
796 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
);
797 uregex_useAnchoringBounds(re
, TRUE
, &status
);
798 uregex_setRegion(re
, 0, 3, &status
);
799 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
);
800 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
803 /* Transparent Bounds */
804 TEST_SETUP("abc(?=def)", "abcdef", 0);
805 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == FALSE
);
806 uregex_useTransparentBounds(re
, TRUE
, &status
);
807 TEST_ASSERT(uregex_hasTransparentBounds(re
, &status
) == TRUE
);
809 uregex_useTransparentBounds(re
, FALSE
, &status
);
810 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* No Region */
811 uregex_setRegion(re
, 0, 3, &status
);
812 TEST_ASSERT(uregex_find(re
, -1, &status
) == FALSE
); /* with region, opaque bounds */
813 uregex_useTransparentBounds(re
, TRUE
, &status
);
814 TEST_ASSERT(uregex_find(re
, -1, &status
) == TRUE
); /* with region, transparent bounds */
815 TEST_ASSERT(uregex_end(re
, 0, &status
) == 3);
828 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
829 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
830 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
832 status
= U_ZERO_ERROR
;
833 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
834 TEST_ASSERT_SUCCESS(status
);
836 /* Normal case, with match */
837 uregex_setText(re
, text1
, -1, &status
);
838 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
839 TEST_ASSERT_SUCCESS(status
);
840 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, TRUE
);
841 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
843 /* No match. Text should copy to output with no changes. */
844 status
= U_ZERO_ERROR
;
845 uregex_setText(re
, text2
, -1, &status
);
846 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
847 TEST_ASSERT_SUCCESS(status
);
848 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
849 TEST_ASSERT(resultSz
== (int32_t)strlen("No match here."));
851 /* Match, output exactly fills the buffer: no NUL terminator, expect U_STRING_NOT_TERMINATED_WARNING. */
852 status
= U_ZERO_ERROR
;
853 uregex_setText(re
, text1
, -1, &status
);
854 memset(buf
, -1, sizeof(buf
));
855 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, (int32_t)strlen("Replace <aa> x1x x...x."), &status
);
856 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
857 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
858 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
859 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
861 /* Do the replaceFirst again, without first resetting anything.
862 * Should give the same results.
864 status
= U_ZERO_ERROR
;
865 memset(buf
, -1, sizeof(buf
));
866 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, (int32_t)strlen("Replace <aa> x1x x...x."), &status
);
867 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
868 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf
, FALSE
);
869 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
870 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
872 /* NULL buffer, zero buffer length */
873 status
= U_ZERO_ERROR
;
874 resultSz
= uregex_replaceFirst(re
, replText
, -1, NULL
, 0, &status
);
875 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
876 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
878 /* Buffer too small by one */
879 status
= U_ZERO_ERROR
;
880 memset(buf
, -1, sizeof(buf
));
881 resultSz
= uregex_replaceFirst(re
, replText
, -1, buf
, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status
);
882 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
883 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf
, FALSE
);
884 TEST_ASSERT(resultSz
== (int32_t)strlen("Replace xaax x1x x...x."));
885 TEST_ASSERT(buf
[resultSz
] == (UChar
)0xffff);
895 UChar text1
[80]; /* "Replace xaax x1x x...x." */
896 UChar text2
[80]; /* "No match Here" */
897 UChar replText
[80]; /* "<$1>" */
898 UChar replText2
[80]; /* "<<$1>>" */
899 const char * pattern
= "x(.*?)x";
900 const char * expectedResult
= "Replace <aa> <1> <...>.";
901 const char * expectedResult2
= "Replace <<aa>> <<1>> <<...>>.";
904 int32_t expectedResultSize
;
905 int32_t expectedResultSize2
;
908 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
909 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
910 u_uastrncpy(replText
, "<$1>", UPRV_LENGTHOF(replText
));
911 u_uastrncpy(replText2
, "<<$1>>", UPRV_LENGTHOF(replText2
));
912 expectedResultSize
= (int32_t)strlen(expectedResult
);
913 expectedResultSize2
= (int32_t)strlen(expectedResult2
);
915 status
= U_ZERO_ERROR
;
916 re
= uregex_openC(pattern
, 0, NULL
, &status
);
917 TEST_ASSERT_SUCCESS(status
);
919 /* Normal case, with match */
920 uregex_setText(re
, text1
, -1, &status
);
921 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
922 TEST_ASSERT_SUCCESS(status
);
923 TEST_ASSERT_STRING(expectedResult
, buf
, TRUE
);
924 TEST_ASSERT(resultSize
== expectedResultSize
);
926 /* No match. Text should copy to output with no changes. */
927 status
= U_ZERO_ERROR
;
928 uregex_setText(re
, text2
, -1, &status
);
929 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, UPRV_LENGTHOF(buf
), &status
);
930 TEST_ASSERT_SUCCESS(status
);
931 TEST_ASSERT_STRING("No match here.", buf
, TRUE
);
932 TEST_ASSERT(resultSize
== u_strlen(text2
));
934 /* Match, output exactly fills the buffer: no NUL terminator, expect U_STRING_NOT_TERMINATED_WARNING. */
935 status
= U_ZERO_ERROR
;
936 uregex_setText(re
, text1
, -1, &status
);
937 memset(buf
, -1, sizeof(buf
));
938 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, expectedResultSize
, &status
);
939 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
940 TEST_ASSERT_STRING(expectedResult
, buf
, FALSE
);
941 TEST_ASSERT(resultSize
== expectedResultSize
);
942 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
944 /* Do the replaceAll again, without first resetting anything.
945 * Should give the same results.
947 status
= U_ZERO_ERROR
;
948 memset(buf
, -1, sizeof(buf
));
949 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, (int32_t)strlen("Replace xaax x1x x...x."), &status
);
950 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
951 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf
, FALSE
);
952 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
953 TEST_ASSERT(buf
[resultSize
] == (UChar
)0xffff);
955 /* NULL buffer, zero buffer length */
956 status
= U_ZERO_ERROR
;
957 resultSize
= uregex_replaceAll(re
, replText
, -1, NULL
, 0, &status
);
958 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
959 TEST_ASSERT(resultSize
== (int32_t)strlen("Replace <aa> <1> <...>."));
961 /* Buffer too small. Try every size, which will tickle edge cases
962 * in uregex_appendReplacement (used by replaceAll) */
963 for (i
=0; i
<expectedResultSize
; i
++) {
965 status
= U_ZERO_ERROR
;
966 memset(buf
, -1, sizeof(buf
));
967 resultSize
= uregex_replaceAll(re
, replText
, -1, buf
, i
, &status
);
968 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
969 strcpy(expected
, expectedResult
);
971 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
972 TEST_ASSERT(resultSize
== expectedResultSize
);
973 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
976 /* Buffer too small. Same as previous test, except this time the replacement
977 * text is longer than the match capture group, making the length of the complete
978 * replacement longer than the original string.
980 for (i
=0; i
<expectedResultSize2
; i
++) {
982 status
= U_ZERO_ERROR
;
983 memset(buf
, -1, sizeof(buf
));
984 resultSize
= uregex_replaceAll(re
, replText2
, -1, buf
, i
, &status
);
985 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
986 strcpy(expected
, expectedResult2
);
988 TEST_ASSERT_STRING(expected
, buf
, FALSE
);
989 TEST_ASSERT(resultSize
== expectedResultSize2
);
990 TEST_ASSERT(buf
[i
] == (UChar
)0xffff);
999 * appendReplacement()
1009 status
= U_ZERO_ERROR
;
1010 re
= uregex_openC(".*", 0, 0, &status
);
1011 TEST_ASSERT_SUCCESS(status
);
1013 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1014 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1015 uregex_setText(re
, text
, -1, &status
);
1017 /* match covers whole target string */
1018 uregex_find(re
, 0, &status
);
1019 TEST_ASSERT_SUCCESS(status
);
1021 bufCap
= UPRV_LENGTHOF(buf
);
1022 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1023 TEST_ASSERT_SUCCESS(status
);
1024 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1026 /* Replacement text has \u \U escapes */
1027 uregex_find(re
, 0, &status
);
1028 TEST_ASSERT_SUCCESS(status
);
1030 bufCap
= UPRV_LENGTHOF(buf
);
1031 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1032 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1033 TEST_ASSERT_SUCCESS(status
);
1034 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1036 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1037 status
= U_ZERO_ERROR
;
1038 uregex_find(re
, 0, &status
);
1039 TEST_ASSERT_SUCCESS(status
);
1041 status
= U_BUFFER_OVERFLOW_ERROR
;
1042 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, NULL
, &status
);
1043 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1050 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1057 UChar textToSplit
[80];
1062 int32_t requiredCapacity
;
1063 int32_t spaceNeeded
;
1066 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1067 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1069 status
= U_ZERO_ERROR
;
1070 re
= uregex_openC(":", 0, NULL
, &status
);
1075 uregex_setText(re
, textToSplit
, -1, &status
);
1076 TEST_ASSERT_SUCCESS(status
);
1078 /* The TEST_ASSERT_SUCCESS call above should change too... */
1079 if (U_SUCCESS(status
)) {
1080 memset(fields
, -1, sizeof(fields
));
1082 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1083 TEST_ASSERT_SUCCESS(status
);
1085 /* The TEST_ASSERT_SUCCESS call above should change too... */
1086 if(U_SUCCESS(status
)) {
1087 TEST_ASSERT(numFields
== 3);
1088 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1089 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1090 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1091 TEST_ASSERT(fields
[3] == NULL
);
1093 spaceNeeded
= u_strlen(textToSplit
) -
1094 (numFields
- 1) + /* Field delimiters do not appear in output */
1095 numFields
; /* Each field gets a NUL terminator */
1097 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1104 /* Split with too few output strings available */
1105 status
= U_ZERO_ERROR
;
1106 re
= uregex_openC(":", 0, NULL
, &status
);
1107 uregex_setText(re
, textToSplit
, -1, &status
);
1108 TEST_ASSERT_SUCCESS(status
);
1110 /* The TEST_ASSERT_SUCCESS call above should change too... */
1111 if(U_SUCCESS(status
)) {
1112 memset(fields
, -1, sizeof(fields
));
1114 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1115 TEST_ASSERT_SUCCESS(status
);
1117 /* The TEST_ASSERT_SUCCESS call above should change too... */
1118 if(U_SUCCESS(status
)) {
1119 TEST_ASSERT(numFields
== 2);
1120 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1121 TEST_ASSERT_STRING(" second: third", fields
[1], TRUE
);
1122 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1124 spaceNeeded
= u_strlen(textToSplit
) -
1125 (numFields
- 1) + /* Field delimiters do not appear in output */
1126 numFields
; /* Each field gets a NUL terminator */
1128 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1130 /* Split with a range of output buffer sizes. */
1131 spaceNeeded
= u_strlen(textToSplit
) -
1132 (numFields
- 1) + /* Field delimiters do not appear in output */
1133 numFields
; /* Each field gets a NUL terminator */
1135 for (sz
=0; sz
< spaceNeeded
+1; sz
++) {
1136 memset(fields
, -1, sizeof(fields
));
1137 status
= U_ZERO_ERROR
;
1139 uregex_split(re
, buf
, sz
, &requiredCapacity
, fields
, 10, &status
);
1140 if (sz
>= spaceNeeded
) {
1141 TEST_ASSERT_SUCCESS(status
);
1142 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1143 TEST_ASSERT_STRING(" second", fields
[1], TRUE
);
1144 TEST_ASSERT_STRING(" third", fields
[2], TRUE
);
1146 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
1148 TEST_ASSERT(numFields
== 3);
1149 TEST_ASSERT(fields
[3] == NULL
);
1150 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1161 /* Split(), part 2. Patterns with capture groups. The capture group text
1162 * comes out as additional fields. */
1164 UChar textToSplit
[80];
1168 int32_t requiredCapacity
;
1169 int32_t spaceNeeded
;
1172 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
1174 status
= U_ZERO_ERROR
;
1175 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
1177 uregex_setText(re
, textToSplit
, -1, &status
);
1178 TEST_ASSERT_SUCCESS(status
);
1180 /* The TEST_ASSERT_SUCCESS call above should change too... */
1181 if(U_SUCCESS(status
)) {
1182 memset(fields
, -1, sizeof(fields
));
1184 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 10, &status
);
1185 TEST_ASSERT_SUCCESS(status
);
1187 /* The TEST_ASSERT_SUCCESS call above should change too... */
1188 if(U_SUCCESS(status
)) {
1189 TEST_ASSERT(numFields
== 5);
1190 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1191 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1192 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1193 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1194 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1195 TEST_ASSERT(fields
[5] == NULL
);
1196 spaceNeeded
= (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1197 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1201 /* Split with too few output strings available (2) */
1202 status
= U_ZERO_ERROR
;
1203 memset(fields
, -1, sizeof(fields
));
1205 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 2, &status
);
1206 TEST_ASSERT_SUCCESS(status
);
1208 /* The TEST_ASSERT_SUCCESS call above should change too... */
1209 if(U_SUCCESS(status
)) {
1210 TEST_ASSERT(numFields
== 2);
1211 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1212 TEST_ASSERT_STRING(" second<tag-b> third", fields
[1], TRUE
);
1213 TEST_ASSERT(!memcmp(&fields
[2],&minus1
,sizeof(UChar
*)));
1215 spaceNeeded
= (int32_t)strlen("first . second<tag-b> third."); /* "." at NUL positions */
1216 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1219 /* Split with too few output strings available (3) */
1220 status
= U_ZERO_ERROR
;
1221 memset(fields
, -1, sizeof(fields
));
1223 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 3, &status
);
1224 TEST_ASSERT_SUCCESS(status
);
1226 /* The TEST_ASSERT_SUCCESS call above should change too... */
1227 if(U_SUCCESS(status
)) {
1228 TEST_ASSERT(numFields
== 3);
1229 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1230 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1231 TEST_ASSERT_STRING(" second<tag-b> third", fields
[2], TRUE
);
1232 TEST_ASSERT(!memcmp(&fields
[3],&minus1
,sizeof(UChar
*)));
1234 spaceNeeded
= (int32_t)strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1235 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1238 /* Split with just enough output strings available (5) */
1239 status
= U_ZERO_ERROR
;
1240 memset(fields
, -1, sizeof(fields
));
1242 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 5, &status
);
1243 TEST_ASSERT_SUCCESS(status
);
1245 /* The TEST_ASSERT_SUCCESS call above should change too... */
1246 if(U_SUCCESS(status
)) {
1247 TEST_ASSERT(numFields
== 5);
1248 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1249 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1250 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1251 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1252 TEST_ASSERT_STRING(" third", fields
[4], TRUE
);
1253 TEST_ASSERT(!memcmp(&fields
[5],&minus1
,sizeof(UChar
*)));
1255 spaceNeeded
= (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1256 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1259 /* Split, end of text is a field delimiter. */
1260 status
= U_ZERO_ERROR
;
1261 sz
= (int32_t)strlen("first <tag-a> second<tag-b>");
1262 uregex_setText(re
, textToSplit
, sz
, &status
);
1263 TEST_ASSERT_SUCCESS(status
);
1265 /* The TEST_ASSERT_SUCCESS call above should change too... */
1266 if(U_SUCCESS(status
)) {
1267 memset(fields
, -1, sizeof(fields
));
1269 uregex_split(re
, buf
, UPRV_LENGTHOF(buf
), &requiredCapacity
, fields
, 9, &status
);
1270 TEST_ASSERT_SUCCESS(status
);
1272 /* The TEST_ASSERT_SUCCESS call above should change too... */
1273 if(U_SUCCESS(status
)) {
1274 TEST_ASSERT(numFields
== 5);
1275 TEST_ASSERT_STRING("first ", fields
[0], TRUE
);
1276 TEST_ASSERT_STRING("tag-a", fields
[1], TRUE
);
1277 TEST_ASSERT_STRING(" second", fields
[2], TRUE
);
1278 TEST_ASSERT_STRING("tag-b", fields
[3], TRUE
);
1279 TEST_ASSERT_STRING("", fields
[4], TRUE
);
1280 TEST_ASSERT(fields
[5] == NULL
);
1281 TEST_ASSERT(fields
[8] == NULL
);
1282 TEST_ASSERT(!memcmp(&fields
[9],&minus1
,sizeof(UChar
*)));
1283 spaceNeeded
= (int32_t)strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1284 TEST_ASSERT(spaceNeeded
== requiredCapacity
);
1294 TEST_SETUP("abc$", "abcdef", 0);
1295 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 0);
1296 uregex_setTimeLimit(re
, 1000, &status
);
1297 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1298 TEST_ASSERT_SUCCESS(status
);
1299 uregex_setTimeLimit(re
, -1, &status
);
1300 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1301 status
= U_ZERO_ERROR
;
1302 TEST_ASSERT(uregex_getTimeLimit(re
, &status
) == 1000);
1306 * set/get Stack Limit
1308 TEST_SETUP("abc$", "abcdef", 0);
1309 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 8000000);
1310 uregex_setStackLimit(re
, 40000, &status
);
1311 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1312 TEST_ASSERT_SUCCESS(status
);
1313 uregex_setStackLimit(re
, -1, &status
);
1314 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
);
1315 status
= U_ZERO_ERROR
;
1316 TEST_ASSERT(uregex_getStackLimit(re
, &status
) == 40000);
1321 * Get/Set callback functions
1322 * This test is copied from intltest regex/Callbacks
1323 * The pattern and test data will run long enough to cause the callback
1324 * to be invoked. The nested '+' operators give exponential time
1325 * behavior with increasing string length.
1327 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1328 callBackContext cbInfo
= {4, 0, 0};
1329 const void *pContext
= &cbInfo
;
1330 URegexMatchCallback
*returnedFn
= &TestCallbackFn
;
1332 /* Getting the callback fn when it hasn't been set must return NULL */
1333 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1334 TEST_ASSERT_SUCCESS(status
);
1335 TEST_ASSERT(returnedFn
== NULL
);
1336 TEST_ASSERT(pContext
== NULL
);
1338 /* Set the callback and do a match. */
1339 /* The callback function should record that it has been called. */
1340 uregex_setMatchCallback(re
, &TestCallbackFn
, &cbInfo
, &status
);
1341 TEST_ASSERT_SUCCESS(status
);
1342 TEST_ASSERT(cbInfo
.numCalls
== 0);
1343 TEST_ASSERT(uregex_matches(re
, -1, &status
) == FALSE
);
1344 TEST_ASSERT_SUCCESS(status
);
1345 TEST_ASSERT(cbInfo
.numCalls
> 0);
1347 /* Getting the callback should return the values that were set above. */
1348 uregex_getMatchCallback(re
, &returnedFn
, &pContext
, &status
);
1349 TEST_ASSERT(returnedFn
== &TestCallbackFn
);
1350 TEST_ASSERT(pContext
== &cbInfo
);
/*
 * TestBug4315: exercises uregex_split() in two passes over the same input.
 *
 *   Pass 1: a NULL destination buffer with capacity 0 is supplied. The test
 *           expects U_BUFFER_OVERFLOW_ERROR, a returned field count of 3, and
 *           the required buffer length reported through neededLength1.
 *   Pass 2: a buffer of (neededLength1 + 1) UChars is allocated and the split
 *           is repeated. The test expects success, 3 fields again, the same
 *           required length (neededLength2 == neededLength1), the three
 *           expected field strings, and a NULL in destFields[3].
 *
 * NOTE(review): the assignment to thePattern is not visible in this excerpt;
 * the expected fields ("The qui", "brown fox ... bla", "turtle.") suggest the
 * delimiter pattern is "ck " -- confirm against the full file.
 * NOTE(review): the declaration and release of textBuff are not visible in
 * this excerpt -- confirm the malloc'd buffer is freed.
 */
1357 static void TestBug4315(void) {
/* Error code shared by every ICU call in this test; reset between passes. */
1358 UErrorCode theICUError
= U_ZERO_ERROR
;
1359 URegularExpression
*theRegEx
;
1361 const char *thePattern
;
1362 UChar theString
[100];
/* Output array of field pointers; its capacity is mirrored in destFieldsSize. */
1363 UChar
*destFields
[24];
/* Required destination length as reported by the first and second split. */
1364 int32_t neededLength1
;
1365 int32_t neededLength2
;
1367 int32_t wordCount
= 0;
1368 int32_t destFieldsSize
= 24;
/* Convert the invariant-character test sentence into the UChar input buffer. */
1371 u_uastrcpy(theString
, "The quick brown fox jumped over the slow black turtle.");
/* Compile the delimiter pattern with no flags and no parse-error output. */
1374 theRegEx
= uregex_openC(thePattern
, 0, NULL
, &theICUError
);
1375 TEST_ASSERT_SUCCESS(theICUError
);
1377 /* set the input string */
1378 uregex_setText(theRegEx
, theString
, u_strlen(theString
), &theICUError
);
1379 TEST_ASSERT_SUCCESS(theICUError
);
1382 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1384 wordCount
= uregex_split(theRegEx
, NULL
, 0, &neededLength1
, destFields
,
1385 destFieldsSize
, &theICUError
);
1387 TEST_ASSERT(theICUError
== U_BUFFER_OVERFLOW_ERROR
);
1388 TEST_ASSERT(wordCount
==3);
1390 if(theICUError
== U_BUFFER_OVERFLOW_ERROR
)
1392 theICUError
= U_ZERO_ERROR
;
1393 textBuff
= (UChar
*) malloc(sizeof(UChar
) * (neededLength1
+ 1));
1394 wordCount
= uregex_split(theRegEx
, textBuff
, neededLength1
+1, &neededLength2
,
1395 destFields
, destFieldsSize
, &theICUError
);
1396 TEST_ASSERT(wordCount
==3);
1397 TEST_ASSERT_SUCCESS(theICUError
);
1398 TEST_ASSERT(neededLength1
== neededLength2
);
1399 TEST_ASSERT_STRING("The qui", destFields
[0], TRUE
);
1400 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields
[1], TRUE
);
1401 TEST_ASSERT_STRING("turtle.", destFields
[2], TRUE
);
1402 TEST_ASSERT(destFields
[3] == NULL
);
1405 uregex_close(theRegEx
);
1408 /* Based on TestRegexCAPI() */
1409 static void TestUTextAPI(void) {
1410 UErrorCode status
= U_ZERO_ERROR
;
1411 URegularExpression
*re
;
1412 UText patternText
= UTEXT_INITIALIZER
;
1414 const char patternTextUTF8
[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1416 /* Minimalist open/close */
1417 utext_openUTF8(&patternText
, patternTextUTF8
, -1, &status
);
1418 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1419 if (U_FAILURE(status
)) {
1420 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__
, __LINE__
, u_errorName(status
));
1421 utext_close(&patternText
);
1426 /* Open with all flag values set */
1427 status
= U_ZERO_ERROR
;
1428 re
= uregex_openUText(&patternText
,
1429 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
,
1431 TEST_ASSERT_SUCCESS(status
);
1434 /* Open with an invalid flag */
1435 status
= U_ZERO_ERROR
;
1436 re
= uregex_openUText(&patternText
, 0x40000000, 0, &status
);
1437 TEST_ASSERT(status
== U_REGEX_INVALID_FLAG
);
1440 /* open with an invalid parameter */
1441 status
= U_ZERO_ERROR
;
1442 re
= uregex_openUText(NULL
,
1443 UREGEX_CASE_INSENSITIVE
| UREGEX_COMMENTS
| UREGEX_DOTALL
| UREGEX_MULTILINE
| UREGEX_UWORD
, 0, &status
);
1444 TEST_ASSERT(status
== U_ILLEGAL_ARGUMENT_ERROR
&& re
== NULL
);
1450 URegularExpression
*clone1
;
1451 URegularExpression
*clone2
;
1452 URegularExpression
*clone3
;
1453 UChar testString1
[30];
1454 UChar testString2
[30];
1458 status
= U_ZERO_ERROR
;
1459 re
= uregex_openUText(&patternText
, 0, 0, &status
);
1460 TEST_ASSERT_SUCCESS(status
);
1461 clone1
= uregex_clone(re
, &status
);
1462 TEST_ASSERT_SUCCESS(status
);
1463 TEST_ASSERT(clone1
!= NULL
);
1465 status
= U_ZERO_ERROR
;
1466 clone2
= uregex_clone(re
, &status
);
1467 TEST_ASSERT_SUCCESS(status
);
1468 TEST_ASSERT(clone2
!= NULL
);
1471 status
= U_ZERO_ERROR
;
1472 clone3
= uregex_clone(clone2
, &status
);
1473 TEST_ASSERT_SUCCESS(status
);
1474 TEST_ASSERT(clone3
!= NULL
);
1476 u_uastrncpy(testString1
, "abcccd", UPRV_LENGTHOF(pat
));
1477 u_uastrncpy(testString2
, "xxxabcccd", UPRV_LENGTHOF(pat
));
1479 status
= U_ZERO_ERROR
;
1480 uregex_setText(clone1
, testString1
, -1, &status
);
1481 TEST_ASSERT_SUCCESS(status
);
1482 result
= uregex_lookingAt(clone1
, 0, &status
);
1483 TEST_ASSERT_SUCCESS(status
);
1484 TEST_ASSERT(result
==TRUE
);
1486 status
= U_ZERO_ERROR
;
1487 uregex_setText(clone2
, testString2
, -1, &status
);
1488 TEST_ASSERT_SUCCESS(status
);
1489 result
= uregex_lookingAt(clone2
, 0, &status
);
1490 TEST_ASSERT_SUCCESS(status
);
1491 TEST_ASSERT(result
==FALSE
);
1492 result
= uregex_find(clone2
, 0, &status
);
1493 TEST_ASSERT_SUCCESS(status
);
1494 TEST_ASSERT(result
==TRUE
);
1496 uregex_close(clone1
);
1497 uregex_close(clone2
);
1498 uregex_close(clone3
);
1503 * pattern() and patternText()
1506 const UChar
*resultPat
;
1509 const char str_hello
[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1510 const char str_hel
[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1511 u_uastrncpy(pat
, "hello", UPRV_LENGTHOF(pat
)); /* for comparison */
1512 status
= U_ZERO_ERROR
;
1514 utext_openUTF8(&patternText
, str_hello
, -1, &status
);
1515 re
= uregex_open(pat
, -1, 0, NULL
, &status
);
1516 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1517 TEST_ASSERT_SUCCESS(status
);
1519 /* The TEST_ASSERT_SUCCESS above should change too... */
1520 if (U_SUCCESS(status
)) {
1521 TEST_ASSERT(resultLen
== -1);
1522 TEST_ASSERT(u_strcmp(resultPat
, pat
) == 0);
1525 resultText
= uregex_patternUText(re
, &status
);
1526 TEST_ASSERT_SUCCESS(status
);
1527 TEST_ASSERT_UTEXT(str_hello
, resultText
);
1531 status
= U_ZERO_ERROR
;
1532 re
= uregex_open(pat
, 3, 0, NULL
, &status
);
1533 resultPat
= uregex_pattern(re
, &resultLen
, &status
);
1534 TEST_ASSERT_SUCCESS(status
);
1536 /* The TEST_ASSERT_SUCCESS above should change too... */
1537 if (U_SUCCESS(status
)) {
1538 TEST_ASSERT(resultLen
== 3);
1539 TEST_ASSERT(u_strncmp(resultPat
, pat
, 3) == 0);
1540 TEST_ASSERT(u_strlen(resultPat
) == 3);
1543 resultText
= uregex_patternUText(re
, &status
);
1544 TEST_ASSERT_SUCCESS(status
);
1545 TEST_ASSERT_UTEXT(str_hel
, resultText
);
1551 * setUText() and lookingAt()
1554 UText text1
= UTEXT_INITIALIZER
;
1555 UText text2
= UTEXT_INITIALIZER
;
1557 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1558 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1559 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1560 status
= U_ZERO_ERROR
;
1561 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1562 utext_openUTF8(&text2
, str_abcccxd
, -1, &status
);
1564 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1565 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1566 TEST_ASSERT_SUCCESS(status
);
1568 /* Operation before doing a setText should fail... */
1569 status
= U_ZERO_ERROR
;
1570 uregex_lookingAt(re
, 0, &status
);
1571 TEST_ASSERT( status
== U_REGEX_INVALID_STATE
);
1573 status
= U_ZERO_ERROR
;
1574 uregex_setUText(re
, &text1
, &status
);
1575 result
= uregex_lookingAt(re
, 0, &status
);
1576 TEST_ASSERT(result
== TRUE
);
1577 TEST_ASSERT_SUCCESS(status
);
1579 status
= U_ZERO_ERROR
;
1580 uregex_setUText(re
, &text2
, &status
);
1581 result
= uregex_lookingAt(re
, 0, &status
);
1582 TEST_ASSERT(result
== FALSE
);
1583 TEST_ASSERT_SUCCESS(status
);
1585 status
= U_ZERO_ERROR
;
1586 uregex_setUText(re
, &text1
, &status
);
1587 result
= uregex_lookingAt(re
, 0, &status
);
1588 TEST_ASSERT(result
== TRUE
);
1589 TEST_ASSERT_SUCCESS(status
);
1592 utext_close(&text1
);
1593 utext_close(&text2
);
1598 * getText() and getUText()
1601 UText text1
= UTEXT_INITIALIZER
;
1602 UText text2
= UTEXT_INITIALIZER
;
1603 UChar text2Chars
[20];
1605 const UChar
*result
;
1607 const char str_abcccd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1608 const char str_abcccxd
[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1609 const char str_abcd
[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1612 status
= U_ZERO_ERROR
;
1613 utext_openUTF8(&text1
, str_abcccd
, -1, &status
);
1614 u_uastrncpy(text2Chars
, str_abcccxd
, UPRV_LENGTHOF(text2Chars
));
1615 utext_openUChars(&text2
, text2Chars
, -1, &status
);
1617 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1618 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1620 /* First set a UText */
1621 uregex_setUText(re
, &text1
, &status
);
1622 resultText
= uregex_getUText(re
, NULL
, &status
);
1623 TEST_ASSERT_SUCCESS(status
);
1624 TEST_ASSERT(resultText
!= &text1
);
1625 utext_setNativeIndex(resultText
, 0);
1626 utext_setNativeIndex(&text1
, 0);
1627 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1628 utext_close(resultText
);
1630 result
= uregex_getText(re
, &textLength
, &status
); /* flattens UText into buffer */
1631 (void)result
; /* Suppress set but not used warning. */
1632 TEST_ASSERT(textLength
== -1 || textLength
== 6);
1633 resultText
= uregex_getUText(re
, NULL
, &status
);
1634 TEST_ASSERT_SUCCESS(status
);
1635 TEST_ASSERT(resultText
!= &text1
);
1636 utext_setNativeIndex(resultText
, 0);
1637 utext_setNativeIndex(&text1
, 0);
1638 TEST_ASSERT(testUTextEqual(resultText
, &text1
));
1639 utext_close(resultText
);
1641 /* Then set a UChar * */
1642 uregex_setText(re
, text2Chars
, 7, &status
);
1643 resultText
= uregex_getUText(re
, NULL
, &status
);
1644 TEST_ASSERT_SUCCESS(status
);
1645 utext_setNativeIndex(resultText
, 0);
1646 utext_setNativeIndex(&text2
, 0);
1647 TEST_ASSERT(testUTextEqual(resultText
, &text2
));
1648 utext_close(resultText
);
1649 result
= uregex_getText(re
, &textLength
, &status
);
1650 TEST_ASSERT(textLength
== 7);
1653 utext_close(&text1
);
1654 utext_close(&text2
);
1661 UText text1
= UTEXT_INITIALIZER
;
1663 UText nullText
= UTEXT_INITIALIZER
;
1664 const char str_abcccde
[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1665 const char str_abcd
[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1667 status
= U_ZERO_ERROR
;
1668 utext_openUTF8(&text1
, str_abcccde
, -1, &status
);
1669 utext_openUTF8(&patternText
, str_abcd
, -1, &status
);
1670 re
= uregex_openUText(&patternText
, 0, NULL
, &status
);
1672 uregex_setUText(re
, &text1
, &status
);
1673 result
= uregex_matches(re
, 0, &status
);
1674 TEST_ASSERT(result
== FALSE
);
1675 TEST_ASSERT_SUCCESS(status
);
1678 status
= U_ZERO_ERROR
;
1679 re
= uregex_openC(".?", 0, NULL
, &status
);
1680 uregex_setUText(re
, &text1
, &status
);
1681 result
= uregex_matches(re
, 7, &status
);
1682 TEST_ASSERT(result
== TRUE
);
1683 TEST_ASSERT_SUCCESS(status
);
1685 status
= U_ZERO_ERROR
;
1686 utext_openUTF8(&nullText
, "", -1, &status
);
1687 uregex_setUText(re
, &nullText
, &status
);
1688 TEST_ASSERT_SUCCESS(status
);
1689 result
= uregex_matches(re
, 0, &status
);
1690 TEST_ASSERT(result
== TRUE
);
1691 TEST_ASSERT_SUCCESS(status
);
1694 utext_close(&text1
);
1695 utext_close(&nullText
);
1700 * lookingAt() Used in setText test.
1705 * find(), findNext, start, end, reset
1710 u_uastrncpy(text1
, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1
));
1711 status
= U_ZERO_ERROR
;
1712 re
= uregex_openC("rx", 0, NULL
, &status
);
1714 uregex_setText(re
, text1
, -1, &status
);
1715 result
= uregex_find(re
, 0, &status
);
1716 TEST_ASSERT(result
== TRUE
);
1717 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1718 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1719 TEST_ASSERT_SUCCESS(status
);
1721 result
= uregex_find(re
, 9, &status
);
1722 TEST_ASSERT(result
== TRUE
);
1723 TEST_ASSERT(uregex_start(re
, 0, &status
) == 11);
1724 TEST_ASSERT(uregex_end(re
, 0, &status
) == 13);
1725 TEST_ASSERT_SUCCESS(status
);
1727 result
= uregex_find(re
, 14, &status
);
1728 TEST_ASSERT(result
== FALSE
);
1729 TEST_ASSERT_SUCCESS(status
);
1731 status
= U_ZERO_ERROR
;
1732 uregex_reset(re
, 0, &status
);
1734 result
= uregex_findNext(re
, &status
);
1735 TEST_ASSERT(result
== TRUE
);
1736 TEST_ASSERT(uregex_start(re
, 0, &status
) == 3);
1737 TEST_ASSERT(uregex_end(re
, 0, &status
) == 5);
1738 TEST_ASSERT_SUCCESS(status
);
1740 result
= uregex_findNext(re
, &status
);
1741 TEST_ASSERT(result
== TRUE
);
1742 TEST_ASSERT(uregex_start(re
, 0, &status
) == 6);
1743 TEST_ASSERT(uregex_end(re
, 0, &status
) == 8);
1744 TEST_ASSERT_SUCCESS(status
);
1746 status
= U_ZERO_ERROR
;
1747 uregex_reset(re
, 12, &status
);
1749 result
= uregex_findNext(re
, &status
);
1750 TEST_ASSERT(result
== TRUE
);
1751 TEST_ASSERT(uregex_start(re
, 0, &status
) == 13);
1752 TEST_ASSERT(uregex_end(re
, 0, &status
) == 15);
1753 TEST_ASSERT_SUCCESS(status
);
1755 result
= uregex_findNext(re
, &status
);
1756 TEST_ASSERT(result
== FALSE
);
1757 TEST_ASSERT_SUCCESS(status
);
1769 int64_t groupLen
= 0;
1772 u_uastrncpy(text1
, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1
));
1774 status
= U_ZERO_ERROR
;
1775 re
= uregex_openC("abc(.*?)def", 0, NULL
, &status
);
1776 TEST_ASSERT_SUCCESS(status
);
1778 uregex_setText(re
, text1
, -1, &status
);
1779 result
= uregex_find(re
, 0, &status
);
1780 TEST_ASSERT(result
==TRUE
);
1782 /* Capture Group 0 with shallow clone API. Should succeed. */
1783 status
= U_ZERO_ERROR
;
1784 actual
= uregex_groupUText(re
, 0, NULL
, &groupLen
, &status
);
1785 TEST_ASSERT_SUCCESS(status
);
1787 TEST_ASSERT(utext_getNativeIndex(actual
) == 6); /* index of "abc " within "noise abc ..." */
1788 TEST_ASSERT(groupLen
== 16); /* length of "abc interior def" */
1789 utext_extract(actual
, 6 /*start index */, 6+16 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1791 TEST_ASSERT_STRING("abc interior def", groupBuf
, TRUE
);
1792 utext_close(actual
);
1794 /* Capture group #1. Should succeed. */
1795 status
= U_ZERO_ERROR
;
1797 actual
= uregex_groupUText(re
, 1, NULL
, &groupLen
, &status
);
1798 TEST_ASSERT_SUCCESS(status
);
1799 TEST_ASSERT(9 == utext_getNativeIndex(actual
)); /* index of " interior " within "noise abc interior def ... " */
1800 /* (within the string text1) */
1801 TEST_ASSERT(10 == groupLen
); /* length of " interior " */
1802 utext_extract(actual
, 9 /*start index*/, 9+10 /*limit index*/, groupBuf
, sizeof(groupBuf
), &status
);
1803 TEST_ASSERT_STRING(" interior ", groupBuf
, TRUE
);
1805 utext_close(actual
);
1807 /* Capture group out of range. Error. */
1808 status
= U_ZERO_ERROR
;
1809 actual
= uregex_groupUText(re
, 2, NULL
, &groupLen
, &status
);
1810 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
1811 utext_close(actual
);
1822 UText replText
= UTEXT_INITIALIZER
;
1824 const char str_Replxxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1825 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1826 const char str_u00411U00000042a
[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1827 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1828 const char str_1x
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1829 const char str_ReplaceAaaBax1xxx
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1830 status
= U_ZERO_ERROR
;
1831 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1832 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1833 utext_openUTF8(&replText
, str_1x
, -1, &status
);
1835 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1836 TEST_ASSERT_SUCCESS(status
);
1838 /* Normal case, with match */
1839 uregex_setText(re
, text1
, -1, &status
);
1840 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1841 TEST_ASSERT_SUCCESS(status
);
1842 TEST_ASSERT_UTEXT(str_Replxxx
, result
);
1843 utext_close(result
);
1845 /* No match. Text should copy to output with no changes. */
1846 uregex_setText(re
, text2
, -1, &status
);
1847 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1848 TEST_ASSERT_SUCCESS(status
);
1849 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1850 utext_close(result
);
1852 /* Unicode escapes */
1853 uregex_setText(re
, text1
, -1, &status
);
1854 utext_openUTF8(&replText
, str_u00411U00000042a
, -1, &status
);
1855 result
= uregex_replaceFirstUText(re
, &replText
, NULL
, &status
);
1856 TEST_ASSERT_SUCCESS(status
);
1857 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx
, result
);
1858 utext_close(result
);
1861 utext_close(&replText
);
1871 UText replText
= UTEXT_INITIALIZER
;
1873 const char str_1
[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1874 const char str_Replaceaa1
[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1875 const char str_Nomatchhere
[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1876 status
= U_ZERO_ERROR
;
1877 u_uastrncpy(text1
, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1
));
1878 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1879 utext_openUTF8(&replText
, str_1
, -1, &status
);
1881 re
= uregex_openC("x(.*?)x", 0, NULL
, &status
);
1882 TEST_ASSERT_SUCCESS(status
);
1884 /* Normal case, with match */
1885 uregex_setText(re
, text1
, -1, &status
);
1886 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1887 TEST_ASSERT_SUCCESS(status
);
1888 TEST_ASSERT_UTEXT(str_Replaceaa1
, result
);
1889 utext_close(result
);
1891 /* No match. Text should copy to output with no changes. */
1892 uregex_setText(re
, text2
, -1, &status
);
1893 result
= uregex_replaceAllUText(re
, &replText
, NULL
, &status
);
1894 TEST_ASSERT_SUCCESS(status
);
1895 TEST_ASSERT_UTEXT(str_Nomatchhere
, result
);
1896 utext_close(result
);
1899 utext_close(&replText
);
1904 * appendReplacement()
1913 status
= U_ZERO_ERROR
;
1914 re
= uregex_openC(".*", 0, 0, &status
);
1915 TEST_ASSERT_SUCCESS(status
);
1917 u_uastrncpy(text
, "whatever", UPRV_LENGTHOF(text
));
1918 u_uastrncpy(repl
, "some other", UPRV_LENGTHOF(repl
));
1919 uregex_setText(re
, text
, -1, &status
);
1921 /* match covers whole target string */
1922 uregex_find(re
, 0, &status
);
1923 TEST_ASSERT_SUCCESS(status
);
1925 bufCap
= UPRV_LENGTHOF(buf
);
1926 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1927 TEST_ASSERT_SUCCESS(status
);
1928 TEST_ASSERT_STRING("some other", buf
, TRUE
);
1930 /* Match has \u \U escapes */
1931 uregex_find(re
, 0, &status
);
1932 TEST_ASSERT_SUCCESS(status
);
1934 bufCap
= UPRV_LENGTHOF(buf
);
1935 u_uastrncpy(repl
, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl
));
1936 uregex_appendReplacement(re
, repl
, -1, &bufPtr
, &bufCap
, &status
);
1937 TEST_ASSERT_SUCCESS(status
);
1938 TEST_ASSERT_STRING("abcAB \\ $ abc", buf
, TRUE
);
1945 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1952 UChar textToSplit
[80];
1958 u_uastrncpy(textToSplit
, "first : second: third", UPRV_LENGTHOF(textToSplit
));
1959 u_uastrncpy(text2
, "No match here.", UPRV_LENGTHOF(text2
));
1961 status
= U_ZERO_ERROR
;
1962 re
= uregex_openC(":", 0, NULL
, &status
);
1967 uregex_setText(re
, textToSplit
, -1, &status
);
1968 TEST_ASSERT_SUCCESS(status
);
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */
1971 if (U_SUCCESS(status
)) {
1972 memset(fields
, 0, sizeof(fields
));
1973 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
1974 TEST_ASSERT_SUCCESS(status
);
1976 /* The TEST_ASSERT_SUCCESS call above should change too... */
1977 if(U_SUCCESS(status
)) {
1978 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1979 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1980 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1981 TEST_ASSERT(numFields
== 3);
1982 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
1983 TEST_ASSERT_UTEXT(str_second
, fields
[1]);
1984 TEST_ASSERT_UTEXT(str_third
, fields
[2]);
1985 TEST_ASSERT(fields
[3] == NULL
);
1987 for(i
= 0; i
< numFields
; i
++) {
1988 utext_close(fields
[i
]);
1995 /* Split with too few output strings available */
1996 status
= U_ZERO_ERROR
;
1997 re
= uregex_openC(":", 0, NULL
, &status
);
1998 uregex_setText(re
, textToSplit
, -1, &status
);
1999 TEST_ASSERT_SUCCESS(status
);
2001 /* The TEST_ASSERT_SUCCESS call above should change too... */
2002 if(U_SUCCESS(status
)) {
2005 fields
[2] = &patternText
;
2006 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2007 TEST_ASSERT_SUCCESS(status
);
2009 /* The TEST_ASSERT_SUCCESS call above should change too... */
2010 if(U_SUCCESS(status
)) {
2011 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2012 const char str_secondthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2013 TEST_ASSERT(numFields
== 2);
2014 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2015 TEST_ASSERT_UTEXT(str_secondthird
, fields
[1]);
2016 TEST_ASSERT(fields
[2] == &patternText
);
2018 for(i
= 0; i
< numFields
; i
++) {
2019 utext_close(fields
[i
]);
2026 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2027 * comes out as additional fields. */
2029 UChar textToSplit
[80];
2034 u_uastrncpy(textToSplit
, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit
));
2036 status
= U_ZERO_ERROR
;
2037 re
= uregex_openC("<(.*?)>", 0, NULL
, &status
);
2039 uregex_setText(re
, textToSplit
, -1, &status
);
2040 TEST_ASSERT_SUCCESS(status
);
2042 /* The TEST_ASSERT_SUCCESS call above should change too... */
2043 if(U_SUCCESS(status
)) {
2044 memset(fields
, 0, sizeof(fields
));
2045 numFields
= uregex_splitUText(re
, fields
, 10, &status
);
2046 TEST_ASSERT_SUCCESS(status
);
2048 /* The TEST_ASSERT_SUCCESS call above should change too... */
2049 if(U_SUCCESS(status
)) {
2050 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2051 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2052 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2053 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2054 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2056 TEST_ASSERT(numFields
== 5);
2057 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2058 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2059 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2060 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2061 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2062 TEST_ASSERT(fields
[5] == NULL
);
2064 for(i
= 0; i
< numFields
; i
++) {
2065 utext_close(fields
[i
]);
2069 /* Split with too few output strings available (2) */
2070 status
= U_ZERO_ERROR
;
2073 fields
[2] = &patternText
;
2074 numFields
= uregex_splitUText(re
, fields
, 2, &status
);
2075 TEST_ASSERT_SUCCESS(status
);
2077 /* The TEST_ASSERT_SUCCESS call above should change too... */
2078 if(U_SUCCESS(status
)) {
2079 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2080 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2081 TEST_ASSERT(numFields
== 2);
2082 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2083 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[1]);
2084 TEST_ASSERT(fields
[2] == &patternText
);
2086 for(i
= 0; i
< numFields
; i
++) {
2087 utext_close(fields
[i
]);
2091 /* Split with too few output strings available (3) */
2092 status
= U_ZERO_ERROR
;
2096 fields
[3] = &patternText
;
2097 numFields
= uregex_splitUText(re
, fields
, 3, &status
);
2098 TEST_ASSERT_SUCCESS(status
);
2100 /* The TEST_ASSERT_SUCCESS call above should change too... */
2101 if(U_SUCCESS(status
)) {
2102 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2103 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2104 const char str_secondtagbthird
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2105 TEST_ASSERT(numFields
== 3);
2106 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2107 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2108 TEST_ASSERT_UTEXT(str_secondtagbthird
, fields
[2]);
2109 TEST_ASSERT(fields
[3] == &patternText
);
2111 for(i
= 0; i
< numFields
; i
++) {
2112 utext_close(fields
[i
]);
2115 /* Split with just enough output strings available (5) */
2116 status
= U_ZERO_ERROR
;
2122 fields
[5] = &patternText
;
2123 numFields
= uregex_splitUText(re
, fields
, 5, &status
);
2124 TEST_ASSERT_SUCCESS(status
);
2126 /* The TEST_ASSERT_SUCCESS call above should change too... */
2127 if(U_SUCCESS(status
)) {
2128 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2129 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2130 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2131 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2132 const char str_third
[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2134 TEST_ASSERT(numFields
== 5);
2135 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2136 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2137 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2138 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2139 TEST_ASSERT_UTEXT(str_third
, fields
[4]);
2140 TEST_ASSERT(fields
[5] == &patternText
);
2142 for(i
= 0; i
< numFields
; i
++) {
2143 utext_close(fields
[i
]);
2146 /* Split, end of text is a field delimiter. */
2147 status
= U_ZERO_ERROR
;
2148 uregex_setText(re
, textToSplit
, (int32_t)strlen("first <tag-a> second<tag-b>"), &status
);
2149 TEST_ASSERT_SUCCESS(status
);
2151 /* The TEST_ASSERT_SUCCESS call above should change too... */
2152 if(U_SUCCESS(status
)) {
2153 memset(fields
, 0, sizeof(fields
));
2154 fields
[9] = &patternText
;
2155 numFields
= uregex_splitUText(re
, fields
, 9, &status
);
2156 TEST_ASSERT_SUCCESS(status
);
2158 /* The TEST_ASSERT_SUCCESS call above should change too... */
2159 if(U_SUCCESS(status
)) {
2160 const char str_first
[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2161 const char str_taga
[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2162 const char str_second
[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2163 const char str_tagb
[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2164 const char str_empty
[] = { 0x00 };
2166 TEST_ASSERT(numFields
== 5);
2167 TEST_ASSERT_UTEXT(str_first
, fields
[0]);
2168 TEST_ASSERT_UTEXT(str_taga
, fields
[1]);
2169 TEST_ASSERT_UTEXT(str_second
, fields
[2]);
2170 TEST_ASSERT_UTEXT(str_tagb
, fields
[3]);
2171 TEST_ASSERT_UTEXT(str_empty
, fields
[4]);
2172 TEST_ASSERT(fields
[5] == NULL
);
2173 TEST_ASSERT(fields
[8] == NULL
);
2174 TEST_ASSERT(fields
[9] == &patternText
);
2176 for(i
= 0; i
< numFields
; i
++) {
2177 utext_close(fields
[i
]);
2183 utext_close(&patternText
);
// TestRefreshInput: begins a findNext() sequence on a UText over testStr,
// copies the text into movedStr, zeroes the original buffer, swaps the input
// with uregex_refreshUText(), and checks that the remaining matches are still
// found at offsets 2 and 4 followed by "no more matches".
// NOTE(review): no uregex_close(re) or utext_close() call for ut1/ut2 is
// visible in this view of the function - confirm that cleanup exists.
2187 static void TestRefreshInput(void) {
2189 * RefreshInput changes out the input of a URegularExpression without
2190 * changing anything else in the match state. Used with Java JNI,
2191 * when Java moves the underlying string storage. This test
2192 * runs a find() loop, moving the text after the first match.
2193 * The right number of matches should still be found.
2195 UChar testStr
[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
// Destination buffer with the same element count (six UChars) as testStr,
// so the u_strcpy() below has room for the five characters plus terminator.
2196 UChar movedStr
[] = { 0, 0, 0, 0, 0, 0};
2197 UErrorCode status
= U_ZERO_ERROR
;
2198 URegularExpression
*re
;
2199 UText ut1
= UTEXT_INITIALIZER
;
2200 UText ut2
= UTEXT_INITIALIZER
;
// Character class: each of 'A', 'B' and 'C' in the text is its own
// one-character match, at offsets 0, 2 and 4.
2202 re
= uregex_openC("[ABC]", 0, 0, &status
);
2203 TEST_ASSERT_SUCCESS(status
);
// ut1 wraps the original buffer; it is the input for the first match only.
2205 utext_openUChars(&ut1
, testStr
, -1, &status
);
2206 TEST_ASSERT_SUCCESS(status
);
2207 uregex_setUText(re
, &ut1
, &status
);
2208 TEST_ASSERT_SUCCESS(status
);
2210 /* Find the first match "A" in the original string */
2211 TEST_ASSERT(uregex_findNext(re
, &status
));
2212 TEST_ASSERT(uregex_start(re
, 0, &status
) == 0);
2214 /* Move the string, kill the original string. */
2215 u_strcpy(movedStr
, testStr
);
// After zeroing, testStr no longer contains anything [ABC] can match, so the
// later matches can only succeed if the regex reads from movedStr.
2216 u_memset(testStr
, 0, u_strlen(testStr
));
// ut2 wraps the copy; refreshUText swaps it in without a reset, which is why
// the next findNext() is expected to continue at offset 2 rather than 0.
2217 utext_openUChars(&ut2
, movedStr
, -1, &status
);
2218 TEST_ASSERT_SUCCESS(status
);
2219 uregex_refreshUText(re
, &ut2
, &status
);
2220 TEST_ASSERT_SUCCESS(status
);
2222 /* Find the following two matches, now working in the moved string. */
2223 TEST_ASSERT(uregex_findNext(re
, &status
));
2224 TEST_ASSERT(uregex_start(re
, 0, &status
) == 2);
2225 TEST_ASSERT(uregex_findNext(re
, &status
));
2226 TEST_ASSERT(uregex_start(re
, 0, &status
) == 4);
// Nothing is left after the 'C' at offset 4, so the fourth find must fail.
2227 TEST_ASSERT(FALSE
== uregex_findNext(re
, &status
));
// TestBug8421: opens the pattern "abc" and, without any visible call that sets
// input text, checks that uregex_getTimeLimit() reports 0 initially and 100
// after uregex_setTimeLimit(re, 100, ...).
// NOTE(review): the declaration of 'limit' and any uregex_close(re) are not
// visible in this view of the function - confirm both exist.
2233 static void TestBug8421(void) {
2234 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2237 URegularExpression
*re
;
2238 UErrorCode status
= U_ZERO_ERROR
;
2241 re
= uregex_openC("abc", 0, 0, &status
);
2242 TEST_ASSERT_SUCCESS(status
);
// Baseline: a freshly opened expression is expected to report a limit of 0.
2244 limit
= uregex_getTimeLimit(re
, &status
);
2245 TEST_ASSERT_SUCCESS(status
);
2246 TEST_ASSERT(limit
== 0);
// The call under test: set a limit while no text has been attached, then read
// it back and require both a success status and the same value.
2248 uregex_setTimeLimit(re
, 100, &status
);
2249 TEST_ASSERT_SUCCESS(status
);
2250 limit
= uregex_getTimeLimit(re
, &status
);
2251 TEST_ASSERT_SUCCESS(status
);
2252 TEST_ASSERT(limit
== 100);
// Find-progress callback; TestBug10815 registers it with
// uregex_setFindProgressCallback() and a NULL context.
// NOTE(review): the body is not visible in this view. The test expects
// findNext() to finish with U_REGEX_STOPPED_BY_CALLER, so this presumably
// returns FALSE unconditionally - confirm.
2257 static UBool U_CALLCONV
FindCallback(const void* context
, int64_t matchIndex
) {
// Match callback; TestBug10815 registers it with uregex_setMatchCallback()
// and a NULL context.
// NOTE(review): the body is not visible in this view. The test expects
// findNext() to finish with U_REGEX_STOPPED_BY_CALLER, so this presumably
// returns FALSE unconditionally - confirm.
2261 static UBool U_CALLCONV
MatchCallback(const void *context
, int32_t steps
) {
// TestBug10815: calls uregex_findNext() twice, first with FindCallback
// installed as the find-progress callback and then with MatchCallback
// installed as the match callback, and checks each time that status ends up
// equal to U_REGEX_STOPPED_BY_CALLER. The return value of findNext() itself
// is not examined.
// NOTE(review): the declaration of 'text' is not visible in this view. Confirm
// its capacity exceeds the long run of 'x' copied below, because
// uregex_setText() is called with length -1 and so relies on a terminator.
2265 static void TestBug10815() {
2266 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2267 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2269 URegularExpression
*re
;
2270 UErrorCode status
= U_ZERO_ERROR
;
2274 // findNext() with a find progress callback function.
// "Hello, World." contains no 'z', so ".z" cannot match anywhere in it;
// presumably this keeps the find scanning long enough for the progress
// callback to be invoked - confirm against the uregex documentation.
2276 re
= uregex_openC(".z", 0, 0, &status
);
2277 TEST_ASSERT_SUCCESS(status
);
2279 u_uastrncpy(text
, "Hello, World.", UPRV_LENGTHOF(text
));
2280 uregex_setText(re
, text
, -1, &status
);
2281 TEST_ASSERT_SUCCESS(status
);
2283 uregex_setFindProgressCallback(re
, FindCallback
, NULL
, &status
);
2284 TEST_ASSERT_SUCCESS(status
);
2286 uregex_findNext(re
, &status
);
2287 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
2291 // findNext() with a match progress callback function.
// status is cleared because the previous step deliberately left it holding
// U_REGEX_STOPPED_BY_CALLER.
// NOTE(review): 're' is reassigned below and no uregex_close() of the first
// expression is visible in this view - confirm it is closed.
2293 status
= U_ZERO_ERROR
;
2294 re
= uregex_openC("((xxx)*)*y", 0, 0, &status
);
2295 TEST_ASSERT_SUCCESS(status
);
2297 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2298 // it will appear to be stuck in a (near) infinite loop.
2299 u_uastrncpy(text
, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text
));
2300 uregex_setText(re
, text
, -1, &status
);
2301 TEST_ASSERT_SUCCESS(status
);
2303 uregex_setMatchCallback(re
, MatchCallback
, NULL
, &status
);
2304 TEST_ASSERT_SUCCESS(status
);
2306 uregex_findNext(re
, &status
);
2307 TEST_ASSERT(status
== U_REGEX_STOPPED_BY_CALLER
);
// Zero-terminated UChar pattern (0x5E is '^', 0x78 is 'x') compiled by
// TestMatchStartLineWithEmptyText, which passes it with a length of -1.
2312 static const UChar startLinePattern
[] = { 0x5E, 0x78, 0 }; // "^x"
// TestMatchStartLineWithEmptyText: applies the "^x" pattern to a zero-length
// UText opened over a NULL buffer and checks that uregex_findNext() finishes
// with a success status and reports no match. Each step runs only if the
// previous one left status in a success state.
// NOTE(review): the closing braces and any uregex_close()/utext_close() calls
// are not visible in this view of the function - confirm cleanup exists.
2314 static void TestMatchStartLineWithEmptyText() {
2315 UErrorCode status
= U_ZERO_ERROR
;
// NULL UText to fill in, NULL string pointer, length 0: the returned UText
// represents empty input.
2316 UText
* ut
= utext_openUChars(NULL
, NULL
, 0, &status
);
2317 TEST_ASSERT_SUCCESS(status
);
2318 if (U_SUCCESS(status
)) {
// Length -1 relies on the trailing 0 in startLinePattern; flags are 0 and no
// parse-error struct is requested.
2319 URegularExpression
*re
= uregex_open(startLinePattern
, -1, 0, NULL
, &status
);
2320 TEST_ASSERT_SUCCESS(status
);
2321 if (U_SUCCESS(status
)) {
2322 uregex_setUText(re
, ut
, &status
);
2323 TEST_ASSERT(U_SUCCESS(status
));
2324 if (U_SUCCESS(status
)) {
// The expected outcome is a clean "no match": no error and found == FALSE.
2325 UBool found
= uregex_findNext(re
, &status
);
2326 TEST_ASSERT(U_SUCCESS(status
) && !found
);
2334 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */