]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/reapits.c
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35
/*
 * TEST_ASSERT_SUCCESS(status)
 *    Reports through log_data_err(), tagged with the invoking file and line,
 *    when U_FAILURE(status) is true. Nothing happens for a non-failing status.
 *    The expansion is a plain brace-enclosed block, so the macro can be used
 *    with or without a trailing semicolon.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
38
/*
 * TEST_ASSERT(expr)
 *    Reports through log_err(), with the invoking file, line and the text of
 *    the expression, when expr evaluates to false (zero).
 *    The expansion is a plain brace-enclosed block, so the macro can be used
 *    with or without a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
41
/*
 * TEST_SETUP and TEST_TEARDOWN
 *    Paired macros that handle the boilerplate around a single regex test case.
 *    Parameters to TEST_SETUP:
 *        pattern:     The regex pattern, a (char *) NUL terminated C string.
 *        testString:  The string data, also a (char *) C string.
 *        flags:       Regex flags to set when compiling the pattern.
 *
 *    Requirements on the invoking scope:
 *        "status" (UErrorCode) and "re" (URegularExpression *) must already be
 *        declared; both are assigned by TEST_SETUP.
 *
 *    TEST_SETUP opens a new brace scope, compiles the pattern into "re",
 *    converts testString into a freshly malloc'ed UChar buffer ("srcString")
 *    and sets it as the text of "re". The test code that follows runs only
 *    if all of that succeeded.
 *
 *    TEST_TEARDOWN ends the guarded test code, checks "status" once more,
 *    closes "re", frees "srcString" and closes the scope opened by TEST_SETUP.
 *
 *    Put arbitrary test code between SETUP and TEARDOWN.
 *    "re" is the compiled, ready-to-go regular expression.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    int32_t testStringLen = (int32_t)strlen(testString); \
    srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
    if (srcString == NULL) { \
        /* Allocation failed: report it through status instead of writing to NULL. */ \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString, testStringLen + 1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
}
71
72
73 /**
74 * @param expected utf-8 array of bytes to be expected
75 */
76 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
77 char buf_inside_macro[120];
78 int32_t len = (int32_t)strlen(expected);
79 UBool success;
80 if (nulTerm) {
81 u_austrncpy(buf_inside_macro, (actual), len+1);
82 buf_inside_macro[len+2] = 0;
83 success = (strcmp((expected), buf_inside_macro) == 0);
84 } else {
85 u_austrncpy(buf_inside_macro, (actual), len);
86 buf_inside_macro[len+1] = 0;
87 success = (strncmp((expected), buf_inside_macro, len) == 0);
88 }
89 if (success == FALSE) {
90 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
91 file, line, (expected), buf_inside_macro);
92 }
93 }
94
95 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
96
97
98 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
99 int32_t u8i = 0;
100 UChar32 u8c = 0;
101 UChar32 utc = 0;
102 UBool stringsEqual = TRUE;
103 utext_setNativeIndex(utext, 0);
104 for (;;) {
105 U8_NEXT_UNSAFE(utf8, u8i, u8c);
106 utc = utext_next32(utext);
107 if (u8c == 0 && utc == U_SENTINEL) {
108 break;
109 }
110 if (u8c != utc || u8c == 0) {
111 stringsEqual = FALSE;
112 break;
113 }
114 }
115 return stringsEqual;
116 }
117
118
119 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
120 utext_setNativeIndex(actual, 0);
121 if (!equals_utf8_utext(expected, actual)) {
122 UChar32 c;
123 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
124 c = utext_next32From(actual, 0);
125 while (c != U_SENTINEL) {
126 if (0x20<c && c <0x7e) {
127 log_err("%c", c);
128 } else {
129 log_err("%#x", c);
130 }
131 c = UTEXT_NEXT32(actual);
132 }
133 log_err("\"\n");
134 }
135 }
136
137 /*
138 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
139 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
140 */
141 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
142
143 static UBool testUTextEqual(UText *uta, UText *utb) {
144 UChar32 ca = 0;
145 UChar32 cb = 0;
146 utext_setNativeIndex(uta, 0);
147 utext_setNativeIndex(utb, 0);
148 do {
149 ca = utext_next32(uta);
150 cb = utext_next32(utb);
151 if (ca != cb) {
152 break;
153 }
154 } while (ca != U_SENTINEL);
155 return ca == cb;
156 }
157
158
159
160
161 static void TestRegexCAPI(void);
162 static void TestBug4315(void);
163 static void TestUTextAPI(void);
164 static void TestRefreshInput(void);
165 static void TestBug8421(void);
166 static void TestBug10815(void);
167 static void TestMatchStartLineWithEmptyText(void);
168
169 void addURegexTest(TestNode** root);
170
171 void addURegexTest(TestNode** root)
172 {
173 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
174 addTest(root, &TestBug4315, "regex/TestBug4315");
175 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
176 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
177 addTest(root, &TestBug8421, "regex/TestBug8421");
178 addTest(root, &TestBug10815, "regex/TestBug10815");
179 addTest(root, &TestMatchStartLineWithEmptyText, "regex/TestMatchStartLineWithEmptyText");
180 }
181
182 /*
183 * Call back function and context struct used for testing
184 * regular expression user callbacks. This test is mostly the same as
185 * the corresponding C++ test in intltest.
186 */
struct callBackContext {
    int32_t maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t numCalls;    /* number of times TestCallbackFn has been invoked with this context */
    int32_t lastSteps;   /* "steps" argument received by the most recent TestCallbackFn call */
};
typedef struct callBackContext callBackContext;
192
193 static UBool U_EXPORT2 U_CALLCONV
194 TestCallbackFn(const void *context, int32_t steps) {
195 callBackContext *info = (callBackContext *)context;
196 if (info->lastSteps+1 != steps) {
197 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
198 }
199 info->lastSteps = steps;
200 info->numCalls++;
201 return (info->numCalls < info->maxCalls);
202 }
203
204 /*
205 * Regular Expression C API Tests
206 */
207 static void TestRegexCAPI(void) {
208 UErrorCode status = U_ZERO_ERROR;
209 URegularExpression *re;
210 UChar pat[200];
211 UChar *minus1;
212
213 memset(&minus1, -1, sizeof(minus1));
214
215 /* Minimalist open/close */
216 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
217 re = uregex_open(pat, -1, 0, 0, &status);
218 if (U_FAILURE(status)) {
219 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
220 return;
221 }
222 uregex_close(re);
223
224 /* Open with all flag values set */
225 status = U_ZERO_ERROR;
226 re = uregex_open(pat, -1,
227 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
228 0, &status);
229 TEST_ASSERT_SUCCESS(status);
230 uregex_close(re);
231
232 /* Open with an invalid flag */
233 status = U_ZERO_ERROR;
234 re = uregex_open(pat, -1, 0x40000000, 0, &status);
235 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
236 uregex_close(re);
237
238 /* Open with an unimplemented flag */
239 status = U_ZERO_ERROR;
240 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
241 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
242 uregex_close(re);
243
244 /* openC with an invalid parameter */
245 status = U_ZERO_ERROR;
246 re = uregex_openC(NULL,
247 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
248 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
249
250 /* openC with an invalid parameter */
251 status = U_USELESS_COLLATOR_ERROR;
252 re = uregex_openC(NULL,
253 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
254 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
255
256 /* openC open from a C string */
257 {
258 const UChar *p;
259 int32_t len;
260 status = U_ZERO_ERROR;
261 re = uregex_openC("abc*", 0, 0, &status);
262 TEST_ASSERT_SUCCESS(status);
263 p = uregex_pattern(re, &len, &status);
264 TEST_ASSERT_SUCCESS(status);
265
266 /* The TEST_ASSERT_SUCCESS above should change too... */
267 if(U_SUCCESS(status)) {
268 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
269 TEST_ASSERT(u_strcmp(pat, p) == 0);
270 TEST_ASSERT(len==(int32_t)strlen("abc*"));
271 }
272
273 uregex_close(re);
274
275 /* TODO: Open with ParseError parameter */
276 }
277
278 /*
279 * clone
280 */
281 {
282 URegularExpression *clone1;
283 URegularExpression *clone2;
284 URegularExpression *clone3;
285 UChar testString1[30];
286 UChar testString2[30];
287 UBool result;
288
289
290 status = U_ZERO_ERROR;
291 re = uregex_openC("abc*", 0, 0, &status);
292 TEST_ASSERT_SUCCESS(status);
293 clone1 = uregex_clone(re, &status);
294 TEST_ASSERT_SUCCESS(status);
295 TEST_ASSERT(clone1 != NULL);
296
297 status = U_ZERO_ERROR;
298 clone2 = uregex_clone(re, &status);
299 TEST_ASSERT_SUCCESS(status);
300 TEST_ASSERT(clone2 != NULL);
301 uregex_close(re);
302
303 status = U_ZERO_ERROR;
304 clone3 = uregex_clone(clone2, &status);
305 TEST_ASSERT_SUCCESS(status);
306 TEST_ASSERT(clone3 != NULL);
307
308 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
309 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
310
311 status = U_ZERO_ERROR;
312 uregex_setText(clone1, testString1, -1, &status);
313 TEST_ASSERT_SUCCESS(status);
314 result = uregex_lookingAt(clone1, 0, &status);
315 TEST_ASSERT_SUCCESS(status);
316 TEST_ASSERT(result==TRUE);
317
318 status = U_ZERO_ERROR;
319 uregex_setText(clone2, testString2, -1, &status);
320 TEST_ASSERT_SUCCESS(status);
321 result = uregex_lookingAt(clone2, 0, &status);
322 TEST_ASSERT_SUCCESS(status);
323 TEST_ASSERT(result==FALSE);
324 result = uregex_find(clone2, 0, &status);
325 TEST_ASSERT_SUCCESS(status);
326 TEST_ASSERT(result==TRUE);
327
328 uregex_close(clone1);
329 uregex_close(clone2);
330 uregex_close(clone3);
331
332 }
333
334 /*
335 * pattern()
336 */
337 {
338 const UChar *resultPat;
339 int32_t resultLen;
340 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
341 status = U_ZERO_ERROR;
342 re = uregex_open(pat, -1, 0, NULL, &status);
343 resultPat = uregex_pattern(re, &resultLen, &status);
344 TEST_ASSERT_SUCCESS(status);
345
346 /* The TEST_ASSERT_SUCCESS above should change too... */
347 if (U_SUCCESS(status)) {
348 TEST_ASSERT(resultLen == -1);
349 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
350 }
351
352 uregex_close(re);
353
354 status = U_ZERO_ERROR;
355 re = uregex_open(pat, 3, 0, NULL, &status);
356 resultPat = uregex_pattern(re, &resultLen, &status);
357 TEST_ASSERT_SUCCESS(status);
358 TEST_ASSERT_SUCCESS(status);
359
360 /* The TEST_ASSERT_SUCCESS above should change too... */
361 if (U_SUCCESS(status)) {
362 TEST_ASSERT(resultLen == 3);
363 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
364 TEST_ASSERT(u_strlen(resultPat) == 3);
365 }
366
367 uregex_close(re);
368 }
369
370 /*
371 * flags()
372 */
373 {
374 int32_t t;
375
376 status = U_ZERO_ERROR;
377 re = uregex_open(pat, -1, 0, NULL, &status);
378 t = uregex_flags(re, &status);
379 TEST_ASSERT_SUCCESS(status);
380 TEST_ASSERT(t == 0);
381 uregex_close(re);
382
383 status = U_ZERO_ERROR;
384 re = uregex_open(pat, -1, 0, NULL, &status);
385 t = uregex_flags(re, &status);
386 TEST_ASSERT_SUCCESS(status);
387 TEST_ASSERT(t == 0);
388 uregex_close(re);
389
390 status = U_ZERO_ERROR;
391 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
392 t = uregex_flags(re, &status);
393 TEST_ASSERT_SUCCESS(status);
394 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
395 uregex_close(re);
396 }
397
398 /*
399 * setText() and lookingAt()
400 */
401 {
402 UChar text1[50];
403 UChar text2[50];
404 UBool result;
405
406 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
407 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
408 status = U_ZERO_ERROR;
409 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
410 re = uregex_open(pat, -1, 0, NULL, &status);
411 TEST_ASSERT_SUCCESS(status);
412
413 /* Operation before doing a setText should fail... */
414 status = U_ZERO_ERROR;
415 uregex_lookingAt(re, 0, &status);
416 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
417
418 status = U_ZERO_ERROR;
419 uregex_setText(re, text1, -1, &status);
420 result = uregex_lookingAt(re, 0, &status);
421 TEST_ASSERT(result == TRUE);
422 TEST_ASSERT_SUCCESS(status);
423
424 status = U_ZERO_ERROR;
425 uregex_setText(re, text2, -1, &status);
426 result = uregex_lookingAt(re, 0, &status);
427 TEST_ASSERT(result == FALSE);
428 TEST_ASSERT_SUCCESS(status);
429
430 status = U_ZERO_ERROR;
431 uregex_setText(re, text1, -1, &status);
432 result = uregex_lookingAt(re, 0, &status);
433 TEST_ASSERT(result == TRUE);
434 TEST_ASSERT_SUCCESS(status);
435
436 status = U_ZERO_ERROR;
437 uregex_setText(re, text1, 5, &status);
438 result = uregex_lookingAt(re, 0, &status);
439 TEST_ASSERT(result == FALSE);
440 TEST_ASSERT_SUCCESS(status);
441
442 status = U_ZERO_ERROR;
443 uregex_setText(re, text1, 6, &status);
444 result = uregex_lookingAt(re, 0, &status);
445 TEST_ASSERT(result == TRUE);
446 TEST_ASSERT_SUCCESS(status);
447
448 uregex_close(re);
449 }
450
451
452 /*
453 * getText()
454 */
455 {
456 UChar text1[50];
457 UChar text2[50];
458 const UChar *result;
459 int32_t textLength;
460
461 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
462 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
463 status = U_ZERO_ERROR;
464 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
465 re = uregex_open(pat, -1, 0, NULL, &status);
466
467 uregex_setText(re, text1, -1, &status);
468 result = uregex_getText(re, &textLength, &status);
469 TEST_ASSERT(result == text1);
470 TEST_ASSERT(textLength == -1);
471 TEST_ASSERT_SUCCESS(status);
472
473 status = U_ZERO_ERROR;
474 uregex_setText(re, text2, 7, &status);
475 result = uregex_getText(re, &textLength, &status);
476 TEST_ASSERT(result == text2);
477 TEST_ASSERT(textLength == 7);
478 TEST_ASSERT_SUCCESS(status);
479
480 status = U_ZERO_ERROR;
481 uregex_setText(re, text2, 4, &status);
482 result = uregex_getText(re, &textLength, &status);
483 TEST_ASSERT(result == text2);
484 TEST_ASSERT(textLength == 4);
485 TEST_ASSERT_SUCCESS(status);
486 uregex_close(re);
487 }
488
489 /*
490 * matches()
491 */
492 {
493 UChar text1[50];
494 UBool result;
495 int len;
496 UChar nullString[] = {0,0,0};
497
498 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
499 status = U_ZERO_ERROR;
500 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
501 re = uregex_open(pat, -1, 0, NULL, &status);
502
503 uregex_setText(re, text1, -1, &status);
504 result = uregex_matches(re, 0, &status);
505 TEST_ASSERT(result == FALSE);
506 TEST_ASSERT_SUCCESS(status);
507
508 status = U_ZERO_ERROR;
509 uregex_setText(re, text1, 6, &status);
510 result = uregex_matches(re, 0, &status);
511 TEST_ASSERT(result == TRUE);
512 TEST_ASSERT_SUCCESS(status);
513
514 status = U_ZERO_ERROR;
515 uregex_setText(re, text1, 6, &status);
516 result = uregex_matches(re, 1, &status);
517 TEST_ASSERT(result == FALSE);
518 TEST_ASSERT_SUCCESS(status);
519 uregex_close(re);
520
521 status = U_ZERO_ERROR;
522 re = uregex_openC(".?", 0, NULL, &status);
523 uregex_setText(re, text1, -1, &status);
524 len = u_strlen(text1);
525 result = uregex_matches(re, len, &status);
526 TEST_ASSERT(result == TRUE);
527 TEST_ASSERT_SUCCESS(status);
528
529 status = U_ZERO_ERROR;
530 uregex_setText(re, nullString, -1, &status);
531 TEST_ASSERT_SUCCESS(status);
532 result = uregex_matches(re, 0, &status);
533 TEST_ASSERT(result == TRUE);
534 TEST_ASSERT_SUCCESS(status);
535 uregex_close(re);
536 }
537
538
539 /*
540 * lookingAt() Used in setText test.
541 */
542
543
544 /*
545 * find(), findNext, start, end, reset
546 */
547 {
548 UChar text1[50];
549 UBool result;
550 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
551 status = U_ZERO_ERROR;
552 re = uregex_openC("rx", 0, NULL, &status);
553
554 uregex_setText(re, text1, -1, &status);
555 result = uregex_find(re, 0, &status);
556 TEST_ASSERT(result == TRUE);
557 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
558 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
559 TEST_ASSERT_SUCCESS(status);
560
561 result = uregex_find(re, 9, &status);
562 TEST_ASSERT(result == TRUE);
563 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
564 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
565 TEST_ASSERT_SUCCESS(status);
566
567 result = uregex_find(re, 14, &status);
568 TEST_ASSERT(result == FALSE);
569 TEST_ASSERT_SUCCESS(status);
570
571 status = U_ZERO_ERROR;
572 uregex_reset(re, 0, &status);
573
574 result = uregex_findNext(re, &status);
575 TEST_ASSERT(result == TRUE);
576 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
577 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
578 TEST_ASSERT_SUCCESS(status);
579
580 result = uregex_findNext(re, &status);
581 TEST_ASSERT(result == TRUE);
582 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
583 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
584 TEST_ASSERT_SUCCESS(status);
585
586 status = U_ZERO_ERROR;
587 uregex_reset(re, 12, &status);
588
589 result = uregex_findNext(re, &status);
590 TEST_ASSERT(result == TRUE);
591 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
592 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
593 TEST_ASSERT_SUCCESS(status);
594
595 result = uregex_findNext(re, &status);
596 TEST_ASSERT(result == FALSE);
597 TEST_ASSERT_SUCCESS(status);
598
599 uregex_close(re);
600 }
601
602 /*
603 * groupCount
604 */
605 {
606 int32_t result;
607
608 status = U_ZERO_ERROR;
609 re = uregex_openC("abc", 0, NULL, &status);
610 result = uregex_groupCount(re, &status);
611 TEST_ASSERT_SUCCESS(status);
612 TEST_ASSERT(result == 0);
613 uregex_close(re);
614
615 status = U_ZERO_ERROR;
616 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
617 result = uregex_groupCount(re, &status);
618 TEST_ASSERT_SUCCESS(status);
619 TEST_ASSERT(result == 3);
620 uregex_close(re);
621
622 }
623
624
625 /*
626 * group()
627 */
628 {
629 UChar text1[80];
630 UChar buf[80];
631 UBool result;
632 int32_t resultSz;
633 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
634
635 status = U_ZERO_ERROR;
636 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
637 TEST_ASSERT_SUCCESS(status);
638
639
640 uregex_setText(re, text1, -1, &status);
641 result = uregex_find(re, 0, &status);
642 TEST_ASSERT(result==TRUE);
643
644 /* Capture Group 0, the full match. Should succeed. */
645 status = U_ZERO_ERROR;
646 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
647 TEST_ASSERT_SUCCESS(status);
648 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
649 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
650
651 /* Capture group #1. Should succeed. */
652 status = U_ZERO_ERROR;
653 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
654 TEST_ASSERT_SUCCESS(status);
655 TEST_ASSERT_STRING(" interior ", buf, TRUE);
656 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
657
658 /* Capture group out of range. Error. */
659 status = U_ZERO_ERROR;
660 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
661 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
662
663 /* NULL buffer, pure pre-flight */
664 status = U_ZERO_ERROR;
665 resultSz = uregex_group(re, 0, NULL, 0, &status);
666 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
667 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
668
669 /* Too small buffer, truncated string */
670 status = U_ZERO_ERROR;
671 memset(buf, -1, sizeof(buf));
672 resultSz = uregex_group(re, 0, buf, 5, &status);
673 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
674 TEST_ASSERT_STRING("abc i", buf, FALSE);
675 TEST_ASSERT(buf[5] == (UChar)0xffff);
676 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
677
678 /* Output string just fits buffer, no NUL term. */
679 status = U_ZERO_ERROR;
680 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
681 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
682 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
683 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
684 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
685
686 uregex_close(re);
687
688 }
689
690 /*
691 * Regions
692 */
693
694
695 /* SetRegion(), getRegion() do something */
696 TEST_SETUP(".*", "0123456789ABCDEF", 0)
697 UChar resultString[40];
698 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
699 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
700 uregex_setRegion(re, 3, 6, &status);
701 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
702 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
703 TEST_ASSERT(uregex_findNext(re, &status));
704 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
705 TEST_ASSERT_STRING("345", resultString, TRUE);
706 TEST_TEARDOWN;
707
708 /* find(start=-1) uses regions */
709 TEST_SETUP(".*", "0123456789ABCDEF", 0);
710 uregex_setRegion(re, 4, 6, &status);
711 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
712 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
713 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
714 TEST_TEARDOWN;
715
716 /* find (start >=0) does not use regions */
717 TEST_SETUP(".*", "0123456789ABCDEF", 0);
718 uregex_setRegion(re, 4, 6, &status);
719 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
720 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
721 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
722 TEST_TEARDOWN;
723
724 /* findNext() obeys regions */
725 TEST_SETUP(".", "0123456789ABCDEF", 0);
726 uregex_setRegion(re, 4, 6, &status);
727 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
728 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
729 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
730 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
731 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
732 TEST_TEARDOWN;
733
734 /* matches(start=-1) uses regions */
735 /* Also, verify that non-greedy *? succeeds in finding the full match. */
736 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
737 uregex_setRegion(re, 4, 6, &status);
738 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
739 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
740 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
741 TEST_TEARDOWN;
742
743 /* matches (start >=0) does not use regions */
744 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
745 uregex_setRegion(re, 4, 6, &status);
746 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
747 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
748 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
749 TEST_TEARDOWN;
750
751 /* lookingAt(start=-1) uses regions */
752 /* Also, verify that non-greedy *? finds the first (shortest) match. */
753 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
754 uregex_setRegion(re, 4, 6, &status);
755 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
756 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
757 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
758 TEST_TEARDOWN;
759
760 /* lookingAt (start >=0) does not use regions */
761 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
762 uregex_setRegion(re, 4, 6, &status);
763 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
764 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
765 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
766 TEST_TEARDOWN;
767
768 /* hitEnd() */
769 TEST_SETUP("[a-f]*", "abcdefghij", 0);
770 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
771 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
772 TEST_TEARDOWN;
773
774 TEST_SETUP("[a-f]*", "abcdef", 0);
775 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
776 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
777 TEST_TEARDOWN;
778
779 /* requireEnd */
780 TEST_SETUP("abcd", "abcd", 0);
781 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
782 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
783 TEST_TEARDOWN;
784
785 TEST_SETUP("abcd$", "abcd", 0);
786 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
787 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
788 TEST_TEARDOWN;
789
790 /* anchoringBounds */
791 TEST_SETUP("abc$", "abcdef", 0);
792 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
793 uregex_useAnchoringBounds(re, FALSE, &status);
794 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
795
796 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
797 uregex_useAnchoringBounds(re, TRUE, &status);
798 uregex_setRegion(re, 0, 3, &status);
799 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
800 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
801 TEST_TEARDOWN;
802
803 /* Transparent Bounds */
804 TEST_SETUP("abc(?=def)", "abcdef", 0);
805 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
806 uregex_useTransparentBounds(re, TRUE, &status);
807 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
808
809 uregex_useTransparentBounds(re, FALSE, &status);
810 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
811 uregex_setRegion(re, 0, 3, &status);
812 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
813 uregex_useTransparentBounds(re, TRUE, &status);
814 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
815 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
816 TEST_TEARDOWN;
817
818
819 /*
820 * replaceFirst()
821 */
822 {
823 UChar text1[80];
824 UChar text2[80];
825 UChar replText[80];
826 UChar buf[80];
827 int32_t resultSz;
828 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
829 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
830 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
831
832 status = U_ZERO_ERROR;
833 re = uregex_openC("x(.*?)x", 0, NULL, &status);
834 TEST_ASSERT_SUCCESS(status);
835
836 /* Normal case, with match */
837 uregex_setText(re, text1, -1, &status);
838 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
839 TEST_ASSERT_SUCCESS(status);
840 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
841 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
842
843 /* No match. Text should copy to output with no changes. */
844 status = U_ZERO_ERROR;
845 uregex_setText(re, text2, -1, &status);
846 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
847 TEST_ASSERT_SUCCESS(status);
848 TEST_ASSERT_STRING("No match here.", buf, TRUE);
849 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
850
851 /* Match, output exactly fills the buffer: no NUL termination, so a not-terminated warning is expected. */
852 status = U_ZERO_ERROR;
853 uregex_setText(re, text1, -1, &status);
854 memset(buf, -1, sizeof(buf));
855 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
856 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
857 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
858 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
859 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
860
861 /* Do the replaceFirst again, without first resetting anything.
862 * Should give the same results.
863 */
864 status = U_ZERO_ERROR;
865 memset(buf, -1, sizeof(buf));
866 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
867 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
868 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
869 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
870 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
871
872 /* NULL buffer, zero buffer length */
873 status = U_ZERO_ERROR;
874 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
875 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
876 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
877
878 /* Buffer too small by one */
879 status = U_ZERO_ERROR;
880 memset(buf, -1, sizeof(buf));
881 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
882 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
883 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
884 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
885 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
886
887 uregex_close(re);
888 }
889
890
891 /*
892 * replaceAll()
893 */
894 {
895 UChar text1[80]; /* "Replace xaax x1x x...x." */
896 UChar text2[80]; /* "No match Here" */
897 UChar replText[80]; /* "<$1>" */
898 UChar replText2[80]; /* "<<$1>>" */
899 const char * pattern = "x(.*?)x";
900 const char * expectedResult = "Replace <aa> <1> <...>.";
901 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
902 UChar buf[80];
903 int32_t resultSize;
904 int32_t expectedResultSize;
905 int32_t expectedResultSize2;
906 int32_t i;
907
908 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
909 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
910 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
911 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
912 expectedResultSize = (int32_t)strlen(expectedResult);
913 expectedResultSize2 = (int32_t)strlen(expectedResult2);
914
915 status = U_ZERO_ERROR;
916 re = uregex_openC(pattern, 0, NULL, &status);
917 TEST_ASSERT_SUCCESS(status);
918
919 /* Normal case, with match */
920 uregex_setText(re, text1, -1, &status);
921 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
922 TEST_ASSERT_SUCCESS(status);
923 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
924 TEST_ASSERT(resultSize == expectedResultSize);
925
926 /* No match. Text should copy to output with no changes. */
927 status = U_ZERO_ERROR;
928 uregex_setText(re, text2, -1, &status);
929 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
930 TEST_ASSERT_SUCCESS(status);
931 TEST_ASSERT_STRING("No match here.", buf, TRUE);
932 TEST_ASSERT(resultSize == u_strlen(text2));
933
934 /* Match, output exactly fills the buffer: no NUL termination, so a not-terminated warning is expected. */
935 status = U_ZERO_ERROR;
936 uregex_setText(re, text1, -1, &status);
937 memset(buf, -1, sizeof(buf));
938 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
939 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
940 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
941 TEST_ASSERT(resultSize == expectedResultSize);
942 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
943
944 /* Do the replaceAll again, without first resetting anything.
945 * Should give the same results.
946 */
947 status = U_ZERO_ERROR;
948 memset(buf, -1, sizeof(buf));
949 resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
950 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
951 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
952 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
953 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
954
955 /* NULL buffer, zero buffer length */
956 status = U_ZERO_ERROR;
957 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
958 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
959 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
960
961 /* Buffer too small. Try every size, which will tickle edge cases
962 * in uregex_appendReplacement (used by replaceAll) */
963 for (i=0; i<expectedResultSize; i++) {
964 char expected[80];
965 status = U_ZERO_ERROR;
966 memset(buf, -1, sizeof(buf));
967 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
968 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
969 strcpy(expected, expectedResult);
970 expected[i] = 0;
971 TEST_ASSERT_STRING(expected, buf, FALSE);
972 TEST_ASSERT(resultSize == expectedResultSize);
973 TEST_ASSERT(buf[i] == (UChar)0xffff);
974 }
975
976 /* Buffer too small. Same as previous test, except this time the replacement
977 * text is longer than the match capture group, making the length of the complete
978 * replacement longer than the original string.
979 */
980 for (i=0; i<expectedResultSize2; i++) {
981 char expected[80];
982 status = U_ZERO_ERROR;
983 memset(buf, -1, sizeof(buf));
984 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
985 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
986 strcpy(expected, expectedResult2);
987 expected[i] = 0;
988 TEST_ASSERT_STRING(expected, buf, FALSE);
989 TEST_ASSERT(resultSize == expectedResultSize2);
990 TEST_ASSERT(buf[i] == (UChar)0xffff);
991 }
992
993
994 uregex_close(re);
995 }
996
997
998 /*
999 * appendReplacement()
1000 */
1001 {
1002 UChar text[100];
1003 UChar repl[100];
1004 UChar buf[100];
1005 UChar *bufPtr;
1006 int32_t bufCap;
1007
1008
1009 status = U_ZERO_ERROR;
1010 re = uregex_openC(".*", 0, 0, &status);
1011 TEST_ASSERT_SUCCESS(status);
1012
1013 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1014 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1015 uregex_setText(re, text, -1, &status);
1016
1017 /* match covers whole target string */
1018 uregex_find(re, 0, &status);
1019 TEST_ASSERT_SUCCESS(status);
1020 bufPtr = buf;
1021 bufCap = UPRV_LENGTHOF(buf);
1022 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1023 TEST_ASSERT_SUCCESS(status);
1024 TEST_ASSERT_STRING("some other", buf, TRUE);
1025
1026 /* Match has \u \U escapes */
1027 uregex_find(re, 0, &status);
1028 TEST_ASSERT_SUCCESS(status);
1029 bufPtr = buf;
1030 bufCap = UPRV_LENGTHOF(buf);
1031 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1032 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1033 TEST_ASSERT_SUCCESS(status);
1034 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1035
1036 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1037 status = U_ZERO_ERROR;
1038 uregex_find(re, 0, &status);
1039 TEST_ASSERT_SUCCESS(status);
1040 bufPtr = buf;
1041 status = U_BUFFER_OVERFLOW_ERROR;
1042 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1043 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1044
1045 uregex_close(re);
1046 }
1047
1048
1049 /*
1050 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1051 */
1052
1053 /*
1054 * split()
1055 */
1056 {
1057 UChar textToSplit[80];
1058 UChar text2[80];
1059 UChar buf[200];
1060 UChar *fields[10];
1061 int32_t numFields;
1062 int32_t requiredCapacity;
1063 int32_t spaceNeeded;
1064 int32_t sz;
1065
1066 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1067 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1068
1069 status = U_ZERO_ERROR;
1070 re = uregex_openC(":", 0, NULL, &status);
1071
1072
1073 /* Simple split */
1074
1075 uregex_setText(re, textToSplit, -1, &status);
1076 TEST_ASSERT_SUCCESS(status);
1077
1078 /* The TEST_ASSERT_SUCCESS call above should change too... */
1079 if (U_SUCCESS(status)) {
1080 memset(fields, -1, sizeof(fields));
1081 numFields =
1082 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1083 TEST_ASSERT_SUCCESS(status);
1084
1085 /* The TEST_ASSERT_SUCCESS call above should change too... */
1086 if(U_SUCCESS(status)) {
1087 TEST_ASSERT(numFields == 3);
1088 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1089 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1090 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1091 TEST_ASSERT(fields[3] == NULL);
1092
1093 spaceNeeded = u_strlen(textToSplit) -
1094 (numFields - 1) + /* Field delimiters do not appear in output */
1095 numFields; /* Each field gets a NUL terminator */
1096
1097 TEST_ASSERT(spaceNeeded == requiredCapacity);
1098 }
1099 }
1100
1101 uregex_close(re);
1102
1103
1104 /* Split with too few output strings available */
1105 status = U_ZERO_ERROR;
1106 re = uregex_openC(":", 0, NULL, &status);
1107 uregex_setText(re, textToSplit, -1, &status);
1108 TEST_ASSERT_SUCCESS(status);
1109
1110 /* The TEST_ASSERT_SUCCESS call above should change too... */
1111 if(U_SUCCESS(status)) {
1112 memset(fields, -1, sizeof(fields));
1113 numFields =
1114 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1115 TEST_ASSERT_SUCCESS(status);
1116
1117 /* The TEST_ASSERT_SUCCESS call above should change too... */
1118 if(U_SUCCESS(status)) {
1119 TEST_ASSERT(numFields == 2);
1120 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1121 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1122 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1123
1124 spaceNeeded = u_strlen(textToSplit) -
1125 (numFields - 1) + /* Field delimiters do not appear in output */
1126 numFields; /* Each field gets a NUL terminator */
1127
1128 TEST_ASSERT(spaceNeeded == requiredCapacity);
1129
1130 /* Split with a range of output buffer sizes. */
1131 spaceNeeded = u_strlen(textToSplit) -
1132 (numFields - 1) + /* Field delimiters do not appear in output */
1133 numFields; /* Each field gets a NUL terminator */
1134
1135 for (sz=0; sz < spaceNeeded+1; sz++) {
1136 memset(fields, -1, sizeof(fields));
1137 status = U_ZERO_ERROR;
1138 numFields =
1139 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1140 if (sz >= spaceNeeded) {
1141 TEST_ASSERT_SUCCESS(status);
1142 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1143 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1144 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1145 } else {
1146 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1147 }
1148 TEST_ASSERT(numFields == 3);
1149 TEST_ASSERT(fields[3] == NULL);
1150 TEST_ASSERT(spaceNeeded == requiredCapacity);
1151 }
1152 }
1153 }
1154
1155 uregex_close(re);
1156 }
1157
1158
1159
1160
1161 /* Split(), part 2. Patterns with capture groups. The capture group text
1162 * comes out as additional fields. */
1163 {
1164 UChar textToSplit[80];
1165 UChar buf[200];
1166 UChar *fields[10];
1167 int32_t numFields;
1168 int32_t requiredCapacity;
1169 int32_t spaceNeeded;
1170 int32_t sz;
1171
1172 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1173
1174 status = U_ZERO_ERROR;
1175 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1176
1177 uregex_setText(re, textToSplit, -1, &status);
1178 TEST_ASSERT_SUCCESS(status);
1179
1180 /* The TEST_ASSERT_SUCCESS call above should change too... */
1181 if(U_SUCCESS(status)) {
1182 memset(fields, -1, sizeof(fields));
1183 numFields =
1184 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1185 TEST_ASSERT_SUCCESS(status);
1186
1187 /* The TEST_ASSERT_SUCCESS call above should change too... */
1188 if(U_SUCCESS(status)) {
1189 TEST_ASSERT(numFields == 5);
1190 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1191 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1192 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1193 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1194 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1195 TEST_ASSERT(fields[5] == NULL);
1196 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1197 TEST_ASSERT(spaceNeeded == requiredCapacity);
1198 }
1199 }
1200
1201 /* Split with too few output strings available (2) */
1202 status = U_ZERO_ERROR;
1203 memset(fields, -1, sizeof(fields));
1204 numFields =
1205 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1206 TEST_ASSERT_SUCCESS(status);
1207
1208 /* The TEST_ASSERT_SUCCESS call above should change too... */
1209 if(U_SUCCESS(status)) {
1210 TEST_ASSERT(numFields == 2);
1211 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1212 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1213 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1214
1215 spaceNeeded = (int32_t)strlen("first . second<tag-b> third."); /* "." at NUL positions */
1216 TEST_ASSERT(spaceNeeded == requiredCapacity);
1217 }
1218
1219 /* Split with too few output strings available (3) */
1220 status = U_ZERO_ERROR;
1221 memset(fields, -1, sizeof(fields));
1222 numFields =
1223 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1224 TEST_ASSERT_SUCCESS(status);
1225
1226 /* The TEST_ASSERT_SUCCESS call above should change too... */
1227 if(U_SUCCESS(status)) {
1228 TEST_ASSERT(numFields == 3);
1229 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1230 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1231 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1232 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1233
1234 spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1235 TEST_ASSERT(spaceNeeded == requiredCapacity);
1236 }
1237
1238 /* Split with just enough output strings available (5) */
1239 status = U_ZERO_ERROR;
1240 memset(fields, -1, sizeof(fields));
1241 numFields =
1242 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1243 TEST_ASSERT_SUCCESS(status);
1244
1245 /* The TEST_ASSERT_SUCCESS call above should change too... */
1246 if(U_SUCCESS(status)) {
1247 TEST_ASSERT(numFields == 5);
1248 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1249 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1250 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1251 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1252 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1253 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1254
1255 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1256 TEST_ASSERT(spaceNeeded == requiredCapacity);
1257 }
1258
1259 /* Split, end of text is a field delimiter. */
1260 status = U_ZERO_ERROR;
1261 sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1262 uregex_setText(re, textToSplit, sz, &status);
1263 TEST_ASSERT_SUCCESS(status);
1264
1265 /* The TEST_ASSERT_SUCCESS call above should change too... */
1266 if(U_SUCCESS(status)) {
1267 memset(fields, -1, sizeof(fields));
1268 numFields =
1269 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1270 TEST_ASSERT_SUCCESS(status);
1271
1272 /* The TEST_ASSERT_SUCCESS call above should change too... */
1273 if(U_SUCCESS(status)) {
1274 TEST_ASSERT(numFields == 5);
1275 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1276 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1277 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1278 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1279 TEST_ASSERT_STRING("", fields[4], TRUE);
1280 TEST_ASSERT(fields[5] == NULL);
1281 TEST_ASSERT(fields[8] == NULL);
1282 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1283 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1284 TEST_ASSERT(spaceNeeded == requiredCapacity);
1285 }
1286 }
1287
1288 uregex_close(re);
1289 }
1290
1291 /*
1292 * set/getTimeLimit
1293 */
1294 TEST_SETUP("abc$", "abcdef", 0);
1295 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1296 uregex_setTimeLimit(re, 1000, &status);
1297 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1298 TEST_ASSERT_SUCCESS(status);
1299 uregex_setTimeLimit(re, -1, &status);
1300 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1301 status = U_ZERO_ERROR;
1302 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1303 TEST_TEARDOWN;
1304
1305 /*
1306 * set/get Stack Limit
1307 */
1308 TEST_SETUP("abc$", "abcdef", 0);
1309 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1310 uregex_setStackLimit(re, 40000, &status);
1311 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1312 TEST_ASSERT_SUCCESS(status);
1313 uregex_setStackLimit(re, -1, &status);
1314 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1315 status = U_ZERO_ERROR;
1316 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1317 TEST_TEARDOWN;
1318
1319
1320 /*
1321 * Get/Set callback functions
1322 * This test is copied from intltest regex/Callbacks
1323 * The pattern and test data will run long enough to cause the callback
1324 * to be invoked. The nested '+' operators give exponential time
1325 * behavior with increasing string length.
1326 */
1327 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1328 callBackContext cbInfo = {4, 0, 0};
1329 const void *pContext = &cbInfo;
1330 URegexMatchCallback *returnedFn = &TestCallbackFn;
1331
1332 /* Getting the callback fn when it hasn't been set must return NULL */
1333 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1334 TEST_ASSERT_SUCCESS(status);
1335 TEST_ASSERT(returnedFn == NULL);
1336 TEST_ASSERT(pContext == NULL);
1337
1338 /* Set the callback and do a match. */
1339 /* The callback function should record that it has been called. */
1340 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1341 TEST_ASSERT_SUCCESS(status);
1342 TEST_ASSERT(cbInfo.numCalls == 0);
1343 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1344 TEST_ASSERT_SUCCESS(status);
1345 TEST_ASSERT(cbInfo.numCalls > 0);
1346
1347 /* Getting the callback should return the values that were set above. */
1348 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1349 TEST_ASSERT(returnedFn == &TestCallbackFn);
1350 TEST_ASSERT(pContext == &cbInfo);
1351
1352 TEST_TEARDOWN;
1353 }
1354
1355
1356
1357 static void TestBug4315(void) {
1358 UErrorCode theICUError = U_ZERO_ERROR;
1359 URegularExpression *theRegEx;
1360 UChar *textBuff;
1361 const char *thePattern;
1362 UChar theString[100];
1363 UChar *destFields[24];
1364 int32_t neededLength1;
1365 int32_t neededLength2;
1366
1367 int32_t wordCount = 0;
1368 int32_t destFieldsSize = 24;
1369
1370 thePattern = "ck ";
1371 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1372
1373 /* open a regex */
1374 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1375 TEST_ASSERT_SUCCESS(theICUError);
1376
1377 /* set the input string */
1378 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1379 TEST_ASSERT_SUCCESS(theICUError);
1380
1381 /* split */
1382 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1383 * error occurs! */
1384 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1385 destFieldsSize, &theICUError);
1386
1387 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1388 TEST_ASSERT(wordCount==3);
1389
1390 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1391 {
1392 theICUError = U_ZERO_ERROR;
1393 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1394 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1395 destFields, destFieldsSize, &theICUError);
1396 TEST_ASSERT(wordCount==3);
1397 TEST_ASSERT_SUCCESS(theICUError);
1398 TEST_ASSERT(neededLength1 == neededLength2);
1399 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1400 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1401 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1402 TEST_ASSERT(destFields[3] == NULL);
1403 free(textBuff);
1404 }
1405 uregex_close(theRegEx);
1406 }
1407
1408 /* Based on TestRegexCAPI() */
1409 static void TestUTextAPI(void) {
1410 UErrorCode status = U_ZERO_ERROR;
1411 URegularExpression *re;
1412 UText patternText = UTEXT_INITIALIZER;
1413 UChar pat[200];
1414 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1415
1416 /* Minimalist open/close */
1417 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1418 re = uregex_openUText(&patternText, 0, 0, &status);
1419 if (U_FAILURE(status)) {
1420 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1421 utext_close(&patternText);
1422 return;
1423 }
1424 uregex_close(re);
1425
1426 /* Open with all flag values set */
1427 status = U_ZERO_ERROR;
1428 re = uregex_openUText(&patternText,
1429 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1430 0, &status);
1431 TEST_ASSERT_SUCCESS(status);
1432 uregex_close(re);
1433
1434 /* Open with an invalid flag */
1435 status = U_ZERO_ERROR;
1436 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1437 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1438 uregex_close(re);
1439
1440 /* open with an invalid parameter */
1441 status = U_ZERO_ERROR;
1442 re = uregex_openUText(NULL,
1443 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1444 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1445
1446 /*
1447 * clone
1448 */
1449 {
1450 URegularExpression *clone1;
1451 URegularExpression *clone2;
1452 URegularExpression *clone3;
1453 UChar testString1[30];
1454 UChar testString2[30];
1455 UBool result;
1456
1457
1458 status = U_ZERO_ERROR;
1459 re = uregex_openUText(&patternText, 0, 0, &status);
1460 TEST_ASSERT_SUCCESS(status);
1461 clone1 = uregex_clone(re, &status);
1462 TEST_ASSERT_SUCCESS(status);
1463 TEST_ASSERT(clone1 != NULL);
1464
1465 status = U_ZERO_ERROR;
1466 clone2 = uregex_clone(re, &status);
1467 TEST_ASSERT_SUCCESS(status);
1468 TEST_ASSERT(clone2 != NULL);
1469 uregex_close(re);
1470
1471 status = U_ZERO_ERROR;
1472 clone3 = uregex_clone(clone2, &status);
1473 TEST_ASSERT_SUCCESS(status);
1474 TEST_ASSERT(clone3 != NULL);
1475
1476 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1477 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1478
1479 status = U_ZERO_ERROR;
1480 uregex_setText(clone1, testString1, -1, &status);
1481 TEST_ASSERT_SUCCESS(status);
1482 result = uregex_lookingAt(clone1, 0, &status);
1483 TEST_ASSERT_SUCCESS(status);
1484 TEST_ASSERT(result==TRUE);
1485
1486 status = U_ZERO_ERROR;
1487 uregex_setText(clone2, testString2, -1, &status);
1488 TEST_ASSERT_SUCCESS(status);
1489 result = uregex_lookingAt(clone2, 0, &status);
1490 TEST_ASSERT_SUCCESS(status);
1491 TEST_ASSERT(result==FALSE);
1492 result = uregex_find(clone2, 0, &status);
1493 TEST_ASSERT_SUCCESS(status);
1494 TEST_ASSERT(result==TRUE);
1495
1496 uregex_close(clone1);
1497 uregex_close(clone2);
1498 uregex_close(clone3);
1499
1500 }
1501
1502 /*
1503 * pattern() and patternText()
1504 */
1505 {
1506 const UChar *resultPat;
1507 int32_t resultLen;
1508 UText *resultText;
1509 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1510 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1511 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1512 status = U_ZERO_ERROR;
1513
1514 utext_openUTF8(&patternText, str_hello, -1, &status);
1515 re = uregex_open(pat, -1, 0, NULL, &status);
1516 resultPat = uregex_pattern(re, &resultLen, &status);
1517 TEST_ASSERT_SUCCESS(status);
1518
1519 /* The TEST_ASSERT_SUCCESS above should change too... */
1520 if (U_SUCCESS(status)) {
1521 TEST_ASSERT(resultLen == -1);
1522 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1523 }
1524
1525 resultText = uregex_patternUText(re, &status);
1526 TEST_ASSERT_SUCCESS(status);
1527 TEST_ASSERT_UTEXT(str_hello, resultText);
1528
1529 uregex_close(re);
1530
1531 status = U_ZERO_ERROR;
1532 re = uregex_open(pat, 3, 0, NULL, &status);
1533 resultPat = uregex_pattern(re, &resultLen, &status);
1534 TEST_ASSERT_SUCCESS(status);
1535
1536 /* The TEST_ASSERT_SUCCESS above should change too... */
1537 if (U_SUCCESS(status)) {
1538 TEST_ASSERT(resultLen == 3);
1539 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1540 TEST_ASSERT(u_strlen(resultPat) == 3);
1541 }
1542
1543 resultText = uregex_patternUText(re, &status);
1544 TEST_ASSERT_SUCCESS(status);
1545 TEST_ASSERT_UTEXT(str_hel, resultText);
1546
1547 uregex_close(re);
1548 }
1549
1550 /*
1551 * setUText() and lookingAt()
1552 */
1553 {
1554 UText text1 = UTEXT_INITIALIZER;
1555 UText text2 = UTEXT_INITIALIZER;
1556 UBool result;
1557 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1558 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1559 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1560 status = U_ZERO_ERROR;
1561 utext_openUTF8(&text1, str_abcccd, -1, &status);
1562 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1563
1564 utext_openUTF8(&patternText, str_abcd, -1, &status);
1565 re = uregex_openUText(&patternText, 0, NULL, &status);
1566 TEST_ASSERT_SUCCESS(status);
1567
1568 /* Operation before doing a setText should fail... */
1569 status = U_ZERO_ERROR;
1570 uregex_lookingAt(re, 0, &status);
1571 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1572
1573 status = U_ZERO_ERROR;
1574 uregex_setUText(re, &text1, &status);
1575 result = uregex_lookingAt(re, 0, &status);
1576 TEST_ASSERT(result == TRUE);
1577 TEST_ASSERT_SUCCESS(status);
1578
1579 status = U_ZERO_ERROR;
1580 uregex_setUText(re, &text2, &status);
1581 result = uregex_lookingAt(re, 0, &status);
1582 TEST_ASSERT(result == FALSE);
1583 TEST_ASSERT_SUCCESS(status);
1584
1585 status = U_ZERO_ERROR;
1586 uregex_setUText(re, &text1, &status);
1587 result = uregex_lookingAt(re, 0, &status);
1588 TEST_ASSERT(result == TRUE);
1589 TEST_ASSERT_SUCCESS(status);
1590
1591 uregex_close(re);
1592 utext_close(&text1);
1593 utext_close(&text2);
1594 }
1595
1596
1597 /*
1598 * getText() and getUText()
1599 */
1600 {
1601 UText text1 = UTEXT_INITIALIZER;
1602 UText text2 = UTEXT_INITIALIZER;
1603 UChar text2Chars[20];
1604 UText *resultText;
1605 const UChar *result;
1606 int32_t textLength;
1607 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1608 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1609 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1610
1611
1612 status = U_ZERO_ERROR;
1613 utext_openUTF8(&text1, str_abcccd, -1, &status);
1614 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1615 utext_openUChars(&text2, text2Chars, -1, &status);
1616
1617 utext_openUTF8(&patternText, str_abcd, -1, &status);
1618 re = uregex_openUText(&patternText, 0, NULL, &status);
1619
1620 /* First set a UText */
1621 uregex_setUText(re, &text1, &status);
1622 resultText = uregex_getUText(re, NULL, &status);
1623 TEST_ASSERT_SUCCESS(status);
1624 TEST_ASSERT(resultText != &text1);
1625 utext_setNativeIndex(resultText, 0);
1626 utext_setNativeIndex(&text1, 0);
1627 TEST_ASSERT(testUTextEqual(resultText, &text1));
1628 utext_close(resultText);
1629
1630 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1631 (void)result; /* Suppress set but not used warning. */
1632 TEST_ASSERT(textLength == -1 || textLength == 6);
1633 resultText = uregex_getUText(re, NULL, &status);
1634 TEST_ASSERT_SUCCESS(status);
1635 TEST_ASSERT(resultText != &text1);
1636 utext_setNativeIndex(resultText, 0);
1637 utext_setNativeIndex(&text1, 0);
1638 TEST_ASSERT(testUTextEqual(resultText, &text1));
1639 utext_close(resultText);
1640
1641 /* Then set a UChar * */
1642 uregex_setText(re, text2Chars, 7, &status);
1643 resultText = uregex_getUText(re, NULL, &status);
1644 TEST_ASSERT_SUCCESS(status);
1645 utext_setNativeIndex(resultText, 0);
1646 utext_setNativeIndex(&text2, 0);
1647 TEST_ASSERT(testUTextEqual(resultText, &text2));
1648 utext_close(resultText);
1649 result = uregex_getText(re, &textLength, &status);
1650 TEST_ASSERT(textLength == 7);
1651
1652 uregex_close(re);
1653 utext_close(&text1);
1654 utext_close(&text2);
1655 }
1656
1657 /*
1658 * matches()
1659 */
1660 {
1661 UText text1 = UTEXT_INITIALIZER;
1662 UBool result;
1663 UText nullText = UTEXT_INITIALIZER;
1664 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1665 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1666
1667 status = U_ZERO_ERROR;
1668 utext_openUTF8(&text1, str_abcccde, -1, &status);
1669 utext_openUTF8(&patternText, str_abcd, -1, &status);
1670 re = uregex_openUText(&patternText, 0, NULL, &status);
1671
1672 uregex_setUText(re, &text1, &status);
1673 result = uregex_matches(re, 0, &status);
1674 TEST_ASSERT(result == FALSE);
1675 TEST_ASSERT_SUCCESS(status);
1676 uregex_close(re);
1677
1678 status = U_ZERO_ERROR;
1679 re = uregex_openC(".?", 0, NULL, &status);
1680 uregex_setUText(re, &text1, &status);
1681 result = uregex_matches(re, 7, &status);
1682 TEST_ASSERT(result == TRUE);
1683 TEST_ASSERT_SUCCESS(status);
1684
1685 status = U_ZERO_ERROR;
1686 utext_openUTF8(&nullText, "", -1, &status);
1687 uregex_setUText(re, &nullText, &status);
1688 TEST_ASSERT_SUCCESS(status);
1689 result = uregex_matches(re, 0, &status);
1690 TEST_ASSERT(result == TRUE);
1691 TEST_ASSERT_SUCCESS(status);
1692
1693 uregex_close(re);
1694 utext_close(&text1);
1695 utext_close(&nullText);
1696 }
1697
1698
1699 /*
1700 * lookingAt() Used in setText test.
1701 */
1702
1703
1704 /*
1705 * find(), findNext, start, end, reset
1706 */
1707 {
1708 UChar text1[50];
1709 UBool result;
1710 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1711 status = U_ZERO_ERROR;
1712 re = uregex_openC("rx", 0, NULL, &status);
1713
1714 uregex_setText(re, text1, -1, &status);
1715 result = uregex_find(re, 0, &status);
1716 TEST_ASSERT(result == TRUE);
1717 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1718 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1719 TEST_ASSERT_SUCCESS(status);
1720
1721 result = uregex_find(re, 9, &status);
1722 TEST_ASSERT(result == TRUE);
1723 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1724 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1725 TEST_ASSERT_SUCCESS(status);
1726
1727 result = uregex_find(re, 14, &status);
1728 TEST_ASSERT(result == FALSE);
1729 TEST_ASSERT_SUCCESS(status);
1730
1731 status = U_ZERO_ERROR;
1732 uregex_reset(re, 0, &status);
1733
1734 result = uregex_findNext(re, &status);
1735 TEST_ASSERT(result == TRUE);
1736 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1737 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1738 TEST_ASSERT_SUCCESS(status);
1739
1740 result = uregex_findNext(re, &status);
1741 TEST_ASSERT(result == TRUE);
1742 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1743 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1744 TEST_ASSERT_SUCCESS(status);
1745
1746 status = U_ZERO_ERROR;
1747 uregex_reset(re, 12, &status);
1748
1749 result = uregex_findNext(re, &status);
1750 TEST_ASSERT(result == TRUE);
1751 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1752 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1753 TEST_ASSERT_SUCCESS(status);
1754
1755 result = uregex_findNext(re, &status);
1756 TEST_ASSERT(result == FALSE);
1757 TEST_ASSERT_SUCCESS(status);
1758
1759 uregex_close(re);
1760 }
1761
1762 /*
1763 * groupUText()
1764 */
1765 {
1766 UChar text1[80];
1767 UText *actual;
1768 UBool result;
1769 int64_t groupLen = 0;
1770 UChar groupBuf[20];
1771
1772 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1773
1774 status = U_ZERO_ERROR;
1775 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1776 TEST_ASSERT_SUCCESS(status);
1777
1778 uregex_setText(re, text1, -1, &status);
1779 result = uregex_find(re, 0, &status);
1780 TEST_ASSERT(result==TRUE);
1781
1782 /* Capture Group 0 with shallow clone API. Should succeed. */
1783 status = U_ZERO_ERROR;
1784 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1785 TEST_ASSERT_SUCCESS(status);
1786
1787 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1788 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1789 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1790
1791 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1792 utext_close(actual);
1793
1794 /* Capture group #1. Should succeed. */
1795 status = U_ZERO_ERROR;
1796
1797 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1798 TEST_ASSERT_SUCCESS(status);
1799 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1800 /* (within the string text1) */
1801 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1802 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1803 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1804
1805 utext_close(actual);
1806
1807 /* Capture group out of range. Error. */
1808 status = U_ZERO_ERROR;
1809 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1810 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1811 utext_close(actual);
1812
1813 uregex_close(re);
1814 }
1815
1816 /*
1817 * replaceFirst()
1818 */
1819 {
1820 UChar text1[80];
1821 UChar text2[80];
1822 UText replText = UTEXT_INITIALIZER;
1823 UText *result;
1824 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1825 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1826 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1827 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1828 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1829 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1830 status = U_ZERO_ERROR;
1831 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1832 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1833 utext_openUTF8(&replText, str_1x, -1, &status);
1834
1835 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1836 TEST_ASSERT_SUCCESS(status);
1837
1838 /* Normal case, with match */
1839 uregex_setText(re, text1, -1, &status);
1840 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1841 TEST_ASSERT_SUCCESS(status);
1842 TEST_ASSERT_UTEXT(str_Replxxx, result);
1843 utext_close(result);
1844
1845 /* No match. Text should copy to output with no changes. */
1846 uregex_setText(re, text2, -1, &status);
1847 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1848 TEST_ASSERT_SUCCESS(status);
1849 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1850 utext_close(result);
1851
1852 /* Unicode escapes */
1853 uregex_setText(re, text1, -1, &status);
1854 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1855 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1856 TEST_ASSERT_SUCCESS(status);
1857 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1858 utext_close(result);
1859
1860 uregex_close(re);
1861 utext_close(&replText);
1862 }
1863
1864
1865 /*
1866 * replaceAll()
1867 */
1868 {
1869 UChar text1[80];
1870 UChar text2[80];
1871 UText replText = UTEXT_INITIALIZER;
1872 UText *result;
1873 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1874 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1875 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1876 status = U_ZERO_ERROR;
1877 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1878 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1879 utext_openUTF8(&replText, str_1, -1, &status);
1880
1881 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1882 TEST_ASSERT_SUCCESS(status);
1883
1884 /* Normal case, with match */
1885 uregex_setText(re, text1, -1, &status);
1886 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1887 TEST_ASSERT_SUCCESS(status);
1888 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1889 utext_close(result);
1890
1891 /* No match. Text should copy to output with no changes. */
1892 uregex_setText(re, text2, -1, &status);
1893 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1894 TEST_ASSERT_SUCCESS(status);
1895 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1896 utext_close(result);
1897
1898 uregex_close(re);
1899 utext_close(&replText);
1900 }
1901
1902
1903 /*
1904 * appendReplacement()
1905 */
1906 {
1907 UChar text[100];
1908 UChar repl[100];
1909 UChar buf[100];
1910 UChar *bufPtr;
1911 int32_t bufCap;
1912
1913 status = U_ZERO_ERROR;
1914 re = uregex_openC(".*", 0, 0, &status);
1915 TEST_ASSERT_SUCCESS(status);
1916
1917 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1918 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1919 uregex_setText(re, text, -1, &status);
1920
1921 /* match covers whole target string */
1922 uregex_find(re, 0, &status);
1923 TEST_ASSERT_SUCCESS(status);
1924 bufPtr = buf;
1925 bufCap = UPRV_LENGTHOF(buf);
1926 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1927 TEST_ASSERT_SUCCESS(status);
1928 TEST_ASSERT_STRING("some other", buf, TRUE);
1929
1930 /* Match has \u \U escapes */
1931 uregex_find(re, 0, &status);
1932 TEST_ASSERT_SUCCESS(status);
1933 bufPtr = buf;
1934 bufCap = UPRV_LENGTHOF(buf);
1935 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1936 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1937 TEST_ASSERT_SUCCESS(status);
1938 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1939
1940 uregex_close(re);
1941 }
1942
1943
1944 /*
1945 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1946 */
1947
1948 /*
1949 * splitUText()
1950 */
1951 {
1952 UChar textToSplit[80];
1953 UChar text2[80];
1954 UText *fields[10];
1955 int32_t numFields;
1956 int32_t i;
1957
1958 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1959 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1960
1961 status = U_ZERO_ERROR;
1962 re = uregex_openC(":", 0, NULL, &status);
1963
1964
1965 /* Simple split */
1966
1967 uregex_setText(re, textToSplit, -1, &status);
1968 TEST_ASSERT_SUCCESS(status);
1969
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */
1971 if (U_SUCCESS(status)) {
1972 memset(fields, 0, sizeof(fields));
1973 numFields = uregex_splitUText(re, fields, 10, &status);
1974 TEST_ASSERT_SUCCESS(status);
1975
1976 /* The TEST_ASSERT_SUCCESS call above should change too... */
1977 if(U_SUCCESS(status)) {
1978 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1979 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1980 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1981 TEST_ASSERT(numFields == 3);
1982 TEST_ASSERT_UTEXT(str_first, fields[0]);
1983 TEST_ASSERT_UTEXT(str_second, fields[1]);
1984 TEST_ASSERT_UTEXT(str_third, fields[2]);
1985 TEST_ASSERT(fields[3] == NULL);
1986 }
1987 for(i = 0; i < numFields; i++) {
1988 utext_close(fields[i]);
1989 }
1990 }
1991
1992 uregex_close(re);
1993
1994
1995 /* Split with too few output strings available */
1996 status = U_ZERO_ERROR;
1997 re = uregex_openC(":", 0, NULL, &status);
1998 uregex_setText(re, textToSplit, -1, &status);
1999 TEST_ASSERT_SUCCESS(status);
2000
2001 /* The TEST_ASSERT_SUCCESS call above should change too... */
2002 if(U_SUCCESS(status)) {
2003 fields[0] = NULL;
2004 fields[1] = NULL;
2005 fields[2] = &patternText;
2006 numFields = uregex_splitUText(re, fields, 2, &status);
2007 TEST_ASSERT_SUCCESS(status);
2008
2009 /* The TEST_ASSERT_SUCCESS call above should change too... */
2010 if(U_SUCCESS(status)) {
2011 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2012 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2013 TEST_ASSERT(numFields == 2);
2014 TEST_ASSERT_UTEXT(str_first, fields[0]);
2015 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2016 TEST_ASSERT(fields[2] == &patternText);
2017 }
2018 for(i = 0; i < numFields; i++) {
2019 utext_close(fields[i]);
2020 }
2021 }
2022
2023 uregex_close(re);
2024 }
2025
2026 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2027 * comes out as additional fields. */
2028 {
2029 UChar textToSplit[80];
2030 UText *fields[10];
2031 int32_t numFields;
2032 int32_t i;
2033
2034 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2035
2036 status = U_ZERO_ERROR;
2037 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2038
2039 uregex_setText(re, textToSplit, -1, &status);
2040 TEST_ASSERT_SUCCESS(status);
2041
2042 /* The TEST_ASSERT_SUCCESS call above should change too... */
2043 if(U_SUCCESS(status)) {
2044 memset(fields, 0, sizeof(fields));
2045 numFields = uregex_splitUText(re, fields, 10, &status);
2046 TEST_ASSERT_SUCCESS(status);
2047
2048 /* The TEST_ASSERT_SUCCESS call above should change too... */
2049 if(U_SUCCESS(status)) {
2050 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2051 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2052 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2053 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2054 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2055
2056 TEST_ASSERT(numFields == 5);
2057 TEST_ASSERT_UTEXT(str_first, fields[0]);
2058 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2059 TEST_ASSERT_UTEXT(str_second, fields[2]);
2060 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2061 TEST_ASSERT_UTEXT(str_third, fields[4]);
2062 TEST_ASSERT(fields[5] == NULL);
2063 }
2064 for(i = 0; i < numFields; i++) {
2065 utext_close(fields[i]);
2066 }
2067 }
2068
2069 /* Split with too few output strings available (2) */
2070 status = U_ZERO_ERROR;
2071 fields[0] = NULL;
2072 fields[1] = NULL;
2073 fields[2] = &patternText;
2074 numFields = uregex_splitUText(re, fields, 2, &status);
2075 TEST_ASSERT_SUCCESS(status);
2076
2077 /* The TEST_ASSERT_SUCCESS call above should change too... */
2078 if(U_SUCCESS(status)) {
2079 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2080 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2081 TEST_ASSERT(numFields == 2);
2082 TEST_ASSERT_UTEXT(str_first, fields[0]);
2083 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2084 TEST_ASSERT(fields[2] == &patternText);
2085 }
2086 for(i = 0; i < numFields; i++) {
2087 utext_close(fields[i]);
2088 }
2089
2090
2091 /* Split with too few output strings available (3) */
2092 status = U_ZERO_ERROR;
2093 fields[0] = NULL;
2094 fields[1] = NULL;
2095 fields[2] = NULL;
2096 fields[3] = &patternText;
2097 numFields = uregex_splitUText(re, fields, 3, &status);
2098 TEST_ASSERT_SUCCESS(status);
2099
2100 /* The TEST_ASSERT_SUCCESS call above should change too... */
2101 if(U_SUCCESS(status)) {
2102 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2103 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2104 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2105 TEST_ASSERT(numFields == 3);
2106 TEST_ASSERT_UTEXT(str_first, fields[0]);
2107 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2108 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2109 TEST_ASSERT(fields[3] == &patternText);
2110 }
2111 for(i = 0; i < numFields; i++) {
2112 utext_close(fields[i]);
2113 }
2114
2115 /* Split with just enough output strings available (5) */
2116 status = U_ZERO_ERROR;
2117 fields[0] = NULL;
2118 fields[1] = NULL;
2119 fields[2] = NULL;
2120 fields[3] = NULL;
2121 fields[4] = NULL;
2122 fields[5] = &patternText;
2123 numFields = uregex_splitUText(re, fields, 5, &status);
2124 TEST_ASSERT_SUCCESS(status);
2125
2126 /* The TEST_ASSERT_SUCCESS call above should change too... */
2127 if(U_SUCCESS(status)) {
2128 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2129 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2130 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2131 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2132 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2133
2134 TEST_ASSERT(numFields == 5);
2135 TEST_ASSERT_UTEXT(str_first, fields[0]);
2136 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2137 TEST_ASSERT_UTEXT(str_second, fields[2]);
2138 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2139 TEST_ASSERT_UTEXT(str_third, fields[4]);
2140 TEST_ASSERT(fields[5] == &patternText);
2141 }
2142 for(i = 0; i < numFields; i++) {
2143 utext_close(fields[i]);
2144 }
2145
2146 /* Split, end of text is a field delimiter. */
2147 status = U_ZERO_ERROR;
2148 uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2149 TEST_ASSERT_SUCCESS(status);
2150
2151 /* The TEST_ASSERT_SUCCESS call above should change too... */
2152 if(U_SUCCESS(status)) {
2153 memset(fields, 0, sizeof(fields));
2154 fields[9] = &patternText;
2155 numFields = uregex_splitUText(re, fields, 9, &status);
2156 TEST_ASSERT_SUCCESS(status);
2157
2158 /* The TEST_ASSERT_SUCCESS call above should change too... */
2159 if(U_SUCCESS(status)) {
2160 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2161 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2162 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2163 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2164 const char str_empty[] = { 0x00 };
2165
2166 TEST_ASSERT(numFields == 5);
2167 TEST_ASSERT_UTEXT(str_first, fields[0]);
2168 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2169 TEST_ASSERT_UTEXT(str_second, fields[2]);
2170 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2171 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2172 TEST_ASSERT(fields[5] == NULL);
2173 TEST_ASSERT(fields[8] == NULL);
2174 TEST_ASSERT(fields[9] == &patternText);
2175 }
2176 for(i = 0; i < numFields; i++) {
2177 utext_close(fields[i]);
2178 }
2179 }
2180
2181 uregex_close(re);
2182 }
2183 utext_close(&patternText);
2184 }
2185
2186
2187 static void TestRefreshInput(void) {
2188 /*
2189 * RefreshInput changes out the input of a URegularExpression without
2190 * changing anything else in the match state. Used with Java JNI,
2191 * when Java moves the underlying string storage. This test
2192 * runs a find() loop, moving the text after the first match.
2193 * The right number of matches should still be found.
2194 */
2195 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2196 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2197 UErrorCode status = U_ZERO_ERROR;
2198 URegularExpression *re;
2199 UText ut1 = UTEXT_INITIALIZER;
2200 UText ut2 = UTEXT_INITIALIZER;
2201
2202 re = uregex_openC("[ABC]", 0, 0, &status);
2203 TEST_ASSERT_SUCCESS(status);
2204
2205 utext_openUChars(&ut1, testStr, -1, &status);
2206 TEST_ASSERT_SUCCESS(status);
2207 uregex_setUText(re, &ut1, &status);
2208 TEST_ASSERT_SUCCESS(status);
2209
2210 /* Find the first match "A" in the original string */
2211 TEST_ASSERT(uregex_findNext(re, &status));
2212 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2213
2214 /* Move the string, kill the original string. */
2215 u_strcpy(movedStr, testStr);
2216 u_memset(testStr, 0, u_strlen(testStr));
2217 utext_openUChars(&ut2, movedStr, -1, &status);
2218 TEST_ASSERT_SUCCESS(status);
2219 uregex_refreshUText(re, &ut2, &status);
2220 TEST_ASSERT_SUCCESS(status);
2221
2222 /* Find the following two matches, now working in the moved string. */
2223 TEST_ASSERT(uregex_findNext(re, &status));
2224 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2225 TEST_ASSERT(uregex_findNext(re, &status));
2226 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2227 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2228
2229 uregex_close(re);
2230 }
2231
2232
2233 static void TestBug8421(void) {
2234 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2235 * was failing.
2236 */
2237 URegularExpression *re;
2238 UErrorCode status = U_ZERO_ERROR;
2239 int32_t limit = -1;
2240
2241 re = uregex_openC("abc", 0, 0, &status);
2242 TEST_ASSERT_SUCCESS(status);
2243
2244 limit = uregex_getTimeLimit(re, &status);
2245 TEST_ASSERT_SUCCESS(status);
2246 TEST_ASSERT(limit == 0);
2247
2248 uregex_setTimeLimit(re, 100, &status);
2249 TEST_ASSERT_SUCCESS(status);
2250 limit = uregex_getTimeLimit(re, &status);
2251 TEST_ASSERT_SUCCESS(status);
2252 TEST_ASSERT(limit == 100);
2253
2254 uregex_close(re);
2255 }
2256
2257 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2258 return FALSE;
2259 }
2260
2261 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2262 return FALSE;
2263 }
2264
2265 static void TestBug10815() {
2266 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2267 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2268 */
2269 URegularExpression *re;
2270 UErrorCode status = U_ZERO_ERROR;
2271 UChar text[100];
2272
2273
2274 // findNext() with a find progress callback function.
2275
2276 re = uregex_openC(".z", 0, 0, &status);
2277 TEST_ASSERT_SUCCESS(status);
2278
2279 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2280 uregex_setText(re, text, -1, &status);
2281 TEST_ASSERT_SUCCESS(status);
2282
2283 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2284 TEST_ASSERT_SUCCESS(status);
2285
2286 uregex_findNext(re, &status);
2287 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2288
2289 uregex_close(re);
2290
2291 // findNext() with a match progress callback function.
2292
2293 status = U_ZERO_ERROR;
2294 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2295 TEST_ASSERT_SUCCESS(status);
2296
2297 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2298 // it will appear to be stuck in a (near) infinite loop.
2299 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2300 uregex_setText(re, text, -1, &status);
2301 TEST_ASSERT_SUCCESS(status);
2302
2303 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2304 TEST_ASSERT_SUCCESS(status);
2305
2306 uregex_findNext(re, &status);
2307 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2308
2309 uregex_close(re);
2310 }
2311
2312 static const UChar startLinePattern[] = { 0x5E, 0x78, 0 }; // "^x"
2313
2314 static void TestMatchStartLineWithEmptyText() {
2315 UErrorCode status = U_ZERO_ERROR;
2316 UText* ut = utext_openUChars(NULL, NULL, 0, &status);
2317 TEST_ASSERT_SUCCESS(status);
2318 if (U_SUCCESS(status)) {
2319 URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status);
2320 TEST_ASSERT_SUCCESS(status);
2321 if (U_SUCCESS(status)) {
2322 uregex_setUText(re, ut, &status);
2323 TEST_ASSERT(U_SUCCESS(status));
2324 if (U_SUCCESS(status)) {
2325 UBool found = uregex_findNext(re, &status);
2326 TEST_ASSERT(U_SUCCESS(status) && !found);
2327 }
2328 uregex_close(re);
2329 }
2330 utext_close(ut);
2331 }
2332 }
2333
2334 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */