]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/reapits.c
ICU-62123.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35
/*
 * TEST_ASSERT_SUCCESS(status)
 *   Reports a data error, tagged with the invoking file and line, when
 *   `status` holds an ICU failure code.  It only logs: control continues
 *   with the statement after the macro.
 *   Expands to a bare brace block rather than do { } while (0).
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
38
/*
 * TEST_ASSERT(expr)
 *   Logs a test failure, tagged with the invoking file and line and the
 *   text of the expression, when `expr` compares equal to FALSE.  It only
 *   logs: control continues with the statement after the macro.
 *   Kept as a bare brace block (not do { } while (0)) because at least one
 *   call site in this file invokes it without a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if (FALSE == (expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
41
/*
 * TEST_SETUP and TEST_TEARDOWN
 *   Macros to handle the boilerplate around setting up regex test cases.
 *   Parameters to TEST_SETUP:
 *     pattern:    The regex pattern, a (char *) NUL-terminated C string.
 *     testString: The string data, also a (char *) C string.
 *     flags:      Regex flags to set when compiling the pattern.
 *
 *   Both macros use the variables `status` (UErrorCode) and `re`
 *   (URegularExpression *) from the enclosing scope.
 *
 *   TEST_SETUP opens a new block, compiles the pattern into `re`, makes a
 *   heap-allocated UChar copy of testString (`srcString`) and sets it as the
 *   text of `re`.  If the copy cannot be allocated, status is set to
 *   U_MEMORY_ALLOCATION_ERROR (unless it already holds a failure) instead of
 *   writing through a NULL pointer.
 *
 *   Put arbitrary test code between SETUP and TEARDOWN; it runs only when
 *   setup succeeded.  "re" is the compiled, ready-to-go regular expression.
 *
 *   TEST_TEARDOWN reports any failure left in status, closes `re`, frees
 *   `srcString` and closes the block opened by TEST_SETUP.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)(strlen(testString)+1)); \
    } \
    uregex_setText(re, srcString, -1, &status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
}
70
71
72 /**
73 * @param expected utf-8 array of bytes to be expected
74 */
75 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
76 char buf_inside_macro[120];
77 int32_t len = (int32_t)strlen(expected);
78 UBool success;
79 if (nulTerm) {
80 u_austrncpy(buf_inside_macro, (actual), len+1);
81 buf_inside_macro[len+2] = 0;
82 success = (strcmp((expected), buf_inside_macro) == 0);
83 } else {
84 u_austrncpy(buf_inside_macro, (actual), len);
85 buf_inside_macro[len+1] = 0;
86 success = (strncmp((expected), buf_inside_macro, len) == 0);
87 }
88 if (success == FALSE) {
89 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
90 file, line, (expected), buf_inside_macro);
91 }
92 }
93
94 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
95
96
97 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
98 int32_t u8i = 0;
99 UChar32 u8c = 0;
100 UChar32 utc = 0;
101 UBool stringsEqual = TRUE;
102 utext_setNativeIndex(utext, 0);
103 for (;;) {
104 U8_NEXT_UNSAFE(utf8, u8i, u8c);
105 utc = utext_next32(utext);
106 if (u8c == 0 && utc == U_SENTINEL) {
107 break;
108 }
109 if (u8c != utc || u8c == 0) {
110 stringsEqual = FALSE;
111 break;
112 }
113 }
114 return stringsEqual;
115 }
116
117
118 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
119 utext_setNativeIndex(actual, 0);
120 if (!equals_utf8_utext(expected, actual)) {
121 UChar32 c;
122 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
123 c = utext_next32From(actual, 0);
124 while (c != U_SENTINEL) {
125 if (0x20<c && c <0x7e) {
126 log_err("%c", c);
127 } else {
128 log_err("%#x", c);
129 }
130 c = UTEXT_NEXT32(actual);
131 }
132 log_err("\"\n");
133 }
134 }
135
136 /*
137 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
138 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
139 */
140 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
141
142 static UBool testUTextEqual(UText *uta, UText *utb) {
143 UChar32 ca = 0;
144 UChar32 cb = 0;
145 utext_setNativeIndex(uta, 0);
146 utext_setNativeIndex(utb, 0);
147 do {
148 ca = utext_next32(uta);
149 cb = utext_next32(utb);
150 if (ca != cb) {
151 break;
152 }
153 } while (ca != U_SENTINEL);
154 return ca == cb;
155 }
156
157
158
159
160 static void TestRegexCAPI(void);
161 static void TestBug4315(void);
162 static void TestUTextAPI(void);
163 static void TestRefreshInput(void);
164 static void TestBug8421(void);
165 static void TestBug10815(void);
166 static void TestMatchStartLineWithEmptyText(void);
167
168 void addURegexTest(TestNode** root);
169
170 void addURegexTest(TestNode** root)
171 {
172 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
173 addTest(root, &TestBug4315, "regex/TestBug4315");
174 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
175 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
176 addTest(root, &TestBug8421, "regex/TestBug8421");
177 addTest(root, &TestBug10815, "regex/TestBug10815");
178 addTest(root, &TestMatchStartLineWithEmptyText, "regex/TestMatchStartLineWithEmptyText");
179 }
180
181 /*
182 * Call back function and context struct used for testing
183 * regular expression user callbacks. This test is mostly the same as
184 * the corresponding C++ test in intltest.
185 */
186 typedef struct callBackContext {
187 int32_t maxCalls;
188 int32_t numCalls;
189 int32_t lastSteps;
190 } callBackContext;
191
192 static UBool U_EXPORT2 U_CALLCONV
193 TestCallbackFn(const void *context, int32_t steps) {
194 callBackContext *info = (callBackContext *)context;
195 if (info->lastSteps+1 != steps) {
196 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
197 }
198 info->lastSteps = steps;
199 info->numCalls++;
200 return (info->numCalls < info->maxCalls);
201 }
202
203 /*
204 * Regular Expression C API Tests
205 */
206 static void TestRegexCAPI(void) {
207 UErrorCode status = U_ZERO_ERROR;
208 URegularExpression *re;
209 UChar pat[200];
210 UChar *minus1;
211
212 memset(&minus1, -1, sizeof(minus1));
213
214 /* Minimalist open/close */
215 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
216 re = uregex_open(pat, -1, 0, 0, &status);
217 if (U_FAILURE(status)) {
218 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
219 return;
220 }
221 uregex_close(re);
222
223 /* Open with all flag values set */
224 status = U_ZERO_ERROR;
225 re = uregex_open(pat, -1,
226 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
227 0, &status);
228 TEST_ASSERT_SUCCESS(status);
229 uregex_close(re);
230
231 /* Open with an invalid flag */
232 status = U_ZERO_ERROR;
233 re = uregex_open(pat, -1, 0x40000000, 0, &status);
234 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
235 uregex_close(re);
236
237 /* Open with an unimplemented flag */
238 status = U_ZERO_ERROR;
239 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
240 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
241 uregex_close(re);
242
243 /* openC with an invalid parameter */
244 status = U_ZERO_ERROR;
245 re = uregex_openC(NULL,
246 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
247 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
248
249 /* openC with an invalid parameter */
250 status = U_USELESS_COLLATOR_ERROR;
251 re = uregex_openC(NULL,
252 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
253 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
254
255 /* openC open from a C string */
256 {
257 const UChar *p;
258 int32_t len;
259 status = U_ZERO_ERROR;
260 re = uregex_openC("abc*", 0, 0, &status);
261 TEST_ASSERT_SUCCESS(status);
262 p = uregex_pattern(re, &len, &status);
263 TEST_ASSERT_SUCCESS(status);
264
265 /* The TEST_ASSERT_SUCCESS above should change too... */
266 if(U_SUCCESS(status)) {
267 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
268 TEST_ASSERT(u_strcmp(pat, p) == 0);
269 TEST_ASSERT(len==(int32_t)strlen("abc*"));
270 }
271
272 uregex_close(re);
273
274 /* TODO: Open with ParseError parameter */
275 }
276
277 /*
278 * clone
279 */
280 {
281 URegularExpression *clone1;
282 URegularExpression *clone2;
283 URegularExpression *clone3;
284 UChar testString1[30];
285 UChar testString2[30];
286 UBool result;
287
288
289 status = U_ZERO_ERROR;
290 re = uregex_openC("abc*", 0, 0, &status);
291 TEST_ASSERT_SUCCESS(status);
292 clone1 = uregex_clone(re, &status);
293 TEST_ASSERT_SUCCESS(status);
294 TEST_ASSERT(clone1 != NULL);
295
296 status = U_ZERO_ERROR;
297 clone2 = uregex_clone(re, &status);
298 TEST_ASSERT_SUCCESS(status);
299 TEST_ASSERT(clone2 != NULL);
300 uregex_close(re);
301
302 status = U_ZERO_ERROR;
303 clone3 = uregex_clone(clone2, &status);
304 TEST_ASSERT_SUCCESS(status);
305 TEST_ASSERT(clone3 != NULL);
306
307 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
308 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
309
310 status = U_ZERO_ERROR;
311 uregex_setText(clone1, testString1, -1, &status);
312 TEST_ASSERT_SUCCESS(status);
313 result = uregex_lookingAt(clone1, 0, &status);
314 TEST_ASSERT_SUCCESS(status);
315 TEST_ASSERT(result==TRUE);
316
317 status = U_ZERO_ERROR;
318 uregex_setText(clone2, testString2, -1, &status);
319 TEST_ASSERT_SUCCESS(status);
320 result = uregex_lookingAt(clone2, 0, &status);
321 TEST_ASSERT_SUCCESS(status);
322 TEST_ASSERT(result==FALSE);
323 result = uregex_find(clone2, 0, &status);
324 TEST_ASSERT_SUCCESS(status);
325 TEST_ASSERT(result==TRUE);
326
327 uregex_close(clone1);
328 uregex_close(clone2);
329 uregex_close(clone3);
330
331 }
332
333 /*
334 * pattern()
335 */
336 {
337 const UChar *resultPat;
338 int32_t resultLen;
339 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
340 status = U_ZERO_ERROR;
341 re = uregex_open(pat, -1, 0, NULL, &status);
342 resultPat = uregex_pattern(re, &resultLen, &status);
343 TEST_ASSERT_SUCCESS(status);
344
345 /* The TEST_ASSERT_SUCCESS above should change too... */
346 if (U_SUCCESS(status)) {
347 TEST_ASSERT(resultLen == -1);
348 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
349 }
350
351 uregex_close(re);
352
353 status = U_ZERO_ERROR;
354 re = uregex_open(pat, 3, 0, NULL, &status);
355 resultPat = uregex_pattern(re, &resultLen, &status);
356 TEST_ASSERT_SUCCESS(status);
357 TEST_ASSERT_SUCCESS(status);
358
359 /* The TEST_ASSERT_SUCCESS above should change too... */
360 if (U_SUCCESS(status)) {
361 TEST_ASSERT(resultLen == 3);
362 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
363 TEST_ASSERT(u_strlen(resultPat) == 3);
364 }
365
366 uregex_close(re);
367 }
368
369 /*
370 * flags()
371 */
372 {
373 int32_t t;
374
375 status = U_ZERO_ERROR;
376 re = uregex_open(pat, -1, 0, NULL, &status);
377 t = uregex_flags(re, &status);
378 TEST_ASSERT_SUCCESS(status);
379 TEST_ASSERT(t == 0);
380 uregex_close(re);
381
382 status = U_ZERO_ERROR;
383 re = uregex_open(pat, -1, 0, NULL, &status);
384 t = uregex_flags(re, &status);
385 TEST_ASSERT_SUCCESS(status);
386 TEST_ASSERT(t == 0);
387 uregex_close(re);
388
389 status = U_ZERO_ERROR;
390 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
391 t = uregex_flags(re, &status);
392 TEST_ASSERT_SUCCESS(status);
393 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
394 uregex_close(re);
395 }
396
397 /*
398 * setText() and lookingAt()
399 */
400 {
401 UChar text1[50];
402 UChar text2[50];
403 UBool result;
404
405 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
406 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
407 status = U_ZERO_ERROR;
408 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
409 re = uregex_open(pat, -1, 0, NULL, &status);
410 TEST_ASSERT_SUCCESS(status);
411
412 /* Operation before doing a setText should fail... */
413 status = U_ZERO_ERROR;
414 uregex_lookingAt(re, 0, &status);
415 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
416
417 status = U_ZERO_ERROR;
418 uregex_setText(re, text1, -1, &status);
419 result = uregex_lookingAt(re, 0, &status);
420 TEST_ASSERT(result == TRUE);
421 TEST_ASSERT_SUCCESS(status);
422
423 status = U_ZERO_ERROR;
424 uregex_setText(re, text2, -1, &status);
425 result = uregex_lookingAt(re, 0, &status);
426 TEST_ASSERT(result == FALSE);
427 TEST_ASSERT_SUCCESS(status);
428
429 status = U_ZERO_ERROR;
430 uregex_setText(re, text1, -1, &status);
431 result = uregex_lookingAt(re, 0, &status);
432 TEST_ASSERT(result == TRUE);
433 TEST_ASSERT_SUCCESS(status);
434
435 status = U_ZERO_ERROR;
436 uregex_setText(re, text1, 5, &status);
437 result = uregex_lookingAt(re, 0, &status);
438 TEST_ASSERT(result == FALSE);
439 TEST_ASSERT_SUCCESS(status);
440
441 status = U_ZERO_ERROR;
442 uregex_setText(re, text1, 6, &status);
443 result = uregex_lookingAt(re, 0, &status);
444 TEST_ASSERT(result == TRUE);
445 TEST_ASSERT_SUCCESS(status);
446
447 uregex_close(re);
448 }
449
450
451 /*
452 * getText()
453 */
454 {
455 UChar text1[50];
456 UChar text2[50];
457 const UChar *result;
458 int32_t textLength;
459
460 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
461 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
462 status = U_ZERO_ERROR;
463 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
464 re = uregex_open(pat, -1, 0, NULL, &status);
465
466 uregex_setText(re, text1, -1, &status);
467 result = uregex_getText(re, &textLength, &status);
468 TEST_ASSERT(result == text1);
469 TEST_ASSERT(textLength == -1);
470 TEST_ASSERT_SUCCESS(status);
471
472 status = U_ZERO_ERROR;
473 uregex_setText(re, text2, 7, &status);
474 result = uregex_getText(re, &textLength, &status);
475 TEST_ASSERT(result == text2);
476 TEST_ASSERT(textLength == 7);
477 TEST_ASSERT_SUCCESS(status);
478
479 status = U_ZERO_ERROR;
480 uregex_setText(re, text2, 4, &status);
481 result = uregex_getText(re, &textLength, &status);
482 TEST_ASSERT(result == text2);
483 TEST_ASSERT(textLength == 4);
484 TEST_ASSERT_SUCCESS(status);
485 uregex_close(re);
486 }
487
488 /*
489 * matches()
490 */
491 {
492 UChar text1[50];
493 UBool result;
494 int len;
495 UChar nullString[] = {0,0,0};
496
497 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
498 status = U_ZERO_ERROR;
499 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
500 re = uregex_open(pat, -1, 0, NULL, &status);
501
502 uregex_setText(re, text1, -1, &status);
503 result = uregex_matches(re, 0, &status);
504 TEST_ASSERT(result == FALSE);
505 TEST_ASSERT_SUCCESS(status);
506
507 status = U_ZERO_ERROR;
508 uregex_setText(re, text1, 6, &status);
509 result = uregex_matches(re, 0, &status);
510 TEST_ASSERT(result == TRUE);
511 TEST_ASSERT_SUCCESS(status);
512
513 status = U_ZERO_ERROR;
514 uregex_setText(re, text1, 6, &status);
515 result = uregex_matches(re, 1, &status);
516 TEST_ASSERT(result == FALSE);
517 TEST_ASSERT_SUCCESS(status);
518 uregex_close(re);
519
520 status = U_ZERO_ERROR;
521 re = uregex_openC(".?", 0, NULL, &status);
522 uregex_setText(re, text1, -1, &status);
523 len = u_strlen(text1);
524 result = uregex_matches(re, len, &status);
525 TEST_ASSERT(result == TRUE);
526 TEST_ASSERT_SUCCESS(status);
527
528 status = U_ZERO_ERROR;
529 uregex_setText(re, nullString, -1, &status);
530 TEST_ASSERT_SUCCESS(status);
531 result = uregex_matches(re, 0, &status);
532 TEST_ASSERT(result == TRUE);
533 TEST_ASSERT_SUCCESS(status);
534 uregex_close(re);
535 }
536
537
538 /*
539 * lookingAt() Used in setText test.
540 */
541
542
543 /*
544 * find(), findNext, start, end, reset
545 */
546 {
547 UChar text1[50];
548 UBool result;
549 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
550 status = U_ZERO_ERROR;
551 re = uregex_openC("rx", 0, NULL, &status);
552
553 uregex_setText(re, text1, -1, &status);
554 result = uregex_find(re, 0, &status);
555 TEST_ASSERT(result == TRUE);
556 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
557 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
558 TEST_ASSERT_SUCCESS(status);
559
560 result = uregex_find(re, 9, &status);
561 TEST_ASSERT(result == TRUE);
562 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
563 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
564 TEST_ASSERT_SUCCESS(status);
565
566 result = uregex_find(re, 14, &status);
567 TEST_ASSERT(result == FALSE);
568 TEST_ASSERT_SUCCESS(status);
569
570 status = U_ZERO_ERROR;
571 uregex_reset(re, 0, &status);
572
573 result = uregex_findNext(re, &status);
574 TEST_ASSERT(result == TRUE);
575 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
576 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
577 TEST_ASSERT_SUCCESS(status);
578
579 result = uregex_findNext(re, &status);
580 TEST_ASSERT(result == TRUE);
581 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
582 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
583 TEST_ASSERT_SUCCESS(status);
584
585 status = U_ZERO_ERROR;
586 uregex_reset(re, 12, &status);
587
588 result = uregex_findNext(re, &status);
589 TEST_ASSERT(result == TRUE);
590 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
591 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
592 TEST_ASSERT_SUCCESS(status);
593
594 result = uregex_findNext(re, &status);
595 TEST_ASSERT(result == FALSE);
596 TEST_ASSERT_SUCCESS(status);
597
598 uregex_close(re);
599 }
600
601 /*
602 * groupCount
603 */
604 {
605 int32_t result;
606
607 status = U_ZERO_ERROR;
608 re = uregex_openC("abc", 0, NULL, &status);
609 result = uregex_groupCount(re, &status);
610 TEST_ASSERT_SUCCESS(status);
611 TEST_ASSERT(result == 0);
612 uregex_close(re);
613
614 status = U_ZERO_ERROR;
615 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
616 result = uregex_groupCount(re, &status);
617 TEST_ASSERT_SUCCESS(status);
618 TEST_ASSERT(result == 3);
619 uregex_close(re);
620
621 }
622
623
624 /*
625 * group()
626 */
627 {
628 UChar text1[80];
629 UChar buf[80];
630 UBool result;
631 int32_t resultSz;
632 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
633
634 status = U_ZERO_ERROR;
635 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
636 TEST_ASSERT_SUCCESS(status);
637
638
639 uregex_setText(re, text1, -1, &status);
640 result = uregex_find(re, 0, &status);
641 TEST_ASSERT(result==TRUE);
642
643 /* Capture Group 0, the full match. Should succeed. */
644 status = U_ZERO_ERROR;
645 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
646 TEST_ASSERT_SUCCESS(status);
647 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
648 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
649
650 /* Capture group #1. Should succeed. */
651 status = U_ZERO_ERROR;
652 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
653 TEST_ASSERT_SUCCESS(status);
654 TEST_ASSERT_STRING(" interior ", buf, TRUE);
655 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
656
657 /* Capture group out of range. Error. */
658 status = U_ZERO_ERROR;
659 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
660 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
661
662 /* NULL buffer, pure pre-flight */
663 status = U_ZERO_ERROR;
664 resultSz = uregex_group(re, 0, NULL, 0, &status);
665 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
666 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
667
668 /* Too small buffer, truncated string */
669 status = U_ZERO_ERROR;
670 memset(buf, -1, sizeof(buf));
671 resultSz = uregex_group(re, 0, buf, 5, &status);
672 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
673 TEST_ASSERT_STRING("abc i", buf, FALSE);
674 TEST_ASSERT(buf[5] == (UChar)0xffff);
675 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
676
677 /* Output string just fits buffer, no NUL term. */
678 status = U_ZERO_ERROR;
679 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
680 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
681 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
682 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
683 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
684
685 uregex_close(re);
686
687 }
688
689 /*
690 * Regions
691 */
692
693
694 /* SetRegion(), getRegion() do something */
695 TEST_SETUP(".*", "0123456789ABCDEF", 0)
696 UChar resultString[40];
697 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
698 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
699 uregex_setRegion(re, 3, 6, &status);
700 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
701 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
702 TEST_ASSERT(uregex_findNext(re, &status));
703 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
704 TEST_ASSERT_STRING("345", resultString, TRUE);
705 TEST_TEARDOWN;
706
707 /* find(start=-1) uses regions */
708 TEST_SETUP(".*", "0123456789ABCDEF", 0);
709 uregex_setRegion(re, 4, 6, &status);
710 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
711 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
712 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
713 TEST_TEARDOWN;
714
715 /* find (start >=0) does not use regions */
716 TEST_SETUP(".*", "0123456789ABCDEF", 0);
717 uregex_setRegion(re, 4, 6, &status);
718 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
719 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
720 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
721 TEST_TEARDOWN;
722
723 /* findNext() obeys regions */
724 TEST_SETUP(".", "0123456789ABCDEF", 0);
725 uregex_setRegion(re, 4, 6, &status);
726 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
727 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
728 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
729 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
730 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
731 TEST_TEARDOWN;
732
733 /* matches(start=-1) uses regions */
734 /* Also, verify that non-greedy *? succeeds in finding the full match. */
735 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
736 uregex_setRegion(re, 4, 6, &status);
737 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
738 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
739 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
740 TEST_TEARDOWN;
741
742 /* matches (start >=0) does not use regions */
743 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
744 uregex_setRegion(re, 4, 6, &status);
745 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
746 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
747 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
748 TEST_TEARDOWN;
749
750 /* lookingAt(start=-1) uses regions */
751 /* Also, verify that non-greedy *? finds the first (shortest) match. */
752 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
753 uregex_setRegion(re, 4, 6, &status);
754 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
755 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
756 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
757 TEST_TEARDOWN;
758
759 /* lookingAt (start >=0) does not use regions */
760 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
761 uregex_setRegion(re, 4, 6, &status);
762 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
763 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
764 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
765 TEST_TEARDOWN;
766
767 /* hitEnd() */
768 TEST_SETUP("[a-f]*", "abcdefghij", 0);
769 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
770 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
771 TEST_TEARDOWN;
772
773 TEST_SETUP("[a-f]*", "abcdef", 0);
774 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
775 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
776 TEST_TEARDOWN;
777
778 /* requireEnd */
779 TEST_SETUP("abcd", "abcd", 0);
780 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
781 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
782 TEST_TEARDOWN;
783
784 TEST_SETUP("abcd$", "abcd", 0);
785 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
786 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
787 TEST_TEARDOWN;
788
789 /* anchoringBounds */
790 TEST_SETUP("abc$", "abcdef", 0);
791 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
792 uregex_useAnchoringBounds(re, FALSE, &status);
793 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
794
795 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
796 uregex_useAnchoringBounds(re, TRUE, &status);
797 uregex_setRegion(re, 0, 3, &status);
798 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
799 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
800 TEST_TEARDOWN;
801
802 /* Transparent Bounds */
803 TEST_SETUP("abc(?=def)", "abcdef", 0);
804 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
805 uregex_useTransparentBounds(re, TRUE, &status);
806 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
807
808 uregex_useTransparentBounds(re, FALSE, &status);
809 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
810 uregex_setRegion(re, 0, 3, &status);
811 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
812 uregex_useTransparentBounds(re, TRUE, &status);
813 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
814 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
815 TEST_TEARDOWN;
816
817
818 /*
819 * replaceFirst()
820 */
821 {
822 UChar text1[80];
823 UChar text2[80];
824 UChar replText[80];
825 UChar buf[80];
826 int32_t resultSz;
827 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
828 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
829 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
830
831 status = U_ZERO_ERROR;
832 re = uregex_openC("x(.*?)x", 0, NULL, &status);
833 TEST_ASSERT_SUCCESS(status);
834
835 /* Normal case, with match */
836 uregex_setText(re, text1, -1, &status);
837 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
838 TEST_ASSERT_SUCCESS(status);
839 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
840 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
841
842 /* No match. Text should copy to output with no changes. */
843 status = U_ZERO_ERROR;
844 uregex_setText(re, text2, -1, &status);
845 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
846 TEST_ASSERT_SUCCESS(status);
847 TEST_ASSERT_STRING("No match here.", buf, TRUE);
848 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
849
850 /* Match, output just fills buffer, no termination warning. */
851 status = U_ZERO_ERROR;
852 uregex_setText(re, text1, -1, &status);
853 memset(buf, -1, sizeof(buf));
854 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
855 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
856 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
857 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
858 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
859
860 /* Do the replaceFirst again, without first resetting anything.
861 * Should give the same results.
862 */
863 status = U_ZERO_ERROR;
864 memset(buf, -1, sizeof(buf));
865 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
866 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
867 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
868 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
869 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
870
871 /* NULL buffer, zero buffer length */
872 status = U_ZERO_ERROR;
873 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
874 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
875 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
876
877 /* Buffer too small by one */
878 status = U_ZERO_ERROR;
879 memset(buf, -1, sizeof(buf));
880 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
881 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
882 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
883 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
884 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
885
886 uregex_close(re);
887 }
888
889
890 /*
891 * replaceAll()
892 */
893 {
894 UChar text1[80]; /* "Replace xaax x1x x...x." */
895 UChar text2[80]; /* "No match Here" */
896 UChar replText[80]; /* "<$1>" */
897 UChar replText2[80]; /* "<<$1>>" */
898 const char * pattern = "x(.*?)x";
899 const char * expectedResult = "Replace <aa> <1> <...>.";
900 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
901 UChar buf[80];
902 int32_t resultSize;
903 int32_t expectedResultSize;
904 int32_t expectedResultSize2;
905 int32_t i;
906
907 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
908 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
909 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
910 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
911 expectedResultSize = strlen(expectedResult);
912 expectedResultSize2 = strlen(expectedResult2);
913
914 status = U_ZERO_ERROR;
915 re = uregex_openC(pattern, 0, NULL, &status);
916 TEST_ASSERT_SUCCESS(status);
917
918 /* Normal case, with match */
919 uregex_setText(re, text1, -1, &status);
920 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
921 TEST_ASSERT_SUCCESS(status);
922 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
923 TEST_ASSERT(resultSize == expectedResultSize);
924
925 /* No match. Text should copy to output with no changes. */
926 status = U_ZERO_ERROR;
927 uregex_setText(re, text2, -1, &status);
928 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
929 TEST_ASSERT_SUCCESS(status);
930 TEST_ASSERT_STRING("No match here.", buf, TRUE);
931 TEST_ASSERT(resultSize == u_strlen(text2));
932
933 /* Match, output just fills buffer, no termination warning. */
934 status = U_ZERO_ERROR;
935 uregex_setText(re, text1, -1, &status);
936 memset(buf, -1, sizeof(buf));
937 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
938 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
939 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
940 TEST_ASSERT(resultSize == expectedResultSize);
941 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
942
943 /* Do the replaceAll again, without first resetting anything.
944 * Should give the same results.
945 */
946 status = U_ZERO_ERROR;
947 memset(buf, -1, sizeof(buf));
948 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
949 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
950 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
951 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
952 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
953
954 /* NULL buffer, zero buffer length */
955 status = U_ZERO_ERROR;
956 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
957 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
958 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
959
960 /* Buffer too small. Try every size, which will tickle edge cases
961 * in uregex_appendReplacement (used by replaceAll) */
962 for (i=0; i<expectedResultSize; i++) {
963 char expected[80];
964 status = U_ZERO_ERROR;
965 memset(buf, -1, sizeof(buf));
966 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
967 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
968 strcpy(expected, expectedResult);
969 expected[i] = 0;
970 TEST_ASSERT_STRING(expected, buf, FALSE);
971 TEST_ASSERT(resultSize == expectedResultSize);
972 TEST_ASSERT(buf[i] == (UChar)0xffff);
973 }
974
975 /* Buffer too small. Same as previous test, except this time the replacement
976 * text is longer than the match capture group, making the length of the complete
977 * replacement longer than the original string.
978 */
979 for (i=0; i<expectedResultSize2; i++) {
980 char expected[80];
981 status = U_ZERO_ERROR;
982 memset(buf, -1, sizeof(buf));
983 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
984 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
985 strcpy(expected, expectedResult2);
986 expected[i] = 0;
987 TEST_ASSERT_STRING(expected, buf, FALSE);
988 TEST_ASSERT(resultSize == expectedResultSize2);
989 TEST_ASSERT(buf[i] == (UChar)0xffff);
990 }
991
992
993 uregex_close(re);
994 }
995
996
997 /*
998 * appendReplacement()
999 */
1000 {
1001 UChar text[100];
1002 UChar repl[100];
1003 UChar buf[100];
1004 UChar *bufPtr;
1005 int32_t bufCap;
1006
1007
1008 status = U_ZERO_ERROR;
1009 re = uregex_openC(".*", 0, 0, &status);
1010 TEST_ASSERT_SUCCESS(status);
1011
1012 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1013 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1014 uregex_setText(re, text, -1, &status);
1015
1016 /* match covers whole target string */
1017 uregex_find(re, 0, &status);
1018 TEST_ASSERT_SUCCESS(status);
1019 bufPtr = buf;
1020 bufCap = UPRV_LENGTHOF(buf);
1021 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1022 TEST_ASSERT_SUCCESS(status);
1023 TEST_ASSERT_STRING("some other", buf, TRUE);
1024
1025 /* Match has \u \U escapes */
1026 uregex_find(re, 0, &status);
1027 TEST_ASSERT_SUCCESS(status);
1028 bufPtr = buf;
1029 bufCap = UPRV_LENGTHOF(buf);
1030 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1031 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1032 TEST_ASSERT_SUCCESS(status);
1033 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1034
1035 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1036 status = U_ZERO_ERROR;
1037 uregex_find(re, 0, &status);
1038 TEST_ASSERT_SUCCESS(status);
1039 bufPtr = buf;
1040 status = U_BUFFER_OVERFLOW_ERROR;
1041 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1042 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1043
1044 uregex_close(re);
1045 }
1046
1047
1048 /*
1049 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1050 */
1051
1052 /*
1053 * split()
1054 */
1055 {
1056 UChar textToSplit[80];
1057 UChar text2[80];
1058 UChar buf[200];
1059 UChar *fields[10];
1060 int32_t numFields;
1061 int32_t requiredCapacity;
1062 int32_t spaceNeeded;
1063 int32_t sz;
1064
1065 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1066 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1067
1068 status = U_ZERO_ERROR;
1069 re = uregex_openC(":", 0, NULL, &status);
1070
1071
1072 /* Simple split */
1073
1074 uregex_setText(re, textToSplit, -1, &status);
1075 TEST_ASSERT_SUCCESS(status);
1076
1077 /* The TEST_ASSERT_SUCCESS call above should change too... */
1078 if (U_SUCCESS(status)) {
1079 memset(fields, -1, sizeof(fields));
1080 numFields =
1081 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1082 TEST_ASSERT_SUCCESS(status);
1083
1084 /* The TEST_ASSERT_SUCCESS call above should change too... */
1085 if(U_SUCCESS(status)) {
1086 TEST_ASSERT(numFields == 3);
1087 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1088 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1089 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1090 TEST_ASSERT(fields[3] == NULL);
1091
1092 spaceNeeded = u_strlen(textToSplit) -
1093 (numFields - 1) + /* Field delimiters do not appear in output */
1094 numFields; /* Each field gets a NUL terminator */
1095
1096 TEST_ASSERT(spaceNeeded == requiredCapacity);
1097 }
1098 }
1099
1100 uregex_close(re);
1101
1102
1103 /* Split with too few output strings available */
1104 status = U_ZERO_ERROR;
1105 re = uregex_openC(":", 0, NULL, &status);
1106 uregex_setText(re, textToSplit, -1, &status);
1107 TEST_ASSERT_SUCCESS(status);
1108
1109 /* The TEST_ASSERT_SUCCESS call above should change too... */
1110 if(U_SUCCESS(status)) {
1111 memset(fields, -1, sizeof(fields));
1112 numFields =
1113 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1114 TEST_ASSERT_SUCCESS(status);
1115
1116 /* The TEST_ASSERT_SUCCESS call above should change too... */
1117 if(U_SUCCESS(status)) {
1118 TEST_ASSERT(numFields == 2);
1119 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1120 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1121 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1122
1123 spaceNeeded = u_strlen(textToSplit) -
1124 (numFields - 1) + /* Field delimiters do not appear in output */
1125 numFields; /* Each field gets a NUL terminator */
1126
1127 TEST_ASSERT(spaceNeeded == requiredCapacity);
1128
1129 /* Split with a range of output buffer sizes. */
1130 spaceNeeded = u_strlen(textToSplit) -
1131 (numFields - 1) + /* Field delimiters do not appear in output */
1132 numFields; /* Each field gets a NUL terminator */
1133
1134 for (sz=0; sz < spaceNeeded+1; sz++) {
1135 memset(fields, -1, sizeof(fields));
1136 status = U_ZERO_ERROR;
1137 numFields =
1138 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1139 if (sz >= spaceNeeded) {
1140 TEST_ASSERT_SUCCESS(status);
1141 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1142 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1143 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1144 } else {
1145 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1146 }
1147 TEST_ASSERT(numFields == 3);
1148 TEST_ASSERT(fields[3] == NULL);
1149 TEST_ASSERT(spaceNeeded == requiredCapacity);
1150 }
1151 }
1152 }
1153
1154 uregex_close(re);
1155 }
1156
1157
1158
1159
1160 /* Split(), part 2. Patterns with capture groups. The capture group text
1161 * comes out as additional fields. */
1162 {
1163 UChar textToSplit[80];
1164 UChar buf[200];
1165 UChar *fields[10];
1166 int32_t numFields;
1167 int32_t requiredCapacity;
1168 int32_t spaceNeeded;
1169 int32_t sz;
1170
1171 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1172
1173 status = U_ZERO_ERROR;
1174 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1175
1176 uregex_setText(re, textToSplit, -1, &status);
1177 TEST_ASSERT_SUCCESS(status);
1178
1179 /* The TEST_ASSERT_SUCCESS call above should change too... */
1180 if(U_SUCCESS(status)) {
1181 memset(fields, -1, sizeof(fields));
1182 numFields =
1183 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1184 TEST_ASSERT_SUCCESS(status);
1185
1186 /* The TEST_ASSERT_SUCCESS call above should change too... */
1187 if(U_SUCCESS(status)) {
1188 TEST_ASSERT(numFields == 5);
1189 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1190 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1191 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1192 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1193 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1194 TEST_ASSERT(fields[5] == NULL);
1195 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1196 TEST_ASSERT(spaceNeeded == requiredCapacity);
1197 }
1198 }
1199
1200 /* Split with too few output strings available (2) */
1201 status = U_ZERO_ERROR;
1202 memset(fields, -1, sizeof(fields));
1203 numFields =
1204 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1205 TEST_ASSERT_SUCCESS(status);
1206
1207 /* The TEST_ASSERT_SUCCESS call above should change too... */
1208 if(U_SUCCESS(status)) {
1209 TEST_ASSERT(numFields == 2);
1210 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1211 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1212 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1213
1214 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1215 TEST_ASSERT(spaceNeeded == requiredCapacity);
1216 }
1217
1218 /* Split with too few output strings available (3) */
1219 status = U_ZERO_ERROR;
1220 memset(fields, -1, sizeof(fields));
1221 numFields =
1222 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1223 TEST_ASSERT_SUCCESS(status);
1224
1225 /* The TEST_ASSERT_SUCCESS call above should change too... */
1226 if(U_SUCCESS(status)) {
1227 TEST_ASSERT(numFields == 3);
1228 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1229 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1230 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1231 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1232
1233 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1234 TEST_ASSERT(spaceNeeded == requiredCapacity);
1235 }
1236
1237 /* Split with just enough output strings available (5) */
1238 status = U_ZERO_ERROR;
1239 memset(fields, -1, sizeof(fields));
1240 numFields =
1241 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1242 TEST_ASSERT_SUCCESS(status);
1243
1244 /* The TEST_ASSERT_SUCCESS call above should change too... */
1245 if(U_SUCCESS(status)) {
1246 TEST_ASSERT(numFields == 5);
1247 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1248 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1249 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1250 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1251 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1252 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1253
1254 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1255 TEST_ASSERT(spaceNeeded == requiredCapacity);
1256 }
1257
1258 /* Split, end of text is a field delimiter. */
1259 status = U_ZERO_ERROR;
1260 sz = strlen("first <tag-a> second<tag-b>");
1261 uregex_setText(re, textToSplit, sz, &status);
1262 TEST_ASSERT_SUCCESS(status);
1263
1264 /* The TEST_ASSERT_SUCCESS call above should change too... */
1265 if(U_SUCCESS(status)) {
1266 memset(fields, -1, sizeof(fields));
1267 numFields =
1268 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1269 TEST_ASSERT_SUCCESS(status);
1270
1271 /* The TEST_ASSERT_SUCCESS call above should change too... */
1272 if(U_SUCCESS(status)) {
1273 TEST_ASSERT(numFields == 5);
1274 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1275 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1276 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1277 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1278 TEST_ASSERT_STRING("", fields[4], TRUE);
1279 TEST_ASSERT(fields[5] == NULL);
1280 TEST_ASSERT(fields[8] == NULL);
1281 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1282 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1283 TEST_ASSERT(spaceNeeded == requiredCapacity);
1284 }
1285 }
1286
1287 uregex_close(re);
1288 }
1289
1290 /*
1291 * set/getTimeLimit
1292 */
1293 TEST_SETUP("abc$", "abcdef", 0);
1294 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1295 uregex_setTimeLimit(re, 1000, &status);
1296 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1297 TEST_ASSERT_SUCCESS(status);
1298 uregex_setTimeLimit(re, -1, &status);
1299 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1300 status = U_ZERO_ERROR;
1301 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1302 TEST_TEARDOWN;
1303
1304 /*
1305 * set/get Stack Limit
1306 */
1307 TEST_SETUP("abc$", "abcdef", 0);
1308 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1309 uregex_setStackLimit(re, 40000, &status);
1310 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1311 TEST_ASSERT_SUCCESS(status);
1312 uregex_setStackLimit(re, -1, &status);
1313 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1314 status = U_ZERO_ERROR;
1315 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1316 TEST_TEARDOWN;
1317
1318
1319 /*
1320 * Get/Set callback functions
1321 * This test is copied from intltest regex/Callbacks
1322 * The pattern and test data will run long enough to cause the callback
1323 * to be invoked. The nested '+' operators give exponential time
1324 * behavior with increasing string length.
1325 */
1326 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1327 callBackContext cbInfo = {4, 0, 0};
1328 const void *pContext = &cbInfo;
1329 URegexMatchCallback *returnedFn = &TestCallbackFn;
1330
1331 /* Getting the callback fn when it hasn't been set must return NULL */
1332 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1333 TEST_ASSERT_SUCCESS(status);
1334 TEST_ASSERT(returnedFn == NULL);
1335 TEST_ASSERT(pContext == NULL);
1336
1337 /* Set the callback and do a match. */
1338 /* The callback function should record that it has been called. */
1339 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1340 TEST_ASSERT_SUCCESS(status);
1341 TEST_ASSERT(cbInfo.numCalls == 0);
1342 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1343 TEST_ASSERT_SUCCESS(status);
1344 TEST_ASSERT(cbInfo.numCalls > 0);
1345
1346 /* Getting the callback should return the values that were set above. */
1347 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1348 TEST_ASSERT(returnedFn == &TestCallbackFn);
1349 TEST_ASSERT(pContext == &cbInfo);
1350
1351 TEST_TEARDOWN;
1352 }
1353
1354
1355
1356 static void TestBug4315(void) {
1357 UErrorCode theICUError = U_ZERO_ERROR;
1358 URegularExpression *theRegEx;
1359 UChar *textBuff;
1360 const char *thePattern;
1361 UChar theString[100];
1362 UChar *destFields[24];
1363 int32_t neededLength1;
1364 int32_t neededLength2;
1365
1366 int32_t wordCount = 0;
1367 int32_t destFieldsSize = 24;
1368
1369 thePattern = "ck ";
1370 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1371
1372 /* open a regex */
1373 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1374 TEST_ASSERT_SUCCESS(theICUError);
1375
1376 /* set the input string */
1377 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1378 TEST_ASSERT_SUCCESS(theICUError);
1379
1380 /* split */
1381 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1382 * error occurs! */
1383 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1384 destFieldsSize, &theICUError);
1385
1386 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1387 TEST_ASSERT(wordCount==3);
1388
1389 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1390 {
1391 theICUError = U_ZERO_ERROR;
1392 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1393 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1394 destFields, destFieldsSize, &theICUError);
1395 TEST_ASSERT(wordCount==3);
1396 TEST_ASSERT_SUCCESS(theICUError);
1397 TEST_ASSERT(neededLength1 == neededLength2);
1398 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1399 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1400 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1401 TEST_ASSERT(destFields[3] == NULL);
1402 free(textBuff);
1403 }
1404 uregex_close(theRegEx);
1405 }
1406
1407 /* Based on TestRegexCAPI() */
1408 static void TestUTextAPI(void) {
1409 UErrorCode status = U_ZERO_ERROR;
1410 URegularExpression *re;
1411 UText patternText = UTEXT_INITIALIZER;
1412 UChar pat[200];
1413 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1414
1415 /* Minimalist open/close */
1416 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1417 re = uregex_openUText(&patternText, 0, 0, &status);
1418 if (U_FAILURE(status)) {
1419 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1420 utext_close(&patternText);
1421 return;
1422 }
1423 uregex_close(re);
1424
1425 /* Open with all flag values set */
1426 status = U_ZERO_ERROR;
1427 re = uregex_openUText(&patternText,
1428 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1429 0, &status);
1430 TEST_ASSERT_SUCCESS(status);
1431 uregex_close(re);
1432
1433 /* Open with an invalid flag */
1434 status = U_ZERO_ERROR;
1435 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1436 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1437 uregex_close(re);
1438
1439 /* open with an invalid parameter */
1440 status = U_ZERO_ERROR;
1441 re = uregex_openUText(NULL,
1442 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1443 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1444
1445 /*
1446 * clone
1447 */
1448 {
1449 URegularExpression *clone1;
1450 URegularExpression *clone2;
1451 URegularExpression *clone3;
1452 UChar testString1[30];
1453 UChar testString2[30];
1454 UBool result;
1455
1456
1457 status = U_ZERO_ERROR;
1458 re = uregex_openUText(&patternText, 0, 0, &status);
1459 TEST_ASSERT_SUCCESS(status);
1460 clone1 = uregex_clone(re, &status);
1461 TEST_ASSERT_SUCCESS(status);
1462 TEST_ASSERT(clone1 != NULL);
1463
1464 status = U_ZERO_ERROR;
1465 clone2 = uregex_clone(re, &status);
1466 TEST_ASSERT_SUCCESS(status);
1467 TEST_ASSERT(clone2 != NULL);
1468 uregex_close(re);
1469
1470 status = U_ZERO_ERROR;
1471 clone3 = uregex_clone(clone2, &status);
1472 TEST_ASSERT_SUCCESS(status);
1473 TEST_ASSERT(clone3 != NULL);
1474
1475 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1476 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1477
1478 status = U_ZERO_ERROR;
1479 uregex_setText(clone1, testString1, -1, &status);
1480 TEST_ASSERT_SUCCESS(status);
1481 result = uregex_lookingAt(clone1, 0, &status);
1482 TEST_ASSERT_SUCCESS(status);
1483 TEST_ASSERT(result==TRUE);
1484
1485 status = U_ZERO_ERROR;
1486 uregex_setText(clone2, testString2, -1, &status);
1487 TEST_ASSERT_SUCCESS(status);
1488 result = uregex_lookingAt(clone2, 0, &status);
1489 TEST_ASSERT_SUCCESS(status);
1490 TEST_ASSERT(result==FALSE);
1491 result = uregex_find(clone2, 0, &status);
1492 TEST_ASSERT_SUCCESS(status);
1493 TEST_ASSERT(result==TRUE);
1494
1495 uregex_close(clone1);
1496 uregex_close(clone2);
1497 uregex_close(clone3);
1498
1499 }
1500
1501 /*
1502 * pattern() and patternText()
1503 */
1504 {
1505 const UChar *resultPat;
1506 int32_t resultLen;
1507 UText *resultText;
1508 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1509 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1510 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1511 status = U_ZERO_ERROR;
1512
1513 utext_openUTF8(&patternText, str_hello, -1, &status);
1514 re = uregex_open(pat, -1, 0, NULL, &status);
1515 resultPat = uregex_pattern(re, &resultLen, &status);
1516 TEST_ASSERT_SUCCESS(status);
1517
1518 /* The TEST_ASSERT_SUCCESS above should change too... */
1519 if (U_SUCCESS(status)) {
1520 TEST_ASSERT(resultLen == -1);
1521 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1522 }
1523
1524 resultText = uregex_patternUText(re, &status);
1525 TEST_ASSERT_SUCCESS(status);
1526 TEST_ASSERT_UTEXT(str_hello, resultText);
1527
1528 uregex_close(re);
1529
1530 status = U_ZERO_ERROR;
1531 re = uregex_open(pat, 3, 0, NULL, &status);
1532 resultPat = uregex_pattern(re, &resultLen, &status);
1533 TEST_ASSERT_SUCCESS(status);
1534
1535 /* The TEST_ASSERT_SUCCESS above should change too... */
1536 if (U_SUCCESS(status)) {
1537 TEST_ASSERT(resultLen == 3);
1538 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1539 TEST_ASSERT(u_strlen(resultPat) == 3);
1540 }
1541
1542 resultText = uregex_patternUText(re, &status);
1543 TEST_ASSERT_SUCCESS(status);
1544 TEST_ASSERT_UTEXT(str_hel, resultText);
1545
1546 uregex_close(re);
1547 }
1548
1549 /*
1550 * setUText() and lookingAt()
1551 */
1552 {
1553 UText text1 = UTEXT_INITIALIZER;
1554 UText text2 = UTEXT_INITIALIZER;
1555 UBool result;
1556 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1557 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1558 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1559 status = U_ZERO_ERROR;
1560 utext_openUTF8(&text1, str_abcccd, -1, &status);
1561 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1562
1563 utext_openUTF8(&patternText, str_abcd, -1, &status);
1564 re = uregex_openUText(&patternText, 0, NULL, &status);
1565 TEST_ASSERT_SUCCESS(status);
1566
1567 /* Operation before doing a setText should fail... */
1568 status = U_ZERO_ERROR;
1569 uregex_lookingAt(re, 0, &status);
1570 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1571
1572 status = U_ZERO_ERROR;
1573 uregex_setUText(re, &text1, &status);
1574 result = uregex_lookingAt(re, 0, &status);
1575 TEST_ASSERT(result == TRUE);
1576 TEST_ASSERT_SUCCESS(status);
1577
1578 status = U_ZERO_ERROR;
1579 uregex_setUText(re, &text2, &status);
1580 result = uregex_lookingAt(re, 0, &status);
1581 TEST_ASSERT(result == FALSE);
1582 TEST_ASSERT_SUCCESS(status);
1583
1584 status = U_ZERO_ERROR;
1585 uregex_setUText(re, &text1, &status);
1586 result = uregex_lookingAt(re, 0, &status);
1587 TEST_ASSERT(result == TRUE);
1588 TEST_ASSERT_SUCCESS(status);
1589
1590 uregex_close(re);
1591 utext_close(&text1);
1592 utext_close(&text2);
1593 }
1594
1595
1596 /*
1597 * getText() and getUText()
1598 */
1599 {
1600 UText text1 = UTEXT_INITIALIZER;
1601 UText text2 = UTEXT_INITIALIZER;
1602 UChar text2Chars[20];
1603 UText *resultText;
1604 const UChar *result;
1605 int32_t textLength;
1606 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1607 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1608 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1609
1610
1611 status = U_ZERO_ERROR;
1612 utext_openUTF8(&text1, str_abcccd, -1, &status);
1613 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1614 utext_openUChars(&text2, text2Chars, -1, &status);
1615
1616 utext_openUTF8(&patternText, str_abcd, -1, &status);
1617 re = uregex_openUText(&patternText, 0, NULL, &status);
1618
1619 /* First set a UText */
1620 uregex_setUText(re, &text1, &status);
1621 resultText = uregex_getUText(re, NULL, &status);
1622 TEST_ASSERT_SUCCESS(status);
1623 TEST_ASSERT(resultText != &text1);
1624 utext_setNativeIndex(resultText, 0);
1625 utext_setNativeIndex(&text1, 0);
1626 TEST_ASSERT(testUTextEqual(resultText, &text1));
1627 utext_close(resultText);
1628
1629 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1630 (void)result; /* Suppress set but not used warning. */
1631 TEST_ASSERT(textLength == -1 || textLength == 6);
1632 resultText = uregex_getUText(re, NULL, &status);
1633 TEST_ASSERT_SUCCESS(status);
1634 TEST_ASSERT(resultText != &text1);
1635 utext_setNativeIndex(resultText, 0);
1636 utext_setNativeIndex(&text1, 0);
1637 TEST_ASSERT(testUTextEqual(resultText, &text1));
1638 utext_close(resultText);
1639
1640 /* Then set a UChar * */
1641 uregex_setText(re, text2Chars, 7, &status);
1642 resultText = uregex_getUText(re, NULL, &status);
1643 TEST_ASSERT_SUCCESS(status);
1644 utext_setNativeIndex(resultText, 0);
1645 utext_setNativeIndex(&text2, 0);
1646 TEST_ASSERT(testUTextEqual(resultText, &text2));
1647 utext_close(resultText);
1648 result = uregex_getText(re, &textLength, &status);
1649 TEST_ASSERT(textLength == 7);
1650
1651 uregex_close(re);
1652 utext_close(&text1);
1653 utext_close(&text2);
1654 }
1655
1656 /*
1657 * matches()
1658 */
1659 {
1660 UText text1 = UTEXT_INITIALIZER;
1661 UBool result;
1662 UText nullText = UTEXT_INITIALIZER;
1663 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1664 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1665
1666 status = U_ZERO_ERROR;
1667 utext_openUTF8(&text1, str_abcccde, -1, &status);
1668 utext_openUTF8(&patternText, str_abcd, -1, &status);
1669 re = uregex_openUText(&patternText, 0, NULL, &status);
1670
1671 uregex_setUText(re, &text1, &status);
1672 result = uregex_matches(re, 0, &status);
1673 TEST_ASSERT(result == FALSE);
1674 TEST_ASSERT_SUCCESS(status);
1675 uregex_close(re);
1676
1677 status = U_ZERO_ERROR;
1678 re = uregex_openC(".?", 0, NULL, &status);
1679 uregex_setUText(re, &text1, &status);
1680 result = uregex_matches(re, 7, &status);
1681 TEST_ASSERT(result == TRUE);
1682 TEST_ASSERT_SUCCESS(status);
1683
1684 status = U_ZERO_ERROR;
1685 utext_openUTF8(&nullText, "", -1, &status);
1686 uregex_setUText(re, &nullText, &status);
1687 TEST_ASSERT_SUCCESS(status);
1688 result = uregex_matches(re, 0, &status);
1689 TEST_ASSERT(result == TRUE);
1690 TEST_ASSERT_SUCCESS(status);
1691
1692 uregex_close(re);
1693 utext_close(&text1);
1694 utext_close(&nullText);
1695 }
1696
1697
1698 /*
1699 * lookingAt() Used in setText test.
1700 */
1701
1702
1703 /*
1704 * find(), findNext, start, end, reset
1705 */
1706 {
1707 UChar text1[50];
1708 UBool result;
1709 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1710 status = U_ZERO_ERROR;
1711 re = uregex_openC("rx", 0, NULL, &status);
1712
1713 uregex_setText(re, text1, -1, &status);
1714 result = uregex_find(re, 0, &status);
1715 TEST_ASSERT(result == TRUE);
1716 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1717 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1718 TEST_ASSERT_SUCCESS(status);
1719
1720 result = uregex_find(re, 9, &status);
1721 TEST_ASSERT(result == TRUE);
1722 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1723 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1724 TEST_ASSERT_SUCCESS(status);
1725
1726 result = uregex_find(re, 14, &status);
1727 TEST_ASSERT(result == FALSE);
1728 TEST_ASSERT_SUCCESS(status);
1729
1730 status = U_ZERO_ERROR;
1731 uregex_reset(re, 0, &status);
1732
1733 result = uregex_findNext(re, &status);
1734 TEST_ASSERT(result == TRUE);
1735 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1736 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1737 TEST_ASSERT_SUCCESS(status);
1738
1739 result = uregex_findNext(re, &status);
1740 TEST_ASSERT(result == TRUE);
1741 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1742 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1743 TEST_ASSERT_SUCCESS(status);
1744
1745 status = U_ZERO_ERROR;
1746 uregex_reset(re, 12, &status);
1747
1748 result = uregex_findNext(re, &status);
1749 TEST_ASSERT(result == TRUE);
1750 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1751 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1752 TEST_ASSERT_SUCCESS(status);
1753
1754 result = uregex_findNext(re, &status);
1755 TEST_ASSERT(result == FALSE);
1756 TEST_ASSERT_SUCCESS(status);
1757
1758 uregex_close(re);
1759 }
1760
1761 /*
1762 * groupUText()
1763 */
1764 {
1765 UChar text1[80];
1766 UText *actual;
1767 UBool result;
1768 int64_t groupLen = 0;
1769 UChar groupBuf[20];
1770
1771 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1772
1773 status = U_ZERO_ERROR;
1774 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1775 TEST_ASSERT_SUCCESS(status);
1776
1777 uregex_setText(re, text1, -1, &status);
1778 result = uregex_find(re, 0, &status);
1779 TEST_ASSERT(result==TRUE);
1780
1781 /* Capture Group 0 with shallow clone API. Should succeed. */
1782 status = U_ZERO_ERROR;
1783 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1784 TEST_ASSERT_SUCCESS(status);
1785
1786 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1787 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1788 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1789
1790 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1791 utext_close(actual);
1792
1793 /* Capture group #1. Should succeed. */
1794 status = U_ZERO_ERROR;
1795
1796 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1797 TEST_ASSERT_SUCCESS(status);
1798 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1799 /* (within the string text1) */
1800 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1801 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1802 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1803
1804 utext_close(actual);
1805
1806 /* Capture group out of range. Error. */
1807 status = U_ZERO_ERROR;
1808 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1809 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1810 utext_close(actual);
1811
1812 uregex_close(re);
1813 }
1814
1815 /*
1816 * replaceFirst()
1817 */
1818 {
1819 UChar text1[80];
1820 UChar text2[80];
1821 UText replText = UTEXT_INITIALIZER;
1822 UText *result;
1823 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1824 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1825 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1826 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1827 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1828 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1829 status = U_ZERO_ERROR;
1830 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1831 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1832 utext_openUTF8(&replText, str_1x, -1, &status);
1833
1834 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1835 TEST_ASSERT_SUCCESS(status);
1836
1837 /* Normal case, with match */
1838 uregex_setText(re, text1, -1, &status);
1839 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1840 TEST_ASSERT_SUCCESS(status);
1841 TEST_ASSERT_UTEXT(str_Replxxx, result);
1842 utext_close(result);
1843
1844 /* No match. Text should copy to output with no changes. */
1845 uregex_setText(re, text2, -1, &status);
1846 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1847 TEST_ASSERT_SUCCESS(status);
1848 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1849 utext_close(result);
1850
1851 /* Unicode escapes */
1852 uregex_setText(re, text1, -1, &status);
1853 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1854 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1855 TEST_ASSERT_SUCCESS(status);
1856 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1857 utext_close(result);
1858
1859 uregex_close(re);
1860 utext_close(&replText);
1861 }
1862
1863
1864 /*
1865 * replaceAll()
1866 */
1867 {
1868 UChar text1[80];
1869 UChar text2[80];
1870 UText replText = UTEXT_INITIALIZER;
1871 UText *result;
1872 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1873 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1874 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1875 status = U_ZERO_ERROR;
1876 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1877 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1878 utext_openUTF8(&replText, str_1, -1, &status);
1879
1880 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1881 TEST_ASSERT_SUCCESS(status);
1882
1883 /* Normal case, with match */
1884 uregex_setText(re, text1, -1, &status);
1885 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1886 TEST_ASSERT_SUCCESS(status);
1887 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1888 utext_close(result);
1889
1890 /* No match. Text should copy to output with no changes. */
1891 uregex_setText(re, text2, -1, &status);
1892 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1893 TEST_ASSERT_SUCCESS(status);
1894 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1895 utext_close(result);
1896
1897 uregex_close(re);
1898 utext_close(&replText);
1899 }
1900
1901
1902 /*
1903 * appendReplacement()
1904 */
1905 {
1906 UChar text[100];
1907 UChar repl[100];
1908 UChar buf[100];
1909 UChar *bufPtr;
1910 int32_t bufCap;
1911
1912 status = U_ZERO_ERROR;
1913 re = uregex_openC(".*", 0, 0, &status);
1914 TEST_ASSERT_SUCCESS(status);
1915
1916 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1917 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1918 uregex_setText(re, text, -1, &status);
1919
1920 /* match covers whole target string */
1921 uregex_find(re, 0, &status);
1922 TEST_ASSERT_SUCCESS(status);
1923 bufPtr = buf;
1924 bufCap = UPRV_LENGTHOF(buf);
1925 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1926 TEST_ASSERT_SUCCESS(status);
1927 TEST_ASSERT_STRING("some other", buf, TRUE);
1928
1929 /* Match has \u \U escapes */
1930 uregex_find(re, 0, &status);
1931 TEST_ASSERT_SUCCESS(status);
1932 bufPtr = buf;
1933 bufCap = UPRV_LENGTHOF(buf);
1934 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1935 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1936 TEST_ASSERT_SUCCESS(status);
1937 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1938
1939 uregex_close(re);
1940 }
1941
1942
1943 /*
1944 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1945 */
1946
1947 /*
1948 * splitUText()
1949 */
1950 {
1951 UChar textToSplit[80];
1952 UChar text2[80];
1953 UText *fields[10];
1954 int32_t numFields;
1955 int32_t i;
1956
1957 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1958 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1959
1960 status = U_ZERO_ERROR;
1961 re = uregex_openC(":", 0, NULL, &status);
1962
1963
1964 /* Simple split */
1965
1966 uregex_setText(re, textToSplit, -1, &status);
1967 TEST_ASSERT_SUCCESS(status);
1968
1969 /* The TEST_ASSERT_SUCCESS call above should change too... */
1970 if (U_SUCCESS(status)) {
1971 memset(fields, 0, sizeof(fields));
1972 numFields = uregex_splitUText(re, fields, 10, &status);
1973 TEST_ASSERT_SUCCESS(status);
1974
1975 /* The TEST_ASSERT_SUCCESS call above should change too... */
1976 if(U_SUCCESS(status)) {
1977 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1978 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1979 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1980 TEST_ASSERT(numFields == 3);
1981 TEST_ASSERT_UTEXT(str_first, fields[0]);
1982 TEST_ASSERT_UTEXT(str_second, fields[1]);
1983 TEST_ASSERT_UTEXT(str_third, fields[2]);
1984 TEST_ASSERT(fields[3] == NULL);
1985 }
1986 for(i = 0; i < numFields; i++) {
1987 utext_close(fields[i]);
1988 }
1989 }
1990
1991 uregex_close(re);
1992
1993
1994 /* Split with too few output strings available */
1995 status = U_ZERO_ERROR;
1996 re = uregex_openC(":", 0, NULL, &status);
1997 uregex_setText(re, textToSplit, -1, &status);
1998 TEST_ASSERT_SUCCESS(status);
1999
2000 /* The TEST_ASSERT_SUCCESS call above should change too... */
2001 if(U_SUCCESS(status)) {
2002 fields[0] = NULL;
2003 fields[1] = NULL;
2004 fields[2] = &patternText;
2005 numFields = uregex_splitUText(re, fields, 2, &status);
2006 TEST_ASSERT_SUCCESS(status);
2007
2008 /* The TEST_ASSERT_SUCCESS call above should change too... */
2009 if(U_SUCCESS(status)) {
2010 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2011 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2012 TEST_ASSERT(numFields == 2);
2013 TEST_ASSERT_UTEXT(str_first, fields[0]);
2014 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2015 TEST_ASSERT(fields[2] == &patternText);
2016 }
2017 for(i = 0; i < numFields; i++) {
2018 utext_close(fields[i]);
2019 }
2020 }
2021
2022 uregex_close(re);
2023 }
2024
2025 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2026 * comes out as additional fields. */
2027 {
2028 UChar textToSplit[80];
2029 UText *fields[10];
2030 int32_t numFields;
2031 int32_t i;
2032
2033 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2034
2035 status = U_ZERO_ERROR;
2036 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2037
2038 uregex_setText(re, textToSplit, -1, &status);
2039 TEST_ASSERT_SUCCESS(status);
2040
2041 /* The TEST_ASSERT_SUCCESS call above should change too... */
2042 if(U_SUCCESS(status)) {
2043 memset(fields, 0, sizeof(fields));
2044 numFields = uregex_splitUText(re, fields, 10, &status);
2045 TEST_ASSERT_SUCCESS(status);
2046
2047 /* The TEST_ASSERT_SUCCESS call above should change too... */
2048 if(U_SUCCESS(status)) {
2049 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2050 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2051 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2052 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2053 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2054
2055 TEST_ASSERT(numFields == 5);
2056 TEST_ASSERT_UTEXT(str_first, fields[0]);
2057 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2058 TEST_ASSERT_UTEXT(str_second, fields[2]);
2059 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2060 TEST_ASSERT_UTEXT(str_third, fields[4]);
2061 TEST_ASSERT(fields[5] == NULL);
2062 }
2063 for(i = 0; i < numFields; i++) {
2064 utext_close(fields[i]);
2065 }
2066 }
2067
2068 /* Split with too few output strings available (2) */
2069 status = U_ZERO_ERROR;
2070 fields[0] = NULL;
2071 fields[1] = NULL;
2072 fields[2] = &patternText;
2073 numFields = uregex_splitUText(re, fields, 2, &status);
2074 TEST_ASSERT_SUCCESS(status);
2075
2076 /* The TEST_ASSERT_SUCCESS call above should change too... */
2077 if(U_SUCCESS(status)) {
2078 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2079 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2080 TEST_ASSERT(numFields == 2);
2081 TEST_ASSERT_UTEXT(str_first, fields[0]);
2082 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2083 TEST_ASSERT(fields[2] == &patternText);
2084 }
2085 for(i = 0; i < numFields; i++) {
2086 utext_close(fields[i]);
2087 }
2088
2089
2090 /* Split with too few output strings available (3) */
2091 status = U_ZERO_ERROR;
2092 fields[0] = NULL;
2093 fields[1] = NULL;
2094 fields[2] = NULL;
2095 fields[3] = &patternText;
2096 numFields = uregex_splitUText(re, fields, 3, &status);
2097 TEST_ASSERT_SUCCESS(status);
2098
2099 /* The TEST_ASSERT_SUCCESS call above should change too... */
2100 if(U_SUCCESS(status)) {
2101 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2102 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2103 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2104 TEST_ASSERT(numFields == 3);
2105 TEST_ASSERT_UTEXT(str_first, fields[0]);
2106 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2107 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2108 TEST_ASSERT(fields[3] == &patternText);
2109 }
2110 for(i = 0; i < numFields; i++) {
2111 utext_close(fields[i]);
2112 }
2113
2114 /* Split with just enough output strings available (5) */
2115 status = U_ZERO_ERROR;
2116 fields[0] = NULL;
2117 fields[1] = NULL;
2118 fields[2] = NULL;
2119 fields[3] = NULL;
2120 fields[4] = NULL;
2121 fields[5] = &patternText;
2122 numFields = uregex_splitUText(re, fields, 5, &status);
2123 TEST_ASSERT_SUCCESS(status);
2124
2125 /* The TEST_ASSERT_SUCCESS call above should change too... */
2126 if(U_SUCCESS(status)) {
2127 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2128 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2129 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2130 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2131 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2132
2133 TEST_ASSERT(numFields == 5);
2134 TEST_ASSERT_UTEXT(str_first, fields[0]);
2135 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2136 TEST_ASSERT_UTEXT(str_second, fields[2]);
2137 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2138 TEST_ASSERT_UTEXT(str_third, fields[4]);
2139 TEST_ASSERT(fields[5] == &patternText);
2140 }
2141 for(i = 0; i < numFields; i++) {
2142 utext_close(fields[i]);
2143 }
2144
2145 /* Split, end of text is a field delimiter. */
2146 status = U_ZERO_ERROR;
2147 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2148 TEST_ASSERT_SUCCESS(status);
2149
2150 /* The TEST_ASSERT_SUCCESS call above should change too... */
2151 if(U_SUCCESS(status)) {
2152 memset(fields, 0, sizeof(fields));
2153 fields[9] = &patternText;
2154 numFields = uregex_splitUText(re, fields, 9, &status);
2155 TEST_ASSERT_SUCCESS(status);
2156
2157 /* The TEST_ASSERT_SUCCESS call above should change too... */
2158 if(U_SUCCESS(status)) {
2159 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2160 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2161 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2162 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2163 const char str_empty[] = { 0x00 };
2164
2165 TEST_ASSERT(numFields == 5);
2166 TEST_ASSERT_UTEXT(str_first, fields[0]);
2167 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2168 TEST_ASSERT_UTEXT(str_second, fields[2]);
2169 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2170 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2171 TEST_ASSERT(fields[5] == NULL);
2172 TEST_ASSERT(fields[8] == NULL);
2173 TEST_ASSERT(fields[9] == &patternText);
2174 }
2175 for(i = 0; i < numFields; i++) {
2176 utext_close(fields[i]);
2177 }
2178 }
2179
2180 uregex_close(re);
2181 }
2182 utext_close(&patternText);
2183 }
2184
2185
2186 static void TestRefreshInput(void) {
2187 /*
2188 * RefreshInput changes out the input of a URegularExpression without
2189 * changing anything else in the match state. Used with Java JNI,
2190 * when Java moves the underlying string storage. This test
2191 * runs a find() loop, moving the text after the first match.
2192 * The right number of matches should still be found.
2193 */
2194 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2195 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2196 UErrorCode status = U_ZERO_ERROR;
2197 URegularExpression *re;
2198 UText ut1 = UTEXT_INITIALIZER;
2199 UText ut2 = UTEXT_INITIALIZER;
2200
2201 re = uregex_openC("[ABC]", 0, 0, &status);
2202 TEST_ASSERT_SUCCESS(status);
2203
2204 utext_openUChars(&ut1, testStr, -1, &status);
2205 TEST_ASSERT_SUCCESS(status);
2206 uregex_setUText(re, &ut1, &status);
2207 TEST_ASSERT_SUCCESS(status);
2208
2209 /* Find the first match "A" in the original string */
2210 TEST_ASSERT(uregex_findNext(re, &status));
2211 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2212
2213 /* Move the string, kill the original string. */
2214 u_strcpy(movedStr, testStr);
2215 u_memset(testStr, 0, u_strlen(testStr));
2216 utext_openUChars(&ut2, movedStr, -1, &status);
2217 TEST_ASSERT_SUCCESS(status);
2218 uregex_refreshUText(re, &ut2, &status);
2219 TEST_ASSERT_SUCCESS(status);
2220
2221 /* Find the following two matches, now working in the moved string. */
2222 TEST_ASSERT(uregex_findNext(re, &status));
2223 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2224 TEST_ASSERT(uregex_findNext(re, &status));
2225 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2226 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2227
2228 uregex_close(re);
2229 }
2230
2231
2232 static void TestBug8421(void) {
2233 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2234 * was failing.
2235 */
2236 URegularExpression *re;
2237 UErrorCode status = U_ZERO_ERROR;
2238 int32_t limit = -1;
2239
2240 re = uregex_openC("abc", 0, 0, &status);
2241 TEST_ASSERT_SUCCESS(status);
2242
2243 limit = uregex_getTimeLimit(re, &status);
2244 TEST_ASSERT_SUCCESS(status);
2245 TEST_ASSERT(limit == 0);
2246
2247 uregex_setTimeLimit(re, 100, &status);
2248 TEST_ASSERT_SUCCESS(status);
2249 limit = uregex_getTimeLimit(re, &status);
2250 TEST_ASSERT_SUCCESS(status);
2251 TEST_ASSERT(limit == 100);
2252
2253 uregex_close(re);
2254 }
2255
2256 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2257 return FALSE;
2258 }
2259
2260 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2261 return FALSE;
2262 }
2263
2264 static void TestBug10815() {
2265 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2266 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2267 */
2268 URegularExpression *re;
2269 UErrorCode status = U_ZERO_ERROR;
2270 UChar text[100];
2271
2272
2273 // findNext() with a find progress callback function.
2274
2275 re = uregex_openC(".z", 0, 0, &status);
2276 TEST_ASSERT_SUCCESS(status);
2277
2278 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2279 uregex_setText(re, text, -1, &status);
2280 TEST_ASSERT_SUCCESS(status);
2281
2282 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2283 TEST_ASSERT_SUCCESS(status);
2284
2285 uregex_findNext(re, &status);
2286 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2287
2288 uregex_close(re);
2289
2290 // findNext() with a match progress callback function.
2291
2292 status = U_ZERO_ERROR;
2293 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2294 TEST_ASSERT_SUCCESS(status);
2295
2296 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2297 // it will appear to be stuck in a (near) infinite loop.
2298 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2299 uregex_setText(re, text, -1, &status);
2300 TEST_ASSERT_SUCCESS(status);
2301
2302 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2303 TEST_ASSERT_SUCCESS(status);
2304
2305 uregex_findNext(re, &status);
2306 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2307
2308 uregex_close(re);
2309 }
2310
2311 static const UChar startLinePattern[] = { 0x5E, 0x78, 0 }; // "^x"
2312
2313 static void TestMatchStartLineWithEmptyText() {
2314 UErrorCode status = U_ZERO_ERROR;
2315 UText* ut = utext_openUChars(NULL, NULL, 0, &status);
2316 TEST_ASSERT_SUCCESS(status);
2317 if (U_SUCCESS(status)) {
2318 URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status);
2319 TEST_ASSERT_SUCCESS(status);
2320 if (U_SUCCESS(status)) {
2321 uregex_setUText(re, ut, &status);
2322 TEST_ASSERT(U_SUCCESS(status));
2323 if (U_SUCCESS(status)) {
2324 UBool found = uregex_findNext(re, &status);
2325 TEST_ASSERT(U_SUCCESS(status) && !found);
2326 }
2327 uregex_close(re);
2328 }
2329 utext_close(ut);
2330 }
2331 }
2332
2333 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */