]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/reapits.c
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35
/*
 * TEST_ASSERT_SUCCESS(status)
 *   Logs a data error, tagged with the invoking file and line and the
 *   symbolic name of the error, when 'status' holds a failure code.
 *   The macro only reports; it neither modifies 'status' nor leaves the
 *   enclosing function.
 */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, \
                     __LINE__, \
                     u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
41
/*
 * TEST_ASSERT(expr)
 *   Logs a test failure, tagged with the invoking file and line and the
 *   source text of the expression, when 'expr' evaluates to zero/FALSE.
 *   Execution continues after the report.
 */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
47
48 /*
49 * TEST_SETUP and TEST_TEARDOWN
50 * macros to handle the boilerplate around setting up regex test cases.
51 * parameters to setup:
52 * pattern: The regex pattern, a (char *) null terminated C string.
53 * testString: The string data, also a (char *) C string.
54 * flags: Regex flags to set when compiling the pattern
55 *
56 * Put arbitrary test code between SETUP and TEARDOWN.
57 * 're' is the compiled, ready-to-go regular expression.
 *
 * Both macros read and write variables named 'status' and 're' that must
 * already be declared in the invoking function.
 *
 * TEST_SETUP opens a block, resets 'status' to U_ZERO_ERROR, compiles
 * 'pattern' into 're' with uregex_openC(), converts 'testString' into a
 * malloc'ed UChar buffer (srcString) with u_uastrncpy() and installs that
 * buffer as the text of 're'. The code that follows the macro is placed
 * inside an if (U_SUCCESS(status)) body, so it is skipped when the open or
 * the setText failed.
 *
 * TEST_SETUP and TEST_TEARDOWN are deliberately unbalanced on their own:
 * TEARDOWN supplies the closing braces for the 'if' and for the block that
 * SETUP opened, so the two must always be used as a pair.
 *
 * NOTE(review): the malloc() result is not checked before it is written to.
 * NOTE(review): the trailing empty UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END
 * presumably exists to absorb the ';' written after TEST_SETUP(...) - confirm.
58 */
59 #define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
60 UChar *srcString = NULL; \
61 status = U_ZERO_ERROR; \
62 re = uregex_openC(pattern, flags, NULL, &status); \
63 TEST_ASSERT_SUCCESS(status); \
64 int32_t testStringLen = (int32_t)strlen(testString); \
65 srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
66 u_uastrncpy(srcString, testString, testStringLen + 1); \
67 uregex_setText(re, srcString, -1, &status); \
68 TEST_ASSERT_SUCCESS(status); \
69 if (U_SUCCESS(status)) { \
70 UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END
71
/*
 * TEST_TEARDOWN closes the 'if' body opened by TEST_SETUP, reports any
 * failure left in 'status', closes 're', frees srcString and finally closes
 * the block opened by TEST_SETUP.
 */
72 #define TEST_TEARDOWN \
73 } \
74 TEST_ASSERT_SUCCESS(status); \
75 uregex_close(re); \
76 free(srcString); \
77 } UPRV_BLOCK_MACRO_END
78
79
80 /**
81 * @param expected utf-8 array of bytes to be expected
82 */
83 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
84 char buf_inside_macro[120];
85 int32_t len = (int32_t)strlen(expected);
86 UBool success;
87 if (nulTerm) {
88 u_austrncpy(buf_inside_macro, (actual), len+1);
89 buf_inside_macro[len+2] = 0;
90 success = (strcmp((expected), buf_inside_macro) == 0);
91 } else {
92 u_austrncpy(buf_inside_macro, (actual), len);
93 buf_inside_macro[len+1] = 0;
94 success = (strncmp((expected), buf_inside_macro, len) == 0);
95 }
96 if (success == FALSE) {
97 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
98 file, line, (expected), buf_inside_macro);
99 }
100 }
101
102 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
103
104
105 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
106 int32_t u8i = 0;
107 UChar32 u8c = 0;
108 UChar32 utc = 0;
109 UBool stringsEqual = TRUE;
110 utext_setNativeIndex(utext, 0);
111 for (;;) {
112 U8_NEXT_UNSAFE(utf8, u8i, u8c);
113 utc = utext_next32(utext);
114 if (u8c == 0 && utc == U_SENTINEL) {
115 break;
116 }
117 if (u8c != utc || u8c == 0) {
118 stringsEqual = FALSE;
119 break;
120 }
121 }
122 return stringsEqual;
123 }
124
125
126 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
127 utext_setNativeIndex(actual, 0);
128 if (!equals_utf8_utext(expected, actual)) {
129 UChar32 c;
130 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
131 c = utext_next32From(actual, 0);
132 while (c != U_SENTINEL) {
133 if (0x20<c && c <0x7e) {
134 log_err("%c", c);
135 } else {
136 log_err("%#x", c);
137 }
138 c = UTEXT_NEXT32(actual);
139 }
140 log_err("\"\n");
141 }
142 }
143
144 /*
145 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
146 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
147 */
148 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
149
150 static UBool testUTextEqual(UText *uta, UText *utb) {
151 UChar32 ca = 0;
152 UChar32 cb = 0;
153 utext_setNativeIndex(uta, 0);
154 utext_setNativeIndex(utb, 0);
155 do {
156 ca = utext_next32(uta);
157 cb = utext_next32(utb);
158 if (ca != cb) {
159 break;
160 }
161 } while (ca != U_SENTINEL);
162 return ca == cb;
163 }
164
165
166
167
168 static void TestRegexCAPI(void);
169 static void TestBug4315(void);
170 static void TestUTextAPI(void);
171 static void TestRefreshInput(void);
172 static void TestBug8421(void);
173 static void TestBug10815(void);
174 static void TestMatchStartLineWithEmptyText(void);
175
176 void addURegexTest(TestNode** root);
177
178 void addURegexTest(TestNode** root)
179 {
180 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
181 addTest(root, &TestBug4315, "regex/TestBug4315");
182 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
183 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
184 addTest(root, &TestBug8421, "regex/TestBug8421");
185 addTest(root, &TestBug10815, "regex/TestBug10815");
186 addTest(root, &TestMatchStartLineWithEmptyText, "regex/TestMatchStartLineWithEmptyText");
187 }
188
189 /*
190 * Call back function and context struct used for testing
191 * regular expression user callbacks. This test is mostly the same as
192 * the corresponding C++ test in intltest.
193 */
194 typedef struct callBackContext {
195 int32_t maxCalls;
196 int32_t numCalls;
197 int32_t lastSteps;
198 } callBackContext;
199
200 static UBool U_EXPORT2 U_CALLCONV
201 TestCallbackFn(const void *context, int32_t steps) {
202 callBackContext *info = (callBackContext *)context;
203 if (info->lastSteps+1 != steps) {
204 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
205 }
206 info->lastSteps = steps;
207 info->numCalls++;
208 return (info->numCalls < info->maxCalls);
209 }
210
211 /*
212 * Regular Expression C API Tests
213 */
214 static void TestRegexCAPI(void) {
215 UErrorCode status = U_ZERO_ERROR;
216 URegularExpression *re;
217 UChar pat[200];
218 UChar *minus1;
219
220 memset(&minus1, -1, sizeof(minus1));
221
222 /* Minimalist open/close */
223 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
224 re = uregex_open(pat, -1, 0, 0, &status);
225 if (U_FAILURE(status)) {
226 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
227 return;
228 }
229 uregex_close(re);
230
231 /* Open with all flag values set */
232 status = U_ZERO_ERROR;
233 re = uregex_open(pat, -1,
234 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
235 0, &status);
236 TEST_ASSERT_SUCCESS(status);
237 uregex_close(re);
238
239 /* Open with an invalid flag */
240 status = U_ZERO_ERROR;
241 re = uregex_open(pat, -1, 0x40000000, 0, &status);
242 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
243 uregex_close(re);
244
245 /* Open with an unimplemented flag */
246 status = U_ZERO_ERROR;
247 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
248 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
249 uregex_close(re);
250
251 /* openC with an invalid parameter */
252 status = U_ZERO_ERROR;
253 re = uregex_openC(NULL,
254 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
255 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
256
257 /* openC with an invalid parameter */
258 status = U_USELESS_COLLATOR_ERROR;
259 re = uregex_openC(NULL,
260 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
261 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
262
263 /* openC open from a C string */
264 {
265 const UChar *p;
266 int32_t len;
267 status = U_ZERO_ERROR;
268 re = uregex_openC("abc*", 0, 0, &status);
269 TEST_ASSERT_SUCCESS(status);
270 p = uregex_pattern(re, &len, &status);
271 TEST_ASSERT_SUCCESS(status);
272
273 /* The TEST_ASSERT_SUCCESS above should change too... */
274 if(U_SUCCESS(status)) {
275 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
276 TEST_ASSERT(u_strcmp(pat, p) == 0);
277 TEST_ASSERT(len==(int32_t)strlen("abc*"));
278 }
279
280 uregex_close(re);
281
282 /* TODO: Open with ParseError parameter */
283 }
284
285 /*
286 * clone
287 */
288 {
289 URegularExpression *clone1;
290 URegularExpression *clone2;
291 URegularExpression *clone3;
292 UChar testString1[30];
293 UChar testString2[30];
294 UBool result;
295
296
297 status = U_ZERO_ERROR;
298 re = uregex_openC("abc*", 0, 0, &status);
299 TEST_ASSERT_SUCCESS(status);
300 clone1 = uregex_clone(re, &status);
301 TEST_ASSERT_SUCCESS(status);
302 TEST_ASSERT(clone1 != NULL);
303
304 status = U_ZERO_ERROR;
305 clone2 = uregex_clone(re, &status);
306 TEST_ASSERT_SUCCESS(status);
307 TEST_ASSERT(clone2 != NULL);
308 uregex_close(re);
309
310 status = U_ZERO_ERROR;
311 clone3 = uregex_clone(clone2, &status);
312 TEST_ASSERT_SUCCESS(status);
313 TEST_ASSERT(clone3 != NULL);
314
315 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
316 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
317
318 status = U_ZERO_ERROR;
319 uregex_setText(clone1, testString1, -1, &status);
320 TEST_ASSERT_SUCCESS(status);
321 result = uregex_lookingAt(clone1, 0, &status);
322 TEST_ASSERT_SUCCESS(status);
323 TEST_ASSERT(result==TRUE);
324
325 status = U_ZERO_ERROR;
326 uregex_setText(clone2, testString2, -1, &status);
327 TEST_ASSERT_SUCCESS(status);
328 result = uregex_lookingAt(clone2, 0, &status);
329 TEST_ASSERT_SUCCESS(status);
330 TEST_ASSERT(result==FALSE);
331 result = uregex_find(clone2, 0, &status);
332 TEST_ASSERT_SUCCESS(status);
333 TEST_ASSERT(result==TRUE);
334
335 uregex_close(clone1);
336 uregex_close(clone2);
337 uregex_close(clone3);
338
339 }
340
341 /*
342 * pattern()
343 */
344 {
345 const UChar *resultPat;
346 int32_t resultLen;
347 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
348 status = U_ZERO_ERROR;
349 re = uregex_open(pat, -1, 0, NULL, &status);
350 resultPat = uregex_pattern(re, &resultLen, &status);
351 TEST_ASSERT_SUCCESS(status);
352
353 /* The TEST_ASSERT_SUCCESS above should change too... */
354 if (U_SUCCESS(status)) {
355 TEST_ASSERT(resultLen == -1);
356 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
357 }
358
359 uregex_close(re);
360
361 status = U_ZERO_ERROR;
362 re = uregex_open(pat, 3, 0, NULL, &status);
363 resultPat = uregex_pattern(re, &resultLen, &status);
364 TEST_ASSERT_SUCCESS(status);
365 TEST_ASSERT_SUCCESS(status);
366
367 /* The TEST_ASSERT_SUCCESS above should change too... */
368 if (U_SUCCESS(status)) {
369 TEST_ASSERT(resultLen == 3);
370 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
371 TEST_ASSERT(u_strlen(resultPat) == 3);
372 }
373
374 uregex_close(re);
375 }
376
377 /*
378 * flags()
379 */
380 {
381 int32_t t;
382
383 status = U_ZERO_ERROR;
384 re = uregex_open(pat, -1, 0, NULL, &status);
385 t = uregex_flags(re, &status);
386 TEST_ASSERT_SUCCESS(status);
387 TEST_ASSERT(t == 0);
388 uregex_close(re);
389
390 status = U_ZERO_ERROR;
391 re = uregex_open(pat, -1, 0, NULL, &status);
392 t = uregex_flags(re, &status);
393 TEST_ASSERT_SUCCESS(status);
394 TEST_ASSERT(t == 0);
395 uregex_close(re);
396
397 status = U_ZERO_ERROR;
398 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
399 t = uregex_flags(re, &status);
400 TEST_ASSERT_SUCCESS(status);
401 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
402 uregex_close(re);
403 }
404
405 /*
406 * setText() and lookingAt()
407 */
408 {
409 UChar text1[50];
410 UChar text2[50];
411 UBool result;
412
413 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
414 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
415 status = U_ZERO_ERROR;
416 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
417 re = uregex_open(pat, -1, 0, NULL, &status);
418 TEST_ASSERT_SUCCESS(status);
419
420 /* Operation before doing a setText should fail... */
421 status = U_ZERO_ERROR;
422 uregex_lookingAt(re, 0, &status);
423 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
424
425 status = U_ZERO_ERROR;
426 uregex_setText(re, text1, -1, &status);
427 result = uregex_lookingAt(re, 0, &status);
428 TEST_ASSERT(result == TRUE);
429 TEST_ASSERT_SUCCESS(status);
430
431 status = U_ZERO_ERROR;
432 uregex_setText(re, text2, -1, &status);
433 result = uregex_lookingAt(re, 0, &status);
434 TEST_ASSERT(result == FALSE);
435 TEST_ASSERT_SUCCESS(status);
436
437 status = U_ZERO_ERROR;
438 uregex_setText(re, text1, -1, &status);
439 result = uregex_lookingAt(re, 0, &status);
440 TEST_ASSERT(result == TRUE);
441 TEST_ASSERT_SUCCESS(status);
442
443 status = U_ZERO_ERROR;
444 uregex_setText(re, text1, 5, &status);
445 result = uregex_lookingAt(re, 0, &status);
446 TEST_ASSERT(result == FALSE);
447 TEST_ASSERT_SUCCESS(status);
448
449 status = U_ZERO_ERROR;
450 uregex_setText(re, text1, 6, &status);
451 result = uregex_lookingAt(re, 0, &status);
452 TEST_ASSERT(result == TRUE);
453 TEST_ASSERT_SUCCESS(status);
454
455 uregex_close(re);
456 }
457
458
459 /*
460 * getText()
461 */
462 {
463 UChar text1[50];
464 UChar text2[50];
465 const UChar *result;
466 int32_t textLength;
467
468 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
469 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
470 status = U_ZERO_ERROR;
471 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
472 re = uregex_open(pat, -1, 0, NULL, &status);
473
474 uregex_setText(re, text1, -1, &status);
475 result = uregex_getText(re, &textLength, &status);
476 TEST_ASSERT(result == text1);
477 TEST_ASSERT(textLength == -1);
478 TEST_ASSERT_SUCCESS(status);
479
480 status = U_ZERO_ERROR;
481 uregex_setText(re, text2, 7, &status);
482 result = uregex_getText(re, &textLength, &status);
483 TEST_ASSERT(result == text2);
484 TEST_ASSERT(textLength == 7);
485 TEST_ASSERT_SUCCESS(status);
486
487 status = U_ZERO_ERROR;
488 uregex_setText(re, text2, 4, &status);
489 result = uregex_getText(re, &textLength, &status);
490 TEST_ASSERT(result == text2);
491 TEST_ASSERT(textLength == 4);
492 TEST_ASSERT_SUCCESS(status);
493 uregex_close(re);
494 }
495
496 /*
497 * matches()
498 */
499 {
500 UChar text1[50];
501 UBool result;
502 int len;
503 UChar nullString[] = {0,0,0};
504
505 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
506 status = U_ZERO_ERROR;
507 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
508 re = uregex_open(pat, -1, 0, NULL, &status);
509
510 uregex_setText(re, text1, -1, &status);
511 result = uregex_matches(re, 0, &status);
512 TEST_ASSERT(result == FALSE);
513 TEST_ASSERT_SUCCESS(status);
514
515 status = U_ZERO_ERROR;
516 uregex_setText(re, text1, 6, &status);
517 result = uregex_matches(re, 0, &status);
518 TEST_ASSERT(result == TRUE);
519 TEST_ASSERT_SUCCESS(status);
520
521 status = U_ZERO_ERROR;
522 uregex_setText(re, text1, 6, &status);
523 result = uregex_matches(re, 1, &status);
524 TEST_ASSERT(result == FALSE);
525 TEST_ASSERT_SUCCESS(status);
526 uregex_close(re);
527
528 status = U_ZERO_ERROR;
529 re = uregex_openC(".?", 0, NULL, &status);
530 uregex_setText(re, text1, -1, &status);
531 len = u_strlen(text1);
532 result = uregex_matches(re, len, &status);
533 TEST_ASSERT(result == TRUE);
534 TEST_ASSERT_SUCCESS(status);
535
536 status = U_ZERO_ERROR;
537 uregex_setText(re, nullString, -1, &status);
538 TEST_ASSERT_SUCCESS(status);
539 result = uregex_matches(re, 0, &status);
540 TEST_ASSERT(result == TRUE);
541 TEST_ASSERT_SUCCESS(status);
542 uregex_close(re);
543 }
544
545
546 /*
547 * lookingAt() Used in setText test.
548 */
549
550
551 /*
552 * find(), findNext, start, end, reset
553 */
554 {
555 UChar text1[50];
556 UBool result;
557 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
558 status = U_ZERO_ERROR;
559 re = uregex_openC("rx", 0, NULL, &status);
560
561 uregex_setText(re, text1, -1, &status);
562 result = uregex_find(re, 0, &status);
563 TEST_ASSERT(result == TRUE);
564 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
565 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
566 TEST_ASSERT_SUCCESS(status);
567
568 result = uregex_find(re, 9, &status);
569 TEST_ASSERT(result == TRUE);
570 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
571 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
572 TEST_ASSERT_SUCCESS(status);
573
574 result = uregex_find(re, 14, &status);
575 TEST_ASSERT(result == FALSE);
576 TEST_ASSERT_SUCCESS(status);
577
578 status = U_ZERO_ERROR;
579 uregex_reset(re, 0, &status);
580
581 result = uregex_findNext(re, &status);
582 TEST_ASSERT(result == TRUE);
583 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
584 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
585 TEST_ASSERT_SUCCESS(status);
586
587 result = uregex_findNext(re, &status);
588 TEST_ASSERT(result == TRUE);
589 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
590 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
591 TEST_ASSERT_SUCCESS(status);
592
593 status = U_ZERO_ERROR;
594 uregex_reset(re, 12, &status);
595
596 result = uregex_findNext(re, &status);
597 TEST_ASSERT(result == TRUE);
598 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
599 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
600 TEST_ASSERT_SUCCESS(status);
601
602 result = uregex_findNext(re, &status);
603 TEST_ASSERT(result == FALSE);
604 TEST_ASSERT_SUCCESS(status);
605
606 uregex_close(re);
607 }
608
609 /*
610 * groupCount
611 */
612 {
613 int32_t result;
614
615 status = U_ZERO_ERROR;
616 re = uregex_openC("abc", 0, NULL, &status);
617 result = uregex_groupCount(re, &status);
618 TEST_ASSERT_SUCCESS(status);
619 TEST_ASSERT(result == 0);
620 uregex_close(re);
621
622 status = U_ZERO_ERROR;
623 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
624 result = uregex_groupCount(re, &status);
625 TEST_ASSERT_SUCCESS(status);
626 TEST_ASSERT(result == 3);
627 uregex_close(re);
628
629 }
630
631
632 /*
633 * group()
634 */
635 {
636 UChar text1[80];
637 UChar buf[80];
638 UBool result;
639 int32_t resultSz;
640 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
641
642 status = U_ZERO_ERROR;
643 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
644 TEST_ASSERT_SUCCESS(status);
645
646
647 uregex_setText(re, text1, -1, &status);
648 result = uregex_find(re, 0, &status);
649 TEST_ASSERT(result==TRUE);
650
651 /* Capture Group 0, the full match. Should succeed. */
652 status = U_ZERO_ERROR;
653 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
654 TEST_ASSERT_SUCCESS(status);
655 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
656 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
657
658 /* Capture group #1. Should succeed. */
659 status = U_ZERO_ERROR;
660 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
661 TEST_ASSERT_SUCCESS(status);
662 TEST_ASSERT_STRING(" interior ", buf, TRUE);
663 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
664
665 /* Capture group out of range. Error. */
666 status = U_ZERO_ERROR;
667 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
668 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
669
670 /* NULL buffer, pure pre-flight */
671 status = U_ZERO_ERROR;
672 resultSz = uregex_group(re, 0, NULL, 0, &status);
673 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
674 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
675
676 /* Too small buffer, truncated string */
677 status = U_ZERO_ERROR;
678 memset(buf, -1, sizeof(buf));
679 resultSz = uregex_group(re, 0, buf, 5, &status);
680 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
681 TEST_ASSERT_STRING("abc i", buf, FALSE);
682 TEST_ASSERT(buf[5] == (UChar)0xffff);
683 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
684
685 /* Output string just fits buffer, no NUL term. */
686 status = U_ZERO_ERROR;
687 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
688 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
689 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
690 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
691 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
692
693 uregex_close(re);
694
695 }
696
697 /*
698 * Regions
699 */
700
701
702 /* SetRegion(), getRegion() do something */
703 TEST_SETUP(".*", "0123456789ABCDEF", 0);
704 UChar resultString[40];
705 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
706 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
707 uregex_setRegion(re, 3, 6, &status);
708 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
709 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
710 TEST_ASSERT(uregex_findNext(re, &status));
711 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3);
712 TEST_ASSERT_STRING("345", resultString, TRUE);
713 TEST_TEARDOWN;
714
715 /* find(start=-1) uses regions */
716 TEST_SETUP(".*", "0123456789ABCDEF", 0);
717 uregex_setRegion(re, 4, 6, &status);
718 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
719 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
720 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
721 TEST_TEARDOWN;
722
723 /* find (start >=0) does not use regions */
724 TEST_SETUP(".*", "0123456789ABCDEF", 0);
725 uregex_setRegion(re, 4, 6, &status);
726 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
727 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
728 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
729 TEST_TEARDOWN;
730
731 /* findNext() obeys regions */
732 TEST_SETUP(".", "0123456789ABCDEF", 0);
733 uregex_setRegion(re, 4, 6, &status);
734 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
735 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
736 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
737 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
738 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
739 TEST_TEARDOWN;
740
741 /* matches(start=-1) uses regions */
742 /* Also, verify that non-greedy *? succeeds in finding the full match. */
743 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
744 uregex_setRegion(re, 4, 6, &status);
745 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
746 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
747 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
748 TEST_TEARDOWN;
749
750 /* matches (start >=0) does not use regions */
751 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
752 uregex_setRegion(re, 4, 6, &status);
753 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
754 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
755 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
756 TEST_TEARDOWN;
757
758 /* lookingAt(start=-1) uses regions */
759 /* Also, verify that non-greedy *? finds the first (shortest) match. */
760 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
761 uregex_setRegion(re, 4, 6, &status);
762 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
763 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
764 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
765 TEST_TEARDOWN;
766
767 /* lookingAt (start >=0) does not use regions */
768 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
769 uregex_setRegion(re, 4, 6, &status);
770 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
771 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
772 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
773 TEST_TEARDOWN;
774
775 /* hitEnd() */
776 TEST_SETUP("[a-f]*", "abcdefghij", 0);
777 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
778 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
779 TEST_TEARDOWN;
780
781 TEST_SETUP("[a-f]*", "abcdef", 0);
782 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
783 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
784 TEST_TEARDOWN;
785
786 /* requireEnd */
787 TEST_SETUP("abcd", "abcd", 0);
788 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
789 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
790 TEST_TEARDOWN;
791
792 TEST_SETUP("abcd$", "abcd", 0);
793 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
794 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
795 TEST_TEARDOWN;
796
797 /* anchoringBounds */
798 TEST_SETUP("abc$", "abcdef", 0);
799 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
800 uregex_useAnchoringBounds(re, FALSE, &status);
801 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
802
803 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
804 uregex_useAnchoringBounds(re, TRUE, &status);
805 uregex_setRegion(re, 0, 3, &status);
806 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
807 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
808 TEST_TEARDOWN;
809
810 /* Transparent Bounds */
811 TEST_SETUP("abc(?=def)", "abcdef", 0);
812 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
813 uregex_useTransparentBounds(re, TRUE, &status);
814 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
815
816 uregex_useTransparentBounds(re, FALSE, &status);
817 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
818 uregex_setRegion(re, 0, 3, &status);
819 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
820 uregex_useTransparentBounds(re, TRUE, &status);
821 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
822 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
823 TEST_TEARDOWN;
824
825
826 /*
827 * replaceFirst()
828 */
829 {
830 UChar text1[80];
831 UChar text2[80];
832 UChar replText[80];
833 UChar buf[80];
834 int32_t resultSz;
835 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
836 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
837 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
838
839 status = U_ZERO_ERROR;
840 re = uregex_openC("x(.*?)x", 0, NULL, &status);
841 TEST_ASSERT_SUCCESS(status);
842
843 /* Normal case, with match */
844 uregex_setText(re, text1, -1, &status);
845 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
846 TEST_ASSERT_SUCCESS(status);
847 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
848 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
849
850 /* No match. Text should copy to output with no changes. */
851 status = U_ZERO_ERROR;
852 uregex_setText(re, text2, -1, &status);
853 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
854 TEST_ASSERT_SUCCESS(status);
855 TEST_ASSERT_STRING("No match here.", buf, TRUE);
856 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
857
858 /* Match, output just fills buffer, no termination warning. */
859 status = U_ZERO_ERROR;
860 uregex_setText(re, text1, -1, &status);
861 memset(buf, -1, sizeof(buf));
862 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
863 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
864 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
865 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
866 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
867
868 /* Do the replaceFirst again, without first resetting anything.
869 * Should give the same results.
870 */
871 status = U_ZERO_ERROR;
872 memset(buf, -1, sizeof(buf));
873 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
874 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
875 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
876 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
877 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
878
879 /* NULL buffer, zero buffer length */
880 status = U_ZERO_ERROR;
881 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
882 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
883 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
884
885 /* Buffer too small by one */
886 status = U_ZERO_ERROR;
887 memset(buf, -1, sizeof(buf));
888 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
889 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
890 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
891 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
892 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
893
894 uregex_close(re);
895 }
896
897
898 /*
899 * replaceAll()
900 */
901 {
902 UChar text1[80]; /* "Replace xaax x1x x...x." */
903 UChar text2[80]; /* "No match Here" */
904 UChar replText[80]; /* "<$1>" */
905 UChar replText2[80]; /* "<<$1>>" */
906 const char * pattern = "x(.*?)x";
907 const char * expectedResult = "Replace <aa> <1> <...>.";
908 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
909 UChar buf[80];
910 int32_t resultSize;
911 int32_t expectedResultSize;
912 int32_t expectedResultSize2;
913 int32_t i;
914
915 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
916 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
917 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
918 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
919 expectedResultSize = (int32_t)strlen(expectedResult);
920 expectedResultSize2 = (int32_t)strlen(expectedResult2);
921
922 status = U_ZERO_ERROR;
923 re = uregex_openC(pattern, 0, NULL, &status);
924 TEST_ASSERT_SUCCESS(status);
925
926 /* Normal case, with match */
927 uregex_setText(re, text1, -1, &status);
928 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
929 TEST_ASSERT_SUCCESS(status);
930 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
931 TEST_ASSERT(resultSize == expectedResultSize);
932
933 /* No match. Text should copy to output with no changes. */
934 status = U_ZERO_ERROR;
935 uregex_setText(re, text2, -1, &status);
936 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
937 TEST_ASSERT_SUCCESS(status);
938 TEST_ASSERT_STRING("No match here.", buf, TRUE);
939 TEST_ASSERT(resultSize == u_strlen(text2));
940
941 /* Match, output just fills buffer, no termination warning. */
942 status = U_ZERO_ERROR;
943 uregex_setText(re, text1, -1, &status);
944 memset(buf, -1, sizeof(buf));
945 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
946 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
947 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
948 TEST_ASSERT(resultSize == expectedResultSize);
949 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
950
951 /* Do the replaceAll again, without first resetting anything.
952 * Should give the same results.
953 */
954 status = U_ZERO_ERROR;
955 memset(buf, -1, sizeof(buf));
956 resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
957 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
958 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
959 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
960 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
961
962 /* NULL buffer, zero buffer length */
963 status = U_ZERO_ERROR;
964 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
965 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
966 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
967
968 /* Buffer too small. Try every size, which will tickle edge cases
969 * in uregex_appendReplacement (used by replaceAll) */
970 for (i=0; i<expectedResultSize; i++) {
971 char expected[80];
972 status = U_ZERO_ERROR;
973 memset(buf, -1, sizeof(buf));
974 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
975 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
976 strcpy(expected, expectedResult);
977 expected[i] = 0;
978 TEST_ASSERT_STRING(expected, buf, FALSE);
979 TEST_ASSERT(resultSize == expectedResultSize);
980 TEST_ASSERT(buf[i] == (UChar)0xffff);
981 }
982
983 /* Buffer too small. Same as previous test, except this time the replacement
984 * text is longer than the match capture group, making the length of the complete
985 * replacement longer than the original string.
986 */
987 for (i=0; i<expectedResultSize2; i++) {
988 char expected[80];
989 status = U_ZERO_ERROR;
990 memset(buf, -1, sizeof(buf));
991 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
992 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
993 strcpy(expected, expectedResult2);
994 expected[i] = 0;
995 TEST_ASSERT_STRING(expected, buf, FALSE);
996 TEST_ASSERT(resultSize == expectedResultSize2);
997 TEST_ASSERT(buf[i] == (UChar)0xffff);
998 }
999
1000
1001 uregex_close(re);
1002 }
1003
1004
1005 /*
1006 * appendReplacement()
1007 */
1008 {
1009 UChar text[100];
1010 UChar repl[100];
1011 UChar buf[100];
1012 UChar *bufPtr;
1013 int32_t bufCap;
1014
1015
1016 status = U_ZERO_ERROR;
1017 re = uregex_openC(".*", 0, 0, &status);
1018 TEST_ASSERT_SUCCESS(status);
1019
1020 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1021 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1022 uregex_setText(re, text, -1, &status);
1023
1024 /* match covers whole target string */
1025 uregex_find(re, 0, &status);
1026 TEST_ASSERT_SUCCESS(status);
1027 bufPtr = buf;
1028 bufCap = UPRV_LENGTHOF(buf);
1029 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1030 TEST_ASSERT_SUCCESS(status);
1031 TEST_ASSERT_STRING("some other", buf, TRUE);
1032
1033 /* Match has \u \U escapes */
1034 uregex_find(re, 0, &status);
1035 TEST_ASSERT_SUCCESS(status);
1036 bufPtr = buf;
1037 bufCap = UPRV_LENGTHOF(buf);
1038 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1039 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1040 TEST_ASSERT_SUCCESS(status);
1041 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1042
1043 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1044 status = U_ZERO_ERROR;
1045 uregex_find(re, 0, &status);
1046 TEST_ASSERT_SUCCESS(status);
1047 bufPtr = buf;
1048 status = U_BUFFER_OVERFLOW_ERROR;
1049 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1050 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1051
1052 uregex_close(re);
1053 }
1054
1055
1056 /*
1057 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1058 */
1059
1060 /*
1061 * split()
1062 */
1063 {
1064 UChar textToSplit[80];
1065 UChar text2[80];
1066 UChar buf[200];
1067 UChar *fields[10];
1068 int32_t numFields;
1069 int32_t requiredCapacity;
1070 int32_t spaceNeeded;
1071 int32_t sz;
1072
1073 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1074 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1075
1076 status = U_ZERO_ERROR;
1077 re = uregex_openC(":", 0, NULL, &status);
1078
1079
1080 /* Simple split */
1081
1082 uregex_setText(re, textToSplit, -1, &status);
1083 TEST_ASSERT_SUCCESS(status);
1084
1085 /* The TEST_ASSERT_SUCCESS call above should change too... */
1086 if (U_SUCCESS(status)) {
1087 memset(fields, -1, sizeof(fields));
1088 numFields =
1089 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1090 TEST_ASSERT_SUCCESS(status);
1091
1092 /* The TEST_ASSERT_SUCCESS call above should change too... */
1093 if(U_SUCCESS(status)) {
1094 TEST_ASSERT(numFields == 3);
1095 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1096 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1097 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1098 TEST_ASSERT(fields[3] == NULL);
1099
1100 spaceNeeded = u_strlen(textToSplit) -
1101 (numFields - 1) + /* Field delimiters do not appear in output */
1102 numFields; /* Each field gets a NUL terminator */
1103
1104 TEST_ASSERT(spaceNeeded == requiredCapacity);
1105 }
1106 }
1107
1108 uregex_close(re);
1109
1110
1111 /* Split with too few output strings available */
1112 status = U_ZERO_ERROR;
1113 re = uregex_openC(":", 0, NULL, &status);
1114 uregex_setText(re, textToSplit, -1, &status);
1115 TEST_ASSERT_SUCCESS(status);
1116
1117 /* The TEST_ASSERT_SUCCESS call above should change too... */
1118 if(U_SUCCESS(status)) {
1119 memset(fields, -1, sizeof(fields));
1120 numFields =
1121 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1122 TEST_ASSERT_SUCCESS(status);
1123
1124 /* The TEST_ASSERT_SUCCESS call above should change too... */
1125 if(U_SUCCESS(status)) {
1126 TEST_ASSERT(numFields == 2);
1127 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1128 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1129 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1130
1131 spaceNeeded = u_strlen(textToSplit) -
1132 (numFields - 1) + /* Field delimiters do not appear in output */
1133 numFields; /* Each field gets a NUL terminator */
1134
1135 TEST_ASSERT(spaceNeeded == requiredCapacity);
1136
1137 /* Split with a range of output buffer sizes. */
1138 spaceNeeded = u_strlen(textToSplit) -
1139 (numFields - 1) + /* Field delimiters do not appear in output */
1140 numFields; /* Each field gets a NUL terminator */
1141
1142 for (sz=0; sz < spaceNeeded+1; sz++) {
1143 memset(fields, -1, sizeof(fields));
1144 status = U_ZERO_ERROR;
1145 numFields =
1146 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1147 if (sz >= spaceNeeded) {
1148 TEST_ASSERT_SUCCESS(status);
1149 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1150 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1151 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1152 } else {
1153 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1154 }
1155 TEST_ASSERT(numFields == 3);
1156 TEST_ASSERT(fields[3] == NULL);
1157 TEST_ASSERT(spaceNeeded == requiredCapacity);
1158 }
1159 }
1160 }
1161
1162 uregex_close(re);
1163 }
1164
1165
1166
1167
1168 /* Split(), part 2. Patterns with capture groups. The capture group text
1169 * comes out as additional fields. */
1170 {
1171 UChar textToSplit[80];
1172 UChar buf[200];
1173 UChar *fields[10];
1174 int32_t numFields;
1175 int32_t requiredCapacity;
1176 int32_t spaceNeeded;
1177 int32_t sz;
1178
1179 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1180
1181 status = U_ZERO_ERROR;
1182 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1183
1184 uregex_setText(re, textToSplit, -1, &status);
1185 TEST_ASSERT_SUCCESS(status);
1186
1187 /* The TEST_ASSERT_SUCCESS call above should change too... */
1188 if(U_SUCCESS(status)) {
1189 memset(fields, -1, sizeof(fields));
1190 numFields =
1191 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1192 TEST_ASSERT_SUCCESS(status);
1193
1194 /* The TEST_ASSERT_SUCCESS call above should change too... */
1195 if(U_SUCCESS(status)) {
1196 TEST_ASSERT(numFields == 5);
1197 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1198 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1199 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1200 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1201 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1202 TEST_ASSERT(fields[5] == NULL);
1203 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1204 TEST_ASSERT(spaceNeeded == requiredCapacity);
1205 }
1206 }
1207
1208 /* Split with too few output strings available (2) */
1209 status = U_ZERO_ERROR;
1210 memset(fields, -1, sizeof(fields));
1211 numFields =
1212 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1213 TEST_ASSERT_SUCCESS(status);
1214
1215 /* The TEST_ASSERT_SUCCESS call above should change too... */
1216 if(U_SUCCESS(status)) {
1217 TEST_ASSERT(numFields == 2);
1218 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1219 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1220 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1221
1222 spaceNeeded = (int32_t)strlen("first . second<tag-b> third."); /* "." at NUL positions */
1223 TEST_ASSERT(spaceNeeded == requiredCapacity);
1224 }
1225
1226 /* Split with too few output strings available (3) */
1227 status = U_ZERO_ERROR;
1228 memset(fields, -1, sizeof(fields));
1229 numFields =
1230 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1231 TEST_ASSERT_SUCCESS(status);
1232
1233 /* The TEST_ASSERT_SUCCESS call above should change too... */
1234 if(U_SUCCESS(status)) {
1235 TEST_ASSERT(numFields == 3);
1236 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1237 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1238 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1239 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1240
1241 spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1242 TEST_ASSERT(spaceNeeded == requiredCapacity);
1243 }
1244
1245 /* Split with just enough output strings available (5) */
1246 status = U_ZERO_ERROR;
1247 memset(fields, -1, sizeof(fields));
1248 numFields =
1249 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1250 TEST_ASSERT_SUCCESS(status);
1251
1252 /* The TEST_ASSERT_SUCCESS call above should change too... */
1253 if(U_SUCCESS(status)) {
1254 TEST_ASSERT(numFields == 5);
1255 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1256 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1257 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1258 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1259 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1260 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1261
1262 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1263 TEST_ASSERT(spaceNeeded == requiredCapacity);
1264 }
1265
1266 /* Split, end of text is a field delimiter. */
1267 status = U_ZERO_ERROR;
1268 sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1269 uregex_setText(re, textToSplit, sz, &status);
1270 TEST_ASSERT_SUCCESS(status);
1271
1272 /* The TEST_ASSERT_SUCCESS call above should change too... */
1273 if(U_SUCCESS(status)) {
1274 memset(fields, -1, sizeof(fields));
1275 numFields =
1276 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1277 TEST_ASSERT_SUCCESS(status);
1278
1279 /* The TEST_ASSERT_SUCCESS call above should change too... */
1280 if(U_SUCCESS(status)) {
1281 TEST_ASSERT(numFields == 5);
1282 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1283 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1284 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1285 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1286 TEST_ASSERT_STRING("", fields[4], TRUE);
1287 TEST_ASSERT(fields[5] == NULL);
1288 TEST_ASSERT(fields[8] == NULL);
1289 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1290 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1291 TEST_ASSERT(spaceNeeded == requiredCapacity);
1292 }
1293 }
1294
1295 uregex_close(re);
1296 }
1297
1298 /*
1299 * set/getTimeLimit
1300 */
1301 TEST_SETUP("abc$", "abcdef", 0);
1302 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1303 uregex_setTimeLimit(re, 1000, &status);
1304 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1305 TEST_ASSERT_SUCCESS(status);
1306 uregex_setTimeLimit(re, -1, &status);
1307 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1308 status = U_ZERO_ERROR;
1309 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1310 TEST_TEARDOWN;
1311
1312 /*
1313 * set/get Stack Limit
1314 */
1315 TEST_SETUP("abc$", "abcdef", 0);
1316 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1317 uregex_setStackLimit(re, 40000, &status);
1318 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1319 TEST_ASSERT_SUCCESS(status);
1320 uregex_setStackLimit(re, -1, &status);
1321 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1322 status = U_ZERO_ERROR;
1323 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1324 TEST_TEARDOWN;
1325
1326
1327 /*
1328 * Get/Set callback functions
1329 * This test is copied from intltest regex/Callbacks
1330 * The pattern and test data will run long enough to cause the callback
1331 * to be invoked. The nested '+' operators give exponential time
1332 * behavior with increasing string length.
1333 */
1334 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1335 callBackContext cbInfo = {4, 0, 0};
1336 const void *pContext = &cbInfo;
1337 URegexMatchCallback *returnedFn = &TestCallbackFn;
1338
1339 /* Getting the callback fn when it hasn't been set must return NULL */
1340 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1341 TEST_ASSERT_SUCCESS(status);
1342 TEST_ASSERT(returnedFn == NULL);
1343 TEST_ASSERT(pContext == NULL);
1344
    /* Set the callback and do a match. */
1346 /* The callback function should record that it has been called. */
1347 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1348 TEST_ASSERT_SUCCESS(status);
1349 TEST_ASSERT(cbInfo.numCalls == 0);
1350 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1351 TEST_ASSERT_SUCCESS(status);
1352 TEST_ASSERT(cbInfo.numCalls > 0);
1353
1354 /* Getting the callback should return the values that were set above. */
1355 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1356 TEST_ASSERT(returnedFn == &TestCallbackFn);
1357 TEST_ASSERT(pContext == &cbInfo);
1358
1359 TEST_TEARDOWN;
1360 }
1361
1362
1363
1364 static void TestBug4315(void) {
1365 UErrorCode theICUError = U_ZERO_ERROR;
1366 URegularExpression *theRegEx;
1367 UChar *textBuff;
1368 const char *thePattern;
1369 UChar theString[100];
1370 UChar *destFields[24];
1371 int32_t neededLength1;
1372 int32_t neededLength2;
1373
1374 int32_t wordCount = 0;
1375 int32_t destFieldsSize = 24;
1376
1377 thePattern = "ck ";
1378 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1379
1380 /* open a regex */
1381 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1382 TEST_ASSERT_SUCCESS(theICUError);
1383
1384 /* set the input string */
1385 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1386 TEST_ASSERT_SUCCESS(theICUError);
1387
1388 /* split */
1389 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1390 * error occurs! */
1391 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1392 destFieldsSize, &theICUError);
1393
1394 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1395 TEST_ASSERT(wordCount==3);
1396
1397 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1398 {
1399 theICUError = U_ZERO_ERROR;
1400 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1401 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1402 destFields, destFieldsSize, &theICUError);
1403 TEST_ASSERT(wordCount==3);
1404 TEST_ASSERT_SUCCESS(theICUError);
1405 TEST_ASSERT(neededLength1 == neededLength2);
1406 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1407 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1408 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1409 TEST_ASSERT(destFields[3] == NULL);
1410 free(textBuff);
1411 }
1412 uregex_close(theRegEx);
1413 }
1414
1415 /* Based on TestRegexCAPI() */
1416 static void TestUTextAPI(void) {
1417 UErrorCode status = U_ZERO_ERROR;
1418 URegularExpression *re;
1419 UText patternText = UTEXT_INITIALIZER;
1420 UChar pat[200];
1421 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1422
    /* Minimalist open/close */
1424 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1425 re = uregex_openUText(&patternText, 0, 0, &status);
1426 if (U_FAILURE(status)) {
1427 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1428 utext_close(&patternText);
1429 return;
1430 }
1431 uregex_close(re);
1432
1433 /* Open with all flag values set */
1434 status = U_ZERO_ERROR;
1435 re = uregex_openUText(&patternText,
1436 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1437 0, &status);
1438 TEST_ASSERT_SUCCESS(status);
1439 uregex_close(re);
1440
1441 /* Open with an invalid flag */
1442 status = U_ZERO_ERROR;
1443 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1444 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1445 uregex_close(re);
1446
1447 /* open with an invalid parameter */
1448 status = U_ZERO_ERROR;
1449 re = uregex_openUText(NULL,
1450 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1451 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1452
1453 /*
1454 * clone
1455 */
1456 {
1457 URegularExpression *clone1;
1458 URegularExpression *clone2;
1459 URegularExpression *clone3;
1460 UChar testString1[30];
1461 UChar testString2[30];
1462 UBool result;
1463
1464
1465 status = U_ZERO_ERROR;
1466 re = uregex_openUText(&patternText, 0, 0, &status);
1467 TEST_ASSERT_SUCCESS(status);
1468 clone1 = uregex_clone(re, &status);
1469 TEST_ASSERT_SUCCESS(status);
1470 TEST_ASSERT(clone1 != NULL);
1471
1472 status = U_ZERO_ERROR;
1473 clone2 = uregex_clone(re, &status);
1474 TEST_ASSERT_SUCCESS(status);
1475 TEST_ASSERT(clone2 != NULL);
1476 uregex_close(re);
1477
1478 status = U_ZERO_ERROR;
1479 clone3 = uregex_clone(clone2, &status);
1480 TEST_ASSERT_SUCCESS(status);
1481 TEST_ASSERT(clone3 != NULL);
1482
1483 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1484 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1485
1486 status = U_ZERO_ERROR;
1487 uregex_setText(clone1, testString1, -1, &status);
1488 TEST_ASSERT_SUCCESS(status);
1489 result = uregex_lookingAt(clone1, 0, &status);
1490 TEST_ASSERT_SUCCESS(status);
1491 TEST_ASSERT(result==TRUE);
1492
1493 status = U_ZERO_ERROR;
1494 uregex_setText(clone2, testString2, -1, &status);
1495 TEST_ASSERT_SUCCESS(status);
1496 result = uregex_lookingAt(clone2, 0, &status);
1497 TEST_ASSERT_SUCCESS(status);
1498 TEST_ASSERT(result==FALSE);
1499 result = uregex_find(clone2, 0, &status);
1500 TEST_ASSERT_SUCCESS(status);
1501 TEST_ASSERT(result==TRUE);
1502
1503 uregex_close(clone1);
1504 uregex_close(clone2);
1505 uregex_close(clone3);
1506
1507 }
1508
1509 /*
1510 * pattern() and patternText()
1511 */
1512 {
1513 const UChar *resultPat;
1514 int32_t resultLen;
1515 UText *resultText;
1516 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1517 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1518 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1519 status = U_ZERO_ERROR;
1520
1521 utext_openUTF8(&patternText, str_hello, -1, &status);
1522 re = uregex_open(pat, -1, 0, NULL, &status);
1523 resultPat = uregex_pattern(re, &resultLen, &status);
1524 TEST_ASSERT_SUCCESS(status);
1525
1526 /* The TEST_ASSERT_SUCCESS above should change too... */
1527 if (U_SUCCESS(status)) {
1528 TEST_ASSERT(resultLen == -1);
1529 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1530 }
1531
1532 resultText = uregex_patternUText(re, &status);
1533 TEST_ASSERT_SUCCESS(status);
1534 TEST_ASSERT_UTEXT(str_hello, resultText);
1535
1536 uregex_close(re);
1537
1538 status = U_ZERO_ERROR;
1539 re = uregex_open(pat, 3, 0, NULL, &status);
1540 resultPat = uregex_pattern(re, &resultLen, &status);
1541 TEST_ASSERT_SUCCESS(status);
1542
1543 /* The TEST_ASSERT_SUCCESS above should change too... */
1544 if (U_SUCCESS(status)) {
1545 TEST_ASSERT(resultLen == 3);
1546 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1547 TEST_ASSERT(u_strlen(resultPat) == 3);
1548 }
1549
1550 resultText = uregex_patternUText(re, &status);
1551 TEST_ASSERT_SUCCESS(status);
1552 TEST_ASSERT_UTEXT(str_hel, resultText);
1553
1554 uregex_close(re);
1555 }
1556
1557 /*
1558 * setUText() and lookingAt()
1559 */
1560 {
1561 UText text1 = UTEXT_INITIALIZER;
1562 UText text2 = UTEXT_INITIALIZER;
1563 UBool result;
1564 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1565 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1566 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1567 status = U_ZERO_ERROR;
1568 utext_openUTF8(&text1, str_abcccd, -1, &status);
1569 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1570
1571 utext_openUTF8(&patternText, str_abcd, -1, &status);
1572 re = uregex_openUText(&patternText, 0, NULL, &status);
1573 TEST_ASSERT_SUCCESS(status);
1574
1575 /* Operation before doing a setText should fail... */
1576 status = U_ZERO_ERROR;
1577 uregex_lookingAt(re, 0, &status);
1578 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1579
1580 status = U_ZERO_ERROR;
1581 uregex_setUText(re, &text1, &status);
1582 result = uregex_lookingAt(re, 0, &status);
1583 TEST_ASSERT(result == TRUE);
1584 TEST_ASSERT_SUCCESS(status);
1585
1586 status = U_ZERO_ERROR;
1587 uregex_setUText(re, &text2, &status);
1588 result = uregex_lookingAt(re, 0, &status);
1589 TEST_ASSERT(result == FALSE);
1590 TEST_ASSERT_SUCCESS(status);
1591
1592 status = U_ZERO_ERROR;
1593 uregex_setUText(re, &text1, &status);
1594 result = uregex_lookingAt(re, 0, &status);
1595 TEST_ASSERT(result == TRUE);
1596 TEST_ASSERT_SUCCESS(status);
1597
1598 uregex_close(re);
1599 utext_close(&text1);
1600 utext_close(&text2);
1601 }
1602
1603
1604 /*
1605 * getText() and getUText()
1606 */
1607 {
1608 UText text1 = UTEXT_INITIALIZER;
1609 UText text2 = UTEXT_INITIALIZER;
1610 UChar text2Chars[20];
1611 UText *resultText;
1612 const UChar *result;
1613 int32_t textLength;
1614 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1615 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1616 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1617
1618
1619 status = U_ZERO_ERROR;
1620 utext_openUTF8(&text1, str_abcccd, -1, &status);
1621 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1622 utext_openUChars(&text2, text2Chars, -1, &status);
1623
1624 utext_openUTF8(&patternText, str_abcd, -1, &status);
1625 re = uregex_openUText(&patternText, 0, NULL, &status);
1626
1627 /* First set a UText */
1628 uregex_setUText(re, &text1, &status);
1629 resultText = uregex_getUText(re, NULL, &status);
1630 TEST_ASSERT_SUCCESS(status);
1631 TEST_ASSERT(resultText != &text1);
1632 utext_setNativeIndex(resultText, 0);
1633 utext_setNativeIndex(&text1, 0);
1634 TEST_ASSERT(testUTextEqual(resultText, &text1));
1635 utext_close(resultText);
1636
1637 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1638 (void)result; /* Suppress set but not used warning. */
1639 TEST_ASSERT(textLength == -1 || textLength == 6);
1640 resultText = uregex_getUText(re, NULL, &status);
1641 TEST_ASSERT_SUCCESS(status);
1642 TEST_ASSERT(resultText != &text1);
1643 utext_setNativeIndex(resultText, 0);
1644 utext_setNativeIndex(&text1, 0);
1645 TEST_ASSERT(testUTextEqual(resultText, &text1));
1646 utext_close(resultText);
1647
1648 /* Then set a UChar * */
1649 uregex_setText(re, text2Chars, 7, &status);
1650 resultText = uregex_getUText(re, NULL, &status);
1651 TEST_ASSERT_SUCCESS(status);
1652 utext_setNativeIndex(resultText, 0);
1653 utext_setNativeIndex(&text2, 0);
1654 TEST_ASSERT(testUTextEqual(resultText, &text2));
1655 utext_close(resultText);
1656 result = uregex_getText(re, &textLength, &status);
1657 TEST_ASSERT(textLength == 7);
1658
1659 uregex_close(re);
1660 utext_close(&text1);
1661 utext_close(&text2);
1662 }
1663
1664 /*
1665 * matches()
1666 */
1667 {
1668 UText text1 = UTEXT_INITIALIZER;
1669 UBool result;
1670 UText nullText = UTEXT_INITIALIZER;
1671 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1672 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1673
1674 status = U_ZERO_ERROR;
1675 utext_openUTF8(&text1, str_abcccde, -1, &status);
1676 utext_openUTF8(&patternText, str_abcd, -1, &status);
1677 re = uregex_openUText(&patternText, 0, NULL, &status);
1678
1679 uregex_setUText(re, &text1, &status);
1680 result = uregex_matches(re, 0, &status);
1681 TEST_ASSERT(result == FALSE);
1682 TEST_ASSERT_SUCCESS(status);
1683 uregex_close(re);
1684
1685 status = U_ZERO_ERROR;
1686 re = uregex_openC(".?", 0, NULL, &status);
1687 uregex_setUText(re, &text1, &status);
1688 result = uregex_matches(re, 7, &status);
1689 TEST_ASSERT(result == TRUE);
1690 TEST_ASSERT_SUCCESS(status);
1691
1692 status = U_ZERO_ERROR;
1693 utext_openUTF8(&nullText, "", -1, &status);
1694 uregex_setUText(re, &nullText, &status);
1695 TEST_ASSERT_SUCCESS(status);
1696 result = uregex_matches(re, 0, &status);
1697 TEST_ASSERT(result == TRUE);
1698 TEST_ASSERT_SUCCESS(status);
1699
1700 uregex_close(re);
1701 utext_close(&text1);
1702 utext_close(&nullText);
1703 }
1704
1705
1706 /*
1707 * lookingAt() Used in setText test.
1708 */
1709
1710
1711 /*
1712 * find(), findNext, start, end, reset
1713 */
1714 {
1715 UChar text1[50];
1716 UBool result;
1717 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1718 status = U_ZERO_ERROR;
1719 re = uregex_openC("rx", 0, NULL, &status);
1720
1721 uregex_setText(re, text1, -1, &status);
1722 result = uregex_find(re, 0, &status);
1723 TEST_ASSERT(result == TRUE);
1724 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1725 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1726 TEST_ASSERT_SUCCESS(status);
1727
1728 result = uregex_find(re, 9, &status);
1729 TEST_ASSERT(result == TRUE);
1730 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1731 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1732 TEST_ASSERT_SUCCESS(status);
1733
1734 result = uregex_find(re, 14, &status);
1735 TEST_ASSERT(result == FALSE);
1736 TEST_ASSERT_SUCCESS(status);
1737
1738 status = U_ZERO_ERROR;
1739 uregex_reset(re, 0, &status);
1740
1741 result = uregex_findNext(re, &status);
1742 TEST_ASSERT(result == TRUE);
1743 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1744 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1745 TEST_ASSERT_SUCCESS(status);
1746
1747 result = uregex_findNext(re, &status);
1748 TEST_ASSERT(result == TRUE);
1749 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1750 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1751 TEST_ASSERT_SUCCESS(status);
1752
1753 status = U_ZERO_ERROR;
1754 uregex_reset(re, 12, &status);
1755
1756 result = uregex_findNext(re, &status);
1757 TEST_ASSERT(result == TRUE);
1758 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1759 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1760 TEST_ASSERT_SUCCESS(status);
1761
1762 result = uregex_findNext(re, &status);
1763 TEST_ASSERT(result == FALSE);
1764 TEST_ASSERT_SUCCESS(status);
1765
1766 uregex_close(re);
1767 }
1768
1769 /*
1770 * groupUText()
1771 */
1772 {
1773 UChar text1[80];
1774 UText *actual;
1775 UBool result;
1776 int64_t groupLen = 0;
1777 UChar groupBuf[20];
1778
1779 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1780
1781 status = U_ZERO_ERROR;
1782 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1783 TEST_ASSERT_SUCCESS(status);
1784
1785 uregex_setText(re, text1, -1, &status);
1786 result = uregex_find(re, 0, &status);
1787 TEST_ASSERT(result==TRUE);
1788
1789 /* Capture Group 0 with shallow clone API. Should succeed. */
1790 status = U_ZERO_ERROR;
1791 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1792 TEST_ASSERT_SUCCESS(status);
1793
1794 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1795 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1796 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1797
1798 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1799 utext_close(actual);
1800
1801 /* Capture group #1. Should succeed. */
1802 status = U_ZERO_ERROR;
1803
1804 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1805 TEST_ASSERT_SUCCESS(status);
1806 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1807 /* (within the string text1) */
1808 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1809 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1810 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1811
1812 utext_close(actual);
1813
1814 /* Capture group out of range. Error. */
1815 status = U_ZERO_ERROR;
1816 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1817 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1818 utext_close(actual);
1819
1820 uregex_close(re);
1821 }
1822
1823 /*
1824 * replaceFirst()
1825 */
1826 {
1827 UChar text1[80];
1828 UChar text2[80];
1829 UText replText = UTEXT_INITIALIZER;
1830 UText *result;
1831 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1832 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1833 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1834 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1835 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1836 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1837 status = U_ZERO_ERROR;
1838 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1839 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1840 utext_openUTF8(&replText, str_1x, -1, &status);
1841
1842 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1843 TEST_ASSERT_SUCCESS(status);
1844
1845 /* Normal case, with match */
1846 uregex_setText(re, text1, -1, &status);
1847 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1848 TEST_ASSERT_SUCCESS(status);
1849 TEST_ASSERT_UTEXT(str_Replxxx, result);
1850 utext_close(result);
1851
1852 /* No match. Text should copy to output with no changes. */
1853 uregex_setText(re, text2, -1, &status);
1854 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1855 TEST_ASSERT_SUCCESS(status);
1856 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1857 utext_close(result);
1858
1859 /* Unicode escapes */
1860 uregex_setText(re, text1, -1, &status);
1861 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1862 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1863 TEST_ASSERT_SUCCESS(status);
1864 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1865 utext_close(result);
1866
1867 uregex_close(re);
1868 utext_close(&replText);
1869 }
1870
1871
1872 /*
1873 * replaceAll()
1874 */
1875 {
1876 UChar text1[80];
1877 UChar text2[80];
1878 UText replText = UTEXT_INITIALIZER;
1879 UText *result;
1880 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1881 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1882 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1883 status = U_ZERO_ERROR;
1884 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1885 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1886 utext_openUTF8(&replText, str_1, -1, &status);
1887
1888 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1889 TEST_ASSERT_SUCCESS(status);
1890
1891 /* Normal case, with match */
1892 uregex_setText(re, text1, -1, &status);
1893 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1894 TEST_ASSERT_SUCCESS(status);
1895 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1896 utext_close(result);
1897
1898 /* No match. Text should copy to output with no changes. */
1899 uregex_setText(re, text2, -1, &status);
1900 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1901 TEST_ASSERT_SUCCESS(status);
1902 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1903 utext_close(result);
1904
1905 uregex_close(re);
1906 utext_close(&replText);
1907 }
1908
1909
1910 /*
1911 * appendReplacement()
1912 */
1913 {
1914 UChar text[100];
1915 UChar repl[100];
1916 UChar buf[100];
1917 UChar *bufPtr;
1918 int32_t bufCap;
1919
1920 status = U_ZERO_ERROR;
1921 re = uregex_openC(".*", 0, 0, &status);
1922 TEST_ASSERT_SUCCESS(status);
1923
1924 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1925 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1926 uregex_setText(re, text, -1, &status);
1927
1928 /* match covers whole target string */
1929 uregex_find(re, 0, &status);
1930 TEST_ASSERT_SUCCESS(status);
1931 bufPtr = buf;
1932 bufCap = UPRV_LENGTHOF(buf);
1933 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1934 TEST_ASSERT_SUCCESS(status);
1935 TEST_ASSERT_STRING("some other", buf, TRUE);
1936
1937 /* Match has \u \U escapes */
1938 uregex_find(re, 0, &status);
1939 TEST_ASSERT_SUCCESS(status);
1940 bufPtr = buf;
1941 bufCap = UPRV_LENGTHOF(buf);
1942 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1943 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1944 TEST_ASSERT_SUCCESS(status);
1945 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1946
1947 uregex_close(re);
1948 }
1949
1950
1951 /*
1952 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1953 */
1954
1955 /*
1956 * splitUText()
1957 */
1958 {
1959 UChar textToSplit[80];
1960 UChar text2[80];
1961 UText *fields[10];
1962 int32_t numFields;
1963 int32_t i;
1964
1965 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1966 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1967
1968 status = U_ZERO_ERROR;
1969 re = uregex_openC(":", 0, NULL, &status);
1970
1971
1972 /* Simple split */
1973
1974 uregex_setText(re, textToSplit, -1, &status);
1975 TEST_ASSERT_SUCCESS(status);
1976
1977 /* The TEST_ASSERT_SUCCESS call above should change too... */
1978 if (U_SUCCESS(status)) {
1979 memset(fields, 0, sizeof(fields));
1980 numFields = uregex_splitUText(re, fields, 10, &status);
1981 TEST_ASSERT_SUCCESS(status);
1982
1983 /* The TEST_ASSERT_SUCCESS call above should change too... */
1984 if(U_SUCCESS(status)) {
1985 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1986 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1987 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1988 TEST_ASSERT(numFields == 3);
1989 TEST_ASSERT_UTEXT(str_first, fields[0]);
1990 TEST_ASSERT_UTEXT(str_second, fields[1]);
1991 TEST_ASSERT_UTEXT(str_third, fields[2]);
1992 TEST_ASSERT(fields[3] == NULL);
1993 }
1994 for(i = 0; i < numFields; i++) {
1995 utext_close(fields[i]);
1996 }
1997 }
1998
1999 uregex_close(re);
2000
2001
2002 /* Split with too few output strings available */
2003 status = U_ZERO_ERROR;
2004 re = uregex_openC(":", 0, NULL, &status);
2005 uregex_setText(re, textToSplit, -1, &status);
2006 TEST_ASSERT_SUCCESS(status);
2007
2008 /* The TEST_ASSERT_SUCCESS call above should change too... */
2009 if(U_SUCCESS(status)) {
2010 fields[0] = NULL;
2011 fields[1] = NULL;
2012 fields[2] = &patternText;
2013 numFields = uregex_splitUText(re, fields, 2, &status);
2014 TEST_ASSERT_SUCCESS(status);
2015
2016 /* The TEST_ASSERT_SUCCESS call above should change too... */
2017 if(U_SUCCESS(status)) {
2018 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2019 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2020 TEST_ASSERT(numFields == 2);
2021 TEST_ASSERT_UTEXT(str_first, fields[0]);
2022 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2023 TEST_ASSERT(fields[2] == &patternText);
2024 }
2025 for(i = 0; i < numFields; i++) {
2026 utext_close(fields[i]);
2027 }
2028 }
2029
2030 uregex_close(re);
2031 }
2032
2033 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2034 * comes out as additional fields. */
2035 {
2036 UChar textToSplit[80];
2037 UText *fields[10];
2038 int32_t numFields;
2039 int32_t i;
2040
2041 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2042
2043 status = U_ZERO_ERROR;
2044 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2045
2046 uregex_setText(re, textToSplit, -1, &status);
2047 TEST_ASSERT_SUCCESS(status);
2048
2049 /* The TEST_ASSERT_SUCCESS call above should change too... */
2050 if(U_SUCCESS(status)) {
2051 memset(fields, 0, sizeof(fields));
2052 numFields = uregex_splitUText(re, fields, 10, &status);
2053 TEST_ASSERT_SUCCESS(status);
2054
2055 /* The TEST_ASSERT_SUCCESS call above should change too... */
2056 if(U_SUCCESS(status)) {
2057 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2058 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2059 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2060 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2061 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2062
2063 TEST_ASSERT(numFields == 5);
2064 TEST_ASSERT_UTEXT(str_first, fields[0]);
2065 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2066 TEST_ASSERT_UTEXT(str_second, fields[2]);
2067 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2068 TEST_ASSERT_UTEXT(str_third, fields[4]);
2069 TEST_ASSERT(fields[5] == NULL);
2070 }
2071 for(i = 0; i < numFields; i++) {
2072 utext_close(fields[i]);
2073 }
2074 }
2075
2076 /* Split with too few output strings available (2) */
2077 status = U_ZERO_ERROR;
2078 fields[0] = NULL;
2079 fields[1] = NULL;
2080 fields[2] = &patternText;
2081 numFields = uregex_splitUText(re, fields, 2, &status);
2082 TEST_ASSERT_SUCCESS(status);
2083
2084 /* The TEST_ASSERT_SUCCESS call above should change too... */
2085 if(U_SUCCESS(status)) {
2086 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2087 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2088 TEST_ASSERT(numFields == 2);
2089 TEST_ASSERT_UTEXT(str_first, fields[0]);
2090 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2091 TEST_ASSERT(fields[2] == &patternText);
2092 }
2093 for(i = 0; i < numFields; i++) {
2094 utext_close(fields[i]);
2095 }
2096
2097
2098 /* Split with too few output strings available (3) */
2099 status = U_ZERO_ERROR;
2100 fields[0] = NULL;
2101 fields[1] = NULL;
2102 fields[2] = NULL;
2103 fields[3] = &patternText;
2104 numFields = uregex_splitUText(re, fields, 3, &status);
2105 TEST_ASSERT_SUCCESS(status);
2106
2107 /* The TEST_ASSERT_SUCCESS call above should change too... */
2108 if(U_SUCCESS(status)) {
2109 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2110 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2111 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2112 TEST_ASSERT(numFields == 3);
2113 TEST_ASSERT_UTEXT(str_first, fields[0]);
2114 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2115 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2116 TEST_ASSERT(fields[3] == &patternText);
2117 }
2118 for(i = 0; i < numFields; i++) {
2119 utext_close(fields[i]);
2120 }
2121
2122 /* Split with just enough output strings available (5) */
2123 status = U_ZERO_ERROR;
2124 fields[0] = NULL;
2125 fields[1] = NULL;
2126 fields[2] = NULL;
2127 fields[3] = NULL;
2128 fields[4] = NULL;
2129 fields[5] = &patternText;
2130 numFields = uregex_splitUText(re, fields, 5, &status);
2131 TEST_ASSERT_SUCCESS(status);
2132
2133 /* The TEST_ASSERT_SUCCESS call above should change too... */
2134 if(U_SUCCESS(status)) {
2135 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2136 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2137 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2138 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2139 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2140
2141 TEST_ASSERT(numFields == 5);
2142 TEST_ASSERT_UTEXT(str_first, fields[0]);
2143 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2144 TEST_ASSERT_UTEXT(str_second, fields[2]);
2145 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2146 TEST_ASSERT_UTEXT(str_third, fields[4]);
2147 TEST_ASSERT(fields[5] == &patternText);
2148 }
2149 for(i = 0; i < numFields; i++) {
2150 utext_close(fields[i]);
2151 }
2152
2153 /* Split, end of text is a field delimiter. */
2154 status = U_ZERO_ERROR;
2155 uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2156 TEST_ASSERT_SUCCESS(status);
2157
2158 /* The TEST_ASSERT_SUCCESS call above should change too... */
2159 if(U_SUCCESS(status)) {
2160 memset(fields, 0, sizeof(fields));
2161 fields[9] = &patternText;
2162 numFields = uregex_splitUText(re, fields, 9, &status);
2163 TEST_ASSERT_SUCCESS(status);
2164
2165 /* The TEST_ASSERT_SUCCESS call above should change too... */
2166 if(U_SUCCESS(status)) {
2167 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2168 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2169 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2170 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2171 const char str_empty[] = { 0x00 };
2172
2173 TEST_ASSERT(numFields == 5);
2174 TEST_ASSERT_UTEXT(str_first, fields[0]);
2175 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2176 TEST_ASSERT_UTEXT(str_second, fields[2]);
2177 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2178 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2179 TEST_ASSERT(fields[5] == NULL);
2180 TEST_ASSERT(fields[8] == NULL);
2181 TEST_ASSERT(fields[9] == &patternText);
2182 }
2183 for(i = 0; i < numFields; i++) {
2184 utext_close(fields[i]);
2185 }
2186 }
2187
2188 uregex_close(re);
2189 }
2190 utext_close(&patternText);
2191 }
2192
2193
2194 static void TestRefreshInput(void) {
2195 /*
2196 * RefreshInput changes out the input of a URegularExpression without
2197 * changing anything else in the match state. Used with Java JNI,
2198 * when Java moves the underlying string storage. This test
2199 * runs a find() loop, moving the text after the first match.
2200 * The right number of matches should still be found.
2201 */
2202 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2203 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2204 UErrorCode status = U_ZERO_ERROR;
2205 URegularExpression *re;
2206 UText ut1 = UTEXT_INITIALIZER;
2207 UText ut2 = UTEXT_INITIALIZER;
2208
2209 re = uregex_openC("[ABC]", 0, 0, &status);
2210 TEST_ASSERT_SUCCESS(status);
2211
2212 utext_openUChars(&ut1, testStr, -1, &status);
2213 TEST_ASSERT_SUCCESS(status);
2214 uregex_setUText(re, &ut1, &status);
2215 TEST_ASSERT_SUCCESS(status);
2216
2217 /* Find the first match "A" in the original string */
2218 TEST_ASSERT(uregex_findNext(re, &status));
2219 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2220
2221 /* Move the string, kill the original string. */
2222 u_strcpy(movedStr, testStr);
2223 u_memset(testStr, 0, u_strlen(testStr));
2224 utext_openUChars(&ut2, movedStr, -1, &status);
2225 TEST_ASSERT_SUCCESS(status);
2226 uregex_refreshUText(re, &ut2, &status);
2227 TEST_ASSERT_SUCCESS(status);
2228
2229 /* Find the following two matches, now working in the moved string. */
2230 TEST_ASSERT(uregex_findNext(re, &status));
2231 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2232 TEST_ASSERT(uregex_findNext(re, &status));
2233 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2234 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2235
2236 uregex_close(re);
2237 }
2238
2239
2240 static void TestBug8421(void) {
2241 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2242 * was failing.
2243 */
2244 URegularExpression *re;
2245 UErrorCode status = U_ZERO_ERROR;
2246 int32_t limit = -1;
2247
2248 re = uregex_openC("abc", 0, 0, &status);
2249 TEST_ASSERT_SUCCESS(status);
2250
2251 limit = uregex_getTimeLimit(re, &status);
2252 TEST_ASSERT_SUCCESS(status);
2253 TEST_ASSERT(limit == 0);
2254
2255 uregex_setTimeLimit(re, 100, &status);
2256 TEST_ASSERT_SUCCESS(status);
2257 limit = uregex_getTimeLimit(re, &status);
2258 TEST_ASSERT_SUCCESS(status);
2259 TEST_ASSERT(limit == 100);
2260
2261 uregex_close(re);
2262 }
2263
2264 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2265 // suppress compiler warnings about unused variables
2266 (void)context;
2267 (void)matchIndex;
2268 return FALSE;
2269 }
2270
2271 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2272 // suppress compiler warnings about unused variables
2273 (void)context;
2274 (void)steps;
2275 return FALSE;
2276 }
2277
2278 static void TestBug10815() {
2279 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2280 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2281 */
2282 URegularExpression *re;
2283 UErrorCode status = U_ZERO_ERROR;
2284 UChar text[100];
2285
2286
2287 // findNext() with a find progress callback function.
2288
2289 re = uregex_openC(".z", 0, 0, &status);
2290 TEST_ASSERT_SUCCESS(status);
2291
2292 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2293 uregex_setText(re, text, -1, &status);
2294 TEST_ASSERT_SUCCESS(status);
2295
2296 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2297 TEST_ASSERT_SUCCESS(status);
2298
2299 uregex_findNext(re, &status);
2300 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2301
2302 uregex_close(re);
2303
2304 // findNext() with a match progress callback function.
2305
2306 status = U_ZERO_ERROR;
2307 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2308 TEST_ASSERT_SUCCESS(status);
2309
2310 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2311 // it will appear to be stuck in a (near) infinite loop.
2312 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2313 uregex_setText(re, text, -1, &status);
2314 TEST_ASSERT_SUCCESS(status);
2315
2316 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2317 TEST_ASSERT_SUCCESS(status);
2318
2319 uregex_findNext(re, &status);
2320 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2321
2322 uregex_close(re);
2323 }
2324
2325 static const UChar startLinePattern[] = { 0x5E, 0x78, 0 }; // "^x"
2326
2327 static void TestMatchStartLineWithEmptyText() {
2328 UErrorCode status = U_ZERO_ERROR;
2329 UText* ut = utext_openUChars(NULL, NULL, 0, &status);
2330 TEST_ASSERT_SUCCESS(status);
2331 if (U_SUCCESS(status)) {
2332 URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status);
2333 TEST_ASSERT_SUCCESS(status);
2334 if (U_SUCCESS(status)) {
2335 uregex_setUText(re, ut, &status);
2336 TEST_ASSERT(U_SUCCESS(status));
2337 if (U_SUCCESS(status)) {
2338 UBool found = uregex_findNext(re, &status);
2339 TEST_ASSERT(U_SUCCESS(status) && !found);
2340 }
2341 uregex_close(re);
2342 }
2343 utext_close(ut);
2344 }
2345 }
2346
2347 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */