]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/reapits.c
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
CommitLineData
374ca955
A
1/********************************************************************
2 * COPYRIGHT:
b331163b 3 * Copyright (c) 2004-2015, International Business Machines Corporation and
374ca955
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14* try to test the full functionality. It just calls each function and verifies that it
15* works on a basic level.
16*
17* More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
729e4ab9 29#include "unicode/utext.h"
374ca955 30#include "cintltst.h"
b331163b 31#include "cmemory.h"
374ca955
A
32
/*
 * TEST_ASSERT_SUCCESS(status)
 *   Logs a data error, tagged with the current file and line, when the
 *   UErrorCode `status` indicates failure. Does nothing otherwise.
 *   Expands to a brace-enclosed block, so it may be used with or without
 *   a trailing semicolon.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
374ca955
A
35
/*
 * TEST_ASSERT(expr)
 *   Logs a test error, tagged with the current file and line and quoting the
 *   expression text, when `expr` evaluates to false (zero).
 *   Expands to a brace-enclosed block, so it may be used with or without
 *   a trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
374ca955 38
46f4442e
A
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         Both macros expect variables named "status" (UErrorCode) and "re"
 *         (URegularExpression *) to be in scope at the point of use.
 *         TEST_SETUP opens a block that only TEST_TEARDOWN closes, so the two
 *         must always be used as a pair. The code between them runs only when
 *         setup succeeded.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re     = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        /* Report the allocation failure, but keep an earlier error if there is one. */ \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
67
68
729e4ab9
A
69/**
70 * @param expected utf-8 array of bytes to be expected
71 */
46f4442e
A
72static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
73 char buf_inside_macro[120];
74 int32_t len = (int32_t)strlen(expected);
75 UBool success;
76 if (nulTerm) {
77 u_austrncpy(buf_inside_macro, (actual), len+1);
78 buf_inside_macro[len+2] = 0;
79 success = (strcmp((expected), buf_inside_macro) == 0);
80 } else {
81 u_austrncpy(buf_inside_macro, (actual), len);
82 buf_inside_macro[len+1] = 0;
83 success = (strncmp((expected), buf_inside_macro, len) == 0);
84 }
85 if (success == FALSE) {
86 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
87 file, line, (expected), buf_inside_macro);
88 }
374ca955
A
89}
90
/*
 * TEST_ASSERT_STRING(const char *expected, const UChar *actual, UBool nulTerm)
 *   Forwards to test_assert_string(), supplying the caller's file and line.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) \
    test_assert_string((expected), (actual), (nulTerm), __FILE__, __LINE__)
374ca955
A
92
93
4388f060
A
94static UBool equals_utf8_utext(const char *utf8, UText *utext) {
95 int32_t u8i = 0;
96 UChar32 u8c = 0;
97 UChar32 utc = 0;
98 UBool stringsEqual = TRUE;
99 utext_setNativeIndex(utext, 0);
100 for (;;) {
101 U8_NEXT_UNSAFE(utf8, u8i, u8c);
102 utc = utext_next32(utext);
103 if (u8c == 0 && utc == U_SENTINEL) {
104 break;
105 }
106 if (u8c != utc || u8c == 0) {
107 stringsEqual = FALSE;
108 break;
109 }
110 }
111 return stringsEqual;
112}
113
114
729e4ab9 115static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
729e4ab9 116 utext_setNativeIndex(actual, 0);
4388f060 117 if (!equals_utf8_utext(expected, actual)) {
729e4ab9
A
118 UChar32 c;
119 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
120 c = utext_next32From(actual, 0);
121 while (c != U_SENTINEL) {
122 if (0x20<c && c <0x7e) {
123 log_err("%c", c);
124 } else {
125 log_err("%#x", c);
126 }
127 c = UTEXT_NEXT32(actual);
128 }
129 log_err("\"\n");
130 }
729e4ab9
A
131}
132
4388f060
A
/*
 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
 *      Forwards to test_assert_utext(), supplying the caller's file and line.
 *      Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) \
    test_assert_utext((expected), (actual), __FILE__, __LINE__)
374ca955 138
4388f060
A
139static UBool testUTextEqual(UText *uta, UText *utb) {
140 UChar32 ca = 0;
141 UChar32 cb = 0;
142 utext_setNativeIndex(uta, 0);
143 utext_setNativeIndex(utb, 0);
144 do {
145 ca = utext_next32(uta);
146 cb = utext_next32(utb);
147 if (ca != cb) {
148 break;
149 }
150 } while (ca != U_SENTINEL);
151 return ca == cb;
152}
153
154
374ca955
A
155
156
157static void TestRegexCAPI(void);
73c04bcf 158static void TestBug4315(void);
729e4ab9 159static void TestUTextAPI(void);
4388f060
A
160static void TestRefreshInput(void);
161static void TestBug8421(void);
b331163b 162static void TestBug10815(void);
374ca955
A
163
164void addURegexTest(TestNode** root);
165
166void addURegexTest(TestNode** root)
167{
168 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
73c04bcf 169 addTest(root, &TestBug4315, "regex/TestBug4315");
729e4ab9 170 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
4388f060
A
171 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
172 addTest(root, &TestBug8421, "regex/TestBug8421");
b331163b 173 addTest(root, &TestBug10815, "regex/TestBug10815");
374ca955
A
174}
175
46f4442e
A
176/*
177 * Call back function and context struct used for testing
178 * regular expression user callbacks. This test is mostly the same as
179 * the corresponding C++ test in intltest.
180 */
typedef struct callBackContext {
    int32_t   maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t   numCalls;    /* incremented by TestCallbackFn on every invocation             */
    int32_t   lastSteps;   /* "steps" argument seen by the most recent invocation           */
} callBackContext;
186
187static UBool U_EXPORT2 U_CALLCONV
188TestCallbackFn(const void *context, int32_t steps) {
189 callBackContext *info = (callBackContext *)context;
190 if (info->lastSteps+1 != steps) {
191 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
192 }
193 info->lastSteps = steps;
194 info->numCalls++;
195 return (info->numCalls < info->maxCalls);
196}
374ca955 197
46f4442e
A
198/*
199 * Regular Expression C API Tests
200 */
374ca955
A
201static void TestRegexCAPI(void) {
202 UErrorCode status = U_ZERO_ERROR;
203 URegularExpression *re;
204 UChar pat[200];
205 UChar *minus1;
206
207 memset(&minus1, -1, sizeof(minus1));
208
209 /* Minimalist open/close */
b331163b 210 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
374ca955 211 re = uregex_open(pat, -1, 0, 0, &status);
46f4442e 212 if (U_FAILURE(status)) {
729e4ab9 213 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
46f4442e
A
214 return;
215 }
374ca955
A
216 uregex_close(re);
217
218 /* Open with all flag values set */
219 status = U_ZERO_ERROR;
220 re = uregex_open(pat, -1,
4388f060 221 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
374ca955
A
222 0, &status);
223 TEST_ASSERT_SUCCESS(status);
224 uregex_close(re);
225
226 /* Open with an invalid flag */
227 status = U_ZERO_ERROR;
228 re = uregex_open(pat, -1, 0x40000000, 0, &status);
229 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
230 uregex_close(re);
231
729e4ab9
A
232 /* Open with an unimplemented flag */
233 status = U_ZERO_ERROR;
4388f060 234 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
729e4ab9
A
235 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
236 uregex_close(re);
237
73c04bcf
A
238 /* openC with an invalid parameter */
239 status = U_ZERO_ERROR;
240 re = uregex_openC(NULL,
241 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
242 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
243
244 /* openC with an invalid parameter */
245 status = U_USELESS_COLLATOR_ERROR;
246 re = uregex_openC(NULL,
247 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
248 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
374ca955
A
249
250 /* openC open from a C string */
251 {
252 const UChar *p;
253 int32_t len;
254 status = U_ZERO_ERROR;
255 re = uregex_openC("abc*", 0, 0, &status);
256 TEST_ASSERT_SUCCESS(status);
257 p = uregex_pattern(re, &len, &status);
258 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
259
260 /* The TEST_ASSERT_SUCCESS above should change too... */
261 if(U_SUCCESS(status)) {
b331163b 262 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
73c04bcf
A
263 TEST_ASSERT(u_strcmp(pat, p) == 0);
264 TEST_ASSERT(len==(int32_t)strlen("abc*"));
265 }
374ca955
A
266
267 uregex_close(re);
268
269 /* TODO: Open with ParseError parameter */
270 }
271
272 /*
273 * clone
274 */
275 {
276 URegularExpression *clone1;
277 URegularExpression *clone2;
278 URegularExpression *clone3;
279 UChar testString1[30];
280 UChar testString2[30];
281 UBool result;
282
283
284 status = U_ZERO_ERROR;
285 re = uregex_openC("abc*", 0, 0, &status);
286 TEST_ASSERT_SUCCESS(status);
287 clone1 = uregex_clone(re, &status);
288 TEST_ASSERT_SUCCESS(status);
289 TEST_ASSERT(clone1 != NULL);
290
291 status = U_ZERO_ERROR;
292 clone2 = uregex_clone(re, &status);
293 TEST_ASSERT_SUCCESS(status);
294 TEST_ASSERT(clone2 != NULL);
295 uregex_close(re);
296
297 status = U_ZERO_ERROR;
298 clone3 = uregex_clone(clone2, &status);
299 TEST_ASSERT_SUCCESS(status);
300 TEST_ASSERT(clone3 != NULL);
301
b331163b
A
302 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
303 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
374ca955
A
304
305 status = U_ZERO_ERROR;
306 uregex_setText(clone1, testString1, -1, &status);
307 TEST_ASSERT_SUCCESS(status);
308 result = uregex_lookingAt(clone1, 0, &status);
309 TEST_ASSERT_SUCCESS(status);
310 TEST_ASSERT(result==TRUE);
311
312 status = U_ZERO_ERROR;
313 uregex_setText(clone2, testString2, -1, &status);
314 TEST_ASSERT_SUCCESS(status);
315 result = uregex_lookingAt(clone2, 0, &status);
316 TEST_ASSERT_SUCCESS(status);
317 TEST_ASSERT(result==FALSE);
318 result = uregex_find(clone2, 0, &status);
319 TEST_ASSERT_SUCCESS(status);
320 TEST_ASSERT(result==TRUE);
321
322 uregex_close(clone1);
323 uregex_close(clone2);
324 uregex_close(clone3);
325
326 }
327
328 /*
329 * pattern()
330 */
331 {
332 const UChar *resultPat;
333 int32_t resultLen;
b331163b 334 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
374ca955
A
335 status = U_ZERO_ERROR;
336 re = uregex_open(pat, -1, 0, NULL, &status);
337 resultPat = uregex_pattern(re, &resultLen, &status);
338 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
339
340 /* The TEST_ASSERT_SUCCESS above should change too... */
341 if (U_SUCCESS(status)) {
342 TEST_ASSERT(resultLen == -1);
343 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
344 }
345
374ca955
A
346 uregex_close(re);
347
348 status = U_ZERO_ERROR;
349 re = uregex_open(pat, 3, 0, NULL, &status);
350 resultPat = uregex_pattern(re, &resultLen, &status);
351 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
352 TEST_ASSERT_SUCCESS(status);
353
354 /* The TEST_ASSERT_SUCCESS above should change too... */
355 if (U_SUCCESS(status)) {
356 TEST_ASSERT(resultLen == 3);
357 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
358 TEST_ASSERT(u_strlen(resultPat) == 3);
359 }
360
374ca955
A
361 uregex_close(re);
362 }
363
364 /*
365 * flags()
366 */
367 {
368 int32_t t;
369
370 status = U_ZERO_ERROR;
371 re = uregex_open(pat, -1, 0, NULL, &status);
372 t = uregex_flags(re, &status);
373 TEST_ASSERT_SUCCESS(status);
374 TEST_ASSERT(t == 0);
375 uregex_close(re);
376
377 status = U_ZERO_ERROR;
378 re = uregex_open(pat, -1, 0, NULL, &status);
379 t = uregex_flags(re, &status);
380 TEST_ASSERT_SUCCESS(status);
381 TEST_ASSERT(t == 0);
382 uregex_close(re);
383
384 status = U_ZERO_ERROR;
385 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
386 t = uregex_flags(re, &status);
387 TEST_ASSERT_SUCCESS(status);
388 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
389 uregex_close(re);
390 }
391
392 /*
393 * setText() and lookingAt()
394 */
395 {
396 UChar text1[50];
397 UChar text2[50];
398 UBool result;
399
b331163b
A
400 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
401 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
374ca955 402 status = U_ZERO_ERROR;
b331163b 403 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
374ca955
A
404 re = uregex_open(pat, -1, 0, NULL, &status);
405 TEST_ASSERT_SUCCESS(status);
406
407 /* Operation before doing a setText should fail... */
408 status = U_ZERO_ERROR;
409 uregex_lookingAt(re, 0, &status);
410 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
411
412 status = U_ZERO_ERROR;
413 uregex_setText(re, text1, -1, &status);
414 result = uregex_lookingAt(re, 0, &status);
415 TEST_ASSERT(result == TRUE);
416 TEST_ASSERT_SUCCESS(status);
417
418 status = U_ZERO_ERROR;
419 uregex_setText(re, text2, -1, &status);
420 result = uregex_lookingAt(re, 0, &status);
421 TEST_ASSERT(result == FALSE);
422 TEST_ASSERT_SUCCESS(status);
423
424 status = U_ZERO_ERROR;
425 uregex_setText(re, text1, -1, &status);
426 result = uregex_lookingAt(re, 0, &status);
427 TEST_ASSERT(result == TRUE);
428 TEST_ASSERT_SUCCESS(status);
429
430 status = U_ZERO_ERROR;
431 uregex_setText(re, text1, 5, &status);
432 result = uregex_lookingAt(re, 0, &status);
433 TEST_ASSERT(result == FALSE);
434 TEST_ASSERT_SUCCESS(status);
435
436 status = U_ZERO_ERROR;
437 uregex_setText(re, text1, 6, &status);
438 result = uregex_lookingAt(re, 0, &status);
439 TEST_ASSERT(result == TRUE);
440 TEST_ASSERT_SUCCESS(status);
441
442 uregex_close(re);
443 }
444
445
446 /*
447 * getText()
448 */
449 {
450 UChar text1[50];
451 UChar text2[50];
452 const UChar *result;
453 int32_t textLength;
454
b331163b
A
455 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
456 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
374ca955 457 status = U_ZERO_ERROR;
b331163b 458 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
374ca955
A
459 re = uregex_open(pat, -1, 0, NULL, &status);
460
461 uregex_setText(re, text1, -1, &status);
462 result = uregex_getText(re, &textLength, &status);
463 TEST_ASSERT(result == text1);
464 TEST_ASSERT(textLength == -1);
465 TEST_ASSERT_SUCCESS(status);
466
467 status = U_ZERO_ERROR;
468 uregex_setText(re, text2, 7, &status);
469 result = uregex_getText(re, &textLength, &status);
470 TEST_ASSERT(result == text2);
471 TEST_ASSERT(textLength == 7);
472 TEST_ASSERT_SUCCESS(status);
473
474 status = U_ZERO_ERROR;
475 uregex_setText(re, text2, 4, &status);
476 result = uregex_getText(re, &textLength, &status);
477 TEST_ASSERT(result == text2);
478 TEST_ASSERT(textLength == 4);
479 TEST_ASSERT_SUCCESS(status);
480 uregex_close(re);
481 }
482
483 /*
484 * matches()
485 */
486 {
487 UChar text1[50];
488 UBool result;
489 int len;
490 UChar nullString[] = {0,0,0};
491
b331163b 492 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
374ca955 493 status = U_ZERO_ERROR;
b331163b 494 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
374ca955
A
495 re = uregex_open(pat, -1, 0, NULL, &status);
496
497 uregex_setText(re, text1, -1, &status);
498 result = uregex_matches(re, 0, &status);
499 TEST_ASSERT(result == FALSE);
500 TEST_ASSERT_SUCCESS(status);
501
502 status = U_ZERO_ERROR;
503 uregex_setText(re, text1, 6, &status);
504 result = uregex_matches(re, 0, &status);
505 TEST_ASSERT(result == TRUE);
506 TEST_ASSERT_SUCCESS(status);
507
508 status = U_ZERO_ERROR;
509 uregex_setText(re, text1, 6, &status);
510 result = uregex_matches(re, 1, &status);
511 TEST_ASSERT(result == FALSE);
512 TEST_ASSERT_SUCCESS(status);
513 uregex_close(re);
514
515 status = U_ZERO_ERROR;
516 re = uregex_openC(".?", 0, NULL, &status);
517 uregex_setText(re, text1, -1, &status);
518 len = u_strlen(text1);
519 result = uregex_matches(re, len, &status);
520 TEST_ASSERT(result == TRUE);
521 TEST_ASSERT_SUCCESS(status);
522
523 status = U_ZERO_ERROR;
524 uregex_setText(re, nullString, -1, &status);
525 TEST_ASSERT_SUCCESS(status);
526 result = uregex_matches(re, 0, &status);
527 TEST_ASSERT(result == TRUE);
528 TEST_ASSERT_SUCCESS(status);
529 uregex_close(re);
530 }
531
532
533 /*
534 * lookingAt() Used in setText test.
535 */
536
537
538 /*
539 * find(), findNext, start, end, reset
540 */
541 {
542 UChar text1[50];
543 UBool result;
b331163b 544 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
374ca955
A
545 status = U_ZERO_ERROR;
546 re = uregex_openC("rx", 0, NULL, &status);
547
548 uregex_setText(re, text1, -1, &status);
549 result = uregex_find(re, 0, &status);
550 TEST_ASSERT(result == TRUE);
551 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
552 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
553 TEST_ASSERT_SUCCESS(status);
554
555 result = uregex_find(re, 9, &status);
556 TEST_ASSERT(result == TRUE);
557 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
558 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
559 TEST_ASSERT_SUCCESS(status);
560
561 result = uregex_find(re, 14, &status);
562 TEST_ASSERT(result == FALSE);
563 TEST_ASSERT_SUCCESS(status);
564
565 status = U_ZERO_ERROR;
566 uregex_reset(re, 0, &status);
567
568 result = uregex_findNext(re, &status);
569 TEST_ASSERT(result == TRUE);
570 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
571 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
572 TEST_ASSERT_SUCCESS(status);
573
574 result = uregex_findNext(re, &status);
575 TEST_ASSERT(result == TRUE);
576 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
577 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
578 TEST_ASSERT_SUCCESS(status);
579
580 status = U_ZERO_ERROR;
581 uregex_reset(re, 12, &status);
582
583 result = uregex_findNext(re, &status);
584 TEST_ASSERT(result == TRUE);
585 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
586 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
587 TEST_ASSERT_SUCCESS(status);
588
589 result = uregex_findNext(re, &status);
590 TEST_ASSERT(result == FALSE);
591 TEST_ASSERT_SUCCESS(status);
592
593 uregex_close(re);
594 }
595
596 /*
597 * groupCount
598 */
599 {
600 int32_t result;
601
602 status = U_ZERO_ERROR;
603 re = uregex_openC("abc", 0, NULL, &status);
604 result = uregex_groupCount(re, &status);
605 TEST_ASSERT_SUCCESS(status);
606 TEST_ASSERT(result == 0);
607 uregex_close(re);
608
609 status = U_ZERO_ERROR;
610 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
611 result = uregex_groupCount(re, &status);
612 TEST_ASSERT_SUCCESS(status);
613 TEST_ASSERT(result == 3);
614 uregex_close(re);
615
616 }
617
618
619 /*
620 * group()
621 */
622 {
623 UChar text1[80];
624 UChar buf[80];
625 UBool result;
626 int32_t resultSz;
b331163b 627 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
374ca955
A
628
629 status = U_ZERO_ERROR;
630 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
631 TEST_ASSERT_SUCCESS(status);
632
633
634 uregex_setText(re, text1, -1, &status);
635 result = uregex_find(re, 0, &status);
636 TEST_ASSERT(result==TRUE);
637
638 /* Capture Group 0, the full match. Should succeed. */
639 status = U_ZERO_ERROR;
b331163b 640 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
641 TEST_ASSERT_SUCCESS(status);
642 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
643 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
644
645 /* Capture group #1. Should succeed. */
646 status = U_ZERO_ERROR;
b331163b 647 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
648 TEST_ASSERT_SUCCESS(status);
649 TEST_ASSERT_STRING(" interior ", buf, TRUE);
650 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
651
652 /* Capture group out of range. Error. */
653 status = U_ZERO_ERROR;
b331163b 654 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
655 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
656
657 /* NULL buffer, pure pre-flight */
658 status = U_ZERO_ERROR;
659 resultSz = uregex_group(re, 0, NULL, 0, &status);
660 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
661 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
662
663 /* Too small buffer, truncated string */
664 status = U_ZERO_ERROR;
665 memset(buf, -1, sizeof(buf));
666 resultSz = uregex_group(re, 0, buf, 5, &status);
667 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
668 TEST_ASSERT_STRING("abc i", buf, FALSE);
669 TEST_ASSERT(buf[5] == (UChar)0xffff);
670 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
671
672 /* Output string just fits buffer, no NUL term. */
673 status = U_ZERO_ERROR;
674 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
675 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
676 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
677 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
678 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
679
680 uregex_close(re);
681
682 }
46f4442e
A
683
684 /*
685 * Regions
686 */
687
688
689 /* SetRegion(), getRegion() do something */
690 TEST_SETUP(".*", "0123456789ABCDEF", 0)
691 UChar resultString[40];
692 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
693 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
694 uregex_setRegion(re, 3, 6, &status);
695 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
696 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
697 TEST_ASSERT(uregex_findNext(re, &status));
b331163b 698 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
46f4442e
A
699 TEST_ASSERT_STRING("345", resultString, TRUE);
700 TEST_TEARDOWN;
701
702 /* find(start=-1) uses regions */
703 TEST_SETUP(".*", "0123456789ABCDEF", 0);
704 uregex_setRegion(re, 4, 6, &status);
705 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
706 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
707 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
708 TEST_TEARDOWN;
709
710 /* find (start >=0) does not use regions */
711 TEST_SETUP(".*", "0123456789ABCDEF", 0);
712 uregex_setRegion(re, 4, 6, &status);
713 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
714 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
715 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
716 TEST_TEARDOWN;
717
718 /* findNext() obeys regions */
719 TEST_SETUP(".", "0123456789ABCDEF", 0);
720 uregex_setRegion(re, 4, 6, &status);
721 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
722 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
723 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
724 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
725 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
726 TEST_TEARDOWN;
727
728 /* matches(start=-1) uses regions */
729 /* Also, verify that non-greedy *? succeeds in finding the full match. */
730 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
731 uregex_setRegion(re, 4, 6, &status);
732 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
733 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
734 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
735 TEST_TEARDOWN;
736
737 /* matches (start >=0) does not use regions */
738 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
739 uregex_setRegion(re, 4, 6, &status);
740 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
741 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
742 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
743 TEST_TEARDOWN;
744
745 /* lookingAt(start=-1) uses regions */
746 /* Also, verify that non-greedy *? finds the first (shortest) match. */
747 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
748 uregex_setRegion(re, 4, 6, &status);
749 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
750 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
751 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
752 TEST_TEARDOWN;
753
754 /* lookingAt (start >=0) does not use regions */
755 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
756 uregex_setRegion(re, 4, 6, &status);
757 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
758 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
759 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
760 TEST_TEARDOWN;
761
762 /* hitEnd() */
763 TEST_SETUP("[a-f]*", "abcdefghij", 0);
764 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
765 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
766 TEST_TEARDOWN;
767
768 TEST_SETUP("[a-f]*", "abcdef", 0);
769 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
770 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
771 TEST_TEARDOWN;
772
773 /* requireEnd */
774 TEST_SETUP("abcd", "abcd", 0);
775 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
776 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
777 TEST_TEARDOWN;
778
779 TEST_SETUP("abcd$", "abcd", 0);
780 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
781 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
782 TEST_TEARDOWN;
783
784 /* anchoringBounds */
785 TEST_SETUP("abc$", "abcdef", 0);
786 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
787 uregex_useAnchoringBounds(re, FALSE, &status);
788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
789
790 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
791 uregex_useAnchoringBounds(re, TRUE, &status);
792 uregex_setRegion(re, 0, 3, &status);
793 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
794 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
795 TEST_TEARDOWN;
796
797 /* Transparent Bounds */
798 TEST_SETUP("abc(?=def)", "abcdef", 0);
799 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
800 uregex_useTransparentBounds(re, TRUE, &status);
801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
802
803 uregex_useTransparentBounds(re, FALSE, &status);
804 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
805 uregex_setRegion(re, 0, 3, &status);
806 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
807 uregex_useTransparentBounds(re, TRUE, &status);
808 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
809 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
810 TEST_TEARDOWN;
811
374ca955
A
812
813 /*
814 * replaceFirst()
815 */
816 {
817 UChar text1[80];
818 UChar text2[80];
819 UChar replText[80];
820 UChar buf[80];
821 int32_t resultSz;
b331163b
A
822 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
823 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
824 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
374ca955
A
825
826 status = U_ZERO_ERROR;
827 re = uregex_openC("x(.*?)x", 0, NULL, &status);
828 TEST_ASSERT_SUCCESS(status);
829
830 /* Normal case, with match */
831 uregex_setText(re, text1, -1, &status);
b331163b 832 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
833 TEST_ASSERT_SUCCESS(status);
834 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
835 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
836
837 /* No match. Text should copy to output with no changes. */
838 status = U_ZERO_ERROR;
839 uregex_setText(re, text2, -1, &status);
b331163b 840 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
841 TEST_ASSERT_SUCCESS(status);
842 TEST_ASSERT_STRING("No match here.", buf, TRUE);
843 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
844
845 /* Match, output just fills buffer, no termination warning. */
846 status = U_ZERO_ERROR;
847 uregex_setText(re, text1, -1, &status);
848 memset(buf, -1, sizeof(buf));
849 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
850 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
851 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
852 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
853 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
854
855 /* Do the replaceFirst again, without first resetting anything.
856 * Should give the same results.
857 */
858 status = U_ZERO_ERROR;
859 memset(buf, -1, sizeof(buf));
860 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
861 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
862 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
863 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
864 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
865
866 /* NULL buffer, zero buffer length */
867 status = U_ZERO_ERROR;
868 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
869 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
870 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
871
872 /* Buffer too small by one */
873 status = U_ZERO_ERROR;
874 memset(buf, -1, sizeof(buf));
875 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
876 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
877 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
878 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
879 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
880
881 uregex_close(re);
882 }
883
884
885 /*
886 * replaceAll()
887 */
888 {
729e4ab9
A
889 UChar text1[80]; /* "Replace xaax x1x x...x." */
890 UChar text2[80]; /* "No match Here" */
891 UChar replText[80]; /* "<$1>" */
892 UChar replText2[80]; /* "<<$1>>" */
893 const char * pattern = "x(.*?)x";
894 const char * expectedResult = "Replace <aa> <1> <...>.";
895 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
374ca955 896 UChar buf[80];
729e4ab9 897 int32_t resultSize;
374ca955 898 int32_t expectedResultSize;
729e4ab9 899 int32_t expectedResultSize2;
374ca955
A
900 int32_t i;
901
b331163b
A
902 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
903 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
904 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
905 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
729e4ab9
A
906 expectedResultSize = strlen(expectedResult);
907 expectedResultSize2 = strlen(expectedResult2);
374ca955
A
908
909 status = U_ZERO_ERROR;
729e4ab9 910 re = uregex_openC(pattern, 0, NULL, &status);
374ca955
A
911 TEST_ASSERT_SUCCESS(status);
912
913 /* Normal case, with match */
914 uregex_setText(re, text1, -1, &status);
b331163b 915 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955 916 TEST_ASSERT_SUCCESS(status);
729e4ab9
A
917 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
918 TEST_ASSERT(resultSize == expectedResultSize);
374ca955
A
919
920 /* No match. Text should copy to output with no changes. */
921 status = U_ZERO_ERROR;
922 uregex_setText(re, text2, -1, &status);
b331163b 923 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
924 TEST_ASSERT_SUCCESS(status);
925 TEST_ASSERT_STRING("No match here.", buf, TRUE);
729e4ab9 926 TEST_ASSERT(resultSize == u_strlen(text2));
374ca955
A
927
928 /* Match, output just fills buffer, no termination warning. */
929 status = U_ZERO_ERROR;
930 uregex_setText(re, text1, -1, &status);
931 memset(buf, -1, sizeof(buf));
729e4ab9 932 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
374ca955 933 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
729e4ab9
A
934 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
935 TEST_ASSERT(resultSize == expectedResultSize);
936 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
937
938 /* Do the replaceAll again, without first resetting anything.
939 * Should give the same results.
940 */
941 status = U_ZERO_ERROR;
942 memset(buf, -1, sizeof(buf));
729e4ab9 943 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
374ca955
A
944 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
945 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
729e4ab9
A
946 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
947 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
948
949 /* NULL buffer, zero buffer length */
950 status = U_ZERO_ERROR;
729e4ab9 951 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
374ca955 952 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 953 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
374ca955
A
954
955 /* Buffer too small. Try every size, which will tickle edge cases
956 * in uregex_appendReplacement (used by replaceAll) */
957 for (i=0; i<expectedResultSize; i++) {
958 char expected[80];
959 status = U_ZERO_ERROR;
960 memset(buf, -1, sizeof(buf));
729e4ab9
A
961 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
962 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
963 strcpy(expected, expectedResult);
964 expected[i] = 0;
965 TEST_ASSERT_STRING(expected, buf, FALSE);
966 TEST_ASSERT(resultSize == expectedResultSize);
967 TEST_ASSERT(buf[i] == (UChar)0xffff);
968 }
969
970 /* Buffer too small. Same as previous test, except this time the replacement
971 * text is longer than the match capture group, making the length of the complete
972 * replacement longer than the original string.
973 */
974 for (i=0; i<expectedResultSize2; i++) {
975 char expected[80];
976 status = U_ZERO_ERROR;
977 memset(buf, -1, sizeof(buf));
978 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
374ca955 979 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 980 strcpy(expected, expectedResult2);
374ca955
A
981 expected[i] = 0;
982 TEST_ASSERT_STRING(expected, buf, FALSE);
729e4ab9 983 TEST_ASSERT(resultSize == expectedResultSize2);
374ca955
A
984 TEST_ASSERT(buf[i] == (UChar)0xffff);
985 }
986
729e4ab9 987
374ca955
A
988 uregex_close(re);
989 }
990
991
992 /*
993 * appendReplacement()
994 */
995 {
996 UChar text[100];
997 UChar repl[100];
998 UChar buf[100];
999 UChar *bufPtr;
1000 int32_t bufCap;
1001
1002
1003 status = U_ZERO_ERROR;
1004 re = uregex_openC(".*", 0, 0, &status);
1005 TEST_ASSERT_SUCCESS(status);
1006
b331163b
A
1007 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1008 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
374ca955
A
1009 uregex_setText(re, text, -1, &status);
1010
1011 /* match covers whole target string */
1012 uregex_find(re, 0, &status);
1013 TEST_ASSERT_SUCCESS(status);
1014 bufPtr = buf;
b331163b 1015 bufCap = UPRV_LENGTHOF(buf);
374ca955
A
1016 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1017 TEST_ASSERT_SUCCESS(status);
1018 TEST_ASSERT_STRING("some other", buf, TRUE);
1019
1020 /* Match has \u \U escapes */
1021 uregex_find(re, 0, &status);
1022 TEST_ASSERT_SUCCESS(status);
1023 bufPtr = buf;
b331163b
A
1024 bufCap = UPRV_LENGTHOF(buf);
1025 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
374ca955
A
1026 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1027 TEST_ASSERT_SUCCESS(status);
1028 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1029
729e4ab9
A
1030 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1031 status = U_ZERO_ERROR;
1032 uregex_find(re, 0, &status);
1033 TEST_ASSERT_SUCCESS(status);
1034 bufPtr = buf;
1035 status = U_BUFFER_OVERFLOW_ERROR;
1036 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1037 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1038
374ca955
A
1039 uregex_close(re);
1040 }
1041
1042
1043 /*
1044 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1045 */
1046
1047 /*
1048 * split()
1049 */
1050 {
1051 UChar textToSplit[80];
1052 UChar text2[80];
1053 UChar buf[200];
1054 UChar *fields[10];
1055 int32_t numFields;
1056 int32_t requiredCapacity;
1057 int32_t spaceNeeded;
1058 int32_t sz;
1059
b331163b
A
1060 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1061 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
374ca955
A
1062
1063 status = U_ZERO_ERROR;
1064 re = uregex_openC(":", 0, NULL, &status);
1065
1066
1067 /* Simple split */
1068
1069 uregex_setText(re, textToSplit, -1, &status);
1070 TEST_ASSERT_SUCCESS(status);
1071
73c04bcf
A
1072 /* The TEST_ASSERT_SUCCESS call above should change too... */
1073 if (U_SUCCESS(status)) {
1074 memset(fields, -1, sizeof(fields));
1075 numFields =
b331163b 1076 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
73c04bcf 1077 TEST_ASSERT_SUCCESS(status);
374ca955 1078
73c04bcf
A
1079 /* The TEST_ASSERT_SUCCESS call above should change too... */
1080 if(U_SUCCESS(status)) {
1081 TEST_ASSERT(numFields == 3);
1082 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1083 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1084 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1085 TEST_ASSERT(fields[3] == NULL);
1086
1087 spaceNeeded = u_strlen(textToSplit) -
1088 (numFields - 1) + /* Field delimiters do not appear in output */
1089 numFields; /* Each field gets a NUL terminator */
1090
1091 TEST_ASSERT(spaceNeeded == requiredCapacity);
1092 }
1093 }
374ca955 1094
374ca955
A
1095 uregex_close(re);
1096
1097
1098 /* Split with too few output strings available */
1099 status = U_ZERO_ERROR;
1100 re = uregex_openC(":", 0, NULL, &status);
1101 uregex_setText(re, textToSplit, -1, &status);
1102 TEST_ASSERT_SUCCESS(status);
1103
73c04bcf
A
1104 /* The TEST_ASSERT_SUCCESS call above should change too... */
1105 if(U_SUCCESS(status)) {
374ca955 1106 memset(fields, -1, sizeof(fields));
374ca955 1107 numFields =
b331163b 1108 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
73c04bcf
A
1109 TEST_ASSERT_SUCCESS(status);
1110
1111 /* The TEST_ASSERT_SUCCESS call above should change too... */
1112 if(U_SUCCESS(status)) {
1113 TEST_ASSERT(numFields == 2);
374ca955 1114 TEST_ASSERT_STRING("first ", fields[0], TRUE);
73c04bcf
A
1115 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1116 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1117
1118 spaceNeeded = u_strlen(textToSplit) -
1119 (numFields - 1) + /* Field delimiters do not appear in output */
1120 numFields; /* Each field gets a NUL terminator */
1121
1122 TEST_ASSERT(spaceNeeded == requiredCapacity);
1123
1124 /* Split with a range of output buffer sizes. */
1125 spaceNeeded = u_strlen(textToSplit) -
1126 (numFields - 1) + /* Field delimiters do not appear in output */
1127 numFields; /* Each field gets a NUL terminator */
1128
1129 for (sz=0; sz < spaceNeeded+1; sz++) {
1130 memset(fields, -1, sizeof(fields));
1131 status = U_ZERO_ERROR;
1132 numFields =
1133 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1134 if (sz >= spaceNeeded) {
1135 TEST_ASSERT_SUCCESS(status);
1136 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1137 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1138 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1139 } else {
1140 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1141 }
1142 TEST_ASSERT(numFields == 3);
1143 TEST_ASSERT(fields[3] == NULL);
1144 TEST_ASSERT(spaceNeeded == requiredCapacity);
1145 }
374ca955 1146 }
374ca955 1147 }
73c04bcf 1148
374ca955
A
1149 uregex_close(re);
1150 }
1151
1152
1153
1154
1155 /* Split(), part 2. Patterns with capture groups. The capture group text
1156 * comes out as additional fields. */
1157 {
1158 UChar textToSplit[80];
1159 UChar buf[200];
1160 UChar *fields[10];
1161 int32_t numFields;
1162 int32_t requiredCapacity;
1163 int32_t spaceNeeded;
1164 int32_t sz;
1165
b331163b 1166 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
374ca955
A
1167
1168 status = U_ZERO_ERROR;
1169 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1170
1171 uregex_setText(re, textToSplit, -1, &status);
1172 TEST_ASSERT_SUCCESS(status);
1173
73c04bcf
A
1174 /* The TEST_ASSERT_SUCCESS call above should change too... */
1175 if(U_SUCCESS(status)) {
1176 memset(fields, -1, sizeof(fields));
1177 numFields =
b331163b 1178 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
73c04bcf 1179 TEST_ASSERT_SUCCESS(status);
374ca955 1180
73c04bcf
A
1181 /* The TEST_ASSERT_SUCCESS call above should change too... */
1182 if(U_SUCCESS(status)) {
1183 TEST_ASSERT(numFields == 5);
1184 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1185 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1186 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1187 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1188 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1189 TEST_ASSERT(fields[5] == NULL);
1190 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1191 TEST_ASSERT(spaceNeeded == requiredCapacity);
1192 }
1193 }
374ca955
A
1194
1195 /* Split with too few output strings available (2) */
1196 status = U_ZERO_ERROR;
1197 memset(fields, -1, sizeof(fields));
1198 numFields =
b331163b 1199 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
374ca955 1200 TEST_ASSERT_SUCCESS(status);
374ca955 1201
73c04bcf
A
1202 /* The TEST_ASSERT_SUCCESS call above should change too... */
1203 if(U_SUCCESS(status)) {
1204 TEST_ASSERT(numFields == 2);
1205 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1206 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1207 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1208
1209 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1210 TEST_ASSERT(spaceNeeded == requiredCapacity);
1211 }
374ca955
A
1212
1213 /* Split with too few output strings available (3) */
1214 status = U_ZERO_ERROR;
1215 memset(fields, -1, sizeof(fields));
1216 numFields =
b331163b 1217 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
374ca955 1218 TEST_ASSERT_SUCCESS(status);
374ca955 1219
73c04bcf
A
1220 /* The TEST_ASSERT_SUCCESS call above should change too... */
1221 if(U_SUCCESS(status)) {
1222 TEST_ASSERT(numFields == 3);
1223 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1224 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1225 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1226 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1227
1228 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1229 TEST_ASSERT(spaceNeeded == requiredCapacity);
1230 }
374ca955
A
1231
1232 /* Split with just enough output strings available (5) */
1233 status = U_ZERO_ERROR;
1234 memset(fields, -1, sizeof(fields));
1235 numFields =
b331163b 1236 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
374ca955 1237 TEST_ASSERT_SUCCESS(status);
374ca955 1238
73c04bcf
A
1239 /* The TEST_ASSERT_SUCCESS call above should change too... */
1240 if(U_SUCCESS(status)) {
1241 TEST_ASSERT(numFields == 5);
1242 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1243 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1244 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1245 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1246 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1247 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
374ca955 1248
73c04bcf
A
1249 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1250 TEST_ASSERT(spaceNeeded == requiredCapacity);
1251 }
374ca955
A
1252
1253 /* Split, end of text is a field delimiter. */
1254 status = U_ZERO_ERROR;
1255 sz = strlen("first <tag-a> second<tag-b>");
1256 uregex_setText(re, textToSplit, sz, &status);
1257 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
1258
1259 /* The TEST_ASSERT_SUCCESS call above should change too... */
1260 if(U_SUCCESS(status)) {
1261 memset(fields, -1, sizeof(fields));
1262 numFields =
b331163b 1263 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
73c04bcf
A
1264 TEST_ASSERT_SUCCESS(status);
1265
1266 /* The TEST_ASSERT_SUCCESS call above should change too... */
1267 if(U_SUCCESS(status)) {
4388f060 1268 TEST_ASSERT(numFields == 5);
73c04bcf
A
1269 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1270 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1271 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1272 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
4388f060
A
1273 TEST_ASSERT_STRING("", fields[4], TRUE);
1274 TEST_ASSERT(fields[5] == NULL);
73c04bcf
A
1275 TEST_ASSERT(fields[8] == NULL);
1276 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
4388f060 1277 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
73c04bcf
A
1278 TEST_ASSERT(spaceNeeded == requiredCapacity);
1279 }
1280 }
374ca955
A
1281
1282 uregex_close(re);
1283 }
1284
46f4442e
A
1285 /*
1286 * set/getTimeLimit
1287 */
1288 TEST_SETUP("abc$", "abcdef", 0);
1289 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1290 uregex_setTimeLimit(re, 1000, &status);
1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1292 TEST_ASSERT_SUCCESS(status);
1293 uregex_setTimeLimit(re, -1, &status);
1294 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1295 status = U_ZERO_ERROR;
1296 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1297 TEST_TEARDOWN;
1298
1299 /*
1300 * set/get Stack Limit
1301 */
1302 TEST_SETUP("abc$", "abcdef", 0);
1303 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1304 uregex_setStackLimit(re, 40000, &status);
1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1306 TEST_ASSERT_SUCCESS(status);
1307 uregex_setStackLimit(re, -1, &status);
1308 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1309 status = U_ZERO_ERROR;
1310 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1311 TEST_TEARDOWN;
1312
1313
1314 /*
1315 * Get/Set callback functions
1316 * This test is copied from intltest regex/Callbacks
1317 * The pattern and test data will run long enough to cause the callback
1318 * to be invoked. The nested '+' operators give exponential time
1319 * behavior with increasing string length.
1320 */
1321 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1322 callBackContext cbInfo = {4, 0, 0};
1323 const void *pContext = &cbInfo;
1324 URegexMatchCallback *returnedFn = &TestCallbackFn;
1325
1326 /* Getting the callback fn when it hasn't been set must return NULL */
1327 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1328 TEST_ASSERT_SUCCESS(status);
1329 TEST_ASSERT(returnedFn == NULL);
1330 TEST_ASSERT(pContext == NULL);
1331
1332 /* Set the callback and do a match. */
1333 /* The callback function should record that it has been called. */
1334 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1335 TEST_ASSERT_SUCCESS(status);
1336 TEST_ASSERT(cbInfo.numCalls == 0);
1337 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1338 TEST_ASSERT_SUCCESS(status);
1339 TEST_ASSERT(cbInfo.numCalls > 0);
1340
1341 /* Getting the callback should return the values that were set above. */
1342 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1343 TEST_ASSERT(returnedFn == &TestCallbackFn);
1344 TEST_ASSERT(pContext == &cbInfo);
1345
1346 TEST_TEARDOWN;
374ca955
A
1347}
1348
46f4442e
A
1349
1350
73c04bcf
A
1351static void TestBug4315(void) {
1352 UErrorCode theICUError = U_ZERO_ERROR;
1353 URegularExpression *theRegEx;
1354 UChar *textBuff;
1355 const char *thePattern;
1356 UChar theString[100];
1357 UChar *destFields[24];
1358 int32_t neededLength1;
1359 int32_t neededLength2;
1360
1361 int32_t wordCount = 0;
1362 int32_t destFieldsSize = 24;
1363
1364 thePattern = "ck ";
1365 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1366
1367 /* open a regex */
1368 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1369 TEST_ASSERT_SUCCESS(theICUError);
1370
1371 /* set the input string */
1372 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1373 TEST_ASSERT_SUCCESS(theICUError);
1374
1375 /* split */
1376 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1377 * error occurs! */
1378 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1379 destFieldsSize, &theICUError);
1380
1381 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1382 TEST_ASSERT(wordCount==3);
1383
1384 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1385 {
1386 theICUError = U_ZERO_ERROR;
1387 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1388 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1389 destFields, destFieldsSize, &theICUError);
1390 TEST_ASSERT(wordCount==3);
1391 TEST_ASSERT_SUCCESS(theICUError);
1392 TEST_ASSERT(neededLength1 == neededLength2);
1393 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1394 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1395 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1396 TEST_ASSERT(destFields[3] == NULL);
1397 free(textBuff);
1398 }
1399 uregex_close(theRegEx);
1400}
1401
729e4ab9
A
1402/* Based on TestRegexCAPI() */
1403static void TestUTextAPI(void) {
1404 UErrorCode status = U_ZERO_ERROR;
1405 URegularExpression *re;
1406 UText patternText = UTEXT_INITIALIZER;
1407 UChar pat[200];
1408 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1409
1410 /* Minimalist open/close */
1411 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1412 re = uregex_openUText(&patternText, 0, 0, &status);
1413 if (U_FAILURE(status)) {
1414 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1415 utext_close(&patternText);
1416 return;
1417 }
1418 uregex_close(re);
1419
1420 /* Open with all flag values set */
1421 status = U_ZERO_ERROR;
1422 re = uregex_openUText(&patternText,
1423 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1424 0, &status);
1425 TEST_ASSERT_SUCCESS(status);
1426 uregex_close(re);
1427
1428 /* Open with an invalid flag */
1429 status = U_ZERO_ERROR;
1430 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1431 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1432 uregex_close(re);
1433
1434 /* open with an invalid parameter */
1435 status = U_ZERO_ERROR;
1436 re = uregex_openUText(NULL,
1437 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1438 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1439
1440 /*
1441 * clone
1442 */
1443 {
1444 URegularExpression *clone1;
1445 URegularExpression *clone2;
1446 URegularExpression *clone3;
1447 UChar testString1[30];
1448 UChar testString2[30];
1449 UBool result;
1450
1451
1452 status = U_ZERO_ERROR;
1453 re = uregex_openUText(&patternText, 0, 0, &status);
1454 TEST_ASSERT_SUCCESS(status);
1455 clone1 = uregex_clone(re, &status);
1456 TEST_ASSERT_SUCCESS(status);
1457 TEST_ASSERT(clone1 != NULL);
1458
1459 status = U_ZERO_ERROR;
1460 clone2 = uregex_clone(re, &status);
1461 TEST_ASSERT_SUCCESS(status);
1462 TEST_ASSERT(clone2 != NULL);
1463 uregex_close(re);
1464
1465 status = U_ZERO_ERROR;
1466 clone3 = uregex_clone(clone2, &status);
1467 TEST_ASSERT_SUCCESS(status);
1468 TEST_ASSERT(clone3 != NULL);
1469
b331163b
A
1470 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1471 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
729e4ab9
A
1472
1473 status = U_ZERO_ERROR;
1474 uregex_setText(clone1, testString1, -1, &status);
1475 TEST_ASSERT_SUCCESS(status);
1476 result = uregex_lookingAt(clone1, 0, &status);
1477 TEST_ASSERT_SUCCESS(status);
1478 TEST_ASSERT(result==TRUE);
1479
1480 status = U_ZERO_ERROR;
1481 uregex_setText(clone2, testString2, -1, &status);
1482 TEST_ASSERT_SUCCESS(status);
1483 result = uregex_lookingAt(clone2, 0, &status);
1484 TEST_ASSERT_SUCCESS(status);
1485 TEST_ASSERT(result==FALSE);
1486 result = uregex_find(clone2, 0, &status);
1487 TEST_ASSERT_SUCCESS(status);
1488 TEST_ASSERT(result==TRUE);
1489
1490 uregex_close(clone1);
1491 uregex_close(clone2);
1492 uregex_close(clone3);
1493
1494 }
1495
1496 /*
1497 * pattern() and patternText()
1498 */
1499 {
1500 const UChar *resultPat;
1501 int32_t resultLen;
1502 UText *resultText;
1503 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1504 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
b331163b 1505 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
729e4ab9
A
1506 status = U_ZERO_ERROR;
1507
1508 utext_openUTF8(&patternText, str_hello, -1, &status);
1509 re = uregex_open(pat, -1, 0, NULL, &status);
1510 resultPat = uregex_pattern(re, &resultLen, &status);
1511 TEST_ASSERT_SUCCESS(status);
1512
1513 /* The TEST_ASSERT_SUCCESS above should change too... */
1514 if (U_SUCCESS(status)) {
1515 TEST_ASSERT(resultLen == -1);
1516 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1517 }
1518
1519 resultText = uregex_patternUText(re, &status);
1520 TEST_ASSERT_SUCCESS(status);
1521 TEST_ASSERT_UTEXT(str_hello, resultText);
1522
1523 uregex_close(re);
1524
1525 status = U_ZERO_ERROR;
1526 re = uregex_open(pat, 3, 0, NULL, &status);
1527 resultPat = uregex_pattern(re, &resultLen, &status);
1528 TEST_ASSERT_SUCCESS(status);
1529
1530 /* The TEST_ASSERT_SUCCESS above should change too... */
1531 if (U_SUCCESS(status)) {
1532 TEST_ASSERT(resultLen == 3);
1533 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1534 TEST_ASSERT(u_strlen(resultPat) == 3);
1535 }
1536
1537 resultText = uregex_patternUText(re, &status);
1538 TEST_ASSERT_SUCCESS(status);
1539 TEST_ASSERT_UTEXT(str_hel, resultText);
1540
1541 uregex_close(re);
1542 }
1543
1544 /*
1545 * setUText() and lookingAt()
1546 */
1547 {
1548 UText text1 = UTEXT_INITIALIZER;
1549 UText text2 = UTEXT_INITIALIZER;
1550 UBool result;
1551 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1552 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1553 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1554 status = U_ZERO_ERROR;
1555 utext_openUTF8(&text1, str_abcccd, -1, &status);
1556 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1557
1558 utext_openUTF8(&patternText, str_abcd, -1, &status);
1559 re = uregex_openUText(&patternText, 0, NULL, &status);
1560 TEST_ASSERT_SUCCESS(status);
1561
1562 /* Operation before doing a setText should fail... */
1563 status = U_ZERO_ERROR;
1564 uregex_lookingAt(re, 0, &status);
1565 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1566
1567 status = U_ZERO_ERROR;
1568 uregex_setUText(re, &text1, &status);
1569 result = uregex_lookingAt(re, 0, &status);
1570 TEST_ASSERT(result == TRUE);
1571 TEST_ASSERT_SUCCESS(status);
1572
1573 status = U_ZERO_ERROR;
1574 uregex_setUText(re, &text2, &status);
1575 result = uregex_lookingAt(re, 0, &status);
1576 TEST_ASSERT(result == FALSE);
1577 TEST_ASSERT_SUCCESS(status);
1578
1579 status = U_ZERO_ERROR;
1580 uregex_setUText(re, &text1, &status);
1581 result = uregex_lookingAt(re, 0, &status);
1582 TEST_ASSERT(result == TRUE);
1583 TEST_ASSERT_SUCCESS(status);
1584
1585 uregex_close(re);
1586 utext_close(&text1);
1587 utext_close(&text2);
1588 }
1589
1590
1591 /*
1592 * getText() and getUText()
1593 */
1594 {
1595 UText text1 = UTEXT_INITIALIZER;
1596 UText text2 = UTEXT_INITIALIZER;
1597 UChar text2Chars[20];
1598 UText *resultText;
1599 const UChar *result;
1600 int32_t textLength;
1601 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1602 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1603 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1604
1605
1606 status = U_ZERO_ERROR;
1607 utext_openUTF8(&text1, str_abcccd, -1, &status);
b331163b 1608 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
729e4ab9
A
1609 utext_openUChars(&text2, text2Chars, -1, &status);
1610
1611 utext_openUTF8(&patternText, str_abcd, -1, &status);
1612 re = uregex_openUText(&patternText, 0, NULL, &status);
1613
1614 /* First set a UText */
1615 uregex_setUText(re, &text1, &status);
1616 resultText = uregex_getUText(re, NULL, &status);
1617 TEST_ASSERT_SUCCESS(status);
1618 TEST_ASSERT(resultText != &text1);
1619 utext_setNativeIndex(resultText, 0);
1620 utext_setNativeIndex(&text1, 0);
4388f060 1621 TEST_ASSERT(testUTextEqual(resultText, &text1));
729e4ab9
A
1622 utext_close(resultText);
1623
1624 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
57a6839d 1625 (void)result; /* Suppress set but not used warning. */
729e4ab9
A
1626 TEST_ASSERT(textLength == -1 || textLength == 6);
1627 resultText = uregex_getUText(re, NULL, &status);
1628 TEST_ASSERT_SUCCESS(status);
1629 TEST_ASSERT(resultText != &text1);
1630 utext_setNativeIndex(resultText, 0);
1631 utext_setNativeIndex(&text1, 0);
4388f060 1632 TEST_ASSERT(testUTextEqual(resultText, &text1));
729e4ab9
A
1633 utext_close(resultText);
1634
1635 /* Then set a UChar * */
1636 uregex_setText(re, text2Chars, 7, &status);
1637 resultText = uregex_getUText(re, NULL, &status);
1638 TEST_ASSERT_SUCCESS(status);
1639 utext_setNativeIndex(resultText, 0);
1640 utext_setNativeIndex(&text2, 0);
4388f060 1641 TEST_ASSERT(testUTextEqual(resultText, &text2));
729e4ab9
A
1642 utext_close(resultText);
1643 result = uregex_getText(re, &textLength, &status);
1644 TEST_ASSERT(textLength == 7);
1645
1646 uregex_close(re);
1647 utext_close(&text1);
1648 utext_close(&text2);
1649 }
1650
1651 /*
1652 * matches()
1653 */
1654 {
1655 UText text1 = UTEXT_INITIALIZER;
1656 UBool result;
1657 UText nullText = UTEXT_INITIALIZER;
1658 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1659 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1660
1661 status = U_ZERO_ERROR;
1662 utext_openUTF8(&text1, str_abcccde, -1, &status);
1663 utext_openUTF8(&patternText, str_abcd, -1, &status);
1664 re = uregex_openUText(&patternText, 0, NULL, &status);
1665
1666 uregex_setUText(re, &text1, &status);
1667 result = uregex_matches(re, 0, &status);
1668 TEST_ASSERT(result == FALSE);
1669 TEST_ASSERT_SUCCESS(status);
1670 uregex_close(re);
1671
1672 status = U_ZERO_ERROR;
1673 re = uregex_openC(".?", 0, NULL, &status);
1674 uregex_setUText(re, &text1, &status);
1675 result = uregex_matches(re, 7, &status);
1676 TEST_ASSERT(result == TRUE);
1677 TEST_ASSERT_SUCCESS(status);
1678
1679 status = U_ZERO_ERROR;
1680 utext_openUTF8(&nullText, "", -1, &status);
1681 uregex_setUText(re, &nullText, &status);
1682 TEST_ASSERT_SUCCESS(status);
1683 result = uregex_matches(re, 0, &status);
1684 TEST_ASSERT(result == TRUE);
1685 TEST_ASSERT_SUCCESS(status);
1686
1687 uregex_close(re);
1688 utext_close(&text1);
1689 utext_close(&nullText);
1690 }
1691
1692
1693 /*
1694 * lookingAt() Used in setText test.
1695 */
1696
1697
1698 /*
1699 * find(), findNext, start, end, reset
1700 */
1701 {
1702 UChar text1[50];
1703 UBool result;
b331163b 1704 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
729e4ab9
A
1705 status = U_ZERO_ERROR;
1706 re = uregex_openC("rx", 0, NULL, &status);
1707
1708 uregex_setText(re, text1, -1, &status);
1709 result = uregex_find(re, 0, &status);
1710 TEST_ASSERT(result == TRUE);
1711 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1712 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1713 TEST_ASSERT_SUCCESS(status);
1714
1715 result = uregex_find(re, 9, &status);
1716 TEST_ASSERT(result == TRUE);
1717 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1718 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1719 TEST_ASSERT_SUCCESS(status);
1720
1721 result = uregex_find(re, 14, &status);
1722 TEST_ASSERT(result == FALSE);
1723 TEST_ASSERT_SUCCESS(status);
1724
1725 status = U_ZERO_ERROR;
1726 uregex_reset(re, 0, &status);
1727
1728 result = uregex_findNext(re, &status);
1729 TEST_ASSERT(result == TRUE);
1730 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1731 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1732 TEST_ASSERT_SUCCESS(status);
1733
1734 result = uregex_findNext(re, &status);
1735 TEST_ASSERT(result == TRUE);
1736 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1737 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1738 TEST_ASSERT_SUCCESS(status);
1739
1740 status = U_ZERO_ERROR;
1741 uregex_reset(re, 12, &status);
1742
1743 result = uregex_findNext(re, &status);
1744 TEST_ASSERT(result == TRUE);
1745 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1746 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1747 TEST_ASSERT_SUCCESS(status);
1748
1749 result = uregex_findNext(re, &status);
1750 TEST_ASSERT(result == FALSE);
1751 TEST_ASSERT_SUCCESS(status);
1752
1753 uregex_close(re);
1754 }
1755
1756 /*
b331163b 1757 * groupUText()
729e4ab9
A
1758 */
1759 {
1760 UChar text1[80];
1761 UText *actual;
1762 UBool result;
b331163b
A
1763 int64_t groupLen = 0;
1764 UChar groupBuf[20];
729e4ab9 1765
b331163b 1766 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
729e4ab9
A
1767
1768 status = U_ZERO_ERROR;
1769 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1770 TEST_ASSERT_SUCCESS(status);
1771
1772 uregex_setText(re, text1, -1, &status);
1773 result = uregex_find(re, 0, &status);
1774 TEST_ASSERT(result==TRUE);
1775
729e4ab9
A
1776 /* Capture Group 0 with shallow clone API. Should succeed. */
1777 status = U_ZERO_ERROR;
b331163b
A
1778 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1779 TEST_ASSERT_SUCCESS(status);
729e4ab9 1780
b331163b
A
1781 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1782 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1783 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
729e4ab9 1784
b331163b 1785 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
729e4ab9
A
1786 utext_close(actual);
1787
1788 /* Capture group #1. Should succeed. */
1789 status = U_ZERO_ERROR;
b331163b
A
1790
1791 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
729e4ab9 1792 TEST_ASSERT_SUCCESS(status);
b331163b
A
1793 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1794 /* (within the string text1) */
1795 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1796 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1797 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1798
729e4ab9
A
1799 utext_close(actual);
1800
1801 /* Capture group out of range. Error. */
1802 status = U_ZERO_ERROR;
b331163b 1803 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
729e4ab9 1804 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
729e4ab9
A
1805 utext_close(actual);
1806
1807 uregex_close(re);
729e4ab9
A
1808 }
1809
1810 /*
1811 * replaceFirst()
1812 */
1813 {
1814 UChar text1[80];
1815 UChar text2[80];
1816 UText replText = UTEXT_INITIALIZER;
1817 UText *result;
1818 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1819 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
b331163b
A
1820 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1821 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
729e4ab9
A
1822 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1823 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1824 status = U_ZERO_ERROR;
b331163b
A
1825 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1826 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
729e4ab9
A
1827 utext_openUTF8(&replText, str_1x, -1, &status);
1828
1829 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1830 TEST_ASSERT_SUCCESS(status);
1831
1832 /* Normal case, with match */
1833 uregex_setText(re, text1, -1, &status);
1834 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1835 TEST_ASSERT_SUCCESS(status);
1836 TEST_ASSERT_UTEXT(str_Replxxx, result);
1837 utext_close(result);
1838
1839 /* No match. Text should copy to output with no changes. */
1840 uregex_setText(re, text2, -1, &status);
1841 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1842 TEST_ASSERT_SUCCESS(status);
1843 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1844 utext_close(result);
1845
1846 /* Unicode escapes */
1847 uregex_setText(re, text1, -1, &status);
1848 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1849 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1850 TEST_ASSERT_SUCCESS(status);
1851 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1852 utext_close(result);
1853
1854 uregex_close(re);
1855 utext_close(&replText);
1856 }
1857
1858
1859 /*
1860 * replaceAll()
1861 */
1862 {
1863 UChar text1[80];
1864 UChar text2[80];
1865 UText replText = UTEXT_INITIALIZER;
1866 UText *result;
1867 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1868 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1869 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1870 status = U_ZERO_ERROR;
b331163b
A
1871 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1872 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
729e4ab9
A
1873 utext_openUTF8(&replText, str_1, -1, &status);
1874
1875 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1876 TEST_ASSERT_SUCCESS(status);
1877
1878 /* Normal case, with match */
1879 uregex_setText(re, text1, -1, &status);
1880 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1881 TEST_ASSERT_SUCCESS(status);
1882 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1883 utext_close(result);
1884
1885 /* No match. Text should copy to output with no changes. */
1886 uregex_setText(re, text2, -1, &status);
1887 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1888 TEST_ASSERT_SUCCESS(status);
1889 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1890 utext_close(result);
1891
1892 uregex_close(re);
1893 utext_close(&replText);
1894 }
1895
1896
1897 /*
1898 * appendReplacement()
1899 */
1900 {
1901 UChar text[100];
1902 UChar repl[100];
1903 UChar buf[100];
1904 UChar *bufPtr;
1905 int32_t bufCap;
1906
1907 status = U_ZERO_ERROR;
1908 re = uregex_openC(".*", 0, 0, &status);
1909 TEST_ASSERT_SUCCESS(status);
1910
b331163b
A
1911 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1912 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
729e4ab9
A
1913 uregex_setText(re, text, -1, &status);
1914
1915 /* match covers whole target string */
1916 uregex_find(re, 0, &status);
1917 TEST_ASSERT_SUCCESS(status);
1918 bufPtr = buf;
b331163b 1919 bufCap = UPRV_LENGTHOF(buf);
729e4ab9
A
1920 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1921 TEST_ASSERT_SUCCESS(status);
1922 TEST_ASSERT_STRING("some other", buf, TRUE);
1923
1924 /* Match has \u \U escapes */
1925 uregex_find(re, 0, &status);
1926 TEST_ASSERT_SUCCESS(status);
1927 bufPtr = buf;
b331163b
A
1928 bufCap = UPRV_LENGTHOF(buf);
1929 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
729e4ab9
A
1930 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1931 TEST_ASSERT_SUCCESS(status);
1932 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1933
1934 uregex_close(re);
1935 }
1936
1937
1938 /*
1939 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1940 */
1941
1942 /*
1943 * splitUText()
1944 */
1945 {
1946 UChar textToSplit[80];
1947 UChar text2[80];
1948 UText *fields[10];
1949 int32_t numFields;
1950 int32_t i;
1951
b331163b
A
1952 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1953 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
729e4ab9
A
1954
1955 status = U_ZERO_ERROR;
1956 re = uregex_openC(":", 0, NULL, &status);
1957
1958
1959 /* Simple split */
1960
1961 uregex_setText(re, textToSplit, -1, &status);
1962 TEST_ASSERT_SUCCESS(status);
1963
1964 /* The TEST_ASSERT_SUCCESS call above should change too... */
1965 if (U_SUCCESS(status)) {
1966 memset(fields, 0, sizeof(fields));
1967 numFields = uregex_splitUText(re, fields, 10, &status);
1968 TEST_ASSERT_SUCCESS(status);
1969
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */
1971 if(U_SUCCESS(status)) {
1972 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1973 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1974 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1975 TEST_ASSERT(numFields == 3);
1976 TEST_ASSERT_UTEXT(str_first, fields[0]);
1977 TEST_ASSERT_UTEXT(str_second, fields[1]);
1978 TEST_ASSERT_UTEXT(str_third, fields[2]);
1979 TEST_ASSERT(fields[3] == NULL);
1980 }
1981 for(i = 0; i < numFields; i++) {
1982 utext_close(fields[i]);
1983 }
1984 }
1985
1986 uregex_close(re);
1987
1988
1989 /* Split with too few output strings available */
1990 status = U_ZERO_ERROR;
1991 re = uregex_openC(":", 0, NULL, &status);
1992 uregex_setText(re, textToSplit, -1, &status);
1993 TEST_ASSERT_SUCCESS(status);
1994
1995 /* The TEST_ASSERT_SUCCESS call above should change too... */
1996 if(U_SUCCESS(status)) {
1997 fields[0] = NULL;
1998 fields[1] = NULL;
1999 fields[2] = &patternText;
2000 numFields = uregex_splitUText(re, fields, 2, &status);
2001 TEST_ASSERT_SUCCESS(status);
2002
2003 /* The TEST_ASSERT_SUCCESS call above should change too... */
2004 if(U_SUCCESS(status)) {
2005 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2006 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2007 TEST_ASSERT(numFields == 2);
2008 TEST_ASSERT_UTEXT(str_first, fields[0]);
2009 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2010 TEST_ASSERT(fields[2] == &patternText);
2011 }
2012 for(i = 0; i < numFields; i++) {
2013 utext_close(fields[i]);
2014 }
2015 }
2016
2017 uregex_close(re);
2018 }
2019
2020 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2021 * comes out as additional fields. */
2022 {
2023 UChar textToSplit[80];
2024 UText *fields[10];
2025 int32_t numFields;
2026 int32_t i;
2027
b331163b 2028 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
729e4ab9
A
2029
2030 status = U_ZERO_ERROR;
2031 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2032
2033 uregex_setText(re, textToSplit, -1, &status);
2034 TEST_ASSERT_SUCCESS(status);
2035
2036 /* The TEST_ASSERT_SUCCESS call above should change too... */
2037 if(U_SUCCESS(status)) {
2038 memset(fields, 0, sizeof(fields));
2039 numFields = uregex_splitUText(re, fields, 10, &status);
2040 TEST_ASSERT_SUCCESS(status);
2041
2042 /* The TEST_ASSERT_SUCCESS call above should change too... */
2043 if(U_SUCCESS(status)) {
2044 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2045 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2046 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2047 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2048 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2049
2050 TEST_ASSERT(numFields == 5);
2051 TEST_ASSERT_UTEXT(str_first, fields[0]);
2052 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2053 TEST_ASSERT_UTEXT(str_second, fields[2]);
2054 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2055 TEST_ASSERT_UTEXT(str_third, fields[4]);
2056 TEST_ASSERT(fields[5] == NULL);
2057 }
2058 for(i = 0; i < numFields; i++) {
2059 utext_close(fields[i]);
2060 }
2061 }
2062
2063 /* Split with too few output strings available (2) */
2064 status = U_ZERO_ERROR;
2065 fields[0] = NULL;
2066 fields[1] = NULL;
2067 fields[2] = &patternText;
2068 numFields = uregex_splitUText(re, fields, 2, &status);
2069 TEST_ASSERT_SUCCESS(status);
2070
2071 /* The TEST_ASSERT_SUCCESS call above should change too... */
2072 if(U_SUCCESS(status)) {
2073 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2074 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2075 TEST_ASSERT(numFields == 2);
2076 TEST_ASSERT_UTEXT(str_first, fields[0]);
2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2078 TEST_ASSERT(fields[2] == &patternText);
2079 }
2080 for(i = 0; i < numFields; i++) {
2081 utext_close(fields[i]);
2082 }
2083
2084
2085 /* Split with too few output strings available (3) */
2086 status = U_ZERO_ERROR;
2087 fields[0] = NULL;
2088 fields[1] = NULL;
2089 fields[2] = NULL;
2090 fields[3] = &patternText;
2091 numFields = uregex_splitUText(re, fields, 3, &status);
2092 TEST_ASSERT_SUCCESS(status);
2093
2094 /* The TEST_ASSERT_SUCCESS call above should change too... */
2095 if(U_SUCCESS(status)) {
2096 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2097 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2098 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2099 TEST_ASSERT(numFields == 3);
2100 TEST_ASSERT_UTEXT(str_first, fields[0]);
2101 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2102 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2103 TEST_ASSERT(fields[3] == &patternText);
2104 }
2105 for(i = 0; i < numFields; i++) {
2106 utext_close(fields[i]);
2107 }
2108
2109 /* Split with just enough output strings available (5) */
2110 status = U_ZERO_ERROR;
2111 fields[0] = NULL;
2112 fields[1] = NULL;
2113 fields[2] = NULL;
2114 fields[3] = NULL;
2115 fields[4] = NULL;
2116 fields[5] = &patternText;
2117 numFields = uregex_splitUText(re, fields, 5, &status);
2118 TEST_ASSERT_SUCCESS(status);
2119
2120 /* The TEST_ASSERT_SUCCESS call above should change too... */
2121 if(U_SUCCESS(status)) {
2122 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2123 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2124 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2125 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2126 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2127
2128 TEST_ASSERT(numFields == 5);
2129 TEST_ASSERT_UTEXT(str_first, fields[0]);
2130 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2131 TEST_ASSERT_UTEXT(str_second, fields[2]);
2132 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2133 TEST_ASSERT_UTEXT(str_third, fields[4]);
2134 TEST_ASSERT(fields[5] == &patternText);
2135 }
2136 for(i = 0; i < numFields; i++) {
2137 utext_close(fields[i]);
2138 }
2139
2140 /* Split, end of text is a field delimiter. */
2141 status = U_ZERO_ERROR;
2142 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2143 TEST_ASSERT_SUCCESS(status);
2144
2145 /* The TEST_ASSERT_SUCCESS call above should change too... */
2146 if(U_SUCCESS(status)) {
2147 memset(fields, 0, sizeof(fields));
2148 fields[9] = &patternText;
2149 numFields = uregex_splitUText(re, fields, 9, &status);
2150 TEST_ASSERT_SUCCESS(status);
2151
2152 /* The TEST_ASSERT_SUCCESS call above should change too... */
2153 if(U_SUCCESS(status)) {
2154 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2155 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2156 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2157 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
4388f060 2158 const char str_empty[] = { 0x00 };
729e4ab9 2159
4388f060 2160 TEST_ASSERT(numFields == 5);
729e4ab9
A
2161 TEST_ASSERT_UTEXT(str_first, fields[0]);
2162 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2163 TEST_ASSERT_UTEXT(str_second, fields[2]);
2164 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
4388f060
A
2165 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2166 TEST_ASSERT(fields[5] == NULL);
729e4ab9
A
2167 TEST_ASSERT(fields[8] == NULL);
2168 TEST_ASSERT(fields[9] == &patternText);
2169 }
2170 for(i = 0; i < numFields; i++) {
2171 utext_close(fields[i]);
2172 }
2173 }
2174
2175 uregex_close(re);
2176 }
2177 utext_close(&patternText);
2178}
2179
4388f060
A
2180
2181static void TestRefreshInput(void) {
2182 /*
2183 * RefreshInput changes out the input of a URegularExpression without
2184 * changing anything else in the match state. Used with Java JNI,
2185 * when Java moves the underlying string storage. This test
2186 * runs a find() loop, moving the text after the first match.
2187 * The right number of matches should still be found.
2188 */
2189 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2190 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2191 UErrorCode status = U_ZERO_ERROR;
2192 URegularExpression *re;
2193 UText ut1 = UTEXT_INITIALIZER;
2194 UText ut2 = UTEXT_INITIALIZER;
2195
2196 re = uregex_openC("[ABC]", 0, 0, &status);
2197 TEST_ASSERT_SUCCESS(status);
2198
2199 utext_openUChars(&ut1, testStr, -1, &status);
2200 TEST_ASSERT_SUCCESS(status);
2201 uregex_setUText(re, &ut1, &status);
2202 TEST_ASSERT_SUCCESS(status);
2203
2204 /* Find the first match "A" in the original string */
2205 TEST_ASSERT(uregex_findNext(re, &status));
2206 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2207
2208 /* Move the string, kill the original string. */
2209 u_strcpy(movedStr, testStr);
2210 u_memset(testStr, 0, u_strlen(testStr));
2211 utext_openUChars(&ut2, movedStr, -1, &status);
2212 TEST_ASSERT_SUCCESS(status);
2213 uregex_refreshUText(re, &ut2, &status);
2214 TEST_ASSERT_SUCCESS(status);
2215
2216 /* Find the following two matches, now working in the moved string. */
2217 TEST_ASSERT(uregex_findNext(re, &status));
2218 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2219 TEST_ASSERT(uregex_findNext(re, &status));
2220 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2221 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2222
2223 uregex_close(re);
2224}
2225
2226
2227static void TestBug8421(void) {
2228 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2229 * was failing.
2230 */
2231 URegularExpression *re;
2232 UErrorCode status = U_ZERO_ERROR;
2233 int32_t limit = -1;
2234
2235 re = uregex_openC("abc", 0, 0, &status);
2236 TEST_ASSERT_SUCCESS(status);
2237
2238 limit = uregex_getTimeLimit(re, &status);
2239 TEST_ASSERT_SUCCESS(status);
2240 TEST_ASSERT(limit == 0);
2241
2242 uregex_setTimeLimit(re, 100, &status);
2243 TEST_ASSERT_SUCCESS(status);
2244 limit = uregex_getTimeLimit(re, &status);
2245 TEST_ASSERT_SUCCESS(status);
2246 TEST_ASSERT(limit == 100);
2247
2248 uregex_close(re);
2249}
2250
b331163b
A
2251static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2252 return FALSE;
2253}
2254
2255static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2256 return FALSE;
2257}
2258
2259static void TestBug10815() {
2260 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2261 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2262 */
2263 URegularExpression *re;
2264 UErrorCode status = U_ZERO_ERROR;
2265 UChar text[100];
2266
2267
2268 // findNext() with a find progress callback function.
2269
2270 re = uregex_openC(".z", 0, 0, &status);
2271 TEST_ASSERT_SUCCESS(status);
2272
2273 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2274 uregex_setText(re, text, -1, &status);
2275 TEST_ASSERT_SUCCESS(status);
2276
2277 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2278 TEST_ASSERT_SUCCESS(status);
2279
2280 uregex_findNext(re, &status);
2281 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2282
2283 uregex_close(re);
2284
2285 // findNext() with a match progress callback function.
2286
2287 status = U_ZERO_ERROR;
2288 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2289 TEST_ASSERT_SUCCESS(status);
2290
2291 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2292 // it will appear to be stuck in a (near) infinite loop.
2293 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2294 uregex_setText(re, text, -1, &status);
2295 TEST_ASSERT_SUCCESS(status);
2296
2297 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2298 TEST_ASSERT_SUCCESS(status);
2299
2300 uregex_findNext(re, &status);
2301 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2302
2303 uregex_close(re);
2304}
2305
4388f060 2306
374ca955 2307#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */