]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/reapits.c
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/********************************************************************
4 * COPYRIGHT:
b331163b 5 * Copyright (c) 2004-2015, International Business Machines Corporation and
374ca955
A
6 * others. All Rights Reserved.
7 ********************************************************************/
8/********************************************************************************
9*
10* File reapits.c
11*
12*********************************************************************************/
13/*C API TEST FOR Regular Expressions */
14/**
15* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16* try to test the full functionality. It just calls each function and verifies that it
17* works on a basic level.
18*
19* More complete testing of regular expression functionality is done with the C++ tests.
20**/
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26#include <stdlib.h>
27#include <string.h>
28#include "unicode/uloc.h"
29#include "unicode/uregex.h"
30#include "unicode/ustring.h"
729e4ab9 31#include "unicode/utext.h"
374ca955 32#include "cintltst.h"
b331163b 33#include "cmemory.h"
374ca955
A
34
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error (file, line and ICU error name) when `status` holds a
 *     failure code. Deliberately a plain brace block: some call sites invoke the
 *     assert macros without a trailing semicolon.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
374ca955
A
37
/*
 * TEST_ASSERT(expr)
 *     Logs a test failure (file, line and the stringized expression) when
 *     `expr` evaluates to FALSE. Kept as a plain brace block because at least
 *     one call site omits the trailing semicolon.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
374ca955 40
46f4442e
A
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The enclosing scope must declare `status` (UErrorCode) and `re`
 *         (URegularExpression *). SETUP opens a brace scope that declares
 *         `srcString`, the UChar copy of testString; TEARDOWN closes `re`,
 *         frees `srcString` and closes that scope, so the two macros must
 *         always be used as a pair.
 *
 *         The test code between the macros only runs if the pattern compiled,
 *         the text buffer was allocated and the text was set successfully.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        /* Report the allocation failure, but keep any earlier error code. */ \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
69
70
729e4ab9
A
71/**
72 * @param expected utf-8 array of bytes to be expected
73 */
46f4442e
A
74static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
75 char buf_inside_macro[120];
76 int32_t len = (int32_t)strlen(expected);
77 UBool success;
78 if (nulTerm) {
79 u_austrncpy(buf_inside_macro, (actual), len+1);
80 buf_inside_macro[len+2] = 0;
81 success = (strcmp((expected), buf_inside_macro) == 0);
82 } else {
83 u_austrncpy(buf_inside_macro, (actual), len);
84 buf_inside_macro[len+1] = 0;
85 success = (strncmp((expected), buf_inside_macro, len) == 0);
86 }
87 if (success == FALSE) {
88 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
89 file, line, (expected), buf_inside_macro);
90 }
374ca955
A
91}
92
46f4442e 93#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
374ca955
A
94
95
4388f060
A
96static UBool equals_utf8_utext(const char *utf8, UText *utext) {
97 int32_t u8i = 0;
98 UChar32 u8c = 0;
99 UChar32 utc = 0;
100 UBool stringsEqual = TRUE;
101 utext_setNativeIndex(utext, 0);
102 for (;;) {
103 U8_NEXT_UNSAFE(utf8, u8i, u8c);
104 utc = utext_next32(utext);
105 if (u8c == 0 && utc == U_SENTINEL) {
106 break;
107 }
108 if (u8c != utc || u8c == 0) {
109 stringsEqual = FALSE;
110 break;
111 }
112 }
113 return stringsEqual;
114}
115
116
729e4ab9 117static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
729e4ab9 118 utext_setNativeIndex(actual, 0);
4388f060 119 if (!equals_utf8_utext(expected, actual)) {
729e4ab9
A
120 UChar32 c;
121 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
122 c = utext_next32From(actual, 0);
123 while (c != U_SENTINEL) {
124 if (0x20<c && c <0x7e) {
125 log_err("%c", c);
126 } else {
127 log_err("%#x", c);
128 }
129 c = UTEXT_NEXT32(actual);
130 }
131 log_err("\"\n");
132 }
729e4ab9
A
133}
134
4388f060
A
135/*
136 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
137 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
138 */
729e4ab9 139#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
374ca955 140
4388f060
A
141static UBool testUTextEqual(UText *uta, UText *utb) {
142 UChar32 ca = 0;
143 UChar32 cb = 0;
144 utext_setNativeIndex(uta, 0);
145 utext_setNativeIndex(utb, 0);
146 do {
147 ca = utext_next32(uta);
148 cb = utext_next32(utb);
149 if (ca != cb) {
150 break;
151 }
152 } while (ca != U_SENTINEL);
153 return ca == cb;
154}
155
156
374ca955
A
157
158
159static void TestRegexCAPI(void);
73c04bcf 160static void TestBug4315(void);
729e4ab9 161static void TestUTextAPI(void);
4388f060
A
162static void TestRefreshInput(void);
163static void TestBug8421(void);
b331163b 164static void TestBug10815(void);
f3c0d7a5 165static void TestMatchStartLineWithEmptyText(void);
374ca955
A
166
167void addURegexTest(TestNode** root);
168
169void addURegexTest(TestNode** root)
170{
171 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
73c04bcf 172 addTest(root, &TestBug4315, "regex/TestBug4315");
729e4ab9 173 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
4388f060
A
174 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
175 addTest(root, &TestBug8421, "regex/TestBug8421");
b331163b 176 addTest(root, &TestBug10815, "regex/TestBug10815");
f3c0d7a5 177 addTest(root, &TestMatchStartLineWithEmptyText, "regex/TestMatchStartLineWithEmptyText");
374ca955
A
178}
179
46f4442e
A
/*
 * Call back function and context struct used for testing
 *    regular expression user callbacks.  This test is mostly the same as
 *    the corresponding C++ test in intltest.
 */
typedef struct callBackContext {
    int32_t   maxCalls;     /* the callback returns FALSE once numCalls reaches this value */
    int32_t   numCalls;     /* number of times the callback has been invoked */
    int32_t   lastSteps;    /* `steps` value received by the previous invocation */
} callBackContext;
190
191static UBool U_EXPORT2 U_CALLCONV
192TestCallbackFn(const void *context, int32_t steps) {
193 callBackContext *info = (callBackContext *)context;
194 if (info->lastSteps+1 != steps) {
195 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
196 }
197 info->lastSteps = steps;
198 info->numCalls++;
199 return (info->numCalls < info->maxCalls);
200}
374ca955 201
46f4442e
A
202/*
203 * Regular Expression C API Tests
204 */
374ca955
A
205static void TestRegexCAPI(void) {
206 UErrorCode status = U_ZERO_ERROR;
207 URegularExpression *re;
208 UChar pat[200];
209 UChar *minus1;
210
211 memset(&minus1, -1, sizeof(minus1));
212
213 /* Minimalist open/close */
b331163b 214 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
374ca955 215 re = uregex_open(pat, -1, 0, 0, &status);
46f4442e 216 if (U_FAILURE(status)) {
729e4ab9 217 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
46f4442e
A
218 return;
219 }
374ca955
A
220 uregex_close(re);
221
222 /* Open with all flag values set */
223 status = U_ZERO_ERROR;
224 re = uregex_open(pat, -1,
4388f060 225 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
374ca955
A
226 0, &status);
227 TEST_ASSERT_SUCCESS(status);
228 uregex_close(re);
229
230 /* Open with an invalid flag */
231 status = U_ZERO_ERROR;
232 re = uregex_open(pat, -1, 0x40000000, 0, &status);
233 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
234 uregex_close(re);
235
729e4ab9
A
236 /* Open with an unimplemented flag */
237 status = U_ZERO_ERROR;
4388f060 238 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
729e4ab9
A
239 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
240 uregex_close(re);
241
73c04bcf
A
242 /* openC with an invalid parameter */
243 status = U_ZERO_ERROR;
244 re = uregex_openC(NULL,
245 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
246 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
247
248 /* openC called with a failure status already set: must return NULL and leave the status unchanged */
249 status = U_USELESS_COLLATOR_ERROR;
250 re = uregex_openC(NULL,
251 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
252 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
374ca955
A
253
254 /* openC open from a C string */
255 {
256 const UChar *p;
257 int32_t len;
258 status = U_ZERO_ERROR;
259 re = uregex_openC("abc*", 0, 0, &status);
260 TEST_ASSERT_SUCCESS(status);
261 p = uregex_pattern(re, &len, &status);
262 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
263
264 /* The TEST_ASSERT_SUCCESS above should change too... */
265 if(U_SUCCESS(status)) {
b331163b 266 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
73c04bcf
A
267 TEST_ASSERT(u_strcmp(pat, p) == 0);
268 TEST_ASSERT(len==(int32_t)strlen("abc*"));
269 }
374ca955
A
270
271 uregex_close(re);
272
273 /* TODO: Open with ParseError parameter */
274 }
275
276 /*
277 * clone
278 */
279 {
280 URegularExpression *clone1;
281 URegularExpression *clone2;
282 URegularExpression *clone3;
283 UChar testString1[30];
284 UChar testString2[30];
285 UBool result;
286
287
288 status = U_ZERO_ERROR;
289 re = uregex_openC("abc*", 0, 0, &status);
290 TEST_ASSERT_SUCCESS(status);
291 clone1 = uregex_clone(re, &status);
292 TEST_ASSERT_SUCCESS(status);
293 TEST_ASSERT(clone1 != NULL);
294
295 status = U_ZERO_ERROR;
296 clone2 = uregex_clone(re, &status);
297 TEST_ASSERT_SUCCESS(status);
298 TEST_ASSERT(clone2 != NULL);
299 uregex_close(re);
300
301 status = U_ZERO_ERROR;
302 clone3 = uregex_clone(clone2, &status);
303 TEST_ASSERT_SUCCESS(status);
304 TEST_ASSERT(clone3 != NULL);
305
b331163b
A
306 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
307 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
374ca955
A
308
309 status = U_ZERO_ERROR;
310 uregex_setText(clone1, testString1, -1, &status);
311 TEST_ASSERT_SUCCESS(status);
312 result = uregex_lookingAt(clone1, 0, &status);
313 TEST_ASSERT_SUCCESS(status);
314 TEST_ASSERT(result==TRUE);
315
316 status = U_ZERO_ERROR;
317 uregex_setText(clone2, testString2, -1, &status);
318 TEST_ASSERT_SUCCESS(status);
319 result = uregex_lookingAt(clone2, 0, &status);
320 TEST_ASSERT_SUCCESS(status);
321 TEST_ASSERT(result==FALSE);
322 result = uregex_find(clone2, 0, &status);
323 TEST_ASSERT_SUCCESS(status);
324 TEST_ASSERT(result==TRUE);
325
326 uregex_close(clone1);
327 uregex_close(clone2);
328 uregex_close(clone3);
329
330 }
331
332 /*
333 * pattern()
334 */
335 {
336 const UChar *resultPat;
337 int32_t resultLen;
b331163b 338 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
374ca955
A
339 status = U_ZERO_ERROR;
340 re = uregex_open(pat, -1, 0, NULL, &status);
341 resultPat = uregex_pattern(re, &resultLen, &status);
342 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
343
344 /* The TEST_ASSERT_SUCCESS above should change too... */
345 if (U_SUCCESS(status)) {
346 TEST_ASSERT(resultLen == -1);
347 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
348 }
349
374ca955
A
350 uregex_close(re);
351
352 status = U_ZERO_ERROR;
353 re = uregex_open(pat, 3, 0, NULL, &status);
354 resultPat = uregex_pattern(re, &resultLen, &status);
355 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
356 TEST_ASSERT_SUCCESS(status);
357
358 /* The TEST_ASSERT_SUCCESS above should change too... */
359 if (U_SUCCESS(status)) {
360 TEST_ASSERT(resultLen == 3);
361 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
362 TEST_ASSERT(u_strlen(resultPat) == 3);
363 }
364
374ca955
A
365 uregex_close(re);
366 }
367
368 /*
369 * flags()
370 */
371 {
372 int32_t t;
373
374 status = U_ZERO_ERROR;
375 re = uregex_open(pat, -1, 0, NULL, &status);
376 t = uregex_flags(re, &status);
377 TEST_ASSERT_SUCCESS(status);
378 TEST_ASSERT(t == 0);
379 uregex_close(re);
380
381 status = U_ZERO_ERROR;
382 re = uregex_open(pat, -1, 0, NULL, &status);
383 t = uregex_flags(re, &status);
384 TEST_ASSERT_SUCCESS(status);
385 TEST_ASSERT(t == 0);
386 uregex_close(re);
387
388 status = U_ZERO_ERROR;
389 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
390 t = uregex_flags(re, &status);
391 TEST_ASSERT_SUCCESS(status);
392 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
393 uregex_close(re);
394 }
395
396 /*
397 * setText() and lookingAt()
398 */
399 {
400 UChar text1[50];
401 UChar text2[50];
402 UBool result;
403
b331163b
A
404 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
405 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
374ca955 406 status = U_ZERO_ERROR;
b331163b 407 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
374ca955
A
408 re = uregex_open(pat, -1, 0, NULL, &status);
409 TEST_ASSERT_SUCCESS(status);
410
411 /* Operation before doing a setText should fail... */
412 status = U_ZERO_ERROR;
413 uregex_lookingAt(re, 0, &status);
414 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
415
416 status = U_ZERO_ERROR;
417 uregex_setText(re, text1, -1, &status);
418 result = uregex_lookingAt(re, 0, &status);
419 TEST_ASSERT(result == TRUE);
420 TEST_ASSERT_SUCCESS(status);
421
422 status = U_ZERO_ERROR;
423 uregex_setText(re, text2, -1, &status);
424 result = uregex_lookingAt(re, 0, &status);
425 TEST_ASSERT(result == FALSE);
426 TEST_ASSERT_SUCCESS(status);
427
428 status = U_ZERO_ERROR;
429 uregex_setText(re, text1, -1, &status);
430 result = uregex_lookingAt(re, 0, &status);
431 TEST_ASSERT(result == TRUE);
432 TEST_ASSERT_SUCCESS(status);
433
434 status = U_ZERO_ERROR;
435 uregex_setText(re, text1, 5, &status);
436 result = uregex_lookingAt(re, 0, &status);
437 TEST_ASSERT(result == FALSE);
438 TEST_ASSERT_SUCCESS(status);
439
440 status = U_ZERO_ERROR;
441 uregex_setText(re, text1, 6, &status);
442 result = uregex_lookingAt(re, 0, &status);
443 TEST_ASSERT(result == TRUE);
444 TEST_ASSERT_SUCCESS(status);
445
446 uregex_close(re);
447 }
448
449
450 /*
451 * getText()
452 */
453 {
454 UChar text1[50];
455 UChar text2[50];
456 const UChar *result;
457 int32_t textLength;
458
b331163b
A
459 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
460 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
374ca955 461 status = U_ZERO_ERROR;
b331163b 462 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
374ca955
A
463 re = uregex_open(pat, -1, 0, NULL, &status);
464
465 uregex_setText(re, text1, -1, &status);
466 result = uregex_getText(re, &textLength, &status);
467 TEST_ASSERT(result == text1);
468 TEST_ASSERT(textLength == -1);
469 TEST_ASSERT_SUCCESS(status);
470
471 status = U_ZERO_ERROR;
472 uregex_setText(re, text2, 7, &status);
473 result = uregex_getText(re, &textLength, &status);
474 TEST_ASSERT(result == text2);
475 TEST_ASSERT(textLength == 7);
476 TEST_ASSERT_SUCCESS(status);
477
478 status = U_ZERO_ERROR;
479 uregex_setText(re, text2, 4, &status);
480 result = uregex_getText(re, &textLength, &status);
481 TEST_ASSERT(result == text2);
482 TEST_ASSERT(textLength == 4);
483 TEST_ASSERT_SUCCESS(status);
484 uregex_close(re);
485 }
486
487 /*
488 * matches()
489 */
490 {
491 UChar text1[50];
492 UBool result;
493 int len;
494 UChar nullString[] = {0,0,0};
495
b331163b 496 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
374ca955 497 status = U_ZERO_ERROR;
b331163b 498 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
374ca955
A
499 re = uregex_open(pat, -1, 0, NULL, &status);
500
501 uregex_setText(re, text1, -1, &status);
502 result = uregex_matches(re, 0, &status);
503 TEST_ASSERT(result == FALSE);
504 TEST_ASSERT_SUCCESS(status);
505
506 status = U_ZERO_ERROR;
507 uregex_setText(re, text1, 6, &status);
508 result = uregex_matches(re, 0, &status);
509 TEST_ASSERT(result == TRUE);
510 TEST_ASSERT_SUCCESS(status);
511
512 status = U_ZERO_ERROR;
513 uregex_setText(re, text1, 6, &status);
514 result = uregex_matches(re, 1, &status);
515 TEST_ASSERT(result == FALSE);
516 TEST_ASSERT_SUCCESS(status);
517 uregex_close(re);
518
519 status = U_ZERO_ERROR;
520 re = uregex_openC(".?", 0, NULL, &status);
521 uregex_setText(re, text1, -1, &status);
522 len = u_strlen(text1);
523 result = uregex_matches(re, len, &status);
524 TEST_ASSERT(result == TRUE);
525 TEST_ASSERT_SUCCESS(status);
526
527 status = U_ZERO_ERROR;
528 uregex_setText(re, nullString, -1, &status);
529 TEST_ASSERT_SUCCESS(status);
530 result = uregex_matches(re, 0, &status);
531 TEST_ASSERT(result == TRUE);
532 TEST_ASSERT_SUCCESS(status);
533 uregex_close(re);
534 }
535
536
537 /*
538 * lookingAt() Used in setText test.
539 */
540
541
542 /*
543 * find(), findNext, start, end, reset
544 */
545 {
546 UChar text1[50];
547 UBool result;
b331163b 548 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
374ca955
A
549 status = U_ZERO_ERROR;
550 re = uregex_openC("rx", 0, NULL, &status);
551
552 uregex_setText(re, text1, -1, &status);
553 result = uregex_find(re, 0, &status);
554 TEST_ASSERT(result == TRUE);
555 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
556 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
557 TEST_ASSERT_SUCCESS(status);
558
559 result = uregex_find(re, 9, &status);
560 TEST_ASSERT(result == TRUE);
561 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
562 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
563 TEST_ASSERT_SUCCESS(status);
564
565 result = uregex_find(re, 14, &status);
566 TEST_ASSERT(result == FALSE);
567 TEST_ASSERT_SUCCESS(status);
568
569 status = U_ZERO_ERROR;
570 uregex_reset(re, 0, &status);
571
572 result = uregex_findNext(re, &status);
573 TEST_ASSERT(result == TRUE);
574 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
575 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
576 TEST_ASSERT_SUCCESS(status);
577
578 result = uregex_findNext(re, &status);
579 TEST_ASSERT(result == TRUE);
580 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
581 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
582 TEST_ASSERT_SUCCESS(status);
583
584 status = U_ZERO_ERROR;
585 uregex_reset(re, 12, &status);
586
587 result = uregex_findNext(re, &status);
588 TEST_ASSERT(result == TRUE);
589 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
590 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
591 TEST_ASSERT_SUCCESS(status);
592
593 result = uregex_findNext(re, &status);
594 TEST_ASSERT(result == FALSE);
595 TEST_ASSERT_SUCCESS(status);
596
597 uregex_close(re);
598 }
599
600 /*
601 * groupCount
602 */
603 {
604 int32_t result;
605
606 status = U_ZERO_ERROR;
607 re = uregex_openC("abc", 0, NULL, &status);
608 result = uregex_groupCount(re, &status);
609 TEST_ASSERT_SUCCESS(status);
610 TEST_ASSERT(result == 0);
611 uregex_close(re);
612
613 status = U_ZERO_ERROR;
614 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
615 result = uregex_groupCount(re, &status);
616 TEST_ASSERT_SUCCESS(status);
617 TEST_ASSERT(result == 3);
618 uregex_close(re);
619
620 }
621
622
623 /*
624 * group()
625 */
626 {
627 UChar text1[80];
628 UChar buf[80];
629 UBool result;
630 int32_t resultSz;
b331163b 631 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
374ca955
A
632
633 status = U_ZERO_ERROR;
634 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
635 TEST_ASSERT_SUCCESS(status);
636
637
638 uregex_setText(re, text1, -1, &status);
639 result = uregex_find(re, 0, &status);
640 TEST_ASSERT(result==TRUE);
641
642 /* Capture Group 0, the full match. Should succeed. */
643 status = U_ZERO_ERROR;
b331163b 644 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
645 TEST_ASSERT_SUCCESS(status);
646 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
647 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
648
649 /* Capture group #1. Should succeed. */
650 status = U_ZERO_ERROR;
b331163b 651 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
652 TEST_ASSERT_SUCCESS(status);
653 TEST_ASSERT_STRING(" interior ", buf, TRUE);
654 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
655
656 /* Capture group out of range. Error. */
657 status = U_ZERO_ERROR;
b331163b 658 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
659 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
660
661 /* NULL buffer, pure pre-flight */
662 status = U_ZERO_ERROR;
663 resultSz = uregex_group(re, 0, NULL, 0, &status);
664 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
665 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
666
667 /* Too small buffer, truncated string */
668 status = U_ZERO_ERROR;
669 memset(buf, -1, sizeof(buf));
670 resultSz = uregex_group(re, 0, buf, 5, &status);
671 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
672 TEST_ASSERT_STRING("abc i", buf, FALSE);
673 TEST_ASSERT(buf[5] == (UChar)0xffff);
674 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
675
676 /* Output string just fits buffer, no NUL term. */
677 status = U_ZERO_ERROR;
678 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
679 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
680 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
681 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
682 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
683
684 uregex_close(re);
685
686 }
46f4442e
A
687
688 /*
689 * Regions
690 */
691
692
693 /* SetRegion(), getRegion() do something */
694 TEST_SETUP(".*", "0123456789ABCDEF", 0)
695 UChar resultString[40];
696 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
697 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
698 uregex_setRegion(re, 3, 6, &status);
699 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
700 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
701 TEST_ASSERT(uregex_findNext(re, &status));
b331163b 702 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
46f4442e
A
703 TEST_ASSERT_STRING("345", resultString, TRUE);
704 TEST_TEARDOWN;
705
706 /* find(start=-1) uses regions */
707 TEST_SETUP(".*", "0123456789ABCDEF", 0);
708 uregex_setRegion(re, 4, 6, &status);
709 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
710 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
711 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
712 TEST_TEARDOWN;
713
714 /* find (start >=0) does not use regions */
715 TEST_SETUP(".*", "0123456789ABCDEF", 0);
716 uregex_setRegion(re, 4, 6, &status);
717 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
718 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
719 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
720 TEST_TEARDOWN;
721
722 /* findNext() obeys regions */
723 TEST_SETUP(".", "0123456789ABCDEF", 0);
724 uregex_setRegion(re, 4, 6, &status);
725 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
726 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
727 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
728 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
729 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
730 TEST_TEARDOWN;
731
732 /* matches(start=-1) uses regions */
733 /* Also, verify that non-greedy *? succeeds in finding the full match. */
734 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
735 uregex_setRegion(re, 4, 6, &status);
736 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
737 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
738 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
739 TEST_TEARDOWN;
740
741 /* matches (start >=0) does not use regions */
742 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
743 uregex_setRegion(re, 4, 6, &status);
744 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
745 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
746 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
747 TEST_TEARDOWN;
748
749 /* lookingAt(start=-1) uses regions */
750 /* Also, verify that non-greedy *? finds the first (shortest) match. */
751 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
752 uregex_setRegion(re, 4, 6, &status);
753 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
754 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
755 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
756 TEST_TEARDOWN;
757
758 /* lookingAt (start >=0) does not use regions */
759 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
760 uregex_setRegion(re, 4, 6, &status);
761 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
762 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
763 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
764 TEST_TEARDOWN;
765
766 /* hitEnd() */
767 TEST_SETUP("[a-f]*", "abcdefghij", 0);
768 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
769 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
770 TEST_TEARDOWN;
771
772 TEST_SETUP("[a-f]*", "abcdef", 0);
773 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
774 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
775 TEST_TEARDOWN;
776
777 /* requireEnd */
778 TEST_SETUP("abcd", "abcd", 0);
779 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
780 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
781 TEST_TEARDOWN;
782
783 TEST_SETUP("abcd$", "abcd", 0);
784 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
785 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
786 TEST_TEARDOWN;
787
788 /* anchoringBounds */
789 TEST_SETUP("abc$", "abcdef", 0);
790 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
791 uregex_useAnchoringBounds(re, FALSE, &status);
792 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
793
794 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
795 uregex_useAnchoringBounds(re, TRUE, &status);
796 uregex_setRegion(re, 0, 3, &status);
797 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
798 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
799 TEST_TEARDOWN;
800
801 /* Transparent Bounds */
802 TEST_SETUP("abc(?=def)", "abcdef", 0);
803 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
804 uregex_useTransparentBounds(re, TRUE, &status);
805 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
806
807 uregex_useTransparentBounds(re, FALSE, &status);
808 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
809 uregex_setRegion(re, 0, 3, &status);
810 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
811 uregex_useTransparentBounds(re, TRUE, &status);
812 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
813 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
814 TEST_TEARDOWN;
815
374ca955
A
816
817 /*
818 * replaceFirst()
819 */
820 {
821 UChar text1[80];
822 UChar text2[80];
823 UChar replText[80];
824 UChar buf[80];
825 int32_t resultSz;
b331163b
A
826 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
827 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
828 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
374ca955
A
829
830 status = U_ZERO_ERROR;
831 re = uregex_openC("x(.*?)x", 0, NULL, &status);
832 TEST_ASSERT_SUCCESS(status);
833
834 /* Normal case, with match */
835 uregex_setText(re, text1, -1, &status);
b331163b 836 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
837 TEST_ASSERT_SUCCESS(status);
838 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
839 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
840
841 /* No match. Text should copy to output with no changes. */
842 status = U_ZERO_ERROR;
843 uregex_setText(re, text2, -1, &status);
b331163b 844 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
845 TEST_ASSERT_SUCCESS(status);
846 TEST_ASSERT_STRING("No match here.", buf, TRUE);
847 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
848
849 /* Match, output just fills buffer, no termination warning. */
850 status = U_ZERO_ERROR;
851 uregex_setText(re, text1, -1, &status);
852 memset(buf, -1, sizeof(buf));
853 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
854 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
855 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
856 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
857 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
858
859 /* Do the replaceFirst again, without first resetting anything.
860 * Should give the same results.
861 */
862 status = U_ZERO_ERROR;
863 memset(buf, -1, sizeof(buf));
864 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
865 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
866 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
867 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
868 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
869
870 /* NULL buffer, zero buffer length */
871 status = U_ZERO_ERROR;
872 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
873 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
874 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
875
876 /* Buffer too small by one */
877 status = U_ZERO_ERROR;
878 memset(buf, -1, sizeof(buf));
879 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
880 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
881 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
882 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
883 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
884
885 uregex_close(re);
886 }
887
888
889 /*
890 * replaceAll()
891 */
892 {
729e4ab9
A
893 UChar text1[80]; /* "Replace xaax x1x x...x." */
894 UChar text2[80]; /* "No match Here" */
895 UChar replText[80]; /* "<$1>" */
896 UChar replText2[80]; /* "<<$1>>" */
897 const char * pattern = "x(.*?)x";
898 const char * expectedResult = "Replace <aa> <1> <...>.";
899 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
374ca955 900 UChar buf[80];
729e4ab9 901 int32_t resultSize;
374ca955 902 int32_t expectedResultSize;
729e4ab9 903 int32_t expectedResultSize2;
374ca955
A
904 int32_t i;
905
b331163b
A
906 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
907 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
908 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
909 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
729e4ab9
A
910 expectedResultSize = strlen(expectedResult);
911 expectedResultSize2 = strlen(expectedResult2);
374ca955
A
912
913 status = U_ZERO_ERROR;
729e4ab9 914 re = uregex_openC(pattern, 0, NULL, &status);
374ca955
A
915 TEST_ASSERT_SUCCESS(status);
916
917 /* Normal case, with match */
918 uregex_setText(re, text1, -1, &status);
b331163b 919 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955 920 TEST_ASSERT_SUCCESS(status);
729e4ab9
A
921 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
922 TEST_ASSERT(resultSize == expectedResultSize);
374ca955
A
923
924 /* No match. Text should copy to output with no changes. */
925 status = U_ZERO_ERROR;
926 uregex_setText(re, text2, -1, &status);
b331163b 927 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
374ca955
A
928 TEST_ASSERT_SUCCESS(status);
929 TEST_ASSERT_STRING("No match here.", buf, TRUE);
729e4ab9 930 TEST_ASSERT(resultSize == u_strlen(text2));
374ca955
A
931
932 /* Match, output just fills buffer, no termination warning. */
933 status = U_ZERO_ERROR;
934 uregex_setText(re, text1, -1, &status);
935 memset(buf, -1, sizeof(buf));
729e4ab9 936 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
374ca955 937 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
729e4ab9
A
938 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
939 TEST_ASSERT(resultSize == expectedResultSize);
940 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
941
942 /* Do the replaceAll again, without first resetting anything.
943 * Should give the same results.
944 */
945 status = U_ZERO_ERROR;
946 memset(buf, -1, sizeof(buf));
729e4ab9 947 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
374ca955
A
948 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
949 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
729e4ab9
A
950 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
951 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
952
953 /* NULL buffer, zero buffer length */
954 status = U_ZERO_ERROR;
729e4ab9 955 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
374ca955 956 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 957 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
374ca955
A
958
959 /* Buffer too small. Try every size, which will tickle edge cases
960 * in uregex_appendReplacement (used by replaceAll) */
961 for (i=0; i<expectedResultSize; i++) {
962 char expected[80];
963 status = U_ZERO_ERROR;
964 memset(buf, -1, sizeof(buf));
729e4ab9
A
965 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
966 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
967 strcpy(expected, expectedResult);
968 expected[i] = 0;
969 TEST_ASSERT_STRING(expected, buf, FALSE);
970 TEST_ASSERT(resultSize == expectedResultSize);
971 TEST_ASSERT(buf[i] == (UChar)0xffff);
972 }
973
974 /* Buffer too small. Same as previous test, except this time the replacement
975 * text is longer than the match capture group, making the length of the complete
976 * replacement longer than the original string.
977 */
978 for (i=0; i<expectedResultSize2; i++) {
979 char expected[80];
980 status = U_ZERO_ERROR;
981 memset(buf, -1, sizeof(buf));
982 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
374ca955 983 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 984 strcpy(expected, expectedResult2);
374ca955
A
985 expected[i] = 0;
986 TEST_ASSERT_STRING(expected, buf, FALSE);
729e4ab9 987 TEST_ASSERT(resultSize == expectedResultSize2);
374ca955
A
988 TEST_ASSERT(buf[i] == (UChar)0xffff);
989 }
990
729e4ab9 991
374ca955
A
992 uregex_close(re);
993 }
994
995
996 /*
997 * appendReplacement()
998 */
999 {
1000 UChar text[100];
1001 UChar repl[100];
1002 UChar buf[100];
1003 UChar *bufPtr;
1004 int32_t bufCap;
1005
1006
1007 status = U_ZERO_ERROR;
1008 re = uregex_openC(".*", 0, 0, &status);
1009 TEST_ASSERT_SUCCESS(status);
1010
b331163b
A
1011 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1012 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
374ca955
A
1013 uregex_setText(re, text, -1, &status);
1014
1015 /* match covers whole target string */
1016 uregex_find(re, 0, &status);
1017 TEST_ASSERT_SUCCESS(status);
1018 bufPtr = buf;
b331163b 1019 bufCap = UPRV_LENGTHOF(buf);
374ca955
A
1020 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1021 TEST_ASSERT_SUCCESS(status);
1022 TEST_ASSERT_STRING("some other", buf, TRUE);
1023
1024 /* Match has \u \U escapes */
1025 uregex_find(re, 0, &status);
1026 TEST_ASSERT_SUCCESS(status);
1027 bufPtr = buf;
b331163b
A
1028 bufCap = UPRV_LENGTHOF(buf);
1029 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
374ca955
A
1030 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1031 TEST_ASSERT_SUCCESS(status);
1032 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1033
729e4ab9
A
1034 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1035 status = U_ZERO_ERROR;
1036 uregex_find(re, 0, &status);
1037 TEST_ASSERT_SUCCESS(status);
1038 bufPtr = buf;
1039 status = U_BUFFER_OVERFLOW_ERROR;
1040 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1041 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1042
374ca955
A
1043 uregex_close(re);
1044 }
1045
1046
1047 /*
1048 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1049 */
1050
1051 /*
1052 * split()
1053 */
1054 {
1055 UChar textToSplit[80];
1056 UChar text2[80];
1057 UChar buf[200];
1058 UChar *fields[10];
1059 int32_t numFields;
1060 int32_t requiredCapacity;
1061 int32_t spaceNeeded;
1062 int32_t sz;
1063
b331163b
A
1064 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1065 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
374ca955
A
1066
1067 status = U_ZERO_ERROR;
1068 re = uregex_openC(":", 0, NULL, &status);
1069
1070
1071 /* Simple split */
1072
1073 uregex_setText(re, textToSplit, -1, &status);
1074 TEST_ASSERT_SUCCESS(status);
1075
73c04bcf
A
1076 /* The TEST_ASSERT_SUCCESS call above should change too... */
1077 if (U_SUCCESS(status)) {
1078 memset(fields, -1, sizeof(fields));
1079 numFields =
b331163b 1080 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
73c04bcf 1081 TEST_ASSERT_SUCCESS(status);
374ca955 1082
73c04bcf
A
1083 /* The TEST_ASSERT_SUCCESS call above should change too... */
1084 if(U_SUCCESS(status)) {
1085 TEST_ASSERT(numFields == 3);
1086 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1087 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1088 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1089 TEST_ASSERT(fields[3] == NULL);
1090
1091 spaceNeeded = u_strlen(textToSplit) -
1092 (numFields - 1) + /* Field delimiters do not appear in output */
1093 numFields; /* Each field gets a NUL terminator */
1094
1095 TEST_ASSERT(spaceNeeded == requiredCapacity);
1096 }
1097 }
374ca955 1098
374ca955
A
1099 uregex_close(re);
1100
1101
1102 /* Split with too few output strings available */
1103 status = U_ZERO_ERROR;
1104 re = uregex_openC(":", 0, NULL, &status);
1105 uregex_setText(re, textToSplit, -1, &status);
1106 TEST_ASSERT_SUCCESS(status);
1107
73c04bcf
A
1108 /* The TEST_ASSERT_SUCCESS call above should change too... */
1109 if(U_SUCCESS(status)) {
374ca955 1110 memset(fields, -1, sizeof(fields));
374ca955 1111 numFields =
b331163b 1112 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
73c04bcf
A
1113 TEST_ASSERT_SUCCESS(status);
1114
1115 /* The TEST_ASSERT_SUCCESS call above should change too... */
1116 if(U_SUCCESS(status)) {
1117 TEST_ASSERT(numFields == 2);
374ca955 1118 TEST_ASSERT_STRING("first ", fields[0], TRUE);
73c04bcf
A
1119 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1120 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1121
1122 spaceNeeded = u_strlen(textToSplit) -
1123 (numFields - 1) + /* Field delimiters do not appear in output */
1124 numFields; /* Each field gets a NUL terminator */
1125
1126 TEST_ASSERT(spaceNeeded == requiredCapacity);
1127
1128 /* Split with a range of output buffer sizes. */
1129 spaceNeeded = u_strlen(textToSplit) -
1130 (numFields - 1) + /* Field delimiters do not appear in output */
1131 numFields; /* Each field gets a NUL terminator */
1132
1133 for (sz=0; sz < spaceNeeded+1; sz++) {
1134 memset(fields, -1, sizeof(fields));
1135 status = U_ZERO_ERROR;
1136 numFields =
1137 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1138 if (sz >= spaceNeeded) {
1139 TEST_ASSERT_SUCCESS(status);
1140 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1141 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1142 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1143 } else {
1144 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1145 }
1146 TEST_ASSERT(numFields == 3);
1147 TEST_ASSERT(fields[3] == NULL);
1148 TEST_ASSERT(spaceNeeded == requiredCapacity);
1149 }
374ca955 1150 }
374ca955 1151 }
73c04bcf 1152
374ca955
A
1153 uregex_close(re);
1154 }
1155
1156
1157
1158
1159 /* Split(), part 2. Patterns with capture groups. The capture group text
1160 * comes out as additional fields. */
1161 {
1162 UChar textToSplit[80];
1163 UChar buf[200];
1164 UChar *fields[10];
1165 int32_t numFields;
1166 int32_t requiredCapacity;
1167 int32_t spaceNeeded;
1168 int32_t sz;
1169
b331163b 1170 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
374ca955
A
1171
1172 status = U_ZERO_ERROR;
1173 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1174
1175 uregex_setText(re, textToSplit, -1, &status);
1176 TEST_ASSERT_SUCCESS(status);
1177
73c04bcf
A
1178 /* The TEST_ASSERT_SUCCESS call above should change too... */
1179 if(U_SUCCESS(status)) {
1180 memset(fields, -1, sizeof(fields));
1181 numFields =
b331163b 1182 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
73c04bcf 1183 TEST_ASSERT_SUCCESS(status);
374ca955 1184
73c04bcf
A
1185 /* The TEST_ASSERT_SUCCESS call above should change too... */
1186 if(U_SUCCESS(status)) {
1187 TEST_ASSERT(numFields == 5);
1188 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1189 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1190 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1191 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1192 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1193 TEST_ASSERT(fields[5] == NULL);
1194 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1195 TEST_ASSERT(spaceNeeded == requiredCapacity);
1196 }
1197 }
374ca955
A
1198
1199 /* Split with too few output strings available (2) */
1200 status = U_ZERO_ERROR;
1201 memset(fields, -1, sizeof(fields));
1202 numFields =
b331163b 1203 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
374ca955 1204 TEST_ASSERT_SUCCESS(status);
374ca955 1205
73c04bcf
A
1206 /* The TEST_ASSERT_SUCCESS call above should change too... */
1207 if(U_SUCCESS(status)) {
1208 TEST_ASSERT(numFields == 2);
1209 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1210 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1211 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1212
1213 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1214 TEST_ASSERT(spaceNeeded == requiredCapacity);
1215 }
374ca955
A
1216
1217 /* Split with too few output strings available (3) */
1218 status = U_ZERO_ERROR;
1219 memset(fields, -1, sizeof(fields));
1220 numFields =
b331163b 1221 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
374ca955 1222 TEST_ASSERT_SUCCESS(status);
374ca955 1223
73c04bcf
A
1224 /* The TEST_ASSERT_SUCCESS call above should change too... */
1225 if(U_SUCCESS(status)) {
1226 TEST_ASSERT(numFields == 3);
1227 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1228 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1229 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1230 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1231
1232 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1233 TEST_ASSERT(spaceNeeded == requiredCapacity);
1234 }
374ca955
A
1235
1236 /* Split with just enough output strings available (5) */
1237 status = U_ZERO_ERROR;
1238 memset(fields, -1, sizeof(fields));
1239 numFields =
b331163b 1240 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
374ca955 1241 TEST_ASSERT_SUCCESS(status);
374ca955 1242
73c04bcf
A
1243 /* The TEST_ASSERT_SUCCESS call above should change too... */
1244 if(U_SUCCESS(status)) {
1245 TEST_ASSERT(numFields == 5);
1246 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1247 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1248 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1249 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1250 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1251 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
374ca955 1252
73c04bcf
A
1253 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1254 TEST_ASSERT(spaceNeeded == requiredCapacity);
1255 }
374ca955
A
1256
1257 /* Split, end of text is a field delimiter. */
1258 status = U_ZERO_ERROR;
1259 sz = strlen("first <tag-a> second<tag-b>");
1260 uregex_setText(re, textToSplit, sz, &status);
1261 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
1262
1263 /* The TEST_ASSERT_SUCCESS call above should change too... */
1264 if(U_SUCCESS(status)) {
1265 memset(fields, -1, sizeof(fields));
1266 numFields =
b331163b 1267 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
73c04bcf
A
1268 TEST_ASSERT_SUCCESS(status);
1269
1270 /* The TEST_ASSERT_SUCCESS call above should change too... */
1271 if(U_SUCCESS(status)) {
4388f060 1272 TEST_ASSERT(numFields == 5);
73c04bcf
A
1273 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1274 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1275 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1276 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
4388f060
A
1277 TEST_ASSERT_STRING("", fields[4], TRUE);
1278 TEST_ASSERT(fields[5] == NULL);
73c04bcf
A
1279 TEST_ASSERT(fields[8] == NULL);
1280 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
4388f060 1281 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
73c04bcf
A
1282 TEST_ASSERT(spaceNeeded == requiredCapacity);
1283 }
1284 }
374ca955
A
1285
1286 uregex_close(re);
1287 }
1288
46f4442e
A
1289 /*
1290 * set/getTimeLimit
1291 */
1292 TEST_SETUP("abc$", "abcdef", 0);
1293 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1294 uregex_setTimeLimit(re, 1000, &status);
1295 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1296 TEST_ASSERT_SUCCESS(status);
1297 uregex_setTimeLimit(re, -1, &status);
1298 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1299 status = U_ZERO_ERROR;
1300 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1301 TEST_TEARDOWN;
1302
1303 /*
1304 * set/get Stack Limit
1305 */
1306 TEST_SETUP("abc$", "abcdef", 0);
1307 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1308 uregex_setStackLimit(re, 40000, &status);
1309 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1310 TEST_ASSERT_SUCCESS(status);
1311 uregex_setStackLimit(re, -1, &status);
1312 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1313 status = U_ZERO_ERROR;
1314 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1315 TEST_TEARDOWN;
1316
1317
1318 /*
1319 * Get/Set callback functions
1320 * This test is copied from intltest regex/Callbacks
1321 * The pattern and test data will run long enough to cause the callback
1322 * to be invoked. The nested '+' operators give exponential time
1323 * behavior with increasing string length.
1324 */
1325 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1326 callBackContext cbInfo = {4, 0, 0};
1327 const void *pContext = &cbInfo;
1328 URegexMatchCallback *returnedFn = &TestCallbackFn;
1329
1330 /* Getting the callback fn when it hasn't been set must return NULL */
1331 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1332 TEST_ASSERT_SUCCESS(status);
1333 TEST_ASSERT(returnedFn == NULL);
1334 TEST_ASSERT(pContext == NULL);
1335
1336 /* Set the callback and do a match. */
1337 /* The callback function should record that it has been called. */
1338 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1339 TEST_ASSERT_SUCCESS(status);
1340 TEST_ASSERT(cbInfo.numCalls == 0);
1341 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1342 TEST_ASSERT_SUCCESS(status);
1343 TEST_ASSERT(cbInfo.numCalls > 0);
1344
1345 /* Getting the callback should return the values that were set above. */
1346 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1347 TEST_ASSERT(returnedFn == &TestCallbackFn);
1348 TEST_ASSERT(pContext == &cbInfo);
1349
1350 TEST_TEARDOWN;
374ca955
A
1351}
1352
46f4442e
A
1353
1354
73c04bcf
A
1355static void TestBug4315(void) {
1356 UErrorCode theICUError = U_ZERO_ERROR;
1357 URegularExpression *theRegEx;
1358 UChar *textBuff;
1359 const char *thePattern;
1360 UChar theString[100];
1361 UChar *destFields[24];
1362 int32_t neededLength1;
1363 int32_t neededLength2;
1364
1365 int32_t wordCount = 0;
1366 int32_t destFieldsSize = 24;
1367
1368 thePattern = "ck ";
1369 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1370
1371 /* open a regex */
1372 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1373 TEST_ASSERT_SUCCESS(theICUError);
1374
1375 /* set the input string */
1376 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1377 TEST_ASSERT_SUCCESS(theICUError);
1378
1379 /* split */
1380 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1381 * error occurs! */
1382 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1383 destFieldsSize, &theICUError);
1384
1385 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1386 TEST_ASSERT(wordCount==3);
1387
1388 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1389 {
1390 theICUError = U_ZERO_ERROR;
1391 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1392 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1393 destFields, destFieldsSize, &theICUError);
1394 TEST_ASSERT(wordCount==3);
1395 TEST_ASSERT_SUCCESS(theICUError);
1396 TEST_ASSERT(neededLength1 == neededLength2);
1397 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1398 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1399 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1400 TEST_ASSERT(destFields[3] == NULL);
1401 free(textBuff);
1402 }
1403 uregex_close(theRegEx);
1404}
1405
729e4ab9
A
1406/* Based on TestRegexCAPI() */
1407static void TestUTextAPI(void) {
1408 UErrorCode status = U_ZERO_ERROR;
1409 URegularExpression *re;
1410 UText patternText = UTEXT_INITIALIZER;
1411 UChar pat[200];
1412 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1413
1414 /* Mimimalist open/close */
1415 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1416 re = uregex_openUText(&patternText, 0, 0, &status);
1417 if (U_FAILURE(status)) {
1418 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1419 utext_close(&patternText);
1420 return;
1421 }
1422 uregex_close(re);
1423
1424 /* Open with all flag values set */
1425 status = U_ZERO_ERROR;
1426 re = uregex_openUText(&patternText,
1427 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1428 0, &status);
1429 TEST_ASSERT_SUCCESS(status);
1430 uregex_close(re);
1431
1432 /* Open with an invalid flag */
1433 status = U_ZERO_ERROR;
1434 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1435 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1436 uregex_close(re);
1437
1438 /* open with an invalid parameter */
1439 status = U_ZERO_ERROR;
1440 re = uregex_openUText(NULL,
1441 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1442 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1443
1444 /*
1445 * clone
1446 */
1447 {
1448 URegularExpression *clone1;
1449 URegularExpression *clone2;
1450 URegularExpression *clone3;
1451 UChar testString1[30];
1452 UChar testString2[30];
1453 UBool result;
1454
1455
1456 status = U_ZERO_ERROR;
1457 re = uregex_openUText(&patternText, 0, 0, &status);
1458 TEST_ASSERT_SUCCESS(status);
1459 clone1 = uregex_clone(re, &status);
1460 TEST_ASSERT_SUCCESS(status);
1461 TEST_ASSERT(clone1 != NULL);
1462
1463 status = U_ZERO_ERROR;
1464 clone2 = uregex_clone(re, &status);
1465 TEST_ASSERT_SUCCESS(status);
1466 TEST_ASSERT(clone2 != NULL);
1467 uregex_close(re);
1468
1469 status = U_ZERO_ERROR;
1470 clone3 = uregex_clone(clone2, &status);
1471 TEST_ASSERT_SUCCESS(status);
1472 TEST_ASSERT(clone3 != NULL);
1473
b331163b
A
1474 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1475 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
729e4ab9
A
1476
1477 status = U_ZERO_ERROR;
1478 uregex_setText(clone1, testString1, -1, &status);
1479 TEST_ASSERT_SUCCESS(status);
1480 result = uregex_lookingAt(clone1, 0, &status);
1481 TEST_ASSERT_SUCCESS(status);
1482 TEST_ASSERT(result==TRUE);
1483
1484 status = U_ZERO_ERROR;
1485 uregex_setText(clone2, testString2, -1, &status);
1486 TEST_ASSERT_SUCCESS(status);
1487 result = uregex_lookingAt(clone2, 0, &status);
1488 TEST_ASSERT_SUCCESS(status);
1489 TEST_ASSERT(result==FALSE);
1490 result = uregex_find(clone2, 0, &status);
1491 TEST_ASSERT_SUCCESS(status);
1492 TEST_ASSERT(result==TRUE);
1493
1494 uregex_close(clone1);
1495 uregex_close(clone2);
1496 uregex_close(clone3);
1497
1498 }
1499
1500 /*
1501 * pattern() and patternText()
1502 */
1503 {
1504 const UChar *resultPat;
1505 int32_t resultLen;
1506 UText *resultText;
1507 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1508 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
b331163b 1509 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
729e4ab9
A
1510 status = U_ZERO_ERROR;
1511
1512 utext_openUTF8(&patternText, str_hello, -1, &status);
1513 re = uregex_open(pat, -1, 0, NULL, &status);
1514 resultPat = uregex_pattern(re, &resultLen, &status);
1515 TEST_ASSERT_SUCCESS(status);
1516
1517 /* The TEST_ASSERT_SUCCESS above should change too... */
1518 if (U_SUCCESS(status)) {
1519 TEST_ASSERT(resultLen == -1);
1520 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1521 }
1522
1523 resultText = uregex_patternUText(re, &status);
1524 TEST_ASSERT_SUCCESS(status);
1525 TEST_ASSERT_UTEXT(str_hello, resultText);
1526
1527 uregex_close(re);
1528
1529 status = U_ZERO_ERROR;
1530 re = uregex_open(pat, 3, 0, NULL, &status);
1531 resultPat = uregex_pattern(re, &resultLen, &status);
1532 TEST_ASSERT_SUCCESS(status);
1533
1534 /* The TEST_ASSERT_SUCCESS above should change too... */
1535 if (U_SUCCESS(status)) {
1536 TEST_ASSERT(resultLen == 3);
1537 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1538 TEST_ASSERT(u_strlen(resultPat) == 3);
1539 }
1540
1541 resultText = uregex_patternUText(re, &status);
1542 TEST_ASSERT_SUCCESS(status);
1543 TEST_ASSERT_UTEXT(str_hel, resultText);
1544
1545 uregex_close(re);
1546 }
1547
1548 /*
1549 * setUText() and lookingAt()
1550 */
1551 {
1552 UText text1 = UTEXT_INITIALIZER;
1553 UText text2 = UTEXT_INITIALIZER;
1554 UBool result;
1555 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1556 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1557 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1558 status = U_ZERO_ERROR;
1559 utext_openUTF8(&text1, str_abcccd, -1, &status);
1560 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1561
1562 utext_openUTF8(&patternText, str_abcd, -1, &status);
1563 re = uregex_openUText(&patternText, 0, NULL, &status);
1564 TEST_ASSERT_SUCCESS(status);
1565
1566 /* Operation before doing a setText should fail... */
1567 status = U_ZERO_ERROR;
1568 uregex_lookingAt(re, 0, &status);
1569 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1570
1571 status = U_ZERO_ERROR;
1572 uregex_setUText(re, &text1, &status);
1573 result = uregex_lookingAt(re, 0, &status);
1574 TEST_ASSERT(result == TRUE);
1575 TEST_ASSERT_SUCCESS(status);
1576
1577 status = U_ZERO_ERROR;
1578 uregex_setUText(re, &text2, &status);
1579 result = uregex_lookingAt(re, 0, &status);
1580 TEST_ASSERT(result == FALSE);
1581 TEST_ASSERT_SUCCESS(status);
1582
1583 status = U_ZERO_ERROR;
1584 uregex_setUText(re, &text1, &status);
1585 result = uregex_lookingAt(re, 0, &status);
1586 TEST_ASSERT(result == TRUE);
1587 TEST_ASSERT_SUCCESS(status);
1588
1589 uregex_close(re);
1590 utext_close(&text1);
1591 utext_close(&text2);
1592 }
1593
1594
1595 /*
1596 * getText() and getUText()
1597 */
1598 {
1599 UText text1 = UTEXT_INITIALIZER;
1600 UText text2 = UTEXT_INITIALIZER;
1601 UChar text2Chars[20];
1602 UText *resultText;
1603 const UChar *result;
1604 int32_t textLength;
1605 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1606 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1607 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1608
1609
1610 status = U_ZERO_ERROR;
1611 utext_openUTF8(&text1, str_abcccd, -1, &status);
b331163b 1612 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
729e4ab9
A
1613 utext_openUChars(&text2, text2Chars, -1, &status);
1614
1615 utext_openUTF8(&patternText, str_abcd, -1, &status);
1616 re = uregex_openUText(&patternText, 0, NULL, &status);
1617
1618 /* First set a UText */
1619 uregex_setUText(re, &text1, &status);
1620 resultText = uregex_getUText(re, NULL, &status);
1621 TEST_ASSERT_SUCCESS(status);
1622 TEST_ASSERT(resultText != &text1);
1623 utext_setNativeIndex(resultText, 0);
1624 utext_setNativeIndex(&text1, 0);
4388f060 1625 TEST_ASSERT(testUTextEqual(resultText, &text1));
729e4ab9
A
1626 utext_close(resultText);
1627
1628 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
57a6839d 1629 (void)result; /* Suppress set but not used warning. */
729e4ab9
A
1630 TEST_ASSERT(textLength == -1 || textLength == 6);
1631 resultText = uregex_getUText(re, NULL, &status);
1632 TEST_ASSERT_SUCCESS(status);
1633 TEST_ASSERT(resultText != &text1);
1634 utext_setNativeIndex(resultText, 0);
1635 utext_setNativeIndex(&text1, 0);
4388f060 1636 TEST_ASSERT(testUTextEqual(resultText, &text1));
729e4ab9
A
1637 utext_close(resultText);
1638
1639 /* Then set a UChar * */
1640 uregex_setText(re, text2Chars, 7, &status);
1641 resultText = uregex_getUText(re, NULL, &status);
1642 TEST_ASSERT_SUCCESS(status);
1643 utext_setNativeIndex(resultText, 0);
1644 utext_setNativeIndex(&text2, 0);
4388f060 1645 TEST_ASSERT(testUTextEqual(resultText, &text2));
729e4ab9
A
1646 utext_close(resultText);
1647 result = uregex_getText(re, &textLength, &status);
1648 TEST_ASSERT(textLength == 7);
1649
1650 uregex_close(re);
1651 utext_close(&text1);
1652 utext_close(&text2);
1653 }
1654
1655 /*
1656 * matches()
1657 */
1658 {
1659 UText text1 = UTEXT_INITIALIZER;
1660 UBool result;
1661 UText nullText = UTEXT_INITIALIZER;
1662 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1663 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1664
1665 status = U_ZERO_ERROR;
1666 utext_openUTF8(&text1, str_abcccde, -1, &status);
1667 utext_openUTF8(&patternText, str_abcd, -1, &status);
1668 re = uregex_openUText(&patternText, 0, NULL, &status);
1669
1670 uregex_setUText(re, &text1, &status);
1671 result = uregex_matches(re, 0, &status);
1672 TEST_ASSERT(result == FALSE);
1673 TEST_ASSERT_SUCCESS(status);
1674 uregex_close(re);
1675
1676 status = U_ZERO_ERROR;
1677 re = uregex_openC(".?", 0, NULL, &status);
1678 uregex_setUText(re, &text1, &status);
1679 result = uregex_matches(re, 7, &status);
1680 TEST_ASSERT(result == TRUE);
1681 TEST_ASSERT_SUCCESS(status);
1682
1683 status = U_ZERO_ERROR;
1684 utext_openUTF8(&nullText, "", -1, &status);
1685 uregex_setUText(re, &nullText, &status);
1686 TEST_ASSERT_SUCCESS(status);
1687 result = uregex_matches(re, 0, &status);
1688 TEST_ASSERT(result == TRUE);
1689 TEST_ASSERT_SUCCESS(status);
1690
1691 uregex_close(re);
1692 utext_close(&text1);
1693 utext_close(&nullText);
1694 }
1695
1696
1697 /*
1698 * lookingAt() Used in setText test.
1699 */
1700
1701
1702 /*
1703 * find(), findNext, start, end, reset
1704 */
1705 {
1706 UChar text1[50];
1707 UBool result;
b331163b 1708 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
729e4ab9
A
1709 status = U_ZERO_ERROR;
1710 re = uregex_openC("rx", 0, NULL, &status);
1711
1712 uregex_setText(re, text1, -1, &status);
1713 result = uregex_find(re, 0, &status);
1714 TEST_ASSERT(result == TRUE);
1715 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1716 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1717 TEST_ASSERT_SUCCESS(status);
1718
1719 result = uregex_find(re, 9, &status);
1720 TEST_ASSERT(result == TRUE);
1721 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1722 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1723 TEST_ASSERT_SUCCESS(status);
1724
1725 result = uregex_find(re, 14, &status);
1726 TEST_ASSERT(result == FALSE);
1727 TEST_ASSERT_SUCCESS(status);
1728
1729 status = U_ZERO_ERROR;
1730 uregex_reset(re, 0, &status);
1731
1732 result = uregex_findNext(re, &status);
1733 TEST_ASSERT(result == TRUE);
1734 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1735 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1736 TEST_ASSERT_SUCCESS(status);
1737
1738 result = uregex_findNext(re, &status);
1739 TEST_ASSERT(result == TRUE);
1740 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1741 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1742 TEST_ASSERT_SUCCESS(status);
1743
1744 status = U_ZERO_ERROR;
1745 uregex_reset(re, 12, &status);
1746
1747 result = uregex_findNext(re, &status);
1748 TEST_ASSERT(result == TRUE);
1749 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1750 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1751 TEST_ASSERT_SUCCESS(status);
1752
1753 result = uregex_findNext(re, &status);
1754 TEST_ASSERT(result == FALSE);
1755 TEST_ASSERT_SUCCESS(status);
1756
1757 uregex_close(re);
1758 }
1759
1760 /*
b331163b 1761 * groupUText()
729e4ab9
A
1762 */
1763 {
1764 UChar text1[80];
1765 UText *actual;
1766 UBool result;
b331163b
A
1767 int64_t groupLen = 0;
1768 UChar groupBuf[20];
729e4ab9 1769
b331163b 1770 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
729e4ab9
A
1771
1772 status = U_ZERO_ERROR;
1773 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1774 TEST_ASSERT_SUCCESS(status);
1775
1776 uregex_setText(re, text1, -1, &status);
1777 result = uregex_find(re, 0, &status);
1778 TEST_ASSERT(result==TRUE);
1779
729e4ab9
A
1780 /* Capture Group 0 with shallow clone API. Should succeed. */
1781 status = U_ZERO_ERROR;
b331163b
A
1782 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1783 TEST_ASSERT_SUCCESS(status);
729e4ab9 1784
b331163b
A
1785 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1786 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1787 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
729e4ab9 1788
b331163b 1789 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
729e4ab9
A
1790 utext_close(actual);
1791
1792 /* Capture group #1. Should succeed. */
1793 status = U_ZERO_ERROR;
b331163b
A
1794
1795 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
729e4ab9 1796 TEST_ASSERT_SUCCESS(status);
b331163b
A
1797 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1798 /* (within the string text1) */
1799 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1800 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1801 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1802
729e4ab9
A
1803 utext_close(actual);
1804
1805 /* Capture group out of range. Error. */
1806 status = U_ZERO_ERROR;
b331163b 1807 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
729e4ab9 1808 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
729e4ab9
A
1809 utext_close(actual);
1810
1811 uregex_close(re);
729e4ab9
A
1812 }
1813
1814 /*
1815 * replaceFirst()
1816 */
1817 {
1818 UChar text1[80];
1819 UChar text2[80];
1820 UText replText = UTEXT_INITIALIZER;
1821 UText *result;
1822 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1823 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
b331163b
A
1824 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1825 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
729e4ab9
A
1826 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1827 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1828 status = U_ZERO_ERROR;
b331163b
A
1829 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1830 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
729e4ab9
A
1831 utext_openUTF8(&replText, str_1x, -1, &status);
1832
1833 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1834 TEST_ASSERT_SUCCESS(status);
1835
1836 /* Normal case, with match */
1837 uregex_setText(re, text1, -1, &status);
1838 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1839 TEST_ASSERT_SUCCESS(status);
1840 TEST_ASSERT_UTEXT(str_Replxxx, result);
1841 utext_close(result);
1842
1843 /* No match. Text should copy to output with no changes. */
1844 uregex_setText(re, text2, -1, &status);
1845 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1846 TEST_ASSERT_SUCCESS(status);
1847 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1848 utext_close(result);
1849
1850 /* Unicode escapes */
1851 uregex_setText(re, text1, -1, &status);
1852 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1853 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1854 TEST_ASSERT_SUCCESS(status);
1855 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1856 utext_close(result);
1857
1858 uregex_close(re);
1859 utext_close(&replText);
1860 }
1861
1862
1863 /*
1864 * replaceAll()
1865 */
1866 {
1867 UChar text1[80];
1868 UChar text2[80];
1869 UText replText = UTEXT_INITIALIZER;
1870 UText *result;
1871 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1872 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1873 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1874 status = U_ZERO_ERROR;
b331163b
A
1875 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1876 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
729e4ab9
A
1877 utext_openUTF8(&replText, str_1, -1, &status);
1878
1879 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1880 TEST_ASSERT_SUCCESS(status);
1881
1882 /* Normal case, with match */
1883 uregex_setText(re, text1, -1, &status);
1884 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1885 TEST_ASSERT_SUCCESS(status);
1886 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1887 utext_close(result);
1888
1889 /* No match. Text should copy to output with no changes. */
1890 uregex_setText(re, text2, -1, &status);
1891 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1892 TEST_ASSERT_SUCCESS(status);
1893 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1894 utext_close(result);
1895
1896 uregex_close(re);
1897 utext_close(&replText);
1898 }
1899
1900
1901 /*
1902 * appendReplacement()
1903 */
1904 {
1905 UChar text[100];
1906 UChar repl[100];
1907 UChar buf[100];
1908 UChar *bufPtr;
1909 int32_t bufCap;
1910
1911 status = U_ZERO_ERROR;
1912 re = uregex_openC(".*", 0, 0, &status);
1913 TEST_ASSERT_SUCCESS(status);
1914
b331163b
A
1915 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1916 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
729e4ab9
A
1917 uregex_setText(re, text, -1, &status);
1918
1919 /* match covers whole target string */
1920 uregex_find(re, 0, &status);
1921 TEST_ASSERT_SUCCESS(status);
1922 bufPtr = buf;
b331163b 1923 bufCap = UPRV_LENGTHOF(buf);
729e4ab9
A
1924 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1925 TEST_ASSERT_SUCCESS(status);
1926 TEST_ASSERT_STRING("some other", buf, TRUE);
1927
1928 /* Match has \u \U escapes */
1929 uregex_find(re, 0, &status);
1930 TEST_ASSERT_SUCCESS(status);
1931 bufPtr = buf;
b331163b
A
1932 bufCap = UPRV_LENGTHOF(buf);
1933 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
729e4ab9
A
1934 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1935 TEST_ASSERT_SUCCESS(status);
1936 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1937
1938 uregex_close(re);
1939 }
1940
1941
1942 /*
1943 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1944 */
1945
1946 /*
1947 * splitUText()
1948 */
1949 {
1950 UChar textToSplit[80];
1951 UChar text2[80];
1952 UText *fields[10];
1953 int32_t numFields;
1954 int32_t i;
1955
b331163b
A
1956 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1957 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
729e4ab9
A
1958
1959 status = U_ZERO_ERROR;
1960 re = uregex_openC(":", 0, NULL, &status);
1961
1962
1963 /* Simple split */
1964
1965 uregex_setText(re, textToSplit, -1, &status);
1966 TEST_ASSERT_SUCCESS(status);
1967
1968 /* The TEST_ASSERT_SUCCESS call above should change too... */
1969 if (U_SUCCESS(status)) {
1970 memset(fields, 0, sizeof(fields));
1971 numFields = uregex_splitUText(re, fields, 10, &status);
1972 TEST_ASSERT_SUCCESS(status);
1973
1974 /* The TEST_ASSERT_SUCCESS call above should change too... */
1975 if(U_SUCCESS(status)) {
1976 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1977 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1978 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1979 TEST_ASSERT(numFields == 3);
1980 TEST_ASSERT_UTEXT(str_first, fields[0]);
1981 TEST_ASSERT_UTEXT(str_second, fields[1]);
1982 TEST_ASSERT_UTEXT(str_third, fields[2]);
1983 TEST_ASSERT(fields[3] == NULL);
1984 }
1985 for(i = 0; i < numFields; i++) {
1986 utext_close(fields[i]);
1987 }
1988 }
1989
1990 uregex_close(re);
1991
1992
1993 /* Split with too few output strings available */
1994 status = U_ZERO_ERROR;
1995 re = uregex_openC(":", 0, NULL, &status);
1996 uregex_setText(re, textToSplit, -1, &status);
1997 TEST_ASSERT_SUCCESS(status);
1998
1999 /* The TEST_ASSERT_SUCCESS call above should change too... */
2000 if(U_SUCCESS(status)) {
2001 fields[0] = NULL;
2002 fields[1] = NULL;
2003 fields[2] = &patternText;
2004 numFields = uregex_splitUText(re, fields, 2, &status);
2005 TEST_ASSERT_SUCCESS(status);
2006
2007 /* The TEST_ASSERT_SUCCESS call above should change too... */
2008 if(U_SUCCESS(status)) {
2009 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2010 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2011 TEST_ASSERT(numFields == 2);
2012 TEST_ASSERT_UTEXT(str_first, fields[0]);
2013 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2014 TEST_ASSERT(fields[2] == &patternText);
2015 }
2016 for(i = 0; i < numFields; i++) {
2017 utext_close(fields[i]);
2018 }
2019 }
2020
2021 uregex_close(re);
2022 }
2023
2024 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2025 * comes out as additional fields. */
2026 {
2027 UChar textToSplit[80];
2028 UText *fields[10];
2029 int32_t numFields;
2030 int32_t i;
2031
b331163b 2032 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
729e4ab9
A
2033
2034 status = U_ZERO_ERROR;
2035 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2036
2037 uregex_setText(re, textToSplit, -1, &status);
2038 TEST_ASSERT_SUCCESS(status);
2039
2040 /* The TEST_ASSERT_SUCCESS call above should change too... */
2041 if(U_SUCCESS(status)) {
2042 memset(fields, 0, sizeof(fields));
2043 numFields = uregex_splitUText(re, fields, 10, &status);
2044 TEST_ASSERT_SUCCESS(status);
2045
2046 /* The TEST_ASSERT_SUCCESS call above should change too... */
2047 if(U_SUCCESS(status)) {
2048 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2049 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2050 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2051 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2052 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2053
2054 TEST_ASSERT(numFields == 5);
2055 TEST_ASSERT_UTEXT(str_first, fields[0]);
2056 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2057 TEST_ASSERT_UTEXT(str_second, fields[2]);
2058 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2059 TEST_ASSERT_UTEXT(str_third, fields[4]);
2060 TEST_ASSERT(fields[5] == NULL);
2061 }
2062 for(i = 0; i < numFields; i++) {
2063 utext_close(fields[i]);
2064 }
2065 }
2066
2067 /* Split with too few output strings available (2) */
2068 status = U_ZERO_ERROR;
2069 fields[0] = NULL;
2070 fields[1] = NULL;
2071 fields[2] = &patternText;
2072 numFields = uregex_splitUText(re, fields, 2, &status);
2073 TEST_ASSERT_SUCCESS(status);
2074
2075 /* The TEST_ASSERT_SUCCESS call above should change too... */
2076 if(U_SUCCESS(status)) {
2077 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2078 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2079 TEST_ASSERT(numFields == 2);
2080 TEST_ASSERT_UTEXT(str_first, fields[0]);
2081 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2082 TEST_ASSERT(fields[2] == &patternText);
2083 }
2084 for(i = 0; i < numFields; i++) {
2085 utext_close(fields[i]);
2086 }
2087
2088
2089 /* Split with too few output strings available (3) */
2090 status = U_ZERO_ERROR;
2091 fields[0] = NULL;
2092 fields[1] = NULL;
2093 fields[2] = NULL;
2094 fields[3] = &patternText;
2095 numFields = uregex_splitUText(re, fields, 3, &status);
2096 TEST_ASSERT_SUCCESS(status);
2097
2098 /* The TEST_ASSERT_SUCCESS call above should change too... */
2099 if(U_SUCCESS(status)) {
2100 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2101 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2102 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2103 TEST_ASSERT(numFields == 3);
2104 TEST_ASSERT_UTEXT(str_first, fields[0]);
2105 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2106 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2107 TEST_ASSERT(fields[3] == &patternText);
2108 }
2109 for(i = 0; i < numFields; i++) {
2110 utext_close(fields[i]);
2111 }
2112
2113 /* Split with just enough output strings available (5) */
2114 status = U_ZERO_ERROR;
2115 fields[0] = NULL;
2116 fields[1] = NULL;
2117 fields[2] = NULL;
2118 fields[3] = NULL;
2119 fields[4] = NULL;
2120 fields[5] = &patternText;
2121 numFields = uregex_splitUText(re, fields, 5, &status);
2122 TEST_ASSERT_SUCCESS(status);
2123
2124 /* The TEST_ASSERT_SUCCESS call above should change too... */
2125 if(U_SUCCESS(status)) {
2126 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2127 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2128 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2129 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2130 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2131
2132 TEST_ASSERT(numFields == 5);
2133 TEST_ASSERT_UTEXT(str_first, fields[0]);
2134 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2135 TEST_ASSERT_UTEXT(str_second, fields[2]);
2136 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2137 TEST_ASSERT_UTEXT(str_third, fields[4]);
2138 TEST_ASSERT(fields[5] == &patternText);
2139 }
2140 for(i = 0; i < numFields; i++) {
2141 utext_close(fields[i]);
2142 }
2143
2144 /* Split, end of text is a field delimiter. */
2145 status = U_ZERO_ERROR;
2146 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2147 TEST_ASSERT_SUCCESS(status);
2148
2149 /* The TEST_ASSERT_SUCCESS call above should change too... */
2150 if(U_SUCCESS(status)) {
2151 memset(fields, 0, sizeof(fields));
2152 fields[9] = &patternText;
2153 numFields = uregex_splitUText(re, fields, 9, &status);
2154 TEST_ASSERT_SUCCESS(status);
2155
2156 /* The TEST_ASSERT_SUCCESS call above should change too... */
2157 if(U_SUCCESS(status)) {
2158 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2159 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2160 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2161 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
4388f060 2162 const char str_empty[] = { 0x00 };
729e4ab9 2163
4388f060 2164 TEST_ASSERT(numFields == 5);
729e4ab9
A
2165 TEST_ASSERT_UTEXT(str_first, fields[0]);
2166 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2167 TEST_ASSERT_UTEXT(str_second, fields[2]);
2168 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
4388f060
A
2169 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2170 TEST_ASSERT(fields[5] == NULL);
729e4ab9
A
2171 TEST_ASSERT(fields[8] == NULL);
2172 TEST_ASSERT(fields[9] == &patternText);
2173 }
2174 for(i = 0; i < numFields; i++) {
2175 utext_close(fields[i]);
2176 }
2177 }
2178
2179 uregex_close(re);
2180 }
2181 utext_close(&patternText);
2182}
2183
4388f060
A
2184
2185static void TestRefreshInput(void) {
2186 /*
2187 * RefreshInput changes out the input of a URegularExpression without
2188 * changing anything else in the match state. Used with Java JNI,
2189 * when Java moves the underlying string storage. This test
2190 * runs a find() loop, moving the text after the first match.
2191 * The right number of matches should still be found.
2192 */
2193 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2194 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2195 UErrorCode status = U_ZERO_ERROR;
2196 URegularExpression *re;
2197 UText ut1 = UTEXT_INITIALIZER;
2198 UText ut2 = UTEXT_INITIALIZER;
2199
2200 re = uregex_openC("[ABC]", 0, 0, &status);
2201 TEST_ASSERT_SUCCESS(status);
2202
2203 utext_openUChars(&ut1, testStr, -1, &status);
2204 TEST_ASSERT_SUCCESS(status);
2205 uregex_setUText(re, &ut1, &status);
2206 TEST_ASSERT_SUCCESS(status);
2207
2208 /* Find the first match "A" in the original string */
2209 TEST_ASSERT(uregex_findNext(re, &status));
2210 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2211
2212 /* Move the string, kill the original string. */
2213 u_strcpy(movedStr, testStr);
2214 u_memset(testStr, 0, u_strlen(testStr));
2215 utext_openUChars(&ut2, movedStr, -1, &status);
2216 TEST_ASSERT_SUCCESS(status);
2217 uregex_refreshUText(re, &ut2, &status);
2218 TEST_ASSERT_SUCCESS(status);
2219
2220 /* Find the following two matches, now working in the moved string. */
2221 TEST_ASSERT(uregex_findNext(re, &status));
2222 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2223 TEST_ASSERT(uregex_findNext(re, &status));
2224 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2225 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2226
2227 uregex_close(re);
2228}
2229
2230
2231static void TestBug8421(void) {
2232 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2233 * was failing.
2234 */
2235 URegularExpression *re;
2236 UErrorCode status = U_ZERO_ERROR;
2237 int32_t limit = -1;
2238
2239 re = uregex_openC("abc", 0, 0, &status);
2240 TEST_ASSERT_SUCCESS(status);
2241
2242 limit = uregex_getTimeLimit(re, &status);
2243 TEST_ASSERT_SUCCESS(status);
2244 TEST_ASSERT(limit == 0);
2245
2246 uregex_setTimeLimit(re, 100, &status);
2247 TEST_ASSERT_SUCCESS(status);
2248 limit = uregex_getTimeLimit(re, &status);
2249 TEST_ASSERT_SUCCESS(status);
2250 TEST_ASSERT(limit == 100);
2251
2252 uregex_close(re);
2253}
2254
b331163b
A
2255static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2256 return FALSE;
2257}
2258
2259static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2260 return FALSE;
2261}
2262
2263static void TestBug10815() {
2264 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2265 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2266 */
2267 URegularExpression *re;
2268 UErrorCode status = U_ZERO_ERROR;
2269 UChar text[100];
2270
2271
2272 // findNext() with a find progress callback function.
2273
2274 re = uregex_openC(".z", 0, 0, &status);
2275 TEST_ASSERT_SUCCESS(status);
2276
2277 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2278 uregex_setText(re, text, -1, &status);
2279 TEST_ASSERT_SUCCESS(status);
2280
2281 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2282 TEST_ASSERT_SUCCESS(status);
2283
2284 uregex_findNext(re, &status);
2285 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2286
2287 uregex_close(re);
2288
2289 // findNext() with a match progress callback function.
2290
2291 status = U_ZERO_ERROR;
2292 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2293 TEST_ASSERT_SUCCESS(status);
2294
2295 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2296 // it will appear to be stuck in a (near) infinite loop.
2297 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2298 uregex_setText(re, text, -1, &status);
2299 TEST_ASSERT_SUCCESS(status);
2300
2301 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2302 TEST_ASSERT_SUCCESS(status);
2303
2304 uregex_findNext(re, &status);
2305 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2306
2307 uregex_close(re);
2308}
2309
f3c0d7a5
A
2310static const UChar startLinePattern[] = { 0x5E, 0x78, 0 }; // "^x"
2311
2312static void TestMatchStartLineWithEmptyText() {
2313 UErrorCode status = U_ZERO_ERROR;
2314 UText* ut = utext_openUChars(NULL, NULL, 0, &status);
2315 TEST_ASSERT_SUCCESS(status);
2316 if (U_SUCCESS(status)) {
2317 URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status);
2318 TEST_ASSERT_SUCCESS(status);
2319 if (U_SUCCESS(status)) {
2320 uregex_setUText(re, ut, &status);
2321 TEST_ASSERT(U_SUCCESS(status));
2322 if (U_SUCCESS(status)) {
2323 UBool found = uregex_findNext(re, &status);
2324 TEST_ASSERT(U_SUCCESS(status) && !found);
2325 }
2326 uregex_close(re);
2327 }
2328 utext_close(ut);
2329 }
2330}
4388f060 2331
374ca955 2332#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */