]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/reapits.c
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
CommitLineData
374ca955
A
1/********************************************************************
2 * COPYRIGHT:
4388f060 3 * Copyright (c) 2004-2012, International Business Machines Corporation and
374ca955
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14* try to test the full functionality. It just calls each function and verifies that it
15* works on a basic level.
16*
17* More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
729e4ab9 29#include "unicode/utext.h"
374ca955
A
30#include "cintltst.h"
31
/*
 * TEST_ASSERT_SUCCESS(status)
 *   Reports a data error, tagged with the current file and line and the
 *   name of the error code, when 'status' holds a failure code.
 *   Expands to a brace-enclosed block, so a trailing semicolon is optional.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
374ca955
A
34
/*
 * TEST_ASSERT(expr)
 *   Reports a data error, tagged with the current file and line, when
 *   'expr' compares equal to FALSE.
 *   Expands to a brace-enclosed block, so a trailing semicolon is optional.
 */
#define TEST_ASSERT(expr) { \
    if (FALSE == (expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
374ca955 37
46f4442e
A
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         Both macros rely on variables named 'status' (UErrorCode) and
 *         're' (URegularExpression *) being declared in the enclosing scope.
 *         TEST_SETUP opens a block plus an "if (U_SUCCESS(status)) {" body that
 *         TEST_TEARDOWN closes, so the two must always be used as a pair.
 *         The test code only runs when the pattern compiled, the text buffer
 *         was allocated, and the text was attached successfully.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re     = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        /* Allocation failed: skip the test body, but keep any earlier error. */ \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status);      \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
66
67
729e4ab9
A
68/**
69 * @param expected utf-8 array of bytes to be expected
70 */
46f4442e
A
71static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
72 char buf_inside_macro[120];
73 int32_t len = (int32_t)strlen(expected);
74 UBool success;
75 if (nulTerm) {
76 u_austrncpy(buf_inside_macro, (actual), len+1);
77 buf_inside_macro[len+2] = 0;
78 success = (strcmp((expected), buf_inside_macro) == 0);
79 } else {
80 u_austrncpy(buf_inside_macro, (actual), len);
81 buf_inside_macro[len+1] = 0;
82 success = (strncmp((expected), buf_inside_macro, len) == 0);
83 }
84 if (success == FALSE) {
85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
86 file, line, (expected), buf_inside_macro);
87 }
374ca955
A
88}
89
46f4442e 90#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
374ca955
A
91
92
4388f060
A
93static UBool equals_utf8_utext(const char *utf8, UText *utext) {
94 int32_t u8i = 0;
95 UChar32 u8c = 0;
96 UChar32 utc = 0;
97 UBool stringsEqual = TRUE;
98 utext_setNativeIndex(utext, 0);
99 for (;;) {
100 U8_NEXT_UNSAFE(utf8, u8i, u8c);
101 utc = utext_next32(utext);
102 if (u8c == 0 && utc == U_SENTINEL) {
103 break;
104 }
105 if (u8c != utc || u8c == 0) {
106 stringsEqual = FALSE;
107 break;
108 }
109 }
110 return stringsEqual;
111}
112
113
729e4ab9 114static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
729e4ab9 115 utext_setNativeIndex(actual, 0);
4388f060 116 if (!equals_utf8_utext(expected, actual)) {
729e4ab9
A
117 UChar32 c;
118 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
119 c = utext_next32From(actual, 0);
120 while (c != U_SENTINEL) {
121 if (0x20<c && c <0x7e) {
122 log_err("%c", c);
123 } else {
124 log_err("%#x", c);
125 }
126 c = UTEXT_NEXT32(actual);
127 }
128 log_err("\"\n");
129 }
729e4ab9
A
130}
131
4388f060
A
132/*
133 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
134 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
135 */
729e4ab9 136#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
374ca955 137
4388f060
A
138static UBool testUTextEqual(UText *uta, UText *utb) {
139 UChar32 ca = 0;
140 UChar32 cb = 0;
141 utext_setNativeIndex(uta, 0);
142 utext_setNativeIndex(utb, 0);
143 do {
144 ca = utext_next32(uta);
145 cb = utext_next32(utb);
146 if (ca != cb) {
147 break;
148 }
149 } while (ca != U_SENTINEL);
150 return ca == cb;
151}
152
153
374ca955
A
154
155
156static void TestRegexCAPI(void);
73c04bcf 157static void TestBug4315(void);
729e4ab9 158static void TestUTextAPI(void);
4388f060
A
159static void TestRefreshInput(void);
160static void TestBug8421(void);
374ca955
A
161
162void addURegexTest(TestNode** root);
163
164void addURegexTest(TestNode** root)
165{
166 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
73c04bcf 167 addTest(root, &TestBug4315, "regex/TestBug4315");
729e4ab9 168 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
4388f060
A
169 addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
170 addTest(root, &TestBug8421, "regex/TestBug8421");
374ca955
A
171}
172
46f4442e
A
173/*
174 * Call back function and context struct used for testing
175 * regular expression user callbacks. This test is mostly the same as
176 * the corresponding C++ test in intltest.
177 */
178typedef struct callBackContext {
179 int32_t maxCalls;
180 int32_t numCalls;
181 int32_t lastSteps;
182} callBackContext;
183
184static UBool U_EXPORT2 U_CALLCONV
185TestCallbackFn(const void *context, int32_t steps) {
186 callBackContext *info = (callBackContext *)context;
187 if (info->lastSteps+1 != steps) {
188 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
189 }
190 info->lastSteps = steps;
191 info->numCalls++;
192 return (info->numCalls < info->maxCalls);
193}
374ca955 194
46f4442e
A
195/*
196 * Regular Expression C API Tests
197 */
374ca955
A
198static void TestRegexCAPI(void) {
199 UErrorCode status = U_ZERO_ERROR;
200 URegularExpression *re;
201 UChar pat[200];
202 UChar *minus1;
203
204 memset(&minus1, -1, sizeof(minus1));
205
206 /* Minimalist open/close */
207 u_uastrncpy(pat, "abc*", sizeof(pat)/2);
208 re = uregex_open(pat, -1, 0, 0, &status);
46f4442e 209 if (U_FAILURE(status)) {
729e4ab9 210 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
46f4442e
A
211 return;
212 }
374ca955
A
213 uregex_close(re);
214
215 /* Open with all flag values set */
216 status = U_ZERO_ERROR;
217 re = uregex_open(pat, -1,
4388f060 218 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
374ca955
A
219 0, &status);
220 TEST_ASSERT_SUCCESS(status);
221 uregex_close(re);
222
223 /* Open with an invalid flag */
224 status = U_ZERO_ERROR;
225 re = uregex_open(pat, -1, 0x40000000, 0, &status);
226 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
227 uregex_close(re);
228
729e4ab9
A
229 /* Open with an unimplemented flag */
230 status = U_ZERO_ERROR;
4388f060 231 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
729e4ab9
A
232 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
233 uregex_close(re);
234
73c04bcf
A
235 /* openC with an invalid parameter */
236 status = U_ZERO_ERROR;
237 re = uregex_openC(NULL,
238 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
239 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
240
241 /* openC with an invalid parameter */
242 status = U_USELESS_COLLATOR_ERROR;
243 re = uregex_openC(NULL,
244 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
245 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
374ca955
A
246
247 /* openC open from a C string */
248 {
249 const UChar *p;
250 int32_t len;
251 status = U_ZERO_ERROR;
252 re = uregex_openC("abc*", 0, 0, &status);
253 TEST_ASSERT_SUCCESS(status);
254 p = uregex_pattern(re, &len, &status);
255 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
256
257 /* The TEST_ASSERT_SUCCESS above should change too... */
258 if(U_SUCCESS(status)) {
259 u_uastrncpy(pat, "abc*", sizeof(pat)/2);
260 TEST_ASSERT(u_strcmp(pat, p) == 0);
261 TEST_ASSERT(len==(int32_t)strlen("abc*"));
262 }
374ca955
A
263
264 uregex_close(re);
265
266 /* TODO: Open with ParseError parameter */
267 }
268
269 /*
270 * clone
271 */
272 {
273 URegularExpression *clone1;
274 URegularExpression *clone2;
275 URegularExpression *clone3;
276 UChar testString1[30];
277 UChar testString2[30];
278 UBool result;
279
280
281 status = U_ZERO_ERROR;
282 re = uregex_openC("abc*", 0, 0, &status);
283 TEST_ASSERT_SUCCESS(status);
284 clone1 = uregex_clone(re, &status);
285 TEST_ASSERT_SUCCESS(status);
286 TEST_ASSERT(clone1 != NULL);
287
288 status = U_ZERO_ERROR;
289 clone2 = uregex_clone(re, &status);
290 TEST_ASSERT_SUCCESS(status);
291 TEST_ASSERT(clone2 != NULL);
292 uregex_close(re);
293
294 status = U_ZERO_ERROR;
295 clone3 = uregex_clone(clone2, &status);
296 TEST_ASSERT_SUCCESS(status);
297 TEST_ASSERT(clone3 != NULL);
298
299 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
300 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
301
302 status = U_ZERO_ERROR;
303 uregex_setText(clone1, testString1, -1, &status);
304 TEST_ASSERT_SUCCESS(status);
305 result = uregex_lookingAt(clone1, 0, &status);
306 TEST_ASSERT_SUCCESS(status);
307 TEST_ASSERT(result==TRUE);
308
309 status = U_ZERO_ERROR;
310 uregex_setText(clone2, testString2, -1, &status);
311 TEST_ASSERT_SUCCESS(status);
312 result = uregex_lookingAt(clone2, 0, &status);
313 TEST_ASSERT_SUCCESS(status);
314 TEST_ASSERT(result==FALSE);
315 result = uregex_find(clone2, 0, &status);
316 TEST_ASSERT_SUCCESS(status);
317 TEST_ASSERT(result==TRUE);
318
319 uregex_close(clone1);
320 uregex_close(clone2);
321 uregex_close(clone3);
322
323 }
324
325 /*
326 * pattern()
327 */
328 {
329 const UChar *resultPat;
330 int32_t resultLen;
331 u_uastrncpy(pat, "hello", sizeof(pat)/2);
332 status = U_ZERO_ERROR;
333 re = uregex_open(pat, -1, 0, NULL, &status);
334 resultPat = uregex_pattern(re, &resultLen, &status);
335 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
336
337 /* The TEST_ASSERT_SUCCESS above should change too... */
338 if (U_SUCCESS(status)) {
339 TEST_ASSERT(resultLen == -1);
340 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
341 }
342
374ca955
A
343 uregex_close(re);
344
345 status = U_ZERO_ERROR;
346 re = uregex_open(pat, 3, 0, NULL, &status);
347 resultPat = uregex_pattern(re, &resultLen, &status);
348 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
349 TEST_ASSERT_SUCCESS(status);
350
351 /* The TEST_ASSERT_SUCCESS above should change too... */
352 if (U_SUCCESS(status)) {
353 TEST_ASSERT(resultLen == 3);
354 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
355 TEST_ASSERT(u_strlen(resultPat) == 3);
356 }
357
374ca955
A
358 uregex_close(re);
359 }
360
361 /*
362 * flags()
363 */
364 {
365 int32_t t;
366
367 status = U_ZERO_ERROR;
368 re = uregex_open(pat, -1, 0, NULL, &status);
369 t = uregex_flags(re, &status);
370 TEST_ASSERT_SUCCESS(status);
371 TEST_ASSERT(t == 0);
372 uregex_close(re);
373
374 status = U_ZERO_ERROR;
375 re = uregex_open(pat, -1, 0, NULL, &status);
376 t = uregex_flags(re, &status);
377 TEST_ASSERT_SUCCESS(status);
378 TEST_ASSERT(t == 0);
379 uregex_close(re);
380
381 status = U_ZERO_ERROR;
382 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
383 t = uregex_flags(re, &status);
384 TEST_ASSERT_SUCCESS(status);
385 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
386 uregex_close(re);
387 }
388
389 /*
390 * setText() and lookingAt()
391 */
392 {
393 UChar text1[50];
394 UChar text2[50];
395 UBool result;
396
397 u_uastrncpy(text1, "abcccd", sizeof(text1)/2);
398 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
399 status = U_ZERO_ERROR;
400 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
401 re = uregex_open(pat, -1, 0, NULL, &status);
402 TEST_ASSERT_SUCCESS(status);
403
404 /* Operation before doing a setText should fail... */
405 status = U_ZERO_ERROR;
406 uregex_lookingAt(re, 0, &status);
407 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
408
409 status = U_ZERO_ERROR;
410 uregex_setText(re, text1, -1, &status);
411 result = uregex_lookingAt(re, 0, &status);
412 TEST_ASSERT(result == TRUE);
413 TEST_ASSERT_SUCCESS(status);
414
415 status = U_ZERO_ERROR;
416 uregex_setText(re, text2, -1, &status);
417 result = uregex_lookingAt(re, 0, &status);
418 TEST_ASSERT(result == FALSE);
419 TEST_ASSERT_SUCCESS(status);
420
421 status = U_ZERO_ERROR;
422 uregex_setText(re, text1, -1, &status);
423 result = uregex_lookingAt(re, 0, &status);
424 TEST_ASSERT(result == TRUE);
425 TEST_ASSERT_SUCCESS(status);
426
427 status = U_ZERO_ERROR;
428 uregex_setText(re, text1, 5, &status);
429 result = uregex_lookingAt(re, 0, &status);
430 TEST_ASSERT(result == FALSE);
431 TEST_ASSERT_SUCCESS(status);
432
433 status = U_ZERO_ERROR;
434 uregex_setText(re, text1, 6, &status);
435 result = uregex_lookingAt(re, 0, &status);
436 TEST_ASSERT(result == TRUE);
437 TEST_ASSERT_SUCCESS(status);
438
439 uregex_close(re);
440 }
441
442
443 /*
444 * getText()
445 */
446 {
447 UChar text1[50];
448 UChar text2[50];
449 const UChar *result;
450 int32_t textLength;
451
452 u_uastrncpy(text1, "abcccd", sizeof(text1)/2);
453 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
454 status = U_ZERO_ERROR;
455 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
456 re = uregex_open(pat, -1, 0, NULL, &status);
457
458 uregex_setText(re, text1, -1, &status);
459 result = uregex_getText(re, &textLength, &status);
460 TEST_ASSERT(result == text1);
461 TEST_ASSERT(textLength == -1);
462 TEST_ASSERT_SUCCESS(status);
463
464 status = U_ZERO_ERROR;
465 uregex_setText(re, text2, 7, &status);
466 result = uregex_getText(re, &textLength, &status);
467 TEST_ASSERT(result == text2);
468 TEST_ASSERT(textLength == 7);
469 TEST_ASSERT_SUCCESS(status);
470
471 status = U_ZERO_ERROR;
472 uregex_setText(re, text2, 4, &status);
473 result = uregex_getText(re, &textLength, &status);
474 TEST_ASSERT(result == text2);
475 TEST_ASSERT(textLength == 4);
476 TEST_ASSERT_SUCCESS(status);
477 uregex_close(re);
478 }
479
480 /*
481 * matches()
482 */
483 {
484 UChar text1[50];
485 UBool result;
486 int len;
487 UChar nullString[] = {0,0,0};
488
489 u_uastrncpy(text1, "abcccde", sizeof(text1)/2);
490 status = U_ZERO_ERROR;
491 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
492 re = uregex_open(pat, -1, 0, NULL, &status);
493
494 uregex_setText(re, text1, -1, &status);
495 result = uregex_matches(re, 0, &status);
496 TEST_ASSERT(result == FALSE);
497 TEST_ASSERT_SUCCESS(status);
498
499 status = U_ZERO_ERROR;
500 uregex_setText(re, text1, 6, &status);
501 result = uregex_matches(re, 0, &status);
502 TEST_ASSERT(result == TRUE);
503 TEST_ASSERT_SUCCESS(status);
504
505 status = U_ZERO_ERROR;
506 uregex_setText(re, text1, 6, &status);
507 result = uregex_matches(re, 1, &status);
508 TEST_ASSERT(result == FALSE);
509 TEST_ASSERT_SUCCESS(status);
510 uregex_close(re);
511
512 status = U_ZERO_ERROR;
513 re = uregex_openC(".?", 0, NULL, &status);
514 uregex_setText(re, text1, -1, &status);
515 len = u_strlen(text1);
516 result = uregex_matches(re, len, &status);
517 TEST_ASSERT(result == TRUE);
518 TEST_ASSERT_SUCCESS(status);
519
520 status = U_ZERO_ERROR;
521 uregex_setText(re, nullString, -1, &status);
522 TEST_ASSERT_SUCCESS(status);
523 result = uregex_matches(re, 0, &status);
524 TEST_ASSERT(result == TRUE);
525 TEST_ASSERT_SUCCESS(status);
526 uregex_close(re);
527 }
528
529
530 /*
531 * lookingAt() Used in setText test.
532 */
533
534
535 /*
536 * find(), findNext, start, end, reset
537 */
538 {
539 UChar text1[50];
540 UBool result;
541 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2);
542 status = U_ZERO_ERROR;
543 re = uregex_openC("rx", 0, NULL, &status);
544
545 uregex_setText(re, text1, -1, &status);
546 result = uregex_find(re, 0, &status);
547 TEST_ASSERT(result == TRUE);
548 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
549 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
550 TEST_ASSERT_SUCCESS(status);
551
552 result = uregex_find(re, 9, &status);
553 TEST_ASSERT(result == TRUE);
554 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
555 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
556 TEST_ASSERT_SUCCESS(status);
557
558 result = uregex_find(re, 14, &status);
559 TEST_ASSERT(result == FALSE);
560 TEST_ASSERT_SUCCESS(status);
561
562 status = U_ZERO_ERROR;
563 uregex_reset(re, 0, &status);
564
565 result = uregex_findNext(re, &status);
566 TEST_ASSERT(result == TRUE);
567 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
568 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
569 TEST_ASSERT_SUCCESS(status);
570
571 result = uregex_findNext(re, &status);
572 TEST_ASSERT(result == TRUE);
573 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
574 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
575 TEST_ASSERT_SUCCESS(status);
576
577 status = U_ZERO_ERROR;
578 uregex_reset(re, 12, &status);
579
580 result = uregex_findNext(re, &status);
581 TEST_ASSERT(result == TRUE);
582 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
583 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
584 TEST_ASSERT_SUCCESS(status);
585
586 result = uregex_findNext(re, &status);
587 TEST_ASSERT(result == FALSE);
588 TEST_ASSERT_SUCCESS(status);
589
590 uregex_close(re);
591 }
592
593 /*
594 * groupCount
595 */
596 {
597 int32_t result;
598
599 status = U_ZERO_ERROR;
600 re = uregex_openC("abc", 0, NULL, &status);
601 result = uregex_groupCount(re, &status);
602 TEST_ASSERT_SUCCESS(status);
603 TEST_ASSERT(result == 0);
604 uregex_close(re);
605
606 status = U_ZERO_ERROR;
607 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
608 result = uregex_groupCount(re, &status);
609 TEST_ASSERT_SUCCESS(status);
610 TEST_ASSERT(result == 3);
611 uregex_close(re);
612
613 }
614
615
616 /*
617 * group()
618 */
619 {
620 UChar text1[80];
621 UChar buf[80];
622 UBool result;
623 int32_t resultSz;
624 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
625
626 status = U_ZERO_ERROR;
627 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
628 TEST_ASSERT_SUCCESS(status);
629
630
631 uregex_setText(re, text1, -1, &status);
632 result = uregex_find(re, 0, &status);
633 TEST_ASSERT(result==TRUE);
634
635 /* Capture Group 0, the full match. Should succeed. */
636 status = U_ZERO_ERROR;
637 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
638 TEST_ASSERT_SUCCESS(status);
639 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
640 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
641
642 /* Capture group #1. Should succeed. */
643 status = U_ZERO_ERROR;
644 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
645 TEST_ASSERT_SUCCESS(status);
646 TEST_ASSERT_STRING(" interior ", buf, TRUE);
647 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
648
649 /* Capture group out of range. Error. */
650 status = U_ZERO_ERROR;
651 uregex_group(re, 2, buf, sizeof(buf)/2, &status);
652 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
653
654 /* NULL buffer, pure pre-flight */
655 status = U_ZERO_ERROR;
656 resultSz = uregex_group(re, 0, NULL, 0, &status);
657 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
658 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
659
660 /* Too small buffer, truncated string */
661 status = U_ZERO_ERROR;
662 memset(buf, -1, sizeof(buf));
663 resultSz = uregex_group(re, 0, buf, 5, &status);
664 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
665 TEST_ASSERT_STRING("abc i", buf, FALSE);
666 TEST_ASSERT(buf[5] == (UChar)0xffff);
667 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
668
669 /* Output string just fits buffer, no NUL term. */
670 status = U_ZERO_ERROR;
671 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
672 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
673 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
674 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
675 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
676
677 uregex_close(re);
678
679 }
46f4442e
A
680
681 /*
682 * Regions
683 */
684
685
686 /* SetRegion(), getRegion() do something */
687 TEST_SETUP(".*", "0123456789ABCDEF", 0)
688 UChar resultString[40];
689 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
690 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
691 uregex_setRegion(re, 3, 6, &status);
692 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
693 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
694 TEST_ASSERT(uregex_findNext(re, &status));
695 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
696 TEST_ASSERT_STRING("345", resultString, TRUE);
697 TEST_TEARDOWN;
698
699 /* find(start=-1) uses regions */
700 TEST_SETUP(".*", "0123456789ABCDEF", 0);
701 uregex_setRegion(re, 4, 6, &status);
702 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
703 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
704 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
705 TEST_TEARDOWN;
706
707 /* find (start >=0) does not use regions */
708 TEST_SETUP(".*", "0123456789ABCDEF", 0);
709 uregex_setRegion(re, 4, 6, &status);
710 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
711 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
712 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
713 TEST_TEARDOWN;
714
715 /* findNext() obeys regions */
716 TEST_SETUP(".", "0123456789ABCDEF", 0);
717 uregex_setRegion(re, 4, 6, &status);
718 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
719 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
720 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
721 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
722 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
723 TEST_TEARDOWN;
724
725 /* matches(start=-1) uses regions */
726 /* Also, verify that non-greedy *? succeeds in finding the full match. */
727 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
728 uregex_setRegion(re, 4, 6, &status);
729 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
730 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
731 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
732 TEST_TEARDOWN;
733
734 /* matches (start >=0) does not use regions */
735 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
736 uregex_setRegion(re, 4, 6, &status);
737 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
738 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
739 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
740 TEST_TEARDOWN;
741
742 /* lookingAt(start=-1) uses regions */
743 /* Also, verify that non-greedy *? finds the first (shortest) match. */
744 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
745 uregex_setRegion(re, 4, 6, &status);
746 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
747 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
748 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
749 TEST_TEARDOWN;
750
751 /* lookingAt (start >=0) does not use regions */
752 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
753 uregex_setRegion(re, 4, 6, &status);
754 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
755 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
756 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
757 TEST_TEARDOWN;
758
759 /* hitEnd() */
760 TEST_SETUP("[a-f]*", "abcdefghij", 0);
761 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
762 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
763 TEST_TEARDOWN;
764
765 TEST_SETUP("[a-f]*", "abcdef", 0);
766 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
767 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
768 TEST_TEARDOWN;
769
770 /* requireEnd */
771 TEST_SETUP("abcd", "abcd", 0);
772 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
773 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
774 TEST_TEARDOWN;
775
776 TEST_SETUP("abcd$", "abcd", 0);
777 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
778 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
779 TEST_TEARDOWN;
780
781 /* anchoringBounds */
782 TEST_SETUP("abc$", "abcdef", 0);
783 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
784 uregex_useAnchoringBounds(re, FALSE, &status);
785 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
786
787 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
788 uregex_useAnchoringBounds(re, TRUE, &status);
789 uregex_setRegion(re, 0, 3, &status);
790 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
791 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
792 TEST_TEARDOWN;
793
794 /* Transparent Bounds */
795 TEST_SETUP("abc(?=def)", "abcdef", 0);
796 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
797 uregex_useTransparentBounds(re, TRUE, &status);
798 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
799
800 uregex_useTransparentBounds(re, FALSE, &status);
801 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
802 uregex_setRegion(re, 0, 3, &status);
803 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
804 uregex_useTransparentBounds(re, TRUE, &status);
805 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
806 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
807 TEST_TEARDOWN;
808
374ca955
A
809
810 /*
811 * replaceFirst()
812 */
813 {
814 UChar text1[80];
815 UChar text2[80];
816 UChar replText[80];
817 UChar buf[80];
818 int32_t resultSz;
819 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
820 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
821 u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
822
823 status = U_ZERO_ERROR;
824 re = uregex_openC("x(.*?)x", 0, NULL, &status);
825 TEST_ASSERT_SUCCESS(status);
826
827 /* Normal case, with match */
828 uregex_setText(re, text1, -1, &status);
829 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
830 TEST_ASSERT_SUCCESS(status);
831 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
832 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
833
834 /* No match. Text should copy to output with no changes. */
835 status = U_ZERO_ERROR;
836 uregex_setText(re, text2, -1, &status);
837 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
838 TEST_ASSERT_SUCCESS(status);
839 TEST_ASSERT_STRING("No match here.", buf, TRUE);
840 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
841
842 /* Match, output just fills buffer, no termination warning. */
843 status = U_ZERO_ERROR;
844 uregex_setText(re, text1, -1, &status);
845 memset(buf, -1, sizeof(buf));
846 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
847 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
848 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
849 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
850 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
851
852 /* Do the replaceFirst again, without first resetting anything.
853 * Should give the same results.
854 */
855 status = U_ZERO_ERROR;
856 memset(buf, -1, sizeof(buf));
857 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
858 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
859 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
860 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
861 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
862
863 /* NULL buffer, zero buffer length */
864 status = U_ZERO_ERROR;
865 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
866 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
867 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
868
869 /* Buffer too small by one */
870 status = U_ZERO_ERROR;
871 memset(buf, -1, sizeof(buf));
872 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
873 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
874 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
875 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
876 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
877
878 uregex_close(re);
879 }
880
881
882 /*
883 * replaceAll()
884 */
885 {
729e4ab9
A
886 UChar text1[80]; /* "Replace xaax x1x x...x." */
887 UChar text2[80]; /* "No match Here" */
888 UChar replText[80]; /* "<$1>" */
889 UChar replText2[80]; /* "<<$1>>" */
890 const char * pattern = "x(.*?)x";
891 const char * expectedResult = "Replace <aa> <1> <...>.";
892 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
374ca955 893 UChar buf[80];
729e4ab9 894 int32_t resultSize;
374ca955 895 int32_t expectedResultSize;
729e4ab9 896 int32_t expectedResultSize2;
374ca955
A
897 int32_t i;
898
899 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
900 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
901 u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
729e4ab9
A
902 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
903 expectedResultSize = strlen(expectedResult);
904 expectedResultSize2 = strlen(expectedResult2);
374ca955
A
905
906 status = U_ZERO_ERROR;
729e4ab9 907 re = uregex_openC(pattern, 0, NULL, &status);
374ca955
A
908 TEST_ASSERT_SUCCESS(status);
909
910 /* Normal case, with match */
911 uregex_setText(re, text1, -1, &status);
729e4ab9 912 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
374ca955 913 TEST_ASSERT_SUCCESS(status);
729e4ab9
A
914 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
915 TEST_ASSERT(resultSize == expectedResultSize);
374ca955
A
916
917 /* No match. Text should copy to output with no changes. */
918 status = U_ZERO_ERROR;
919 uregex_setText(re, text2, -1, &status);
729e4ab9 920 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
374ca955
A
921 TEST_ASSERT_SUCCESS(status);
922 TEST_ASSERT_STRING("No match here.", buf, TRUE);
729e4ab9 923 TEST_ASSERT(resultSize == u_strlen(text2));
374ca955
A
924
925 /* Match, output just fills buffer, no termination warning. */
926 status = U_ZERO_ERROR;
927 uregex_setText(re, text1, -1, &status);
928 memset(buf, -1, sizeof(buf));
729e4ab9 929 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
374ca955 930 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
729e4ab9
A
931 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
932 TEST_ASSERT(resultSize == expectedResultSize);
933 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
934
935 /* Do the replaceFirst again, without first resetting anything.
936 * Should give the same results.
937 */
938 status = U_ZERO_ERROR;
939 memset(buf, -1, sizeof(buf));
729e4ab9 940 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
374ca955
A
941 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
942 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
729e4ab9
A
943 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
944 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
945
946 /* NULL buffer, zero buffer length */
947 status = U_ZERO_ERROR;
729e4ab9 948 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
374ca955 949 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 950 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
374ca955
A
951
952 /* Buffer too small. Try every size, which will tickle edge cases
953 * in uregex_appendReplacement (used by replaceAll) */
954 for (i=0; i<expectedResultSize; i++) {
955 char expected[80];
956 status = U_ZERO_ERROR;
957 memset(buf, -1, sizeof(buf));
729e4ab9
A
958 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
959 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
960 strcpy(expected, expectedResult);
961 expected[i] = 0;
962 TEST_ASSERT_STRING(expected, buf, FALSE);
963 TEST_ASSERT(resultSize == expectedResultSize);
964 TEST_ASSERT(buf[i] == (UChar)0xffff);
965 }
966
967 /* Buffer too small. Same as previous test, except this time the replacement
968 * text is longer than the match capture group, making the length of the complete
969 * replacement longer than the original string.
970 */
971 for (i=0; i<expectedResultSize2; i++) {
972 char expected[80];
973 status = U_ZERO_ERROR;
974 memset(buf, -1, sizeof(buf));
975 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
374ca955 976 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 977 strcpy(expected, expectedResult2);
374ca955
A
978 expected[i] = 0;
979 TEST_ASSERT_STRING(expected, buf, FALSE);
729e4ab9 980 TEST_ASSERT(resultSize == expectedResultSize2);
374ca955
A
981 TEST_ASSERT(buf[i] == (UChar)0xffff);
982 }
983
729e4ab9 984
374ca955
A
985 uregex_close(re);
986 }
987
988
989 /*
990 * appendReplacement()
991 */
992 {
993 UChar text[100];
994 UChar repl[100];
995 UChar buf[100];
996 UChar *bufPtr;
997 int32_t bufCap;
998
999
1000 status = U_ZERO_ERROR;
1001 re = uregex_openC(".*", 0, 0, &status);
1002 TEST_ASSERT_SUCCESS(status);
1003
1004 u_uastrncpy(text, "whatever", sizeof(text)/2);
1005 u_uastrncpy(repl, "some other", sizeof(repl)/2);
1006 uregex_setText(re, text, -1, &status);
1007
1008 /* match covers whole target string */
1009 uregex_find(re, 0, &status);
1010 TEST_ASSERT_SUCCESS(status);
1011 bufPtr = buf;
1012 bufCap = sizeof(buf) / 2;
1013 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1014 TEST_ASSERT_SUCCESS(status);
1015 TEST_ASSERT_STRING("some other", buf, TRUE);
1016
1017 /* Replacement text has \u \U escapes */
1018 uregex_find(re, 0, &status);
1019 TEST_ASSERT_SUCCESS(status);
1020 bufPtr = buf;
1021 bufCap = sizeof(buf) / 2;
1022 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1023 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1024 TEST_ASSERT_SUCCESS(status);
1025 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1026
729e4ab9
A
1027 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1028 status = U_ZERO_ERROR;
1029 uregex_find(re, 0, &status);
1030 TEST_ASSERT_SUCCESS(status);
1031 bufPtr = buf;
1032 status = U_BUFFER_OVERFLOW_ERROR;
1033 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1034 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1035
374ca955
A
1036 uregex_close(re);
1037 }
1038
1039
1040 /*
1041 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1042 */
1043
1044 /*
1045 * split()
1046 */
1047 {
1048 UChar textToSplit[80];
1049 UChar text2[80];
1050 UChar buf[200];
1051 UChar *fields[10];
1052 int32_t numFields;
1053 int32_t requiredCapacity;
1054 int32_t spaceNeeded;
1055 int32_t sz;
1056
1057 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2);
1058 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1059
1060 status = U_ZERO_ERROR;
1061 re = uregex_openC(":", 0, NULL, &status);
1062
1063
1064 /* Simple split */
1065
1066 uregex_setText(re, textToSplit, -1, &status);
1067 TEST_ASSERT_SUCCESS(status);
1068
73c04bcf
A
1069 /* The TEST_ASSERT_SUCCESS call above should change too... */
1070 if (U_SUCCESS(status)) {
1071 memset(fields, -1, sizeof(fields));
1072 numFields =
1073 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1074 TEST_ASSERT_SUCCESS(status);
374ca955 1075
73c04bcf
A
1076 /* The TEST_ASSERT_SUCCESS call above should change too... */
1077 if(U_SUCCESS(status)) {
1078 TEST_ASSERT(numFields == 3);
1079 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1080 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1081 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1082 TEST_ASSERT(fields[3] == NULL);
1083
1084 spaceNeeded = u_strlen(textToSplit) -
1085 (numFields - 1) + /* Field delimiters do not appear in output */
1086 numFields; /* Each field gets a NUL terminator */
1087
1088 TEST_ASSERT(spaceNeeded == requiredCapacity);
1089 }
1090 }
374ca955 1091
374ca955
A
1092 uregex_close(re);
1093
1094
1095 /* Split with too few output strings available */
1096 status = U_ZERO_ERROR;
1097 re = uregex_openC(":", 0, NULL, &status);
1098 uregex_setText(re, textToSplit, -1, &status);
1099 TEST_ASSERT_SUCCESS(status);
1100
73c04bcf
A
1101 /* The TEST_ASSERT_SUCCESS call above should change too... */
1102 if(U_SUCCESS(status)) {
374ca955 1103 memset(fields, -1, sizeof(fields));
374ca955 1104 numFields =
73c04bcf
A
1105 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1106 TEST_ASSERT_SUCCESS(status);
1107
1108 /* The TEST_ASSERT_SUCCESS call above should change too... */
1109 if(U_SUCCESS(status)) {
1110 TEST_ASSERT(numFields == 2);
374ca955 1111 TEST_ASSERT_STRING("first ", fields[0], TRUE);
73c04bcf
A
1112 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1113 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1114
1115 spaceNeeded = u_strlen(textToSplit) -
1116 (numFields - 1) + /* Field delimiters do not appear in output */
1117 numFields; /* Each field gets a NUL terminator */
1118
1119 TEST_ASSERT(spaceNeeded == requiredCapacity);
1120
1121 /* Split with a range of output buffer sizes. */
1122 spaceNeeded = u_strlen(textToSplit) -
1123 (numFields - 1) + /* Field delimiters do not appear in output */
1124 numFields; /* Each field gets a NUL terminator */
1125
1126 for (sz=0; sz < spaceNeeded+1; sz++) {
1127 memset(fields, -1, sizeof(fields));
1128 status = U_ZERO_ERROR;
1129 numFields =
1130 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1131 if (sz >= spaceNeeded) {
1132 TEST_ASSERT_SUCCESS(status);
1133 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1134 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1135 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1136 } else {
1137 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1138 }
1139 TEST_ASSERT(numFields == 3);
1140 TEST_ASSERT(fields[3] == NULL);
1141 TEST_ASSERT(spaceNeeded == requiredCapacity);
1142 }
374ca955 1143 }
374ca955 1144 }
73c04bcf 1145
374ca955
A
1146 uregex_close(re);
1147 }
1148
1149
1150
1151
1152 /* Split(), part 2. Patterns with capture groups. The capture group text
1153 * comes out as additional fields. */
1154 {
1155 UChar textToSplit[80];
1156 UChar buf[200];
1157 UChar *fields[10];
1158 int32_t numFields;
1159 int32_t requiredCapacity;
1160 int32_t spaceNeeded;
1161 int32_t sz;
1162
1163 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2);
1164
1165 status = U_ZERO_ERROR;
1166 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1167
1168 uregex_setText(re, textToSplit, -1, &status);
1169 TEST_ASSERT_SUCCESS(status);
1170
73c04bcf
A
1171 /* The TEST_ASSERT_SUCCESS call above should change too... */
1172 if(U_SUCCESS(status)) {
1173 memset(fields, -1, sizeof(fields));
1174 numFields =
1175 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1176 TEST_ASSERT_SUCCESS(status);
374ca955 1177
73c04bcf
A
1178 /* The TEST_ASSERT_SUCCESS call above should change too... */
1179 if(U_SUCCESS(status)) {
1180 TEST_ASSERT(numFields == 5);
1181 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1182 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1183 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1184 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1185 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1186 TEST_ASSERT(fields[5] == NULL);
1187 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1188 TEST_ASSERT(spaceNeeded == requiredCapacity);
1189 }
1190 }
374ca955
A
1191
1192 /* Split with too few output strings available (2) */
1193 status = U_ZERO_ERROR;
1194 memset(fields, -1, sizeof(fields));
1195 numFields =
1196 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1197 TEST_ASSERT_SUCCESS(status);
374ca955 1198
73c04bcf
A
1199 /* The TEST_ASSERT_SUCCESS call above should change too... */
1200 if(U_SUCCESS(status)) {
1201 TEST_ASSERT(numFields == 2);
1202 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1203 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1204 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1205
1206 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1207 TEST_ASSERT(spaceNeeded == requiredCapacity);
1208 }
374ca955
A
1209
1210 /* Split with too few output strings available (3) */
1211 status = U_ZERO_ERROR;
1212 memset(fields, -1, sizeof(fields));
1213 numFields =
1214 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1215 TEST_ASSERT_SUCCESS(status);
374ca955 1216
73c04bcf
A
1217 /* The TEST_ASSERT_SUCCESS call above should change too... */
1218 if(U_SUCCESS(status)) {
1219 TEST_ASSERT(numFields == 3);
1220 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1221 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1222 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1223 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1224
1225 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1226 TEST_ASSERT(spaceNeeded == requiredCapacity);
1227 }
374ca955
A
1228
1229 /* Split with just enough output strings available (5) */
1230 status = U_ZERO_ERROR;
1231 memset(fields, -1, sizeof(fields));
1232 numFields =
1233 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1234 TEST_ASSERT_SUCCESS(status);
374ca955 1235
73c04bcf
A
1236 /* The TEST_ASSERT_SUCCESS call above should change too... */
1237 if(U_SUCCESS(status)) {
1238 TEST_ASSERT(numFields == 5);
1239 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1240 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1241 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1242 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1243 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1244 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
374ca955 1245
73c04bcf
A
1246 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1247 TEST_ASSERT(spaceNeeded == requiredCapacity);
1248 }
374ca955
A
1249
1250 /* Split, end of text is a field delimiter. */
1251 status = U_ZERO_ERROR;
1252 sz = strlen("first <tag-a> second<tag-b>");
1253 uregex_setText(re, textToSplit, sz, &status);
1254 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
1255
1256 /* The TEST_ASSERT_SUCCESS call above should change too... */
1257 if(U_SUCCESS(status)) {
1258 memset(fields, -1, sizeof(fields));
1259 numFields =
1260 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1261 TEST_ASSERT_SUCCESS(status);
1262
1263 /* The TEST_ASSERT_SUCCESS call above should change too... */
1264 if(U_SUCCESS(status)) {
4388f060 1265 TEST_ASSERT(numFields == 5);
73c04bcf
A
1266 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1267 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1268 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1269 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
4388f060
A
1270 TEST_ASSERT_STRING("", fields[4], TRUE);
1271 TEST_ASSERT(fields[5] == NULL);
73c04bcf
A
1272 TEST_ASSERT(fields[8] == NULL);
1273 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
4388f060 1274 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
73c04bcf
A
1275 TEST_ASSERT(spaceNeeded == requiredCapacity);
1276 }
1277 }
374ca955
A
1278
1279 uregex_close(re);
1280 }
1281
46f4442e
A
1282 /*
1283 * set/getTimeLimit
1284 */
1285 TEST_SETUP("abc$", "abcdef", 0);
1286 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1287 uregex_setTimeLimit(re, 1000, &status);
1288 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1289 TEST_ASSERT_SUCCESS(status);
1290 uregex_setTimeLimit(re, -1, &status);
1291 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1292 status = U_ZERO_ERROR;
1293 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1294 TEST_TEARDOWN;
1295
1296 /*
1297 * set/get Stack Limit
1298 */
1299 TEST_SETUP("abc$", "abcdef", 0);
1300 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1301 uregex_setStackLimit(re, 40000, &status);
1302 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1303 TEST_ASSERT_SUCCESS(status);
1304 uregex_setStackLimit(re, -1, &status);
1305 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1306 status = U_ZERO_ERROR;
1307 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1308 TEST_TEARDOWN;
1309
1310
1311 /*
1312 * Get/Set callback functions
1313 * This test is copied from intltest regex/Callbacks
1314 * The pattern and test data will run long enough to cause the callback
1315 * to be invoked. The nested '+' operators give exponential time
1316 * behavior with increasing string length.
1317 */
1318 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1319 callBackContext cbInfo = {4, 0, 0};
1320 const void *pContext = &cbInfo;
1321 URegexMatchCallback *returnedFn = &TestCallbackFn;
1322
1323 /* Getting the callback fn when it hasn't been set must return NULL */
1324 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1325 TEST_ASSERT_SUCCESS(status);
1326 TEST_ASSERT(returnedFn == NULL);
1327 TEST_ASSERT(pContext == NULL);
1328
1329 /* Set the callback and do a match. */
1330 /* The callback function should record that it has been called. */
1331 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1332 TEST_ASSERT_SUCCESS(status);
1333 TEST_ASSERT(cbInfo.numCalls == 0);
1334 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1335 TEST_ASSERT_SUCCESS(status);
1336 TEST_ASSERT(cbInfo.numCalls > 0);
1337
1338 /* Getting the callback should return the values that were set above. */
1339 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1340 TEST_ASSERT(returnedFn == &TestCallbackFn);
1341 TEST_ASSERT(pContext == &cbInfo);
1342
1343 TEST_TEARDOWN;
374ca955
A
1344}
1345
46f4442e
A
1346
1347
73c04bcf
A
1348static void TestBug4315(void) {
1349 UErrorCode theICUError = U_ZERO_ERROR;
1350 URegularExpression *theRegEx;
1351 UChar *textBuff;
1352 const char *thePattern;
1353 UChar theString[100];
1354 UChar *destFields[24];
1355 int32_t neededLength1;
1356 int32_t neededLength2;
1357
1358 int32_t wordCount = 0;
1359 int32_t destFieldsSize = 24;
1360
1361 thePattern = "ck ";
1362 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1363
1364 /* open a regex */
1365 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1366 TEST_ASSERT_SUCCESS(theICUError);
1367
1368 /* set the input string */
1369 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1370 TEST_ASSERT_SUCCESS(theICUError);
1371
1372 /* split */
1373 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1374 * error occurs! */
1375 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1376 destFieldsSize, &theICUError);
1377
1378 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1379 TEST_ASSERT(wordCount==3);
1380
1381 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1382 {
1383 theICUError = U_ZERO_ERROR;
1384 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1385 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1386 destFields, destFieldsSize, &theICUError);
1387 TEST_ASSERT(wordCount==3);
1388 TEST_ASSERT_SUCCESS(theICUError);
1389 TEST_ASSERT(neededLength1 == neededLength2);
1390 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1391 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1392 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1393 TEST_ASSERT(destFields[3] == NULL);
1394 free(textBuff);
1395 }
1396 uregex_close(theRegEx);
1397}
1398
729e4ab9
A
1399/* Based on TestRegexCAPI() */
1400static void TestUTextAPI(void) {
1401 UErrorCode status = U_ZERO_ERROR;
1402 URegularExpression *re;
1403 UText patternText = UTEXT_INITIALIZER;
1404 UChar pat[200];
1405 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1406
1407 /* Minimalist open/close */
1408 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1409 re = uregex_openUText(&patternText, 0, 0, &status);
1410 if (U_FAILURE(status)) {
1411 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1412 utext_close(&patternText);
1413 return;
1414 }
1415 uregex_close(re);
1416
1417 /* Open with all flag values set */
1418 status = U_ZERO_ERROR;
1419 re = uregex_openUText(&patternText,
1420 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1421 0, &status);
1422 TEST_ASSERT_SUCCESS(status);
1423 uregex_close(re);
1424
1425 /* Open with an invalid flag */
1426 status = U_ZERO_ERROR;
1427 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1428 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1429 uregex_close(re);
1430
1431 /* open with an invalid parameter */
1432 status = U_ZERO_ERROR;
1433 re = uregex_openUText(NULL,
1434 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1435 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1436
1437 /*
1438 * clone
1439 */
1440 {
1441 URegularExpression *clone1;
1442 URegularExpression *clone2;
1443 URegularExpression *clone3;
1444 UChar testString1[30];
1445 UChar testString2[30];
1446 UBool result;
1447
1448
1449 status = U_ZERO_ERROR;
1450 re = uregex_openUText(&patternText, 0, 0, &status);
1451 TEST_ASSERT_SUCCESS(status);
1452 clone1 = uregex_clone(re, &status);
1453 TEST_ASSERT_SUCCESS(status);
1454 TEST_ASSERT(clone1 != NULL);
1455
1456 status = U_ZERO_ERROR;
1457 clone2 = uregex_clone(re, &status);
1458 TEST_ASSERT_SUCCESS(status);
1459 TEST_ASSERT(clone2 != NULL);
1460 uregex_close(re);
1461
1462 status = U_ZERO_ERROR;
1463 clone3 = uregex_clone(clone2, &status);
1464 TEST_ASSERT_SUCCESS(status);
1465 TEST_ASSERT(clone3 != NULL);
1466
1467 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1468 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1469
1470 status = U_ZERO_ERROR;
1471 uregex_setText(clone1, testString1, -1, &status);
1472 TEST_ASSERT_SUCCESS(status);
1473 result = uregex_lookingAt(clone1, 0, &status);
1474 TEST_ASSERT_SUCCESS(status);
1475 TEST_ASSERT(result==TRUE);
1476
1477 status = U_ZERO_ERROR;
1478 uregex_setText(clone2, testString2, -1, &status);
1479 TEST_ASSERT_SUCCESS(status);
1480 result = uregex_lookingAt(clone2, 0, &status);
1481 TEST_ASSERT_SUCCESS(status);
1482 TEST_ASSERT(result==FALSE);
1483 result = uregex_find(clone2, 0, &status);
1484 TEST_ASSERT_SUCCESS(status);
1485 TEST_ASSERT(result==TRUE);
1486
1487 uregex_close(clone1);
1488 uregex_close(clone2);
1489 uregex_close(clone3);
1490
1491 }
1492
1493 /*
1494 * pattern() and patternText()
1495 */
1496 {
1497 const UChar *resultPat;
1498 int32_t resultLen;
1499 UText *resultText;
1500 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1501 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1502 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1503 status = U_ZERO_ERROR;
1504
1505 utext_openUTF8(&patternText, str_hello, -1, &status);
1506 re = uregex_open(pat, -1, 0, NULL, &status);
1507 resultPat = uregex_pattern(re, &resultLen, &status);
1508 TEST_ASSERT_SUCCESS(status);
1509
1510 /* The TEST_ASSERT_SUCCESS above should change too... */
1511 if (U_SUCCESS(status)) {
1512 TEST_ASSERT(resultLen == -1);
1513 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1514 }
1515
1516 resultText = uregex_patternUText(re, &status);
1517 TEST_ASSERT_SUCCESS(status);
1518 TEST_ASSERT_UTEXT(str_hello, resultText);
1519
1520 uregex_close(re);
1521
1522 status = U_ZERO_ERROR;
1523 re = uregex_open(pat, 3, 0, NULL, &status);
1524 resultPat = uregex_pattern(re, &resultLen, &status);
1525 TEST_ASSERT_SUCCESS(status);
1526
1527 /* The TEST_ASSERT_SUCCESS above should change too... */
1528 if (U_SUCCESS(status)) {
1529 TEST_ASSERT(resultLen == 3);
1530 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1531 TEST_ASSERT(u_strlen(resultPat) == 3);
1532 }
1533
1534 resultText = uregex_patternUText(re, &status);
1535 TEST_ASSERT_SUCCESS(status);
1536 TEST_ASSERT_UTEXT(str_hel, resultText);
1537
1538 uregex_close(re);
1539 }
1540
1541 /*
1542 * setUText() and lookingAt()
1543 */
1544 {
1545 UText text1 = UTEXT_INITIALIZER;
1546 UText text2 = UTEXT_INITIALIZER;
1547 UBool result;
1548 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1549 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1550 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1551 status = U_ZERO_ERROR;
1552 utext_openUTF8(&text1, str_abcccd, -1, &status);
1553 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1554
1555 utext_openUTF8(&patternText, str_abcd, -1, &status);
1556 re = uregex_openUText(&patternText, 0, NULL, &status);
1557 TEST_ASSERT_SUCCESS(status);
1558
1559 /* Operation before doing a setText should fail... */
1560 status = U_ZERO_ERROR;
1561 uregex_lookingAt(re, 0, &status);
1562 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1563
1564 status = U_ZERO_ERROR;
1565 uregex_setUText(re, &text1, &status);
1566 result = uregex_lookingAt(re, 0, &status);
1567 TEST_ASSERT(result == TRUE);
1568 TEST_ASSERT_SUCCESS(status);
1569
1570 status = U_ZERO_ERROR;
1571 uregex_setUText(re, &text2, &status);
1572 result = uregex_lookingAt(re, 0, &status);
1573 TEST_ASSERT(result == FALSE);
1574 TEST_ASSERT_SUCCESS(status);
1575
1576 status = U_ZERO_ERROR;
1577 uregex_setUText(re, &text1, &status);
1578 result = uregex_lookingAt(re, 0, &status);
1579 TEST_ASSERT(result == TRUE);
1580 TEST_ASSERT_SUCCESS(status);
1581
1582 uregex_close(re);
1583 utext_close(&text1);
1584 utext_close(&text2);
1585 }
1586
1587
1588 /*
1589 * getText() and getUText()
1590 */
1591 {
1592 UText text1 = UTEXT_INITIALIZER;
1593 UText text2 = UTEXT_INITIALIZER;
1594 UChar text2Chars[20];
1595 UText *resultText;
1596 const UChar *result;
1597 int32_t textLength;
1598 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1599 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1600 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1601
1602
1603 status = U_ZERO_ERROR;
1604 utext_openUTF8(&text1, str_abcccd, -1, &status);
1605 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
1606 utext_openUChars(&text2, text2Chars, -1, &status);
1607
1608 utext_openUTF8(&patternText, str_abcd, -1, &status);
1609 re = uregex_openUText(&patternText, 0, NULL, &status);
1610
1611 /* First set a UText */
1612 uregex_setUText(re, &text1, &status);
1613 resultText = uregex_getUText(re, NULL, &status);
1614 TEST_ASSERT_SUCCESS(status);
1615 TEST_ASSERT(resultText != &text1);
1616 utext_setNativeIndex(resultText, 0);
1617 utext_setNativeIndex(&text1, 0);
4388f060 1618 TEST_ASSERT(testUTextEqual(resultText, &text1));
729e4ab9
A
1619 utext_close(resultText);
1620
1621 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1622 TEST_ASSERT(textLength == -1 || textLength == 6);
1623 resultText = uregex_getUText(re, NULL, &status);
1624 TEST_ASSERT_SUCCESS(status);
1625 TEST_ASSERT(resultText != &text1);
1626 utext_setNativeIndex(resultText, 0);
1627 utext_setNativeIndex(&text1, 0);
4388f060 1628 TEST_ASSERT(testUTextEqual(resultText, &text1));
729e4ab9
A
1629 utext_close(resultText);
1630
1631 /* Then set a UChar * */
1632 uregex_setText(re, text2Chars, 7, &status);
1633 resultText = uregex_getUText(re, NULL, &status);
1634 TEST_ASSERT_SUCCESS(status);
1635 utext_setNativeIndex(resultText, 0);
1636 utext_setNativeIndex(&text2, 0);
4388f060 1637 TEST_ASSERT(testUTextEqual(resultText, &text2));
729e4ab9
A
1638 utext_close(resultText);
1639 result = uregex_getText(re, &textLength, &status);
1640 TEST_ASSERT(textLength == 7);
1641
1642 uregex_close(re);
1643 utext_close(&text1);
1644 utext_close(&text2);
1645 }
1646
1647 /*
1648 * matches()
1649 */
1650 {
1651 UText text1 = UTEXT_INITIALIZER;
1652 UBool result;
1653 UText nullText = UTEXT_INITIALIZER;
1654 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1655 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1656
1657 status = U_ZERO_ERROR;
1658 utext_openUTF8(&text1, str_abcccde, -1, &status);
1659 utext_openUTF8(&patternText, str_abcd, -1, &status);
1660 re = uregex_openUText(&patternText, 0, NULL, &status);
1661
1662 uregex_setUText(re, &text1, &status);
1663 result = uregex_matches(re, 0, &status);
1664 TEST_ASSERT(result == FALSE);
1665 TEST_ASSERT_SUCCESS(status);
1666 uregex_close(re);
1667
1668 status = U_ZERO_ERROR;
1669 re = uregex_openC(".?", 0, NULL, &status);
1670 uregex_setUText(re, &text1, &status);
1671 result = uregex_matches(re, 7, &status);
1672 TEST_ASSERT(result == TRUE);
1673 TEST_ASSERT_SUCCESS(status);
1674
1675 status = U_ZERO_ERROR;
1676 utext_openUTF8(&nullText, "", -1, &status);
1677 uregex_setUText(re, &nullText, &status);
1678 TEST_ASSERT_SUCCESS(status);
1679 result = uregex_matches(re, 0, &status);
1680 TEST_ASSERT(result == TRUE);
1681 TEST_ASSERT_SUCCESS(status);
1682
1683 uregex_close(re);
1684 utext_close(&text1);
1685 utext_close(&nullText);
1686 }
1687
1688
1689 /*
1690 * lookingAt() Used in setText test.
1691 */
1692
1693
1694 /*
1695 * find(), findNext, start, end, reset
1696 */
1697 {
1698 UChar text1[50];
1699 UBool result;
1700 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2);
1701 status = U_ZERO_ERROR;
1702 re = uregex_openC("rx", 0, NULL, &status);
1703
1704 uregex_setText(re, text1, -1, &status);
1705 result = uregex_find(re, 0, &status);
1706 TEST_ASSERT(result == TRUE);
1707 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1708 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1709 TEST_ASSERT_SUCCESS(status);
1710
1711 result = uregex_find(re, 9, &status);
1712 TEST_ASSERT(result == TRUE);
1713 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1714 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1715 TEST_ASSERT_SUCCESS(status);
1716
1717 result = uregex_find(re, 14, &status);
1718 TEST_ASSERT(result == FALSE);
1719 TEST_ASSERT_SUCCESS(status);
1720
1721 status = U_ZERO_ERROR;
1722 uregex_reset(re, 0, &status);
1723
1724 result = uregex_findNext(re, &status);
1725 TEST_ASSERT(result == TRUE);
1726 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1727 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1728 TEST_ASSERT_SUCCESS(status);
1729
1730 result = uregex_findNext(re, &status);
1731 TEST_ASSERT(result == TRUE);
1732 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1733 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1734 TEST_ASSERT_SUCCESS(status);
1735
1736 status = U_ZERO_ERROR;
1737 uregex_reset(re, 12, &status);
1738
1739 result = uregex_findNext(re, &status);
1740 TEST_ASSERT(result == TRUE);
1741 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1742 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1743 TEST_ASSERT_SUCCESS(status);
1744
1745 result = uregex_findNext(re, &status);
1746 TEST_ASSERT(result == FALSE);
1747 TEST_ASSERT_SUCCESS(status);
1748
1749 uregex_close(re);
1750 }
1751
1752 /*
1753 * group()
1754 */
1755 {
1756 UChar text1[80];
1757 UText *actual;
1758 UBool result;
1759
1760 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1761 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1762
1763
1764 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
1765
1766 status = U_ZERO_ERROR;
1767 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1768 TEST_ASSERT_SUCCESS(status);
1769
1770 uregex_setText(re, text1, -1, &status);
1771 result = uregex_find(re, 0, &status);
1772 TEST_ASSERT(result==TRUE);
1773
1774 /* Capture Group 0, the full match. Should succeed. */
1775 status = U_ZERO_ERROR;
1776 actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1777 TEST_ASSERT_SUCCESS(status);
1778 TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1779 utext_close(actual);
1780
1781 /* Capture Group 0 with shallow clone API. Should succeed. */
1782 status = U_ZERO_ERROR;
1783 {
1784 int64_t group_len;
1785 int32_t len16;
1786 UErrorCode shallowStatus = U_ZERO_ERROR;
1787 int64_t nativeIndex;
1788 UChar *groupChars;
1789 UText groupText = UTEXT_INITIALIZER;
1790
1791 actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1792 TEST_ASSERT_SUCCESS(status);
1793
1794 nativeIndex = utext_getNativeIndex(actual);
1795 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */
1796 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */
4388f060 1797 len16 = (int32_t)group_len;
729e4ab9
A
1798
1799 groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
1800 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1801
1802 utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1803
1804 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1805 utext_close(&groupText);
1806 free(groupChars);
1807 }
1808 utext_close(actual);
1809
1810 /* Capture group #1. Should succeed. */
1811 status = U_ZERO_ERROR;
1812 actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1813 TEST_ASSERT_SUCCESS(status);
1814 TEST_ASSERT_UTEXT(str_interior, actual);
1815 utext_close(actual);
1816
1817 /* Capture group out of range. Error. */
1818 status = U_ZERO_ERROR;
1819 actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1820 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1821 TEST_ASSERT(utext_nativeLength(actual) == 0);
1822 utext_close(actual);
1823
1824 uregex_close(re);
1825
1826 }
1827
1828 /*
1829 * replaceFirst()
1830 */
1831 {
1832 UChar text1[80];
1833 UChar text2[80];
1834 UText replText = UTEXT_INITIALIZER;
1835 UText *result;
1836 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1837 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1838 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1839 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1840 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1841 status = U_ZERO_ERROR;
1842 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
1843 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1844 utext_openUTF8(&replText, str_1x, -1, &status);
1845
1846 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1847 TEST_ASSERT_SUCCESS(status);
1848
1849 /* Normal case, with match */
1850 uregex_setText(re, text1, -1, &status);
1851 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1852 TEST_ASSERT_SUCCESS(status);
1853 TEST_ASSERT_UTEXT(str_Replxxx, result);
1854 utext_close(result);
1855
1856 /* No match. Text should copy to output with no changes. */
1857 uregex_setText(re, text2, -1, &status);
1858 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1859 TEST_ASSERT_SUCCESS(status);
1860 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1861 utext_close(result);
1862
1863 /* Unicode escapes */
1864 uregex_setText(re, text1, -1, &status);
1865 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1866 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1867 TEST_ASSERT_SUCCESS(status);
1868 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1869 utext_close(result);
1870
1871 uregex_close(re);
1872 utext_close(&replText);
1873 }
1874
1875
1876 /*
1877 * replaceAll()
1878 */
1879 {
1880 UChar text1[80];
1881 UChar text2[80];
1882 UText replText = UTEXT_INITIALIZER;
1883 UText *result;
1884 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1885 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1886 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1887 status = U_ZERO_ERROR;
1888 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
1889 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1890 utext_openUTF8(&replText, str_1, -1, &status);
1891
1892 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1893 TEST_ASSERT_SUCCESS(status);
1894
1895 /* Normal case, with match */
1896 uregex_setText(re, text1, -1, &status);
1897 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1898 TEST_ASSERT_SUCCESS(status);
1899 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1900 utext_close(result);
1901
1902 /* No match. Text should copy to output with no changes. */
1903 uregex_setText(re, text2, -1, &status);
1904 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1905 TEST_ASSERT_SUCCESS(status);
1906 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1907 utext_close(result);
1908
1909 uregex_close(re);
1910 utext_close(&replText);
1911 }
1912
1913
1914 /*
1915 * appendReplacement()
1916 */
1917 {
1918 UChar text[100];
1919 UChar repl[100];
1920 UChar buf[100];
1921 UChar *bufPtr;
1922 int32_t bufCap;
1923
1924 status = U_ZERO_ERROR;
1925 re = uregex_openC(".*", 0, 0, &status);
1926 TEST_ASSERT_SUCCESS(status);
1927
1928 u_uastrncpy(text, "whatever", sizeof(text)/2);
1929 u_uastrncpy(repl, "some other", sizeof(repl)/2);
1930 uregex_setText(re, text, -1, &status);
1931
1932 /* match covers whole target string */
1933 uregex_find(re, 0, &status);
1934 TEST_ASSERT_SUCCESS(status);
1935 bufPtr = buf;
1936 bufCap = sizeof(buf) / 2;
1937 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1938 TEST_ASSERT_SUCCESS(status);
1939 TEST_ASSERT_STRING("some other", buf, TRUE);
1940
1941 /* Match has \u \U escapes */
1942 uregex_find(re, 0, &status);
1943 TEST_ASSERT_SUCCESS(status);
1944 bufPtr = buf;
1945 bufCap = sizeof(buf) / 2;
1946 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1947 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1948 TEST_ASSERT_SUCCESS(status);
1949 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1950
1951 uregex_close(re);
1952 }
1953
1954
1955 /*
1956 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1957 */
1958
1959 /*
1960 * splitUText()
1961 */
1962 {
1963 UChar textToSplit[80];
1964 UChar text2[80];
1965 UText *fields[10];
1966 int32_t numFields;
1967 int32_t i;
1968
1969 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2);
1970 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1971
1972 status = U_ZERO_ERROR;
1973 re = uregex_openC(":", 0, NULL, &status);
1974
1975
1976 /* Simple split */
1977
1978 uregex_setText(re, textToSplit, -1, &status);
1979 TEST_ASSERT_SUCCESS(status);
1980
1981 /* The TEST_ASSERT_SUCCESS call above should change too... */
1982 if (U_SUCCESS(status)) {
1983 memset(fields, 0, sizeof(fields));
1984 numFields = uregex_splitUText(re, fields, 10, &status);
1985 TEST_ASSERT_SUCCESS(status);
1986
1987 /* The TEST_ASSERT_SUCCESS call above should change too... */
1988 if(U_SUCCESS(status)) {
1989 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1990 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1991 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1992 TEST_ASSERT(numFields == 3);
1993 TEST_ASSERT_UTEXT(str_first, fields[0]);
1994 TEST_ASSERT_UTEXT(str_second, fields[1]);
1995 TEST_ASSERT_UTEXT(str_third, fields[2]);
1996 TEST_ASSERT(fields[3] == NULL);
1997 }
1998 for(i = 0; i < numFields; i++) {
1999 utext_close(fields[i]);
2000 }
2001 }
2002
2003 uregex_close(re);
2004
2005
2006 /* Split with too few output strings available */
2007 status = U_ZERO_ERROR;
2008 re = uregex_openC(":", 0, NULL, &status);
2009 uregex_setText(re, textToSplit, -1, &status);
2010 TEST_ASSERT_SUCCESS(status);
2011
2012 /* The TEST_ASSERT_SUCCESS call above should change too... */
2013 if(U_SUCCESS(status)) {
2014 fields[0] = NULL;
2015 fields[1] = NULL;
2016 fields[2] = &patternText;
2017 numFields = uregex_splitUText(re, fields, 2, &status);
2018 TEST_ASSERT_SUCCESS(status);
2019
2020 /* The TEST_ASSERT_SUCCESS call above should change too... */
2021 if(U_SUCCESS(status)) {
2022 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2023 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2024 TEST_ASSERT(numFields == 2);
2025 TEST_ASSERT_UTEXT(str_first, fields[0]);
2026 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2027 TEST_ASSERT(fields[2] == &patternText);
2028 }
2029 for(i = 0; i < numFields; i++) {
2030 utext_close(fields[i]);
2031 }
2032 }
2033
2034 uregex_close(re);
2035 }
2036
2037 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2038 * comes out as additional fields. */
2039 {
2040 UChar textToSplit[80];
2041 UText *fields[10];
2042 int32_t numFields;
2043 int32_t i;
2044
2045 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2);
2046
2047 status = U_ZERO_ERROR;
2048 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2049
2050 uregex_setText(re, textToSplit, -1, &status);
2051 TEST_ASSERT_SUCCESS(status);
2052
2053 /* The TEST_ASSERT_SUCCESS call above should change too... */
2054 if(U_SUCCESS(status)) {
2055 memset(fields, 0, sizeof(fields));
2056 numFields = uregex_splitUText(re, fields, 10, &status);
2057 TEST_ASSERT_SUCCESS(status);
2058
2059 /* The TEST_ASSERT_SUCCESS call above should change too... */
2060 if(U_SUCCESS(status)) {
2061 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2062 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2063 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2064 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2065 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2066
2067 TEST_ASSERT(numFields == 5);
2068 TEST_ASSERT_UTEXT(str_first, fields[0]);
2069 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2070 TEST_ASSERT_UTEXT(str_second, fields[2]);
2071 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2072 TEST_ASSERT_UTEXT(str_third, fields[4]);
2073 TEST_ASSERT(fields[5] == NULL);
2074 }
2075 for(i = 0; i < numFields; i++) {
2076 utext_close(fields[i]);
2077 }
2078 }
2079
2080 /* Split with too few output strings available (2) */
2081 status = U_ZERO_ERROR;
2082 fields[0] = NULL;
2083 fields[1] = NULL;
2084 fields[2] = &patternText;
2085 numFields = uregex_splitUText(re, fields, 2, &status);
2086 TEST_ASSERT_SUCCESS(status);
2087
2088 /* The TEST_ASSERT_SUCCESS call above should change too... */
2089 if(U_SUCCESS(status)) {
2090 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2091 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2092 TEST_ASSERT(numFields == 2);
2093 TEST_ASSERT_UTEXT(str_first, fields[0]);
2094 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2095 TEST_ASSERT(fields[2] == &patternText);
2096 }
2097 for(i = 0; i < numFields; i++) {
2098 utext_close(fields[i]);
2099 }
2100
2101
2102 /* Split with too few output strings available (3) */
2103 status = U_ZERO_ERROR;
2104 fields[0] = NULL;
2105 fields[1] = NULL;
2106 fields[2] = NULL;
2107 fields[3] = &patternText;
2108 numFields = uregex_splitUText(re, fields, 3, &status);
2109 TEST_ASSERT_SUCCESS(status);
2110
2111 /* The TEST_ASSERT_SUCCESS call above should change too... */
2112 if(U_SUCCESS(status)) {
2113 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2114 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2115 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2116 TEST_ASSERT(numFields == 3);
2117 TEST_ASSERT_UTEXT(str_first, fields[0]);
2118 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2119 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2120 TEST_ASSERT(fields[3] == &patternText);
2121 }
2122 for(i = 0; i < numFields; i++) {
2123 utext_close(fields[i]);
2124 }
2125
2126 /* Split with just enough output strings available (5) */
2127 status = U_ZERO_ERROR;
2128 fields[0] = NULL;
2129 fields[1] = NULL;
2130 fields[2] = NULL;
2131 fields[3] = NULL;
2132 fields[4] = NULL;
2133 fields[5] = &patternText;
2134 numFields = uregex_splitUText(re, fields, 5, &status);
2135 TEST_ASSERT_SUCCESS(status);
2136
2137 /* The TEST_ASSERT_SUCCESS call above should change too... */
2138 if(U_SUCCESS(status)) {
2139 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2140 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2141 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2142 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2143 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2144
2145 TEST_ASSERT(numFields == 5);
2146 TEST_ASSERT_UTEXT(str_first, fields[0]);
2147 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2148 TEST_ASSERT_UTEXT(str_second, fields[2]);
2149 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2150 TEST_ASSERT_UTEXT(str_third, fields[4]);
2151 TEST_ASSERT(fields[5] == &patternText);
2152 }
2153 for(i = 0; i < numFields; i++) {
2154 utext_close(fields[i]);
2155 }
2156
2157 /* Split, end of text is a field delimiter. */
2158 status = U_ZERO_ERROR;
2159 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2160 TEST_ASSERT_SUCCESS(status);
2161
2162 /* The TEST_ASSERT_SUCCESS call above should change too... */
2163 if(U_SUCCESS(status)) {
2164 memset(fields, 0, sizeof(fields));
2165 fields[9] = &patternText;
2166 numFields = uregex_splitUText(re, fields, 9, &status);
2167 TEST_ASSERT_SUCCESS(status);
2168
2169 /* The TEST_ASSERT_SUCCESS call above should change too... */
2170 if(U_SUCCESS(status)) {
2171 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2172 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2173 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2174 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
4388f060 2175 const char str_empty[] = { 0x00 };
729e4ab9 2176
4388f060 2177 TEST_ASSERT(numFields == 5);
729e4ab9
A
2178 TEST_ASSERT_UTEXT(str_first, fields[0]);
2179 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2180 TEST_ASSERT_UTEXT(str_second, fields[2]);
2181 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
4388f060
A
2182 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2183 TEST_ASSERT(fields[5] == NULL);
729e4ab9
A
2184 TEST_ASSERT(fields[8] == NULL);
2185 TEST_ASSERT(fields[9] == &patternText);
2186 }
2187 for(i = 0; i < numFields; i++) {
2188 utext_close(fields[i]);
2189 }
2190 }
2191
2192 uregex_close(re);
2193 }
2194 utext_close(&patternText);
2195}
2196
4388f060
A
2197
2198static void TestRefreshInput(void) {
2199 /*
2200 * RefreshInput changes out the input of a URegularExpression without
2201 * changing anything else in the match state. Used with Java JNI,
2202 * when Java moves the underlying string storage. This test
2203 * runs a find() loop, moving the text after the first match.
2204 * The right number of matches should still be found.
2205 */
2206 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2207 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2208 UErrorCode status = U_ZERO_ERROR;
2209 URegularExpression *re;
2210 UText ut1 = UTEXT_INITIALIZER;
2211 UText ut2 = UTEXT_INITIALIZER;
2212
2213 re = uregex_openC("[ABC]", 0, 0, &status);
2214 TEST_ASSERT_SUCCESS(status);
2215
2216 utext_openUChars(&ut1, testStr, -1, &status);
2217 TEST_ASSERT_SUCCESS(status);
2218 uregex_setUText(re, &ut1, &status);
2219 TEST_ASSERT_SUCCESS(status);
2220
2221 /* Find the first match "A" in the original string */
2222 TEST_ASSERT(uregex_findNext(re, &status));
2223 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2224
2225 /* Move the string, kill the original string. */
2226 u_strcpy(movedStr, testStr);
2227 u_memset(testStr, 0, u_strlen(testStr));
2228 utext_openUChars(&ut2, movedStr, -1, &status);
2229 TEST_ASSERT_SUCCESS(status);
2230 uregex_refreshUText(re, &ut2, &status);
2231 TEST_ASSERT_SUCCESS(status);
2232
2233 /* Find the following two matches, now working in the moved string. */
2234 TEST_ASSERT(uregex_findNext(re, &status));
2235 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2236 TEST_ASSERT(uregex_findNext(re, &status));
2237 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2238 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2239
2240 uregex_close(re);
2241}
2242
2243
2244static void TestBug8421(void) {
2245 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2246 * was failing.
2247 */
2248 URegularExpression *re;
2249 UErrorCode status = U_ZERO_ERROR;
2250 int32_t limit = -1;
2251
2252 re = uregex_openC("abc", 0, 0, &status);
2253 TEST_ASSERT_SUCCESS(status);
2254
2255 limit = uregex_getTimeLimit(re, &status);
2256 TEST_ASSERT_SUCCESS(status);
2257 TEST_ASSERT(limit == 0);
2258
2259 uregex_setTimeLimit(re, 100, &status);
2260 TEST_ASSERT_SUCCESS(status);
2261 limit = uregex_getTimeLimit(re, &status);
2262 TEST_ASSERT_SUCCESS(status);
2263 TEST_ASSERT(limit == 100);
2264
2265 uregex_close(re);
2266}
2267
2268
374ca955 2269#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */