]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/reapits.c
ICU-461.16.tar.gz
[apple/icu.git] / icuSources / test / cintltst / reapits.c
CommitLineData
374ca955
A
1/********************************************************************
2 * COPYRIGHT:
729e4ab9 3 * Copyright (c) 2004-2010, International Business Machines Corporation and
374ca955
A
4 * others. All Rights Reserved.
5 ********************************************************************/
6/********************************************************************************
7*
8* File reapits.c
9*
10*********************************************************************************/
11/*C API TEST FOR Regular Expressions */
12/**
13* This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
14* try to test the full functionality. It just calls each function and verifies that it
15* works on a basic level.
16*
17* More complete testing of regular expression functionality is done with the C++ tests.
18**/
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_REGULAR_EXPRESSIONS
23
24#include <stdlib.h>
25#include <string.h>
26#include "unicode/uloc.h"
27#include "unicode/uregex.h"
28#include "unicode/ustring.h"
729e4ab9 29#include "unicode/utext.h"
374ca955
A
30#include "cintltst.h"
31
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error (file, line and ICU error name) when `status`
 *     holds a failure code.  Execution continues afterwards.
 *
 * Note: this expands to a plain brace block, not do { } while (0), because
 * some call sites in this file omit the trailing semicolon.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}

/*
 * TEST_ASSERT(expr)
 *     Logs a data error (file and line) when `expr` compares equal to FALSE.
 *     Execution continues afterwards.  Same brace-block form as above.
 */
#define TEST_ASSERT(expr) { \
    if ((expr) == FALSE) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
374ca955 37
46f4442e
A
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         Both macros rely on variables named "status" (UErrorCode) and
 *         "re" (URegularExpression *) being declared in the enclosing scope.
 *         TEST_SETUP opens two brace levels: the outer one holds the converted
 *         copy of the test string ("srcString"), the inner one is entered only
 *         when setup succeeded.  TEST_TEARDOWN closes both, so the two macros
 *         must always be used as a pair.
 *
 *         If the text buffer cannot be allocated, status is set to
 *         U_MEMORY_ALLOCATION_ERROR, the failure is logged, and the test code
 *         between SETUP and TEARDOWN is skipped.
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC((pattern), (flags), NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, (testString), (int32_t)strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
    }
66
67
729e4ab9
A
68/**
69 * @param expected utf-8 array of bytes to be expected
70 */
46f4442e
A
71static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
72 char buf_inside_macro[120];
73 int32_t len = (int32_t)strlen(expected);
74 UBool success;
75 if (nulTerm) {
76 u_austrncpy(buf_inside_macro, (actual), len+1);
77 buf_inside_macro[len+2] = 0;
78 success = (strcmp((expected), buf_inside_macro) == 0);
79 } else {
80 u_austrncpy(buf_inside_macro, (actual), len);
81 buf_inside_macro[len+1] = 0;
82 success = (strncmp((expected), buf_inside_macro, len) == 0);
83 }
84 if (success == FALSE) {
85 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
86 file, line, (expected), buf_inside_macro);
87 }
374ca955
A
88}
89
46f4442e 90#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
374ca955
A
91
92
729e4ab9
A
93static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
94 UErrorCode status = U_ZERO_ERROR;
95 UText expectedText = UTEXT_INITIALIZER;
96 utext_openUTF8(&expectedText, expected, -1, &status);
97 utext_setNativeIndex(actual, 0);
98 if (utext_compare(&expectedText, -1, actual, -1) != 0) {
99 UChar32 c;
100 log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
101 c = utext_next32From(actual, 0);
102 while (c != U_SENTINEL) {
103 if (0x20<c && c <0x7e) {
104 log_err("%c", c);
105 } else {
106 log_err("%#x", c);
107 }
108 c = UTEXT_NEXT32(actual);
109 }
110 log_err("\"\n");
111 }
112 utext_close(&expectedText);
113}
114
115#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
374ca955
A
116
117
118
119static void TestRegexCAPI(void);
73c04bcf 120static void TestBug4315(void);
729e4ab9 121static void TestUTextAPI(void);
374ca955
A
122
123void addURegexTest(TestNode** root);
124
125void addURegexTest(TestNode** root)
126{
127 addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
73c04bcf 128 addTest(root, &TestBug4315, "regex/TestBug4315");
729e4ab9 129 addTest(root, &TestUTextAPI, "regex/TestUTextAPI");
374ca955
A
130}
131
46f4442e
A
132/*
133 * Call back function and context struct used for testing
134 * regular expression user callbacks. This test is mostly the same as
135 * the corresponding C++ test in intltest.
136 */
137typedef struct callBackContext {
138 int32_t maxCalls;
139 int32_t numCalls;
140 int32_t lastSteps;
141} callBackContext;
142
143static UBool U_EXPORT2 U_CALLCONV
144TestCallbackFn(const void *context, int32_t steps) {
145 callBackContext *info = (callBackContext *)context;
146 if (info->lastSteps+1 != steps) {
147 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
148 }
149 info->lastSteps = steps;
150 info->numCalls++;
151 return (info->numCalls < info->maxCalls);
152}
374ca955 153
46f4442e
A
154/*
155 * Regular Expression C API Tests
156 */
374ca955
A
157static void TestRegexCAPI(void) {
158 UErrorCode status = U_ZERO_ERROR;
159 URegularExpression *re;
160 UChar pat[200];
161 UChar *minus1;
162
163 memset(&minus1, -1, sizeof(minus1));
164
165 /* Minimalist open/close */
166 u_uastrncpy(pat, "abc*", sizeof(pat)/2);
167 re = uregex_open(pat, -1, 0, 0, &status);
46f4442e 168 if (U_FAILURE(status)) {
729e4ab9 169 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
46f4442e
A
170 return;
171 }
374ca955
A
172 uregex_close(re);
173
174 /* Open with all flag values set */
175 status = U_ZERO_ERROR;
176 re = uregex_open(pat, -1,
177 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
178 0, &status);
179 TEST_ASSERT_SUCCESS(status);
180 uregex_close(re);
181
182 /* Open with an invalid flag */
183 status = U_ZERO_ERROR;
184 re = uregex_open(pat, -1, 0x40000000, 0, &status);
185 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
186 uregex_close(re);
187
729e4ab9
A
188 /* Open with an unimplemented flag */
189 status = U_ZERO_ERROR;
190 re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
191 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
192 uregex_close(re);
193
73c04bcf
A
194 /* openC with an invalid parameter */
195 status = U_ZERO_ERROR;
196 re = uregex_openC(NULL,
197 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
198 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
199
200 /* openC with an invalid parameter */
201 status = U_USELESS_COLLATOR_ERROR;
202 re = uregex_openC(NULL,
203 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
204 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
374ca955
A
205
206 /* openC open from a C string */
207 {
208 const UChar *p;
209 int32_t len;
210 status = U_ZERO_ERROR;
211 re = uregex_openC("abc*", 0, 0, &status);
212 TEST_ASSERT_SUCCESS(status);
213 p = uregex_pattern(re, &len, &status);
214 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
215
216 /* The TEST_ASSERT_SUCCESS above should change too... */
217 if(U_SUCCESS(status)) {
218 u_uastrncpy(pat, "abc*", sizeof(pat)/2);
219 TEST_ASSERT(u_strcmp(pat, p) == 0);
220 TEST_ASSERT(len==(int32_t)strlen("abc*"));
221 }
374ca955
A
222
223 uregex_close(re);
224
225 /* TODO: Open with ParseError parameter */
226 }
227
228 /*
229 * clone
230 */
231 {
232 URegularExpression *clone1;
233 URegularExpression *clone2;
234 URegularExpression *clone3;
235 UChar testString1[30];
236 UChar testString2[30];
237 UBool result;
238
239
240 status = U_ZERO_ERROR;
241 re = uregex_openC("abc*", 0, 0, &status);
242 TEST_ASSERT_SUCCESS(status);
243 clone1 = uregex_clone(re, &status);
244 TEST_ASSERT_SUCCESS(status);
245 TEST_ASSERT(clone1 != NULL);
246
247 status = U_ZERO_ERROR;
248 clone2 = uregex_clone(re, &status);
249 TEST_ASSERT_SUCCESS(status);
250 TEST_ASSERT(clone2 != NULL);
251 uregex_close(re);
252
253 status = U_ZERO_ERROR;
254 clone3 = uregex_clone(clone2, &status);
255 TEST_ASSERT_SUCCESS(status);
256 TEST_ASSERT(clone3 != NULL);
257
258 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
259 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
260
261 status = U_ZERO_ERROR;
262 uregex_setText(clone1, testString1, -1, &status);
263 TEST_ASSERT_SUCCESS(status);
264 result = uregex_lookingAt(clone1, 0, &status);
265 TEST_ASSERT_SUCCESS(status);
266 TEST_ASSERT(result==TRUE);
267
268 status = U_ZERO_ERROR;
269 uregex_setText(clone2, testString2, -1, &status);
270 TEST_ASSERT_SUCCESS(status);
271 result = uregex_lookingAt(clone2, 0, &status);
272 TEST_ASSERT_SUCCESS(status);
273 TEST_ASSERT(result==FALSE);
274 result = uregex_find(clone2, 0, &status);
275 TEST_ASSERT_SUCCESS(status);
276 TEST_ASSERT(result==TRUE);
277
278 uregex_close(clone1);
279 uregex_close(clone2);
280 uregex_close(clone3);
281
282 }
283
284 /*
285 * pattern()
286 */
287 {
288 const UChar *resultPat;
289 int32_t resultLen;
290 u_uastrncpy(pat, "hello", sizeof(pat)/2);
291 status = U_ZERO_ERROR;
292 re = uregex_open(pat, -1, 0, NULL, &status);
293 resultPat = uregex_pattern(re, &resultLen, &status);
294 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
295
296 /* The TEST_ASSERT_SUCCESS above should change too... */
297 if (U_SUCCESS(status)) {
298 TEST_ASSERT(resultLen == -1);
299 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
300 }
301
374ca955
A
302 uregex_close(re);
303
304 status = U_ZERO_ERROR;
305 re = uregex_open(pat, 3, 0, NULL, &status);
306 resultPat = uregex_pattern(re, &resultLen, &status);
307 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
308 TEST_ASSERT_SUCCESS(status);
309
310 /* The TEST_ASSERT_SUCCESS above should change too... */
311 if (U_SUCCESS(status)) {
312 TEST_ASSERT(resultLen == 3);
313 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
314 TEST_ASSERT(u_strlen(resultPat) == 3);
315 }
316
374ca955
A
317 uregex_close(re);
318 }
319
320 /*
321 * flags()
322 */
323 {
324 int32_t t;
325
326 status = U_ZERO_ERROR;
327 re = uregex_open(pat, -1, 0, NULL, &status);
328 t = uregex_flags(re, &status);
329 TEST_ASSERT_SUCCESS(status);
330 TEST_ASSERT(t == 0);
331 uregex_close(re);
332
333 status = U_ZERO_ERROR;
334 re = uregex_open(pat, -1, 0, NULL, &status);
335 t = uregex_flags(re, &status);
336 TEST_ASSERT_SUCCESS(status);
337 TEST_ASSERT(t == 0);
338 uregex_close(re);
339
340 status = U_ZERO_ERROR;
341 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
342 t = uregex_flags(re, &status);
343 TEST_ASSERT_SUCCESS(status);
344 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
345 uregex_close(re);
346 }
347
348 /*
349 * setText() and lookingAt()
350 */
351 {
352 UChar text1[50];
353 UChar text2[50];
354 UBool result;
355
356 u_uastrncpy(text1, "abcccd", sizeof(text1)/2);
357 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
358 status = U_ZERO_ERROR;
359 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
360 re = uregex_open(pat, -1, 0, NULL, &status);
361 TEST_ASSERT_SUCCESS(status);
362
363 /* Operation before doing a setText should fail... */
364 status = U_ZERO_ERROR;
365 uregex_lookingAt(re, 0, &status);
366 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
367
368 status = U_ZERO_ERROR;
369 uregex_setText(re, text1, -1, &status);
370 result = uregex_lookingAt(re, 0, &status);
371 TEST_ASSERT(result == TRUE);
372 TEST_ASSERT_SUCCESS(status);
373
374 status = U_ZERO_ERROR;
375 uregex_setText(re, text2, -1, &status);
376 result = uregex_lookingAt(re, 0, &status);
377 TEST_ASSERT(result == FALSE);
378 TEST_ASSERT_SUCCESS(status);
379
380 status = U_ZERO_ERROR;
381 uregex_setText(re, text1, -1, &status);
382 result = uregex_lookingAt(re, 0, &status);
383 TEST_ASSERT(result == TRUE);
384 TEST_ASSERT_SUCCESS(status);
385
386 status = U_ZERO_ERROR;
387 uregex_setText(re, text1, 5, &status);
388 result = uregex_lookingAt(re, 0, &status);
389 TEST_ASSERT(result == FALSE);
390 TEST_ASSERT_SUCCESS(status);
391
392 status = U_ZERO_ERROR;
393 uregex_setText(re, text1, 6, &status);
394 result = uregex_lookingAt(re, 0, &status);
395 TEST_ASSERT(result == TRUE);
396 TEST_ASSERT_SUCCESS(status);
397
398 uregex_close(re);
399 }
400
401
402 /*
403 * getText()
404 */
405 {
406 UChar text1[50];
407 UChar text2[50];
408 const UChar *result;
409 int32_t textLength;
410
411 u_uastrncpy(text1, "abcccd", sizeof(text1)/2);
412 u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
413 status = U_ZERO_ERROR;
414 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
415 re = uregex_open(pat, -1, 0, NULL, &status);
416
417 uregex_setText(re, text1, -1, &status);
418 result = uregex_getText(re, &textLength, &status);
419 TEST_ASSERT(result == text1);
420 TEST_ASSERT(textLength == -1);
421 TEST_ASSERT_SUCCESS(status);
422
423 status = U_ZERO_ERROR;
424 uregex_setText(re, text2, 7, &status);
425 result = uregex_getText(re, &textLength, &status);
426 TEST_ASSERT(result == text2);
427 TEST_ASSERT(textLength == 7);
428 TEST_ASSERT_SUCCESS(status);
429
430 status = U_ZERO_ERROR;
431 uregex_setText(re, text2, 4, &status);
432 result = uregex_getText(re, &textLength, &status);
433 TEST_ASSERT(result == text2);
434 TEST_ASSERT(textLength == 4);
435 TEST_ASSERT_SUCCESS(status);
436 uregex_close(re);
437 }
438
439 /*
440 * matches()
441 */
442 {
443 UChar text1[50];
444 UBool result;
445 int len;
446 UChar nullString[] = {0,0,0};
447
448 u_uastrncpy(text1, "abcccde", sizeof(text1)/2);
449 status = U_ZERO_ERROR;
450 u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
451 re = uregex_open(pat, -1, 0, NULL, &status);
452
453 uregex_setText(re, text1, -1, &status);
454 result = uregex_matches(re, 0, &status);
455 TEST_ASSERT(result == FALSE);
456 TEST_ASSERT_SUCCESS(status);
457
458 status = U_ZERO_ERROR;
459 uregex_setText(re, text1, 6, &status);
460 result = uregex_matches(re, 0, &status);
461 TEST_ASSERT(result == TRUE);
462 TEST_ASSERT_SUCCESS(status);
463
464 status = U_ZERO_ERROR;
465 uregex_setText(re, text1, 6, &status);
466 result = uregex_matches(re, 1, &status);
467 TEST_ASSERT(result == FALSE);
468 TEST_ASSERT_SUCCESS(status);
469 uregex_close(re);
470
471 status = U_ZERO_ERROR;
472 re = uregex_openC(".?", 0, NULL, &status);
473 uregex_setText(re, text1, -1, &status);
474 len = u_strlen(text1);
475 result = uregex_matches(re, len, &status);
476 TEST_ASSERT(result == TRUE);
477 TEST_ASSERT_SUCCESS(status);
478
479 status = U_ZERO_ERROR;
480 uregex_setText(re, nullString, -1, &status);
481 TEST_ASSERT_SUCCESS(status);
482 result = uregex_matches(re, 0, &status);
483 TEST_ASSERT(result == TRUE);
484 TEST_ASSERT_SUCCESS(status);
485 uregex_close(re);
486 }
487
488
489 /*
490 * lookingAt() Used in setText test.
491 */
492
493
494 /*
495 * find(), findNext, start, end, reset
496 */
497 {
498 UChar text1[50];
499 UBool result;
500 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2);
501 status = U_ZERO_ERROR;
502 re = uregex_openC("rx", 0, NULL, &status);
503
504 uregex_setText(re, text1, -1, &status);
505 result = uregex_find(re, 0, &status);
506 TEST_ASSERT(result == TRUE);
507 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
508 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
509 TEST_ASSERT_SUCCESS(status);
510
511 result = uregex_find(re, 9, &status);
512 TEST_ASSERT(result == TRUE);
513 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
514 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
515 TEST_ASSERT_SUCCESS(status);
516
517 result = uregex_find(re, 14, &status);
518 TEST_ASSERT(result == FALSE);
519 TEST_ASSERT_SUCCESS(status);
520
521 status = U_ZERO_ERROR;
522 uregex_reset(re, 0, &status);
523
524 result = uregex_findNext(re, &status);
525 TEST_ASSERT(result == TRUE);
526 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
527 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
528 TEST_ASSERT_SUCCESS(status);
529
530 result = uregex_findNext(re, &status);
531 TEST_ASSERT(result == TRUE);
532 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
533 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
534 TEST_ASSERT_SUCCESS(status);
535
536 status = U_ZERO_ERROR;
537 uregex_reset(re, 12, &status);
538
539 result = uregex_findNext(re, &status);
540 TEST_ASSERT(result == TRUE);
541 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
542 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
543 TEST_ASSERT_SUCCESS(status);
544
545 result = uregex_findNext(re, &status);
546 TEST_ASSERT(result == FALSE);
547 TEST_ASSERT_SUCCESS(status);
548
549 uregex_close(re);
550 }
551
552 /*
553 * groupCount
554 */
555 {
556 int32_t result;
557
558 status = U_ZERO_ERROR;
559 re = uregex_openC("abc", 0, NULL, &status);
560 result = uregex_groupCount(re, &status);
561 TEST_ASSERT_SUCCESS(status);
562 TEST_ASSERT(result == 0);
563 uregex_close(re);
564
565 status = U_ZERO_ERROR;
566 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
567 result = uregex_groupCount(re, &status);
568 TEST_ASSERT_SUCCESS(status);
569 TEST_ASSERT(result == 3);
570 uregex_close(re);
571
572 }
573
574
575 /*
576 * group()
577 */
578 {
579 UChar text1[80];
580 UChar buf[80];
581 UBool result;
582 int32_t resultSz;
583 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
584
585 status = U_ZERO_ERROR;
586 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
587 TEST_ASSERT_SUCCESS(status);
588
589
590 uregex_setText(re, text1, -1, &status);
591 result = uregex_find(re, 0, &status);
592 TEST_ASSERT(result==TRUE);
593
594 /* Capture Group 0, the full match. Should succeed. */
595 status = U_ZERO_ERROR;
596 resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
597 TEST_ASSERT_SUCCESS(status);
598 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
599 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
600
601 /* Capture group #1. Should succeed. */
602 status = U_ZERO_ERROR;
603 resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
604 TEST_ASSERT_SUCCESS(status);
605 TEST_ASSERT_STRING(" interior ", buf, TRUE);
606 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
607
608 /* Capture group out of range. Error. */
609 status = U_ZERO_ERROR;
610 uregex_group(re, 2, buf, sizeof(buf)/2, &status);
611 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
612
613 /* NULL buffer, pure pre-flight */
614 status = U_ZERO_ERROR;
615 resultSz = uregex_group(re, 0, NULL, 0, &status);
616 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
617 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
618
619 /* Too small buffer, truncated string */
620 status = U_ZERO_ERROR;
621 memset(buf, -1, sizeof(buf));
622 resultSz = uregex_group(re, 0, buf, 5, &status);
623 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
624 TEST_ASSERT_STRING("abc i", buf, FALSE);
625 TEST_ASSERT(buf[5] == (UChar)0xffff);
626 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
627
628 /* Output string just fits buffer, no NUL term. */
629 status = U_ZERO_ERROR;
630 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
631 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
632 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
633 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
634 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
635
636 uregex_close(re);
637
638 }
46f4442e
A
639
640 /*
641 * Regions
642 */
643
644
645 /* SetRegion(), getRegion() do something */
646 TEST_SETUP(".*", "0123456789ABCDEF", 0)
647 UChar resultString[40];
648 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
649 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
650 uregex_setRegion(re, 3, 6, &status);
651 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
652 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
653 TEST_ASSERT(uregex_findNext(re, &status));
654 TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
655 TEST_ASSERT_STRING("345", resultString, TRUE);
656 TEST_TEARDOWN;
657
658 /* find(start=-1) uses regions */
659 TEST_SETUP(".*", "0123456789ABCDEF", 0);
660 uregex_setRegion(re, 4, 6, &status);
661 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
662 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
663 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
664 TEST_TEARDOWN;
665
666 /* find (start >=0) does not use regions */
667 TEST_SETUP(".*", "0123456789ABCDEF", 0);
668 uregex_setRegion(re, 4, 6, &status);
669 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
670 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
671 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
672 TEST_TEARDOWN;
673
674 /* findNext() obeys regions */
675 TEST_SETUP(".", "0123456789ABCDEF", 0);
676 uregex_setRegion(re, 4, 6, &status);
677 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
678 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
679 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
680 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
681 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
682 TEST_TEARDOWN;
683
684 /* matches(start=-1) uses regions */
685 /* Also, verify that non-greedy *? succeeds in finding the full match. */
686 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
687 uregex_setRegion(re, 4, 6, &status);
688 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
689 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
690 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
691 TEST_TEARDOWN;
692
693 /* matches (start >=0) does not use regions */
694 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
695 uregex_setRegion(re, 4, 6, &status);
696 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
697 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
698 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
699 TEST_TEARDOWN;
700
701 /* lookingAt(start=-1) uses regions */
702 /* Also, verify that non-greedy *? finds the first (shortest) match. */
703 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
704 uregex_setRegion(re, 4, 6, &status);
705 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
706 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
707 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
708 TEST_TEARDOWN;
709
710 /* lookingAt (start >=0) does not use regions */
711 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
712 uregex_setRegion(re, 4, 6, &status);
713 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
714 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
715 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
716 TEST_TEARDOWN;
717
718 /* hitEnd() */
719 TEST_SETUP("[a-f]*", "abcdefghij", 0);
720 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
721 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
722 TEST_TEARDOWN;
723
724 TEST_SETUP("[a-f]*", "abcdef", 0);
725 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
726 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
727 TEST_TEARDOWN;
728
729 /* requireEnd */
730 TEST_SETUP("abcd", "abcd", 0);
731 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
732 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
733 TEST_TEARDOWN;
734
735 TEST_SETUP("abcd$", "abcd", 0);
736 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
737 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
738 TEST_TEARDOWN;
739
740 /* anchoringBounds */
741 TEST_SETUP("abc$", "abcdef", 0);
742 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
743 uregex_useAnchoringBounds(re, FALSE, &status);
744 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
745
746 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
747 uregex_useAnchoringBounds(re, TRUE, &status);
748 uregex_setRegion(re, 0, 3, &status);
749 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
750 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
751 TEST_TEARDOWN;
752
753 /* Transparent Bounds */
754 TEST_SETUP("abc(?=def)", "abcdef", 0);
755 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
756 uregex_useTransparentBounds(re, TRUE, &status);
757 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
758
759 uregex_useTransparentBounds(re, FALSE, &status);
760 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
761 uregex_setRegion(re, 0, 3, &status);
762 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
763 uregex_useTransparentBounds(re, TRUE, &status);
764 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
765 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
766 TEST_TEARDOWN;
767
374ca955
A
768
769 /*
770 * replaceFirst()
771 */
772 {
773 UChar text1[80];
774 UChar text2[80];
775 UChar replText[80];
776 UChar buf[80];
777 int32_t resultSz;
778 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
779 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
780 u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
781
782 status = U_ZERO_ERROR;
783 re = uregex_openC("x(.*?)x", 0, NULL, &status);
784 TEST_ASSERT_SUCCESS(status);
785
786 /* Normal case, with match */
787 uregex_setText(re, text1, -1, &status);
788 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
789 TEST_ASSERT_SUCCESS(status);
790 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
791 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
792
793 /* No match. Text should copy to output with no changes. */
794 status = U_ZERO_ERROR;
795 uregex_setText(re, text2, -1, &status);
796 resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
797 TEST_ASSERT_SUCCESS(status);
798 TEST_ASSERT_STRING("No match here.", buf, TRUE);
799 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
800
801 /* Match, output just fills buffer, no termination warning. */
802 status = U_ZERO_ERROR;
803 uregex_setText(re, text1, -1, &status);
804 memset(buf, -1, sizeof(buf));
805 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
806 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
807 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
808 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
809 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
810
811 /* Do the replaceFirst again, without first resetting anything.
812 * Should give the same results.
813 */
814 status = U_ZERO_ERROR;
815 memset(buf, -1, sizeof(buf));
816 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
817 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
818 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
819 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
820 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
821
822 /* NULL buffer, zero buffer length */
823 status = U_ZERO_ERROR;
824 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
825 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
826 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
827
828 /* Buffer too small by one */
829 status = U_ZERO_ERROR;
830 memset(buf, -1, sizeof(buf));
831 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
832 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
833 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
834 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
835 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
836
837 uregex_close(re);
838 }
839
840
841 /*
842 * replaceAll()
843 */
844 {
729e4ab9
A
845 UChar text1[80]; /* "Replace xaax x1x x...x." */
846 UChar text2[80]; /* "No match Here" */
847 UChar replText[80]; /* "<$1>" */
848 UChar replText2[80]; /* "<<$1>>" */
849 const char * pattern = "x(.*?)x";
850 const char * expectedResult = "Replace <aa> <1> <...>.";
851 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
374ca955 852 UChar buf[80];
729e4ab9 853 int32_t resultSize;
374ca955 854 int32_t expectedResultSize;
729e4ab9 855 int32_t expectedResultSize2;
374ca955
A
856 int32_t i;
857
858 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
859 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
860 u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
729e4ab9
A
861 u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
862 expectedResultSize = strlen(expectedResult);
863 expectedResultSize2 = strlen(expectedResult2);
374ca955
A
864
865 status = U_ZERO_ERROR;
729e4ab9 866 re = uregex_openC(pattern, 0, NULL, &status);
374ca955
A
867 TEST_ASSERT_SUCCESS(status);
868
869 /* Normal case, with match */
870 uregex_setText(re, text1, -1, &status);
729e4ab9 871 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
374ca955 872 TEST_ASSERT_SUCCESS(status);
729e4ab9
A
873 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
874 TEST_ASSERT(resultSize == expectedResultSize);
374ca955
A
875
876 /* No match. Text should copy to output with no changes. */
877 status = U_ZERO_ERROR;
878 uregex_setText(re, text2, -1, &status);
729e4ab9 879 resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
374ca955
A
880 TEST_ASSERT_SUCCESS(status);
881 TEST_ASSERT_STRING("No match here.", buf, TRUE);
729e4ab9 882 TEST_ASSERT(resultSize == u_strlen(text2));
374ca955
A
883
884 /* Match, output just fills buffer, no termination warning. */
885 status = U_ZERO_ERROR;
886 uregex_setText(re, text1, -1, &status);
887 memset(buf, -1, sizeof(buf));
729e4ab9 888 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
374ca955 889 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
729e4ab9
A
890 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
891 TEST_ASSERT(resultSize == expectedResultSize);
892 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
893
894 /* Do the replaceAll again, without first resetting anything.
895 * Should give the same results.
896 */
897 status = U_ZERO_ERROR;
898 memset(buf, -1, sizeof(buf));
729e4ab9 899 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
374ca955
A
900 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
901 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
729e4ab9
A
902 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
903 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
374ca955
A
904
905 /* NULL buffer, zero buffer length */
906 status = U_ZERO_ERROR;
729e4ab9 907 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
374ca955 908 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 909 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
374ca955
A
910
911 /* Buffer too small. Try every size, which will tickle edge cases
912 * in uregex_appendReplacement (used by replaceAll) */
913 for (i=0; i<expectedResultSize; i++) {
914 char expected[80];
915 status = U_ZERO_ERROR;
916 memset(buf, -1, sizeof(buf));
729e4ab9
A
917 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
918 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
919 strcpy(expected, expectedResult);
920 expected[i] = 0;
921 TEST_ASSERT_STRING(expected, buf, FALSE);
922 TEST_ASSERT(resultSize == expectedResultSize);
923 TEST_ASSERT(buf[i] == (UChar)0xffff);
924 }
925
926 /* Buffer too small. Same as previous test, except this time the replacement
927 * text is longer than the match capture group, making the length of the complete
928 * replacement longer than the original string.
929 */
930 for (i=0; i<expectedResultSize2; i++) {
931 char expected[80];
932 status = U_ZERO_ERROR;
933 memset(buf, -1, sizeof(buf));
934 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
374ca955 935 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
729e4ab9 936 strcpy(expected, expectedResult2);
374ca955
A
937 expected[i] = 0;
938 TEST_ASSERT_STRING(expected, buf, FALSE);
729e4ab9 939 TEST_ASSERT(resultSize == expectedResultSize2);
374ca955
A
940 TEST_ASSERT(buf[i] == (UChar)0xffff);
941 }
942
729e4ab9 943
374ca955
A
944 uregex_close(re);
945 }
946
947
948 /*
949 * appendReplacement()
950 */
951 {
952 UChar text[100];
953 UChar repl[100];
954 UChar buf[100];
955 UChar *bufPtr;
956 int32_t bufCap;
957
958
959 status = U_ZERO_ERROR;
960 re = uregex_openC(".*", 0, 0, &status);
961 TEST_ASSERT_SUCCESS(status);
962
963 u_uastrncpy(text, "whatever", sizeof(text)/2);
964 u_uastrncpy(repl, "some other", sizeof(repl)/2);
965 uregex_setText(re, text, -1, &status);
966
967 /* match covers whole target string */
968 uregex_find(re, 0, &status);
969 TEST_ASSERT_SUCCESS(status);
970 bufPtr = buf;
971 bufCap = sizeof(buf) / 2;
972 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
973 TEST_ASSERT_SUCCESS(status);
974 TEST_ASSERT_STRING("some other", buf, TRUE);
975
976 /* Replacement text has \u \U escapes */
977 uregex_find(re, 0, &status);
978 TEST_ASSERT_SUCCESS(status);
979 bufPtr = buf;
980 bufCap = sizeof(buf) / 2;
981 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
982 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
983 TEST_ASSERT_SUCCESS(status);
984 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
985
729e4ab9
A
986 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
987 status = U_ZERO_ERROR;
988 uregex_find(re, 0, &status);
989 TEST_ASSERT_SUCCESS(status);
990 bufPtr = buf;
991 status = U_BUFFER_OVERFLOW_ERROR;
992 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
993 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
994
374ca955
A
995 uregex_close(re);
996 }
997
998
999 /*
1000 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1001 */
1002
1003 /*
1004 * split()
1005 */
1006 {
1007 UChar textToSplit[80];
1008 UChar text2[80];
1009 UChar buf[200];
1010 UChar *fields[10];
1011 int32_t numFields;
1012 int32_t requiredCapacity;
1013 int32_t spaceNeeded;
1014 int32_t sz;
1015
1016 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2);
1017 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1018
1019 status = U_ZERO_ERROR;
1020 re = uregex_openC(":", 0, NULL, &status);
1021
1022
1023 /* Simple split */
1024
1025 uregex_setText(re, textToSplit, -1, &status);
1026 TEST_ASSERT_SUCCESS(status);
1027
73c04bcf
A
1028 /* The TEST_ASSERT_SUCCESS call above should change too... */
1029 if (U_SUCCESS(status)) {
1030 memset(fields, -1, sizeof(fields));
1031 numFields =
1032 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1033 TEST_ASSERT_SUCCESS(status);
374ca955 1034
73c04bcf
A
1035 /* The TEST_ASSERT_SUCCESS call above should change too... */
1036 if(U_SUCCESS(status)) {
1037 TEST_ASSERT(numFields == 3);
1038 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1039 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1040 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1041 TEST_ASSERT(fields[3] == NULL);
1042
1043 spaceNeeded = u_strlen(textToSplit) -
1044 (numFields - 1) + /* Field delimiters do not appear in output */
1045 numFields; /* Each field gets a NUL terminator */
1046
1047 TEST_ASSERT(spaceNeeded == requiredCapacity);
1048 }
1049 }
374ca955 1050
374ca955
A
1051 uregex_close(re);
1052
1053
1054 /* Split with too few output strings available */
1055 status = U_ZERO_ERROR;
1056 re = uregex_openC(":", 0, NULL, &status);
1057 uregex_setText(re, textToSplit, -1, &status);
1058 TEST_ASSERT_SUCCESS(status);
1059
73c04bcf
A
1060 /* The TEST_ASSERT_SUCCESS call above should change too... */
1061 if(U_SUCCESS(status)) {
374ca955 1062 memset(fields, -1, sizeof(fields));
374ca955 1063 numFields =
73c04bcf
A
1064 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1065 TEST_ASSERT_SUCCESS(status);
1066
1067 /* The TEST_ASSERT_SUCCESS call above should change too... */
1068 if(U_SUCCESS(status)) {
1069 TEST_ASSERT(numFields == 2);
374ca955 1070 TEST_ASSERT_STRING("first ", fields[0], TRUE);
73c04bcf
A
1071 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1072 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1073
1074 spaceNeeded = u_strlen(textToSplit) -
1075 (numFields - 1) + /* Field delimiters do not appear in output */
1076 numFields; /* Each field gets a NUL terminator */
1077
1078 TEST_ASSERT(spaceNeeded == requiredCapacity);
1079
1080 /* Split with a range of output buffer sizes. */
1081 spaceNeeded = u_strlen(textToSplit) -
1082 (numFields - 1) + /* Field delimiters do not appear in output */
1083 numFields; /* Each field gets a NUL terminator */
1084
1085 for (sz=0; sz < spaceNeeded+1; sz++) {
1086 memset(fields, -1, sizeof(fields));
1087 status = U_ZERO_ERROR;
1088 numFields =
1089 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1090 if (sz >= spaceNeeded) {
1091 TEST_ASSERT_SUCCESS(status);
1092 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1093 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1094 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1095 } else {
1096 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1097 }
1098 TEST_ASSERT(numFields == 3);
1099 TEST_ASSERT(fields[3] == NULL);
1100 TEST_ASSERT(spaceNeeded == requiredCapacity);
1101 }
374ca955 1102 }
374ca955 1103 }
73c04bcf 1104
374ca955
A
1105 uregex_close(re);
1106 }
1107
1108
1109
1110
1111 /* Split(), part 2. Patterns with capture groups. The capture group text
1112 * comes out as additional fields. */
1113 {
1114 UChar textToSplit[80];
1115 UChar buf[200];
1116 UChar *fields[10];
1117 int32_t numFields;
1118 int32_t requiredCapacity;
1119 int32_t spaceNeeded;
1120 int32_t sz;
1121
1122 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2);
1123
1124 status = U_ZERO_ERROR;
1125 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1126
1127 uregex_setText(re, textToSplit, -1, &status);
1128 TEST_ASSERT_SUCCESS(status);
1129
73c04bcf
A
1130 /* The TEST_ASSERT_SUCCESS call above should change too... */
1131 if(U_SUCCESS(status)) {
1132 memset(fields, -1, sizeof(fields));
1133 numFields =
1134 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1135 TEST_ASSERT_SUCCESS(status);
374ca955 1136
73c04bcf
A
1137 /* The TEST_ASSERT_SUCCESS call above should change too... */
1138 if(U_SUCCESS(status)) {
1139 TEST_ASSERT(numFields == 5);
1140 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1141 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1142 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1143 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1144 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1145 TEST_ASSERT(fields[5] == NULL);
1146 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1147 TEST_ASSERT(spaceNeeded == requiredCapacity);
1148 }
1149 }
374ca955
A
1150
1151 /* Split with too few output strings available (2) */
1152 status = U_ZERO_ERROR;
1153 memset(fields, -1, sizeof(fields));
1154 numFields =
1155 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1156 TEST_ASSERT_SUCCESS(status);
374ca955 1157
73c04bcf
A
1158 /* The TEST_ASSERT_SUCCESS call above should change too... */
1159 if(U_SUCCESS(status)) {
1160 TEST_ASSERT(numFields == 2);
1161 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1162 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1163 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1164
1165 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1166 TEST_ASSERT(spaceNeeded == requiredCapacity);
1167 }
374ca955
A
1168
1169 /* Split with too few output strings available (3) */
1170 status = U_ZERO_ERROR;
1171 memset(fields, -1, sizeof(fields));
1172 numFields =
1173 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1174 TEST_ASSERT_SUCCESS(status);
374ca955 1175
73c04bcf
A
1176 /* The TEST_ASSERT_SUCCESS call above should change too... */
1177 if(U_SUCCESS(status)) {
1178 TEST_ASSERT(numFields == 3);
1179 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1180 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1181 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1182 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1183
1184 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1185 TEST_ASSERT(spaceNeeded == requiredCapacity);
1186 }
374ca955
A
1187
1188 /* Split with just enough output strings available (5) */
1189 status = U_ZERO_ERROR;
1190 memset(fields, -1, sizeof(fields));
1191 numFields =
1192 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1193 TEST_ASSERT_SUCCESS(status);
374ca955 1194
73c04bcf
A
1195 /* The TEST_ASSERT_SUCCESS call above should change too... */
1196 if(U_SUCCESS(status)) {
1197 TEST_ASSERT(numFields == 5);
1198 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1199 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1200 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1201 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1202 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1203 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
374ca955 1204
73c04bcf
A
1205 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1206 TEST_ASSERT(spaceNeeded == requiredCapacity);
1207 }
374ca955
A
1208
1209 /* Split, end of text is a field delimiter. */
1210 status = U_ZERO_ERROR;
1211 sz = strlen("first <tag-a> second<tag-b>");
1212 uregex_setText(re, textToSplit, sz, &status);
1213 TEST_ASSERT_SUCCESS(status);
73c04bcf
A
1214
1215 /* The TEST_ASSERT_SUCCESS call above should change too... */
1216 if(U_SUCCESS(status)) {
1217 memset(fields, -1, sizeof(fields));
1218 numFields =
1219 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1220 TEST_ASSERT_SUCCESS(status);
1221
1222 /* The TEST_ASSERT_SUCCESS call above should change too... */
1223 if(U_SUCCESS(status)) {
1224 TEST_ASSERT(numFields == 4);
1225 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1226 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1227 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1228 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1229 TEST_ASSERT(fields[4] == NULL);
1230 TEST_ASSERT(fields[8] == NULL);
1231 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1232 spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */
1233 TEST_ASSERT(spaceNeeded == requiredCapacity);
1234 }
1235 }
374ca955
A
1236
1237 uregex_close(re);
1238 }
1239
46f4442e
A
1240 /*
1241 * set/getTimeLimit
1242 */
1243 TEST_SETUP("abc$", "abcdef", 0);
1244 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1245 uregex_setTimeLimit(re, 1000, &status);
1246 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1247 TEST_ASSERT_SUCCESS(status);
1248 uregex_setTimeLimit(re, -1, &status);
1249 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1250 status = U_ZERO_ERROR;
1251 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1252 TEST_TEARDOWN;
1253
1254 /*
1255 * set/get Stack Limit
1256 */
1257 TEST_SETUP("abc$", "abcdef", 0);
1258 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1259 uregex_setStackLimit(re, 40000, &status);
1260 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1261 TEST_ASSERT_SUCCESS(status);
1262 uregex_setStackLimit(re, -1, &status);
1263 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1264 status = U_ZERO_ERROR;
1265 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1266 TEST_TEARDOWN;
1267
1268
1269 /*
1270 * Get/Set callback functions
1271 * This test is copied from intltest regex/Callbacks
1272 * The pattern and test data will run long enough to cause the callback
1273 * to be invoked. The nested '+' operators give exponential time
1274 * behavior with increasing string length.
1275 */
1276 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1277 callBackContext cbInfo = {4, 0, 0};
1278 const void *pContext = &cbInfo;
1279 URegexMatchCallback *returnedFn = &TestCallbackFn;
1280
1281 /* Getting the callback fn when it hasn't been set must return NULL */
1282 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1283 TEST_ASSERT_SUCCESS(status);
1284 TEST_ASSERT(returnedFn == NULL);
1285 TEST_ASSERT(pContext == NULL);
1286
1287 /* Set the callback and do a match. */
1288 /* The callback function should record that it has been called. */
1289 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1290 TEST_ASSERT_SUCCESS(status);
1291 TEST_ASSERT(cbInfo.numCalls == 0);
1292 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1293 TEST_ASSERT_SUCCESS(status);
1294 TEST_ASSERT(cbInfo.numCalls > 0);
1295
1296 /* Getting the callback should return the values that were set above. */
1297 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1298 TEST_ASSERT(returnedFn == &TestCallbackFn);
1299 TEST_ASSERT(pContext == &cbInfo);
1300
1301 TEST_TEARDOWN;
374ca955
A
1302}
1303
46f4442e
A
1304
1305
73c04bcf
A
1306static void TestBug4315(void) {
1307 UErrorCode theICUError = U_ZERO_ERROR;
1308 URegularExpression *theRegEx;
1309 UChar *textBuff;
1310 const char *thePattern;
1311 UChar theString[100];
1312 UChar *destFields[24];
1313 int32_t neededLength1;
1314 int32_t neededLength2;
1315
1316 int32_t wordCount = 0;
1317 int32_t destFieldsSize = 24;
1318
1319 thePattern = "ck ";
1320 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1321
1322 /* open a regex */
1323 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1324 TEST_ASSERT_SUCCESS(theICUError);
1325
1326 /* set the input string */
1327 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1328 TEST_ASSERT_SUCCESS(theICUError);
1329
1330 /* split */
1331 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1332 * error occurs! */
1333 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1334 destFieldsSize, &theICUError);
1335
1336 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1337 TEST_ASSERT(wordCount==3);
1338
1339 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1340 {
1341 theICUError = U_ZERO_ERROR;
1342 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1343 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1344 destFields, destFieldsSize, &theICUError);
1345 TEST_ASSERT(wordCount==3);
1346 TEST_ASSERT_SUCCESS(theICUError);
1347 TEST_ASSERT(neededLength1 == neededLength2);
1348 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1349 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1350 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1351 TEST_ASSERT(destFields[3] == NULL);
1352 free(textBuff);
1353 }
1354 uregex_close(theRegEx);
1355}
1356
729e4ab9
A
1357/* Based on TestRegexCAPI() */
1358static void TestUTextAPI(void) {
1359 UErrorCode status = U_ZERO_ERROR;
1360 URegularExpression *re;
1361 UText patternText = UTEXT_INITIALIZER;
1362 UChar pat[200];
1363 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1364
1365 /* Minimalist open/close */
1366 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1367 re = uregex_openUText(&patternText, 0, 0, &status);
1368 if (U_FAILURE(status)) {
1369 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1370 utext_close(&patternText);
1371 return;
1372 }
1373 uregex_close(re);
1374
1375 /* Open with all flag values set */
1376 status = U_ZERO_ERROR;
1377 re = uregex_openUText(&patternText,
1378 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1379 0, &status);
1380 TEST_ASSERT_SUCCESS(status);
1381 uregex_close(re);
1382
1383 /* Open with an invalid flag */
1384 status = U_ZERO_ERROR;
1385 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1386 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1387 uregex_close(re);
1388
1389 /* open with an invalid parameter */
1390 status = U_ZERO_ERROR;
1391 re = uregex_openUText(NULL,
1392 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1393 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1394
1395 /*
1396 * clone
1397 */
1398 {
1399 URegularExpression *clone1;
1400 URegularExpression *clone2;
1401 URegularExpression *clone3;
1402 UChar testString1[30];
1403 UChar testString2[30];
1404 UBool result;
1405
1406
1407 status = U_ZERO_ERROR;
1408 re = uregex_openUText(&patternText, 0, 0, &status);
1409 TEST_ASSERT_SUCCESS(status);
1410 clone1 = uregex_clone(re, &status);
1411 TEST_ASSERT_SUCCESS(status);
1412 TEST_ASSERT(clone1 != NULL);
1413
1414 status = U_ZERO_ERROR;
1415 clone2 = uregex_clone(re, &status);
1416 TEST_ASSERT_SUCCESS(status);
1417 TEST_ASSERT(clone2 != NULL);
1418 uregex_close(re);
1419
1420 status = U_ZERO_ERROR;
1421 clone3 = uregex_clone(clone2, &status);
1422 TEST_ASSERT_SUCCESS(status);
1423 TEST_ASSERT(clone3 != NULL);
1424
1425 u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1426 u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1427
1428 status = U_ZERO_ERROR;
1429 uregex_setText(clone1, testString1, -1, &status);
1430 TEST_ASSERT_SUCCESS(status);
1431 result = uregex_lookingAt(clone1, 0, &status);
1432 TEST_ASSERT_SUCCESS(status);
1433 TEST_ASSERT(result==TRUE);
1434
1435 status = U_ZERO_ERROR;
1436 uregex_setText(clone2, testString2, -1, &status);
1437 TEST_ASSERT_SUCCESS(status);
1438 result = uregex_lookingAt(clone2, 0, &status);
1439 TEST_ASSERT_SUCCESS(status);
1440 TEST_ASSERT(result==FALSE);
1441 result = uregex_find(clone2, 0, &status);
1442 TEST_ASSERT_SUCCESS(status);
1443 TEST_ASSERT(result==TRUE);
1444
1445 uregex_close(clone1);
1446 uregex_close(clone2);
1447 uregex_close(clone3);
1448
1449 }
1450
1451 /*
1452 * pattern() and patternText()
1453 */
1454 {
1455 const UChar *resultPat;
1456 int32_t resultLen;
1457 UText *resultText;
1458 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1459 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1460 u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1461 status = U_ZERO_ERROR;
1462
1463 utext_openUTF8(&patternText, str_hello, -1, &status);
1464 re = uregex_open(pat, -1, 0, NULL, &status);
1465 resultPat = uregex_pattern(re, &resultLen, &status);
1466 TEST_ASSERT_SUCCESS(status);
1467
1468 /* The TEST_ASSERT_SUCCESS above should change too... */
1469 if (U_SUCCESS(status)) {
1470 TEST_ASSERT(resultLen == -1);
1471 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1472 }
1473
1474 resultText = uregex_patternUText(re, &status);
1475 TEST_ASSERT_SUCCESS(status);
1476 TEST_ASSERT_UTEXT(str_hello, resultText);
1477
1478 uregex_close(re);
1479
1480 status = U_ZERO_ERROR;
1481 re = uregex_open(pat, 3, 0, NULL, &status);
1482 resultPat = uregex_pattern(re, &resultLen, &status);
1483 TEST_ASSERT_SUCCESS(status);
1484
1485 /* The TEST_ASSERT_SUCCESS above should change too... */
1486 if (U_SUCCESS(status)) {
1487 TEST_ASSERT(resultLen == 3);
1488 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1489 TEST_ASSERT(u_strlen(resultPat) == 3);
1490 }
1491
1492 resultText = uregex_patternUText(re, &status);
1493 TEST_ASSERT_SUCCESS(status);
1494 TEST_ASSERT_UTEXT(str_hel, resultText);
1495
1496 uregex_close(re);
1497 }
1498
1499 /*
1500 * setUText() and lookingAt()
1501 */
1502 {
1503 UText text1 = UTEXT_INITIALIZER;
1504 UText text2 = UTEXT_INITIALIZER;
1505 UBool result;
1506 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1507 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1508 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1509 status = U_ZERO_ERROR;
1510 utext_openUTF8(&text1, str_abcccd, -1, &status);
1511 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1512
1513 utext_openUTF8(&patternText, str_abcd, -1, &status);
1514 re = uregex_openUText(&patternText, 0, NULL, &status);
1515 TEST_ASSERT_SUCCESS(status);
1516
1517 /* Operation before doing a setText should fail... */
1518 status = U_ZERO_ERROR;
1519 uregex_lookingAt(re, 0, &status);
1520 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1521
1522 status = U_ZERO_ERROR;
1523 uregex_setUText(re, &text1, &status);
1524 result = uregex_lookingAt(re, 0, &status);
1525 TEST_ASSERT(result == TRUE);
1526 TEST_ASSERT_SUCCESS(status);
1527
1528 status = U_ZERO_ERROR;
1529 uregex_setUText(re, &text2, &status);
1530 result = uregex_lookingAt(re, 0, &status);
1531 TEST_ASSERT(result == FALSE);
1532 TEST_ASSERT_SUCCESS(status);
1533
1534 status = U_ZERO_ERROR;
1535 uregex_setUText(re, &text1, &status);
1536 result = uregex_lookingAt(re, 0, &status);
1537 TEST_ASSERT(result == TRUE);
1538 TEST_ASSERT_SUCCESS(status);
1539
1540 uregex_close(re);
1541 utext_close(&text1);
1542 utext_close(&text2);
1543 }
1544
1545
1546 /*
1547 * getText() and getUText()
1548 */
1549 {
1550 UText text1 = UTEXT_INITIALIZER;
1551 UText text2 = UTEXT_INITIALIZER;
1552 UChar text2Chars[20];
1553 UText *resultText;
1554 const UChar *result;
1555 int32_t textLength;
1556 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1557 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1558 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1559
1560
1561 status = U_ZERO_ERROR;
1562 utext_openUTF8(&text1, str_abcccd, -1, &status);
1563 u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
1564 utext_openUChars(&text2, text2Chars, -1, &status);
1565
1566 utext_openUTF8(&patternText, str_abcd, -1, &status);
1567 re = uregex_openUText(&patternText, 0, NULL, &status);
1568
1569 /* First set a UText */
1570 uregex_setUText(re, &text1, &status);
1571 resultText = uregex_getUText(re, NULL, &status);
1572 TEST_ASSERT_SUCCESS(status);
1573 TEST_ASSERT(resultText != &text1);
1574 utext_setNativeIndex(resultText, 0);
1575 utext_setNativeIndex(&text1, 0);
1576 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1577 utext_close(resultText);
1578
1579 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1580 TEST_ASSERT(textLength == -1 || textLength == 6);
1581 resultText = uregex_getUText(re, NULL, &status);
1582 TEST_ASSERT_SUCCESS(status);
1583 TEST_ASSERT(resultText != &text1);
1584 utext_setNativeIndex(resultText, 0);
1585 utext_setNativeIndex(&text1, 0);
1586 TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1587 utext_close(resultText);
1588
1589 /* Then set a UChar * */
1590 uregex_setText(re, text2Chars, 7, &status);
1591 resultText = uregex_getUText(re, NULL, &status);
1592 TEST_ASSERT_SUCCESS(status);
1593 utext_setNativeIndex(resultText, 0);
1594 utext_setNativeIndex(&text2, 0);
1595 TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
1596 utext_close(resultText);
1597 result = uregex_getText(re, &textLength, &status);
1598 TEST_ASSERT(textLength == 7);
1599
1600 uregex_close(re);
1601 utext_close(&text1);
1602 utext_close(&text2);
1603 }
1604
1605 /*
1606 * matches()
1607 */
1608 {
1609 UText text1 = UTEXT_INITIALIZER;
1610 UBool result;
1611 UText nullText = UTEXT_INITIALIZER;
1612 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1613 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1614
1615 status = U_ZERO_ERROR;
1616 utext_openUTF8(&text1, str_abcccde, -1, &status);
1617 utext_openUTF8(&patternText, str_abcd, -1, &status);
1618 re = uregex_openUText(&patternText, 0, NULL, &status);
1619
1620 uregex_setUText(re, &text1, &status);
1621 result = uregex_matches(re, 0, &status);
1622 TEST_ASSERT(result == FALSE);
1623 TEST_ASSERT_SUCCESS(status);
1624 uregex_close(re);
1625
1626 status = U_ZERO_ERROR;
1627 re = uregex_openC(".?", 0, NULL, &status);
1628 uregex_setUText(re, &text1, &status);
1629 result = uregex_matches(re, 7, &status);
1630 TEST_ASSERT(result == TRUE);
1631 TEST_ASSERT_SUCCESS(status);
1632
1633 status = U_ZERO_ERROR;
1634 utext_openUTF8(&nullText, "", -1, &status);
1635 uregex_setUText(re, &nullText, &status);
1636 TEST_ASSERT_SUCCESS(status);
1637 result = uregex_matches(re, 0, &status);
1638 TEST_ASSERT(result == TRUE);
1639 TEST_ASSERT_SUCCESS(status);
1640
1641 uregex_close(re);
1642 utext_close(&text1);
1643 utext_close(&nullText);
1644 }
1645
1646
1647 /*
1648 * lookingAt() Used in setText test.
1649 */
1650
1651
1652 /*
1653 * find(), findNext, start, end, reset
1654 */
1655 {
1656 UChar text1[50];
1657 UBool result;
1658 u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2);
1659 status = U_ZERO_ERROR;
1660 re = uregex_openC("rx", 0, NULL, &status);
1661
1662 uregex_setText(re, text1, -1, &status);
1663 result = uregex_find(re, 0, &status);
1664 TEST_ASSERT(result == TRUE);
1665 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1666 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1667 TEST_ASSERT_SUCCESS(status);
1668
1669 result = uregex_find(re, 9, &status);
1670 TEST_ASSERT(result == TRUE);
1671 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1672 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1673 TEST_ASSERT_SUCCESS(status);
1674
1675 result = uregex_find(re, 14, &status);
1676 TEST_ASSERT(result == FALSE);
1677 TEST_ASSERT_SUCCESS(status);
1678
1679 status = U_ZERO_ERROR;
1680 uregex_reset(re, 0, &status);
1681
1682 result = uregex_findNext(re, &status);
1683 TEST_ASSERT(result == TRUE);
1684 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1685 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1686 TEST_ASSERT_SUCCESS(status);
1687
1688 result = uregex_findNext(re, &status);
1689 TEST_ASSERT(result == TRUE);
1690 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1691 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1692 TEST_ASSERT_SUCCESS(status);
1693
1694 status = U_ZERO_ERROR;
1695 uregex_reset(re, 12, &status);
1696
1697 result = uregex_findNext(re, &status);
1698 TEST_ASSERT(result == TRUE);
1699 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1700 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1701 TEST_ASSERT_SUCCESS(status);
1702
1703 result = uregex_findNext(re, &status);
1704 TEST_ASSERT(result == FALSE);
1705 TEST_ASSERT_SUCCESS(status);
1706
1707 uregex_close(re);
1708 }
1709
1710 /*
1711 * group()
1712 */
1713 {
1714 UChar text1[80];
1715 UText *actual;
1716 UBool result;
1717
1718 const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1719 const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1720
1721
1722 u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2);
1723
1724 status = U_ZERO_ERROR;
1725 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1726 TEST_ASSERT_SUCCESS(status);
1727
1728 uregex_setText(re, text1, -1, &status);
1729 result = uregex_find(re, 0, &status);
1730 TEST_ASSERT(result==TRUE);
1731
1732 /* Capture Group 0, the full match. Should succeed. */
1733 status = U_ZERO_ERROR;
1734 actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1735 TEST_ASSERT_SUCCESS(status);
1736 TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1737 utext_close(actual);
1738
1739 /* Capture Group 0 with shallow clone API. Should succeed. */
1740 status = U_ZERO_ERROR;
1741 {
1742 int64_t group_len;
1743 int32_t len16;
1744 UErrorCode shallowStatus = U_ZERO_ERROR;
1745 int64_t nativeIndex;
1746 UChar *groupChars;
1747 UText groupText = UTEXT_INITIALIZER;
1748
1749 actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1750 TEST_ASSERT_SUCCESS(status);
1751
1752 nativeIndex = utext_getNativeIndex(actual);
1753 /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */
1754 /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */
1755 len16 = group_len;
1756
1757 groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
1758 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1759
1760 utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1761
1762 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1763 utext_close(&groupText);
1764 free(groupChars);
1765 }
1766 utext_close(actual);
1767
1768 /* Capture group #1. Should succeed. */
1769 status = U_ZERO_ERROR;
1770 actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1771 TEST_ASSERT_SUCCESS(status);
1772 TEST_ASSERT_UTEXT(str_interior, actual);
1773 utext_close(actual);
1774
1775 /* Capture group out of range. Error. */
1776 status = U_ZERO_ERROR;
1777 actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1778 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1779 TEST_ASSERT(utext_nativeLength(actual) == 0);
1780 utext_close(actual);
1781
1782 uregex_close(re);
1783
1784 }
1785
1786 /*
1787 * replaceFirst()
1788 */
1789 {
1790 UChar text1[80];
1791 UChar text2[80];
1792 UText replText = UTEXT_INITIALIZER;
1793 UText *result;
1794 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1795 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1796 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1797 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1798 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1799 status = U_ZERO_ERROR;
1800 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
1801 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1802 utext_openUTF8(&replText, str_1x, -1, &status);
1803
1804 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1805 TEST_ASSERT_SUCCESS(status);
1806
1807 /* Normal case, with match */
1808 uregex_setText(re, text1, -1, &status);
1809 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1810 TEST_ASSERT_SUCCESS(status);
1811 TEST_ASSERT_UTEXT(str_Replxxx, result);
1812 utext_close(result);
1813
1814 /* No match. Text should copy to output with no changes. */
1815 uregex_setText(re, text2, -1, &status);
1816 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1817 TEST_ASSERT_SUCCESS(status);
1818 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1819 utext_close(result);
1820
1821 /* Unicode escapes */
1822 uregex_setText(re, text1, -1, &status);
1823 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1824 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1825 TEST_ASSERT_SUCCESS(status);
1826 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1827 utext_close(result);
1828
1829 uregex_close(re);
1830 utext_close(&replText);
1831 }
1832
1833
1834 /*
1835 * replaceAll()
1836 */
1837 {
1838 UChar text1[80];
1839 UChar text2[80];
1840 UText replText = UTEXT_INITIALIZER;
1841 UText *result;
1842 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1843 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1844 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1845 status = U_ZERO_ERROR;
1846 u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2);
1847 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1848 utext_openUTF8(&replText, str_1, -1, &status);
1849
1850 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1851 TEST_ASSERT_SUCCESS(status);
1852
1853 /* Normal case, with match */
1854 uregex_setText(re, text1, -1, &status);
1855 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1856 TEST_ASSERT_SUCCESS(status);
1857 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1858 utext_close(result);
1859
1860 /* No match. Text should copy to output with no changes. */
1861 uregex_setText(re, text2, -1, &status);
1862 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1863 TEST_ASSERT_SUCCESS(status);
1864 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1865 utext_close(result);
1866
1867 uregex_close(re);
1868 utext_close(&replText);
1869 }
1870
1871
1872 /*
1873 * appendReplacement()
1874 */
1875 {
1876 UChar text[100];
1877 UChar repl[100];
1878 UChar buf[100];
1879 UChar *bufPtr;
1880 int32_t bufCap;
1881
1882 status = U_ZERO_ERROR;
1883 re = uregex_openC(".*", 0, 0, &status);
1884 TEST_ASSERT_SUCCESS(status);
1885
1886 u_uastrncpy(text, "whatever", sizeof(text)/2);
1887 u_uastrncpy(repl, "some other", sizeof(repl)/2);
1888 uregex_setText(re, text, -1, &status);
1889
1890 /* match covers whole target string */
1891 uregex_find(re, 0, &status);
1892 TEST_ASSERT_SUCCESS(status);
1893 bufPtr = buf;
1894 bufCap = sizeof(buf) / 2;
1895 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1896 TEST_ASSERT_SUCCESS(status);
1897 TEST_ASSERT_STRING("some other", buf, TRUE);
1898
1899 /* Replacement string has \u \U escapes */
1900 uregex_find(re, 0, &status);
1901 TEST_ASSERT_SUCCESS(status);
1902 bufPtr = buf;
1903 bufCap = sizeof(buf) / 2;
1904 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1905 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1906 TEST_ASSERT_SUCCESS(status);
1907 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1908
1909 uregex_close(re);
1910 }
1911
1912
1913 /*
1914 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1915 */
1916
1917 /*
1918 * splitUText()
1919 */
1920 {
1921 UChar textToSplit[80];
1922 UChar text2[80];
1923 UText *fields[10];
1924 int32_t numFields;
1925 int32_t i;
1926
1927 u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2);
1928 u_uastrncpy(text2, "No match here.", sizeof(text2)/2);
1929
1930 status = U_ZERO_ERROR;
1931 re = uregex_openC(":", 0, NULL, &status);
1932
1933
1934 /* Simple split */
1935
1936 uregex_setText(re, textToSplit, -1, &status);
1937 TEST_ASSERT_SUCCESS(status);
1938
1939 /* The TEST_ASSERT_SUCCESS call above should change too... */
1940 if (U_SUCCESS(status)) {
1941 memset(fields, 0, sizeof(fields));
1942 numFields = uregex_splitUText(re, fields, 10, &status);
1943 TEST_ASSERT_SUCCESS(status);
1944
1945 /* The TEST_ASSERT_SUCCESS call above should change too... */
1946 if(U_SUCCESS(status)) {
1947 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1948 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1949 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1950 TEST_ASSERT(numFields == 3);
1951 TEST_ASSERT_UTEXT(str_first, fields[0]);
1952 TEST_ASSERT_UTEXT(str_second, fields[1]);
1953 TEST_ASSERT_UTEXT(str_third, fields[2]);
1954 TEST_ASSERT(fields[3] == NULL);
1955 }
1956 for(i = 0; i < numFields; i++) {
1957 utext_close(fields[i]);
1958 }
1959 }
1960
1961 uregex_close(re);
1962
1963
1964 /* Split with too few output strings available */
1965 status = U_ZERO_ERROR;
1966 re = uregex_openC(":", 0, NULL, &status);
1967 uregex_setText(re, textToSplit, -1, &status);
1968 TEST_ASSERT_SUCCESS(status);
1969
1970 /* The TEST_ASSERT_SUCCESS call above should change too... */
1971 if(U_SUCCESS(status)) {
1972 fields[0] = NULL;
1973 fields[1] = NULL;
1974 fields[2] = &patternText;
1975 numFields = uregex_splitUText(re, fields, 2, &status);
1976 TEST_ASSERT_SUCCESS(status);
1977
1978 /* The TEST_ASSERT_SUCCESS call above should change too... */
1979 if(U_SUCCESS(status)) {
1980 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
1981 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
1982 TEST_ASSERT(numFields == 2);
1983 TEST_ASSERT_UTEXT(str_first, fields[0]);
1984 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
1985 TEST_ASSERT(fields[2] == &patternText);
1986 }
1987 for(i = 0; i < numFields; i++) {
1988 utext_close(fields[i]);
1989 }
1990 }
1991
1992 uregex_close(re);
1993 }
1994
1995 /* splitUText(), part 2. Patterns with capture groups. The capture group text
1996 * comes out as additional fields. */
1997 {
1998 UChar textToSplit[80];
1999 UText *fields[10];
2000 int32_t numFields;
2001 int32_t i;
2002
2003 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2);
2004
2005 status = U_ZERO_ERROR;
2006 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2007
2008 uregex_setText(re, textToSplit, -1, &status);
2009 TEST_ASSERT_SUCCESS(status);
2010
2011 /* The TEST_ASSERT_SUCCESS call above should change too... */
2012 if(U_SUCCESS(status)) {
2013 memset(fields, 0, sizeof(fields));
2014 numFields = uregex_splitUText(re, fields, 10, &status);
2015 TEST_ASSERT_SUCCESS(status);
2016
2017 /* The TEST_ASSERT_SUCCESS call above should change too... */
2018 if(U_SUCCESS(status)) {
2019 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2020 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2021 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2022 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2023 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2024
2025 TEST_ASSERT(numFields == 5);
2026 TEST_ASSERT_UTEXT(str_first, fields[0]);
2027 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2028 TEST_ASSERT_UTEXT(str_second, fields[2]);
2029 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2030 TEST_ASSERT_UTEXT(str_third, fields[4]);
2031 TEST_ASSERT(fields[5] == NULL);
2032 }
2033 for(i = 0; i < numFields; i++) {
2034 utext_close(fields[i]);
2035 }
2036 }
2037
2038 /* Split with too few output strings available (2) */
2039 status = U_ZERO_ERROR;
2040 fields[0] = NULL;
2041 fields[1] = NULL;
2042 fields[2] = &patternText;
2043 numFields = uregex_splitUText(re, fields, 2, &status);
2044 TEST_ASSERT_SUCCESS(status);
2045
2046 /* The TEST_ASSERT_SUCCESS call above should change too... */
2047 if(U_SUCCESS(status)) {
2048 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2049 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2050 TEST_ASSERT(numFields == 2);
2051 TEST_ASSERT_UTEXT(str_first, fields[0]);
2052 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2053 TEST_ASSERT(fields[2] == &patternText);
2054 }
2055 for(i = 0; i < numFields; i++) {
2056 utext_close(fields[i]);
2057 }
2058
2059
2060 /* Split with too few output strings available (3) */
2061 status = U_ZERO_ERROR;
2062 fields[0] = NULL;
2063 fields[1] = NULL;
2064 fields[2] = NULL;
2065 fields[3] = &patternText;
2066 numFields = uregex_splitUText(re, fields, 3, &status);
2067 TEST_ASSERT_SUCCESS(status);
2068
2069 /* The TEST_ASSERT_SUCCESS call above should change too... */
2070 if(U_SUCCESS(status)) {
2071 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2072 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2073 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2074 TEST_ASSERT(numFields == 3);
2075 TEST_ASSERT_UTEXT(str_first, fields[0]);
2076 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2077 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2078 TEST_ASSERT(fields[3] == &patternText);
2079 }
2080 for(i = 0; i < numFields; i++) {
2081 utext_close(fields[i]);
2082 }
2083
2084 /* Split with just enough output strings available (5) */
2085 status = U_ZERO_ERROR;
2086 fields[0] = NULL;
2087 fields[1] = NULL;
2088 fields[2] = NULL;
2089 fields[3] = NULL;
2090 fields[4] = NULL;
2091 fields[5] = &patternText;
2092 numFields = uregex_splitUText(re, fields, 5, &status);
2093 TEST_ASSERT_SUCCESS(status);
2094
2095 /* The TEST_ASSERT_SUCCESS call above should change too... */
2096 if(U_SUCCESS(status)) {
2097 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2098 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2099 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2100 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2101 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2102
2103 TEST_ASSERT(numFields == 5);
2104 TEST_ASSERT_UTEXT(str_first, fields[0]);
2105 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2106 TEST_ASSERT_UTEXT(str_second, fields[2]);
2107 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2108 TEST_ASSERT_UTEXT(str_third, fields[4]);
2109 TEST_ASSERT(fields[5] == &patternText);
2110 }
2111 for(i = 0; i < numFields; i++) {
2112 utext_close(fields[i]);
2113 }
2114
2115 /* Split, end of text is a field delimiter. */
2116 status = U_ZERO_ERROR;
2117 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2118 TEST_ASSERT_SUCCESS(status);
2119
2120 /* The TEST_ASSERT_SUCCESS call above should change too... */
2121 if(U_SUCCESS(status)) {
2122 memset(fields, 0, sizeof(fields));
2123 fields[9] = &patternText;
2124 numFields = uregex_splitUText(re, fields, 9, &status);
2125 TEST_ASSERT_SUCCESS(status);
2126
2127 /* The TEST_ASSERT_SUCCESS call above should change too... */
2128 if(U_SUCCESS(status)) {
2129 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2130 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2131 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2132 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2133
2134 TEST_ASSERT(numFields == 4);
2135 TEST_ASSERT_UTEXT(str_first, fields[0]);
2136 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2137 TEST_ASSERT_UTEXT(str_second, fields[2]);
2138 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2139 TEST_ASSERT(fields[4] == NULL);
2140 TEST_ASSERT(fields[8] == NULL);
2141 TEST_ASSERT(fields[9] == &patternText);
2142 }
2143 for(i = 0; i < numFields; i++) {
2144 utext_close(fields[i]);
2145 }
2146 }
2147
2148 uregex_close(re);
2149 }
2150 utext_close(&patternText);
2151}
2152
374ca955 2153#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */