icuSources/test/cintltst/reapits.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2004-2010, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File reapits.c
   9 *
  10 *********************************************************************************/
  11 /*C API TEST FOR Regular Expressions */
  12 /**
  13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
  14 *   try to test the full functionality.  It just calls each function and verifies that it
  15 *   works on a basic level.
  16 *
  17 *   More complete testing of regular expression functionality is done with the C++ tests.
  18 **/
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  23
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include "unicode/uloc.h"
  27 #include "unicode/uregex.h"
  28 #include "unicode/ustring.h"
  29 #include "unicode/utext.h"
  30 #include "cintltst.h"
  31
  32 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
  33 log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
  34
  35 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
  36 log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}}
  37
  38 /*
  39  *   TEST_SETUP and TEST_TEARDOWN
  40  *         macros to handle the boilerplate around setting up regex test cases.
  41  *         parameteres to setup:
  42  *              pattern:     The regex pattern, a (char *) null terminated C string.
  43  *              testString:  The string data, also a (char *) C string.
  44  *              flags:       Regex flags to set when compiling the pattern
  45  *
  46  *         Put arbitrary test code between SETUP and TEARDOWN.
  47  *         're" is the compiled, ready-to-go  regular expression.
  48  */
  49 #define TEST_SETUP(pattern, testString, flags) {  \
  50     UChar   *srcString = NULL;  \
  51     status = U_ZERO_ERROR; \
  52     re = uregex_openC(pattern, flags, NULL, &status);  \
  53     TEST_ASSERT_SUCCESS(status);   \
  54     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
  55     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
  56     uregex_setText(re, srcString, -1, &status); \
  57     TEST_ASSERT_SUCCESS(status);  \
  58     if (U_SUCCESS(status)) {
  59
  60 #define TEST_TEARDOWN  \
  61     }  \
  62     TEST_ASSERT_SUCCESS(status);  \
  63     uregex_close(re);  \
  64     free(srcString);   \
  65     }
  66
  67
  68 /**
  69  * @param expected utf-8 array of bytes to be expected
  70  */
  71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
  72      char     buf_inside_macro[120];
  73      int32_t  len = (int32_t)strlen(expected);
  74      UBool    success;
  75      if (nulTerm) {
  76          u_austrncpy(buf_inside_macro, (actual), len+1);
  77          buf_inside_macro[len+2] = 0;
  78          success = (strcmp((expected), buf_inside_macro) == 0);
  79      } else {
  80          u_austrncpy(buf_inside_macro, (actual), len);
  81          buf_inside_macro[len+1] = 0;
  82          success = (strncmp((expected), buf_inside_macro, len) == 0);
  83      }
  84      if (success == FALSE) {
  85          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
  86              file, line, (expected), buf_inside_macro);
  87      }
  88 }
  89
  90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
  91
  92
  93 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
  94     UErrorCode status = U_ZERO_ERROR;
  95     UText expectedText = UTEXT_INITIALIZER;
  96     utext_openUTF8(&expectedText, expected, -1, &status);
  97     utext_setNativeIndex(actual, 0);
  98     if (utext_compare(&expectedText, -1, actual, -1) != 0) {
  99         UChar32 c;
 100         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
 101         c = utext_next32From(actual, 0);
 102         while (c != U_SENTINEL) {
 103             if (0x20<c && c <0x7e) {
 104                 log_err("%c", c);
 105             } else {
 106                 log_err("%#x", c);
 107             }
 108             c = UTEXT_NEXT32(actual);
 109         }
 110         log_err("\"\n");
 111     }
 112     utext_close(&expectedText);
 113 }
 114
 115 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
 116
 117
 118
 119 static void TestRegexCAPI(void);
 120 static void TestBug4315(void);
 121 static void TestUTextAPI(void);
 122
 123 void addURegexTest(TestNode** root);
 124
 125 void addURegexTest(TestNode** root)
 126 {
 127     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
 128     addTest(root, &TestBug4315,   "regex/TestBug4315");
 129     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
 130 }
 131
 132 /*
 133  * Call back function and context struct used for testing
 134  *    regular expression user callbacks.  This test is mostly the same as
 135  *   the corresponding C++ test in intltest.
 136  */
 137 typedef struct callBackContext {
 138     int32_t          maxCalls;
 139     int32_t          numCalls;
 140     int32_t          lastSteps;
 141 } callBackContext;
 142
 143 static UBool U_EXPORT2 U_CALLCONV
 144 TestCallbackFn(const void *context, int32_t steps) {
 145   callBackContext  *info = (callBackContext *)context;
 146   if (info->lastSteps+1 != steps) {
 147       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
 148   }
 149   info->lastSteps = steps;
 150   info->numCalls++;
 151   return (info->numCalls < info->maxCalls);
 152 }
 153
 154 /*
 155  *   Regular Expression C API Tests
 156  */
 157 static void TestRegexCAPI(void) {
 158     UErrorCode           status = U_ZERO_ERROR;
 159     URegularExpression  *re;
 160     UChar                pat[200];
 161     UChar               *minus1;
 162
 163     memset(&minus1, -1, sizeof(minus1));
 164
  165     /* Minimalist open/close */
 166     u_uastrncpy(pat, "abc*", sizeof(pat)/2);
 167     re = uregex_open(pat, -1, 0, 0, &status);
 168     if (U_FAILURE(status)) {
 169          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
 170          return;
 171     }
 172     uregex_close(re);
 173
 174     /* Open with all flag values set */
 175     status = U_ZERO_ERROR;
 176     re = uregex_open(pat, -1,
 177         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
 178         0, &status);
 179     TEST_ASSERT_SUCCESS(status);
 180     uregex_close(re);
 181
 182     /* Open with an invalid flag */
 183     status = U_ZERO_ERROR;
 184     re = uregex_open(pat, -1, 0x40000000, 0, &status);
 185     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
 186     uregex_close(re);
 187
 188     /* Open with an unimplemented flag */
 189     status = U_ZERO_ERROR;
 190     re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
 191     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
 192     uregex_close(re);
 193
 194     /* openC with an invalid parameter */
 195     status = U_ZERO_ERROR;
 196     re = uregex_openC(NULL,
 197         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 198     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
 199
 200     /* openC with an invalid parameter */
 201     status = U_USELESS_COLLATOR_ERROR;
 202     re = uregex_openC(NULL,
 203         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 204     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
 205
 206     /* openC   open from a C string */
 207     {
 208         const UChar   *p;
 209         int32_t  len;
 210         status = U_ZERO_ERROR;
 211         re = uregex_openC("abc*", 0, 0, &status);
 212         TEST_ASSERT_SUCCESS(status);
 213         p = uregex_pattern(re, &len, &status);
 214         TEST_ASSERT_SUCCESS(status);
 215
 216         /* The TEST_ASSERT_SUCCESS above should change too... */
 217         if(U_SUCCESS(status)) {
 218             u_uastrncpy(pat, "abc*", sizeof(pat)/2);
 219             TEST_ASSERT(u_strcmp(pat, p) == 0);
 220             TEST_ASSERT(len==(int32_t)strlen("abc*"));
 221         }
 222
 223         uregex_close(re);
 224
 225         /*  TODO:  Open with ParseError parameter */
 226     }
 227
 228     /*
 229      *  clone
 230      */
 231     {
 232         URegularExpression *clone1;
 233         URegularExpression *clone2;
 234         URegularExpression *clone3;
 235         UChar  testString1[30];
 236         UChar  testString2[30];
 237         UBool  result;
 238
 239
 240         status = U_ZERO_ERROR;
 241         re = uregex_openC("abc*", 0, 0, &status);
 242         TEST_ASSERT_SUCCESS(status);
 243         clone1 = uregex_clone(re, &status);
 244         TEST_ASSERT_SUCCESS(status);
 245         TEST_ASSERT(clone1 != NULL);
 246
 247         status = U_ZERO_ERROR;
 248         clone2 = uregex_clone(re, &status);
 249         TEST_ASSERT_SUCCESS(status);
 250         TEST_ASSERT(clone2 != NULL);
 251         uregex_close(re);
 252
 253         status = U_ZERO_ERROR;
 254         clone3 = uregex_clone(clone2, &status);
 255         TEST_ASSERT_SUCCESS(status);
 256         TEST_ASSERT(clone3 != NULL);
 257
 258         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
 259         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
 260
 261         status = U_ZERO_ERROR;
 262         uregex_setText(clone1, testString1, -1, &status);
 263         TEST_ASSERT_SUCCESS(status);
 264         result = uregex_lookingAt(clone1, 0, &status);
 265         TEST_ASSERT_SUCCESS(status);
 266         TEST_ASSERT(result==TRUE);
 267
 268         status = U_ZERO_ERROR;
 269         uregex_setText(clone2, testString2, -1, &status);
 270         TEST_ASSERT_SUCCESS(status);
 271         result = uregex_lookingAt(clone2, 0, &status);
 272         TEST_ASSERT_SUCCESS(status);
 273         TEST_ASSERT(result==FALSE);
 274         result = uregex_find(clone2, 0, &status);
 275         TEST_ASSERT_SUCCESS(status);
 276         TEST_ASSERT(result==TRUE);
 277
 278         uregex_close(clone1);
 279         uregex_close(clone2);
 280         uregex_close(clone3);
 281
 282     }
 283
 284     /*
 285      *  pattern()
 286     */
 287     {
 288         const UChar  *resultPat;
 289         int32_t       resultLen;
 290         u_uastrncpy(pat, "hello", sizeof(pat)/2);
 291         status = U_ZERO_ERROR;
 292         re = uregex_open(pat, -1, 0, NULL, &status);
 293         resultPat = uregex_pattern(re, &resultLen, &status);
 294         TEST_ASSERT_SUCCESS(status);
 295
 296         /* The TEST_ASSERT_SUCCESS above should change too... */
 297         if (U_SUCCESS(status)) {
 298             TEST_ASSERT(resultLen == -1);
 299             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
 300         }
 301
 302         uregex_close(re);
 303
 304         status = U_ZERO_ERROR;
 305         re = uregex_open(pat, 3, 0, NULL, &status);
 306         resultPat = uregex_pattern(re, &resultLen, &status);
 307         TEST_ASSERT_SUCCESS(status);
 308         TEST_ASSERT_SUCCESS(status);
 309
 310         /* The TEST_ASSERT_SUCCESS above should change too... */
 311         if (U_SUCCESS(status)) {
 312             TEST_ASSERT(resultLen == 3);
 313             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
 314             TEST_ASSERT(u_strlen(resultPat) == 3);
 315         }
 316
 317         uregex_close(re);
 318     }
 319
 320     /*
 321      *  flags()
 322      */
 323     {
 324         int32_t  t;
 325
 326         status = U_ZERO_ERROR;
 327         re = uregex_open(pat, -1, 0, NULL, &status);
 328         t  = uregex_flags(re, &status);
 329         TEST_ASSERT_SUCCESS(status);
 330         TEST_ASSERT(t == 0);
 331         uregex_close(re);
 332
 333         status = U_ZERO_ERROR;
 334         re = uregex_open(pat, -1, 0, NULL, &status);
 335         t  = uregex_flags(re, &status);
 336         TEST_ASSERT_SUCCESS(status);
 337         TEST_ASSERT(t == 0);
 338         uregex_close(re);
 339
 340         status = U_ZERO_ERROR;
 341         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
 342         t  = uregex_flags(re, &status);
 343         TEST_ASSERT_SUCCESS(status);
 344         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
 345         uregex_close(re);
 346     }
 347
 348     /*
 349      *  setText() and lookingAt()
 350      */
 351     {
 352         UChar  text1[50];
 353         UChar  text2[50];
 354         UBool  result;
 355
 356         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
 357         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
 358         status = U_ZERO_ERROR;
 359         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
 360         re = uregex_open(pat, -1, 0, NULL, &status);
 361         TEST_ASSERT_SUCCESS(status);
 362
 363         /* Operation before doing a setText should fail... */
 364         status = U_ZERO_ERROR;
 365         uregex_lookingAt(re, 0, &status);
 366         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
 367
 368         status = U_ZERO_ERROR;
 369         uregex_setText(re, text1, -1, &status);
 370         result = uregex_lookingAt(re, 0, &status);
 371         TEST_ASSERT(result == TRUE);
 372         TEST_ASSERT_SUCCESS(status);
 373
 374         status = U_ZERO_ERROR;
 375         uregex_setText(re, text2, -1, &status);
 376         result = uregex_lookingAt(re, 0, &status);
 377         TEST_ASSERT(result == FALSE);
 378         TEST_ASSERT_SUCCESS(status);
 379
 380         status = U_ZERO_ERROR;
 381         uregex_setText(re, text1, -1, &status);
 382         result = uregex_lookingAt(re, 0, &status);
 383         TEST_ASSERT(result == TRUE);
 384         TEST_ASSERT_SUCCESS(status);
 385
 386         status = U_ZERO_ERROR;
 387         uregex_setText(re, text1, 5, &status);
 388         result = uregex_lookingAt(re, 0, &status);
 389         TEST_ASSERT(result == FALSE);
 390         TEST_ASSERT_SUCCESS(status);
 391
 392         status = U_ZERO_ERROR;
 393         uregex_setText(re, text1, 6, &status);
 394         result = uregex_lookingAt(re, 0, &status);
 395         TEST_ASSERT(result == TRUE);
 396         TEST_ASSERT_SUCCESS(status);
 397
 398         uregex_close(re);
 399     }
 400
 401
 402     /*
 403      *  getText()
 404      */
 405     {
 406         UChar    text1[50];
 407         UChar    text2[50];
 408         const UChar   *result;
 409         int32_t  textLength;
 410
 411         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
 412         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
 413         status = U_ZERO_ERROR;
 414         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
 415         re = uregex_open(pat, -1, 0, NULL, &status);
 416
 417         uregex_setText(re, text1, -1, &status);
 418         result = uregex_getText(re, &textLength, &status);
 419         TEST_ASSERT(result == text1);
 420         TEST_ASSERT(textLength == -1);
 421         TEST_ASSERT_SUCCESS(status);
 422
 423         status = U_ZERO_ERROR;
 424         uregex_setText(re, text2, 7, &status);
 425         result = uregex_getText(re, &textLength, &status);
 426         TEST_ASSERT(result == text2);
 427         TEST_ASSERT(textLength == 7);
 428         TEST_ASSERT_SUCCESS(status);
 429
 430         status = U_ZERO_ERROR;
 431         uregex_setText(re, text2, 4, &status);
 432         result = uregex_getText(re, &textLength, &status);
 433         TEST_ASSERT(result == text2);
 434         TEST_ASSERT(textLength == 4);
 435         TEST_ASSERT_SUCCESS(status);
 436         uregex_close(re);
 437     }
 438
 439     /*
 440      *  matches()
 441      */
 442     {
 443         UChar   text1[50];
 444         UBool   result;
 445         int     len;
 446         UChar   nullString[] = {0,0,0};
 447
 448         u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
 449         status = U_ZERO_ERROR;
 450         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
 451         re = uregex_open(pat, -1, 0, NULL, &status);
 452
 453         uregex_setText(re, text1, -1, &status);
 454         result = uregex_matches(re, 0, &status);
 455         TEST_ASSERT(result == FALSE);
 456         TEST_ASSERT_SUCCESS(status);
 457
 458         status = U_ZERO_ERROR;
 459         uregex_setText(re, text1, 6, &status);
 460         result = uregex_matches(re, 0, &status);
 461         TEST_ASSERT(result == TRUE);
 462         TEST_ASSERT_SUCCESS(status);
 463
 464         status = U_ZERO_ERROR;
 465         uregex_setText(re, text1, 6, &status);
 466         result = uregex_matches(re, 1, &status);
 467         TEST_ASSERT(result == FALSE);
 468         TEST_ASSERT_SUCCESS(status);
 469         uregex_close(re);
 470
 471         status = U_ZERO_ERROR;
 472         re = uregex_openC(".?", 0, NULL, &status);
 473         uregex_setText(re, text1, -1, &status);
 474         len = u_strlen(text1);
 475         result = uregex_matches(re, len, &status);
 476         TEST_ASSERT(result == TRUE);
 477         TEST_ASSERT_SUCCESS(status);
 478
 479         status = U_ZERO_ERROR;
 480         uregex_setText(re, nullString, -1, &status);
 481         TEST_ASSERT_SUCCESS(status);
 482         result = uregex_matches(re, 0, &status);
 483         TEST_ASSERT(result == TRUE);
 484         TEST_ASSERT_SUCCESS(status);
 485         uregex_close(re);
 486     }
 487
 488
 489     /*
 490      *  lookingAt()    Used in setText test.
 491      */
 492
 493
 494     /*
 495      *  find(), findNext, start, end, reset
 496      */
 497     {
 498         UChar    text1[50];
 499         UBool    result;
 500         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
 501         status = U_ZERO_ERROR;
 502         re = uregex_openC("rx", 0, NULL, &status);
 503
 504         uregex_setText(re, text1, -1, &status);
 505         result = uregex_find(re, 0, &status);
 506         TEST_ASSERT(result == TRUE);
 507         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 508         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 509         TEST_ASSERT_SUCCESS(status);
 510
 511         result = uregex_find(re, 9, &status);
 512         TEST_ASSERT(result == TRUE);
 513         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
 514         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
 515         TEST_ASSERT_SUCCESS(status);
 516
 517         result = uregex_find(re, 14, &status);
 518         TEST_ASSERT(result == FALSE);
 519         TEST_ASSERT_SUCCESS(status);
 520
 521         status = U_ZERO_ERROR;
 522         uregex_reset(re, 0, &status);
 523
 524         result = uregex_findNext(re, &status);
 525         TEST_ASSERT(result == TRUE);
 526         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 527         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 528         TEST_ASSERT_SUCCESS(status);
 529
 530         result = uregex_findNext(re, &status);
 531         TEST_ASSERT(result == TRUE);
 532         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
 533         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
 534         TEST_ASSERT_SUCCESS(status);
 535
 536         status = U_ZERO_ERROR;
 537         uregex_reset(re, 12, &status);
 538
 539         result = uregex_findNext(re, &status);
 540         TEST_ASSERT(result == TRUE);
 541         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
 542         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
 543         TEST_ASSERT_SUCCESS(status);
 544
 545         result = uregex_findNext(re, &status);
 546         TEST_ASSERT(result == FALSE);
 547         TEST_ASSERT_SUCCESS(status);
 548
 549         uregex_close(re);
 550     }
 551
 552     /*
 553      *  groupCount
 554      */
 555     {
 556         int32_t result;
 557
 558         status = U_ZERO_ERROR;
 559         re = uregex_openC("abc", 0, NULL, &status);
 560         result = uregex_groupCount(re, &status);
 561         TEST_ASSERT_SUCCESS(status);
 562         TEST_ASSERT(result == 0);
 563         uregex_close(re);
 564
 565         status = U_ZERO_ERROR;
 566         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
 567         result = uregex_groupCount(re, &status);
 568         TEST_ASSERT_SUCCESS(status);
 569         TEST_ASSERT(result == 3);
 570         uregex_close(re);
 571
 572     }
 573
 574
 575     /*
 576      *  group()
 577      */
 578     {
 579         UChar    text1[80];
 580         UChar    buf[80];
 581         UBool    result;
 582         int32_t  resultSz;
 583         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
 584
 585         status = U_ZERO_ERROR;
 586         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
 587         TEST_ASSERT_SUCCESS(status);
 588
 589
 590         uregex_setText(re, text1, -1, &status);
 591         result = uregex_find(re, 0, &status);
 592         TEST_ASSERT(result==TRUE);
 593
 594         /*  Capture Group 0, the full match.  Should succeed.  */
 595         status = U_ZERO_ERROR;
 596         resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
 597         TEST_ASSERT_SUCCESS(status);
 598         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
 599         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 600
 601         /*  Capture group #1.  Should succeed. */
 602         status = U_ZERO_ERROR;
 603         resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
 604         TEST_ASSERT_SUCCESS(status);
 605         TEST_ASSERT_STRING(" interior ", buf, TRUE);
 606         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
 607
 608         /*  Capture group out of range.  Error. */
 609         status = U_ZERO_ERROR;
 610         uregex_group(re, 2, buf, sizeof(buf)/2, &status);
 611         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 612
 613         /* NULL buffer, pure pre-flight */
 614         status = U_ZERO_ERROR;
 615         resultSz = uregex_group(re, 0, NULL, 0, &status);
 616         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 617         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 618
 619         /* Too small buffer, truncated string */
 620         status = U_ZERO_ERROR;
 621         memset(buf, -1, sizeof(buf));
 622         resultSz = uregex_group(re, 0, buf, 5, &status);
 623         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 624         TEST_ASSERT_STRING("abc i", buf, FALSE);
 625         TEST_ASSERT(buf[5] == (UChar)0xffff);
 626         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 627
 628         /* Output string just fits buffer, no NUL term. */
 629         status = U_ZERO_ERROR;
 630         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
 631         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 632         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
 633         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 634         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
 635
 636         uregex_close(re);
 637
 638     }
 639
 640     /*
 641      *  Regions
 642      */
 643
 644
 645         /* SetRegion(), getRegion() do something  */
 646         TEST_SETUP(".*", "0123456789ABCDEF", 0)
 647         UChar resultString[40];
 648         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
 649         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
 650         uregex_setRegion(re, 3, 6, &status);
 651         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
 652         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
 653         TEST_ASSERT(uregex_findNext(re, &status));
 654         TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
 655         TEST_ASSERT_STRING("345", resultString, TRUE);
 656         TEST_TEARDOWN;
 657
 658         /* find(start=-1) uses regions   */
 659         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 660         uregex_setRegion(re, 4, 6, &status);
 661         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 662         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 663         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 664         TEST_TEARDOWN;
 665
 666         /* find (start >=0) does not use regions   */
 667         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 668         uregex_setRegion(re, 4, 6, &status);
 669         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 670         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 671         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 672         TEST_TEARDOWN;
 673
 674         /* findNext() obeys regions    */
 675         TEST_SETUP(".", "0123456789ABCDEF", 0);
 676         uregex_setRegion(re, 4, 6, &status);
 677         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
 678         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 679         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
 680         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
 681         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
 682         TEST_TEARDOWN;
 683
 684         /* matches(start=-1) uses regions                                           */
 685         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
 686         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 687         uregex_setRegion(re, 4, 6, &status);
 688         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
 689         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 690         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 691         TEST_TEARDOWN;
 692
 693         /* matches (start >=0) does not use regions       */
 694         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 695         uregex_setRegion(re, 4, 6, &status);
 696         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
 697         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 698         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 699         TEST_TEARDOWN;
 700
 701         /* lookingAt(start=-1) uses regions                                         */
 702         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
 703         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 704         uregex_setRegion(re, 4, 6, &status);
 705         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
 706         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 707         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
 708         TEST_TEARDOWN;
 709
 710         /* lookingAt (start >=0) does not use regions  */
 711         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 712         uregex_setRegion(re, 4, 6, &status);
 713         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
 714         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 715         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
 716         TEST_TEARDOWN;
 717
 718         /* hitEnd()       */
 719         TEST_SETUP("[a-f]*", "abcdefghij", 0);
 720         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 721         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
 722         TEST_TEARDOWN;
 723
 724         TEST_SETUP("[a-f]*", "abcdef", 0);
 725         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 726         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
 727         TEST_TEARDOWN;
 728
 729         /* requireEnd   */
 730         TEST_SETUP("abcd", "abcd", 0);
 731         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 732         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
 733         TEST_TEARDOWN;
 734
 735         TEST_SETUP("abcd$", "abcd", 0);
 736         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 737         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
 738         TEST_TEARDOWN;
 739
 740         /* anchoringBounds        */
 741         TEST_SETUP("abc$", "abcdef", 0);
 742         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
 743         uregex_useAnchoringBounds(re, FALSE, &status);
 744         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
 745
 746         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
 747         uregex_useAnchoringBounds(re, TRUE, &status);
 748         uregex_setRegion(re, 0, 3, &status);
 749         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 750         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 751         TEST_TEARDOWN;
 752
 753         /* Transparent Bounds      */
 754         TEST_SETUP("abc(?=def)", "abcdef", 0);
 755         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
 756         uregex_useTransparentBounds(re, TRUE, &status);
 757         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
 758
 759         uregex_useTransparentBounds(re, FALSE, &status);
 760         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
 761         uregex_setRegion(re, 0, 3, &status);
 762         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
 763         uregex_useTransparentBounds(re, TRUE, &status);
 764         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
 765         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 766         TEST_TEARDOWN;
 767
 768
 769     /*
 770      *  replaceFirst()
 771      */
 772     {
 773         UChar    text1[80];
 774         UChar    text2[80];
 775         UChar    replText[80];
 776         UChar    buf[80];
 777         int32_t  resultSz;
 778         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
 779         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
 780         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
 781
 782         status = U_ZERO_ERROR;
 783         re = uregex_openC("x(.*?)x", 0, NULL, &status);
 784         TEST_ASSERT_SUCCESS(status);
 785
 786         /*  Normal case, with match */
 787         uregex_setText(re, text1, -1, &status);
 788         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
 789         TEST_ASSERT_SUCCESS(status);
 790         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
 791         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 792
 793         /* No match.  Text should copy to output with no changes.  */
 794         status = U_ZERO_ERROR;
 795         uregex_setText(re, text2, -1, &status);
 796         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
 797         TEST_ASSERT_SUCCESS(status);
 798         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 799         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
 800
 801         /*  Match, output just fills buffer, no termination warning. */
 802         status = U_ZERO_ERROR;
 803         uregex_setText(re, text1, -1, &status);
 804         memset(buf, -1, sizeof(buf));
 805         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 806         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 807         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 808         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 809         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 810
 811         /* Do the replaceFirst again, without first resetting anything.
 812          *  Should give the same results.
 813          */
 814         status = U_ZERO_ERROR;
 815         memset(buf, -1, sizeof(buf));
 816         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 817         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 818         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 819         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 820         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 821
 822         /* NULL buffer, zero buffer length */
 823         status = U_ZERO_ERROR;
 824         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
 825         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 826         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 827
 828         /* Buffer too small by one */
 829         status = U_ZERO_ERROR;
 830         memset(buf, -1, sizeof(buf));
 831         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
 832         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 833         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
 834         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 835         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 836
 837         uregex_close(re);
 838     }
 839
 840
 841     /*
 842      *  replaceAll()
 843      */
 844     {
 845         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
 846         UChar    text2[80];          /*  "No match Here"           */
 847         UChar    replText[80];       /*  "<$1>"                    */
 848         UChar    replText2[80];      /*  "<<$1>>"                  */
 849         const char * pattern = "x(.*?)x";
 850         const char * expectedResult = "Replace <aa> <1> <...>.";
 851         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
 852         UChar    buf[80];
 853         int32_t  resultSize;
 854         int32_t  expectedResultSize;
 855         int32_t  expectedResultSize2;
 856         int32_t  i;
 857
 858         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
 859         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
 860         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
 861         u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
 862         expectedResultSize = strlen(expectedResult);
 863         expectedResultSize2 = strlen(expectedResult2);
 864
 865         status = U_ZERO_ERROR;
 866         re = uregex_openC(pattern, 0, NULL, &status);
 867         TEST_ASSERT_SUCCESS(status);
 868
 869         /*  Normal case, with match */
 870         uregex_setText(re, text1, -1, &status);
 871         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
 872         TEST_ASSERT_SUCCESS(status);
 873         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
 874         TEST_ASSERT(resultSize == expectedResultSize);
 875
 876         /* No match.  Text should copy to output with no changes.  */
 877         status = U_ZERO_ERROR;
 878         uregex_setText(re, text2, -1, &status);
 879         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
 880         TEST_ASSERT_SUCCESS(status);
 881         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 882         TEST_ASSERT(resultSize == u_strlen(text2));
 883
 884         /*  Match, output just fills buffer, no termination warning. */
 885         status = U_ZERO_ERROR;
 886         uregex_setText(re, text1, -1, &status);
 887         memset(buf, -1, sizeof(buf));
 888         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
 889         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 890         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
 891         TEST_ASSERT(resultSize == expectedResultSize);
 892         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 893
        /* Do the replaceAll again, without first resetting anything.
 895          *  Should give the same results.
 896          */
 897         status = U_ZERO_ERROR;
 898         memset(buf, -1, sizeof(buf));
 899         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
 900         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 901         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
 902         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 903         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 904
 905         /* NULL buffer, zero buffer length */
 906         status = U_ZERO_ERROR;
 907         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
 908         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 909         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 910
 911         /* Buffer too small.  Try every size, which will tickle edge cases
 912          * in uregex_appendReplacement (used by replaceAll)   */
 913         for (i=0; i<expectedResultSize; i++) {
 914             char  expected[80];
 915             status = U_ZERO_ERROR;
 916             memset(buf, -1, sizeof(buf));
 917             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
 918             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 919             strcpy(expected, expectedResult);
 920             expected[i] = 0;
 921             TEST_ASSERT_STRING(expected, buf, FALSE);
 922             TEST_ASSERT(resultSize == expectedResultSize);
 923             TEST_ASSERT(buf[i] == (UChar)0xffff);
 924         }
 925
 926         /* Buffer too small.  Same as previous test, except this time the replacement
 927          * text is longer than the match capture group, making the length of the complete
 928          * replacement longer than the original string.
 929          */
 930         for (i=0; i<expectedResultSize2; i++) {
 931             char  expected[80];
 932             status = U_ZERO_ERROR;
 933             memset(buf, -1, sizeof(buf));
 934             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
 935             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 936             strcpy(expected, expectedResult2);
 937             expected[i] = 0;
 938             TEST_ASSERT_STRING(expected, buf, FALSE);
 939             TEST_ASSERT(resultSize == expectedResultSize2);
 940             TEST_ASSERT(buf[i] == (UChar)0xffff);
 941         }
 942
 943
 944         uregex_close(re);
 945     }
 946
 947
 948     /*
 949      *  appendReplacement()
 950      */
 951     {
 952         UChar    text[100];
 953         UChar    repl[100];
 954         UChar    buf[100];
 955         UChar   *bufPtr;
 956         int32_t  bufCap;
 957
 958
 959         status = U_ZERO_ERROR;
 960         re = uregex_openC(".*", 0, 0, &status);
 961         TEST_ASSERT_SUCCESS(status);
 962
 963         u_uastrncpy(text, "whatever",  sizeof(text)/2);
 964         u_uastrncpy(repl, "some other", sizeof(repl)/2);
 965         uregex_setText(re, text, -1, &status);
 966
 967         /* match covers whole target string */
 968         uregex_find(re, 0, &status);
 969         TEST_ASSERT_SUCCESS(status);
 970         bufPtr = buf;
 971         bufCap = sizeof(buf) / 2;
 972         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
 973         TEST_ASSERT_SUCCESS(status);
 974         TEST_ASSERT_STRING("some other", buf, TRUE);
 975
 976         /* Match has \u \U escapes */
 977         uregex_find(re, 0, &status);
 978         TEST_ASSERT_SUCCESS(status);
 979         bufPtr = buf;
 980         bufCap = sizeof(buf) / 2;
 981         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
 982         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
 983         TEST_ASSERT_SUCCESS(status);
 984         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
 985
 986         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
 987         status = U_ZERO_ERROR;
 988         uregex_find(re, 0, &status);
 989         TEST_ASSERT_SUCCESS(status);
 990         bufPtr = buf;
 991         status = U_BUFFER_OVERFLOW_ERROR;
 992         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
 993         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 994
 995         uregex_close(re);
 996     }
 997
 998
 999     /*
1000      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1001      */
1002
1003     /*
1004      *  split()
1005      */
1006     {
1007         UChar    textToSplit[80];
1008         UChar    text2[80];
1009         UChar    buf[200];
1010         UChar    *fields[10];
1011         int32_t  numFields;
1012         int32_t  requiredCapacity;
1013         int32_t  spaceNeeded;
1014         int32_t  sz;
1015
1016         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1017         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1018
1019         status = U_ZERO_ERROR;
1020         re = uregex_openC(":", 0, NULL, &status);
1021
1022
1023         /*  Simple split */
1024
1025         uregex_setText(re, textToSplit, -1, &status);
1026         TEST_ASSERT_SUCCESS(status);
1027
1028         /* The TEST_ASSERT_SUCCESS call above should change too... */
1029         if (U_SUCCESS(status)) {
1030             memset(fields, -1, sizeof(fields));
1031             numFields =
1032                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1033             TEST_ASSERT_SUCCESS(status);
1034
1035             /* The TEST_ASSERT_SUCCESS call above should change too... */
1036             if(U_SUCCESS(status)) {
1037                 TEST_ASSERT(numFields == 3);
1038                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1039                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1040                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1041                 TEST_ASSERT(fields[3] == NULL);
1042
1043                 spaceNeeded = u_strlen(textToSplit) -
1044                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1045                             numFields;          /* Each field gets a NUL terminator */
1046
1047                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1048             }
1049         }
1050
1051         uregex_close(re);
1052
1053
1054         /*  Split with too few output strings available */
1055         status = U_ZERO_ERROR;
1056         re = uregex_openC(":", 0, NULL, &status);
1057         uregex_setText(re, textToSplit, -1, &status);
1058         TEST_ASSERT_SUCCESS(status);
1059
1060         /* The TEST_ASSERT_SUCCESS call above should change too... */
1061         if(U_SUCCESS(status)) {
1062             memset(fields, -1, sizeof(fields));
1063             numFields =
1064                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1065             TEST_ASSERT_SUCCESS(status);
1066
1067             /* The TEST_ASSERT_SUCCESS call above should change too... */
1068             if(U_SUCCESS(status)) {
1069                 TEST_ASSERT(numFields == 2);
1070                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1071                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1072                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1073
1074                 spaceNeeded = u_strlen(textToSplit) -
1075                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1076                             numFields;          /* Each field gets a NUL terminator */
1077
1078                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1079
1080                 /* Split with a range of output buffer sizes.  */
1081                 spaceNeeded = u_strlen(textToSplit) -
1082                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1083                     numFields;          /* Each field gets a NUL terminator */
1084
1085                 for (sz=0; sz < spaceNeeded+1; sz++) {
1086                     memset(fields, -1, sizeof(fields));
1087                     status = U_ZERO_ERROR;
1088                     numFields =
1089                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1090                     if (sz >= spaceNeeded) {
1091                         TEST_ASSERT_SUCCESS(status);
1092                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1093                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1094                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1095                     } else {
1096                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1097                     }
1098                     TEST_ASSERT(numFields == 3);
1099                     TEST_ASSERT(fields[3] == NULL);
1100                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1101                 }
1102             }
1103         }
1104
1105         uregex_close(re);
1106     }
1107
1108
1109
1110
1111     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1112      *                   comes out as additional fields.  */
1113     {
1114         UChar    textToSplit[80];
1115         UChar    buf[200];
1116         UChar    *fields[10];
1117         int32_t  numFields;
1118         int32_t  requiredCapacity;
1119         int32_t  spaceNeeded;
1120         int32_t  sz;
1121
1122         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
1123
1124         status = U_ZERO_ERROR;
1125         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1126
1127         uregex_setText(re, textToSplit, -1, &status);
1128         TEST_ASSERT_SUCCESS(status);
1129
1130         /* The TEST_ASSERT_SUCCESS call above should change too... */
1131         if(U_SUCCESS(status)) {
1132             memset(fields, -1, sizeof(fields));
1133             numFields =
1134                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1135             TEST_ASSERT_SUCCESS(status);
1136
1137             /* The TEST_ASSERT_SUCCESS call above should change too... */
1138             if(U_SUCCESS(status)) {
1139                 TEST_ASSERT(numFields == 5);
1140                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1141                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1142                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1143                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1144                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1145                 TEST_ASSERT(fields[5] == NULL);
1146                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1147                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1148             }
1149         }
1150
1151         /*  Split with too few output strings available (2) */
1152         status = U_ZERO_ERROR;
1153         memset(fields, -1, sizeof(fields));
1154         numFields =
1155             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1156         TEST_ASSERT_SUCCESS(status);
1157
1158         /* The TEST_ASSERT_SUCCESS call above should change too... */
1159         if(U_SUCCESS(status)) {
1160             TEST_ASSERT(numFields == 2);
1161             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1162             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1163             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1164
1165             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1166             TEST_ASSERT(spaceNeeded == requiredCapacity);
1167         }
1168
1169         /*  Split with too few output strings available (3) */
1170         status = U_ZERO_ERROR;
1171         memset(fields, -1, sizeof(fields));
1172         numFields =
1173             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1174         TEST_ASSERT_SUCCESS(status);
1175
1176         /* The TEST_ASSERT_SUCCESS call above should change too... */
1177         if(U_SUCCESS(status)) {
1178             TEST_ASSERT(numFields == 3);
1179             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1180             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1181             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1182             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1183
1184             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1185             TEST_ASSERT(spaceNeeded == requiredCapacity);
1186         }
1187
1188         /*  Split with just enough output strings available (5) */
1189         status = U_ZERO_ERROR;
1190         memset(fields, -1, sizeof(fields));
1191         numFields =
1192             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1193         TEST_ASSERT_SUCCESS(status);
1194
1195         /* The TEST_ASSERT_SUCCESS call above should change too... */
1196         if(U_SUCCESS(status)) {
1197             TEST_ASSERT(numFields == 5);
1198             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1199             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1200             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1201             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1202             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1203             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1204
1205             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1206             TEST_ASSERT(spaceNeeded == requiredCapacity);
1207         }
1208
1209         /* Split, end of text is a field delimiter.   */
1210         status = U_ZERO_ERROR;
1211         sz = strlen("first <tag-a> second<tag-b>");
1212         uregex_setText(re, textToSplit, sz, &status);
1213         TEST_ASSERT_SUCCESS(status);
1214
1215         /* The TEST_ASSERT_SUCCESS call above should change too... */
1216         if(U_SUCCESS(status)) {
1217             memset(fields, -1, sizeof(fields));
1218             numFields =
1219                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1220             TEST_ASSERT_SUCCESS(status);
1221
1222             /* The TEST_ASSERT_SUCCESS call above should change too... */
1223             if(U_SUCCESS(status)) {
1224                 TEST_ASSERT(numFields == 4);
1225                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1226                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1227                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1228                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1229                 TEST_ASSERT(fields[4] == NULL);
1230                 TEST_ASSERT(fields[8] == NULL);
1231                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1232                 spaceNeeded = strlen("first .tag-a. second.tag-b.");  /* "." at NUL positions */
1233                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1234             }
1235         }
1236
1237         uregex_close(re);
1238     }
1239
1240     /*
1241      * set/getTimeLimit
1242      */
1243      TEST_SETUP("abc$", "abcdef", 0);
1244      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1245      uregex_setTimeLimit(re, 1000, &status);
1246      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1247      TEST_ASSERT_SUCCESS(status);
1248      uregex_setTimeLimit(re, -1, &status);
1249      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1250      status = U_ZERO_ERROR;
1251      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1252      TEST_TEARDOWN;
1253
1254      /*
1255       * set/get Stack Limit
1256       */
1257      TEST_SETUP("abc$", "abcdef", 0);
1258      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1259      uregex_setStackLimit(re, 40000, &status);
1260      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1261      TEST_ASSERT_SUCCESS(status);
1262      uregex_setStackLimit(re, -1, &status);
1263      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1264      status = U_ZERO_ERROR;
1265      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1266      TEST_TEARDOWN;
1267
1268
1269      /*
1270       * Get/Set callback functions
1271       *     This test is copied from intltest regex/Callbacks
1272       *     The pattern and test data will run long enough to cause the callback
1273       *       to be invoked.  The nested '+' operators give exponential time
1274       *       behavior with increasing string length.
1275       */
1276      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1277      callBackContext cbInfo = {4, 0, 0};
1278      const void     *pContext   = &cbInfo;
1279      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1280
1281      /*  Getting the callback fn when it hasn't been set must return NULL  */
1282      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1283      TEST_ASSERT_SUCCESS(status);
1284      TEST_ASSERT(returnedFn == NULL);
1285      TEST_ASSERT(pContext == NULL);
1286
     /* Set the callback and do a match.                                  */
1288      /* The callback function should record that it has been called.      */
1289      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1290      TEST_ASSERT_SUCCESS(status);
1291      TEST_ASSERT(cbInfo.numCalls == 0);
1292      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1293      TEST_ASSERT_SUCCESS(status);
1294      TEST_ASSERT(cbInfo.numCalls > 0);
1295
1296      /* Getting the callback should return the values that were set above.  */
1297      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1298      TEST_ASSERT(returnedFn == &TestCallbackFn);
1299      TEST_ASSERT(pContext == &cbInfo);
1300
1301      TEST_TEARDOWN;
1302 }
1303
1304
1305
1306 static void TestBug4315(void) {
1307     UErrorCode      theICUError = U_ZERO_ERROR;
1308     URegularExpression *theRegEx;
1309     UChar           *textBuff;
1310     const char      *thePattern;
1311     UChar            theString[100];
1312     UChar           *destFields[24];
1313     int32_t         neededLength1;
1314     int32_t         neededLength2;
1315
1316     int32_t         wordCount = 0;
1317     int32_t         destFieldsSize = 24;
1318
1319     thePattern  = "ck ";
1320     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1321
1322     /* open a regex */
1323     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1324     TEST_ASSERT_SUCCESS(theICUError);
1325
1326     /* set the input string */
1327     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1328     TEST_ASSERT_SUCCESS(theICUError);
1329
1330     /* split */
1331     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1332      *  error occurs! */
1333     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1334         destFieldsSize, &theICUError);
1335
1336     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1337     TEST_ASSERT(wordCount==3);
1338
1339     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1340     {
1341         theICUError = U_ZERO_ERROR;
1342         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1343         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1344             destFields, destFieldsSize, &theICUError);
1345         TEST_ASSERT(wordCount==3);
1346         TEST_ASSERT_SUCCESS(theICUError);
1347         TEST_ASSERT(neededLength1 == neededLength2);
1348         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1349         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1350         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1351         TEST_ASSERT(destFields[3] == NULL);
1352         free(textBuff);
1353     }
1354     uregex_close(theRegEx);
1355 }
1356
1357 /* Based on TestRegexCAPI() */
1358 static void TestUTextAPI(void) {
1359     UErrorCode           status = U_ZERO_ERROR;
1360     URegularExpression  *re;
1361     UText                patternText = UTEXT_INITIALIZER;
1362     UChar                pat[200];
1363     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1364
    /* Minimalist open/close */
1366     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1367     re = uregex_openUText(&patternText, 0, 0, &status);
1368     if (U_FAILURE(status)) {
1369          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1370          utext_close(&patternText);
1371          return;
1372     }
1373     uregex_close(re);
1374
1375     /* Open with all flag values set */
1376     status = U_ZERO_ERROR;
1377     re = uregex_openUText(&patternText,
1378         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1379         0, &status);
1380     TEST_ASSERT_SUCCESS(status);
1381     uregex_close(re);
1382
1383     /* Open with an invalid flag */
1384     status = U_ZERO_ERROR;
1385     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1386     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1387     uregex_close(re);
1388
1389     /* open with an invalid parameter */
1390     status = U_ZERO_ERROR;
1391     re = uregex_openUText(NULL,
1392         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1393     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1394
1395     /*
1396      *  clone
1397      */
1398     {
1399         URegularExpression *clone1;
1400         URegularExpression *clone2;
1401         URegularExpression *clone3;
1402         UChar  testString1[30];
1403         UChar  testString2[30];
1404         UBool  result;
1405
1406
1407         status = U_ZERO_ERROR;
1408         re = uregex_openUText(&patternText, 0, 0, &status);
1409         TEST_ASSERT_SUCCESS(status);
1410         clone1 = uregex_clone(re, &status);
1411         TEST_ASSERT_SUCCESS(status);
1412         TEST_ASSERT(clone1 != NULL);
1413
1414         status = U_ZERO_ERROR;
1415         clone2 = uregex_clone(re, &status);
1416         TEST_ASSERT_SUCCESS(status);
1417         TEST_ASSERT(clone2 != NULL);
1418         uregex_close(re);
1419
1420         status = U_ZERO_ERROR;
1421         clone3 = uregex_clone(clone2, &status);
1422         TEST_ASSERT_SUCCESS(status);
1423         TEST_ASSERT(clone3 != NULL);
1424
1425         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1426         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1427
1428         status = U_ZERO_ERROR;
1429         uregex_setText(clone1, testString1, -1, &status);
1430         TEST_ASSERT_SUCCESS(status);
1431         result = uregex_lookingAt(clone1, 0, &status);
1432         TEST_ASSERT_SUCCESS(status);
1433         TEST_ASSERT(result==TRUE);
1434
1435         status = U_ZERO_ERROR;
1436         uregex_setText(clone2, testString2, -1, &status);
1437         TEST_ASSERT_SUCCESS(status);
1438         result = uregex_lookingAt(clone2, 0, &status);
1439         TEST_ASSERT_SUCCESS(status);
1440         TEST_ASSERT(result==FALSE);
1441         result = uregex_find(clone2, 0, &status);
1442         TEST_ASSERT_SUCCESS(status);
1443         TEST_ASSERT(result==TRUE);
1444
1445         uregex_close(clone1);
1446         uregex_close(clone2);
1447         uregex_close(clone3);
1448
1449     }
1450
1451     /*
1452      *  pattern() and patternText()
1453      */
1454     {
1455         const UChar  *resultPat;
1456         int32_t       resultLen;
1457         UText        *resultText;
1458         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1459         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1460         u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1461         status = U_ZERO_ERROR;
1462
1463         utext_openUTF8(&patternText, str_hello, -1, &status);
1464         re = uregex_open(pat, -1, 0, NULL, &status);
1465         resultPat = uregex_pattern(re, &resultLen, &status);
1466         TEST_ASSERT_SUCCESS(status);
1467
1468         /* The TEST_ASSERT_SUCCESS above should change too... */
1469         if (U_SUCCESS(status)) {
1470             TEST_ASSERT(resultLen == -1);
1471             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1472         }
1473
1474         resultText = uregex_patternUText(re, &status);
1475         TEST_ASSERT_SUCCESS(status);
1476         TEST_ASSERT_UTEXT(str_hello, resultText);
1477
1478         uregex_close(re);
1479
1480         status = U_ZERO_ERROR;
1481         re = uregex_open(pat, 3, 0, NULL, &status);
1482         resultPat = uregex_pattern(re, &resultLen, &status);
1483         TEST_ASSERT_SUCCESS(status);
1484
1485         /* The TEST_ASSERT_SUCCESS above should change too... */
1486         if (U_SUCCESS(status)) {
1487             TEST_ASSERT(resultLen == 3);
1488             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1489             TEST_ASSERT(u_strlen(resultPat) == 3);
1490         }
1491
1492         resultText = uregex_patternUText(re, &status);
1493         TEST_ASSERT_SUCCESS(status);
1494         TEST_ASSERT_UTEXT(str_hel, resultText);
1495
1496         uregex_close(re);
1497     }
1498
1499     /*
1500      *  setUText() and lookingAt()
1501      */
1502     {
1503         UText  text1 = UTEXT_INITIALIZER;
1504         UText  text2 = UTEXT_INITIALIZER;
1505         UBool  result;
1506         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1507         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1508         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1509         status = U_ZERO_ERROR;
1510         utext_openUTF8(&text1, str_abcccd, -1, &status);
1511         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1512
1513         utext_openUTF8(&patternText, str_abcd, -1, &status);
1514         re = uregex_openUText(&patternText, 0, NULL, &status);
1515         TEST_ASSERT_SUCCESS(status);
1516
1517         /* Operation before doing a setText should fail... */
1518         status = U_ZERO_ERROR;
1519         uregex_lookingAt(re, 0, &status);
1520         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1521
1522         status = U_ZERO_ERROR;
1523         uregex_setUText(re, &text1, &status);
1524         result = uregex_lookingAt(re, 0, &status);
1525         TEST_ASSERT(result == TRUE);
1526         TEST_ASSERT_SUCCESS(status);
1527
1528         status = U_ZERO_ERROR;
1529         uregex_setUText(re, &text2, &status);
1530         result = uregex_lookingAt(re, 0, &status);
1531         TEST_ASSERT(result == FALSE);
1532         TEST_ASSERT_SUCCESS(status);
1533
1534         status = U_ZERO_ERROR;
1535         uregex_setUText(re, &text1, &status);
1536         result = uregex_lookingAt(re, 0, &status);
1537         TEST_ASSERT(result == TRUE);
1538         TEST_ASSERT_SUCCESS(status);
1539
1540         uregex_close(re);
1541         utext_close(&text1);
1542         utext_close(&text2);
1543     }
1544
1545
1546     /*
1547      *  getText() and getUText(): the text handed back must equal what was set, whether it was set as a UText or as UChars.
1548      */
1549     {
1550         UText  text1 = UTEXT_INITIALIZER;
1551         UText  text2 = UTEXT_INITIALIZER;
1552         UChar  text2Chars[20];
1553         UText  *resultText;
1554         const UChar   *result;
1555         int32_t  textLength;
1556         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* ASCII "bcddde" (the bytes are not "abcccd", despite the name) */
1557         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* ASCII "bcdddye" (not "abcccxd") */
1558         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* ASCII pattern "bcd+e" (not "abc*d") */
1559         /* text1 is a UTF-8 UText; text2 is a UChar UText over the copy held in text2Chars. */
1560         /* The copy limit is the capacity of text2Chars in UChars, not the size of the UText struct text2. */
1561         status = U_ZERO_ERROR;
1562         utext_openUTF8(&text1, str_abcccd, -1, &status);
1563         u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2Chars)/sizeof(text2Chars[0]));
1564         utext_openUChars(&text2, text2Chars, -1, &status);
1565
1566         utext_openUTF8(&patternText, str_abcd, -1, &status);
1567         re = uregex_openUText(&patternText, 0, NULL, &status);
1568
1569         /* First set a UText: getUText() should hand back a different UText object with the same content. */
1570         uregex_setUText(re, &text1, &status);
1571         resultText = uregex_getUText(re, NULL, &status);
1572         TEST_ASSERT_SUCCESS(status);
1573         TEST_ASSERT(resultText != &text1);
1574         utext_setNativeIndex(resultText, 0);
1575         utext_setNativeIndex(&text1, 0);
1576         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1577         utext_close(resultText);
1578
1579         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer; only textLength is checked */
1580         TEST_ASSERT(textLength == -1 || textLength == 6);
1581         resultText = uregex_getUText(re, NULL, &status);
1582         TEST_ASSERT_SUCCESS(status);
1583         TEST_ASSERT(resultText != &text1);
1584         utext_setNativeIndex(resultText, 0);
1585         utext_setNativeIndex(&text1, 0);
1586         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1587         utext_close(resultText);
1588
1589         /* Then set a UChar * (the 7 UChars of "bcdddye"); getUText() should now compare equal to text2. */
1590         uregex_setText(re, text2Chars, 7, &status);
1591         resultText = uregex_getUText(re, NULL, &status);
1592         TEST_ASSERT_SUCCESS(status);
1593         utext_setNativeIndex(resultText, 0);
1594         utext_setNativeIndex(&text2, 0);
1595         TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
1596         utext_close(resultText);
1597         result = uregex_getText(re, &textLength, &status);
1598         TEST_ASSERT(textLength == 7);
1599
1600         uregex_close(re);
1601         utext_close(&text1);
1602         utext_close(&text2);
1603     }
1604
1605     /*
1606      *  matches()
1607      */
1608     {
1609         UText   text1 = UTEXT_INITIALIZER;
1610         UBool   result;
1611         UText   nullText = UTEXT_INITIALIZER;
1612         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1613         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1614
1615         status = U_ZERO_ERROR;
1616         utext_openUTF8(&text1, str_abcccde, -1, &status);
1617         utext_openUTF8(&patternText, str_abcd, -1, &status);
1618         re = uregex_openUText(&patternText, 0, NULL, &status);
1619
1620         uregex_setUText(re, &text1, &status);
1621         result = uregex_matches(re, 0, &status);
1622         TEST_ASSERT(result == FALSE);
1623         TEST_ASSERT_SUCCESS(status);
1624         uregex_close(re);
1625
1626         status = U_ZERO_ERROR;
1627         re = uregex_openC(".?", 0, NULL, &status);
1628         uregex_setUText(re, &text1, &status);
1629         result = uregex_matches(re, 7, &status);
1630         TEST_ASSERT(result == TRUE);
1631         TEST_ASSERT_SUCCESS(status);
1632
1633         status = U_ZERO_ERROR;
1634         utext_openUTF8(&nullText, "", -1, &status);
1635         uregex_setUText(re, &nullText, &status);
1636         TEST_ASSERT_SUCCESS(status);
1637         result = uregex_matches(re, 0, &status);
1638         TEST_ASSERT(result == TRUE);
1639         TEST_ASSERT_SUCCESS(status);
1640
1641         uregex_close(re);
1642         utext_close(&text1);
1643         utext_close(&nullText);
1644     }
1645
1646
1647     /*
1648      *  lookingAt()    Exercised in the setUText() and lookingAt() test above.
1649      */
1650
1651
1652     /*
1653      *  find(), findNext, start, end, reset
1654      */
1655     {
1656         UChar    text1[50];
1657         UBool    result;
1658         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
1659         status = U_ZERO_ERROR;
1660         re = uregex_openC("rx", 0, NULL, &status);
1661         /* In text1, "rx" occurs at [3,5), [6,8), [11,13) and [13,15). */
1662         uregex_setText(re, text1, -1, &status);
1663         result = uregex_find(re, 0, &status);
1664         TEST_ASSERT(result == TRUE);
1665         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1666         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1667         TEST_ASSERT_SUCCESS(status);
1668         /* find() from index 9 is expected to land on the occurrence at 11. */
1669         result = uregex_find(re, 9, &status);
1670         TEST_ASSERT(result == TRUE);
1671         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1672         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1673         TEST_ASSERT_SUCCESS(status);
1674         /* From index 14 only "x..." is left, so no match is expected. */
1675         result = uregex_find(re, 14, &status);
1676         TEST_ASSERT(result == FALSE);
1677         TEST_ASSERT_SUCCESS(status);
1678
1679         status = U_ZERO_ERROR;
1680         uregex_reset(re, 0, &status);
1681         /* After the reset to 0, findNext() is expected to step through the occurrences in order. */
1682         result = uregex_findNext(re, &status);
1683         TEST_ASSERT(result == TRUE);
1684         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1685         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1686         TEST_ASSERT_SUCCESS(status);
1687
1688         result = uregex_findNext(re, &status);
1689         TEST_ASSERT(result == TRUE);
1690         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1691         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1692         TEST_ASSERT_SUCCESS(status);
1693
1694         status = U_ZERO_ERROR;
1695         uregex_reset(re, 12, &status);
1696         /* Index 12 is the 'x' of the occurrence at [11,13); the next "rx" starts at 13. */
1697         result = uregex_findNext(re, &status);
1698         TEST_ASSERT(result == TRUE);
1699         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1700         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1701         TEST_ASSERT_SUCCESS(status);
1702         /* Nothing is left after [13,15). */
1703         result = uregex_findNext(re, &status);
1704         TEST_ASSERT(result == FALSE);
1705         TEST_ASSERT_SUCCESS(status);
1706
1707         uregex_close(re);
1708     }
1709
1710     /*
1711      *  group(): capture groups fetched as UText, via the deep-copy call (groupUTextDeep) and the shallow call (groupUText).
1712      */
1713     {
1714         UChar    text1[80];
1715         UText   *actual;
1716         UBool    result;
1717
1718         const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1719         const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1720
1721
1722         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
1723
1724         status = U_ZERO_ERROR;
1725         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1726         TEST_ASSERT_SUCCESS(status);
1727
1728         uregex_setText(re, text1, -1, &status);
1729         result = uregex_find(re, 0, &status);
1730         TEST_ASSERT(result==TRUE);
1731
1732         /*  Capture Group 0, the full match.  Should succeed.  */
1733         status = U_ZERO_ERROR;
1734         actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1735         TEST_ASSERT_SUCCESS(status);
1736         TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1737         utext_close(actual);
1738
1739         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1740         status = U_ZERO_ERROR;
1741         {
1742             int64_t      group_len;
1743             int32_t      len16;
1744             UErrorCode   shallowStatus = U_ZERO_ERROR;
1745             int64_t      nativeIndex;
1746             UChar *groupChars;
1747             UText groupText = UTEXT_INITIALIZER;
1748
1749             actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1750             TEST_ASSERT_SUCCESS(status);
1751
1752             nativeIndex = utext_getNativeIndex(actual);
1753             /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
1754             /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
1755             len16 = (int32_t)group_len;   /* the text was set from UChars (uregex_setText above), so the native length is used as the UTF-16 length */
1756             groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
1757             TEST_ASSERT(groupChars != NULL);
1758             if (groupChars != NULL) {   /* report the failed allocation instead of handing a NULL buffer to utext_extract() */
1759                 utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1760                 utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1761                 TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1762                 utext_close(&groupText);
1763                 free(groupChars);
1764             }
1765         }
1766         utext_close(actual);
1767
1768         /*  Capture group #1.  Should succeed. */
1769         status = U_ZERO_ERROR;
1770         actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1771         TEST_ASSERT_SUCCESS(status);
1772         TEST_ASSERT_UTEXT(str_interior, actual);
1773         utext_close(actual);
1774
1775         /*  Capture group out of range.  Error: U_INDEX_OUTOFBOUNDS_ERROR is expected, along with an empty UText. */
1776         status = U_ZERO_ERROR;
1777         actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1778         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1779         TEST_ASSERT(utext_nativeLength(actual) == 0);
1780         utext_close(actual);
1781
1782         uregex_close(re);
1783
1784     }
1785
1786     /*
1787      *  replaceFirst(): uregex_replaceFirstUText() with a match, without a match, and with escapes in the replacement.
1788      */
1789     {
1790         UChar    text1[80];
1791         UChar    text2[80];
1792         UText    replText = UTEXT_INITIALIZER;
1793         UText   *result;
1794         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1795         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1796         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1797         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1798         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1799         status = U_ZERO_ERROR;
1800         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1801         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1802         utext_openUTF8(&replText, str_1x, -1, &status);
1803         /* The replacement "<$1>" refers to capture group 1 of the pattern compiled below. */
1804         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1805         TEST_ASSERT_SUCCESS(status);
1806
1807         /*  Normal case, with match: only the first match, "xaax", is expected to be replaced (by "<aa>"). */
1808         uregex_setText(re, text1, -1, &status);
1809         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1810         TEST_ASSERT_SUCCESS(status);
1811         TEST_ASSERT_UTEXT(str_Replxxx, result);
1812         utext_close(result);
1813
1814         /* No match.  Text should copy to output with no changes.  */
1815         uregex_setText(re, text2, -1, &status);
1816         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1817         TEST_ASSERT_SUCCESS(status);
1818         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1819         utext_close(result);
1820
1821         /* Unicode escapes: replText is re-opened on the replacement string str_u00411U00000042a. */
1822         uregex_setText(re, text1, -1, &status);
1823         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1824         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1825         TEST_ASSERT_SUCCESS(status);
1826         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1827         utext_close(result);
1828
1829         uregex_close(re);
1830         utext_close(&replText);
1831     }
1832
1833
1834     /*
1835      *  replaceAll(): uregex_replaceAllUText() with matches and without any match.
1836      */
1837     {
1838         UChar    text1[80];
1839         UChar    text2[80];
1840         UText    replText = UTEXT_INITIALIZER;
1841         UText   *result;
1842         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1843         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1844         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1845         status = U_ZERO_ERROR;
1846         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1847         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1848         utext_openUTF8(&replText, str_1, -1, &status);
1849         /* The replacement "<$1>" refers to capture group 1 of the pattern compiled below. */
1850         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1851         TEST_ASSERT_SUCCESS(status);
1852
1853         /*  Normal case, with match: all three matches ("xaax", "x1x", "x...x") are expected to be replaced. */
1854         uregex_setText(re, text1, -1, &status);
1855         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1856         TEST_ASSERT_SUCCESS(status);
1857         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1858         utext_close(result);
1859
1860         /* No match.  Text should copy to output with no changes.  */
1861         uregex_setText(re, text2, -1, &status);
1862         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1863         TEST_ASSERT_SUCCESS(status);
1864         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1865         utext_close(result);
1866
1867         uregex_close(re);
1868         utext_close(&replText);
1869     }
1870
1871
1872     /*
1873      *  appendReplacement(): appends the replacement for the current match into a caller-supplied UChar buffer.
1874      */
1875     {
1876         UChar    text[100];
1877         UChar    repl[100];
1878         UChar    buf[100];
1879         UChar   *bufPtr;
1880         int32_t  bufCap;
1881         /* bufPtr and bufCap are handed to uregex_appendReplacement() by address, so they are re-initialized before each call. */
1882         status = U_ZERO_ERROR;
1883         re = uregex_openC(".*", 0, 0, &status);
1884         TEST_ASSERT_SUCCESS(status);
1885
1886         u_uastrncpy(text, "whatever",  sizeof(text)/2);
1887         u_uastrncpy(repl, "some other", sizeof(repl)/2);
1888         uregex_setText(re, text, -1, &status);
1889
1890         /* match covers whole target string */
1891         uregex_find(re, 0, &status);
1892         TEST_ASSERT_SUCCESS(status);
1893         bufPtr = buf;
1894         bufCap = sizeof(buf) / 2;
1895         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1896         TEST_ASSERT_SUCCESS(status);
1897         TEST_ASSERT_STRING("some other", buf, TRUE);
1898
1899         /* Replacement has \u \U escapes (plus an escaped backslash, a bare '$' and an escaped 'a') */
1900         uregex_find(re, 0, &status);
1901         TEST_ASSERT_SUCCESS(status);
1902         bufPtr = buf;
1903         bufCap = sizeof(buf) / 2;
1904         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1905         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1906         TEST_ASSERT_SUCCESS(status);
1907         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1908
1909         uregex_close(re);
1910     }
1911
1912
1913     /*
1914      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1915      */
1916
1917     /*
1918      *  splitUText(): split on ":" with room to spare, then with fewer output slots than fields.
1919      */
1920     {
1921         UChar    textToSplit[80];
1922         UChar    text2[80];
1923         UText    *fields[10];
1924         int32_t  numFields;
1925         int32_t i;
1926
1927         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1928         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2); /* text2 is filled here but not referenced again in this block */
1929
1930         status = U_ZERO_ERROR;
1931         re = uregex_openC(":", 0, NULL, &status);
1932
1933
1934         /*  Simple split */
1935
1936         uregex_setText(re, textToSplit, -1, &status);
1937         TEST_ASSERT_SUCCESS(status);
1938
1939         /* The TEST_ASSERT_SUCCESS call above should change too... */
1940         if (U_SUCCESS(status)) {
1941             memset(fields, 0, sizeof(fields)); /* so that unused slots can be checked for NULL below */
1942             numFields = uregex_splitUText(re, fields, 10, &status);
1943             TEST_ASSERT_SUCCESS(status);
1944
1945             /* The TEST_ASSERT_SUCCESS call above should change too... */
1946             if(U_SUCCESS(status)) {
1947               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1948               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1949               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1950                 TEST_ASSERT(numFields == 3);
1951                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1952                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1953                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1954                 TEST_ASSERT(fields[3] == NULL);
1955             }
1956             for(i = 0; i < numFields; i++) { /* close every field UText that was returned */
1957                 utext_close(fields[i]);
1958             }
1959         }
1960
1961         uregex_close(re);
1962
1963
1964         /*  Split with too few output strings available: the last slot is expected to hold the unsplit remainder. */
1965         status = U_ZERO_ERROR;
1966         re = uregex_openC(":", 0, NULL, &status);
1967         uregex_setText(re, textToSplit, -1, &status);
1968         TEST_ASSERT_SUCCESS(status);
1969
1970         /* The TEST_ASSERT_SUCCESS call above should change too... */
1971         if(U_SUCCESS(status)) {
1972             fields[0] = NULL;
1973             fields[1] = NULL;
1974             fields[2] = &patternText; /* sentinel: with a capacity of 2 this slot must be left untouched */
1975             numFields = uregex_splitUText(re, fields, 2, &status);
1976             TEST_ASSERT_SUCCESS(status);
1977
1978             /* The TEST_ASSERT_SUCCESS call above should change too... */
1979             if(U_SUCCESS(status)) {
1980                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
1981                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
1982                 TEST_ASSERT(numFields == 2);
1983                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1984                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
1985                 TEST_ASSERT(fields[2] == &patternText);
1986             }
1987             for(i = 0; i < numFields; i++) {
1988                 utext_close(fields[i]);
1989             }
1990         }
1991
1992         uregex_close(re);
1993     }
1994
1995     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
1996      *                   comes out as additional fields.  */
1997     {
1998         UChar    textToSplit[80];
1999         UText    *fields[10];
2000         int32_t  numFields;
2001         int32_t i;
2002
2003         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
2004
2005         status = U_ZERO_ERROR;
2006         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2007         /* This one compiled pattern (re) is reused by every split below and closed at the end of the block. */
2008         uregex_setText(re, textToSplit, -1, &status);
2009         TEST_ASSERT_SUCCESS(status);
2010
2011         /* The TEST_ASSERT_SUCCESS call above should change too... */
2012         if(U_SUCCESS(status)) {
2013             memset(fields, 0, sizeof(fields));
2014             numFields = uregex_splitUText(re, fields, 10, &status);
2015             TEST_ASSERT_SUCCESS(status);
2016
2017             /* The TEST_ASSERT_SUCCESS call above should change too... */
2018             if(U_SUCCESS(status)) {
2019                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2020                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2021                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2022                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2023                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2024
2025                 TEST_ASSERT(numFields == 5);
2026                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2027                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2028                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2029                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2030                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2031                 TEST_ASSERT(fields[5] == NULL);
2032             }
2033             for(i = 0; i < numFields; i++) {
2034                 utext_close(fields[i]);
2035             }
2036         }
2037
2038         /*  Split with too few output strings available (2) */
2039         status = U_ZERO_ERROR;
2040         fields[0] = NULL;
2041         fields[1] = NULL;
2042         fields[2] = &patternText; /* sentinel: must be left untouched by a split with capacity 2 */
2043         numFields = uregex_splitUText(re, fields, 2, &status);
2044         TEST_ASSERT_SUCCESS(status);
2045
2046         /* The TEST_ASSERT_SUCCESS call above should change too... */
2047         if(U_SUCCESS(status)) {
2048             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2049             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2050             TEST_ASSERT(numFields == 2);
2051             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2052             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2053             TEST_ASSERT(fields[2] == &patternText);
2054         }
2055         for(i = 0; i < numFields; i++) {
2056             utext_close(fields[i]);
2057         }
2058
2059
2060         /*  Split with too few output strings available (3) */
2061         status = U_ZERO_ERROR;
2062         fields[0] = NULL;
2063         fields[1] = NULL;
2064         fields[2] = NULL;
2065         fields[3] = &patternText; /* sentinel: must be left untouched by a split with capacity 3 */
2066         numFields = uregex_splitUText(re, fields, 3, &status);
2067         TEST_ASSERT_SUCCESS(status);
2068
2069         /* The TEST_ASSERT_SUCCESS call above should change too... */
2070         if(U_SUCCESS(status)) {
2071             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2072             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2073             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2074             TEST_ASSERT(numFields == 3);
2075             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2076             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2077             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2078             TEST_ASSERT(fields[3] == &patternText);
2079         }
2080         for(i = 0; i < numFields; i++) {
2081             utext_close(fields[i]);
2082         }
2083
2084         /*  Split with just enough output strings available (5) */
2085         status = U_ZERO_ERROR;
2086         fields[0] = NULL;
2087         fields[1] = NULL;
2088         fields[2] = NULL;
2089         fields[3] = NULL;
2090         fields[4] = NULL;
2091         fields[5] = &patternText; /* sentinel: must be left untouched by a split with capacity 5 */
2092         numFields = uregex_splitUText(re, fields, 5, &status);
2093         TEST_ASSERT_SUCCESS(status);
2094
2095         /* The TEST_ASSERT_SUCCESS call above should change too... */
2096         if(U_SUCCESS(status)) {
2097             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2098             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2099             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2100             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2101             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2102
2103             TEST_ASSERT(numFields == 5);
2104             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2105             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2106             TEST_ASSERT_UTEXT(str_second, fields[2]);
2107             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2108             TEST_ASSERT_UTEXT(str_third, fields[4]);
2109             TEST_ASSERT(fields[5] == &patternText);
2110         }
2111         for(i = 0; i < numFields; i++) {
2112             utext_close(fields[i]);
2113         }
2114         /* The text is cut right after "<tag-b>"; strlen() yields size_t, narrowed explicitly for the int32_t length parameter. */
2115         /* Split, end of text is a field delimiter.   */
2116         status = U_ZERO_ERROR;
2117         uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2118         TEST_ASSERT_SUCCESS(status);
2119
2120         /* The TEST_ASSERT_SUCCESS call above should change too... */
2121         if(U_SUCCESS(status)) {
2122             memset(fields, 0, sizeof(fields));
2123             fields[9] = &patternText; /* sentinel: must be left untouched by a split with capacity 9 */
2124             numFields = uregex_splitUText(re, fields, 9, &status);
2125             TEST_ASSERT_SUCCESS(status);
2126
2127             /* The TEST_ASSERT_SUCCESS call above should change too... */
2128             if(U_SUCCESS(status)) {
2129                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2130                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2131                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2132                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2133
2134                 TEST_ASSERT(numFields == 4);
2135                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2136                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2137                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2138                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2139                 TEST_ASSERT(fields[4] == NULL);
2140                 TEST_ASSERT(fields[8] == NULL);
2141                 TEST_ASSERT(fields[9] == &patternText);
2142             }
2143             for(i = 0; i < numFields; i++) {
2144                 utext_close(fields[i]);
2145             }
2146         }
2147
2148         uregex_close(re);
2149     }
2150     utext_close(&patternText);
2151 }
2152
2153 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */