icuSources/test/cintltst/reapits.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2004-2013, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File reapits.c
   9 *
  10 *********************************************************************************/
  11 /*C API TEST FOR Regular Expressions */
  12 /**
  13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
  14 *   try to test the full functionality.  It just calls each function and verifies that it
  15 *   works on a basic level.
  16 *
  17 *   More complete testing of regular expression functionality is done with the C++ tests.
  18 **/
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  23
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include "unicode/uloc.h"
  27 #include "unicode/uregex.h"
  28 #include "unicode/ustring.h"
  29 #include "unicode/utext.h"
  30 #include "cintltst.h"
  31
  /*
   * TEST_ASSERT_SUCCESS(status)
   *     Calls log_data_err(), tagged with the invoking file and line and the
   *     error name, when status holds a failure code.
   *     Expands to a brace-enclosed block, so a call site compiles with or
   *     without a trailing semicolon.
   */
  #define TEST_ASSERT_SUCCESS(status) { \
      if (U_FAILURE(status)) { \
          log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                       __FILE__, __LINE__, u_errorName(status)); \
      } \
  }
  34
  /*
   * TEST_ASSERT(expr)
   *     Calls log_data_err(), tagged with the invoking file and line, when
   *     expr evaluates to zero (FALSE).
   *     Expands to a brace-enclosed block, so a call site compiles with or
   *     without a trailing semicolon.
   */
  #define TEST_ASSERT(expr) { \
      if (!(expr)) { \
          log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                       __FILE__, __LINE__); \
      } \
  }
  37
  /*
   *   TEST_SETUP and TEST_TEARDOWN
   *         macros to handle the boilerplate around setting up regex test cases.
   *         parameters to setup:
   *              pattern:     The regex pattern, a (char *) null terminated C string.
   *              testString:  The string data, also a (char *) C string.
   *              flags:       Regex flags to set when compiling the pattern
   *
   *         Put arbitrary test code between SETUP and TEARDOWN.
   *         "re" is the compiled, ready-to-go regular expression.
   *
   *         Both macros use variables named "status" and "re" from the enclosing
   *         scope.  TEST_SETUP opens two brace levels that TEST_TEARDOWN closes,
   *         so the two must always be used as a pair.  The test code in between
   *         runs only when compiling the pattern, allocating the text buffer and
   *         setting the text all succeeded.
   */
  #define TEST_SETUP(pattern, testString, flags) {  \
      UChar   *srcString = NULL;  \
      status = U_ZERO_ERROR; \
      re = uregex_openC((pattern), (flags), NULL, &status);  \
      TEST_ASSERT_SUCCESS(status);   \
      srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
      if (srcString == NULL) { \
          /* Out of memory: report through status so the test body is skipped. */ \
          if (U_SUCCESS(status)) { \
              status = U_MEMORY_ALLOCATION_ERROR; \
          } \
      } else { \
          u_uastrncpy(srcString, (testString), (int32_t)(strlen(testString)+1)); \
          uregex_setText(re, srcString, -1, &status); \
      } \
      TEST_ASSERT_SUCCESS(status);  \
      if (U_SUCCESS(status)) {

  #define TEST_TEARDOWN  \
      }  \
      TEST_ASSERT_SUCCESS(status);  \
      uregex_close(re);  \
      free(srcString);   \
      }
  66
  67
  68 /**
  69  * @param expected utf-8 array of bytes to be expected
  70  */
  71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
  72      char     buf_inside_macro[120];
  73      int32_t  len = (int32_t)strlen(expected);
  74      UBool    success;
  75      if (nulTerm) {
  76          u_austrncpy(buf_inside_macro, (actual), len+1);
  77          buf_inside_macro[len+2] = 0;
  78          success = (strcmp((expected), buf_inside_macro) == 0);
  79      } else {
  80          u_austrncpy(buf_inside_macro, (actual), len);
  81          buf_inside_macro[len+1] = 0;
  82          success = (strncmp((expected), buf_inside_macro, len) == 0);
  83      }
  84      if (success == FALSE) {
  85          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
  86              file, line, (expected), buf_inside_macro);
  87      }
  88 }
  89
  90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
  91
  92
  93 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
  94     int32_t u8i = 0;
  95     UChar32 u8c = 0;
  96     UChar32 utc = 0;
  97     UBool   stringsEqual = TRUE;
  98     utext_setNativeIndex(utext, 0);
  99     for (;;) {
 100         U8_NEXT_UNSAFE(utf8, u8i, u8c);
 101         utc = utext_next32(utext);
 102         if (u8c == 0 && utc == U_SENTINEL) {
 103             break;
 104         }
 105         if (u8c != utc || u8c == 0) {
 106             stringsEqual = FALSE;
 107             break;
 108         }
 109     }
 110     return stringsEqual;
 111 }
 112
 113
 114 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
 115     utext_setNativeIndex(actual, 0);
 116     if (!equals_utf8_utext(expected, actual)) {
 117         UChar32 c;
 118         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
 119         c = utext_next32From(actual, 0);
 120         while (c != U_SENTINEL) {
 121             if (0x20<c && c <0x7e) {
 122                 log_err("%c", c);
 123             } else {
 124                 log_err("%#x", c);
 125             }
 126             c = UTEXT_NEXT32(actual);
 127         }
 128         log_err("\"\n");
 129     }
 130 }
 131
 132 /*
 133  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 134  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 135  */
 136 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
 137
 138 static UBool testUTextEqual(UText *uta, UText *utb) {
 139     UChar32 ca = 0;
 140     UChar32 cb = 0;
 141     utext_setNativeIndex(uta, 0);
 142     utext_setNativeIndex(utb, 0);
 143     do {
 144         ca = utext_next32(uta);
 145         cb = utext_next32(utb);
 146         if (ca != cb) {
 147             break;
 148         }
 149     } while (ca != U_SENTINEL);
 150     return ca == cb;
 151 }
 152
 153
 154
 155
 156 static void TestRegexCAPI(void);
 157 static void TestBug4315(void);
 158 static void TestUTextAPI(void);
 159 static void TestRefreshInput(void);
 160 static void TestBug8421(void);
 161
 162 void addURegexTest(TestNode** root);
 163
 164 void addURegexTest(TestNode** root)
 165 {
 166     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
 167     addTest(root, &TestBug4315,   "regex/TestBug4315");
 168     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
 169     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
 170     addTest(root, &TestBug8421,   "regex/TestBug8421");
 171 }
 172
 173 /*
 174  * Call back function and context struct used for testing
 175  *    regular expression user callbacks.  This test is mostly the same as
 176  *   the corresponding C++ test in intltest.
 177  */
 178 typedef struct callBackContext {
 179     int32_t          maxCalls;
 180     int32_t          numCalls;
 181     int32_t          lastSteps;
 182 } callBackContext;
 183
 184 static UBool U_EXPORT2 U_CALLCONV
 185 TestCallbackFn(const void *context, int32_t steps) {
 186   callBackContext  *info = (callBackContext *)context;
 187   if (info->lastSteps+1 != steps) {
 188       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
 189   }
 190   info->lastSteps = steps;
 191   info->numCalls++;
 192   return (info->numCalls < info->maxCalls);
 193 }
 194
 195 /*
 196  *   Regular Expression C API Tests
 197  */
 198 static void TestRegexCAPI(void) {
 199     UErrorCode           status = U_ZERO_ERROR;
 200     URegularExpression  *re;
 201     UChar                pat[200];
 202     UChar               *minus1;
 203
 204     memset(&minus1, -1, sizeof(minus1));
 205
 206     /* Minimalist open/close */
 207     u_uastrncpy(pat, "abc*", sizeof(pat)/2);
 208     re = uregex_open(pat, -1, 0, 0, &status);
 209     if (U_FAILURE(status)) {
 210          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
 211          return;
 212     }
 213     uregex_close(re);
 214
 215     /* Open with all flag values set */
 216     status = U_ZERO_ERROR;
 217     re = uregex_open(pat, -1,
 218         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
 219         0, &status);
 220     TEST_ASSERT_SUCCESS(status);
 221     uregex_close(re);
 222
 223     /* Open with an invalid flag */
 224     status = U_ZERO_ERROR;
 225     re = uregex_open(pat, -1, 0x40000000, 0, &status);
 226     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
 227     uregex_close(re);
 228
 229     /* Open with an unimplemented flag */
 230     status = U_ZERO_ERROR;
 231     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
 232     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
 233     uregex_close(re);
 234
 235     /* openC with an invalid parameter */
 236     status = U_ZERO_ERROR;
 237     re = uregex_openC(NULL,
 238         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 239     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
 240
 241     /* openC with a failure status already set on entry: must return NULL and leave status unchanged */
 242     status = U_USELESS_COLLATOR_ERROR;
 243     re = uregex_openC(NULL,
 244         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 245     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
 246
 247     /* openC   open from a C string */
 248     {
 249         const UChar   *p;
 250         int32_t  len;
 251         status = U_ZERO_ERROR;
 252         re = uregex_openC("abc*", 0, 0, &status);
 253         TEST_ASSERT_SUCCESS(status);
 254         p = uregex_pattern(re, &len, &status);
 255         TEST_ASSERT_SUCCESS(status);
 256
 257         /* The TEST_ASSERT_SUCCESS above should change too... */
 258         if(U_SUCCESS(status)) {
 259             u_uastrncpy(pat, "abc*", sizeof(pat)/2);
 260             TEST_ASSERT(u_strcmp(pat, p) == 0);
 261             TEST_ASSERT(len==(int32_t)strlen("abc*"));
 262         }
 263
 264         uregex_close(re);
 265
 266         /*  TODO:  Open with ParseError parameter */
 267     }
 268
 269     /*
 270      *  clone
 271      */
 272     {
 273         URegularExpression *clone1;
 274         URegularExpression *clone2;
 275         URegularExpression *clone3;
 276         UChar  testString1[30];
 277         UChar  testString2[30];
 278         UBool  result;
 279
 280
 281         status = U_ZERO_ERROR;
 282         re = uregex_openC("abc*", 0, 0, &status);
 283         TEST_ASSERT_SUCCESS(status);
 284         clone1 = uregex_clone(re, &status);
 285         TEST_ASSERT_SUCCESS(status);
 286         TEST_ASSERT(clone1 != NULL);
 287
 288         status = U_ZERO_ERROR;
 289         clone2 = uregex_clone(re, &status);
 290         TEST_ASSERT_SUCCESS(status);
 291         TEST_ASSERT(clone2 != NULL);
 292         uregex_close(re);
 293
 294         status = U_ZERO_ERROR;
 295         clone3 = uregex_clone(clone2, &status);
 296         TEST_ASSERT_SUCCESS(status);
 297         TEST_ASSERT(clone3 != NULL);
 298
 299         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
 300         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
 301
 302         status = U_ZERO_ERROR;
 303         uregex_setText(clone1, testString1, -1, &status);
 304         TEST_ASSERT_SUCCESS(status);
 305         result = uregex_lookingAt(clone1, 0, &status);
 306         TEST_ASSERT_SUCCESS(status);
 307         TEST_ASSERT(result==TRUE);
 308
 309         status = U_ZERO_ERROR;
 310         uregex_setText(clone2, testString2, -1, &status);
 311         TEST_ASSERT_SUCCESS(status);
 312         result = uregex_lookingAt(clone2, 0, &status);
 313         TEST_ASSERT_SUCCESS(status);
 314         TEST_ASSERT(result==FALSE);
 315         result = uregex_find(clone2, 0, &status);
 316         TEST_ASSERT_SUCCESS(status);
 317         TEST_ASSERT(result==TRUE);
 318
 319         uregex_close(clone1);
 320         uregex_close(clone2);
 321         uregex_close(clone3);
 322
 323     }
 324
 325     /*
 326      *  pattern()
 327     */
 328     {
 329         const UChar  *resultPat;
 330         int32_t       resultLen;
 331         u_uastrncpy(pat, "hello", sizeof(pat)/2);
 332         status = U_ZERO_ERROR;
 333         re = uregex_open(pat, -1, 0, NULL, &status);
 334         resultPat = uregex_pattern(re, &resultLen, &status);
 335         TEST_ASSERT_SUCCESS(status);
 336
 337         /* The TEST_ASSERT_SUCCESS above should change too... */
 338         if (U_SUCCESS(status)) {
 339             TEST_ASSERT(resultLen == -1);
 340             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
 341         }
 342
 343         uregex_close(re);
 344
 345         status = U_ZERO_ERROR;
 346         re = uregex_open(pat, 3, 0, NULL, &status);
 347         resultPat = uregex_pattern(re, &resultLen, &status);
 348         TEST_ASSERT_SUCCESS(status);
 349         TEST_ASSERT_SUCCESS(status);
 350
 351         /* The TEST_ASSERT_SUCCESS above should change too... */
 352         if (U_SUCCESS(status)) {
 353             TEST_ASSERT(resultLen == 3);
 354             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
 355             TEST_ASSERT(u_strlen(resultPat) == 3);
 356         }
 357
 358         uregex_close(re);
 359     }
 360
 361     /*
 362      *  flags()
 363      */
 364     {
 365         int32_t  t;
 366
 367         status = U_ZERO_ERROR;
 368         re = uregex_open(pat, -1, 0, NULL, &status);
 369         t  = uregex_flags(re, &status);
 370         TEST_ASSERT_SUCCESS(status);
 371         TEST_ASSERT(t == 0);
 372         uregex_close(re);
 373
 374         status = U_ZERO_ERROR;
 375         re = uregex_open(pat, -1, 0, NULL, &status);
 376         t  = uregex_flags(re, &status);
 377         TEST_ASSERT_SUCCESS(status);
 378         TEST_ASSERT(t == 0);
 379         uregex_close(re);
 380
 381         status = U_ZERO_ERROR;
 382         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
 383         t  = uregex_flags(re, &status);
 384         TEST_ASSERT_SUCCESS(status);
 385         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
 386         uregex_close(re);
 387     }
 388
 389     /*
 390      *  setText() and lookingAt()
 391      */
 392     {
 393         UChar  text1[50];
 394         UChar  text2[50];
 395         UBool  result;
 396
 397         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
 398         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
 399         status = U_ZERO_ERROR;
 400         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
 401         re = uregex_open(pat, -1, 0, NULL, &status);
 402         TEST_ASSERT_SUCCESS(status);
 403
 404         /* Operation before doing a setText should fail... */
 405         status = U_ZERO_ERROR;
 406         uregex_lookingAt(re, 0, &status);
 407         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
 408
 409         status = U_ZERO_ERROR;
 410         uregex_setText(re, text1, -1, &status);
 411         result = uregex_lookingAt(re, 0, &status);
 412         TEST_ASSERT(result == TRUE);
 413         TEST_ASSERT_SUCCESS(status);
 414
 415         status = U_ZERO_ERROR;
 416         uregex_setText(re, text2, -1, &status);
 417         result = uregex_lookingAt(re, 0, &status);
 418         TEST_ASSERT(result == FALSE);
 419         TEST_ASSERT_SUCCESS(status);
 420
 421         status = U_ZERO_ERROR;
 422         uregex_setText(re, text1, -1, &status);
 423         result = uregex_lookingAt(re, 0, &status);
 424         TEST_ASSERT(result == TRUE);
 425         TEST_ASSERT_SUCCESS(status);
 426
 427         status = U_ZERO_ERROR;
 428         uregex_setText(re, text1, 5, &status);
 429         result = uregex_lookingAt(re, 0, &status);
 430         TEST_ASSERT(result == FALSE);
 431         TEST_ASSERT_SUCCESS(status);
 432
 433         status = U_ZERO_ERROR;
 434         uregex_setText(re, text1, 6, &status);
 435         result = uregex_lookingAt(re, 0, &status);
 436         TEST_ASSERT(result == TRUE);
 437         TEST_ASSERT_SUCCESS(status);
 438
 439         uregex_close(re);
 440     }
 441
 442
 443     /*
 444      *  getText()
 445      */
 446     {
 447         UChar    text1[50];
 448         UChar    text2[50];
 449         const UChar   *result;
 450         int32_t  textLength;
 451
 452         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
 453         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
 454         status = U_ZERO_ERROR;
 455         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
 456         re = uregex_open(pat, -1, 0, NULL, &status);
 457
 458         uregex_setText(re, text1, -1, &status);
 459         result = uregex_getText(re, &textLength, &status);
 460         TEST_ASSERT(result == text1);
 461         TEST_ASSERT(textLength == -1);
 462         TEST_ASSERT_SUCCESS(status);
 463
 464         status = U_ZERO_ERROR;
 465         uregex_setText(re, text2, 7, &status);
 466         result = uregex_getText(re, &textLength, &status);
 467         TEST_ASSERT(result == text2);
 468         TEST_ASSERT(textLength == 7);
 469         TEST_ASSERT_SUCCESS(status);
 470
 471         status = U_ZERO_ERROR;
 472         uregex_setText(re, text2, 4, &status);
 473         result = uregex_getText(re, &textLength, &status);
 474         TEST_ASSERT(result == text2);
 475         TEST_ASSERT(textLength == 4);
 476         TEST_ASSERT_SUCCESS(status);
 477         uregex_close(re);
 478     }
 479
 480     /*
 481      *  matches()
 482      */
 483     {
 484         UChar   text1[50];
 485         UBool   result;
 486         int     len;
 487         UChar   nullString[] = {0,0,0};
 488
 489         u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
 490         status = U_ZERO_ERROR;
 491         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
 492         re = uregex_open(pat, -1, 0, NULL, &status);
 493
 494         uregex_setText(re, text1, -1, &status);
 495         result = uregex_matches(re, 0, &status);
 496         TEST_ASSERT(result == FALSE);
 497         TEST_ASSERT_SUCCESS(status);
 498
 499         status = U_ZERO_ERROR;
 500         uregex_setText(re, text1, 6, &status);
 501         result = uregex_matches(re, 0, &status);
 502         TEST_ASSERT(result == TRUE);
 503         TEST_ASSERT_SUCCESS(status);
 504
 505         status = U_ZERO_ERROR;
 506         uregex_setText(re, text1, 6, &status);
 507         result = uregex_matches(re, 1, &status);
 508         TEST_ASSERT(result == FALSE);
 509         TEST_ASSERT_SUCCESS(status);
 510         uregex_close(re);
 511
 512         status = U_ZERO_ERROR;
 513         re = uregex_openC(".?", 0, NULL, &status);
 514         uregex_setText(re, text1, -1, &status);
 515         len = u_strlen(text1);
 516         result = uregex_matches(re, len, &status);
 517         TEST_ASSERT(result == TRUE);
 518         TEST_ASSERT_SUCCESS(status);
 519
 520         status = U_ZERO_ERROR;
 521         uregex_setText(re, nullString, -1, &status);
 522         TEST_ASSERT_SUCCESS(status);
 523         result = uregex_matches(re, 0, &status);
 524         TEST_ASSERT(result == TRUE);
 525         TEST_ASSERT_SUCCESS(status);
 526         uregex_close(re);
 527     }
 528
 529
 530     /*
 531      *  lookingAt()    Used in setText test.
 532      */
 533
 534
 535     /*
 536      *  find(), findNext, start, end, reset
 537      */
 538     {
 539         UChar    text1[50];
 540         UBool    result;
 541         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
 542         status = U_ZERO_ERROR;
 543         re = uregex_openC("rx", 0, NULL, &status);
 544
 545         uregex_setText(re, text1, -1, &status);
 546         result = uregex_find(re, 0, &status);
 547         TEST_ASSERT(result == TRUE);
 548         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 549         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 550         TEST_ASSERT_SUCCESS(status);
 551
 552         result = uregex_find(re, 9, &status);
 553         TEST_ASSERT(result == TRUE);
 554         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
 555         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
 556         TEST_ASSERT_SUCCESS(status);
 557
 558         result = uregex_find(re, 14, &status);
 559         TEST_ASSERT(result == FALSE);
 560         TEST_ASSERT_SUCCESS(status);
 561
 562         status = U_ZERO_ERROR;
 563         uregex_reset(re, 0, &status);
 564
 565         result = uregex_findNext(re, &status);
 566         TEST_ASSERT(result == TRUE);
 567         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 568         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 569         TEST_ASSERT_SUCCESS(status);
 570
 571         result = uregex_findNext(re, &status);
 572         TEST_ASSERT(result == TRUE);
 573         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
 574         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
 575         TEST_ASSERT_SUCCESS(status);
 576
 577         status = U_ZERO_ERROR;
 578         uregex_reset(re, 12, &status);
 579
 580         result = uregex_findNext(re, &status);
 581         TEST_ASSERT(result == TRUE);
 582         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
 583         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
 584         TEST_ASSERT_SUCCESS(status);
 585
 586         result = uregex_findNext(re, &status);
 587         TEST_ASSERT(result == FALSE);
 588         TEST_ASSERT_SUCCESS(status);
 589
 590         uregex_close(re);
 591     }
 592
 593     /*
 594      *  groupCount
 595      */
 596     {
 597         int32_t result;
 598
 599         status = U_ZERO_ERROR;
 600         re = uregex_openC("abc", 0, NULL, &status);
 601         result = uregex_groupCount(re, &status);
 602         TEST_ASSERT_SUCCESS(status);
 603         TEST_ASSERT(result == 0);
 604         uregex_close(re);
 605
 606         status = U_ZERO_ERROR;
 607         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
 608         result = uregex_groupCount(re, &status);
 609         TEST_ASSERT_SUCCESS(status);
 610         TEST_ASSERT(result == 3);
 611         uregex_close(re);
 612
 613     }
 614
 615
 616     /*
 617      *  group()
 618      */
 619     {
 620         UChar    text1[80];
 621         UChar    buf[80];
 622         UBool    result;
 623         int32_t  resultSz;
 624         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
 625
 626         status = U_ZERO_ERROR;
 627         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
 628         TEST_ASSERT_SUCCESS(status);
 629
 630
 631         uregex_setText(re, text1, -1, &status);
 632         result = uregex_find(re, 0, &status);
 633         TEST_ASSERT(result==TRUE);
 634
 635         /*  Capture Group 0, the full match.  Should succeed.  */
 636         status = U_ZERO_ERROR;
 637         resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
 638         TEST_ASSERT_SUCCESS(status);
 639         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
 640         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 641
 642         /*  Capture group #1.  Should succeed. */
 643         status = U_ZERO_ERROR;
 644         resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
 645         TEST_ASSERT_SUCCESS(status);
 646         TEST_ASSERT_STRING(" interior ", buf, TRUE);
 647         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
 648
 649         /*  Capture group out of range.  Error. */
 650         status = U_ZERO_ERROR;
 651         uregex_group(re, 2, buf, sizeof(buf)/2, &status);
 652         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 653
 654         /* NULL buffer, pure pre-flight */
 655         status = U_ZERO_ERROR;
 656         resultSz = uregex_group(re, 0, NULL, 0, &status);
 657         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 658         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 659
 660         /* Too small buffer, truncated string */
 661         status = U_ZERO_ERROR;
 662         memset(buf, -1, sizeof(buf));
 663         resultSz = uregex_group(re, 0, buf, 5, &status);
 664         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 665         TEST_ASSERT_STRING("abc i", buf, FALSE);
 666         TEST_ASSERT(buf[5] == (UChar)0xffff);
 667         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 668
 669         /* Output string just fits buffer, no NUL term. */
 670         status = U_ZERO_ERROR;
 671         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
 672         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 673         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
 674         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 675         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
 676
 677         uregex_close(re);
 678
 679     }
 680
 681     /*
 682      *  Regions
 683      */
 684
 685
 686         /* SetRegion(), getRegion() do something  */
 687         TEST_SETUP(".*", "0123456789ABCDEF", 0)
 688         UChar resultString[40];
 689         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
 690         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
 691         uregex_setRegion(re, 3, 6, &status);
 692         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
 693         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
 694         TEST_ASSERT(uregex_findNext(re, &status));
 695         TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
 696         TEST_ASSERT_STRING("345", resultString, TRUE);
 697         TEST_TEARDOWN;
 698
 699         /* find(start=-1) uses regions   */
 700         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 701         uregex_setRegion(re, 4, 6, &status);
 702         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 703         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 704         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 705         TEST_TEARDOWN;
 706
 707         /* find (start >=0) does not use regions   */
 708         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 709         uregex_setRegion(re, 4, 6, &status);
 710         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 711         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 712         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 713         TEST_TEARDOWN;
 714
 715         /* findNext() obeys regions    */
 716         TEST_SETUP(".", "0123456789ABCDEF", 0);
 717         uregex_setRegion(re, 4, 6, &status);
 718         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
 719         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 720         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
 721         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
 722         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
 723         TEST_TEARDOWN;
 724
 725         /* matches(start=-1) uses regions                                           */
 726         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
 727         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 728         uregex_setRegion(re, 4, 6, &status);
 729         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
 730         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 731         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 732         TEST_TEARDOWN;
 733
 734         /* matches (start >=0) does not use regions       */
 735         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 736         uregex_setRegion(re, 4, 6, &status);
 737         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
 738         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 739         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 740         TEST_TEARDOWN;
 741
 742         /* lookingAt(start=-1) uses regions                                         */
 743         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
 744         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 745         uregex_setRegion(re, 4, 6, &status);
 746         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
 747         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 748         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
 749         TEST_TEARDOWN;
 750
 751         /* lookingAt (start >=0) does not use regions  */
 752         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 753         uregex_setRegion(re, 4, 6, &status);
 754         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
 755         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 756         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
 757         TEST_TEARDOWN;
 758
 759         /* hitEnd()       */
 760         TEST_SETUP("[a-f]*", "abcdefghij", 0);
 761         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 762         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
 763         TEST_TEARDOWN;
 764
 765         TEST_SETUP("[a-f]*", "abcdef", 0);
 766         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 767         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
 768         TEST_TEARDOWN;
 769
 770         /* requireEnd   */
 771         TEST_SETUP("abcd", "abcd", 0);
 772         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 773         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
 774         TEST_TEARDOWN;
 775
 776         TEST_SETUP("abcd$", "abcd", 0);
 777         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 778         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
 779         TEST_TEARDOWN;
 780
 781         /* anchoringBounds        */
 782         TEST_SETUP("abc$", "abcdef", 0);
 783         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
 784         uregex_useAnchoringBounds(re, FALSE, &status);
 785         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
 786
 787         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
 788         uregex_useAnchoringBounds(re, TRUE, &status);
 789         uregex_setRegion(re, 0, 3, &status);
 790         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 791         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 792         TEST_TEARDOWN;
 793
 794         /* Transparent Bounds      */
 795         TEST_SETUP("abc(?=def)", "abcdef", 0);
 796         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
 797         uregex_useTransparentBounds(re, TRUE, &status);
 798         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
 799
 800         uregex_useTransparentBounds(re, FALSE, &status);
 801         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
 802         uregex_setRegion(re, 0, 3, &status);
 803         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
 804         uregex_useTransparentBounds(re, TRUE, &status);
 805         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
 806         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 807         TEST_TEARDOWN;
 808
 809
 810     /*
 811      *  replaceFirst()
 812      */
 813     {
 814         UChar    text1[80];
 815         UChar    text2[80];
 816         UChar    replText[80];
 817         UChar    buf[80];
 818         int32_t  resultSz;
 819         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
 820         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
 821         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
 822
 823         status = U_ZERO_ERROR;
 824         re = uregex_openC("x(.*?)x", 0, NULL, &status);
 825         TEST_ASSERT_SUCCESS(status);
 826
 827         /*  Normal case, with match */
 828         uregex_setText(re, text1, -1, &status);
 829         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
 830         TEST_ASSERT_SUCCESS(status);
 831         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
 832         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 833
 834         /* No match.  Text should copy to output with no changes.  */
 835         status = U_ZERO_ERROR;
 836         uregex_setText(re, text2, -1, &status);
 837         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
 838         TEST_ASSERT_SUCCESS(status);
 839         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 840         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
 841
 842         /*  Match, output just fills buffer, no termination warning. */
 843         status = U_ZERO_ERROR;
 844         uregex_setText(re, text1, -1, &status);
 845         memset(buf, -1, sizeof(buf));
 846         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 847         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 848         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 849         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 850         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 851
 852         /* Do the replaceFirst again, without first resetting anything.
 853          *  Should give the same results.
 854          */
 855         status = U_ZERO_ERROR;
 856         memset(buf, -1, sizeof(buf));
 857         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 858         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 859         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 860         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 861         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 862
 863         /* NULL buffer, zero buffer length */
 864         status = U_ZERO_ERROR;
 865         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
 866         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 867         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 868
 869         /* Buffer too small by one */
 870         status = U_ZERO_ERROR;
 871         memset(buf, -1, sizeof(buf));
 872         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
 873         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 874         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
 875         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 876         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 877
 878         uregex_close(re);
 879     }
 880
 881
 882     /*
 883      *  replaceAll()
 884      */
 885     {
 886         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
 887         UChar    text2[80];          /*  "No match Here"           */
 888         UChar    replText[80];       /*  "<$1>"                    */
 889         UChar    replText2[80];      /*  "<<$1>>"                  */
 890         const char * pattern = "x(.*?)x";
 891         const char * expectedResult = "Replace <aa> <1> <...>.";
 892         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
 893         UChar    buf[80];
 894         int32_t  resultSize;
 895         int32_t  expectedResultSize;
 896         int32_t  expectedResultSize2;
 897         int32_t  i;
 898
 899         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
 900         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
 901         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
 902         u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
 903         expectedResultSize = strlen(expectedResult);
 904         expectedResultSize2 = strlen(expectedResult2);
 905
 906         status = U_ZERO_ERROR;
 907         re = uregex_openC(pattern, 0, NULL, &status);
 908         TEST_ASSERT_SUCCESS(status);
 909
 910         /*  Normal case, with match */
 911         uregex_setText(re, text1, -1, &status);
 912         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
 913         TEST_ASSERT_SUCCESS(status);
 914         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
 915         TEST_ASSERT(resultSize == expectedResultSize);
 916
 917         /* No match.  Text should copy to output with no changes.  */
 918         status = U_ZERO_ERROR;
 919         uregex_setText(re, text2, -1, &status);
 920         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
 921         TEST_ASSERT_SUCCESS(status);
 922         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 923         TEST_ASSERT(resultSize == u_strlen(text2));
 924
 925         /*  Match, output just fills buffer, no termination warning. */
 926         status = U_ZERO_ERROR;
 927         uregex_setText(re, text1, -1, &status);
 928         memset(buf, -1, sizeof(buf));
 929         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
 930         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 931         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
 932         TEST_ASSERT(resultSize == expectedResultSize);
 933         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 934
 935         /* Do the replaceFirst again, without first resetting anything.
 936          *  Should give the same results.
 937          */
 938         status = U_ZERO_ERROR;
 939         memset(buf, -1, sizeof(buf));
 940         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
 941         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 942         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
 943         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 944         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 945
 946         /* NULL buffer, zero buffer length */
 947         status = U_ZERO_ERROR;
 948         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
 949         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 950         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 951
 952         /* Buffer too small.  Try every size, which will tickle edge cases
 953          * in uregex_appendReplacement (used by replaceAll)   */
 954         for (i=0; i<expectedResultSize; i++) {
 955             char  expected[80];
 956             status = U_ZERO_ERROR;
 957             memset(buf, -1, sizeof(buf));
 958             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
 959             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 960             strcpy(expected, expectedResult);
 961             expected[i] = 0;
 962             TEST_ASSERT_STRING(expected, buf, FALSE);
 963             TEST_ASSERT(resultSize == expectedResultSize);
 964             TEST_ASSERT(buf[i] == (UChar)0xffff);
 965         }
 966
 967         /* Buffer too small.  Same as previous test, except this time the replacement
 968          * text is longer than the match capture group, making the length of the complete
 969          * replacement longer than the original string.
 970          */
 971         for (i=0; i<expectedResultSize2; i++) {
 972             char  expected[80];
 973             status = U_ZERO_ERROR;
 974             memset(buf, -1, sizeof(buf));
 975             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
 976             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 977             strcpy(expected, expectedResult2);
 978             expected[i] = 0;
 979             TEST_ASSERT_STRING(expected, buf, FALSE);
 980             TEST_ASSERT(resultSize == expectedResultSize2);
 981             TEST_ASSERT(buf[i] == (UChar)0xffff);
 982         }
 983
 984
 985         uregex_close(re);
 986     }
 987
 988
 989     /*
 990      *  appendReplacement()
 991      */
 992     {
 993         UChar    text[100];
 994         UChar    repl[100];
 995         UChar    buf[100];
 996         UChar   *bufPtr;
 997         int32_t  bufCap;
 998
 999
1000         status = U_ZERO_ERROR;
1001         re = uregex_openC(".*", 0, 0, &status);
1002         TEST_ASSERT_SUCCESS(status);
1003
1004         u_uastrncpy(text, "whatever",  sizeof(text)/2);
1005         u_uastrncpy(repl, "some other", sizeof(repl)/2);
1006         uregex_setText(re, text, -1, &status);
1007
1008         /* match covers whole target string */
1009         uregex_find(re, 0, &status);
1010         TEST_ASSERT_SUCCESS(status);
1011         bufPtr = buf;
1012         bufCap = sizeof(buf) / 2;
1013         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1014         TEST_ASSERT_SUCCESS(status);
1015         TEST_ASSERT_STRING("some other", buf, TRUE);
1016
1017         /* Match has \u \U escapes */
1018         uregex_find(re, 0, &status);
1019         TEST_ASSERT_SUCCESS(status);
1020         bufPtr = buf;
1021         bufCap = sizeof(buf) / 2;
1022         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1023         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1024         TEST_ASSERT_SUCCESS(status);
1025         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1026
1027         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1028         status = U_ZERO_ERROR;
1029         uregex_find(re, 0, &status);
1030         TEST_ASSERT_SUCCESS(status);
1031         bufPtr = buf;
1032         status = U_BUFFER_OVERFLOW_ERROR;
1033         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1034         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1035
1036         uregex_close(re);
1037     }
1038
1039
1040     /*
1041      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1042      */
1043
1044     /*
1045      *  split()
1046      */
1047     {
1048         UChar    textToSplit[80];
1049         UChar    text2[80];
1050         UChar    buf[200];
1051         UChar    *fields[10];
1052         int32_t  numFields;
1053         int32_t  requiredCapacity;
1054         int32_t  spaceNeeded;
1055         int32_t  sz;
1056
1057         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1058         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1059
1060         status = U_ZERO_ERROR;
1061         re = uregex_openC(":", 0, NULL, &status);
1062
1063
1064         /*  Simple split */
1065
1066         uregex_setText(re, textToSplit, -1, &status);
1067         TEST_ASSERT_SUCCESS(status);
1068
1069         /* The TEST_ASSERT_SUCCESS call above should change too... */
1070         if (U_SUCCESS(status)) {
1071             memset(fields, -1, sizeof(fields));
1072             numFields =
1073                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1074             TEST_ASSERT_SUCCESS(status);
1075
1076             /* The TEST_ASSERT_SUCCESS call above should change too... */
1077             if(U_SUCCESS(status)) {
1078                 TEST_ASSERT(numFields == 3);
1079                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1080                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1081                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1082                 TEST_ASSERT(fields[3] == NULL);
1083
1084                 spaceNeeded = u_strlen(textToSplit) -
1085                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1086                             numFields;          /* Each field gets a NUL terminator */
1087
1088                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1089             }
1090         }
1091
1092         uregex_close(re);
1093
1094
1095         /*  Split with too few output strings available */
1096         status = U_ZERO_ERROR;
1097         re = uregex_openC(":", 0, NULL, &status);
1098         uregex_setText(re, textToSplit, -1, &status);
1099         TEST_ASSERT_SUCCESS(status);
1100
1101         /* The TEST_ASSERT_SUCCESS call above should change too... */
1102         if(U_SUCCESS(status)) {
1103             memset(fields, -1, sizeof(fields));
1104             numFields =
1105                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1106             TEST_ASSERT_SUCCESS(status);
1107
1108             /* The TEST_ASSERT_SUCCESS call above should change too... */
1109             if(U_SUCCESS(status)) {
1110                 TEST_ASSERT(numFields == 2);
1111                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1112                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1113                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1114
1115                 spaceNeeded = u_strlen(textToSplit) -
1116                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1117                             numFields;          /* Each field gets a NUL terminator */
1118
1119                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1120
1121                 /* Split with a range of output buffer sizes.  */
1122                 spaceNeeded = u_strlen(textToSplit) -
1123                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1124                     numFields;          /* Each field gets a NUL terminator */
1125
1126                 for (sz=0; sz < spaceNeeded+1; sz++) {
1127                     memset(fields, -1, sizeof(fields));
1128                     status = U_ZERO_ERROR;
1129                     numFields =
1130                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1131                     if (sz >= spaceNeeded) {
1132                         TEST_ASSERT_SUCCESS(status);
1133                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1134                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1135                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1136                     } else {
1137                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1138                     }
1139                     TEST_ASSERT(numFields == 3);
1140                     TEST_ASSERT(fields[3] == NULL);
1141                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1142                 }
1143             }
1144         }
1145
1146         uregex_close(re);
1147     }
1148
1149
1150
1151
1152     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1153      *                   comes out as additional fields.  */
1154     {
1155         UChar    textToSplit[80];
1156         UChar    buf[200];
1157         UChar    *fields[10];
1158         int32_t  numFields;
1159         int32_t  requiredCapacity;
1160         int32_t  spaceNeeded;
1161         int32_t  sz;
1162
1163         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
1164
1165         status = U_ZERO_ERROR;
1166         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1167
1168         uregex_setText(re, textToSplit, -1, &status);
1169         TEST_ASSERT_SUCCESS(status);
1170
1171         /* The TEST_ASSERT_SUCCESS call above should change too... */
1172         if(U_SUCCESS(status)) {
1173             memset(fields, -1, sizeof(fields));
1174             numFields =
1175                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1176             TEST_ASSERT_SUCCESS(status);
1177
1178             /* The TEST_ASSERT_SUCCESS call above should change too... */
1179             if(U_SUCCESS(status)) {
1180                 TEST_ASSERT(numFields == 5);
1181                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1182                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1183                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1184                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1185                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1186                 TEST_ASSERT(fields[5] == NULL);
1187                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1188                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1189             }
1190         }
1191
1192         /*  Split with too few output strings available (2) */
1193         status = U_ZERO_ERROR;
1194         memset(fields, -1, sizeof(fields));
1195         numFields =
1196             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1197         TEST_ASSERT_SUCCESS(status);
1198
1199         /* The TEST_ASSERT_SUCCESS call above should change too... */
1200         if(U_SUCCESS(status)) {
1201             TEST_ASSERT(numFields == 2);
1202             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1203             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1204             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1205
1206             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1207             TEST_ASSERT(spaceNeeded == requiredCapacity);
1208         }
1209
1210         /*  Split with too few output strings available (3) */
1211         status = U_ZERO_ERROR;
1212         memset(fields, -1, sizeof(fields));
1213         numFields =
1214             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1215         TEST_ASSERT_SUCCESS(status);
1216
1217         /* The TEST_ASSERT_SUCCESS call above should change too... */
1218         if(U_SUCCESS(status)) {
1219             TEST_ASSERT(numFields == 3);
1220             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1221             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1222             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1223             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1224
1225             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1226             TEST_ASSERT(spaceNeeded == requiredCapacity);
1227         }
1228
1229         /*  Split with just enough output strings available (5) */
1230         status = U_ZERO_ERROR;
1231         memset(fields, -1, sizeof(fields));
1232         numFields =
1233             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1234         TEST_ASSERT_SUCCESS(status);
1235
1236         /* The TEST_ASSERT_SUCCESS call above should change too... */
1237         if(U_SUCCESS(status)) {
1238             TEST_ASSERT(numFields == 5);
1239             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1240             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1241             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1242             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1243             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1244             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1245
1246             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1247             TEST_ASSERT(spaceNeeded == requiredCapacity);
1248         }
1249
1250         /* Split, end of text is a field delimiter.   */
1251         status = U_ZERO_ERROR;
1252         sz = strlen("first <tag-a> second<tag-b>");
1253         uregex_setText(re, textToSplit, sz, &status);
1254         TEST_ASSERT_SUCCESS(status);
1255
1256         /* The TEST_ASSERT_SUCCESS call above should change too... */
1257         if(U_SUCCESS(status)) {
1258             memset(fields, -1, sizeof(fields));
1259             numFields =
1260                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1261             TEST_ASSERT_SUCCESS(status);
1262
1263             /* The TEST_ASSERT_SUCCESS call above should change too... */
1264             if(U_SUCCESS(status)) {
1265                 TEST_ASSERT(numFields == 5);
1266                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1267                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1268                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1269                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1270                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1271                 TEST_ASSERT(fields[5] == NULL);
1272                 TEST_ASSERT(fields[8] == NULL);
1273                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1274                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1275                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1276             }
1277         }
1278
1279         uregex_close(re);
1280     }
1281
1282     /*
1283      * set/getTimeLimit
1284      */
1285      TEST_SETUP("abc$", "abcdef", 0);
1286      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1287      uregex_setTimeLimit(re, 1000, &status);
1288      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1289      TEST_ASSERT_SUCCESS(status);
1290      uregex_setTimeLimit(re, -1, &status);
1291      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1292      status = U_ZERO_ERROR;
1293      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1294      TEST_TEARDOWN;
1295
1296      /*
1297       * set/get Stack Limit
1298       */
1299      TEST_SETUP("abc$", "abcdef", 0);
1300      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1301      uregex_setStackLimit(re, 40000, &status);
1302      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1303      TEST_ASSERT_SUCCESS(status);
1304      uregex_setStackLimit(re, -1, &status);
1305      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1306      status = U_ZERO_ERROR;
1307      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1308      TEST_TEARDOWN;
1309
1310
1311      /*
1312       * Get/Set callback functions
1313       *     This test is copied from intltest regex/Callbacks
1314       *     The pattern and test data will run long enough to cause the callback
1315       *       to be invoked.  The nested '+' operators give exponential time
1316       *       behavior with increasing string length.
1317       */
1318      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1319      callBackContext cbInfo = {4, 0, 0};
1320      const void     *pContext   = &cbInfo;
1321      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1322
1323      /*  Getting the callback fn when it hasn't been set must return NULL  */
1324      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1325      TEST_ASSERT_SUCCESS(status);
1326      TEST_ASSERT(returnedFn == NULL);
1327      TEST_ASSERT(pContext == NULL);
1328
1329      /* Set thecallback and do a match.                                   */
1330      /* The callback function should record that it has been called.      */
1331      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1332      TEST_ASSERT_SUCCESS(status);
1333      TEST_ASSERT(cbInfo.numCalls == 0);
1334      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1335      TEST_ASSERT_SUCCESS(status);
1336      TEST_ASSERT(cbInfo.numCalls > 0);
1337
1338      /* Getting the callback should return the values that were set above.  */
1339      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1340      TEST_ASSERT(returnedFn == &TestCallbackFn);
1341      TEST_ASSERT(pContext == &cbInfo);
1342
1343      TEST_TEARDOWN;
1344 }
1345
1346
1347
1348 static void TestBug4315(void) {
1349     UErrorCode      theICUError = U_ZERO_ERROR;
1350     URegularExpression *theRegEx;
1351     UChar           *textBuff;
1352     const char      *thePattern;
1353     UChar            theString[100];
1354     UChar           *destFields[24];
1355     int32_t         neededLength1;
1356     int32_t         neededLength2;
1357
1358     int32_t         wordCount = 0;
1359     int32_t         destFieldsSize = 24;
1360
1361     thePattern  = "ck ";
1362     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1363
1364     /* open a regex */
1365     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1366     TEST_ASSERT_SUCCESS(theICUError);
1367
1368     /* set the input string */
1369     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1370     TEST_ASSERT_SUCCESS(theICUError);
1371
1372     /* split */
1373     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1374      *  error occurs! */
1375     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1376         destFieldsSize, &theICUError);
1377
1378     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1379     TEST_ASSERT(wordCount==3);
1380
1381     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1382     {
1383         theICUError = U_ZERO_ERROR;
1384         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1385         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1386             destFields, destFieldsSize, &theICUError);
1387         TEST_ASSERT(wordCount==3);
1388         TEST_ASSERT_SUCCESS(theICUError);
1389         TEST_ASSERT(neededLength1 == neededLength2);
1390         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1391         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1392         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1393         TEST_ASSERT(destFields[3] == NULL);
1394         free(textBuff);
1395     }
1396     uregex_close(theRegEx);
1397 }
1398
1399 /* Based on TestRegexCAPI() */
1400 static void TestUTextAPI(void) {
1401     UErrorCode           status = U_ZERO_ERROR;
1402     URegularExpression  *re;
1403     UText                patternText = UTEXT_INITIALIZER;
1404     UChar                pat[200];
1405     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1406
1407     /* Mimimalist open/close */
1408     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1409     re = uregex_openUText(&patternText, 0, 0, &status);
1410     if (U_FAILURE(status)) {
1411          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1412          utext_close(&patternText);
1413          return;
1414     }
1415     uregex_close(re);
1416
1417     /* Open with all flag values set */
1418     status = U_ZERO_ERROR;
1419     re = uregex_openUText(&patternText,
1420         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1421         0, &status);
1422     TEST_ASSERT_SUCCESS(status);
1423     uregex_close(re);
1424
1425     /* Open with an invalid flag */
1426     status = U_ZERO_ERROR;
1427     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1428     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1429     uregex_close(re);
1430
1431     /* open with an invalid parameter */
1432     status = U_ZERO_ERROR;
1433     re = uregex_openUText(NULL,
1434         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1435     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1436
1437     /*
1438      *  clone
1439      */
1440     {
1441         URegularExpression *clone1;
1442         URegularExpression *clone2;
1443         URegularExpression *clone3;
1444         UChar  testString1[30];
1445         UChar  testString2[30];
1446         UBool  result;
1447
1448
1449         status = U_ZERO_ERROR;
1450         re = uregex_openUText(&patternText, 0, 0, &status);
1451         TEST_ASSERT_SUCCESS(status);
1452         clone1 = uregex_clone(re, &status);
1453         TEST_ASSERT_SUCCESS(status);
1454         TEST_ASSERT(clone1 != NULL);
1455
1456         status = U_ZERO_ERROR;
1457         clone2 = uregex_clone(re, &status);
1458         TEST_ASSERT_SUCCESS(status);
1459         TEST_ASSERT(clone2 != NULL);
1460         uregex_close(re);
1461
1462         status = U_ZERO_ERROR;
1463         clone3 = uregex_clone(clone2, &status);
1464         TEST_ASSERT_SUCCESS(status);
1465         TEST_ASSERT(clone3 != NULL);
1466
1467         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1468         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1469
1470         status = U_ZERO_ERROR;
1471         uregex_setText(clone1, testString1, -1, &status);
1472         TEST_ASSERT_SUCCESS(status);
1473         result = uregex_lookingAt(clone1, 0, &status);
1474         TEST_ASSERT_SUCCESS(status);
1475         TEST_ASSERT(result==TRUE);
1476
1477         status = U_ZERO_ERROR;
1478         uregex_setText(clone2, testString2, -1, &status);
1479         TEST_ASSERT_SUCCESS(status);
1480         result = uregex_lookingAt(clone2, 0, &status);
1481         TEST_ASSERT_SUCCESS(status);
1482         TEST_ASSERT(result==FALSE);
1483         result = uregex_find(clone2, 0, &status);
1484         TEST_ASSERT_SUCCESS(status);
1485         TEST_ASSERT(result==TRUE);
1486
1487         uregex_close(clone1);
1488         uregex_close(clone2);
1489         uregex_close(clone3);
1490
1491     }
1492
1493     /*
1494      *  pattern() and patternText()
1495      */
1496     {
1497         const UChar  *resultPat;
1498         int32_t       resultLen;
1499         UText        *resultText;
1500         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1501         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1502         u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1503         status = U_ZERO_ERROR;
1504
1505         utext_openUTF8(&patternText, str_hello, -1, &status);
1506         re = uregex_open(pat, -1, 0, NULL, &status);
1507         resultPat = uregex_pattern(re, &resultLen, &status);
1508         TEST_ASSERT_SUCCESS(status);
1509
1510         /* The TEST_ASSERT_SUCCESS above should change too... */
1511         if (U_SUCCESS(status)) {
1512             TEST_ASSERT(resultLen == -1);
1513             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1514         }
1515
1516         resultText = uregex_patternUText(re, &status);
1517         TEST_ASSERT_SUCCESS(status);
1518         TEST_ASSERT_UTEXT(str_hello, resultText);
1519
1520         uregex_close(re);
1521
1522         status = U_ZERO_ERROR;
1523         re = uregex_open(pat, 3, 0, NULL, &status);
1524         resultPat = uregex_pattern(re, &resultLen, &status);
1525         TEST_ASSERT_SUCCESS(status);
1526
1527         /* The TEST_ASSERT_SUCCESS above should change too... */
1528         if (U_SUCCESS(status)) {
1529             TEST_ASSERT(resultLen == 3);
1530             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1531             TEST_ASSERT(u_strlen(resultPat) == 3);
1532         }
1533
1534         resultText = uregex_patternUText(re, &status);
1535         TEST_ASSERT_SUCCESS(status);
1536         TEST_ASSERT_UTEXT(str_hel, resultText);
1537
1538         uregex_close(re);
1539     }
1540
1541     /*
1542      *  setUText() and lookingAt()
1543      */
1544     {
1545         UText  text1 = UTEXT_INITIALIZER;
1546         UText  text2 = UTEXT_INITIALIZER;
1547         UBool  result;
1548         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1549         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1550         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1551         status = U_ZERO_ERROR;
1552         utext_openUTF8(&text1, str_abcccd, -1, &status);
1553         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1554
1555         utext_openUTF8(&patternText, str_abcd, -1, &status);
1556         re = uregex_openUText(&patternText, 0, NULL, &status);
1557         TEST_ASSERT_SUCCESS(status);
1558
1559         /* Operation before doing a setText should fail... */
1560         status = U_ZERO_ERROR;
1561         uregex_lookingAt(re, 0, &status);
1562         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1563
1564         status = U_ZERO_ERROR;
1565         uregex_setUText(re, &text1, &status);
1566         result = uregex_lookingAt(re, 0, &status);
1567         TEST_ASSERT(result == TRUE);
1568         TEST_ASSERT_SUCCESS(status);
1569
1570         status = U_ZERO_ERROR;
1571         uregex_setUText(re, &text2, &status);
1572         result = uregex_lookingAt(re, 0, &status);
1573         TEST_ASSERT(result == FALSE);
1574         TEST_ASSERT_SUCCESS(status);
1575
1576         status = U_ZERO_ERROR;
1577         uregex_setUText(re, &text1, &status);
1578         result = uregex_lookingAt(re, 0, &status);
1579         TEST_ASSERT(result == TRUE);
1580         TEST_ASSERT_SUCCESS(status);
1581
1582         uregex_close(re);
1583         utext_close(&text1);
1584         utext_close(&text2);
1585     }
1586
1587
1588     /*
1589      *  getText() and getUText()
1590      */
1591     {
1592         UText  text1 = UTEXT_INITIALIZER;
1593         UText  text2 = UTEXT_INITIALIZER;
1594         UChar  text2Chars[20];
1595         UText  *resultText;
1596         const UChar   *result;
1597         int32_t  textLength;
1598         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1599         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1600         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1601
1602
1603         status = U_ZERO_ERROR;
1604         utext_openUTF8(&text1, str_abcccd, -1, &status);
1605         u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2);
1606         utext_openUChars(&text2, text2Chars, -1, &status);
1607
1608         utext_openUTF8(&patternText, str_abcd, -1, &status);
1609         re = uregex_openUText(&patternText, 0, NULL, &status);
1610
1611         /* First set a UText */
1612         uregex_setUText(re, &text1, &status);
1613         resultText = uregex_getUText(re, NULL, &status);
1614         TEST_ASSERT_SUCCESS(status);
1615         TEST_ASSERT(resultText != &text1);
1616         utext_setNativeIndex(resultText, 0);
1617         utext_setNativeIndex(&text1, 0);
1618         TEST_ASSERT(testUTextEqual(resultText, &text1));
1619         utext_close(resultText);
1620
1621         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1622         (void)result;    /* Suppress set but not used warning. */
1623         TEST_ASSERT(textLength == -1 || textLength == 6);
1624         resultText = uregex_getUText(re, NULL, &status);
1625         TEST_ASSERT_SUCCESS(status);
1626         TEST_ASSERT(resultText != &text1);
1627         utext_setNativeIndex(resultText, 0);
1628         utext_setNativeIndex(&text1, 0);
1629         TEST_ASSERT(testUTextEqual(resultText, &text1));
1630         utext_close(resultText);
1631
1632         /* Then set a UChar * */
1633         uregex_setText(re, text2Chars, 7, &status);
1634         resultText = uregex_getUText(re, NULL, &status);
1635         TEST_ASSERT_SUCCESS(status);
1636         utext_setNativeIndex(resultText, 0);
1637         utext_setNativeIndex(&text2, 0);
1638         TEST_ASSERT(testUTextEqual(resultText, &text2));
1639         utext_close(resultText);
1640         result = uregex_getText(re, &textLength, &status);
1641         TEST_ASSERT(textLength == 7);
1642
1643         uregex_close(re);
1644         utext_close(&text1);
1645         utext_close(&text2);
1646     }
1647
1648     /*
1649      *  matches()
1650      */
1651     {
1652         UText   text1 = UTEXT_INITIALIZER;
1653         UBool   result;
1654         UText   nullText = UTEXT_INITIALIZER;
1655         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1656         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1657
1658         status = U_ZERO_ERROR;
1659         utext_openUTF8(&text1, str_abcccde, -1, &status);
1660         utext_openUTF8(&patternText, str_abcd, -1, &status);
1661         re = uregex_openUText(&patternText, 0, NULL, &status);
1662
1663         uregex_setUText(re, &text1, &status);
1664         result = uregex_matches(re, 0, &status);
1665         TEST_ASSERT(result == FALSE);
1666         TEST_ASSERT_SUCCESS(status);
1667         uregex_close(re);
1668
1669         status = U_ZERO_ERROR;
1670         re = uregex_openC(".?", 0, NULL, &status);
1671         uregex_setUText(re, &text1, &status);
1672         result = uregex_matches(re, 7, &status);
1673         TEST_ASSERT(result == TRUE);
1674         TEST_ASSERT_SUCCESS(status);
1675
1676         status = U_ZERO_ERROR;
1677         utext_openUTF8(&nullText, "", -1, &status);
1678         uregex_setUText(re, &nullText, &status);
1679         TEST_ASSERT_SUCCESS(status);
1680         result = uregex_matches(re, 0, &status);
1681         TEST_ASSERT(result == TRUE);
1682         TEST_ASSERT_SUCCESS(status);
1683
1684         uregex_close(re);
1685         utext_close(&text1);
1686         utext_close(&nullText);
1687     }
1688
1689
1690     /*
1691      *  lookingAt()    Used in setText test.
1692      */
1693
1694
1695     /*
1696      *  find(), findNext, start, end, reset
1697      */
1698     {
1699         UChar    text1[50];
1700         UBool    result;
1701         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
1702         status = U_ZERO_ERROR;
1703         re = uregex_openC("rx", 0, NULL, &status);
1704
1705         uregex_setText(re, text1, -1, &status);
1706         result = uregex_find(re, 0, &status);
1707         TEST_ASSERT(result == TRUE);
1708         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1709         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1710         TEST_ASSERT_SUCCESS(status);
1711
1712         result = uregex_find(re, 9, &status);
1713         TEST_ASSERT(result == TRUE);
1714         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1715         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1716         TEST_ASSERT_SUCCESS(status);
1717
1718         result = uregex_find(re, 14, &status);
1719         TEST_ASSERT(result == FALSE);
1720         TEST_ASSERT_SUCCESS(status);
1721
1722         status = U_ZERO_ERROR;
1723         uregex_reset(re, 0, &status);
1724
1725         result = uregex_findNext(re, &status);
1726         TEST_ASSERT(result == TRUE);
1727         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1728         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1729         TEST_ASSERT_SUCCESS(status);
1730
1731         result = uregex_findNext(re, &status);
1732         TEST_ASSERT(result == TRUE);
1733         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1734         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1735         TEST_ASSERT_SUCCESS(status);
1736
1737         status = U_ZERO_ERROR;
1738         uregex_reset(re, 12, &status);
1739
1740         result = uregex_findNext(re, &status);
1741         TEST_ASSERT(result == TRUE);
1742         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1743         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1744         TEST_ASSERT_SUCCESS(status);
1745
1746         result = uregex_findNext(re, &status);
1747         TEST_ASSERT(result == FALSE);
1748         TEST_ASSERT_SUCCESS(status);
1749
1750         uregex_close(re);
1751     }
1752
1753     /*
1754      *  group()
1755      */
1756     {
1757         UChar    text1[80];
1758         UText   *actual;
1759         UBool    result;
1760
1761         const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1762         const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1763
1764
1765         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
1766
1767         status = U_ZERO_ERROR;
1768         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1769         TEST_ASSERT_SUCCESS(status);
1770
1771         uregex_setText(re, text1, -1, &status);
1772         result = uregex_find(re, 0, &status);
1773         TEST_ASSERT(result==TRUE);
1774
1775         /*  Capture Group 0, the full match.  Should succeed.  */
1776         status = U_ZERO_ERROR;
1777         actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1778         TEST_ASSERT_SUCCESS(status);
1779         TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1780         utext_close(actual);
1781
1782         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1783         status = U_ZERO_ERROR;
1784         {
1785             int64_t      group_len;
1786             int32_t      len16;
1787             UErrorCode   shallowStatus = U_ZERO_ERROR;
1788             int64_t      nativeIndex;
1789             UChar *groupChars;
1790             UText groupText = UTEXT_INITIALIZER;
1791
1792             actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1793             TEST_ASSERT_SUCCESS(status);
1794
1795             nativeIndex = utext_getNativeIndex(actual);
1796             /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
1797             /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
1798             len16 = (int32_t)group_len;
1799
1800             groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1));
1801             utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1802
1803             utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1804
1805             TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1806             utext_close(&groupText);
1807             free(groupChars);
1808         }
1809         utext_close(actual);
1810
1811         /*  Capture group #1.  Should succeed. */
1812         status = U_ZERO_ERROR;
1813         actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1814         TEST_ASSERT_SUCCESS(status);
1815         TEST_ASSERT_UTEXT(str_interior, actual);
1816         utext_close(actual);
1817
1818         /*  Capture group out of range.  Error. */
1819         status = U_ZERO_ERROR;
1820         actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1821         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1822         TEST_ASSERT(utext_nativeLength(actual) == 0);
1823         utext_close(actual);
1824
1825         uregex_close(re);
1826
1827     }
1828
1829     /*
1830      *  replaceFirst()
1831      */
1832     {
1833         UChar    text1[80];
1834         UChar    text2[80];
1835         UText    replText = UTEXT_INITIALIZER;
1836         UText   *result;
1837         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1838         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1839         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1840         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1841         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1842         status = U_ZERO_ERROR;
1843         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1844         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1845         utext_openUTF8(&replText, str_1x, -1, &status);
1846
1847         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1848         TEST_ASSERT_SUCCESS(status);
1849
1850         /*  Normal case, with match */
1851         uregex_setText(re, text1, -1, &status);
1852         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853         TEST_ASSERT_SUCCESS(status);
1854         TEST_ASSERT_UTEXT(str_Replxxx, result);
1855         utext_close(result);
1856
1857         /* No match.  Text should copy to output with no changes.  */
1858         uregex_setText(re, text2, -1, &status);
1859         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1860         TEST_ASSERT_SUCCESS(status);
1861         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1862         utext_close(result);
1863
1864         /* Unicode escapes */
1865         uregex_setText(re, text1, -1, &status);
1866         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1867         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1868         TEST_ASSERT_SUCCESS(status);
1869         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1870         utext_close(result);
1871
1872         uregex_close(re);
1873         utext_close(&replText);
1874     }
1875
1876
1877     /*
1878      *  replaceAll()
1879      */
1880     {
1881         UChar    text1[80];
1882         UChar    text2[80];
1883         UText    replText = UTEXT_INITIALIZER;
1884         UText   *result;
1885         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1886         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1887         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1888         status = U_ZERO_ERROR;
1889         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1890         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1891         utext_openUTF8(&replText, str_1, -1, &status);
1892
1893         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1894         TEST_ASSERT_SUCCESS(status);
1895
1896         /*  Normal case, with match */
1897         uregex_setText(re, text1, -1, &status);
1898         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1899         TEST_ASSERT_SUCCESS(status);
1900         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1901         utext_close(result);
1902
1903         /* No match.  Text should copy to output with no changes.  */
1904         uregex_setText(re, text2, -1, &status);
1905         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1906         TEST_ASSERT_SUCCESS(status);
1907         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1908         utext_close(result);
1909
1910         uregex_close(re);
1911         utext_close(&replText);
1912     }
1913
1914
1915     /*
1916      *  appendReplacement()
1917      */
1918     {
1919         UChar    text[100];
1920         UChar    repl[100];
1921         UChar    buf[100];
1922         UChar   *bufPtr;
1923         int32_t  bufCap;
1924
1925         status = U_ZERO_ERROR;
1926         re = uregex_openC(".*", 0, 0, &status);
1927         TEST_ASSERT_SUCCESS(status);
1928
1929         u_uastrncpy(text, "whatever",  sizeof(text)/2);
1930         u_uastrncpy(repl, "some other", sizeof(repl)/2);
1931         uregex_setText(re, text, -1, &status);
1932
1933         /* match covers whole target string */
1934         uregex_find(re, 0, &status);
1935         TEST_ASSERT_SUCCESS(status);
1936         bufPtr = buf;
1937         bufCap = sizeof(buf) / 2;
1938         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1939         TEST_ASSERT_SUCCESS(status);
1940         TEST_ASSERT_STRING("some other", buf, TRUE);
1941
1942         /* Match has \u \U escapes */
1943         uregex_find(re, 0, &status);
1944         TEST_ASSERT_SUCCESS(status);
1945         bufPtr = buf;
1946         bufCap = sizeof(buf) / 2;
1947         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1948         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1949         TEST_ASSERT_SUCCESS(status);
1950         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1951
1952         uregex_close(re);
1953     }
1954
1955
1956     /*
1957      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1958      */
1959
1960     /*
1961      *  splitUText()
1962      */
1963     {
1964         UChar    textToSplit[80];
1965         UChar    text2[80];
1966         UText    *fields[10];
1967         int32_t  numFields;
1968         int32_t i;
1969
1970         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1971         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1972
1973         status = U_ZERO_ERROR;
1974         re = uregex_openC(":", 0, NULL, &status);
1975
1976
1977         /*  Simple split */
1978
1979         uregex_setText(re, textToSplit, -1, &status);
1980         TEST_ASSERT_SUCCESS(status);
1981
1982         /* The TEST_ASSERT_SUCCESS call above should change too... */
1983         if (U_SUCCESS(status)) {
1984             memset(fields, 0, sizeof(fields));
1985             numFields = uregex_splitUText(re, fields, 10, &status);
1986             TEST_ASSERT_SUCCESS(status);
1987
1988             /* The TEST_ASSERT_SUCCESS call above should change too... */
1989             if(U_SUCCESS(status)) {
1990               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1991               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1992               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1993                 TEST_ASSERT(numFields == 3);
1994                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1995                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1996                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1997                 TEST_ASSERT(fields[3] == NULL);
1998             }
1999             for(i = 0; i < numFields; i++) {
2000                 utext_close(fields[i]);
2001             }
2002         }
2003
2004         uregex_close(re);
2005
2006
2007         /*  Split with too few output strings available */
2008         status = U_ZERO_ERROR;
2009         re = uregex_openC(":", 0, NULL, &status);
2010         uregex_setText(re, textToSplit, -1, &status);
2011         TEST_ASSERT_SUCCESS(status);
2012
2013         /* The TEST_ASSERT_SUCCESS call above should change too... */
2014         if(U_SUCCESS(status)) {
2015             fields[0] = NULL;
2016             fields[1] = NULL;
2017             fields[2] = &patternText;
2018             numFields = uregex_splitUText(re, fields, 2, &status);
2019             TEST_ASSERT_SUCCESS(status);
2020
2021             /* The TEST_ASSERT_SUCCESS call above should change too... */
2022             if(U_SUCCESS(status)) {
2023                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2024                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2025                 TEST_ASSERT(numFields == 2);
2026                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2027                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2028                 TEST_ASSERT(fields[2] == &patternText);
2029             }
2030             for(i = 0; i < numFields; i++) {
2031                 utext_close(fields[i]);
2032             }
2033         }
2034
2035         uregex_close(re);
2036     }
2037
2038     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2039      *                   comes out as additional fields.  */
2040     {
2041         UChar    textToSplit[80];
2042         UText    *fields[10];
2043         int32_t  numFields;
2044         int32_t i;
2045
2046         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
2047
2048         status = U_ZERO_ERROR;
2049         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2050
2051         uregex_setText(re, textToSplit, -1, &status);
2052         TEST_ASSERT_SUCCESS(status);
2053
2054         /* The TEST_ASSERT_SUCCESS call above should change too... */
2055         if(U_SUCCESS(status)) {
2056             memset(fields, 0, sizeof(fields));
2057             numFields = uregex_splitUText(re, fields, 10, &status);
2058             TEST_ASSERT_SUCCESS(status);
2059
2060             /* The TEST_ASSERT_SUCCESS call above should change too... */
2061             if(U_SUCCESS(status)) {
2062                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2063                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2064                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2065                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2066                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2067
2068                 TEST_ASSERT(numFields == 5);
2069                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2070                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2071                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2072                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2073                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2074                 TEST_ASSERT(fields[5] == NULL);
2075             }
2076             for(i = 0; i < numFields; i++) {
2077                 utext_close(fields[i]);
2078             }
2079         }
2080
2081         /*  Split with too few output strings available (2) */
2082         status = U_ZERO_ERROR;
2083         fields[0] = NULL;
2084         fields[1] = NULL;
2085         fields[2] = &patternText;
2086         numFields = uregex_splitUText(re, fields, 2, &status);
2087         TEST_ASSERT_SUCCESS(status);
2088
2089         /* The TEST_ASSERT_SUCCESS call above should change too... */
2090         if(U_SUCCESS(status)) {
2091             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2092             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2093             TEST_ASSERT(numFields == 2);
2094             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2095             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2096             TEST_ASSERT(fields[2] == &patternText);
2097         }
2098         for(i = 0; i < numFields; i++) {
2099             utext_close(fields[i]);
2100         }
2101
2102
2103         /*  Split with too few output strings available (3) */
2104         status = U_ZERO_ERROR;
2105         fields[0] = NULL;
2106         fields[1] = NULL;
2107         fields[2] = NULL;
2108         fields[3] = &patternText;
2109         numFields = uregex_splitUText(re, fields, 3, &status);
2110         TEST_ASSERT_SUCCESS(status);
2111
2112         /* The TEST_ASSERT_SUCCESS call above should change too... */
2113         if(U_SUCCESS(status)) {
2114             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2115             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2116             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2117             TEST_ASSERT(numFields == 3);
2118             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2119             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2120             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2121             TEST_ASSERT(fields[3] == &patternText);
2122         }
2123         for(i = 0; i < numFields; i++) {
2124             utext_close(fields[i]);
2125         }
2126
2127         /*  Split with just enough output strings available (5) */
2128         status = U_ZERO_ERROR;
2129         fields[0] = NULL;
2130         fields[1] = NULL;
2131         fields[2] = NULL;
2132         fields[3] = NULL;
2133         fields[4] = NULL;
2134         fields[5] = &patternText;
2135         numFields = uregex_splitUText(re, fields, 5, &status);
2136         TEST_ASSERT_SUCCESS(status);
2137
2138         /* The TEST_ASSERT_SUCCESS call above should change too... */
2139         if(U_SUCCESS(status)) {
2140             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2141             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2142             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2143             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2144             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2145
2146             TEST_ASSERT(numFields == 5);
2147             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2148             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2149             TEST_ASSERT_UTEXT(str_second, fields[2]);
2150             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2151             TEST_ASSERT_UTEXT(str_third, fields[4]);
2152             TEST_ASSERT(fields[5] == &patternText);
2153         }
2154         for(i = 0; i < numFields; i++) {
2155             utext_close(fields[i]);
2156         }
2157
2158         /* Split, end of text is a field delimiter.   */
2159         status = U_ZERO_ERROR;
2160         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2161         TEST_ASSERT_SUCCESS(status);
2162
2163         /* The TEST_ASSERT_SUCCESS call above should change too... */
2164         if(U_SUCCESS(status)) {
2165             memset(fields, 0, sizeof(fields));
2166             fields[9] = &patternText;
2167             numFields = uregex_splitUText(re, fields, 9, &status);
2168             TEST_ASSERT_SUCCESS(status);
2169
2170             /* The TEST_ASSERT_SUCCESS call above should change too... */
2171             if(U_SUCCESS(status)) {
2172                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2173                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2174                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2175                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2176                 const char str_empty[] = { 0x00 };
2177
2178                 TEST_ASSERT(numFields == 5);
2179                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2180                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2181                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2182                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2183                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2184                 TEST_ASSERT(fields[5] == NULL);
2185                 TEST_ASSERT(fields[8] == NULL);
2186                 TEST_ASSERT(fields[9] == &patternText);
2187             }
2188             for(i = 0; i < numFields; i++) {
2189                 utext_close(fields[i]);
2190             }
2191         }
2192
2193         uregex_close(re);
2194     }
2195     utext_close(&patternText);
2196 }
2197
2198
2199 static void TestRefreshInput(void) {
2200     /*
2201      *  RefreshInput changes out the input of a URegularExpression without
2202      *    changing anything else in the match state.  Used with Java JNI,
2203      *    when Java moves the underlying string storage.   This test
2204      *    runs a find() loop, moving the text after the first match.
2205      *    The right number of matches should still be found.
2206      */
2207     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2208     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2209     UErrorCode status = U_ZERO_ERROR;
2210     URegularExpression *re;
2211     UText ut1 = UTEXT_INITIALIZER;
2212     UText ut2 = UTEXT_INITIALIZER;
2213
2214     re = uregex_openC("[ABC]", 0, 0, &status);
2215     TEST_ASSERT_SUCCESS(status);
2216
2217     utext_openUChars(&ut1, testStr, -1, &status);
2218     TEST_ASSERT_SUCCESS(status);
2219     uregex_setUText(re, &ut1, &status);
2220     TEST_ASSERT_SUCCESS(status);
2221
2222     /* Find the first match "A" in the original string */
2223     TEST_ASSERT(uregex_findNext(re, &status));
2224     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2225
2226     /* Move the string, kill the original string.  */
2227     u_strcpy(movedStr, testStr);
2228     u_memset(testStr, 0, u_strlen(testStr));
2229     utext_openUChars(&ut2, movedStr, -1, &status);
2230     TEST_ASSERT_SUCCESS(status);
2231     uregex_refreshUText(re, &ut2, &status);
2232     TEST_ASSERT_SUCCESS(status);
2233
2234     /* Find the following two matches, now working in the moved string. */
2235     TEST_ASSERT(uregex_findNext(re, &status));
2236     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2237     TEST_ASSERT(uregex_findNext(re, &status));
2238     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2239     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2240
2241     uregex_close(re);
2242 }
2243
2244
2245 static void TestBug8421(void) {
2246     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2247      *             was failing.
2248      */
2249     URegularExpression *re;
2250     UErrorCode status = U_ZERO_ERROR;
2251     int32_t  limit = -1;
2252
2253     re = uregex_openC("abc", 0, 0, &status);
2254     TEST_ASSERT_SUCCESS(status);
2255
2256     limit = uregex_getTimeLimit(re, &status);
2257     TEST_ASSERT_SUCCESS(status);
2258     TEST_ASSERT(limit == 0);
2259
2260     uregex_setTimeLimit(re, 100, &status);
2261     TEST_ASSERT_SUCCESS(status);
2262     limit = uregex_getTimeLimit(re, &status);
2263     TEST_ASSERT_SUCCESS(status);
2264     TEST_ASSERT(limit == 100);
2265
2266     uregex_close(re);
2267 }
2268
2269
2270 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */