icuSources/test/cintltst/reapits.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 2004-2015, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /********************************************************************************
   7 *
   8 * File reapits.c
   9 *
  10 *********************************************************************************/
  11 /*C API TEST FOR Regular Expressions */
  12 /**
  13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
  14 *   try to test the full functionality.  It just calls each function and verifies that it
  15 *   works on a basic level.
  16 *
  17 *   More complete testing of regular expression functionality is done with the C++ tests.
  18 **/
  19
  20 #include "unicode/utypes.h"
  21
  22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  23
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include "unicode/uloc.h"
  27 #include "unicode/uregex.h"
  28 #include "unicode/ustring.h"
  29 #include "unicode/utext.h"
  30 #include "cintltst.h"
  31 #include "cmemory.h"
  32
  33 #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \
  34 log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}}
  35
  36 #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \
  37 log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}}
  38
  39 /*
  40  *   TEST_SETUP and TEST_TEARDOWN
  41  *         macros to handle the boilerplate around setting up regex test cases.
  42  *         parameteres to setup:
  43  *              pattern:     The regex pattern, a (char *) null terminated C string.
  44  *              testString:  The string data, also a (char *) C string.
  45  *              flags:       Regex flags to set when compiling the pattern
  46  *
  47  *         Put arbitrary test code between SETUP and TEARDOWN.
  48  *         're" is the compiled, ready-to-go  regular expression.
  49  */
  50 #define TEST_SETUP(pattern, testString, flags) {  \
  51     UChar   *srcString = NULL;  \
  52     status = U_ZERO_ERROR; \
  53     re = uregex_openC(pattern, flags, NULL, &status);  \
  54     TEST_ASSERT_SUCCESS(status);   \
  55     srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
  56     u_uastrncpy(srcString, testString,  strlen(testString)+1); \
  57     uregex_setText(re, srcString, -1, &status); \
  58     TEST_ASSERT_SUCCESS(status);  \
  59     if (U_SUCCESS(status)) {
  60
/*
 * TEST_TEARDOWN
 *     Ends a test case started with TEST_SETUP: closes the "if (U_SUCCESS(status))"
 *     block opened there, reports any failure left in status, closes "re", frees
 *     the srcString buffer allocated by TEST_SETUP, and closes the outer brace block.
 */
#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
  67
  68
  69 /**
  70  * @param expected utf-8 array of bytes to be expected
  71  */
  72 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
  73      char     buf_inside_macro[120];
  74      int32_t  len = (int32_t)strlen(expected);
  75      UBool    success;
  76      if (nulTerm) {
  77          u_austrncpy(buf_inside_macro, (actual), len+1);
  78          buf_inside_macro[len+2] = 0;
  79          success = (strcmp((expected), buf_inside_macro) == 0);
  80      } else {
  81          u_austrncpy(buf_inside_macro, (actual), len);
  82          buf_inside_macro[len+1] = 0;
  83          success = (strncmp((expected), buf_inside_macro, len) == 0);
  84      }
  85      if (success == FALSE) {
  86          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
  87              file, line, (expected), buf_inside_macro);
  88      }
  89 }
  90
  91 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
  92
  93
  94 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
  95     int32_t u8i = 0;
  96     UChar32 u8c = 0;
  97     UChar32 utc = 0;
  98     UBool   stringsEqual = TRUE;
  99     utext_setNativeIndex(utext, 0);
 100     for (;;) {
 101         U8_NEXT_UNSAFE(utf8, u8i, u8c);
 102         utc = utext_next32(utext);
 103         if (u8c == 0 && utc == U_SENTINEL) {
 104             break;
 105         }
 106         if (u8c != utc || u8c == 0) {
 107             stringsEqual = FALSE;
 108             break;
 109         }
 110     }
 111     return stringsEqual;
 112 }
 113
 114
 115 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
 116     utext_setNativeIndex(actual, 0);
 117     if (!equals_utf8_utext(expected, actual)) {
 118         UChar32 c;
 119         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
 120         c = utext_next32From(actual, 0);
 121         while (c != U_SENTINEL) {
 122             if (0x20<c && c <0x7e) {
 123                 log_err("%c", c);
 124             } else {
 125                 log_err("%#x", c);
 126             }
 127             c = UTEXT_NEXT32(actual);
 128         }
 129         log_err("\"\n");
 130     }
 131 }
 132
 133 /*
 134  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 135  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 136  */
 137 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
 138
 139 static UBool testUTextEqual(UText *uta, UText *utb) {
 140     UChar32 ca = 0;
 141     UChar32 cb = 0;
 142     utext_setNativeIndex(uta, 0);
 143     utext_setNativeIndex(utb, 0);
 144     do {
 145         ca = utext_next32(uta);
 146         cb = utext_next32(utb);
 147         if (ca != cb) {
 148             break;
 149         }
 150     } while (ca != U_SENTINEL);
 151     return ca == cb;
 152 }
 153
 154
 155
 156
 157 static void TestRegexCAPI(void);
 158 static void TestBug4315(void);
 159 static void TestUTextAPI(void);
 160 static void TestRefreshInput(void);
 161 static void TestBug8421(void);
 162 static void TestBug10815(void);
 163
 164 void addURegexTest(TestNode** root);
 165
 166 void addURegexTest(TestNode** root)
 167 {
 168     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
 169     addTest(root, &TestBug4315,   "regex/TestBug4315");
 170     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
 171     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
 172     addTest(root, &TestBug8421,   "regex/TestBug8421");
 173     addTest(root, &TestBug10815,   "regex/TestBug10815");
 174 }
 175
 176 /*
 177  * Call back function and context struct used for testing
 178  *    regular expression user callbacks.  This test is mostly the same as
 179  *   the corresponding C++ test in intltest.
 180  */
/* State shared between a test and TestCallbackFn through the callback's context pointer. */
typedef struct callBackContext {
    int32_t          maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t          numCalls;    /* incremented on every invocation of TestCallbackFn */
    int32_t          lastSteps;   /* the steps argument received by the previous invocation */
} callBackContext;
 186
 187 static UBool U_EXPORT2 U_CALLCONV
 188 TestCallbackFn(const void *context, int32_t steps) {
 189   callBackContext  *info = (callBackContext *)context;
 190   if (info->lastSteps+1 != steps) {
 191       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
 192   }
 193   info->lastSteps = steps;
 194   info->numCalls++;
 195   return (info->numCalls < info->maxCalls);
 196 }
 197
 198 /*
 199  *   Regular Expression C API Tests
 200  */
 201 static void TestRegexCAPI(void) {
 202     UErrorCode           status = U_ZERO_ERROR;
 203     URegularExpression  *re;
 204     UChar                pat[200];
 205     UChar               *minus1;
 206
 207     memset(&minus1, -1, sizeof(minus1));
 208
 209     /* Mimimalist open/close */
 210     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
 211     re = uregex_open(pat, -1, 0, 0, &status);
 212     if (U_FAILURE(status)) {
 213          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
 214          return;
 215     }
 216     uregex_close(re);
 217
 218     /* Open with all flag values set */
 219     status = U_ZERO_ERROR;
 220     re = uregex_open(pat, -1,
 221         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
 222         0, &status);
 223     TEST_ASSERT_SUCCESS(status);
 224     uregex_close(re);
 225
 226     /* Open with an invalid flag */
 227     status = U_ZERO_ERROR;
 228     re = uregex_open(pat, -1, 0x40000000, 0, &status);
 229     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
 230     uregex_close(re);
 231
 232     /* Open with an unimplemented flag */
 233     status = U_ZERO_ERROR;
 234     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
 235     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
 236     uregex_close(re);
 237
 238     /* openC with an invalid parameter */
 239     status = U_ZERO_ERROR;
 240     re = uregex_openC(NULL,
 241         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 242     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
 243
 244     /* openC with an invalid parameter */
 245     status = U_USELESS_COLLATOR_ERROR;
 246     re = uregex_openC(NULL,
 247         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 248     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
 249
 250     /* openC   open from a C string */
 251     {
 252         const UChar   *p;
 253         int32_t  len;
 254         status = U_ZERO_ERROR;
 255         re = uregex_openC("abc*", 0, 0, &status);
 256         TEST_ASSERT_SUCCESS(status);
 257         p = uregex_pattern(re, &len, &status);
 258         TEST_ASSERT_SUCCESS(status);
 259
 260         /* The TEST_ASSERT_SUCCESS above should change too... */
 261         if(U_SUCCESS(status)) {
 262             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
 263             TEST_ASSERT(u_strcmp(pat, p) == 0);
 264             TEST_ASSERT(len==(int32_t)strlen("abc*"));
 265         }
 266
 267         uregex_close(re);
 268
 269         /*  TODO:  Open with ParseError parameter */
 270     }
 271
 272     /*
 273      *  clone
 274      */
 275     {
 276         URegularExpression *clone1;
 277         URegularExpression *clone2;
 278         URegularExpression *clone3;
 279         UChar  testString1[30];
 280         UChar  testString2[30];
 281         UBool  result;
 282
 283
 284         status = U_ZERO_ERROR;
 285         re = uregex_openC("abc*", 0, 0, &status);
 286         TEST_ASSERT_SUCCESS(status);
 287         clone1 = uregex_clone(re, &status);
 288         TEST_ASSERT_SUCCESS(status);
 289         TEST_ASSERT(clone1 != NULL);
 290
 291         status = U_ZERO_ERROR;
 292         clone2 = uregex_clone(re, &status);
 293         TEST_ASSERT_SUCCESS(status);
 294         TEST_ASSERT(clone2 != NULL);
 295         uregex_close(re);
 296
 297         status = U_ZERO_ERROR;
 298         clone3 = uregex_clone(clone2, &status);
 299         TEST_ASSERT_SUCCESS(status);
 300         TEST_ASSERT(clone3 != NULL);
 301
 302         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
 303         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
 304
 305         status = U_ZERO_ERROR;
 306         uregex_setText(clone1, testString1, -1, &status);
 307         TEST_ASSERT_SUCCESS(status);
 308         result = uregex_lookingAt(clone1, 0, &status);
 309         TEST_ASSERT_SUCCESS(status);
 310         TEST_ASSERT(result==TRUE);
 311
 312         status = U_ZERO_ERROR;
 313         uregex_setText(clone2, testString2, -1, &status);
 314         TEST_ASSERT_SUCCESS(status);
 315         result = uregex_lookingAt(clone2, 0, &status);
 316         TEST_ASSERT_SUCCESS(status);
 317         TEST_ASSERT(result==FALSE);
 318         result = uregex_find(clone2, 0, &status);
 319         TEST_ASSERT_SUCCESS(status);
 320         TEST_ASSERT(result==TRUE);
 321
 322         uregex_close(clone1);
 323         uregex_close(clone2);
 324         uregex_close(clone3);
 325
 326     }
 327
 328     /*
 329      *  pattern()
 330     */
 331     {
 332         const UChar  *resultPat;
 333         int32_t       resultLen;
 334         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
 335         status = U_ZERO_ERROR;
 336         re = uregex_open(pat, -1, 0, NULL, &status);
 337         resultPat = uregex_pattern(re, &resultLen, &status);
 338         TEST_ASSERT_SUCCESS(status);
 339
 340         /* The TEST_ASSERT_SUCCESS above should change too... */
 341         if (U_SUCCESS(status)) {
 342             TEST_ASSERT(resultLen == -1);
 343             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
 344         }
 345
 346         uregex_close(re);
 347
 348         status = U_ZERO_ERROR;
 349         re = uregex_open(pat, 3, 0, NULL, &status);
 350         resultPat = uregex_pattern(re, &resultLen, &status);
 351         TEST_ASSERT_SUCCESS(status);
 352         TEST_ASSERT_SUCCESS(status);
 353
 354         /* The TEST_ASSERT_SUCCESS above should change too... */
 355         if (U_SUCCESS(status)) {
 356             TEST_ASSERT(resultLen == 3);
 357             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
 358             TEST_ASSERT(u_strlen(resultPat) == 3);
 359         }
 360
 361         uregex_close(re);
 362     }
 363
 364     /*
 365      *  flags()
 366      */
 367     {
 368         int32_t  t;
 369
 370         status = U_ZERO_ERROR;
 371         re = uregex_open(pat, -1, 0, NULL, &status);
 372         t  = uregex_flags(re, &status);
 373         TEST_ASSERT_SUCCESS(status);
 374         TEST_ASSERT(t == 0);
 375         uregex_close(re);
 376
 377         status = U_ZERO_ERROR;
 378         re = uregex_open(pat, -1, 0, NULL, &status);
 379         t  = uregex_flags(re, &status);
 380         TEST_ASSERT_SUCCESS(status);
 381         TEST_ASSERT(t == 0);
 382         uregex_close(re);
 383
 384         status = U_ZERO_ERROR;
 385         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
 386         t  = uregex_flags(re, &status);
 387         TEST_ASSERT_SUCCESS(status);
 388         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
 389         uregex_close(re);
 390     }
 391
 392     /*
 393      *  setText() and lookingAt()
 394      */
 395     {
 396         UChar  text1[50];
 397         UChar  text2[50];
 398         UBool  result;
 399
 400         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
 401         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
 402         status = U_ZERO_ERROR;
 403         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 404         re = uregex_open(pat, -1, 0, NULL, &status);
 405         TEST_ASSERT_SUCCESS(status);
 406
 407         /* Operation before doing a setText should fail... */
 408         status = U_ZERO_ERROR;
 409         uregex_lookingAt(re, 0, &status);
 410         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
 411
 412         status = U_ZERO_ERROR;
 413         uregex_setText(re, text1, -1, &status);
 414         result = uregex_lookingAt(re, 0, &status);
 415         TEST_ASSERT(result == TRUE);
 416         TEST_ASSERT_SUCCESS(status);
 417
 418         status = U_ZERO_ERROR;
 419         uregex_setText(re, text2, -1, &status);
 420         result = uregex_lookingAt(re, 0, &status);
 421         TEST_ASSERT(result == FALSE);
 422         TEST_ASSERT_SUCCESS(status);
 423
 424         status = U_ZERO_ERROR;
 425         uregex_setText(re, text1, -1, &status);
 426         result = uregex_lookingAt(re, 0, &status);
 427         TEST_ASSERT(result == TRUE);
 428         TEST_ASSERT_SUCCESS(status);
 429
 430         status = U_ZERO_ERROR;
 431         uregex_setText(re, text1, 5, &status);
 432         result = uregex_lookingAt(re, 0, &status);
 433         TEST_ASSERT(result == FALSE);
 434         TEST_ASSERT_SUCCESS(status);
 435
 436         status = U_ZERO_ERROR;
 437         uregex_setText(re, text1, 6, &status);
 438         result = uregex_lookingAt(re, 0, &status);
 439         TEST_ASSERT(result == TRUE);
 440         TEST_ASSERT_SUCCESS(status);
 441
 442         uregex_close(re);
 443     }
 444
 445
 446     /*
 447      *  getText()
 448      */
 449     {
 450         UChar    text1[50];
 451         UChar    text2[50];
 452         const UChar   *result;
 453         int32_t  textLength;
 454
 455         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
 456         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
 457         status = U_ZERO_ERROR;
 458         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 459         re = uregex_open(pat, -1, 0, NULL, &status);
 460
 461         uregex_setText(re, text1, -1, &status);
 462         result = uregex_getText(re, &textLength, &status);
 463         TEST_ASSERT(result == text1);
 464         TEST_ASSERT(textLength == -1);
 465         TEST_ASSERT_SUCCESS(status);
 466
 467         status = U_ZERO_ERROR;
 468         uregex_setText(re, text2, 7, &status);
 469         result = uregex_getText(re, &textLength, &status);
 470         TEST_ASSERT(result == text2);
 471         TEST_ASSERT(textLength == 7);
 472         TEST_ASSERT_SUCCESS(status);
 473
 474         status = U_ZERO_ERROR;
 475         uregex_setText(re, text2, 4, &status);
 476         result = uregex_getText(re, &textLength, &status);
 477         TEST_ASSERT(result == text2);
 478         TEST_ASSERT(textLength == 4);
 479         TEST_ASSERT_SUCCESS(status);
 480         uregex_close(re);
 481     }
 482
 483     /*
 484      *  matches()
 485      */
 486     {
 487         UChar   text1[50];
 488         UBool   result;
 489         int     len;
 490         UChar   nullString[] = {0,0,0};
 491
 492         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
 493         status = U_ZERO_ERROR;
 494         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 495         re = uregex_open(pat, -1, 0, NULL, &status);
 496
 497         uregex_setText(re, text1, -1, &status);
 498         result = uregex_matches(re, 0, &status);
 499         TEST_ASSERT(result == FALSE);
 500         TEST_ASSERT_SUCCESS(status);
 501
 502         status = U_ZERO_ERROR;
 503         uregex_setText(re, text1, 6, &status);
 504         result = uregex_matches(re, 0, &status);
 505         TEST_ASSERT(result == TRUE);
 506         TEST_ASSERT_SUCCESS(status);
 507
 508         status = U_ZERO_ERROR;
 509         uregex_setText(re, text1, 6, &status);
 510         result = uregex_matches(re, 1, &status);
 511         TEST_ASSERT(result == FALSE);
 512         TEST_ASSERT_SUCCESS(status);
 513         uregex_close(re);
 514
 515         status = U_ZERO_ERROR;
 516         re = uregex_openC(".?", 0, NULL, &status);
 517         uregex_setText(re, text1, -1, &status);
 518         len = u_strlen(text1);
 519         result = uregex_matches(re, len, &status);
 520         TEST_ASSERT(result == TRUE);
 521         TEST_ASSERT_SUCCESS(status);
 522
 523         status = U_ZERO_ERROR;
 524         uregex_setText(re, nullString, -1, &status);
 525         TEST_ASSERT_SUCCESS(status);
 526         result = uregex_matches(re, 0, &status);
 527         TEST_ASSERT(result == TRUE);
 528         TEST_ASSERT_SUCCESS(status);
 529         uregex_close(re);
 530     }
 531
 532
 533     /*
 534      *  lookingAt()    Used in setText test.
 535      */
 536
 537
 538     /*
 539      *  find(), findNext, start, end, reset
 540      */
 541     {
 542         UChar    text1[50];
 543         UBool    result;
 544         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
 545         status = U_ZERO_ERROR;
 546         re = uregex_openC("rx", 0, NULL, &status);
 547
 548         uregex_setText(re, text1, -1, &status);
 549         result = uregex_find(re, 0, &status);
 550         TEST_ASSERT(result == TRUE);
 551         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 552         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 553         TEST_ASSERT_SUCCESS(status);
 554
 555         result = uregex_find(re, 9, &status);
 556         TEST_ASSERT(result == TRUE);
 557         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
 558         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
 559         TEST_ASSERT_SUCCESS(status);
 560
 561         result = uregex_find(re, 14, &status);
 562         TEST_ASSERT(result == FALSE);
 563         TEST_ASSERT_SUCCESS(status);
 564
 565         status = U_ZERO_ERROR;
 566         uregex_reset(re, 0, &status);
 567
 568         result = uregex_findNext(re, &status);
 569         TEST_ASSERT(result == TRUE);
 570         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 571         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 572         TEST_ASSERT_SUCCESS(status);
 573
 574         result = uregex_findNext(re, &status);
 575         TEST_ASSERT(result == TRUE);
 576         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
 577         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
 578         TEST_ASSERT_SUCCESS(status);
 579
 580         status = U_ZERO_ERROR;
 581         uregex_reset(re, 12, &status);
 582
 583         result = uregex_findNext(re, &status);
 584         TEST_ASSERT(result == TRUE);
 585         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
 586         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
 587         TEST_ASSERT_SUCCESS(status);
 588
 589         result = uregex_findNext(re, &status);
 590         TEST_ASSERT(result == FALSE);
 591         TEST_ASSERT_SUCCESS(status);
 592
 593         uregex_close(re);
 594     }
 595
 596     /*
 597      *  groupCount
 598      */
 599     {
 600         int32_t result;
 601
 602         status = U_ZERO_ERROR;
 603         re = uregex_openC("abc", 0, NULL, &status);
 604         result = uregex_groupCount(re, &status);
 605         TEST_ASSERT_SUCCESS(status);
 606         TEST_ASSERT(result == 0);
 607         uregex_close(re);
 608
 609         status = U_ZERO_ERROR;
 610         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
 611         result = uregex_groupCount(re, &status);
 612         TEST_ASSERT_SUCCESS(status);
 613         TEST_ASSERT(result == 3);
 614         uregex_close(re);
 615
 616     }
 617
 618
 619     /*
 620      *  group()
 621      */
 622     {
 623         UChar    text1[80];
 624         UChar    buf[80];
 625         UBool    result;
 626         int32_t  resultSz;
 627         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
 628
 629         status = U_ZERO_ERROR;
 630         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
 631         TEST_ASSERT_SUCCESS(status);
 632
 633
 634         uregex_setText(re, text1, -1, &status);
 635         result = uregex_find(re, 0, &status);
 636         TEST_ASSERT(result==TRUE);
 637
 638         /*  Capture Group 0, the full match.  Should succeed.  */
 639         status = U_ZERO_ERROR;
 640         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
 641         TEST_ASSERT_SUCCESS(status);
 642         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
 643         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 644
 645         /*  Capture group #1.  Should succeed. */
 646         status = U_ZERO_ERROR;
 647         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
 648         TEST_ASSERT_SUCCESS(status);
 649         TEST_ASSERT_STRING(" interior ", buf, TRUE);
 650         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
 651
 652         /*  Capture group out of range.  Error. */
 653         status = U_ZERO_ERROR;
 654         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
 655         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 656
 657         /* NULL buffer, pure pre-flight */
 658         status = U_ZERO_ERROR;
 659         resultSz = uregex_group(re, 0, NULL, 0, &status);
 660         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 661         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 662
 663         /* Too small buffer, truncated string */
 664         status = U_ZERO_ERROR;
 665         memset(buf, -1, sizeof(buf));
 666         resultSz = uregex_group(re, 0, buf, 5, &status);
 667         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 668         TEST_ASSERT_STRING("abc i", buf, FALSE);
 669         TEST_ASSERT(buf[5] == (UChar)0xffff);
 670         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 671
 672         /* Output string just fits buffer, no NUL term. */
 673         status = U_ZERO_ERROR;
 674         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
 675         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 676         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
 677         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 678         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
 679
 680         uregex_close(re);
 681
 682     }
 683
 684     /*
 685      *  Regions
 686      */
 687
 688
 689         /* SetRegion(), getRegion() do something  */
 690         TEST_SETUP(".*", "0123456789ABCDEF", 0)
 691         UChar resultString[40];
 692         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
 693         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
 694         uregex_setRegion(re, 3, 6, &status);
 695         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
 696         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
 697         TEST_ASSERT(uregex_findNext(re, &status));
 698         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
 699         TEST_ASSERT_STRING("345", resultString, TRUE);
 700         TEST_TEARDOWN;
 701
 702         /* find(start=-1) uses regions   */
 703         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 704         uregex_setRegion(re, 4, 6, &status);
 705         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 706         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 707         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 708         TEST_TEARDOWN;
 709
 710         /* find (start >=0) does not use regions   */
 711         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 712         uregex_setRegion(re, 4, 6, &status);
 713         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 714         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 715         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 716         TEST_TEARDOWN;
 717
 718         /* findNext() obeys regions    */
 719         TEST_SETUP(".", "0123456789ABCDEF", 0);
 720         uregex_setRegion(re, 4, 6, &status);
 721         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
 722         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 723         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
 724         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
 725         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
 726         TEST_TEARDOWN;
 727
 728         /* matches(start=-1) uses regions                                           */
 729         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
 730         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 731         uregex_setRegion(re, 4, 6, &status);
 732         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
 733         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 734         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 735         TEST_TEARDOWN;
 736
 737         /* matches (start >=0) does not use regions       */
 738         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 739         uregex_setRegion(re, 4, 6, &status);
 740         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
 741         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 742         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 743         TEST_TEARDOWN;
 744
 745         /* lookingAt(start=-1) uses regions                                         */
 746         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
 747         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 748         uregex_setRegion(re, 4, 6, &status);
 749         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
 750         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 751         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
 752         TEST_TEARDOWN;
 753
 754         /* lookingAt (start >=0) does not use regions  */
 755         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 756         uregex_setRegion(re, 4, 6, &status);
 757         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
 758         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 759         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
 760         TEST_TEARDOWN;
 761
 762         /* hitEnd()       */
 763         TEST_SETUP("[a-f]*", "abcdefghij", 0);
 764         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 765         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
 766         TEST_TEARDOWN;
 767
 768         TEST_SETUP("[a-f]*", "abcdef", 0);
 769         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 770         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
 771         TEST_TEARDOWN;
 772
 773         /* requireEnd   */
 774         TEST_SETUP("abcd", "abcd", 0);
 775         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 776         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
 777         TEST_TEARDOWN;
 778
 779         TEST_SETUP("abcd$", "abcd", 0);
 780         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 781         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
 782         TEST_TEARDOWN;
 783
 784         /* anchoringBounds        */
 785         TEST_SETUP("abc$", "abcdef", 0);
 786         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
 787         uregex_useAnchoringBounds(re, FALSE, &status);
 788         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
 789
 790         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
 791         uregex_useAnchoringBounds(re, TRUE, &status);
 792         uregex_setRegion(re, 0, 3, &status);
 793         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 794         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 795         TEST_TEARDOWN;
 796
 797         /* Transparent Bounds      */
 798         TEST_SETUP("abc(?=def)", "abcdef", 0);
 799         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
 800         uregex_useTransparentBounds(re, TRUE, &status);
 801         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
 802
 803         uregex_useTransparentBounds(re, FALSE, &status);
 804         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
 805         uregex_setRegion(re, 0, 3, &status);
 806         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
 807         uregex_useTransparentBounds(re, TRUE, &status);
 808         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
 809         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 810         TEST_TEARDOWN;
 811
 812
 813     /*
 814      *  replaceFirst()
 815      */
 816     {
 817         UChar    text1[80];
 818         UChar    text2[80];
 819         UChar    replText[80];
 820         UChar    buf[80];
 821         int32_t  resultSz;
 822         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
 823         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
 824         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
 825
 826         status = U_ZERO_ERROR;
 827         re = uregex_openC("x(.*?)x", 0, NULL, &status);
 828         TEST_ASSERT_SUCCESS(status);
 829
 830         /*  Normal case, with match */
 831         uregex_setText(re, text1, -1, &status);
 832         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 833         TEST_ASSERT_SUCCESS(status);
 834         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
 835         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 836
 837         /* No match.  Text should copy to output with no changes.  */
 838         status = U_ZERO_ERROR;
 839         uregex_setText(re, text2, -1, &status);
 840         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 841         TEST_ASSERT_SUCCESS(status);
 842         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 843         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
 844
 845         /*  Match, output just fills buffer, no termination warning. */
 846         status = U_ZERO_ERROR;
 847         uregex_setText(re, text1, -1, &status);
 848         memset(buf, -1, sizeof(buf));
 849         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 850         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 851         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 852         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 853         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 854
 855         /* Do the replaceFirst again, without first resetting anything.
 856          *  Should give the same results.
 857          */
 858         status = U_ZERO_ERROR;
 859         memset(buf, -1, sizeof(buf));
 860         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
 861         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 862         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 863         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 864         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 865
 866         /* NULL buffer, zero buffer length */
 867         status = U_ZERO_ERROR;
 868         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
 869         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 870         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 871
 872         /* Buffer too small by one */
 873         status = U_ZERO_ERROR;
 874         memset(buf, -1, sizeof(buf));
 875         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
 876         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 877         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
 878         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 879         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 880
 881         uregex_close(re);
 882     }
 883
 884
 885     /*
 886      *  replaceAll()
 887      */
 888     {
 889         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
 890         UChar    text2[80];          /*  "No match Here"           */
 891         UChar    replText[80];       /*  "<$1>"                    */
 892         UChar    replText2[80];      /*  "<<$1>>"                  */
 893         const char * pattern = "x(.*?)x";
 894         const char * expectedResult = "Replace <aa> <1> <...>.";
 895         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
 896         UChar    buf[80];
 897         int32_t  resultSize;
 898         int32_t  expectedResultSize;
 899         int32_t  expectedResultSize2;
 900         int32_t  i;
 901
 902         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
 903         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
 904         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
 905         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
 906         expectedResultSize = strlen(expectedResult);
 907         expectedResultSize2 = strlen(expectedResult2);
 908
 909         status = U_ZERO_ERROR;
 910         re = uregex_openC(pattern, 0, NULL, &status);
 911         TEST_ASSERT_SUCCESS(status);
 912
 913         /*  Normal case, with match */
 914         uregex_setText(re, text1, -1, &status);
 915         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 916         TEST_ASSERT_SUCCESS(status);
 917         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
 918         TEST_ASSERT(resultSize == expectedResultSize);
 919
 920         /* No match.  Text should copy to output with no changes.  */
 921         status = U_ZERO_ERROR;
 922         uregex_setText(re, text2, -1, &status);
 923         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 924         TEST_ASSERT_SUCCESS(status);
 925         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 926         TEST_ASSERT(resultSize == u_strlen(text2));
 927
 928         /*  Match, output just fills buffer, no termination warning. */
 929         status = U_ZERO_ERROR;
 930         uregex_setText(re, text1, -1, &status);
 931         memset(buf, -1, sizeof(buf));
 932         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
 933         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 934         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
 935         TEST_ASSERT(resultSize == expectedResultSize);
 936         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 937
 938         /* Do the replaceAll again, without first resetting anything.
 939          *  Should give the same results.
 940          */
 941         status = U_ZERO_ERROR;
 942         memset(buf, -1, sizeof(buf));
 943         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
 944         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 945         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
 946         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 947         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 948
 949         /* NULL buffer, zero buffer length */
 950         status = U_ZERO_ERROR;
 951         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
 952         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 953         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 954
 955         /* Buffer too small.  Try every size, which will tickle edge cases
 956          * in uregex_appendReplacement (used by replaceAll)   */
 957         for (i=0; i<expectedResultSize; i++) {
 958             char  expected[80];
 959             status = U_ZERO_ERROR;
 960             memset(buf, -1, sizeof(buf));
 961             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
 962             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 963             strcpy(expected, expectedResult);
 964             expected[i] = 0;
 965             TEST_ASSERT_STRING(expected, buf, FALSE);
 966             TEST_ASSERT(resultSize == expectedResultSize);
 967             TEST_ASSERT(buf[i] == (UChar)0xffff);
 968         }
 969
 970         /* Buffer too small.  Same as previous test, except this time the replacement
 971          * text is longer than the match capture group, making the length of the complete
 972          * replacement longer than the original string.
 973          */
 974         for (i=0; i<expectedResultSize2; i++) {
 975             char  expected[80];
 976             status = U_ZERO_ERROR;
 977             memset(buf, -1, sizeof(buf));
 978             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
 979             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 980             strcpy(expected, expectedResult2);
 981             expected[i] = 0;
 982             TEST_ASSERT_STRING(expected, buf, FALSE);
 983             TEST_ASSERT(resultSize == expectedResultSize2);
 984             TEST_ASSERT(buf[i] == (UChar)0xffff);
 985         }
 986
 987
 988         uregex_close(re);
 989     }
 990
 991
 992     /*
 993      *  appendReplacement()
 994      */
 995     {
 996         UChar    text[100];
 997         UChar    repl[100];
 998         UChar    buf[100];
 999         UChar   *bufPtr;
1000         int32_t  bufCap;
1001
1002
1003         status = U_ZERO_ERROR;
1004         re = uregex_openC(".*", 0, 0, &status);
1005         TEST_ASSERT_SUCCESS(status);
1006
1007         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1008         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1009         uregex_setText(re, text, -1, &status);
1010
1011         /* match covers whole target string */
1012         uregex_find(re, 0, &status);
1013         TEST_ASSERT_SUCCESS(status);
1014         bufPtr = buf;
1015         bufCap = UPRV_LENGTHOF(buf);
1016         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1017         TEST_ASSERT_SUCCESS(status);
1018         TEST_ASSERT_STRING("some other", buf, TRUE);
1019
1020         /* Match has \u \U escapes */
1021         uregex_find(re, 0, &status);
1022         TEST_ASSERT_SUCCESS(status);
1023         bufPtr = buf;
1024         bufCap = UPRV_LENGTHOF(buf);
1025         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1026         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1027         TEST_ASSERT_SUCCESS(status);
1028         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1029
1030         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1031         status = U_ZERO_ERROR;
1032         uregex_find(re, 0, &status);
1033         TEST_ASSERT_SUCCESS(status);
1034         bufPtr = buf;
1035         status = U_BUFFER_OVERFLOW_ERROR;
1036         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1037         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1038
1039         uregex_close(re);
1040     }
1041
1042
1043     /*
1044      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1045      */
1046
1047     /*
1048      *  split()
1049      */
1050     {
1051         UChar    textToSplit[80];
1052         UChar    text2[80];
1053         UChar    buf[200];
1054         UChar    *fields[10];
1055         int32_t  numFields;
1056         int32_t  requiredCapacity;
1057         int32_t  spaceNeeded;
1058         int32_t  sz;
1059
1060         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1061         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1062
1063         status = U_ZERO_ERROR;
1064         re = uregex_openC(":", 0, NULL, &status);
1065
1066
1067         /*  Simple split */
1068
1069         uregex_setText(re, textToSplit, -1, &status);
1070         TEST_ASSERT_SUCCESS(status);
1071
1072         /* The TEST_ASSERT_SUCCESS call above should change too... */
1073         if (U_SUCCESS(status)) {
1074             memset(fields, -1, sizeof(fields));
1075             numFields =
1076                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1077             TEST_ASSERT_SUCCESS(status);
1078
1079             /* The TEST_ASSERT_SUCCESS call above should change too... */
1080             if(U_SUCCESS(status)) {
1081                 TEST_ASSERT(numFields == 3);
1082                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1083                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1084                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1085                 TEST_ASSERT(fields[3] == NULL);
1086
1087                 spaceNeeded = u_strlen(textToSplit) -
1088                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1089                             numFields;          /* Each field gets a NUL terminator */
1090
1091                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1092             }
1093         }
1094
1095         uregex_close(re);
1096
1097
1098         /*  Split with too few output strings available */
1099         status = U_ZERO_ERROR;
1100         re = uregex_openC(":", 0, NULL, &status);
1101         uregex_setText(re, textToSplit, -1, &status);
1102         TEST_ASSERT_SUCCESS(status);
1103
1104         /* The TEST_ASSERT_SUCCESS call above should change too... */
1105         if(U_SUCCESS(status)) {
1106             memset(fields, -1, sizeof(fields));
1107             numFields =
1108                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1109             TEST_ASSERT_SUCCESS(status);
1110
1111             /* The TEST_ASSERT_SUCCESS call above should change too... */
1112             if(U_SUCCESS(status)) {
1113                 TEST_ASSERT(numFields == 2);
1114                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1115                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1116                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1117
1118                 spaceNeeded = u_strlen(textToSplit) -
1119                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1120                             numFields;          /* Each field gets a NUL terminator */
1121
1122                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1123
1124                 /* Split with a range of output buffer sizes.  */
1125                 spaceNeeded = u_strlen(textToSplit) -
1126                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1127                     numFields;          /* Each field gets a NUL terminator */
1128
1129                 for (sz=0; sz < spaceNeeded+1; sz++) {
1130                     memset(fields, -1, sizeof(fields));
1131                     status = U_ZERO_ERROR;
1132                     numFields =
1133                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1134                     if (sz >= spaceNeeded) {
1135                         TEST_ASSERT_SUCCESS(status);
1136                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1137                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1138                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1139                     } else {
1140                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1141                     }
1142                     TEST_ASSERT(numFields == 3);
1143                     TEST_ASSERT(fields[3] == NULL);
1144                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1145                 }
1146             }
1147         }
1148
1149         uregex_close(re);
1150     }
1151
1152
1153
1154
1155     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1156      *                   comes out as additional fields.  */
1157     {
1158         UChar    textToSplit[80];
1159         UChar    buf[200];
1160         UChar    *fields[10];
1161         int32_t  numFields;
1162         int32_t  requiredCapacity;
1163         int32_t  spaceNeeded;
1164         int32_t  sz;
1165
1166         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1167
1168         status = U_ZERO_ERROR;
1169         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1170
1171         uregex_setText(re, textToSplit, -1, &status);
1172         TEST_ASSERT_SUCCESS(status);
1173
1174         /* The TEST_ASSERT_SUCCESS call above should change too... */
1175         if(U_SUCCESS(status)) {
1176             memset(fields, -1, sizeof(fields));
1177             numFields =
1178                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1179             TEST_ASSERT_SUCCESS(status);
1180
1181             /* The TEST_ASSERT_SUCCESS call above should change too... */
1182             if(U_SUCCESS(status)) {
1183                 TEST_ASSERT(numFields == 5);
1184                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1185                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1186                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1187                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1188                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1189                 TEST_ASSERT(fields[5] == NULL);
1190                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1191                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1192             }
1193         }
1194
1195         /*  Split with too few output strings available (2) */
1196         status = U_ZERO_ERROR;
1197         memset(fields, -1, sizeof(fields));
1198         numFields =
1199             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1200         TEST_ASSERT_SUCCESS(status);
1201
1202         /* The TEST_ASSERT_SUCCESS call above should change too... */
1203         if(U_SUCCESS(status)) {
1204             TEST_ASSERT(numFields == 2);
1205             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1206             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1207             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1208
1209             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1210             TEST_ASSERT(spaceNeeded == requiredCapacity);
1211         }
1212
1213         /*  Split with too few output strings available (3) */
1214         status = U_ZERO_ERROR;
1215         memset(fields, -1, sizeof(fields));
1216         numFields =
1217             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1218         TEST_ASSERT_SUCCESS(status);
1219
1220         /* The TEST_ASSERT_SUCCESS call above should change too... */
1221         if(U_SUCCESS(status)) {
1222             TEST_ASSERT(numFields == 3);
1223             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1224             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1225             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1226             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1227
1228             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1229             TEST_ASSERT(spaceNeeded == requiredCapacity);
1230         }
1231
1232         /*  Split with just enough output strings available (5) */
1233         status = U_ZERO_ERROR;
1234         memset(fields, -1, sizeof(fields));
1235         numFields =
1236             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1237         TEST_ASSERT_SUCCESS(status);
1238
1239         /* The TEST_ASSERT_SUCCESS call above should change too... */
1240         if(U_SUCCESS(status)) {
1241             TEST_ASSERT(numFields == 5);
1242             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1243             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1244             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1245             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1246             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1247             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1248
1249             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1250             TEST_ASSERT(spaceNeeded == requiredCapacity);
1251         }
1252
1253         /* Split, end of text is a field delimiter.   */
1254         status = U_ZERO_ERROR;
1255         sz = strlen("first <tag-a> second<tag-b>");
1256         uregex_setText(re, textToSplit, sz, &status);
1257         TEST_ASSERT_SUCCESS(status);
1258
1259         /* The TEST_ASSERT_SUCCESS call above should change too... */
1260         if(U_SUCCESS(status)) {
1261             memset(fields, -1, sizeof(fields));
1262             numFields =
1263                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1264             TEST_ASSERT_SUCCESS(status);
1265
1266             /* The TEST_ASSERT_SUCCESS call above should change too... */
1267             if(U_SUCCESS(status)) {
1268                 TEST_ASSERT(numFields == 5);
1269                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1270                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1271                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1272                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1273                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1274                 TEST_ASSERT(fields[5] == NULL);
1275                 TEST_ASSERT(fields[8] == NULL);
1276                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1277                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1278                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1279             }
1280         }
1281
1282         uregex_close(re);
1283     }
1284
1285     /*
1286      * set/getTimeLimit
1287      */
1288      TEST_SETUP("abc$", "abcdef", 0);
1289      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1290      uregex_setTimeLimit(re, 1000, &status);
1291      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1292      TEST_ASSERT_SUCCESS(status);
1293      uregex_setTimeLimit(re, -1, &status);
1294      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1295      status = U_ZERO_ERROR;
1296      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1297      TEST_TEARDOWN;
1298
1299      /*
1300       * set/get Stack Limit
1301       */
1302      TEST_SETUP("abc$", "abcdef", 0);
1303      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1304      uregex_setStackLimit(re, 40000, &status);
1305      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1306      TEST_ASSERT_SUCCESS(status);
1307      uregex_setStackLimit(re, -1, &status);
1308      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1309      status = U_ZERO_ERROR;
1310      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1311      TEST_TEARDOWN;
1312
1313
1314      /*
1315       * Get/Set callback functions
1316       *     This test is copied from intltest regex/Callbacks
1317       *     The pattern and test data will run long enough to cause the callback
1318       *       to be invoked.  The nested '+' operators give exponential time
1319       *       behavior with increasing string length.
1320       */
1321      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1322      callBackContext cbInfo = {4, 0, 0};
1323      const void     *pContext   = &cbInfo;
1324      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1325
1326      /*  Getting the callback fn when it hasn't been set must return NULL  */
1327      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1328      TEST_ASSERT_SUCCESS(status);
1329      TEST_ASSERT(returnedFn == NULL);
1330      TEST_ASSERT(pContext == NULL);
1331
1332      /* Set the callback and do a match.                                  */
1333      /* The callback function should record that it has been called.      */
1334      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1335      TEST_ASSERT_SUCCESS(status);
1336      TEST_ASSERT(cbInfo.numCalls == 0);
1337      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1338      TEST_ASSERT_SUCCESS(status);
1339      TEST_ASSERT(cbInfo.numCalls > 0);
1340
1341      /* Getting the callback should return the values that were set above.  */
1342      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1343      TEST_ASSERT(returnedFn == &TestCallbackFn);
1344      TEST_ASSERT(pContext == &cbInfo);
1345
1346      TEST_TEARDOWN;
1347 }
1348
1349
1350
1351 static void TestBug4315(void) {
1352     UErrorCode      theICUError = U_ZERO_ERROR;
1353     URegularExpression *theRegEx;
1354     UChar           *textBuff;
1355     const char      *thePattern;
1356     UChar            theString[100];
1357     UChar           *destFields[24];
1358     int32_t         neededLength1;
1359     int32_t         neededLength2;
1360
1361     int32_t         wordCount = 0;
1362     int32_t         destFieldsSize = 24;
1363
1364     thePattern  = "ck ";
1365     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1366
1367     /* open a regex */
1368     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1369     TEST_ASSERT_SUCCESS(theICUError);
1370
1371     /* set the input string */
1372     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1373     TEST_ASSERT_SUCCESS(theICUError);
1374
1375     /* split */
1376     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1377      *  error occurs! */
1378     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1379         destFieldsSize, &theICUError);
1380
1381     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1382     TEST_ASSERT(wordCount==3);
1383
1384     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1385     {
1386         theICUError = U_ZERO_ERROR;
1387         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1388         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1389             destFields, destFieldsSize, &theICUError);
1390         TEST_ASSERT(wordCount==3);
1391         TEST_ASSERT_SUCCESS(theICUError);
1392         TEST_ASSERT(neededLength1 == neededLength2);
1393         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1394         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1395         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1396         TEST_ASSERT(destFields[3] == NULL);
1397         free(textBuff);
1398     }
1399     uregex_close(theRegEx);
1400 }
1401
1402 /* Based on TestRegexCAPI() */
1403 static void TestUTextAPI(void) {
1404     UErrorCode           status = U_ZERO_ERROR;
1405     URegularExpression  *re;
1406     UText                patternText = UTEXT_INITIALIZER;
1407     UChar                pat[200];
1408     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1409
1410     /* Minimalist open/close */
1411     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1412     re = uregex_openUText(&patternText, 0, 0, &status);
1413     if (U_FAILURE(status)) {
1414          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1415          utext_close(&patternText);
1416          return;
1417     }
1418     uregex_close(re);
1419
1420     /* Open with all flag values set */
1421     status = U_ZERO_ERROR;
1422     re = uregex_openUText(&patternText,
1423         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1424         0, &status);
1425     TEST_ASSERT_SUCCESS(status);
1426     uregex_close(re);
1427
1428     /* Open with an invalid flag */
1429     status = U_ZERO_ERROR;
1430     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1431     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1432     uregex_close(re);
1433
1434     /* open with an invalid parameter */
1435     status = U_ZERO_ERROR;
1436     re = uregex_openUText(NULL,
1437         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1438     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1439
1440     /*
1441      *  clone
1442      */
1443     {
1444         URegularExpression *clone1;
1445         URegularExpression *clone2;
1446         URegularExpression *clone3;
1447         UChar  testString1[30];
1448         UChar  testString2[30];
1449         UBool  result;
1450
1451
1452         status = U_ZERO_ERROR;
1453         re = uregex_openUText(&patternText, 0, 0, &status);
1454         TEST_ASSERT_SUCCESS(status);
1455         clone1 = uregex_clone(re, &status);
1456         TEST_ASSERT_SUCCESS(status);
1457         TEST_ASSERT(clone1 != NULL);
1458
1459         status = U_ZERO_ERROR;
1460         clone2 = uregex_clone(re, &status);
1461         TEST_ASSERT_SUCCESS(status);
1462         TEST_ASSERT(clone2 != NULL);
1463         uregex_close(re);
1464
1465         status = U_ZERO_ERROR;
1466         clone3 = uregex_clone(clone2, &status);
1467         TEST_ASSERT_SUCCESS(status);
1468         TEST_ASSERT(clone3 != NULL);
1469
1470         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1471         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1472
1473         status = U_ZERO_ERROR;
1474         uregex_setText(clone1, testString1, -1, &status);
1475         TEST_ASSERT_SUCCESS(status);
1476         result = uregex_lookingAt(clone1, 0, &status);
1477         TEST_ASSERT_SUCCESS(status);
1478         TEST_ASSERT(result==TRUE);
1479
1480         status = U_ZERO_ERROR;
1481         uregex_setText(clone2, testString2, -1, &status);
1482         TEST_ASSERT_SUCCESS(status);
1483         result = uregex_lookingAt(clone2, 0, &status);
1484         TEST_ASSERT_SUCCESS(status);
1485         TEST_ASSERT(result==FALSE);
1486         result = uregex_find(clone2, 0, &status);
1487         TEST_ASSERT_SUCCESS(status);
1488         TEST_ASSERT(result==TRUE);
1489
1490         uregex_close(clone1);
1491         uregex_close(clone2);
1492         uregex_close(clone3);
1493
1494     }
1495
1496     /*
1497      *  pattern() and patternText()
1498      */
1499     {
1500         const UChar  *resultPat;
1501         int32_t       resultLen;
1502         UText        *resultText;
1503         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1504         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1505         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1506         status = U_ZERO_ERROR;
1507
1508         utext_openUTF8(&patternText, str_hello, -1, &status);
1509         re = uregex_open(pat, -1, 0, NULL, &status);
1510         resultPat = uregex_pattern(re, &resultLen, &status);
1511         TEST_ASSERT_SUCCESS(status);
1512
1513         /* The TEST_ASSERT_SUCCESS above should change too... */
1514         if (U_SUCCESS(status)) {
1515             TEST_ASSERT(resultLen == -1);
1516             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1517         }
1518
1519         resultText = uregex_patternUText(re, &status);
1520         TEST_ASSERT_SUCCESS(status);
1521         TEST_ASSERT_UTEXT(str_hello, resultText);
1522
1523         uregex_close(re);
1524
1525         status = U_ZERO_ERROR;
1526         re = uregex_open(pat, 3, 0, NULL, &status);
1527         resultPat = uregex_pattern(re, &resultLen, &status);
1528         TEST_ASSERT_SUCCESS(status);
1529
1530         /* The TEST_ASSERT_SUCCESS above should change too... */
1531         if (U_SUCCESS(status)) {
1532             TEST_ASSERT(resultLen == 3);
1533             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1534             TEST_ASSERT(u_strlen(resultPat) == 3);
1535         }
1536
1537         resultText = uregex_patternUText(re, &status);
1538         TEST_ASSERT_SUCCESS(status);
1539         TEST_ASSERT_UTEXT(str_hel, resultText);
1540
1541         uregex_close(re);
1542     }
1543
1544     /*
1545      *  setUText() and lookingAt()
1546      */
1547     {
1548         UText  text1 = UTEXT_INITIALIZER;
1549         UText  text2 = UTEXT_INITIALIZER;
1550         UBool  result;
1551         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1552         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1553         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1554         status = U_ZERO_ERROR;
1555         utext_openUTF8(&text1, str_abcccd, -1, &status);
1556         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1557
1558         utext_openUTF8(&patternText, str_abcd, -1, &status);
1559         re = uregex_openUText(&patternText, 0, NULL, &status);
1560         TEST_ASSERT_SUCCESS(status);
1561
1562         /* Operation before doing a setText should fail... */
1563         status = U_ZERO_ERROR;
1564         uregex_lookingAt(re, 0, &status);
1565         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1566
1567         status = U_ZERO_ERROR;
1568         uregex_setUText(re, &text1, &status);
1569         result = uregex_lookingAt(re, 0, &status);
1570         TEST_ASSERT(result == TRUE);
1571         TEST_ASSERT_SUCCESS(status);
1572
1573         status = U_ZERO_ERROR;
1574         uregex_setUText(re, &text2, &status);
1575         result = uregex_lookingAt(re, 0, &status);
1576         TEST_ASSERT(result == FALSE);
1577         TEST_ASSERT_SUCCESS(status);
1578
1579         status = U_ZERO_ERROR;
1580         uregex_setUText(re, &text1, &status);
1581         result = uregex_lookingAt(re, 0, &status);
1582         TEST_ASSERT(result == TRUE);
1583         TEST_ASSERT_SUCCESS(status);
1584
1585         uregex_close(re);
1586         utext_close(&text1);
1587         utext_close(&text2);
1588     }
1589
1590
1591     /*
1592      *  getText() and getUText()
1593      */
1594     {
1595         UText  text1 = UTEXT_INITIALIZER;
1596         UText  text2 = UTEXT_INITIALIZER;
1597         UChar  text2Chars[20];
1598         UText  *resultText;
1599         const UChar   *result;
1600         int32_t  textLength;
1601         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1602         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1603         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1604
1605
1606         status = U_ZERO_ERROR;
1607         utext_openUTF8(&text1, str_abcccd, -1, &status);
1608         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1609         utext_openUChars(&text2, text2Chars, -1, &status);
1610
1611         utext_openUTF8(&patternText, str_abcd, -1, &status);
1612         re = uregex_openUText(&patternText, 0, NULL, &status);
1613
1614         /* First set a UText */
1615         uregex_setUText(re, &text1, &status);
1616         resultText = uregex_getUText(re, NULL, &status);
1617         TEST_ASSERT_SUCCESS(status);
1618         TEST_ASSERT(resultText != &text1);
1619         utext_setNativeIndex(resultText, 0);
1620         utext_setNativeIndex(&text1, 0);
1621         TEST_ASSERT(testUTextEqual(resultText, &text1));
1622         utext_close(resultText);
1623
1624         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1625         (void)result;    /* Suppress set but not used warning. */
1626         TEST_ASSERT(textLength == -1 || textLength == 6);
1627         resultText = uregex_getUText(re, NULL, &status);
1628         TEST_ASSERT_SUCCESS(status);
1629         TEST_ASSERT(resultText != &text1);
1630         utext_setNativeIndex(resultText, 0);
1631         utext_setNativeIndex(&text1, 0);
1632         TEST_ASSERT(testUTextEqual(resultText, &text1));
1633         utext_close(resultText);
1634
1635         /* Then set a UChar * */
1636         uregex_setText(re, text2Chars, 7, &status);
1637         resultText = uregex_getUText(re, NULL, &status);
1638         TEST_ASSERT_SUCCESS(status);
1639         utext_setNativeIndex(resultText, 0);
1640         utext_setNativeIndex(&text2, 0);
1641         TEST_ASSERT(testUTextEqual(resultText, &text2));
1642         utext_close(resultText);
1643         result = uregex_getText(re, &textLength, &status);
1644         TEST_ASSERT(textLength == 7);
1645
1646         uregex_close(re);
1647         utext_close(&text1);
1648         utext_close(&text2);
1649     }
1650
1651     /*
1652      *  matches()
1653      */
1654     {
1655         UText   text1 = UTEXT_INITIALIZER;
1656         UBool   result;
1657         UText   nullText = UTEXT_INITIALIZER;
1658         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1659         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1660
1661         status = U_ZERO_ERROR;
1662         utext_openUTF8(&text1, str_abcccde, -1, &status);
1663         utext_openUTF8(&patternText, str_abcd, -1, &status);
1664         re = uregex_openUText(&patternText, 0, NULL, &status);
1665
1666         uregex_setUText(re, &text1, &status);
1667         result = uregex_matches(re, 0, &status);
1668         TEST_ASSERT(result == FALSE);
1669         TEST_ASSERT_SUCCESS(status);
1670         uregex_close(re);
1671
1672         status = U_ZERO_ERROR;
1673         re = uregex_openC(".?", 0, NULL, &status);
1674         uregex_setUText(re, &text1, &status);
1675         result = uregex_matches(re, 7, &status);
1676         TEST_ASSERT(result == TRUE);
1677         TEST_ASSERT_SUCCESS(status);
1678
1679         status = U_ZERO_ERROR;
1680         utext_openUTF8(&nullText, "", -1, &status);
1681         uregex_setUText(re, &nullText, &status);
1682         TEST_ASSERT_SUCCESS(status);
1683         result = uregex_matches(re, 0, &status);
1684         TEST_ASSERT(result == TRUE);
1685         TEST_ASSERT_SUCCESS(status);
1686
1687         uregex_close(re);
1688         utext_close(&text1);
1689         utext_close(&nullText);
1690     }
1691
1692
1693     /*
1694      *  lookingAt()    Used in setText test.
1695      */
1696
1697
1698     /*
1699      *  find(), findNext, start, end, reset
1700      */
1701     {
1702         UChar    text1[50];
1703         UBool    result;
1704         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1705         status = U_ZERO_ERROR;
1706         re = uregex_openC("rx", 0, NULL, &status);
1707
1708         uregex_setText(re, text1, -1, &status);
1709         result = uregex_find(re, 0, &status);
1710         TEST_ASSERT(result == TRUE);
1711         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1712         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1713         TEST_ASSERT_SUCCESS(status);
1714
1715         result = uregex_find(re, 9, &status);
1716         TEST_ASSERT(result == TRUE);
1717         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1718         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1719         TEST_ASSERT_SUCCESS(status);
1720
1721         result = uregex_find(re, 14, &status);
1722         TEST_ASSERT(result == FALSE);
1723         TEST_ASSERT_SUCCESS(status);
1724
1725         status = U_ZERO_ERROR;
1726         uregex_reset(re, 0, &status);
1727
1728         result = uregex_findNext(re, &status);
1729         TEST_ASSERT(result == TRUE);
1730         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1731         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1732         TEST_ASSERT_SUCCESS(status);
1733
1734         result = uregex_findNext(re, &status);
1735         TEST_ASSERT(result == TRUE);
1736         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1737         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1738         TEST_ASSERT_SUCCESS(status);
1739
1740         status = U_ZERO_ERROR;
1741         uregex_reset(re, 12, &status);
1742
1743         result = uregex_findNext(re, &status);
1744         TEST_ASSERT(result == TRUE);
1745         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1746         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1747         TEST_ASSERT_SUCCESS(status);
1748
1749         result = uregex_findNext(re, &status);
1750         TEST_ASSERT(result == FALSE);
1751         TEST_ASSERT_SUCCESS(status);
1752
1753         uregex_close(re);
1754     }
1755
1756     /*
1757      *  groupUText()
1758      */
1759     {
1760         UChar    text1[80];
1761         UText   *actual;
1762         UBool    result;
1763         int64_t  groupLen = 0;
1764         UChar    groupBuf[20];
1765
1766         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1767
1768         status = U_ZERO_ERROR;
1769         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1770         TEST_ASSERT_SUCCESS(status);
1771
1772         uregex_setText(re, text1, -1, &status);
1773         result = uregex_find(re, 0, &status);
1774         TEST_ASSERT(result==TRUE);
1775
1776         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1777         status = U_ZERO_ERROR;
1778         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1779         TEST_ASSERT_SUCCESS(status);
1780
1781         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1782         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1783         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1784
1785         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1786         utext_close(actual);
1787
1788         /*  Capture group #1.  Should succeed. */
1789         status = U_ZERO_ERROR;
1790
1791         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1792         TEST_ASSERT_SUCCESS(status);
1793         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1794                                                            /*    (within the string text1)           */
1795         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1796         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1797         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1798
1799         utext_close(actual);
1800
1801         /*  Capture group out of range.  Error. */
1802         status = U_ZERO_ERROR;
1803         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1804         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1805         utext_close(actual);
1806
1807         uregex_close(re);
1808     }
1809
1810     /*
1811      *  replaceFirst()
1812      */
1813     {
1814         UChar    text1[80];
1815         UChar    text2[80];
1816         UText    replText = UTEXT_INITIALIZER;
1817         UText   *result;
1818         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1819         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1820         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1821                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1822         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1823         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1824         status = U_ZERO_ERROR;
1825         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1826         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1827         utext_openUTF8(&replText, str_1x, -1, &status);
1828
1829         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1830         TEST_ASSERT_SUCCESS(status);
1831
1832         /*  Normal case, with match */
1833         uregex_setText(re, text1, -1, &status);
1834         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1835         TEST_ASSERT_SUCCESS(status);
1836         TEST_ASSERT_UTEXT(str_Replxxx, result);
1837         utext_close(result);
1838
1839         /* No match.  Text should copy to output with no changes.  */
1840         uregex_setText(re, text2, -1, &status);
1841         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1842         TEST_ASSERT_SUCCESS(status);
1843         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1844         utext_close(result);
1845
1846         /* Unicode escapes */
1847         uregex_setText(re, text1, -1, &status);
1848         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1849         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1850         TEST_ASSERT_SUCCESS(status);
1851         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1852         utext_close(result);
1853
1854         uregex_close(re);
1855         utext_close(&replText);
1856     }
1857
1858
1859     /*
1860      *  replaceAll()
1861      */
1862     {
1863         UChar    text1[80];
1864         UChar    text2[80];
1865         UText    replText = UTEXT_INITIALIZER;
1866         UText   *result;
1867         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1868         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1869         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1870         status = U_ZERO_ERROR;
1871         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1872         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1873         utext_openUTF8(&replText, str_1, -1, &status);
1874
1875         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1876         TEST_ASSERT_SUCCESS(status);
1877
1878         /*  Normal case, with match */
1879         uregex_setText(re, text1, -1, &status);
1880         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1881         TEST_ASSERT_SUCCESS(status);
1882         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1883         utext_close(result);
1884
1885         /* No match.  Text should copy to output with no changes.  */
1886         uregex_setText(re, text2, -1, &status);
1887         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1888         TEST_ASSERT_SUCCESS(status);
1889         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1890         utext_close(result);
1891
1892         uregex_close(re);
1893         utext_close(&replText);
1894     }
1895
1896
1897     /*
1898      *  appendReplacement()
1899      */
1900     {
1901         UChar    text[100];
1902         UChar    repl[100];
1903         UChar    buf[100];
1904         UChar   *bufPtr;
1905         int32_t  bufCap;
1906
1907         status = U_ZERO_ERROR;
1908         re = uregex_openC(".*", 0, 0, &status);
1909         TEST_ASSERT_SUCCESS(status);
1910
1911         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1912         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1913         uregex_setText(re, text, -1, &status);
1914
1915         /* match covers whole target string */
1916         uregex_find(re, 0, &status);
1917         TEST_ASSERT_SUCCESS(status);
1918         bufPtr = buf;
1919         bufCap = UPRV_LENGTHOF(buf);
1920         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1921         TEST_ASSERT_SUCCESS(status);
1922         TEST_ASSERT_STRING("some other", buf, TRUE);
1923
1924         /* Match has \u \U escapes */
1925         uregex_find(re, 0, &status);
1926         TEST_ASSERT_SUCCESS(status);
1927         bufPtr = buf;
1928         bufCap = UPRV_LENGTHOF(buf);
1929         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1930         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1931         TEST_ASSERT_SUCCESS(status);
1932         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1933
1934         uregex_close(re);
1935     }
1936
1937
1938     /*
1939      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1940      */
1941
1942     /*
1943      *  splitUText()
1944      */
1945     {
1946         UChar    textToSplit[80];
1947         UChar    text2[80];
1948         UText    *fields[10];
1949         int32_t  numFields;
1950         int32_t i;
1951
1952         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1953         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1954
1955         status = U_ZERO_ERROR;
1956         re = uregex_openC(":", 0, NULL, &status);
1957
1958
1959         /*  Simple split */
1960
1961         uregex_setText(re, textToSplit, -1, &status);
1962         TEST_ASSERT_SUCCESS(status);
1963
1964         /* The TEST_ASSERT_SUCCESS call above should change too... */
1965         if (U_SUCCESS(status)) {
1966             memset(fields, 0, sizeof(fields));
1967             numFields = uregex_splitUText(re, fields, 10, &status);
1968             TEST_ASSERT_SUCCESS(status);
1969
1970             /* The TEST_ASSERT_SUCCESS call above should change too... */
1971             if(U_SUCCESS(status)) {
1972               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1973               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1974               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1975                 TEST_ASSERT(numFields == 3);
1976                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1977                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1978                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1979                 TEST_ASSERT(fields[3] == NULL);
1980             }
1981             for(i = 0; i < numFields; i++) {
1982                 utext_close(fields[i]);
1983             }
1984         }
1985
1986         uregex_close(re);
1987
1988
1989         /*  Split with too few output strings available */
1990         status = U_ZERO_ERROR;
1991         re = uregex_openC(":", 0, NULL, &status);
1992         uregex_setText(re, textToSplit, -1, &status);
1993         TEST_ASSERT_SUCCESS(status);
1994
1995         /* The TEST_ASSERT_SUCCESS call above should change too... */
1996         if(U_SUCCESS(status)) {
1997             fields[0] = NULL;
1998             fields[1] = NULL;
1999             fields[2] = &patternText;
2000             numFields = uregex_splitUText(re, fields, 2, &status);
2001             TEST_ASSERT_SUCCESS(status);
2002
2003             /* The TEST_ASSERT_SUCCESS call above should change too... */
2004             if(U_SUCCESS(status)) {
2005                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2006                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2007                 TEST_ASSERT(numFields == 2);
2008                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2009                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2010                 TEST_ASSERT(fields[2] == &patternText);
2011             }
2012             for(i = 0; i < numFields; i++) {
2013                 utext_close(fields[i]);
2014             }
2015         }
2016
2017         uregex_close(re);
2018     }
2019
2020     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2021      *                   comes out as additional fields.  */
2022     {
2023         UChar    textToSplit[80];
2024         UText    *fields[10];
2025         int32_t  numFields;
2026         int32_t i;
2027
2028         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2029
2030         status = U_ZERO_ERROR;
2031         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2032
2033         uregex_setText(re, textToSplit, -1, &status);
2034         TEST_ASSERT_SUCCESS(status);
2035
2036         /* The TEST_ASSERT_SUCCESS call above should change too... */
2037         if(U_SUCCESS(status)) {
2038             memset(fields, 0, sizeof(fields));
2039             numFields = uregex_splitUText(re, fields, 10, &status);
2040             TEST_ASSERT_SUCCESS(status);
2041
2042             /* The TEST_ASSERT_SUCCESS call above should change too... */
2043             if(U_SUCCESS(status)) {
2044                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2045                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2046                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2047                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2048                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2049
2050                 TEST_ASSERT(numFields == 5);
2051                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2052                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2053                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2054                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2055                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2056                 TEST_ASSERT(fields[5] == NULL);
2057             }
2058             for(i = 0; i < numFields; i++) {
2059                 utext_close(fields[i]);
2060             }
2061         }
2062
2063         /*  Split with too few output strings available (2) */
2064         status = U_ZERO_ERROR;
2065         fields[0] = NULL;
2066         fields[1] = NULL;
2067         fields[2] = &patternText;
2068         numFields = uregex_splitUText(re, fields, 2, &status);
2069         TEST_ASSERT_SUCCESS(status);
2070
2071         /* The TEST_ASSERT_SUCCESS call above should change too... */
2072         if(U_SUCCESS(status)) {
2073             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2074             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2075             TEST_ASSERT(numFields == 2);
2076             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2077             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2078             TEST_ASSERT(fields[2] == &patternText);
2079         }
2080         for(i = 0; i < numFields; i++) {
2081             utext_close(fields[i]);
2082         }
2083
2084
2085         /*  Split with too few output strings available (3) */
2086         status = U_ZERO_ERROR;
2087         fields[0] = NULL;
2088         fields[1] = NULL;
2089         fields[2] = NULL;
2090         fields[3] = &patternText;
2091         numFields = uregex_splitUText(re, fields, 3, &status);
2092         TEST_ASSERT_SUCCESS(status);
2093
2094         /* The TEST_ASSERT_SUCCESS call above should change too... */
2095         if(U_SUCCESS(status)) {
2096             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2097             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2098             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2099             TEST_ASSERT(numFields == 3);
2100             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2101             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2102             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2103             TEST_ASSERT(fields[3] == &patternText);
2104         }
2105         for(i = 0; i < numFields; i++) {
2106             utext_close(fields[i]);
2107         }
2108
2109         /*  Split with just enough output strings available (5) */
2110         status = U_ZERO_ERROR;
2111         fields[0] = NULL;
2112         fields[1] = NULL;
2113         fields[2] = NULL;
2114         fields[3] = NULL;
2115         fields[4] = NULL;
2116         fields[5] = &patternText;
2117         numFields = uregex_splitUText(re, fields, 5, &status);
2118         TEST_ASSERT_SUCCESS(status);
2119
2120         /* The TEST_ASSERT_SUCCESS call above should change too... */
2121         if(U_SUCCESS(status)) {
2122             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2123             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2124             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2125             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2126             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2127
2128             TEST_ASSERT(numFields == 5);
2129             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2130             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2131             TEST_ASSERT_UTEXT(str_second, fields[2]);
2132             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2133             TEST_ASSERT_UTEXT(str_third, fields[4]);
2134             TEST_ASSERT(fields[5] == &patternText);
2135         }
2136         for(i = 0; i < numFields; i++) {
2137             utext_close(fields[i]);
2138         }
2139
2140         /* Split, end of text is a field delimiter.   */
2141         status = U_ZERO_ERROR;
2142         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2143         TEST_ASSERT_SUCCESS(status);
2144
2145         /* The TEST_ASSERT_SUCCESS call above should change too... */
2146         if(U_SUCCESS(status)) {
2147             memset(fields, 0, sizeof(fields));
2148             fields[9] = &patternText;
2149             numFields = uregex_splitUText(re, fields, 9, &status);
2150             TEST_ASSERT_SUCCESS(status);
2151
2152             /* The TEST_ASSERT_SUCCESS call above should change too... */
2153             if(U_SUCCESS(status)) {
2154                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2155                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2156                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2157                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2158                 const char str_empty[] = { 0x00 };
2159
2160                 TEST_ASSERT(numFields == 5);
2161                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2162                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2163                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2164                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2165                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2166                 TEST_ASSERT(fields[5] == NULL);
2167                 TEST_ASSERT(fields[8] == NULL);
2168                 TEST_ASSERT(fields[9] == &patternText);
2169             }
2170             for(i = 0; i < numFields; i++) {
2171                 utext_close(fields[i]);
2172             }
2173         }
2174
2175         uregex_close(re);
2176     }
2177     utext_close(&patternText);
2178 }
2179
2180
2181 static void TestRefreshInput(void) {
2182     /*
2183      *  RefreshInput changes out the input of a URegularExpression without
2184      *    changing anything else in the match state.  Used with Java JNI,
2185      *    when Java moves the underlying string storage.   This test
2186      *    runs a find() loop, moving the text after the first match.
2187      *    The right number of matches should still be found.
2188      */
2189     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2190     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2191     UErrorCode status = U_ZERO_ERROR;
2192     URegularExpression *re;
2193     UText ut1 = UTEXT_INITIALIZER;
2194     UText ut2 = UTEXT_INITIALIZER;
2195
2196     re = uregex_openC("[ABC]", 0, 0, &status);
2197     TEST_ASSERT_SUCCESS(status);
2198
2199     utext_openUChars(&ut1, testStr, -1, &status);
2200     TEST_ASSERT_SUCCESS(status);
2201     uregex_setUText(re, &ut1, &status);
2202     TEST_ASSERT_SUCCESS(status);
2203
2204     /* Find the first match "A" in the original string */
2205     TEST_ASSERT(uregex_findNext(re, &status));
2206     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2207
2208     /* Move the string, kill the original string.  */
2209     u_strcpy(movedStr, testStr);
2210     u_memset(testStr, 0, u_strlen(testStr));
2211     utext_openUChars(&ut2, movedStr, -1, &status);
2212     TEST_ASSERT_SUCCESS(status);
2213     uregex_refreshUText(re, &ut2, &status);
2214     TEST_ASSERT_SUCCESS(status);
2215
2216     /* Find the following two matches, now working in the moved string. */
2217     TEST_ASSERT(uregex_findNext(re, &status));
2218     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2219     TEST_ASSERT(uregex_findNext(re, &status));
2220     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2221     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2222
2223     uregex_close(re);
2224 }
2225
2226
2227 static void TestBug8421(void) {
2228     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2229      *             was failing.
2230      */
2231     URegularExpression *re;
2232     UErrorCode status = U_ZERO_ERROR;
2233     int32_t  limit = -1;
2234
2235     re = uregex_openC("abc", 0, 0, &status);
2236     TEST_ASSERT_SUCCESS(status);
2237
2238     limit = uregex_getTimeLimit(re, &status);
2239     TEST_ASSERT_SUCCESS(status);
2240     TEST_ASSERT(limit == 0);
2241
2242     uregex_setTimeLimit(re, 100, &status);
2243     TEST_ASSERT_SUCCESS(status);
2244     limit = uregex_getTimeLimit(re, &status);
2245     TEST_ASSERT_SUCCESS(status);
2246     TEST_ASSERT(limit == 100);
2247
2248     uregex_close(re);
2249 }
2250
2251 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2252     return FALSE;
2253 }
2254
2255 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2256     return FALSE;
2257 }
2258
2259 static void TestBug10815() {
2260   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2261    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2262    */
2263     URegularExpression *re;
2264     UErrorCode status = U_ZERO_ERROR;
2265     UChar    text[100];
2266
2267
2268     // findNext() with a find progress callback function.
2269
2270     re = uregex_openC(".z", 0, 0, &status);
2271     TEST_ASSERT_SUCCESS(status);
2272
2273     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2274     uregex_setText(re, text, -1, &status);
2275     TEST_ASSERT_SUCCESS(status);
2276
2277     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2278     TEST_ASSERT_SUCCESS(status);
2279
2280     uregex_findNext(re, &status);
2281     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2282
2283     uregex_close(re);
2284
2285     // findNext() with a match progress callback function.
2286
2287     status = U_ZERO_ERROR;
2288     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2289     TEST_ASSERT_SUCCESS(status);
2290
2291     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2292     // it will appear to be stuck in a (near) infinite loop.
2293     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2294     uregex_setText(re, text, -1, &status);
2295     TEST_ASSERT_SUCCESS(status);
2296
2297     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2298     TEST_ASSERT_SUCCESS(status);
2299
2300     uregex_findNext(re, &status);
2301     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2302
2303     uregex_close(re);
2304 }
2305
2306
2307 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */