icuSources/test/cintltst/reapits.c

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /********************************************************************
   4  * COPYRIGHT:
   5  * Copyright (c) 2004-2015, International Business Machines Corporation and
   6  * others. All Rights Reserved.
   7  ********************************************************************/
   8 /********************************************************************************
   9 *
  10 * File reapits.c
  11 *
  12 *********************************************************************************/
  13 /*C API TEST FOR Regular Expressions */
  14 /**
  15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
  16 *   try to test the full functionality.  It just calls each function and verifies that it
  17 *   works on a basic level.
  18 *
  19 *   More complete testing of regular expression functionality is done with the C++ tests.
  20 **/
  21
  22 #include "unicode/utypes.h"
  23
  24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
  25
  26 #include <stdlib.h>
  27 #include <string.h>
  28 #include "unicode/uloc.h"
  29 #include "unicode/uregex.h"
  30 #include "unicode/ustring.h"
  31 #include "unicode/utext.h"
  32 #include "unicode/utf8.h"
  33 #include "cintltst.h"
  34 #include "cmemory.h"
  35
  36 #define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
  37     if (U_FAILURE(status)) { \
  38         log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); \
  39     } \
  40 } UPRV_BLOCK_MACRO_END
  41
  42 #define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
  43     if ((expr)==FALSE) { \
  44         log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr); \
  45     } \
  46 } UPRV_BLOCK_MACRO_END
  47
  48 /*
  49  *   TEST_SETUP and TEST_TEARDOWN
  50  *         macros to handle the boilerplate around setting up regex test cases.
  51  *         parameteres to setup:
  52  *              pattern:     The regex pattern, a (char *) null terminated C string.
  53  *              testString:  The string data, also a (char *) C string.
  54  *              flags:       Regex flags to set when compiling the pattern
  55  *
  56  *         Put arbitrary test code between SETUP and TEARDOWN.
  57  *         're" is the compiled, ready-to-go  regular expression.
  58  */
  59 #define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
  60     UChar   *srcString = NULL;  \
  61     status = U_ZERO_ERROR; \
  62     re = uregex_openC(pattern, flags, NULL, &status);  \
  63     TEST_ASSERT_SUCCESS(status);   \
  64     int32_t testStringLen = (int32_t)strlen(testString); \
  65     srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
  66     u_uastrncpy(srcString, testString, testStringLen + 1); \
  67     uregex_setText(re, srcString, -1, &status); \
  68     TEST_ASSERT_SUCCESS(status);  \
  69     if (U_SUCCESS(status)) { \
  70         UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END
  71
/*
 * TEST_TEARDOWN
 *     Closes the "if" and the block macro left open by TEST_SETUP, reports
 *     any failure left in "status", then closes "re" and frees the
 *     "srcString" buffer that TEST_SETUP allocated.
 */
#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
} UPRV_BLOCK_MACRO_END
  78
  79
  80 /**
  81  * @param expected utf-8 array of bytes to be expected
  82  */
  83 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
  84      char     buf_inside_macro[120];
  85      int32_t  len = (int32_t)strlen(expected);
  86      UBool    success;
  87      if (nulTerm) {
  88          u_austrncpy(buf_inside_macro, (actual), len+1);
  89          buf_inside_macro[len+2] = 0;
  90          success = (strcmp((expected), buf_inside_macro) == 0);
  91      } else {
  92          u_austrncpy(buf_inside_macro, (actual), len);
  93          buf_inside_macro[len+1] = 0;
  94          success = (strncmp((expected), buf_inside_macro, len) == 0);
  95      }
  96      if (success == FALSE) {
  97          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
  98              file, line, (expected), buf_inside_macro);
  99      }
 100 }
 101
 102 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
 103
 104
 105 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
 106     int32_t u8i = 0;
 107     UChar32 u8c = 0;
 108     UChar32 utc = 0;
 109     UBool   stringsEqual = TRUE;
 110     utext_setNativeIndex(utext, 0);
 111     for (;;) {
 112         U8_NEXT_UNSAFE(utf8, u8i, u8c);
 113         utc = utext_next32(utext);
 114         if (u8c == 0 && utc == U_SENTINEL) {
 115             break;
 116         }
 117         if (u8c != utc || u8c == 0) {
 118             stringsEqual = FALSE;
 119             break;
 120         }
 121     }
 122     return stringsEqual;
 123 }
 124
 125
 126 static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
 127     utext_setNativeIndex(actual, 0);
 128     if (!equals_utf8_utext(expected, actual)) {
 129         UChar32 c;
 130         log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
 131         c = utext_next32From(actual, 0);
 132         while (c != U_SENTINEL) {
 133             if (0x20<c && c <0x7e) {
 134                 log_err("%c", c);
 135             } else {
 136                 log_err("%#x", c);
 137             }
 138             c = UTEXT_NEXT32(actual);
 139         }
 140         log_err("\"\n");
 141     }
 142 }
 143
 144 /*
 145  * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 146  *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 147  */
 148 #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
 149
 150 static UBool testUTextEqual(UText *uta, UText *utb) {
 151     UChar32 ca = 0;
 152     UChar32 cb = 0;
 153     utext_setNativeIndex(uta, 0);
 154     utext_setNativeIndex(utb, 0);
 155     do {
 156         ca = utext_next32(uta);
 157         cb = utext_next32(utb);
 158         if (ca != cb) {
 159             break;
 160         }
 161     } while (ca != U_SENTINEL);
 162     return ca == cb;
 163 }
 164
 165
 166
 167
/*
 * Forward declarations of the test functions in this file.  Each of them is
 * registered with the test framework in addURegexTest().
 */
static void TestRegexCAPI(void);
static void TestBug4315(void);
static void TestUTextAPI(void);
static void TestRefreshInput(void);
static void TestBug8421(void);
static void TestBug10815(void);
static void TestMatchStartLineWithEmptyText(void);
 175
 176 void addURegexTest(TestNode** root);
 177
 178 void addURegexTest(TestNode** root)
 179 {
 180     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
 181     addTest(root, &TestBug4315,   "regex/TestBug4315");
 182     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
 183     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
 184     addTest(root, &TestBug8421,   "regex/TestBug8421");
 185     addTest(root, &TestBug10815,   "regex/TestBug10815");
 186     addTest(root, &TestMatchStartLineWithEmptyText,   "regex/TestMatchStartLineWithEmptyText");
 187 }
 188
 189 /*
 190  * Call back function and context struct used for testing
 191  *    regular expression user callbacks.  This test is mostly the same as
 192  *   the corresponding C++ test in intltest.
 193  */
typedef struct callBackContext {
    int32_t          maxCalls;    /* TestCallbackFn returns false once numCalls is no longer below this. */
    int32_t          numCalls;    /* Incremented on every invocation of TestCallbackFn. */
    int32_t          lastSteps;   /* The steps value passed to the previous invocation. */
} callBackContext;
 199
 200 static UBool U_EXPORT2 U_CALLCONV
 201 TestCallbackFn(const void *context, int32_t steps) {
 202   callBackContext  *info = (callBackContext *)context;
 203   if (info->lastSteps+1 != steps) {
 204       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
 205   }
 206   info->lastSteps = steps;
 207   info->numCalls++;
 208   return (info->numCalls < info->maxCalls);
 209 }
 210
 211 /*
 212  *   Regular Expression C API Tests
 213  */
 214 static void TestRegexCAPI(void) {
 215     UErrorCode           status = U_ZERO_ERROR;
 216     URegularExpression  *re;
 217     UChar                pat[200];
 218     UChar               *minus1;
 219
 220     memset(&minus1, -1, sizeof(minus1));
 221
 222     /* Mimimalist open/close */
 223     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
 224     re = uregex_open(pat, -1, 0, 0, &status);
 225     if (U_FAILURE(status)) {
 226          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
 227          return;
 228     }
 229     uregex_close(re);
 230
 231     /* Open with all flag values set */
 232     status = U_ZERO_ERROR;
 233     re = uregex_open(pat, -1,
 234         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
 235         0, &status);
 236     TEST_ASSERT_SUCCESS(status);
 237     uregex_close(re);
 238
 239     /* Open with an invalid flag */
 240     status = U_ZERO_ERROR;
 241     re = uregex_open(pat, -1, 0x40000000, 0, &status);
 242     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
 243     uregex_close(re);
 244
 245     /* Open with an unimplemented flag */
 246     status = U_ZERO_ERROR;
 247     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
 248     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
 249     uregex_close(re);
 250
 251     /* openC with an invalid parameter */
 252     status = U_ZERO_ERROR;
 253     re = uregex_openC(NULL,
 254         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 255     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
 256
 257     /* openC with an invalid parameter */
 258     status = U_USELESS_COLLATOR_ERROR;
 259     re = uregex_openC(NULL,
 260         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
 261     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
 262
 263     /* openC   open from a C string */
 264     {
 265         const UChar   *p;
 266         int32_t  len;
 267         status = U_ZERO_ERROR;
 268         re = uregex_openC("abc*", 0, 0, &status);
 269         TEST_ASSERT_SUCCESS(status);
 270         p = uregex_pattern(re, &len, &status);
 271         TEST_ASSERT_SUCCESS(status);
 272
 273         /* The TEST_ASSERT_SUCCESS above should change too... */
 274         if(U_SUCCESS(status)) {
 275             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
 276             TEST_ASSERT(u_strcmp(pat, p) == 0);
 277             TEST_ASSERT(len==(int32_t)strlen("abc*"));
 278         }
 279
 280         uregex_close(re);
 281
 282         /*  TODO:  Open with ParseError parameter */
 283     }
 284
 285     /*
 286      *  clone
 287      */
 288     {
 289         URegularExpression *clone1;
 290         URegularExpression *clone2;
 291         URegularExpression *clone3;
 292         UChar  testString1[30];
 293         UChar  testString2[30];
 294         UBool  result;
 295
 296
 297         status = U_ZERO_ERROR;
 298         re = uregex_openC("abc*", 0, 0, &status);
 299         TEST_ASSERT_SUCCESS(status);
 300         clone1 = uregex_clone(re, &status);
 301         TEST_ASSERT_SUCCESS(status);
 302         TEST_ASSERT(clone1 != NULL);
 303
 304         status = U_ZERO_ERROR;
 305         clone2 = uregex_clone(re, &status);
 306         TEST_ASSERT_SUCCESS(status);
 307         TEST_ASSERT(clone2 != NULL);
 308         uregex_close(re);
 309
 310         status = U_ZERO_ERROR;
 311         clone3 = uregex_clone(clone2, &status);
 312         TEST_ASSERT_SUCCESS(status);
 313         TEST_ASSERT(clone3 != NULL);
 314
 315         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
 316         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
 317
 318         status = U_ZERO_ERROR;
 319         uregex_setText(clone1, testString1, -1, &status);
 320         TEST_ASSERT_SUCCESS(status);
 321         result = uregex_lookingAt(clone1, 0, &status);
 322         TEST_ASSERT_SUCCESS(status);
 323         TEST_ASSERT(result==TRUE);
 324
 325         status = U_ZERO_ERROR;
 326         uregex_setText(clone2, testString2, -1, &status);
 327         TEST_ASSERT_SUCCESS(status);
 328         result = uregex_lookingAt(clone2, 0, &status);
 329         TEST_ASSERT_SUCCESS(status);
 330         TEST_ASSERT(result==FALSE);
 331         result = uregex_find(clone2, 0, &status);
 332         TEST_ASSERT_SUCCESS(status);
 333         TEST_ASSERT(result==TRUE);
 334
 335         uregex_close(clone1);
 336         uregex_close(clone2);
 337         uregex_close(clone3);
 338
 339     }
 340
 341     /*
 342      *  pattern()
 343     */
 344     {
 345         const UChar  *resultPat;
 346         int32_t       resultLen;
 347         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
 348         status = U_ZERO_ERROR;
 349         re = uregex_open(pat, -1, 0, NULL, &status);
 350         resultPat = uregex_pattern(re, &resultLen, &status);
 351         TEST_ASSERT_SUCCESS(status);
 352
 353         /* The TEST_ASSERT_SUCCESS above should change too... */
 354         if (U_SUCCESS(status)) {
 355             TEST_ASSERT(resultLen == -1);
 356             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
 357         }
 358
 359         uregex_close(re);
 360
 361         status = U_ZERO_ERROR;
 362         re = uregex_open(pat, 3, 0, NULL, &status);
 363         resultPat = uregex_pattern(re, &resultLen, &status);
 364         TEST_ASSERT_SUCCESS(status);
 365         TEST_ASSERT_SUCCESS(status);
 366
 367         /* The TEST_ASSERT_SUCCESS above should change too... */
 368         if (U_SUCCESS(status)) {
 369             TEST_ASSERT(resultLen == 3);
 370             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
 371             TEST_ASSERT(u_strlen(resultPat) == 3);
 372         }
 373
 374         uregex_close(re);
 375     }
 376
 377     /*
 378      *  flags()
 379      */
 380     {
 381         int32_t  t;
 382
 383         status = U_ZERO_ERROR;
 384         re = uregex_open(pat, -1, 0, NULL, &status);
 385         t  = uregex_flags(re, &status);
 386         TEST_ASSERT_SUCCESS(status);
 387         TEST_ASSERT(t == 0);
 388         uregex_close(re);
 389
 390         status = U_ZERO_ERROR;
 391         re = uregex_open(pat, -1, 0, NULL, &status);
 392         t  = uregex_flags(re, &status);
 393         TEST_ASSERT_SUCCESS(status);
 394         TEST_ASSERT(t == 0);
 395         uregex_close(re);
 396
 397         status = U_ZERO_ERROR;
 398         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
 399         t  = uregex_flags(re, &status);
 400         TEST_ASSERT_SUCCESS(status);
 401         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
 402         uregex_close(re);
 403     }
 404
 405     /*
 406      *  setText() and lookingAt()
 407      */
 408     {
 409         UChar  text1[50];
 410         UChar  text2[50];
 411         UBool  result;
 412
 413         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
 414         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
 415         status = U_ZERO_ERROR;
 416         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 417         re = uregex_open(pat, -1, 0, NULL, &status);
 418         TEST_ASSERT_SUCCESS(status);
 419
 420         /* Operation before doing a setText should fail... */
 421         status = U_ZERO_ERROR;
 422         uregex_lookingAt(re, 0, &status);
 423         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
 424
 425         status = U_ZERO_ERROR;
 426         uregex_setText(re, text1, -1, &status);
 427         result = uregex_lookingAt(re, 0, &status);
 428         TEST_ASSERT(result == TRUE);
 429         TEST_ASSERT_SUCCESS(status);
 430
 431         status = U_ZERO_ERROR;
 432         uregex_setText(re, text2, -1, &status);
 433         result = uregex_lookingAt(re, 0, &status);
 434         TEST_ASSERT(result == FALSE);
 435         TEST_ASSERT_SUCCESS(status);
 436
 437         status = U_ZERO_ERROR;
 438         uregex_setText(re, text1, -1, &status);
 439         result = uregex_lookingAt(re, 0, &status);
 440         TEST_ASSERT(result == TRUE);
 441         TEST_ASSERT_SUCCESS(status);
 442
 443         status = U_ZERO_ERROR;
 444         uregex_setText(re, text1, 5, &status);
 445         result = uregex_lookingAt(re, 0, &status);
 446         TEST_ASSERT(result == FALSE);
 447         TEST_ASSERT_SUCCESS(status);
 448
 449         status = U_ZERO_ERROR;
 450         uregex_setText(re, text1, 6, &status);
 451         result = uregex_lookingAt(re, 0, &status);
 452         TEST_ASSERT(result == TRUE);
 453         TEST_ASSERT_SUCCESS(status);
 454
 455         uregex_close(re);
 456     }
 457
 458
 459     /*
 460      *  getText()
 461      */
 462     {
 463         UChar    text1[50];
 464         UChar    text2[50];
 465         const UChar   *result;
 466         int32_t  textLength;
 467
 468         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
 469         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
 470         status = U_ZERO_ERROR;
 471         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 472         re = uregex_open(pat, -1, 0, NULL, &status);
 473
 474         uregex_setText(re, text1, -1, &status);
 475         result = uregex_getText(re, &textLength, &status);
 476         TEST_ASSERT(result == text1);
 477         TEST_ASSERT(textLength == -1);
 478         TEST_ASSERT_SUCCESS(status);
 479
 480         status = U_ZERO_ERROR;
 481         uregex_setText(re, text2, 7, &status);
 482         result = uregex_getText(re, &textLength, &status);
 483         TEST_ASSERT(result == text2);
 484         TEST_ASSERT(textLength == 7);
 485         TEST_ASSERT_SUCCESS(status);
 486
 487         status = U_ZERO_ERROR;
 488         uregex_setText(re, text2, 4, &status);
 489         result = uregex_getText(re, &textLength, &status);
 490         TEST_ASSERT(result == text2);
 491         TEST_ASSERT(textLength == 4);
 492         TEST_ASSERT_SUCCESS(status);
 493         uregex_close(re);
 494     }
 495
 496     /*
 497      *  matches()
 498      */
 499     {
 500         UChar   text1[50];
 501         UBool   result;
 502         int     len;
 503         UChar   nullString[] = {0,0,0};
 504
 505         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
 506         status = U_ZERO_ERROR;
 507         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
 508         re = uregex_open(pat, -1, 0, NULL, &status);
 509
 510         uregex_setText(re, text1, -1, &status);
 511         result = uregex_matches(re, 0, &status);
 512         TEST_ASSERT(result == FALSE);
 513         TEST_ASSERT_SUCCESS(status);
 514
 515         status = U_ZERO_ERROR;
 516         uregex_setText(re, text1, 6, &status);
 517         result = uregex_matches(re, 0, &status);
 518         TEST_ASSERT(result == TRUE);
 519         TEST_ASSERT_SUCCESS(status);
 520
 521         status = U_ZERO_ERROR;
 522         uregex_setText(re, text1, 6, &status);
 523         result = uregex_matches(re, 1, &status);
 524         TEST_ASSERT(result == FALSE);
 525         TEST_ASSERT_SUCCESS(status);
 526         uregex_close(re);
 527
 528         status = U_ZERO_ERROR;
 529         re = uregex_openC(".?", 0, NULL, &status);
 530         uregex_setText(re, text1, -1, &status);
 531         len = u_strlen(text1);
 532         result = uregex_matches(re, len, &status);
 533         TEST_ASSERT(result == TRUE);
 534         TEST_ASSERT_SUCCESS(status);
 535
 536         status = U_ZERO_ERROR;
 537         uregex_setText(re, nullString, -1, &status);
 538         TEST_ASSERT_SUCCESS(status);
 539         result = uregex_matches(re, 0, &status);
 540         TEST_ASSERT(result == TRUE);
 541         TEST_ASSERT_SUCCESS(status);
 542         uregex_close(re);
 543     }
 544
 545
 546     /*
 547      *  lookingAt()    Used in setText test.
 548      */
 549
 550
 551     /*
 552      *  find(), findNext, start, end, reset
 553      */
 554     {
 555         UChar    text1[50];
 556         UBool    result;
 557         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
 558         status = U_ZERO_ERROR;
 559         re = uregex_openC("rx", 0, NULL, &status);
 560
 561         uregex_setText(re, text1, -1, &status);
 562         result = uregex_find(re, 0, &status);
 563         TEST_ASSERT(result == TRUE);
 564         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 565         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 566         TEST_ASSERT_SUCCESS(status);
 567
 568         result = uregex_find(re, 9, &status);
 569         TEST_ASSERT(result == TRUE);
 570         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
 571         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
 572         TEST_ASSERT_SUCCESS(status);
 573
 574         result = uregex_find(re, 14, &status);
 575         TEST_ASSERT(result == FALSE);
 576         TEST_ASSERT_SUCCESS(status);
 577
 578         status = U_ZERO_ERROR;
 579         uregex_reset(re, 0, &status);
 580
 581         result = uregex_findNext(re, &status);
 582         TEST_ASSERT(result == TRUE);
 583         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
 584         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
 585         TEST_ASSERT_SUCCESS(status);
 586
 587         result = uregex_findNext(re, &status);
 588         TEST_ASSERT(result == TRUE);
 589         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
 590         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
 591         TEST_ASSERT_SUCCESS(status);
 592
 593         status = U_ZERO_ERROR;
 594         uregex_reset(re, 12, &status);
 595
 596         result = uregex_findNext(re, &status);
 597         TEST_ASSERT(result == TRUE);
 598         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
 599         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
 600         TEST_ASSERT_SUCCESS(status);
 601
 602         result = uregex_findNext(re, &status);
 603         TEST_ASSERT(result == FALSE);
 604         TEST_ASSERT_SUCCESS(status);
 605
 606         uregex_close(re);
 607     }
 608
 609     /*
 610      *  groupCount
 611      */
 612     {
 613         int32_t result;
 614
 615         status = U_ZERO_ERROR;
 616         re = uregex_openC("abc", 0, NULL, &status);
 617         result = uregex_groupCount(re, &status);
 618         TEST_ASSERT_SUCCESS(status);
 619         TEST_ASSERT(result == 0);
 620         uregex_close(re);
 621
 622         status = U_ZERO_ERROR;
 623         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
 624         result = uregex_groupCount(re, &status);
 625         TEST_ASSERT_SUCCESS(status);
 626         TEST_ASSERT(result == 3);
 627         uregex_close(re);
 628
 629     }
 630
 631
 632     /*
 633      *  group()
 634      */
 635     {
 636         UChar    text1[80];
 637         UChar    buf[80];
 638         UBool    result;
 639         int32_t  resultSz;
 640         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
 641
 642         status = U_ZERO_ERROR;
 643         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
 644         TEST_ASSERT_SUCCESS(status);
 645
 646
 647         uregex_setText(re, text1, -1, &status);
 648         result = uregex_find(re, 0, &status);
 649         TEST_ASSERT(result==TRUE);
 650
 651         /*  Capture Group 0, the full match.  Should succeed.  */
 652         status = U_ZERO_ERROR;
 653         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
 654         TEST_ASSERT_SUCCESS(status);
 655         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
 656         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 657
 658         /*  Capture group #1.  Should succeed. */
 659         status = U_ZERO_ERROR;
 660         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
 661         TEST_ASSERT_SUCCESS(status);
 662         TEST_ASSERT_STRING(" interior ", buf, TRUE);
 663         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
 664
 665         /*  Capture group out of range.  Error. */
 666         status = U_ZERO_ERROR;
 667         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
 668         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
 669
 670         /* NULL buffer, pure pre-flight */
 671         status = U_ZERO_ERROR;
 672         resultSz = uregex_group(re, 0, NULL, 0, &status);
 673         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 674         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 675
 676         /* Too small buffer, truncated string */
 677         status = U_ZERO_ERROR;
 678         memset(buf, -1, sizeof(buf));
 679         resultSz = uregex_group(re, 0, buf, 5, &status);
 680         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 681         TEST_ASSERT_STRING("abc i", buf, FALSE);
 682         TEST_ASSERT(buf[5] == (UChar)0xffff);
 683         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 684
 685         /* Output string just fits buffer, no NUL term. */
 686         status = U_ZERO_ERROR;
 687         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
 688         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 689         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
 690         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
 691         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
 692
 693         uregex_close(re);
 694
 695     }
 696
 697     /*
 698      *  Regions
 699      */
 700
 701
 702         /* SetRegion(), getRegion() do something  */
 703         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 704         UChar resultString[40];
 705         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
 706         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
 707         uregex_setRegion(re, 3, 6, &status);
 708         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
 709         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
 710         TEST_ASSERT(uregex_findNext(re, &status));
 711         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3);
 712         TEST_ASSERT_STRING("345", resultString, TRUE);
 713         TEST_TEARDOWN;
 714
 715         /* find(start=-1) uses regions   */
 716         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 717         uregex_setRegion(re, 4, 6, &status);
 718         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 719         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 720         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 721         TEST_TEARDOWN;
 722
 723         /* find (start >=0) does not use regions   */
 724         TEST_SETUP(".*", "0123456789ABCDEF", 0);
 725         uregex_setRegion(re, 4, 6, &status);
 726         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 727         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 728         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 729         TEST_TEARDOWN;
 730
 731         /* findNext() obeys regions    */
 732         TEST_SETUP(".", "0123456789ABCDEF", 0);
 733         uregex_setRegion(re, 4, 6, &status);
 734         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
 735         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 736         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
 737         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
 738         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
 739         TEST_TEARDOWN;
 740
 741         /* matches(start=-1) uses regions                                           */
 742         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
 743         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 744         uregex_setRegion(re, 4, 6, &status);
 745         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
 746         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 747         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
 748         TEST_TEARDOWN;
 749
 750         /* matches (start >=0) does not use regions       */
 751         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 752         uregex_setRegion(re, 4, 6, &status);
 753         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
 754         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 755         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
 756         TEST_TEARDOWN;
 757
 758         /* lookingAt(start=-1) uses regions                                         */
 759         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
 760         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 761         uregex_setRegion(re, 4, 6, &status);
 762         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
 763         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
 764         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
 765         TEST_TEARDOWN;
 766
 767         /* lookingAt (start >=0) does not use regions  */
 768         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
 769         uregex_setRegion(re, 4, 6, &status);
 770         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
 771         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
 772         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
 773         TEST_TEARDOWN;
 774
 775         /* hitEnd()       */
 776         TEST_SETUP("[a-f]*", "abcdefghij", 0);
 777         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 778         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
 779         TEST_TEARDOWN;
 780
 781         TEST_SETUP("[a-f]*", "abcdef", 0);
 782         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 783         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
 784         TEST_TEARDOWN;
 785
 786         /* requireEnd   */
 787         TEST_SETUP("abcd", "abcd", 0);
 788         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 789         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
 790         TEST_TEARDOWN;
 791
 792         TEST_SETUP("abcd$", "abcd", 0);
 793         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
 794         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
 795         TEST_TEARDOWN;
 796
 797         /* anchoringBounds        */
 798         TEST_SETUP("abc$", "abcdef", 0);
 799         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
 800         uregex_useAnchoringBounds(re, FALSE, &status);
 801         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
 802
 803         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
 804         uregex_useAnchoringBounds(re, TRUE, &status);
 805         uregex_setRegion(re, 0, 3, &status);
 806         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
 807         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 808         TEST_TEARDOWN;
 809
 810         /* Transparent Bounds      */
 811         TEST_SETUP("abc(?=def)", "abcdef", 0);
 812         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
 813         uregex_useTransparentBounds(re, TRUE, &status);
 814         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
 815
 816         uregex_useTransparentBounds(re, FALSE, &status);
 817         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
 818         uregex_setRegion(re, 0, 3, &status);
 819         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
 820         uregex_useTransparentBounds(re, TRUE, &status);
 821         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
 822         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
 823         TEST_TEARDOWN;
 824
 825
 826     /*
 827      *  replaceFirst()
 828      */
 829     {
 830         UChar    text1[80];
 831         UChar    text2[80];
 832         UChar    replText[80];
 833         UChar    buf[80];
 834         int32_t  resultSz;
 835         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
 836         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
 837         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
 838
 839         status = U_ZERO_ERROR;
 840         re = uregex_openC("x(.*?)x", 0, NULL, &status);
 841         TEST_ASSERT_SUCCESS(status);
 842
 843         /*  Normal case, with match */
 844         uregex_setText(re, text1, -1, &status);
 845         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 846         TEST_ASSERT_SUCCESS(status);
 847         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
 848         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 849
 850         /* No match.  Text should copy to output with no changes.  */
 851         status = U_ZERO_ERROR;
 852         uregex_setText(re, text2, -1, &status);
 853         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 854         TEST_ASSERT_SUCCESS(status);
 855         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 856         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
 857
 858         /*  Match, output just fills buffer, no termination warning. */
 859         status = U_ZERO_ERROR;
 860         uregex_setText(re, text1, -1, &status);
 861         memset(buf, -1, sizeof(buf));
 862         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
 863         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 864         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 865         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 866         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 867
 868         /* Do the replaceFirst again, without first resetting anything.
 869          *  Should give the same results.
 870          */
 871         status = U_ZERO_ERROR;
 872         memset(buf, -1, sizeof(buf));
 873         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
 874         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 875         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
 876         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 877         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 878
 879         /* NULL buffer, zero buffer length */
 880         status = U_ZERO_ERROR;
 881         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
 882         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 883         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 884
 885         /* Buffer too small by one */
 886         status = U_ZERO_ERROR;
 887         memset(buf, -1, sizeof(buf));
 888         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
 889         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 890         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
 891         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
 892         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
 893
 894         uregex_close(re);
 895     }
 896
 897
 898     /*
 899      *  replaceAll()
 900      */
 901     {
 902         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
 903         UChar    text2[80];          /*  "No match Here"           */
 904         UChar    replText[80];       /*  "<$1>"                    */
 905         UChar    replText2[80];      /*  "<<$1>>"                  */
 906         const char * pattern = "x(.*?)x";
 907         const char * expectedResult = "Replace <aa> <1> <...>.";
 908         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
 909         UChar    buf[80];
 910         int32_t  resultSize;
 911         int32_t  expectedResultSize;
 912         int32_t  expectedResultSize2;
 913         int32_t  i;
 914
 915         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
 916         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
 917         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
 918         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
 919         expectedResultSize = (int32_t)strlen(expectedResult);
 920         expectedResultSize2 = (int32_t)strlen(expectedResult2);
 921
 922         status = U_ZERO_ERROR;
 923         re = uregex_openC(pattern, 0, NULL, &status);
 924         TEST_ASSERT_SUCCESS(status);
 925
 926         /*  Normal case, with match */
 927         uregex_setText(re, text1, -1, &status);
 928         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 929         TEST_ASSERT_SUCCESS(status);
 930         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
 931         TEST_ASSERT(resultSize == expectedResultSize);
 932
 933         /* No match.  Text should copy to output with no changes.  */
 934         status = U_ZERO_ERROR;
 935         uregex_setText(re, text2, -1, &status);
 936         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
 937         TEST_ASSERT_SUCCESS(status);
 938         TEST_ASSERT_STRING("No match here.", buf, TRUE);
 939         TEST_ASSERT(resultSize == u_strlen(text2));
 940
 941         /*  Match, output just fills buffer, no termination warning. */
 942         status = U_ZERO_ERROR;
 943         uregex_setText(re, text1, -1, &status);
 944         memset(buf, -1, sizeof(buf));
 945         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
 946         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 947         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
 948         TEST_ASSERT(resultSize == expectedResultSize);
 949         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 950
 951         /* Do the replaceAll again, without first resetting anything.
 952          *  Should give the same results.
 953          */
 954         status = U_ZERO_ERROR;
 955         memset(buf, -1, sizeof(buf));
 956         resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
 957         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
 958         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
 959         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 960         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
 961
 962         /* NULL buffer, zero buffer length */
 963         status = U_ZERO_ERROR;
 964         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
 965         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 966         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
 967
 968         /* Buffer too small.  Try every size, which will tickle edge cases
 969          * in uregex_appendReplacement (used by replaceAll)   */
 970         for (i=0; i<expectedResultSize; i++) {
 971             char  expected[80];
 972             status = U_ZERO_ERROR;
 973             memset(buf, -1, sizeof(buf));
 974             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
 975             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 976             strcpy(expected, expectedResult);
 977             expected[i] = 0;
 978             TEST_ASSERT_STRING(expected, buf, FALSE);
 979             TEST_ASSERT(resultSize == expectedResultSize);
 980             TEST_ASSERT(buf[i] == (UChar)0xffff);
 981         }
 982
 983         /* Buffer too small.  Same as previous test, except this time the replacement
 984          * text is longer than the match capture group, making the length of the complete
 985          * replacement longer than the original string.
 986          */
 987         for (i=0; i<expectedResultSize2; i++) {
 988             char  expected[80];
 989             status = U_ZERO_ERROR;
 990             memset(buf, -1, sizeof(buf));
 991             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
 992             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
 993             strcpy(expected, expectedResult2);
 994             expected[i] = 0;
 995             TEST_ASSERT_STRING(expected, buf, FALSE);
 996             TEST_ASSERT(resultSize == expectedResultSize2);
 997             TEST_ASSERT(buf[i] == (UChar)0xffff);
 998         }
 999
1000
1001         uregex_close(re);
1002     }
1003
1004
1005     /*
1006      *  appendReplacement()
1007      */
1008     {
1009         UChar    text[100];
1010         UChar    repl[100];
1011         UChar    buf[100];
1012         UChar   *bufPtr;
1013         int32_t  bufCap;
1014
1015
1016         status = U_ZERO_ERROR;
1017         re = uregex_openC(".*", 0, 0, &status);
1018         TEST_ASSERT_SUCCESS(status);
1019
1020         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1021         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1022         uregex_setText(re, text, -1, &status);
1023
1024         /* match covers whole target string */
1025         uregex_find(re, 0, &status);
1026         TEST_ASSERT_SUCCESS(status);
1027         bufPtr = buf;
1028         bufCap = UPRV_LENGTHOF(buf);
1029         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1030         TEST_ASSERT_SUCCESS(status);
1031         TEST_ASSERT_STRING("some other", buf, TRUE);
1032
1033         /* Match has \u \U escapes */
1034         uregex_find(re, 0, &status);
1035         TEST_ASSERT_SUCCESS(status);
1036         bufPtr = buf;
1037         bufCap = UPRV_LENGTHOF(buf);
1038         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1039         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1040         TEST_ASSERT_SUCCESS(status);
1041         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1042
1043         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1044         status = U_ZERO_ERROR;
1045         uregex_find(re, 0, &status);
1046         TEST_ASSERT_SUCCESS(status);
1047         bufPtr = buf;
1048         status = U_BUFFER_OVERFLOW_ERROR;
1049         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1050         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1051
1052         uregex_close(re);
1053     }
1054
1055
1056     /*
1057      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1058      */
1059
1060     /*
1061      *  split()
1062      */
1063     {
1064         UChar    textToSplit[80];
1065         UChar    text2[80];
1066         UChar    buf[200];
1067         UChar    *fields[10];
1068         int32_t  numFields;
1069         int32_t  requiredCapacity;
1070         int32_t  spaceNeeded;
1071         int32_t  sz;
1072
1073         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1074         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1075
1076         status = U_ZERO_ERROR;
1077         re = uregex_openC(":", 0, NULL, &status);
1078
1079
1080         /*  Simple split */
1081
1082         uregex_setText(re, textToSplit, -1, &status);
1083         TEST_ASSERT_SUCCESS(status);
1084
1085         /* The TEST_ASSERT_SUCCESS call above should change too... */
1086         if (U_SUCCESS(status)) {
1087             memset(fields, -1, sizeof(fields));
1088             numFields =
1089                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1090             TEST_ASSERT_SUCCESS(status);
1091
1092             /* The TEST_ASSERT_SUCCESS call above should change too... */
1093             if(U_SUCCESS(status)) {
1094                 TEST_ASSERT(numFields == 3);
1095                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1096                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1097                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1098                 TEST_ASSERT(fields[3] == NULL);
1099
1100                 spaceNeeded = u_strlen(textToSplit) -
1101                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1102                             numFields;          /* Each field gets a NUL terminator */
1103
1104                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1105             }
1106         }
1107
1108         uregex_close(re);
1109
1110
1111         /*  Split with too few output strings available */
1112         status = U_ZERO_ERROR;
1113         re = uregex_openC(":", 0, NULL, &status);
1114         uregex_setText(re, textToSplit, -1, &status);
1115         TEST_ASSERT_SUCCESS(status);
1116
1117         /* The TEST_ASSERT_SUCCESS call above should change too... */
1118         if(U_SUCCESS(status)) {
1119             memset(fields, -1, sizeof(fields));
1120             numFields =
1121                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1122             TEST_ASSERT_SUCCESS(status);
1123
1124             /* The TEST_ASSERT_SUCCESS call above should change too... */
1125             if(U_SUCCESS(status)) {
1126                 TEST_ASSERT(numFields == 2);
1127                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1128                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1129                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1130
1131                 spaceNeeded = u_strlen(textToSplit) -
1132                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1133                             numFields;          /* Each field gets a NUL terminator */
1134
1135                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1136
1137                 /* Split with a range of output buffer sizes.  */
1138                 spaceNeeded = u_strlen(textToSplit) -
1139                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1140                     numFields;          /* Each field gets a NUL terminator */
1141
1142                 for (sz=0; sz < spaceNeeded+1; sz++) {
1143                     memset(fields, -1, sizeof(fields));
1144                     status = U_ZERO_ERROR;
1145                     numFields =
1146                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1147                     if (sz >= spaceNeeded) {
1148                         TEST_ASSERT_SUCCESS(status);
1149                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1150                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1151                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1152                     } else {
1153                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1154                     }
1155                     TEST_ASSERT(numFields == 3);
1156                     TEST_ASSERT(fields[3] == NULL);
1157                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1158                 }
1159             }
1160         }
1161
1162         uregex_close(re);
1163     }
1164
1165
1166
1167
1168     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1169      *                   comes out as additional fields.  */
1170     {
1171         UChar    textToSplit[80];
1172         UChar    buf[200];
1173         UChar    *fields[10];
1174         int32_t  numFields;
1175         int32_t  requiredCapacity;
1176         int32_t  spaceNeeded;
1177         int32_t  sz;
1178
1179         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1180
1181         status = U_ZERO_ERROR;
1182         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1183
1184         uregex_setText(re, textToSplit, -1, &status);
1185         TEST_ASSERT_SUCCESS(status);
1186
1187         /* The TEST_ASSERT_SUCCESS call above should change too... */
1188         if(U_SUCCESS(status)) {
1189             memset(fields, -1, sizeof(fields));
1190             numFields =
1191                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1192             TEST_ASSERT_SUCCESS(status);
1193
1194             /* The TEST_ASSERT_SUCCESS call above should change too... */
1195             if(U_SUCCESS(status)) {
1196                 TEST_ASSERT(numFields == 5);
1197                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1198                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1199                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1200                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1201                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1202                 TEST_ASSERT(fields[5] == NULL);
1203                 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1204                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1205             }
1206         }
1207
1208         /*  Split with too few output strings available (2) */
1209         status = U_ZERO_ERROR;
1210         memset(fields, -1, sizeof(fields));
1211         numFields =
1212             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1213         TEST_ASSERT_SUCCESS(status);
1214
1215         /* The TEST_ASSERT_SUCCESS call above should change too... */
1216         if(U_SUCCESS(status)) {
1217             TEST_ASSERT(numFields == 2);
1218             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1219             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1220             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1221
1222             spaceNeeded = (int32_t)strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1223             TEST_ASSERT(spaceNeeded == requiredCapacity);
1224         }
1225
1226         /*  Split with too few output strings available (3) */
1227         status = U_ZERO_ERROR;
1228         memset(fields, -1, sizeof(fields));
1229         numFields =
1230             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1231         TEST_ASSERT_SUCCESS(status);
1232
1233         /* The TEST_ASSERT_SUCCESS call above should change too... */
1234         if(U_SUCCESS(status)) {
1235             TEST_ASSERT(numFields == 3);
1236             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1237             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1238             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1239             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1240
1241             spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1242             TEST_ASSERT(spaceNeeded == requiredCapacity);
1243         }
1244
1245         /*  Split with just enough output strings available (5) */
1246         status = U_ZERO_ERROR;
1247         memset(fields, -1, sizeof(fields));
1248         numFields =
1249             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1250         TEST_ASSERT_SUCCESS(status);
1251
1252         /* The TEST_ASSERT_SUCCESS call above should change too... */
1253         if(U_SUCCESS(status)) {
1254             TEST_ASSERT(numFields == 5);
1255             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1256             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1257             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1258             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1259             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1260             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1261
1262             spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1263             TEST_ASSERT(spaceNeeded == requiredCapacity);
1264         }
1265
1266         /* Split, end of text is a field delimiter.   */
1267         status = U_ZERO_ERROR;
1268         sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1269         uregex_setText(re, textToSplit, sz, &status);
1270         TEST_ASSERT_SUCCESS(status);
1271
1272         /* The TEST_ASSERT_SUCCESS call above should change too... */
1273         if(U_SUCCESS(status)) {
1274             memset(fields, -1, sizeof(fields));
1275             numFields =
1276                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1277             TEST_ASSERT_SUCCESS(status);
1278
1279             /* The TEST_ASSERT_SUCCESS call above should change too... */
1280             if(U_SUCCESS(status)) {
1281                 TEST_ASSERT(numFields == 5);
1282                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1283                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1284                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1285                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1286                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1287                 TEST_ASSERT(fields[5] == NULL);
1288                 TEST_ASSERT(fields[8] == NULL);
1289                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1290                 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1291                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1292             }
1293         }
1294
1295         uregex_close(re);
1296     }
1297
1298     /*
1299      * set/getTimeLimit
1300      */
1301      TEST_SETUP("abc$", "abcdef", 0);
1302      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1303      uregex_setTimeLimit(re, 1000, &status);
1304      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1305      TEST_ASSERT_SUCCESS(status);
1306      uregex_setTimeLimit(re, -1, &status);
1307      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1308      status = U_ZERO_ERROR;
1309      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1310      TEST_TEARDOWN;
1311
1312      /*
1313       * set/get Stack Limit
1314       */
1315      TEST_SETUP("abc$", "abcdef", 0);
1316      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1317      uregex_setStackLimit(re, 40000, &status);
1318      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1319      TEST_ASSERT_SUCCESS(status);
1320      uregex_setStackLimit(re, -1, &status);
1321      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1322      status = U_ZERO_ERROR;
1323      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1324      TEST_TEARDOWN;
1325
1326
1327      /*
1328       * Get/Set callback functions
1329       *     This test is copied from intltest regex/Callbacks
1330       *     The pattern and test data will run long enough to cause the callback
1331       *       to be invoked.  The nested '+' operators give exponential time
1332       *       behavior with increasing string length.
1333       */
1334      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1335      callBackContext cbInfo = {4, 0, 0};
1336      const void     *pContext   = &cbInfo;
1337      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1338
1339      /*  Getting the callback fn when it hasn't been set must return NULL  */
1340      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1341      TEST_ASSERT_SUCCESS(status);
1342      TEST_ASSERT(returnedFn == NULL);
1343      TEST_ASSERT(pContext == NULL);
1344
1345      /* Set the callback and do a match.                                  */
1346      /* The callback function should record that it has been called.      */
1347      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1348      TEST_ASSERT_SUCCESS(status);
1349      TEST_ASSERT(cbInfo.numCalls == 0);
1350      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1351      TEST_ASSERT_SUCCESS(status);
1352      TEST_ASSERT(cbInfo.numCalls > 0);
1353
1354      /* Getting the callback should return the values that were set above.  */
1355      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1356      TEST_ASSERT(returnedFn == &TestCallbackFn);
1357      TEST_ASSERT(pContext == &cbInfo);
1358
1359      TEST_TEARDOWN;
1360 }
1361
1362
1363
1364 static void TestBug4315(void) {
1365     UErrorCode      theICUError = U_ZERO_ERROR;
1366     URegularExpression *theRegEx;
1367     UChar           *textBuff;
1368     const char      *thePattern;
1369     UChar            theString[100];
1370     UChar           *destFields[24];
1371     int32_t         neededLength1;
1372     int32_t         neededLength2;
1373
1374     int32_t         wordCount = 0;
1375     int32_t         destFieldsSize = 24;
1376
1377     thePattern  = "ck ";
1378     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1379
1380     /* open a regex */
1381     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1382     TEST_ASSERT_SUCCESS(theICUError);
1383
1384     /* set the input string */
1385     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1386     TEST_ASSERT_SUCCESS(theICUError);
1387
1388     /* split */
1389     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1390      *  error occurs! */
1391     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1392         destFieldsSize, &theICUError);
1393
1394     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1395     TEST_ASSERT(wordCount==3);
1396
1397     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1398     {
1399         theICUError = U_ZERO_ERROR;
1400         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1401         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1402             destFields, destFieldsSize, &theICUError);
1403         TEST_ASSERT(wordCount==3);
1404         TEST_ASSERT_SUCCESS(theICUError);
1405         TEST_ASSERT(neededLength1 == neededLength2);
1406         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1407         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1408         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1409         TEST_ASSERT(destFields[3] == NULL);
1410         free(textBuff);
1411     }
1412     uregex_close(theRegEx);
1413 }
1414
1415 /* Based on TestRegexCAPI() */
1416 static void TestUTextAPI(void) {
1417     UErrorCode           status = U_ZERO_ERROR;
1418     URegularExpression  *re;
1419     UText                patternText = UTEXT_INITIALIZER;
1420     UChar                pat[200];
1421     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1422
1423     /* Minimalist open/close */
1424     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1425     re = uregex_openUText(&patternText, 0, 0, &status);
1426     if (U_FAILURE(status)) {
1427          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1428          utext_close(&patternText);
1429          return;
1430     }
1431     uregex_close(re);
1432
1433     /* Open with all flag values set */
1434     status = U_ZERO_ERROR;
1435     re = uregex_openUText(&patternText,
1436         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1437         0, &status);
1438     TEST_ASSERT_SUCCESS(status);
1439     uregex_close(re);
1440
1441     /* Open with an invalid flag */
1442     status = U_ZERO_ERROR;
1443     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1444     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1445     uregex_close(re);
1446
1447     /* open with an invalid parameter */
1448     status = U_ZERO_ERROR;
1449     re = uregex_openUText(NULL,
1450         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1451     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1452
1453     /*
1454      *  clone
1455      */
1456     {
1457         URegularExpression *clone1;
1458         URegularExpression *clone2;
1459         URegularExpression *clone3;
1460         UChar  testString1[30];
1461         UChar  testString2[30];
1462         UBool  result;
1463
1464
1465         status = U_ZERO_ERROR;
1466         re = uregex_openUText(&patternText, 0, 0, &status);
1467         TEST_ASSERT_SUCCESS(status);
1468         clone1 = uregex_clone(re, &status);
1469         TEST_ASSERT_SUCCESS(status);
1470         TEST_ASSERT(clone1 != NULL);
1471
1472         status = U_ZERO_ERROR;
1473         clone2 = uregex_clone(re, &status);
1474         TEST_ASSERT_SUCCESS(status);
1475         TEST_ASSERT(clone2 != NULL);
1476         uregex_close(re);
1477
1478         status = U_ZERO_ERROR;
1479         clone3 = uregex_clone(clone2, &status);
1480         TEST_ASSERT_SUCCESS(status);
1481         TEST_ASSERT(clone3 != NULL);
1482
1483         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1484         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1485
1486         status = U_ZERO_ERROR;
1487         uregex_setText(clone1, testString1, -1, &status);
1488         TEST_ASSERT_SUCCESS(status);
1489         result = uregex_lookingAt(clone1, 0, &status);
1490         TEST_ASSERT_SUCCESS(status);
1491         TEST_ASSERT(result==TRUE);
1492
1493         status = U_ZERO_ERROR;
1494         uregex_setText(clone2, testString2, -1, &status);
1495         TEST_ASSERT_SUCCESS(status);
1496         result = uregex_lookingAt(clone2, 0, &status);
1497         TEST_ASSERT_SUCCESS(status);
1498         TEST_ASSERT(result==FALSE);
1499         result = uregex_find(clone2, 0, &status);
1500         TEST_ASSERT_SUCCESS(status);
1501         TEST_ASSERT(result==TRUE);
1502
1503         uregex_close(clone1);
1504         uregex_close(clone2);
1505         uregex_close(clone3);
1506
1507     }
1508
1509     /*
1510      *  pattern() and patternText()
1511      */
1512     {
1513         const UChar  *resultPat;
1514         int32_t       resultLen;
1515         UText        *resultText;
1516         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1517         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1518         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1519         status = U_ZERO_ERROR;
1520
1521         utext_openUTF8(&patternText, str_hello, -1, &status);
1522         re = uregex_open(pat, -1, 0, NULL, &status);
1523         resultPat = uregex_pattern(re, &resultLen, &status);
1524         TEST_ASSERT_SUCCESS(status);
1525
1526         /* The TEST_ASSERT_SUCCESS above should change too... */
1527         if (U_SUCCESS(status)) {
1528             TEST_ASSERT(resultLen == -1);
1529             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1530         }
1531
1532         resultText = uregex_patternUText(re, &status);
1533         TEST_ASSERT_SUCCESS(status);
1534         TEST_ASSERT_UTEXT(str_hello, resultText);
1535
1536         uregex_close(re);
1537
1538         status = U_ZERO_ERROR;
1539         re = uregex_open(pat, 3, 0, NULL, &status);
1540         resultPat = uregex_pattern(re, &resultLen, &status);
1541         TEST_ASSERT_SUCCESS(status);
1542
1543         /* The TEST_ASSERT_SUCCESS above should change too... */
1544         if (U_SUCCESS(status)) {
1545             TEST_ASSERT(resultLen == 3);
1546             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1547             TEST_ASSERT(u_strlen(resultPat) == 3);
1548         }
1549
1550         resultText = uregex_patternUText(re, &status);
1551         TEST_ASSERT_SUCCESS(status);
1552         TEST_ASSERT_UTEXT(str_hel, resultText);
1553
1554         uregex_close(re);
1555     }
1556
1557     /*
1558      *  setUText() and lookingAt()
1559      */
1560     {
1561         UText  text1 = UTEXT_INITIALIZER;
1562         UText  text2 = UTEXT_INITIALIZER;
1563         UBool  result;
1564         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1565         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1566         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1567         status = U_ZERO_ERROR;
1568         utext_openUTF8(&text1, str_abcccd, -1, &status);
1569         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1570
1571         utext_openUTF8(&patternText, str_abcd, -1, &status);
1572         re = uregex_openUText(&patternText, 0, NULL, &status);
1573         TEST_ASSERT_SUCCESS(status);
1574
1575         /* Operation before doing a setText should fail... */
1576         status = U_ZERO_ERROR;
1577         uregex_lookingAt(re, 0, &status);
1578         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1579
1580         status = U_ZERO_ERROR;
1581         uregex_setUText(re, &text1, &status);
1582         result = uregex_lookingAt(re, 0, &status);
1583         TEST_ASSERT(result == TRUE);
1584         TEST_ASSERT_SUCCESS(status);
1585
1586         status = U_ZERO_ERROR;
1587         uregex_setUText(re, &text2, &status);
1588         result = uregex_lookingAt(re, 0, &status);
1589         TEST_ASSERT(result == FALSE);
1590         TEST_ASSERT_SUCCESS(status);
1591
1592         status = U_ZERO_ERROR;
1593         uregex_setUText(re, &text1, &status);
1594         result = uregex_lookingAt(re, 0, &status);
1595         TEST_ASSERT(result == TRUE);
1596         TEST_ASSERT_SUCCESS(status);
1597
1598         uregex_close(re);
1599         utext_close(&text1);
1600         utext_close(&text2);
1601     }
1602
1603
1604     /*
1605      *  getText() and getUText()
1606      */
1607     {
1608         UText  text1 = UTEXT_INITIALIZER;
1609         UText  text2 = UTEXT_INITIALIZER;
1610         UChar  text2Chars[20];
1611         UText  *resultText;
1612         const UChar   *result;
1613         int32_t  textLength;
1614         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1615         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1616         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1617
1618
1619         status = U_ZERO_ERROR;
1620         utext_openUTF8(&text1, str_abcccd, -1, &status);
1621         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1622         utext_openUChars(&text2, text2Chars, -1, &status);
1623
1624         utext_openUTF8(&patternText, str_abcd, -1, &status);
1625         re = uregex_openUText(&patternText, 0, NULL, &status);
1626
1627         /* First set a UText */
1628         uregex_setUText(re, &text1, &status);
1629         resultText = uregex_getUText(re, NULL, &status);
1630         TEST_ASSERT_SUCCESS(status);
1631         TEST_ASSERT(resultText != &text1);
1632         utext_setNativeIndex(resultText, 0);
1633         utext_setNativeIndex(&text1, 0);
1634         TEST_ASSERT(testUTextEqual(resultText, &text1));
1635         utext_close(resultText);
1636
1637         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1638         (void)result;    /* Suppress set but not used warning. */
1639         TEST_ASSERT(textLength == -1 || textLength == 6);
1640         resultText = uregex_getUText(re, NULL, &status);
1641         TEST_ASSERT_SUCCESS(status);
1642         TEST_ASSERT(resultText != &text1);
1643         utext_setNativeIndex(resultText, 0);
1644         utext_setNativeIndex(&text1, 0);
1645         TEST_ASSERT(testUTextEqual(resultText, &text1));
1646         utext_close(resultText);
1647
1648         /* Then set a UChar * */
1649         uregex_setText(re, text2Chars, 7, &status);
1650         resultText = uregex_getUText(re, NULL, &status);
1651         TEST_ASSERT_SUCCESS(status);
1652         utext_setNativeIndex(resultText, 0);
1653         utext_setNativeIndex(&text2, 0);
1654         TEST_ASSERT(testUTextEqual(resultText, &text2));
1655         utext_close(resultText);
1656         result = uregex_getText(re, &textLength, &status);
1657         TEST_ASSERT(textLength == 7);
1658
1659         uregex_close(re);
1660         utext_close(&text1);
1661         utext_close(&text2);
1662     }
1663
1664     /*
1665      *  matches()
1666      */
1667     {
1668         UText   text1 = UTEXT_INITIALIZER;
1669         UBool   result;
1670         UText   nullText = UTEXT_INITIALIZER;
1671         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1672         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1673
1674         status = U_ZERO_ERROR;
1675         utext_openUTF8(&text1, str_abcccde, -1, &status);
1676         utext_openUTF8(&patternText, str_abcd, -1, &status);
1677         re = uregex_openUText(&patternText, 0, NULL, &status);
1678
1679         uregex_setUText(re, &text1, &status);
1680         result = uregex_matches(re, 0, &status);
1681         TEST_ASSERT(result == FALSE);
1682         TEST_ASSERT_SUCCESS(status);
1683         uregex_close(re);
1684
1685         status = U_ZERO_ERROR;
1686         re = uregex_openC(".?", 0, NULL, &status);
1687         uregex_setUText(re, &text1, &status);
1688         result = uregex_matches(re, 7, &status);
1689         TEST_ASSERT(result == TRUE);
1690         TEST_ASSERT_SUCCESS(status);
1691
1692         status = U_ZERO_ERROR;
1693         utext_openUTF8(&nullText, "", -1, &status);
1694         uregex_setUText(re, &nullText, &status);
1695         TEST_ASSERT_SUCCESS(status);
1696         result = uregex_matches(re, 0, &status);
1697         TEST_ASSERT(result == TRUE);
1698         TEST_ASSERT_SUCCESS(status);
1699
1700         uregex_close(re);
1701         utext_close(&text1);
1702         utext_close(&nullText);
1703     }
1704
1705
1706     /*
1707      *  lookingAt()    Used in setText test.
1708      */
1709
1710
1711     /*
1712      *  find(), findNext, start, end, reset
1713      */
1714     {
1715         UChar    text1[50];
1716         UBool    result;
1717         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1718         status = U_ZERO_ERROR;
1719         re = uregex_openC("rx", 0, NULL, &status);
1720
1721         uregex_setText(re, text1, -1, &status);
1722         result = uregex_find(re, 0, &status);
1723         TEST_ASSERT(result == TRUE);
1724         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1725         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1726         TEST_ASSERT_SUCCESS(status);
1727
1728         result = uregex_find(re, 9, &status);
1729         TEST_ASSERT(result == TRUE);
1730         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1731         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1732         TEST_ASSERT_SUCCESS(status);
1733
1734         result = uregex_find(re, 14, &status);
1735         TEST_ASSERT(result == FALSE);
1736         TEST_ASSERT_SUCCESS(status);
1737
1738         status = U_ZERO_ERROR;
1739         uregex_reset(re, 0, &status);
1740
1741         result = uregex_findNext(re, &status);
1742         TEST_ASSERT(result == TRUE);
1743         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1744         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1745         TEST_ASSERT_SUCCESS(status);
1746
1747         result = uregex_findNext(re, &status);
1748         TEST_ASSERT(result == TRUE);
1749         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1750         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1751         TEST_ASSERT_SUCCESS(status);
1752
1753         status = U_ZERO_ERROR;
1754         uregex_reset(re, 12, &status);
1755
1756         result = uregex_findNext(re, &status);
1757         TEST_ASSERT(result == TRUE);
1758         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1759         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1760         TEST_ASSERT_SUCCESS(status);
1761
1762         result = uregex_findNext(re, &status);
1763         TEST_ASSERT(result == FALSE);
1764         TEST_ASSERT_SUCCESS(status);
1765
1766         uregex_close(re);
1767     }
1768
1769     /*
1770      *  groupUText()
1771      */
1772     {
1773         UChar    text1[80];
1774         UText   *actual;
1775         UBool    result;
1776         int64_t  groupLen = 0;
1777         UChar    groupBuf[20];
1778
1779         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1780
1781         status = U_ZERO_ERROR;
1782         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1783         TEST_ASSERT_SUCCESS(status);
1784
1785         uregex_setText(re, text1, -1, &status);
1786         result = uregex_find(re, 0, &status);
1787         TEST_ASSERT(result==TRUE);
1788
1789         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1790         status = U_ZERO_ERROR;
1791         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1792         TEST_ASSERT_SUCCESS(status);
1793
1794         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1795         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1796         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1797
1798         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1799         utext_close(actual);
1800
1801         /*  Capture group #1.  Should succeed. */
1802         status = U_ZERO_ERROR;
1803
1804         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1805         TEST_ASSERT_SUCCESS(status);
1806         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1807                                                            /*    (within the string text1)           */
1808         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1809         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1810         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1811
1812         utext_close(actual);
1813
1814         /*  Capture group out of range.  Error. */
1815         status = U_ZERO_ERROR;
1816         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1817         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1818         utext_close(actual);
1819
1820         uregex_close(re);
1821     }
1822
1823     /*
1824      *  replaceFirst()
1825      */
1826     {
1827         UChar    text1[80];
1828         UChar    text2[80];
1829         UText    replText = UTEXT_INITIALIZER;
1830         UText   *result;
1831         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1832         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1833         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1834                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1835         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1836         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1837         status = U_ZERO_ERROR;
1838         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1839         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1840         utext_openUTF8(&replText, str_1x, -1, &status);
1841
1842         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1843         TEST_ASSERT_SUCCESS(status);
1844
1845         /*  Normal case, with match */
1846         uregex_setText(re, text1, -1, &status);
1847         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1848         TEST_ASSERT_SUCCESS(status);
1849         TEST_ASSERT_UTEXT(str_Replxxx, result);
1850         utext_close(result);
1851
1852         /* No match.  Text should copy to output with no changes.  */
1853         uregex_setText(re, text2, -1, &status);
1854         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1855         TEST_ASSERT_SUCCESS(status);
1856         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1857         utext_close(result);
1858
1859         /* Unicode escapes */
1860         uregex_setText(re, text1, -1, &status);
1861         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1862         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1863         TEST_ASSERT_SUCCESS(status);
1864         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1865         utext_close(result);
1866
1867         uregex_close(re);
1868         utext_close(&replText);
1869     }
1870
1871
1872     /*
1873      *  replaceAll()
1874      */
1875     {
1876         UChar    text1[80];
1877         UChar    text2[80];
1878         UText    replText = UTEXT_INITIALIZER;
1879         UText   *result;
1880         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1881         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1882         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1883         status = U_ZERO_ERROR;
1884         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1885         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1886         utext_openUTF8(&replText, str_1, -1, &status);
1887
1888         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1889         TEST_ASSERT_SUCCESS(status);
1890
1891         /*  Normal case, with match */
1892         uregex_setText(re, text1, -1, &status);
1893         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1894         TEST_ASSERT_SUCCESS(status);
1895         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1896         utext_close(result);
1897
1898         /* No match.  Text should copy to output with no changes.  */
1899         uregex_setText(re, text2, -1, &status);
1900         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1901         TEST_ASSERT_SUCCESS(status);
1902         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1903         utext_close(result);
1904
1905         uregex_close(re);
1906         utext_close(&replText);
1907     }
1908
1909
1910     /*
1911      *  appendReplacement()
1912      */
1913     {
1914         UChar    text[100];
1915         UChar    repl[100];
1916         UChar    buf[100];
1917         UChar   *bufPtr;
1918         int32_t  bufCap;
1919
1920         status = U_ZERO_ERROR;
1921         re = uregex_openC(".*", 0, 0, &status);
1922         TEST_ASSERT_SUCCESS(status);
1923
1924         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1925         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1926         uregex_setText(re, text, -1, &status);
1927
1928         /* match covers whole target string */
1929         uregex_find(re, 0, &status);
1930         TEST_ASSERT_SUCCESS(status);
1931         bufPtr = buf;
1932         bufCap = UPRV_LENGTHOF(buf);
1933         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1934         TEST_ASSERT_SUCCESS(status);
1935         TEST_ASSERT_STRING("some other", buf, TRUE);
1936
1937         /* Match has \u \U escapes */
1938         uregex_find(re, 0, &status);
1939         TEST_ASSERT_SUCCESS(status);
1940         bufPtr = buf;
1941         bufCap = UPRV_LENGTHOF(buf);
1942         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1943         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1944         TEST_ASSERT_SUCCESS(status);
1945         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1946
1947         uregex_close(re);
1948     }
1949
1950
1951     /*
1952      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1953      */
1954
1955     /*
1956      *  splitUText()
1957      */
1958     {
1959         UChar    textToSplit[80];
1960         UChar    text2[80];
1961         UText    *fields[10];
1962         int32_t  numFields;
1963         int32_t i;
1964
1965         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1966         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1967
1968         status = U_ZERO_ERROR;
1969         re = uregex_openC(":", 0, NULL, &status);
1970
1971
1972         /*  Simple split */
1973
1974         uregex_setText(re, textToSplit, -1, &status);
1975         TEST_ASSERT_SUCCESS(status);
1976
1977         /* The TEST_ASSERT_SUCCESS call above should change too... */
1978         if (U_SUCCESS(status)) {
1979             memset(fields, 0, sizeof(fields));
1980             numFields = uregex_splitUText(re, fields, 10, &status);
1981             TEST_ASSERT_SUCCESS(status);
1982
1983             /* The TEST_ASSERT_SUCCESS call above should change too... */
1984             if(U_SUCCESS(status)) {
1985               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1986               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1987               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1988                 TEST_ASSERT(numFields == 3);
1989                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1990                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1991                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1992                 TEST_ASSERT(fields[3] == NULL);
1993             }
1994             for(i = 0; i < numFields; i++) {
1995                 utext_close(fields[i]);
1996             }
1997         }
1998
1999         uregex_close(re);
2000
2001
2002         /*  Split with too few output strings available */
2003         status = U_ZERO_ERROR;
2004         re = uregex_openC(":", 0, NULL, &status);
2005         uregex_setText(re, textToSplit, -1, &status);
2006         TEST_ASSERT_SUCCESS(status);
2007
2008         /* The TEST_ASSERT_SUCCESS call above should change too... */
2009         if(U_SUCCESS(status)) {
2010             fields[0] = NULL;
2011             fields[1] = NULL;
2012             fields[2] = &patternText;
2013             numFields = uregex_splitUText(re, fields, 2, &status);
2014             TEST_ASSERT_SUCCESS(status);
2015
2016             /* The TEST_ASSERT_SUCCESS call above should change too... */
2017             if(U_SUCCESS(status)) {
2018                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2019                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2020                 TEST_ASSERT(numFields == 2);
2021                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2022                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2023                 TEST_ASSERT(fields[2] == &patternText);
2024             }
2025             for(i = 0; i < numFields; i++) {
2026                 utext_close(fields[i]);
2027             }
2028         }
2029
2030         uregex_close(re);
2031     }
2032
2033     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2034      *                   comes out as additional fields.  */
2035     {
2036         UChar    textToSplit[80];
2037         UText    *fields[10];
2038         int32_t  numFields;
2039         int32_t i;
2040
2041         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2042
2043         status = U_ZERO_ERROR;
2044         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2045
2046         uregex_setText(re, textToSplit, -1, &status);
2047         TEST_ASSERT_SUCCESS(status);
2048
2049         /* The TEST_ASSERT_SUCCESS call above should change too... */
2050         if(U_SUCCESS(status)) {
2051             memset(fields, 0, sizeof(fields));
2052             numFields = uregex_splitUText(re, fields, 10, &status);
2053             TEST_ASSERT_SUCCESS(status);
2054
2055             /* The TEST_ASSERT_SUCCESS call above should change too... */
2056             if(U_SUCCESS(status)) {
2057                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2058                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2059                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2060                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2061                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2062
2063                 TEST_ASSERT(numFields == 5);
2064                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2065                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2066                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2067                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2068                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2069                 TEST_ASSERT(fields[5] == NULL);
2070             }
2071             for(i = 0; i < numFields; i++) {
2072                 utext_close(fields[i]);
2073             }
2074         }
2075
2076         /*  Split with too few output strings available (2) */
2077         status = U_ZERO_ERROR;
2078         fields[0] = NULL;
2079         fields[1] = NULL;
2080         fields[2] = &patternText;
2081         numFields = uregex_splitUText(re, fields, 2, &status);
2082         TEST_ASSERT_SUCCESS(status);
2083
2084         /* The TEST_ASSERT_SUCCESS call above should change too... */
2085         if(U_SUCCESS(status)) {
2086             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2087             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2088             TEST_ASSERT(numFields == 2);
2089             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2090             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2091             TEST_ASSERT(fields[2] == &patternText);
2092         }
2093         for(i = 0; i < numFields; i++) {
2094             utext_close(fields[i]);
2095         }
2096
2097
2098         /*  Split with too few output strings available (3) */
2099         status = U_ZERO_ERROR;
2100         fields[0] = NULL;
2101         fields[1] = NULL;
2102         fields[2] = NULL;
2103         fields[3] = &patternText;
2104         numFields = uregex_splitUText(re, fields, 3, &status);
2105         TEST_ASSERT_SUCCESS(status);
2106
2107         /* The TEST_ASSERT_SUCCESS call above should change too... */
2108         if(U_SUCCESS(status)) {
2109             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2110             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2111             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2112             TEST_ASSERT(numFields == 3);
2113             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2114             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2115             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2116             TEST_ASSERT(fields[3] == &patternText);
2117         }
2118         for(i = 0; i < numFields; i++) {
2119             utext_close(fields[i]);
2120         }
2121
2122         /*  Split with just enough output strings available (5) */
2123         status = U_ZERO_ERROR;
2124         fields[0] = NULL;
2125         fields[1] = NULL;
2126         fields[2] = NULL;
2127         fields[3] = NULL;
2128         fields[4] = NULL;
2129         fields[5] = &patternText;
2130         numFields = uregex_splitUText(re, fields, 5, &status);
2131         TEST_ASSERT_SUCCESS(status);
2132
2133         /* The TEST_ASSERT_SUCCESS call above should change too... */
2134         if(U_SUCCESS(status)) {
2135             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2136             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2137             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2138             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2139             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2140
2141             TEST_ASSERT(numFields == 5);
2142             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2143             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2144             TEST_ASSERT_UTEXT(str_second, fields[2]);
2145             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2146             TEST_ASSERT_UTEXT(str_third, fields[4]);
2147             TEST_ASSERT(fields[5] == &patternText);
2148         }
2149         for(i = 0; i < numFields; i++) {
2150             utext_close(fields[i]);
2151         }
2152
2153         /* Split, end of text is a field delimiter.   */
2154         status = U_ZERO_ERROR;
2155         uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2156         TEST_ASSERT_SUCCESS(status);
2157
2158         /* The TEST_ASSERT_SUCCESS call above should change too... */
2159         if(U_SUCCESS(status)) {
2160             memset(fields, 0, sizeof(fields));
2161             fields[9] = &patternText;
2162             numFields = uregex_splitUText(re, fields, 9, &status);
2163             TEST_ASSERT_SUCCESS(status);
2164
2165             /* The TEST_ASSERT_SUCCESS call above should change too... */
2166             if(U_SUCCESS(status)) {
2167                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2168                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2169                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2170                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2171                 const char str_empty[] = { 0x00 };
2172
2173                 TEST_ASSERT(numFields == 5);
2174                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2175                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2176                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2177                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2178                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2179                 TEST_ASSERT(fields[5] == NULL);
2180                 TEST_ASSERT(fields[8] == NULL);
2181                 TEST_ASSERT(fields[9] == &patternText);
2182             }
2183             for(i = 0; i < numFields; i++) {
2184                 utext_close(fields[i]);
2185             }
2186         }
2187
2188         uregex_close(re);
2189     }
2190     utext_close(&patternText);
2191 }
2192
2193
2194 static void TestRefreshInput(void) {
2195     /*
2196      *  uregex_refreshUText() changes out the input of a URegularExpression without
2197      *    changing anything else in the match state.  Used with Java JNI, when Java
2198      *    moves the underlying string storage.  This test runs a find() loop, moving
2199      *    the text after the first match; the remaining matches must still be found.
2200      */
2201     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2202     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};  /* same length as testStr */
2203     UErrorCode status = U_ZERO_ERROR;
2204     URegularExpression *re;
2205     UText ut1 = UTEXT_INITIALIZER;
2206     UText ut2 = UTEXT_INITIALIZER;
2207
2208     re = uregex_openC("[ABC]", 0, NULL, &status);
2209     TEST_ASSERT_SUCCESS(status);
2210     utext_openUChars(&ut1, testStr, -1, &status);
2211     TEST_ASSERT_SUCCESS(status);
2212     uregex_setUText(re, &ut1, &status);
2213     TEST_ASSERT_SUCCESS(status);
2214
2215     /* Find the first match "A" in the original string */
2216     TEST_ASSERT(uregex_findNext(re, &status));
2217     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2218
2219     /* Move the string, kill the original string.  */
2220     u_strcpy(movedStr, testStr);
2221     u_memset(testStr, 0, u_strlen(testStr));
2222     utext_openUChars(&ut2, movedStr, -1, &status);
2223     TEST_ASSERT_SUCCESS(status);
2224     uregex_refreshUText(re, &ut2, &status);
2225     TEST_ASSERT_SUCCESS(status);
2226
2227     /* Find the following two matches, now working in the moved string. */
2228     TEST_ASSERT(uregex_findNext(re, &status));
2229     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2230     TEST_ASSERT(uregex_findNext(re, &status));
2231     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2232     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2233     TEST_ASSERT_SUCCESS(status);  /* status from the find/start calls was otherwise unchecked */
2234     uregex_close(re);
2235     utext_close(&ut1);            /* every opened UText gets closed, stack-based or not */
2236     utext_close(&ut2);
2237 }
2238
2239
2240 static void TestBug8421(void) {
2241     /* Regression test for bug 8421: calling setTimeLimit on a regular expression
2242      *   before any text had been set to match against was failing.
2243      */
2244     UErrorCode errorCode = U_ZERO_ERROR;
2245     int32_t  limit = -1;
2246     URegularExpression *regex = uregex_openC("abc", 0, 0, &errorCode);
2247
2248     TEST_ASSERT_SUCCESS(errorCode);
2249
2250     /* No text is ever set on the regex; the expected initial limit is 0. */
2251     limit = uregex_getTimeLimit(regex, &errorCode);
2252     TEST_ASSERT_SUCCESS(errorCode);
2253     TEST_ASSERT(limit == 0);
2254     /* A limit that has been set must read back unchanged. */
2255     uregex_setTimeLimit(regex, 100, &errorCode);
2256     TEST_ASSERT_SUCCESS(errorCode);
2257     limit = uregex_getTimeLimit(regex, &errorCode);
2258     TEST_ASSERT_SUCCESS(errorCode);
2259     TEST_ASSERT(limit == 100);
2260
2261     uregex_close(regex);
2262 }
2263
2264 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2265     /* Find-progress callback used by TestBug10815: ignores both arguments, always returns FALSE. */
2266     (void)matchIndex;   /* unused */
2267     (void)context;      /* unused */
2268     return FALSE;
2269 }
2270
2271 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2272     /* Match callback used by TestBug10815: ignores both arguments, always returns FALSE. */
2273     (void)steps;     /* unused */
2274     (void)context;   /* unused */
2275     return FALSE;
2276 }
2277
2278 static void TestBug10815(void) {
2279   /* Bug 10815:   uregex_findNext() must set U_REGEX_STOPPED_BY_CALLER when a progress
2280    *              callback returns FALSE.  Checked for both callback kinds: the one set by
2281    *              uregex_setFindProgressCallback() and the one set by uregex_setMatchCallback().
2282    */
2283     URegularExpression *re;
2284     UErrorCode status = U_ZERO_ERROR;
2285     UChar    text[100];
2286
2287     // findNext() with a find progress callback function.
2288
2289     re = uregex_openC(".z", 0, NULL, &status);
2290     TEST_ASSERT_SUCCESS(status);
2291
2292     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2293     uregex_setText(re, text, -1, &status);
2294     TEST_ASSERT_SUCCESS(status);
2295
2296     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2297     TEST_ASSERT_SUCCESS(status);
2298
2299     uregex_findNext(re, &status);
2300     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2301
2302     uregex_close(re);
2303
2304     // findNext() with a match progress callback function.
2305
2306     status = U_ZERO_ERROR;
2307     re = uregex_openC("((xxx)*)*y", 0, NULL, &status);
2308     TEST_ASSERT_SUCCESS(status);
2309
2310     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2311     // it will appear to be stuck in a (near) infinite loop.
2312     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2313     uregex_setText(re, text, -1, &status);
2314     TEST_ASSERT_SUCCESS(status);
2315
2316     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2317     TEST_ASSERT_SUCCESS(status);
2318
2319     uregex_findNext(re, &status);
2320     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2321
2322     uregex_close(re);
2323 }
2324
2325 static const UChar startLinePattern[] = { 0x5E, 0x78, 0 }; // "^x"
2326
2327 static void TestMatchStartLineWithEmptyText() {
2328     UErrorCode status = U_ZERO_ERROR;
2329     UText* ut = utext_openUChars(NULL, NULL, 0, &status);
2330     TEST_ASSERT_SUCCESS(status);
2331     if (U_SUCCESS(status)) {
2332         URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status);
2333         TEST_ASSERT_SUCCESS(status);
2334         if (U_SUCCESS(status)) {
2335             uregex_setUText(re, ut, &status);
2336             TEST_ASSERT(U_SUCCESS(status));
2337             if (U_SUCCESS(status)) {
2338                 UBool found = uregex_findNext(re, &status);
2339                 TEST_ASSERT(U_SUCCESS(status) && !found);
2340             }
2341             uregex_close(re);
2342         }
2343         utext_close(ut);
2344     }
2345 }
2346
2347 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */