+/*
+ * TestUTextAPI
+ *
+ * Exercises the UText flavored entry points of the uregex C API (open, clone,
+ * pattern access, setUText/getUText, matches, find, group, replace, append
+ * and split).  Based on TestRegexCAPI().
+ *
+ * patternText is a stack UText that is re-opened on several different strings
+ * over the course of the test and closed once at the very end.  Its address is
+ * also used as a sentinel value in the split tests, to verify that
+ * uregex_splitUText() does not write past the requested number of fields.
+ */
+static void TestUTextAPI(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    URegularExpression *re;
+    UText patternText = UTEXT_INITIALIZER;
+    UChar pat[200];
+    const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };   /* abc* */
+
+    /* Minimalist open/close */
+    utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
+    re = uregex_openUText(&patternText, 0, 0, &status);
+    if (U_FAILURE(status)) {
+        log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
+        utext_close(&patternText);
+        return;
+    }
+    uregex_close(re);
+
+    /* Open with all flag values set */
+    status = U_ZERO_ERROR;
+    re = uregex_openUText(&patternText,
+        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
+        0, &status);
+    TEST_ASSERT_SUCCESS(status);
+    uregex_close(re);
+
+    /* Open with an invalid flag */
+    status = U_ZERO_ERROR;
+    re = uregex_openUText(&patternText, 0x40000000, 0, &status);
+    TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
+    uregex_close(re);
+
+    /* open with an invalid parameter */
+    status = U_ZERO_ERROR;
+    re = uregex_openUText(NULL,
+        UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
+    TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
+
+    /*
+     * clone
+     */
+    {
+        URegularExpression *clone1;
+        URegularExpression *clone2;
+        URegularExpression *clone3;
+        UChar testString1[30];
+        UChar testString2[30];
+        UBool result;
+
+
+        status = U_ZERO_ERROR;
+        re = uregex_openUText(&patternText, 0, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        clone1 = uregex_clone(re, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(clone1 != NULL);
+
+        status = U_ZERO_ERROR;
+        clone2 = uregex_clone(re, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(clone2 != NULL);
+        /* The clones are used below after the source regex has been closed. */
+        uregex_close(re);
+
+        status = U_ZERO_ERROR;
+        clone3 = uregex_clone(clone2, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(clone3 != NULL);
+
+        /* The capacity passed must be that of the destination buffer itself. */
+        u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(testString1));
+        u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(testString2));
+
+        status = U_ZERO_ERROR;
+        uregex_setText(clone1, testString1, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        result = uregex_lookingAt(clone1, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(result==TRUE);
+
+        status = U_ZERO_ERROR;
+        uregex_setText(clone2, testString2, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+        result = uregex_lookingAt(clone2, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(result==FALSE);
+        result = uregex_find(clone2, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(result==TRUE);
+
+        uregex_close(clone1);
+        uregex_close(clone2);
+        uregex_close(clone3);
+
+    }
+
+    /*
+     * pattern() and patternText()
+     */
+    {
+        const UChar *resultPat;
+        int32_t resultLen;
+        UText *resultText;
+        const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
+        const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
+        u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
+        status = U_ZERO_ERROR;
+
+        utext_openUTF8(&patternText, str_hello, -1, &status);
+        re = uregex_open(pat, -1, 0, NULL, &status);
+        resultPat = uregex_pattern(re, &resultLen, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS above should change too... */
+        if (U_SUCCESS(status)) {
+            TEST_ASSERT(resultLen == -1);
+            TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
+        }
+
+        resultText = uregex_patternUText(re, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_hello, resultText);
+
+        uregex_close(re);
+
+        /* Same pattern buffer, but opened with an explicit length of 3 ("hel"). */
+        status = U_ZERO_ERROR;
+        re = uregex_open(pat, 3, 0, NULL, &status);
+        resultPat = uregex_pattern(re, &resultLen, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS above should change too... */
+        if (U_SUCCESS(status)) {
+            TEST_ASSERT(resultLen == 3);
+            TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
+            TEST_ASSERT(u_strlen(resultPat) == 3);
+        }
+
+        resultText = uregex_patternUText(re, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_hel, resultText);
+
+        uregex_close(re);
+    }
+
+    /*
+     * setUText() and lookingAt()
+     */
+    {
+        UText text1 = UTEXT_INITIALIZER;
+        UText text2 = UTEXT_INITIALIZER;
+        UBool result;
+        /* Note: read as ASCII/UTF-8, the bytes below spell the strings shown here. */
+        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* bcddde */
+        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* bcdddye */
+        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* bcd+e */
+        status = U_ZERO_ERROR;
+        utext_openUTF8(&text1, str_abcccd, -1, &status);
+        utext_openUTF8(&text2, str_abcccxd, -1, &status);
+
+        utext_openUTF8(&patternText, str_abcd, -1, &status);
+        re = uregex_openUText(&patternText, 0, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* Operation before doing a setText should fail... */
+        status = U_ZERO_ERROR;
+        uregex_lookingAt(re, 0, &status);
+        TEST_ASSERT( status== U_REGEX_INVALID_STATE);
+
+        status = U_ZERO_ERROR;
+        uregex_setUText(re, &text1, &status);
+        result = uregex_lookingAt(re, 0, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT_SUCCESS(status);
+
+        status = U_ZERO_ERROR;
+        uregex_setUText(re, &text2, &status);
+        result = uregex_lookingAt(re, 0, &status);
+        TEST_ASSERT(result == FALSE);
+        TEST_ASSERT_SUCCESS(status);
+
+        status = U_ZERO_ERROR;
+        uregex_setUText(re, &text1, &status);
+        result = uregex_lookingAt(re, 0, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT_SUCCESS(status);
+
+        uregex_close(re);
+        utext_close(&text1);
+        utext_close(&text2);
+    }
+
+
+    /*
+     * getText() and getUText()
+     */
+    {
+        UText text1 = UTEXT_INITIALIZER;
+        UText text2 = UTEXT_INITIALIZER;
+        UChar text2Chars[20];
+        UText *resultText;
+        const UChar *result;
+        int32_t textLength;
+        /* Note: read as ASCII/UTF-8, the bytes below spell the strings shown here. */
+        const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* bcddde */
+        const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* bcdddye */
+        const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* bcd+e */
+
+
+        status = U_ZERO_ERROR;
+        utext_openUTF8(&text1, str_abcccd, -1, &status);
+        u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
+        utext_openUChars(&text2, text2Chars, -1, &status);
+
+        utext_openUTF8(&patternText, str_abcd, -1, &status);
+        re = uregex_openUText(&patternText, 0, NULL, &status);
+
+        /* First set a UText */
+        uregex_setUText(re, &text1, &status);
+        resultText = uregex_getUText(re, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(resultText != &text1);
+        utext_setNativeIndex(resultText, 0);
+        utext_setNativeIndex(&text1, 0);
+        TEST_ASSERT(testUTextEqual(resultText, &text1));
+        utext_close(resultText);
+
+        result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
+        (void)result; /* Suppress set but not used warning. */
+        TEST_ASSERT(textLength == -1 || textLength == 6);
+        resultText = uregex_getUText(re, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(resultText != &text1);
+        utext_setNativeIndex(resultText, 0);
+        utext_setNativeIndex(&text1, 0);
+        TEST_ASSERT(testUTextEqual(resultText, &text1));
+        utext_close(resultText);
+
+        /* Then set a UChar * */
+        uregex_setText(re, text2Chars, 7, &status);
+        resultText = uregex_getUText(re, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        utext_setNativeIndex(resultText, 0);
+        utext_setNativeIndex(&text2, 0);
+        TEST_ASSERT(testUTextEqual(resultText, &text2));
+        utext_close(resultText);
+        result = uregex_getText(re, &textLength, &status);
+        TEST_ASSERT(textLength == 7);
+
+        uregex_close(re);
+        utext_close(&text1);
+        utext_close(&text2);
+    }
+
+    /*
+     * matches()
+     */
+    {
+        UText text1 = UTEXT_INITIALIZER;
+        UBool result;
+        UText nullText = UTEXT_INITIALIZER;
+        const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
+        const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
+
+        status = U_ZERO_ERROR;
+        utext_openUTF8(&text1, str_abcccde, -1, &status);
+        utext_openUTF8(&patternText, str_abcd, -1, &status);
+        re = uregex_openUText(&patternText, 0, NULL, &status);
+
+        uregex_setUText(re, &text1, &status);
+        result = uregex_matches(re, 0, &status);
+        TEST_ASSERT(result == FALSE);
+        TEST_ASSERT_SUCCESS(status);
+        uregex_close(re);
+
+        /* Start index 7 is the end of the 7 character input. */
+        status = U_ZERO_ERROR;
+        re = uregex_openC(".?", 0, NULL, &status);
+        uregex_setUText(re, &text1, &status);
+        result = uregex_matches(re, 7, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT_SUCCESS(status);
+
+        status = U_ZERO_ERROR;
+        utext_openUTF8(&nullText, "", -1, &status);
+        uregex_setUText(re, &nullText, &status);
+        TEST_ASSERT_SUCCESS(status);
+        result = uregex_matches(re, 0, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT_SUCCESS(status);
+
+        uregex_close(re);
+        utext_close(&text1);
+        utext_close(&nullText);
+    }
+
+
+    /*
+     * lookingAt() Used in setText test.
+     */
+
+
+    /*
+     * find(), findNext, start, end, reset
+     */
+    {
+        UChar text1[50];
+        UBool result;
+        u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
+        status = U_ZERO_ERROR;
+        re = uregex_openC("rx", 0, NULL, &status);
+
+        uregex_setText(re, text1, -1, &status);
+        result = uregex_find(re, 0, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
+        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
+        TEST_ASSERT_SUCCESS(status);
+
+        result = uregex_find(re, 9, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT(uregex_start(re, 0, &status) == 11);
+        TEST_ASSERT(uregex_end(re, 0, &status) == 13);
+        TEST_ASSERT_SUCCESS(status);
+
+        result = uregex_find(re, 14, &status);
+        TEST_ASSERT(result == FALSE);
+        TEST_ASSERT_SUCCESS(status);
+
+        status = U_ZERO_ERROR;
+        uregex_reset(re, 0, &status);
+
+        result = uregex_findNext(re, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT(uregex_start(re, 0, &status) == 3);
+        TEST_ASSERT(uregex_end(re, 0, &status) == 5);
+        TEST_ASSERT_SUCCESS(status);
+
+        result = uregex_findNext(re, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT(uregex_start(re, 0, &status) == 6);
+        TEST_ASSERT(uregex_end(re, 0, &status) == 8);
+        TEST_ASSERT_SUCCESS(status);
+
+        status = U_ZERO_ERROR;
+        uregex_reset(re, 12, &status);
+
+        result = uregex_findNext(re, &status);
+        TEST_ASSERT(result == TRUE);
+        TEST_ASSERT(uregex_start(re, 0, &status) == 13);
+        TEST_ASSERT(uregex_end(re, 0, &status) == 15);
+        TEST_ASSERT_SUCCESS(status);
+
+        result = uregex_findNext(re, &status);
+        TEST_ASSERT(result == FALSE);
+        TEST_ASSERT_SUCCESS(status);
+
+        uregex_close(re);
+    }
+
+    /*
+     * groupUText()
+     */
+    {
+        UChar text1[80];
+        UText *actual;
+        UBool result;
+        int64_t groupLen = 0;
+        UChar groupBuf[20];
+
+        u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
+
+        status = U_ZERO_ERROR;
+        re = uregex_openC("abc(.*?)def", 0, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        uregex_setText(re, text1, -1, &status);
+        result = uregex_find(re, 0, &status);
+        TEST_ASSERT(result==TRUE);
+
+        /* Capture Group 0 with shallow clone API. Should succeed. */
+        status = U_ZERO_ERROR;
+        actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
+        TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
+        /* Destination capacity is a count of UChars, not a size in bytes. */
+        utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, UPRV_LENGTHOF(groupBuf), &status);
+
+        TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
+        utext_close(actual);
+
+        /* Capture group #1. Should succeed. */
+        status = U_ZERO_ERROR;
+
+        actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
+                                                        /* (within the string text1) */
+        TEST_ASSERT(10 == groupLen); /* length of " interior " */
+        utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, UPRV_LENGTHOF(groupBuf), &status);
+        TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
+
+        utext_close(actual);
+
+        /* Capture group out of range. Error. */
+        status = U_ZERO_ERROR;
+        actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
+        TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
+        utext_close(actual);
+
+        uregex_close(re);
+    }
+
+    /*
+     * replaceFirst()
+     */
+    {
+        UChar text1[80];
+        UChar text2[80];
+        UText replText = UTEXT_INITIALIZER;
+        UText *result;
+        const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
+        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
+        const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
+            0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
+        const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
+        const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
+        status = U_ZERO_ERROR;
+        u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
+        utext_openUTF8(&replText, str_1x, -1, &status);
+
+        re = uregex_openC("x(.*?)x", 0, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* Normal case, with match */
+        uregex_setText(re, text1, -1, &status);
+        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_Replxxx, result);
+        utext_close(result);
+
+        /* No match. Text should copy to output with no changes. */
+        uregex_setText(re, text2, -1, &status);
+        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
+        utext_close(result);
+
+        /* Unicode escapes */
+        uregex_setText(re, text1, -1, &status);
+        utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
+        result = uregex_replaceFirstUText(re, &replText, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
+        utext_close(result);
+
+        uregex_close(re);
+        utext_close(&replText);
+    }
+
+
+    /*
+     * replaceAll()
+     */
+    {
+        UChar text1[80];
+        UChar text2[80];
+        UText replText = UTEXT_INITIALIZER;
+        UText *result;
+        const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
+        const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
+        const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
+        status = U_ZERO_ERROR;
+        u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
+        u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
+        utext_openUTF8(&replText, str_1, -1, &status);
+
+        re = uregex_openC("x(.*?)x", 0, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* Normal case, with match */
+        uregex_setText(re, text1, -1, &status);
+        result = uregex_replaceAllUText(re, &replText, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_Replaceaa1, result);
+        utext_close(result);
+
+        /* No match. Text should copy to output with no changes. */
+        uregex_setText(re, text2, -1, &status);
+        result = uregex_replaceAllUText(re, &replText, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_UTEXT(str_Nomatchhere, result);
+        utext_close(result);
+
+        uregex_close(re);
+        utext_close(&replText);
+    }
+
+
+    /*
+     * appendReplacement()
+     */
+    {
+        UChar text[100];
+        UChar repl[100];
+        UChar buf[100];
+        UChar *bufPtr;
+        int32_t bufCap;
+
+        status = U_ZERO_ERROR;
+        re = uregex_openC(".*", 0, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
+        u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
+        uregex_setText(re, text, -1, &status);
+
+        /* match covers whole target string */
+        uregex_find(re, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        bufPtr = buf;
+        bufCap = UPRV_LENGTHOF(buf);
+        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_STRING("some other", buf, TRUE);
+
+        /* Match has \u \U escapes */
+        uregex_find(re, 0, &status);
+        TEST_ASSERT_SUCCESS(status);
+        bufPtr = buf;
+        bufCap = UPRV_LENGTHOF(buf);
+        u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
+        uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
+        TEST_ASSERT_SUCCESS(status);
+        TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
+
+        uregex_close(re);
+    }
+
+
+    /*
+     * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
+     */
+
+    /*
+     * splitUText()
+     */
+    {
+        UChar textToSplit[80];
+        UChar text2[80];
+        UText *fields[10];
+        int32_t numFields;
+        int32_t i;
+
+        /* Two spaces precede "third"; the expected field bytes below rely on that. */
+        u_uastrncpy(textToSplit, "first : second:  third", UPRV_LENGTHOF(textToSplit));
+        u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
+
+        status = U_ZERO_ERROR;
+        re = uregex_openC(":", 0, NULL, &status);
+
+
+        /* Simple split */
+
+        uregex_setText(re, textToSplit, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if (U_SUCCESS(status)) {
+            memset(fields, 0, sizeof(fields));
+            numFields = uregex_splitUText(re, fields, 10, &status);
+            TEST_ASSERT_SUCCESS(status);
+
+            /* The TEST_ASSERT_SUCCESS call above should change too... */
+            if(U_SUCCESS(status)) {
+                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
+                const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
+                TEST_ASSERT(numFields == 3);
+                TEST_ASSERT_UTEXT(str_first, fields[0]);
+                TEST_ASSERT_UTEXT(str_second, fields[1]);
+                TEST_ASSERT_UTEXT(str_third, fields[2]);
+                TEST_ASSERT(fields[3] == NULL);
+            }
+            for(i = 0; i < numFields; i++) {
+                utext_close(fields[i]);
+            }
+        }
+
+        uregex_close(re);
+
+
+        /* Split with too few output strings available */
+        status = U_ZERO_ERROR;
+        re = uregex_openC(":", 0, NULL, &status);
+        uregex_setText(re, textToSplit, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if(U_SUCCESS(status)) {
+            fields[0] = NULL;
+            fields[1] = NULL;
+            fields[2] = &patternText;   /* sentinel: must be left untouched by the split */
+            numFields = uregex_splitUText(re, fields, 2, &status);
+            TEST_ASSERT_SUCCESS(status);
+
+            /* The TEST_ASSERT_SUCCESS call above should change too... */
+            if(U_SUCCESS(status)) {
+                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+                const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' second:  third' */
+                TEST_ASSERT(numFields == 2);
+                TEST_ASSERT_UTEXT(str_first, fields[0]);
+                TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
+                TEST_ASSERT(fields[2] == &patternText);
+            }
+            for(i = 0; i < numFields; i++) {
+                utext_close(fields[i]);
+            }
+        }
+
+        uregex_close(re);
+    }
+
+    /* splitUText(), part 2. Patterns with capture groups. The capture group text
+     * comes out as additional fields. */
+    {
+        UChar textToSplit[80];
+        UText *fields[10];
+        int32_t numFields;
+        int32_t i;
+
+        /* Two spaces precede "third"; the expected field bytes below rely on that. */
+        u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third", UPRV_LENGTHOF(textToSplit));
+
+        status = U_ZERO_ERROR;
+        re = uregex_openC("<(.*?)>", 0, NULL, &status);
+
+        uregex_setText(re, textToSplit, -1, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if(U_SUCCESS(status)) {
+            memset(fields, 0, sizeof(fields));
+            numFields = uregex_splitUText(re, fields, 10, &status);
+            TEST_ASSERT_SUCCESS(status);
+
+            /* The TEST_ASSERT_SUCCESS call above should change too... */
+            if(U_SUCCESS(status)) {
+                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
+                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
+                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
+                const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
+
+                TEST_ASSERT(numFields == 5);
+                TEST_ASSERT_UTEXT(str_first, fields[0]);
+                TEST_ASSERT_UTEXT(str_taga, fields[1]);
+                TEST_ASSERT_UTEXT(str_second, fields[2]);
+                TEST_ASSERT_UTEXT(str_tagb, fields[3]);
+                TEST_ASSERT_UTEXT(str_third, fields[4]);
+                TEST_ASSERT(fields[5] == NULL);
+            }
+            for(i = 0; i < numFields; i++) {
+                utext_close(fields[i]);
+            }
+        }
+
+        /* Split with too few output strings available (2) */
+        status = U_ZERO_ERROR;
+        fields[0] = NULL;
+        fields[1] = NULL;
+        fields[2] = &patternText;   /* sentinel: must be left untouched by the split */
+        numFields = uregex_splitUText(re, fields, 2, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if(U_SUCCESS(status)) {
+            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' second<tag-b>  third' */
+            TEST_ASSERT(numFields == 2);
+            TEST_ASSERT_UTEXT(str_first, fields[0]);
+            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
+            TEST_ASSERT(fields[2] == &patternText);
+        }
+        for(i = 0; i < numFields; i++) {
+            utext_close(fields[i]);
+        }
+
+
+        /* Split with too few output strings available (3) */
+        status = U_ZERO_ERROR;
+        fields[0] = NULL;
+        fields[1] = NULL;
+        fields[2] = NULL;
+        fields[3] = &patternText;   /* sentinel */
+        numFields = uregex_splitUText(re, fields, 3, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if(U_SUCCESS(status)) {
+            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
+            const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' second<tag-b>  third' */
+            TEST_ASSERT(numFields == 3);
+            TEST_ASSERT_UTEXT(str_first, fields[0]);
+            TEST_ASSERT_UTEXT(str_taga, fields[1]);
+            TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
+            TEST_ASSERT(fields[3] == &patternText);
+        }
+        for(i = 0; i < numFields; i++) {
+            utext_close(fields[i]);
+        }
+
+        /* Split with just enough output strings available (5) */
+        status = U_ZERO_ERROR;
+        fields[0] = NULL;
+        fields[1] = NULL;
+        fields[2] = NULL;
+        fields[3] = NULL;
+        fields[4] = NULL;
+        fields[5] = &patternText;   /* sentinel */
+        numFields = uregex_splitUText(re, fields, 5, &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if(U_SUCCESS(status)) {
+            const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+            const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
+            const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
+            const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
+            const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
+
+            TEST_ASSERT(numFields == 5);
+            TEST_ASSERT_UTEXT(str_first, fields[0]);
+            TEST_ASSERT_UTEXT(str_taga, fields[1]);
+            TEST_ASSERT_UTEXT(str_second, fields[2]);
+            TEST_ASSERT_UTEXT(str_tagb, fields[3]);
+            TEST_ASSERT_UTEXT(str_third, fields[4]);
+            TEST_ASSERT(fields[5] == &patternText);
+        }
+        for(i = 0; i < numFields; i++) {
+            utext_close(fields[i]);
+        }
+
+        /* Split, end of text is a field delimiter. */
+        status = U_ZERO_ERROR;
+        uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
+        TEST_ASSERT_SUCCESS(status);
+
+        /* The TEST_ASSERT_SUCCESS call above should change too... */
+        if(U_SUCCESS(status)) {
+            memset(fields, 0, sizeof(fields));
+            fields[9] = &patternText;   /* sentinel */
+            numFields = uregex_splitUText(re, fields, 9, &status);
+            TEST_ASSERT_SUCCESS(status);
+
+            /* The TEST_ASSERT_SUCCESS call above should change too... */
+            if(U_SUCCESS(status)) {
+                const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
+                const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
+                const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
+                const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
+                const char str_empty[] = { 0x00 };
+
+                TEST_ASSERT(numFields == 5);
+                TEST_ASSERT_UTEXT(str_first, fields[0]);
+                TEST_ASSERT_UTEXT(str_taga, fields[1]);
+                TEST_ASSERT_UTEXT(str_second, fields[2]);
+                TEST_ASSERT_UTEXT(str_tagb, fields[3]);
+                TEST_ASSERT_UTEXT(str_empty, fields[4]);
+                TEST_ASSERT(fields[5] == NULL);
+                TEST_ASSERT(fields[8] == NULL);
+                TEST_ASSERT(fields[9] == &patternText);
+            }
+            for(i = 0; i < numFields; i++) {
+                utext_close(fields[i]);
+            }
+        }
+
+        uregex_close(re);
+    }
+    utext_close(&patternText);
+}
+
+
+static void TestRefreshInput(void) {
+    /*
+     * RefreshInput changes out the input of a URegularExpression without
+     * changing anything else in the match state. Used with Java JNI,
+     * when Java moves the underlying string storage. This test
+     * runs a find() loop, moving the text after the first match.
+     * The right number of matches should still be found.
+     */
+    UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
+    UChar movedStr[] = { 0, 0, 0, 0, 0, 0};                /* same capacity as testStr */
+    UErrorCode status = U_ZERO_ERROR;
+    URegularExpression *re;
+    UText ut1 = UTEXT_INITIALIZER;
+    UText ut2 = UTEXT_INITIALIZER;
+
+    re = uregex_openC("[ABC]", 0, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    utext_openUChars(&ut1, testStr, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+    uregex_setUText(re, &ut1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    /* Find the first match "A" in the original string */
+    TEST_ASSERT(uregex_findNext(re, &status));
+    TEST_ASSERT(uregex_start(re, 0, &status) == 0);
+
+    /* Move the string, kill the original string. */
+    u_strcpy(movedStr, testStr);
+    u_memset(testStr, 0, u_strlen(testStr));
+    utext_openUChars(&ut2, movedStr, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+    uregex_refreshUText(re, &ut2, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    /* Find the following two matches, now working in the moved string. */
+    TEST_ASSERT(uregex_findNext(re, &status));
+    TEST_ASSERT(uregex_start(re, 0, &status) == 2);
+    TEST_ASSERT(uregex_findNext(re, &status));
+    TEST_ASSERT(uregex_start(re, 0, &status) == 4);
+    TEST_ASSERT(FALSE == uregex_findNext(re, &status));
+
+    /* Release the regex first, then the stack UTexts it was given. */
+    uregex_close(re);
+    utext_close(&ut1);
+    utext_close(&ut2);
+}
+
+
+static void TestBug8421(void) {
+    /*
+     * Regression test for bug 8421: calling setTimeLimit() on a regular
+     * expression before any text to be matched had been set was failing.
+     * No input text is ever set on the regex in this test.
+     */
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t limit = -1;
+    URegularExpression *re = uregex_openC("abc", 0, 0, &status);
+
+    TEST_ASSERT_SUCCESS(status);
+
+    /* The limit read back from a freshly opened regex is expected to be 0. */
+    limit = uregex_getTimeLimit(re, &status);
+    TEST_ASSERT_SUCCESS(status);
+    TEST_ASSERT(limit == 0);
+
+    /* Set a limit and check that the same value is read back. */
+    uregex_setTimeLimit(re, 100, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    limit = uregex_getTimeLimit(re, &status);
+    TEST_ASSERT_SUCCESS(status);
+    TEST_ASSERT(limit == 100);
+
+    uregex_close(re);
+}
+
+/*
+ * Find-progress callback installed by TestBug10815().
+ * Ignores both arguments and unconditionally returns FALSE.
+ */
+static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
+    (void)matchIndex;   /* unused; cast silences the compiler warning */
+    (void)context;      /* unused; cast silences the compiler warning */
+    return FALSE;
+}
+
+/*
+ * Match-progress callback installed by TestBug10815().
+ * Ignores both arguments and unconditionally returns FALSE.
+ */
+static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
+    (void)steps;        /* unused; cast silences the compiler warning */
+    (void)context;      /* unused; cast silences the compiler warning */
+    return FALSE;
+}
+
+static void TestBug10815(void) {
+    /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
+     * when the callback function specified by uregex_setMatchCallback() returns FALSE
+     *
+     * Both halves of this test install a callback that always returns FALSE
+     * and then check that findNext() reports U_REGEX_STOPPED_BY_CALLER.
+     */
+    URegularExpression *re;
+    UErrorCode status = U_ZERO_ERROR;
+    UChar text[100];
+
+
+    // findNext() with a find progress callback function.
+
+    re = uregex_openC(".z", 0, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
+    uregex_setText(re, text, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_findNext(re, &status);
+    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
+
+    uregex_close(re);
+
+    // findNext() with a match progress callback function.
+
+    status = U_ZERO_ERROR;
+    re = uregex_openC("((xxx)*)*y", 0, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    // Pattern + this text gives an exponential time match. Without the callback to stop the match,
+    // it will appear to be stuck in a (near) infinite loop.
+    u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
+    uregex_setText(re, text, -1, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_setMatchCallback(re, MatchCallback, NULL, &status);
+    TEST_ASSERT_SUCCESS(status);
+
+    uregex_findNext(re, &status);
+    TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
+
+    uregex_close(re);
+}
+
+/* NUL-terminated pattern "^x", used by TestMatchStartLineWithEmptyText(). */
+static const UChar startLinePattern[] = {
+    0x5E,   /* '^' */
+    0x78,   /* 'x' */
+    0x00    /* terminator */
+};
+
+/*
+ * Opens a UText over a NULL UChar pointer with length 0, sets it as the input
+ * of the pattern "^x", and checks that findNext() succeeds without reporting
+ * a match.  Each step only runs if the previous one succeeded.
+ */
+static void TestMatchStartLineWithEmptyText(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    UText* ut = utext_openUChars(NULL, NULL, 0, &status);
+    TEST_ASSERT_SUCCESS(status);
+    if (U_SUCCESS(status)) {
+        URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status);
+        TEST_ASSERT_SUCCESS(status);
+        if (U_SUCCESS(status)) {
+            uregex_setUText(re, ut, &status);
+            TEST_ASSERT(U_SUCCESS(status));
+            if (U_SUCCESS(status)) {
+                UBool found = uregex_findNext(re, &status);
+                TEST_ASSERT(U_SUCCESS(status) && !found);
+            }
+            uregex_close(re);
+        }
+        utext_close(ut);
+    }
+}
+