X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..9f1b115531acc2f3640893f76e8bcf6680033062:/icuSources/test/cintltst/reapits.c?ds=sidebyside diff --git a/icuSources/test/cintltst/reapits.c b/icuSources/test/cintltst/reapits.c index 29b53c11..e00a404b 100644 --- a/icuSources/test/cintltst/reapits.c +++ b/icuSources/test/cintltst/reapits.c @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /******************************************************************** * COPYRIGHT: - * Copyright (c) 2004-2010, International Business Machines Corporation and + * Copyright (c) 2004-2015, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** @@ -28,12 +30,13 @@ #include "unicode/ustring.h" #include "unicode/utext.h" #include "cintltst.h" +#include "cmemory.h" #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ -log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} +log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ -log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} +log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", __FILE__, __LINE__, #expr);}} /* * TEST_SETUP and TEST_TEARDOWN @@ -90,12 +93,30 @@ static void test_assert_string(const char *expected, const UChar *actual, UBool #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__) +static UBool equals_utf8_utext(const char *utf8, UText *utext) { + int32_t u8i = 0; + UChar32 u8c = 0; + UChar32 utc = 0; + 
UBool stringsEqual = TRUE; + utext_setNativeIndex(utext, 0); + for (;;) { + U8_NEXT_UNSAFE(utf8, u8i, u8c); + utc = utext_next32(utext); + if (u8c == 0 && utc == U_SENTINEL) { + break; + } + if (u8c != utc || u8c == 0) { + stringsEqual = FALSE; + break; + } + } + return stringsEqual; +} + + static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) { - UErrorCode status = U_ZERO_ERROR; - UText expectedText = UTEXT_INITIALIZER; - utext_openUTF8(&expectedText, expected, -1, &status); utext_setNativeIndex(actual, 0); - if (utext_compare(&expectedText, -1, actual, -1) != 0) { + if (!equals_utf8_utext(expected, actual)) { UChar32 c; log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected); c = utext_next32From(actual, 0); @@ -109,16 +130,39 @@ static void test_assert_utext(const char *expected, UText *actual, const char *f } log_err("\"\n"); } - utext_close(&expectedText); } +/* + * TEST_ASSERT_UTEXT(const char *expected, const UText *actual) + * Note: Expected is a UTF-8 encoded string, _not_ the system code page. 
+ */ #define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__) +static UBool testUTextEqual(UText *uta, UText *utb) { + UChar32 ca = 0; + UChar32 cb = 0; + utext_setNativeIndex(uta, 0); + utext_setNativeIndex(utb, 0); + do { + ca = utext_next32(uta); + cb = utext_next32(utb); + if (ca != cb) { + break; + } + } while (ca != U_SENTINEL); + return ca == cb; +} + + static void TestRegexCAPI(void); static void TestBug4315(void); static void TestUTextAPI(void); +static void TestRefreshInput(void); +static void TestBug8421(void); +static void TestBug10815(void); +static void TestMatchStartLineWithEmptyText(void); void addURegexTest(TestNode** root); @@ -127,6 +171,10 @@ void addURegexTest(TestNode** root) addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); addTest(root, &TestBug4315, "regex/TestBug4315"); addTest(root, &TestUTextAPI, "regex/TestUTextAPI"); + addTest(root, &TestRefreshInput, "regex/TestRefreshInput"); + addTest(root, &TestBug8421, "regex/TestBug8421"); + addTest(root, &TestBug10815, "regex/TestBug10815"); + addTest(root, &TestMatchStartLineWithEmptyText, "regex/TestMatchStartLineWithEmptyText"); } /* @@ -163,7 +211,7 @@ static void TestRegexCAPI(void) { memset(&minus1, -1, sizeof(minus1)); /* Mimimalist open/close */ - u_uastrncpy(pat, "abc*", sizeof(pat)/2); + u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); re = uregex_open(pat, -1, 0, 0, &status); if (U_FAILURE(status)) { log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status)); @@ -174,7 +222,7 @@ static void TestRegexCAPI(void) { /* Open with all flag values set */ status = U_ZERO_ERROR; re = uregex_open(pat, -1, - UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, + UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL, 0, &status); TEST_ASSERT_SUCCESS(status); uregex_close(re); @@ 
-187,7 +235,7 @@ static void TestRegexCAPI(void) { /* Open with an unimplemented flag */ status = U_ZERO_ERROR; - re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status); + re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status); TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED); uregex_close(re); @@ -215,7 +263,7 @@ static void TestRegexCAPI(void) { /* The TEST_ASSERT_SUCCESS above should change too... */ if(U_SUCCESS(status)) { - u_uastrncpy(pat, "abc*", sizeof(pat)/2); + u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat)); TEST_ASSERT(u_strcmp(pat, p) == 0); TEST_ASSERT(len==(int32_t)strlen("abc*")); } @@ -255,8 +303,8 @@ static void TestRegexCAPI(void) { TEST_ASSERT_SUCCESS(status); TEST_ASSERT(clone3 != NULL); - u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); - u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); + u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); + u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); status = U_ZERO_ERROR; uregex_setText(clone1, testString1, -1, &status); @@ -287,7 +335,7 @@ static void TestRegexCAPI(void) { { const UChar *resultPat; int32_t resultLen; - u_uastrncpy(pat, "hello", sizeof(pat)/2); + u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); status = U_ZERO_ERROR; re = uregex_open(pat, -1, 0, NULL, &status); resultPat = uregex_pattern(re, &resultLen, &status); @@ -353,10 +401,10 @@ static void TestRegexCAPI(void) { UChar text2[50]; UBool result; - u_uastrncpy(text1, "abcccd", sizeof(text1)/2); - u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); + u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); + u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); status = U_ZERO_ERROR; - u_uastrncpy(pat, "abc*d", sizeof(pat)/2); + u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); re = uregex_open(pat, -1, 0, NULL, &status); TEST_ASSERT_SUCCESS(status); @@ -408,10 +456,10 @@ static void TestRegexCAPI(void) { const UChar *result; int32_t textLength; - u_uastrncpy(text1, "abcccd", sizeof(text1)/2); - u_uastrncpy(text2, "abcccxd", 
sizeof(text2)/2); + u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1)); + u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2)); status = U_ZERO_ERROR; - u_uastrncpy(pat, "abc*d", sizeof(pat)/2); + u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); re = uregex_open(pat, -1, 0, NULL, &status); uregex_setText(re, text1, -1, &status); @@ -445,9 +493,9 @@ static void TestRegexCAPI(void) { int len; UChar nullString[] = {0,0,0}; - u_uastrncpy(text1, "abcccde", sizeof(text1)/2); + u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1)); status = U_ZERO_ERROR; - u_uastrncpy(pat, "abc*d", sizeof(pat)/2); + u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat)); re = uregex_open(pat, -1, 0, NULL, &status); uregex_setText(re, text1, -1, &status); @@ -497,7 +545,7 @@ static void TestRegexCAPI(void) { { UChar text1[50]; UBool result; - u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); + u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("rx", 0, NULL, &status); @@ -580,7 +628,7 @@ static void TestRegexCAPI(void) { UChar buf[80]; UBool result; int32_t resultSz; - u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); + u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("abc(.*?)def", 0, NULL, &status); @@ -593,21 +641,21 @@ static void TestRegexCAPI(void) { /* Capture Group 0, the full match. Should succeed. */ status = U_ZERO_ERROR; - resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); + resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("abc interior def", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); /* Capture group #1. Should succeed. 
*/ status = U_ZERO_ERROR; - resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); + resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING(" interior ", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); /* Capture group out of range. Error. */ status = U_ZERO_ERROR; - uregex_group(re, 2, buf, sizeof(buf)/2, &status); + uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); /* NULL buffer, pure pre-flight */ @@ -651,7 +699,7 @@ static void TestRegexCAPI(void) { TEST_ASSERT(uregex_regionStart(re, &status) == 3); TEST_ASSERT(uregex_regionEnd(re, &status) == 6); TEST_ASSERT(uregex_findNext(re, &status)); - TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3) + TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3) TEST_ASSERT_STRING("345", resultString, TRUE); TEST_TEARDOWN; @@ -775,9 +823,9 @@ static void TestRegexCAPI(void) { UChar replText[80]; UChar buf[80]; int32_t resultSz; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); - u_uastrncpy(replText, "<$1>", sizeof(replText)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); + u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); status = U_ZERO_ERROR; re = uregex_openC("x(.*?)x", 0, NULL, &status); @@ -785,7 +833,7 @@ static void TestRegexCAPI(void) { /* Normal case, with match */ uregex_setText(re, text1, -1, &status); - resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("Replace x1x x...x.", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); @@ -793,7 +841,7 @@ static 
void TestRegexCAPI(void) { /* No match. Text should copy to output with no changes. */ status = U_ZERO_ERROR; uregex_setText(re, text2, -1, &status); - resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("No match here.", buf, TRUE); TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); @@ -855,10 +903,10 @@ static void TestRegexCAPI(void) { int32_t expectedResultSize2; int32_t i; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); - u_uastrncpy(replText, "<$1>", sizeof(replText)/2); - u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); + u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText)); + u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2)); expectedResultSize = strlen(expectedResult); expectedResultSize2 = strlen(expectedResult2); @@ -868,7 +916,7 @@ static void TestRegexCAPI(void) { /* Normal case, with match */ uregex_setText(re, text1, -1, &status); - resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING(expectedResult, buf, TRUE); TEST_ASSERT(resultSize == expectedResultSize); @@ -876,7 +924,7 @@ static void TestRegexCAPI(void) { /* No match. Text should copy to output with no changes. 
*/ status = U_ZERO_ERROR; uregex_setText(re, text2, -1, &status); - resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); + resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("No match here.", buf, TRUE); TEST_ASSERT(resultSize == u_strlen(text2)); @@ -960,15 +1008,15 @@ static void TestRegexCAPI(void) { re = uregex_openC(".*", 0, 0, &status); TEST_ASSERT_SUCCESS(status); - u_uastrncpy(text, "whatever", sizeof(text)/2); - u_uastrncpy(repl, "some other", sizeof(repl)/2); + u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); + u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); uregex_setText(re, text, -1, &status); /* match covers whole target string */ uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; + bufCap = UPRV_LENGTHOF(buf); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("some other", buf, TRUE); @@ -977,8 +1025,8 @@ static void TestRegexCAPI(void) { uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; - u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); + bufCap = UPRV_LENGTHOF(buf); + u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); @@ -1013,8 +1061,8 @@ static void TestRegexCAPI(void) { int32_t spaceNeeded; int32_t sz; - u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); + u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); status = U_ZERO_ERROR; re = uregex_openC(":", 0, NULL, &status); @@ -1029,7 +1077,7 @@ static void 
TestRegexCAPI(void) { if (U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1061,7 +1109,7 @@ static void TestRegexCAPI(void) { if(U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1119,7 +1167,7 @@ static void TestRegexCAPI(void) { int32_t spaceNeeded; int32_t sz; - u_uastrncpy(textToSplit, "first second third", sizeof(textToSplit)/2); + u_uastrncpy(textToSplit, "first second third", UPRV_LENGTHOF(textToSplit)); status = U_ZERO_ERROR; re = uregex_openC("<(.*?)>", 0, NULL, &status); @@ -1131,7 +1179,7 @@ static void TestRegexCAPI(void) { if(U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1152,7 +1200,7 @@ static void TestRegexCAPI(void) { status = U_ZERO_ERROR; memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... 
*/ @@ -1170,7 +1218,7 @@ static void TestRegexCAPI(void) { status = U_ZERO_ERROR; memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1189,7 +1237,7 @@ static void TestRegexCAPI(void) { status = U_ZERO_ERROR; memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ @@ -1216,20 +1264,21 @@ static void TestRegexCAPI(void) { if(U_SUCCESS(status)) { memset(fields, -1, sizeof(fields)); numFields = - uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); + uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status); TEST_ASSERT_SUCCESS(status); /* The TEST_ASSERT_SUCCESS call above should change too... */ if(U_SUCCESS(status)) { - TEST_ASSERT(numFields == 4); + TEST_ASSERT(numFields == 5); TEST_ASSERT_STRING("first ", fields[0], TRUE); TEST_ASSERT_STRING("tag-a", fields[1], TRUE); TEST_ASSERT_STRING(" second", fields[2], TRUE); TEST_ASSERT_STRING("tag-b", fields[3], TRUE); - TEST_ASSERT(fields[4] == NULL); + TEST_ASSERT_STRING("", fields[4], TRUE); + TEST_ASSERT(fields[5] == NULL); TEST_ASSERT(fields[8] == NULL); TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); - spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */ + spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." 
at NUL positions */ TEST_ASSERT(spaceNeeded == requiredCapacity); } } @@ -1422,8 +1471,8 @@ static void TestUTextAPI(void) { TEST_ASSERT_SUCCESS(status); TEST_ASSERT(clone3 != NULL); - u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); - u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); + u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat)); + u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat)); status = U_ZERO_ERROR; uregex_setText(clone1, testString1, -1, &status); @@ -1457,7 +1506,7 @@ static void TestUTextAPI(void) { UText *resultText; const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */ const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */ - u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */ + u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */ status = U_ZERO_ERROR; utext_openUTF8(&patternText, str_hello, -1, &status); @@ -1560,7 +1609,7 @@ static void TestUTextAPI(void) { status = U_ZERO_ERROR; utext_openUTF8(&text1, str_abcccd, -1, &status); - u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2)/2); + u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars)); utext_openUChars(&text2, text2Chars, -1, &status); utext_openUTF8(&patternText, str_abcd, -1, &status); @@ -1573,17 +1622,18 @@ static void TestUTextAPI(void) { TEST_ASSERT(resultText != &text1); utext_setNativeIndex(resultText, 0); utext_setNativeIndex(&text1, 0); - TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); + TEST_ASSERT(testUTextEqual(resultText, &text1)); utext_close(resultText); result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */ + (void)result; /* Suppress set but not used warning. 
*/ TEST_ASSERT(textLength == -1 || textLength == 6); resultText = uregex_getUText(re, NULL, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT(resultText != &text1); utext_setNativeIndex(resultText, 0); utext_setNativeIndex(&text1, 0); - TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0); + TEST_ASSERT(testUTextEqual(resultText, &text1)); utext_close(resultText); /* Then set a UChar * */ @@ -1592,7 +1642,7 @@ static void TestUTextAPI(void) { TEST_ASSERT_SUCCESS(status); utext_setNativeIndex(resultText, 0); utext_setNativeIndex(&text2, 0); - TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0); + TEST_ASSERT(testUTextEqual(resultText, &text2)); utext_close(resultText); result = uregex_getText(re, &textLength, &status); TEST_ASSERT(textLength == 7); @@ -1655,7 +1705,7 @@ static void TestUTextAPI(void) { { UChar text1[50]; UBool result; - u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); + u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("rx", 0, NULL, &status); @@ -1708,18 +1758,16 @@ static void TestUTextAPI(void) { } /* - * group() + * groupUText() */ { UChar text1[80]; UText *actual; UBool result; + int64_t groupLen = 0; + UChar groupBuf[20]; - const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */ - const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */ - - - u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); + u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1)); status = U_ZERO_ERROR; re = uregex_openC("abc(.*?)def", 0, NULL, &status); @@ -1729,58 +1777,38 @@ static void TestUTextAPI(void) { result = uregex_find(re, 0, &status); TEST_ASSERT(result==TRUE); - /* Capture Group 0, the full match. Should succeed. 
*/ - status = U_ZERO_ERROR; - actual = uregex_groupUTextDeep(re, 0, NULL, &status); - TEST_ASSERT_SUCCESS(status); - TEST_ASSERT_UTEXT(str_abcinteriordef, actual); - utext_close(actual); - /* Capture Group 0 with shallow clone API. Should succeed. */ status = U_ZERO_ERROR; - { - int64_t group_len; - int32_t len16; - UErrorCode shallowStatus = U_ZERO_ERROR; - int64_t nativeIndex; - UChar *groupChars; - UText groupText = UTEXT_INITIALIZER; + actual = uregex_groupUText(re, 0, NULL, &groupLen, &status); + TEST_ASSERT_SUCCESS(status); - actual = uregex_groupUText(re, 0, NULL, &group_len, &status); - TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */ + TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */ + utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status); - nativeIndex = utext_getNativeIndex(actual); - /* Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp] */ - /* len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus); */ - len16 = group_len; - - groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1)); - utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus); - - utext_openUChars(&groupText, groupChars, len16, &shallowStatus); - - TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText); - utext_close(&groupText); - free(groupChars); - } + TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE); utext_close(actual); /* Capture group #1. Should succeed. */ status = U_ZERO_ERROR; - actual = uregex_groupUTextDeep(re, 1, NULL, &status); + + actual = uregex_groupUText(re, 1, NULL, &groupLen, &status); TEST_ASSERT_SUCCESS(status); - TEST_ASSERT_UTEXT(str_interior, actual); + TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... 
" */ + /* (within the string text1) */ + TEST_ASSERT(10 == groupLen); /* length of " interior " */ + utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status); + TEST_ASSERT_STRING(" interior ", groupBuf, TRUE); + utext_close(actual); /* Capture group out of range. Error. */ status = U_ZERO_ERROR; - actual = uregex_groupUTextDeep(re, 2, NULL, &status); + actual = uregex_groupUText(re, 2, NULL, &groupLen, &status); TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); - TEST_ASSERT(utext_nativeLength(actual) == 0); utext_close(actual); uregex_close(re); - } /* @@ -1793,12 +1821,13 @@ static void TestUTextAPI(void) { UText *result; const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace x1x x...x. */ const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ - const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */ + const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, + 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */ const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */ const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. 
*/ status = U_ZERO_ERROR; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); utext_openUTF8(&replText, str_1x, -1, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status); @@ -1843,8 +1872,8 @@ static void TestUTextAPI(void) { const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <1> <...>. */ const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */ status = U_ZERO_ERROR; - u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1)); + u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); utext_openUTF8(&replText, str_1, -1, &status); re = uregex_openC("x(.*?)x", 0, NULL, &status); @@ -1883,15 +1912,15 @@ static void TestUTextAPI(void) { re = uregex_openC(".*", 0, 0, &status); TEST_ASSERT_SUCCESS(status); - u_uastrncpy(text, "whatever", sizeof(text)/2); - u_uastrncpy(repl, "some other", sizeof(repl)/2); + u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text)); + u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl)); uregex_setText(re, text, -1, &status); /* match covers whole target string */ uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; + bufCap = UPRV_LENGTHOF(buf); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("some other", buf, TRUE); @@ -1900,8 +1929,8 @@ static void TestUTextAPI(void) { uregex_find(re, 0, &status); TEST_ASSERT_SUCCESS(status); bufPtr = buf; - bufCap = sizeof(buf) / 2; - 
u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); + bufCap = UPRV_LENGTHOF(buf); + u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl)); uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); @@ -1924,8 +1953,8 @@ static void TestUTextAPI(void) { int32_t numFields; int32_t i; - u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); - u_uastrncpy(text2, "No match here.", sizeof(text2)/2); + u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit)); + u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2)); status = U_ZERO_ERROR; re = uregex_openC(":", 0, NULL, &status); @@ -2000,7 +2029,7 @@ static void TestUTextAPI(void) { int32_t numFields; int32_t i; - u_uastrncpy(textToSplit, "first second third", sizeof(textToSplit)/2); + u_uastrncpy(textToSplit, "first second third", UPRV_LENGTHOF(textToSplit)); status = U_ZERO_ERROR; re = uregex_openC("<(.*?)>", 0, NULL, &status); @@ -2130,13 +2159,15 @@ static void TestUTextAPI(void) { const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */ const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */ const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */ + const char str_empty[] = { 0x00 }; - TEST_ASSERT(numFields == 4); + TEST_ASSERT(numFields == 5); TEST_ASSERT_UTEXT(str_first, fields[0]); TEST_ASSERT_UTEXT(str_taga, fields[1]); TEST_ASSERT_UTEXT(str_second, fields[2]); TEST_ASSERT_UTEXT(str_tagb, fields[3]); - TEST_ASSERT(fields[4] == NULL); + TEST_ASSERT_UTEXT(str_empty, fields[4]); + TEST_ASSERT(fields[5] == NULL); TEST_ASSERT(fields[8] == NULL); TEST_ASSERT(fields[9] == &patternText); } @@ -2150,4 +2181,152 @@ static void TestUTextAPI(void) { utext_close(&patternText); } + +static void TestRefreshInput(void) { + /* + * RefreshInput changes out the input of a 
URegularExpression without + * changing anything else in the match state. Used with Java JNI, + * when Java moves the underlying string storage. This test + * runs a find() loop, moving the text after the first match. + * The right number of matches should still be found. + */ + UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */ + UChar movedStr[] = { 0, 0, 0, 0, 0, 0}; + UErrorCode status = U_ZERO_ERROR; + URegularExpression *re; + UText ut1 = UTEXT_INITIALIZER; + UText ut2 = UTEXT_INITIALIZER; + + re = uregex_openC("[ABC]", 0, 0, &status); + TEST_ASSERT_SUCCESS(status); + + utext_openUChars(&ut1, testStr, -1, &status); + TEST_ASSERT_SUCCESS(status); + uregex_setUText(re, &ut1, &status); + TEST_ASSERT_SUCCESS(status); + + /* Find the first match "A" in the original string */ + TEST_ASSERT(uregex_findNext(re, &status)); + TEST_ASSERT(uregex_start(re, 0, &status) == 0); + + /* Move the string, kill the original string. */ + u_strcpy(movedStr, testStr); + u_memset(testStr, 0, u_strlen(testStr)); + utext_openUChars(&ut2, movedStr, -1, &status); + TEST_ASSERT_SUCCESS(status); + uregex_refreshUText(re, &ut2, &status); + TEST_ASSERT_SUCCESS(status); + + /* Find the following two matches, now working in the moved string. */ + TEST_ASSERT(uregex_findNext(re, &status)); + TEST_ASSERT(uregex_start(re, 0, &status) == 2); + TEST_ASSERT(uregex_findNext(re, &status)); + TEST_ASSERT(uregex_start(re, 0, &status) == 4); + TEST_ASSERT(FALSE == uregex_findNext(re, &status)); + + uregex_close(re); +} + + +static void TestBug8421(void) { + /* Bug 8421: setTimeLimit on a regular expression before setting text to be matched + * was failing. 
+ */ + URegularExpression *re; + UErrorCode status = U_ZERO_ERROR; + int32_t limit = -1; + + re = uregex_openC("abc", 0, 0, &status); + TEST_ASSERT_SUCCESS(status); + + limit = uregex_getTimeLimit(re, &status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(limit == 0); + + uregex_setTimeLimit(re, 100, &status); + TEST_ASSERT_SUCCESS(status); + limit = uregex_getTimeLimit(re, &status); + TEST_ASSERT_SUCCESS(status); + TEST_ASSERT(limit == 100); + + uregex_close(re); +} + +static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) { + return FALSE; +} + +static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) { + return FALSE; +} + +static void TestBug10815() { + /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER + * when the callback function specified by uregex_setMatchCallback() returns FALSE + */ + URegularExpression *re; + UErrorCode status = U_ZERO_ERROR; + UChar text[100]; + + + // findNext() with a find progress callback function. + + re = uregex_openC(".z", 0, 0, &status); + TEST_ASSERT_SUCCESS(status); + + u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text)); + uregex_setText(re, text, -1, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_setFindProgressCallback(re, FindCallback, NULL, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_findNext(re, &status); + TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); + + uregex_close(re); + + // findNext() with a match progress callback function. + + status = U_ZERO_ERROR; + re = uregex_openC("((xxx)*)*y", 0, 0, &status); + TEST_ASSERT_SUCCESS(status); + + // Pattern + this text gives an exponential time match. Without the callback to stop the match, + // it will appear to be stuck in a (near) infinite loop. 
+ u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text)); + uregex_setText(re, text, -1, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_setMatchCallback(re, MatchCallback, NULL, &status); + TEST_ASSERT_SUCCESS(status); + + uregex_findNext(re, &status); + TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER); + + uregex_close(re); +} + +static const UChar startLinePattern[] = { 0x5E, 0x78, 0 }; // "^x" + +static void TestMatchStartLineWithEmptyText() { + UErrorCode status = U_ZERO_ERROR; + UText* ut = utext_openUChars(NULL, NULL, 0, &status); + TEST_ASSERT_SUCCESS(status); + if (U_SUCCESS(status)) { + URegularExpression *re = uregex_open(startLinePattern, -1, 0, NULL, &status); + TEST_ASSERT_SUCCESS(status); + if (U_SUCCESS(status)) { + uregex_setUText(re, ut, &status); + TEST_ASSERT(U_SUCCESS(status)); + if (U_SUCCESS(status)) { + UBool found = uregex_findNext(re, &status); + TEST_ASSERT(U_SUCCESS(status) && !found); + } + uregex_close(re); + } + utext_close(ut); + } +} + #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */