X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..4388f060552cc537e71e957d32f35e9d75a61233:/icuSources/test/cintltst/usrchtst.c diff --git a/icuSources/test/cintltst/usrchtst.c b/icuSources/test/cintltst/usrchtst.c index f02d6cc3..8c88fe2f 100644 --- a/icuSources/test/cintltst/usrchtst.c +++ b/icuSources/test/cintltst/usrchtst.c @@ -1,5 +1,5 @@ /******************************************************************** - * Copyright (c) 2001-2003 International Business Machines + * Copyright (c) 2001-2011 International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************** * File usrchtst.c @@ -10,7 +10,7 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_COLLATION +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO #include "unicode/usearch.h" #include "unicode/ustring.h" @@ -19,6 +19,7 @@ #include #include "usrchdat.c" #include "unicode/ubrk.h" +#include static UBool TOCLOSE_ = TRUE; static UCollator *EN_US_; @@ -47,24 +48,21 @@ static UBreakIterator *EN_CHARACTERBREAKER_; /** * Opening all static collators and break iterators */ -static void open(void) +static void open(UErrorCode* status) { if (TOCLOSE_) { - UErrorCode status = U_ZERO_ERROR; UChar rules[1024]; int32_t rulelength = 0; + *status = U_ZERO_ERROR; - EN_US_ = ucol_open("en_US", &status); - if(status == U_FILE_ACCESS_ERROR) { - log_data_err("Is your data around?\n"); - return; - } else if(U_FAILURE(status)) { - log_err("Error opening collator\n"); + EN_US_ = ucol_open("en_US", status); + if(U_FAILURE(*status)) { + log_err_status(*status, "Error opening collator\n"); return; } - FR_FR_ = ucol_open("fr_FR", &status); - DE_ = ucol_open("de_DE", &status); - ES_ = ucol_open("es_ES", &status); + FR_FR_ = ucol_open("fr_FR", status); + DE_ = ucol_open("de_DE", status); + ES_ = ucol_open("es_ES", status); u_strcpy(rules, ucol_getRules(DE_, &rulelength)); 
u_unescape(EXTRACOLLATIONRULE, rules + rulelength, 1024 - rulelength); @@ -72,17 +70,17 @@ static void open(void) ucol_close(DE_); DE_ = ucol_openRules(rules, u_strlen(rules), UCOL_ON, UCOL_TERTIARY, - (UParseError *)NULL, &status); + (UParseError *)NULL, status); u_strcpy(rules, ucol_getRules(ES_, &rulelength)); u_unescape(EXTRACOLLATIONRULE, rules + rulelength, 1024 - rulelength); ucol_close(ES_); ES_ = ucol_openRules(rules, u_strlen(rules), UCOL_ON, UCOL_TERTIARY, - NULL, &status); + NULL, status); #if !UCONFIG_NO_BREAK_ITERATION - EN_WORDBREAKER_ = ubrk_open(UBRK_WORD, "en_US", NULL, 0, &status); + EN_WORDBREAKER_ = ubrk_open(UBRK_WORD, "en_US", NULL, 0, status); EN_CHARACTERBREAKER_ = ubrk_open(UBRK_CHARACTER, "en_US", NULL, 0, - &status); + status); #endif TOCLOSE_ = TRUE; } @@ -93,7 +91,12 @@ static void open(void) */ static void TestStart(void) { - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } TOCLOSE_ = FALSE; } @@ -141,20 +144,8 @@ static char *toCharString(const UChar* unichars) *temp ++ = (char)ch; } else { - char digit[5]; - int zerosize; - *temp = 0; - strcat(temp, "\\u"); - temp = temp + 2; - sprintf(digit, "%x", ch); - zerosize = 4 - strlen(digit); - while (zerosize != 0) { - *temp ++ = '0'; - zerosize --; - } - *temp = 0; - strcat(temp, digit); - temp = temp + strlen(digit); + sprintf(temp, "\\u%04x", ch); + temp += 6; /* \uxxxx */ } } *temp = 0; @@ -252,17 +243,25 @@ static void TestOpenClose(void) status = U_ZERO_ERROR; result = usearch_open(pattern, 3, text, 6, "en_US", NULL, &status); if (U_FAILURE(status) || result == NULL) { - log_err("Error: NULL break iterator is valid for opening search\n"); + log_err_status(status, "Error: NULL break iterator is valid for opening search\n"); } else { usearch_close(result); } - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to 
open static collators %s\n", u_errorName(status)); + return; + } status = U_ZERO_ERROR; result = usearch_openFromCollator(pattern, 3, text, 6, EN_US_, NULL, &status); if (U_FAILURE(status) || result == NULL) { - log_err("Error: NULL break iterator is valid for opening search\n"); + if (EN_US_ == NULL) { + log_data_err("Opening collator failed.\n"); + } else { + log_err("Error: NULL break iterator is valid for opening search\n"); + } } else { usearch_close(result); @@ -274,7 +273,7 @@ static void TestOpenClose(void) result = usearch_open(pattern, 3, text, 6, "en_US", breakiter, &status); if (U_FAILURE(status) || result == NULL) { - log_err("Error: Break iterator is valid for opening search\n"); + log_err_status(status, "Error: Break iterator is valid for opening search\n"); } else { usearch_close(result); @@ -283,7 +282,11 @@ static void TestOpenClose(void) result = usearch_openFromCollator(pattern, 3, text, 6, EN_US_, breakiter, &status); if (U_FAILURE(status) || result == NULL) { - log_err("Error: Break iterator is valid for opening search\n"); + if (EN_US_ == NULL) { + log_data_err("Opening collator failed.\n"); + } else { + log_err("Error: Break iterator is valid for opening search\n"); + } } else { usearch_close(result); @@ -304,7 +307,11 @@ static void TestInitialization(void) /* simple test on the pattern ce construction */ pattern[0] = 0x41; pattern[1] = 0x42; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } result = usearch_openFromCollator(pattern, 2, text, 3, EN_US_, NULL, &status); if (U_FAILURE(status)) { @@ -330,19 +337,27 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch, const SearchData search) { int count = 0; - int matchlimit = 0; UErrorCode status = U_ZERO_ERROR; int32_t matchindex = search.offset[count]; int32_t textlength; UChar matchtext[128]; + int32_t matchlength; + int32_t nextStart; + UBool isOverlap; + + 
usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, search.elemCompare, &status); + if (U_FAILURE(status)) { + log_err("Error setting USEARCH_ELEMENT_COMPARISON attribute %s\n", u_errorName(status)); + return FALSE; + } if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) { log_err("Error with the initialization of match start and length\n"); } - /* start of following matches */ + /* start of next matches */ while (U_SUCCESS(status) && matchindex >= 0) { - uint32_t matchlength = search.size[count]; + matchlength = search.size[count]; usearch_next(strsrch, &status); if (matchindex != usearch_getMatchedStart(strsrch) || matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) { @@ -350,9 +365,9 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch, log_err("Text: %s\n", str); str = toCharString(usearch_getPattern(strsrch, &textlength)); log_err("Pattern: %s\n", str); - log_err("Error following match found at %d %d\n", - usearch_getMatchedStart(strsrch), - usearch_getMatchedLength(strsrch)); + log_err("Error next match found at idx %d (len:%d); expected %d (len:%d)\n", + usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch), + matchindex, matchlength); return FALSE; } count ++; @@ -362,30 +377,29 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch, memcmp(matchtext, usearch_getText(strsrch, &textlength) + matchindex, matchlength * sizeof(UChar)) != 0) { - log_err("Error getting following matched text\n"); + log_err("Error getting next matched text\n"); } matchindex = search.offset[count]; } usearch_next(strsrch, &status); - if ((uint32_t)usearch_getMatchedStart(strsrch) != USEARCH_DONE || + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) { char *str = toCharString(usearch_getText(strsrch, &textlength)); log_err("Text: %s\n", str); str = toCharString(usearch_getPattern(strsrch, &textlength)); log_err("Pattern: %s\n", str); - 
log_err("Error following match found at %d %d\n", + log_err("Error next match found at %d (len:%d); expected \n", usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch)); return FALSE; } - /* start of preceding matches */ + /* start of previous matches */ count = count == 0 ? 0 : count - 1; - matchlimit = count; matchindex = search.offset[count]; while (U_SUCCESS(status) && matchindex >= 0) { - uint32_t matchlength = search.size[count]; + matchlength = search.size[count]; usearch_previous(strsrch, &status); if (matchindex != usearch_getMatchedStart(strsrch) || matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) { @@ -393,9 +407,9 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch, log_err("Text: %s\n", str); str = toCharString(usearch_getPattern(strsrch, &textlength)); log_err("Pattern: %s\n", str); - log_err("Error preceding match found at %d %d\n", - usearch_getMatchedStart(strsrch), - usearch_getMatchedLength(strsrch)); + log_err("Error previous match found at %d (len:%d); expected %d (len:%d)\n", + usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch), + matchindex, matchlength); return FALSE; } @@ -404,25 +418,124 @@ static UBool assertEqualWithUStringSearch( UStringSearch *strsrch, memcmp(matchtext, usearch_getText(strsrch, &textlength) + matchindex, matchlength * sizeof(UChar)) != 0) { - log_err("Error getting preceding matched text\n"); + log_err("Error getting previous matched text\n"); } matchindex = count > 0 ? 
search.offset[count - 1] : -1; count --; } usearch_previous(strsrch, &status); - if ((uint32_t)usearch_getMatchedStart(strsrch) != USEARCH_DONE || + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) { char *str = toCharString(usearch_getText(strsrch, &textlength)); log_err("Text: %s\n", str); str = toCharString(usearch_getPattern(strsrch, &textlength)); log_err("Pattern: %s\n", str); - log_err("Error preceding match found at %d %d\n", + log_err("Error previous match found at %d (len:%d); expected \n", usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch)); return FALSE; } + + isOverlap = (usearch_getAttribute(strsrch, USEARCH_OVERLAP) == USEARCH_ON); + + /* start of following matches */ + count = 0; + matchindex = search.offset[count]; + nextStart = 0; + + while (TRUE) { + usearch_following(strsrch, nextStart, &status); + + if (matchindex < 0) { + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected \n", + nextStart, isOverlap, + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return FALSE; + } + /* no more matches */ + break; + } + + matchlength = search.size[count]; + if (usearch_getMatchedStart(strsrch) != matchindex + || usearch_getMatchedLength(strsrch) != matchlength + || U_FAILURE(status)) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n", + nextStart, isOverlap, + usearch_getMatchedStart(strsrch), 
usearch_getMatchedLength(strsrch), + matchindex, matchlength); + return FALSE; + } + + if (isOverlap || usearch_getMatchedLength(strsrch) == 0) { + nextStart = usearch_getMatchedStart(strsrch) + 1; + } else { + nextStart = usearch_getMatchedStart(strsrch) + usearch_getMatchedLength(strsrch); + } + + count++; + matchindex = search.offset[count]; + } + + /* start of preceding matches */ + count = -1; /* last non-negative offset index, could be -1 if no match */ + while (search.offset[count + 1] >= 0) { + count++; + } + usearch_getText(strsrch, &nextStart); + + while (TRUE) { + usearch_preceding(strsrch, nextStart, &status); + + if (count < 0) { + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected \n", + nextStart, isOverlap, + usearch_getMatchedStart(strsrch), + usearch_getMatchedLength(strsrch)); + return FALSE; + } + /* no more matches */ + break; + } + + matchindex = search.offset[count]; + matchlength = search.size[count]; + if (usearch_getMatchedStart(strsrch) != matchindex + || usearch_getMatchedLength(strsrch) != matchlength + || U_FAILURE(status)) { + char *str = toCharString(usearch_getText(strsrch, &textlength)); + log_err("Text: %s\n", str); + str = toCharString(usearch_getPattern(strsrch, &textlength)); + log_err("Pattern: %s\n", str); + log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n", + nextStart, isOverlap, + usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch), + matchindex, matchlength); + return FALSE; + } + + nextStart = matchindex; + count--; + } + + usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, 
&status); return TRUE; } @@ -465,28 +578,35 @@ static UBool assertCanonicalEqual(const SearchData search) UCollator *collator = getCollator(search.collator); UBreakIterator *breaker = getBreakIterator(search.breaker); UStringSearch *strsrch; + UBool result = TRUE; CHECK_BREAK_BOOL(search.breaker); u_unescape(search.text, text, 128); u_unescape(search.pattern, pattern, 32); ucol_setStrength(collator, search.strength); + ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator, - breaker, &status); + breaker, &status); usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, &status); if (U_FAILURE(status)) { log_err("Error opening string search %s\n", u_errorName(status)); - return FALSE; + result = FALSE; + goto bail; } if (!assertEqualWithUStringSearch(strsrch, search)) { ucol_setStrength(collator, UCOL_TERTIARY); usearch_close(strsrch); - return FALSE; + result = FALSE; + goto bail; } + +bail: + ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); ucol_setStrength(collator, UCOL_TERTIARY); usearch_close(strsrch); - return TRUE; + return result; } static UBool assertEqualWithAttribute(const SearchData search, @@ -528,7 +648,12 @@ static UBool assertEqualWithAttribute(const SearchData search, static void TestBasic(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (BASIC[count].text != NULL) { if (!assertEqual(BASIC[count])) { log_err("Error at test number %d\n", count); @@ -542,7 +667,11 @@ static void TestNormExact(void) { int count = 0; UErrorCode status = U_ZERO_ERROR; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_ON, 
&status); if (U_FAILURE(status)) { log_err("Error setting collation normalization %s\n", @@ -575,7 +704,12 @@ static void TestNormExact(void) static void TestStrength(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (STRENGTH[count].text != NULL) { if (!assertEqual(STRENGTH[count])) { log_err("Error at test number %d\n", count); @@ -595,7 +729,11 @@ static void TestBreakIterator(void) { CHECK_BREAK("x"); #if !UCONFIG_NO_BREAK_ITERATION - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } if (usearch_getBreakIterator(NULL) != NULL) { log_err("Expected NULL breakiterator from NULL string search\n"); } @@ -658,16 +796,16 @@ static void TestBreakIterator(void) { search = &(BREAKITERATOREXACT[count + 1]); breaker = getBreakIterator(search->breaker); usearch_setBreakIterator(strsrch, breaker, &status); - if (U_FAILURE(status) || - usearch_getBreakIterator(strsrch) != breaker) { + if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != breaker) { log_err("Error setting break iterator\n"); usearch_close(strsrch); goto ENDTESTBREAKITERATOR; } usearch_reset(strsrch); if (!assertEqualWithUStringSearch(strsrch, *search)) { - log_err("Error at test number %d\n", count); - goto ENDTESTBREAKITERATOR; + log_err("Error at test number %d\n", count); + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; } usearch_close(strsrch); count += 2; @@ -690,7 +828,11 @@ static void TestVariable(void) { int count = 0; UErrorCode status = U_ZERO_ERROR; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if (U_FAILURE(status)) { log_err("Error 
setting collation alternate attribute %s\n", @@ -711,7 +853,12 @@ static void TestVariable(void) static void TestOverlap(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (OVERLAP[count].text != NULL) { if (!assertEqualWithAttribute(OVERLAP[count], USEARCH_OFF, USEARCH_ON)) { @@ -734,7 +881,7 @@ static void TestOverlap(void) const SearchData *search = &(OVERLAP[count]); UCollator *collator = getCollator(search->collator); UStringSearch *strsrch; - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; u_unescape(search->text, text, 128); u_unescape(search->pattern, pattern, 32); @@ -810,7 +957,7 @@ static void TestCollator(void) } usearch_close(strsrch); - open(); + open(&status); if (usearch_getCollator(NULL) != NULL) { log_err("Expected NULL collator from NULL string search\n"); @@ -870,7 +1017,11 @@ static void TestPattern(void) int32_t templength; UErrorCode status = U_ZERO_ERROR; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } if (usearch_getPattern(NULL, &templength) != NULL) { log_err("Error NULL string search expected returning NULL pattern\n"); } @@ -975,7 +1126,11 @@ static void TestText(void) u_unescape(TEXT[0].text, text, 128); u_unescape(TEXT[0].pattern, pattern, 32); - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } if (usearch_getText(NULL, &templength) != NULL) { log_err("Error NULL string search should return NULL text\n"); @@ -1039,7 +1194,12 @@ ENDTESTPATTERN: static void TestCompositeBoundaries(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators 
%s\n", u_errorName(status)); + return; + } while (COMPOSITEBOUNDARIES[count].text != NULL) { log_verbose("composite %d\n", count); if (!assertEqual(COMPOSITEBOUNDARIES[count])) { @@ -1052,7 +1212,7 @@ static void TestCompositeBoundaries(void) static void TestGetSetOffset(void) { - int index = 0; + int searchDataIndex = 0; UChar pattern[32]; UChar text[128]; UErrorCode status = U_ZERO_ERROR; @@ -1060,7 +1220,11 @@ static void TestGetSetOffset(void) memset(pattern, 0, 32*sizeof(UChar)); memset(text, 0, 128*sizeof(UChar)); - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } if (usearch_getOffset(NULL) != USEARCH_DONE) { log_err("usearch_getOffset(NULL) expected USEARCH_DONE\n"); } @@ -1075,9 +1239,9 @@ static void TestGetSetOffset(void) if (U_SUCCESS(status)) { log_err("Error expecting set offset error\n"); } - while (BASIC[index].text != NULL) { + while (BASIC[searchDataIndex].text != NULL) { int count = 0; - SearchData search = BASIC[index ++]; + SearchData search = BASIC[searchDataIndex ++]; int32_t matchindex = search.offset[count]; int32_t textlength; @@ -1132,7 +1296,7 @@ static void TestGetSetOffset(void) count += 2; } usearch_next(strsrch, &status); - if ((uint32_t)usearch_getMatchedStart(strsrch) != USEARCH_DONE) { + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE) { char *str = toCharString(usearch_getText(strsrch, &textlength)); log_err("Text: %s\n", str); str = toCharString(usearch_getPattern(strsrch, &textlength)); @@ -1158,7 +1322,11 @@ static void TestGetSetAttribute(void) memset(pattern, 0, 32*sizeof(UChar)); memset(text, 0, 128*sizeof(UChar)); - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } if (usearch_getAttribute(NULL, USEARCH_OVERLAP) != USEARCH_DEFAULT || usearch_getAttribute(NULL, USEARCH_CANONICAL_MATCH) != USEARCH_DEFAULT) { @@ 
-1242,7 +1410,11 @@ static void TestGetMatch(void) int32_t textlength; UChar matchtext[128]; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } if (usearch_getMatchedStart(NULL) != USEARCH_DONE || usearch_getMatchedLength(NULL) != USEARCH_DONE) { @@ -1329,16 +1501,20 @@ static void TestGetMatch(void) static void TestSetMatch(void) { int count = 0; - - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (MATCH[count].text != NULL) { SearchData search = MATCH[count]; int size = 0; - int index = 0; + int offsetIndex = 0; UChar text[128]; UChar pattern[32]; UStringSearch *strsrch; - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; if (usearch_first(NULL, &status) != USEARCH_DONE || usearch_last(NULL, &status) != USEARCH_DONE) { @@ -1370,25 +1546,25 @@ static void TestSetMatch(void) log_err("Error getting last match\n"); } - while (index < size) { - if (index + 2 < size) { - if (usearch_following(strsrch, search.offset[index + 2] - 1, - &status) != search.offset[index + 2] || + while (offsetIndex < size) { + if (offsetIndex + 2 < size) { + if (usearch_following(strsrch, search.offset[offsetIndex + 2] - 1, + &status) != search.offset[offsetIndex + 2] || U_FAILURE(status)) { log_err("Error getting following match at index %d\n", - search.offset[index + 2] - 1); + search.offset[offsetIndex + 2] - 1); } } - if (index + 1 < size) { - if (usearch_preceding(strsrch, search.offset[index + 1] + - search.size[index + 1] + 1, - &status) != search.offset[index + 1] || + if (offsetIndex + 1 < size) { + if (usearch_preceding(strsrch, search.offset[offsetIndex + 1] + + search.size[offsetIndex + 1] + 1, + &status) != search.offset[offsetIndex + 1] || U_FAILURE(status)) { log_err("Error getting preceeding match at index %d\n", - 
search.offset[index + 1] + 1); + search.offset[offsetIndex + 1] + 1); } } - index += 2; + offsetIndex += 2; } status = U_ZERO_ERROR; if (usearch_following(strsrch, u_strlen(text), &status) != @@ -1412,7 +1588,11 @@ static void TestReset(void) UChar pattern[] = {0x73}; UStringSearch *strsrch; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } strsrch = usearch_openFromCollator(pattern, 1, text, 9, EN_US_, NULL, &status); if (U_FAILURE(status)) { @@ -1452,7 +1632,12 @@ static void TestReset(void) static void TestSupplementary(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (SUPPLEMENTARY[count].text != NULL) { if (!assertEqual(SUPPLEMENTARY[count])) { log_err("Error at test number %d\n", count); @@ -1549,10 +1734,68 @@ static void TestIgnorable(void) ucol_close(collator); } +static void TestDiacriticMatch(void) +{ + UChar pattern[128]; + UChar text[128]; + UErrorCode status = U_ZERO_ERROR; + UStringSearch *strsrch = NULL; + UCollator *coll = NULL; + uint32_t count = 0; + SearchData search; + + memset(pattern, 0, 128*sizeof(UChar)); + memset(text, 0, 128*sizeof(UChar)); + + strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL, &status); + if (U_FAILURE(status)) { + log_err_status(status, "Error opening string search %s\n", u_errorName(status)); + return; + } + + search = DIACRITICMATCH[count]; + while (search.text != NULL) { + if (search.collator != NULL) { + coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status); + } else { + /* Always use "en_US" because some of these tests fail in Danish locales. 
*/ + coll = ucol_open("en_US"/*uloc_getDefault()*/, &status); + ucol_setStrength(coll, search.strength); + } + if (U_FAILURE(status)) { + log_err("Error opening string search collator(\"%s\") %s\n", search.collator, u_errorName(status)); + return; + } + + usearch_setCollator(strsrch, coll, &status); + if (U_FAILURE(status)) { + log_err("Error setting string search collator %s\n", u_errorName(status)); + return; + } + + u_unescape(search.text, text, 128); + u_unescape(search.pattern, pattern, 128); + usearch_setText(strsrch, text, -1, &status); + usearch_setPattern(strsrch, pattern, -1, &status); + if (!assertEqualWithUStringSearch(strsrch, search)) { + log_err("Error at test number %d\n", count); + } + ucol_close(coll); + + search = DIACRITICMATCH[++count]; + } + usearch_close(strsrch); +} + static void TestCanonical(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (BASICCANONICAL[count].text != NULL) { if (!assertCanonicalEqual(BASICCANONICAL[count])) { log_err("Error at test number %d\n", count); @@ -1566,7 +1809,11 @@ static void TestNormCanonical(void) { int count = 0; UErrorCode status = U_ZERO_ERROR; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); count = 0; while (NORMCANONICAL[count].text != NULL) { @@ -1582,7 +1829,12 @@ static void TestNormCanonical(void) static void TestStrengthCanonical(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (STRENGTHCANONICAL[count].text != NULL) { if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) { log_err("Error 
at test number %d\n", count); @@ -1600,7 +1852,11 @@ static void TestBreakIteratorCanonical(void) { #if !UCONFIG_NO_BREAK_ITERATION - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (count < 4) { /* 0-3 test are fixed */ UChar pattern[32]; @@ -1617,20 +1873,19 @@ static void TestBreakIteratorCanonical(void) { strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator, breaker, &status); if(status == U_FILE_ACCESS_ERROR) { - log_data_err("Is your data around?\n"); - return; + log_data_err("Is your data around?\n"); + goto ENDTESTBREAKITERATOR; } else if(U_FAILURE(status)) { - log_err("Error opening searcher\n"); - return; + log_err("Error opening searcher\n"); + goto ENDTESTBREAKITERATOR; } usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, &status); if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != breaker) { log_err("Error setting break iterator\n"); - if (strsrch != NULL) { - usearch_close(strsrch); - } + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; } if (!assertEqualWithUStringSearch(strsrch, *search)) { ucol_setStrength(collator, UCOL_TERTIARY); @@ -1640,8 +1895,7 @@ static void TestBreakIteratorCanonical(void) { search = &(BREAKITERATOREXACT[count + 1]); breaker = getBreakIterator(search->breaker); usearch_setBreakIterator(strsrch, breaker, &status); - if (U_FAILURE(status) || - usearch_getBreakIterator(strsrch) != breaker) { + if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != breaker) { log_err("Error setting break iterator\n"); usearch_close(strsrch); goto ENDTESTBREAKITERATOR; @@ -1650,8 +1904,9 @@ static void TestBreakIteratorCanonical(void) { usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, &status); if (!assertEqualWithUStringSearch(strsrch, *search)) { - log_err("Error at test number %d\n", count); - goto ENDTESTBREAKITERATOR; + log_err("Error at test number %d\n", 
count); + usearch_close(strsrch); + goto ENDTESTBREAKITERATOR; } usearch_close(strsrch); count += 2; @@ -1674,7 +1929,11 @@ static void TestVariableCanonical(void) { int count = 0; UErrorCode status = U_ZERO_ERROR; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if (U_FAILURE(status)) { log_err("Error setting collation alternate attribute %s\n", @@ -1695,7 +1954,12 @@ static void TestVariableCanonical(void) static void TestOverlapCanonical(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (OVERLAPCANONICAL[count].text != NULL) { if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], USEARCH_ON, USEARCH_ON)) { @@ -1718,7 +1982,7 @@ static void TestOverlapCanonical(void) const SearchData *search = &(OVERLAPCANONICAL[count]); UCollator *collator = getCollator(search->collator); UStringSearch *strsrch; - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; u_unescape(search->text, text, 128); u_unescape(search->pattern, pattern, 32); @@ -1770,7 +2034,11 @@ static void TestCollatorCanonical(void) UChar text[128]; UStringSearch *strsrch; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } u_unescape(COLLATORCANONICAL[0].text, text, 128); u_unescape(COLLATORCANONICAL[0].pattern, pattern, 32); @@ -1838,7 +2106,11 @@ static void TestPatternCanonical(void) int32_t templength; UErrorCode status = U_ZERO_ERROR; - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } u_unescape(PATTERNCANONICAL[0].text, text, 128); 
u_unescape(PATTERNCANONICAL[0].pattern, pattern, 32); @@ -1912,7 +2184,11 @@ static void TestTextCanonical(void) u_unescape(TEXTCANONICAL[0].text, text, 128); u_unescape(TEXTCANONICAL[0].pattern, pattern, 32); - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, NULL, &status); usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, @@ -1967,7 +2243,12 @@ ENDTESTPATTERN: static void TestCompositeBoundariesCanonical(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) { log_verbose("composite %d\n", count); if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) { @@ -1980,20 +2261,30 @@ static void TestCompositeBoundariesCanonical(void) static void TestGetSetOffsetCanonical(void) { - int index = 0; + int searchDataIndex = 0; UChar pattern[32]; UChar text[128]; UErrorCode status = U_ZERO_ERROR; UStringSearch *strsrch; + UCollator *collator; memset(pattern, 0, 32*sizeof(UChar)); memset(text, 0, 128*sizeof(UChar)); - open(); + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL, &status); + + collator = usearch_getCollator(strsrch); + ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); + usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON, &status); + /* testing out of bounds error */ usearch_setOffset(strsrch, -1, &status); if (U_SUCCESS(status)) { @@ -2003,13 +2294,13 @@ static void TestGetSetOffsetCanonical(void) if (U_SUCCESS(status)) { log_err("Error expecting set offset 
error\n"); } - while (BASICCANONICAL[index].text != NULL) { + while (BASICCANONICAL[searchDataIndex].text != NULL) { int count = 0; - SearchData search = BASICCANONICAL[index ++]; + SearchData search = BASICCANONICAL[searchDataIndex ++]; int32_t matchindex = search.offset[count]; int32_t textlength; - if (BASICCANONICAL[index].text == NULL) { + if (BASICCANONICAL[searchDataIndex].text == NULL) { /* skip the last one */ break; } @@ -2032,7 +2323,7 @@ static void TestGetSetOffsetCanonical(void) log_err("Error match found at %d %d\n", usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch)); - return; + goto bail; } matchindex = search.offset[count + 1] == -1 ? -1 : search.offset[count + 2]; @@ -2041,14 +2332,14 @@ static void TestGetSetOffsetCanonical(void) &status); if (usearch_getOffset(strsrch) != search.offset[count + 1] + 1) { log_err("Error setting offset\n"); - return; + goto bail; } } count += 2; } usearch_next(strsrch, &status); - if ((uint32_t)usearch_getMatchedStart(strsrch) != USEARCH_DONE) { + if (usearch_getMatchedStart(strsrch) != USEARCH_DONE) { char *str = toCharString(usearch_getText(strsrch, &textlength)); log_err("Text: %s\n", str); str = toCharString(usearch_getPattern(strsrch, &textlength)); @@ -2056,9 +2347,12 @@ static void TestGetSetOffsetCanonical(void) log_err("Error match found at %d %d\n", usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch)); - return; + goto bail; } } + +bail: + ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status); usearch_close(strsrch); close(); } @@ -2066,7 +2360,12 @@ static void TestGetSetOffsetCanonical(void) static void TestSupplementaryCanonical(void) { int count = 0; - open(); + UErrorCode status = U_ZERO_ERROR; + open(&status); + if (U_FAILURE(status)) { + log_err_status(status, "Unable to open static collators %s\n", u_errorName(status)); + return; + } while (SUPPLEMENTARYCANONICAL[count].text != NULL) { if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) { 
log_err("Error at test number %d\n", count);
@@ -2081,10 +2380,10 @@ static void TestContractionCanonical(void)
     UChar rules[128];
     UChar pattern[128];
     UChar text[128];
-    UCollator *collator;
+    UCollator *collator = NULL;
     UErrorCode status = U_ZERO_ERROR;
     int count = 0;
-    UStringSearch *strsrch;
+    UStringSearch *strsrch = NULL;
     memset(rules, 0, 128*sizeof(UChar));
     memset(pattern, 0, 128*sizeof(UChar));
     memset(text, 0, 128*sizeof(UChar));
@@ -2122,6 +2421,576 @@ static void TestContractionCanonical(void)
     ucol_close(collator);
 }
+/*
+ * Enables UCOL_NUMERIC_COLLATION on the collator opened for the "" locale and
+ * then tries to build a string search from it. The test logs an error unless
+ * usearch_openFromCollator() reports U_UNSUPPORTED_ERROR.
+ */
+static void TestNumeric(void) {
+    UCollator *coll = NULL;
+    UStringSearch *strsrch = NULL;
+    UErrorCode status = U_ZERO_ERROR;
+
+    UChar pattern[128];
+    UChar text[128];
+    memset(pattern, 0, 128*sizeof(UChar));
+    memset(text, 0, 128*sizeof(UChar));
+
+    coll = ucol_open("", &status);
+    if(U_FAILURE(status)) {
+        log_data_err("Could not open UCA. Is your data around?\n");
+        return;
+    }
+
+    ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
+
+    strsrch = usearch_openFromCollator(pattern, 1, text, 1, coll, NULL, &status);
+
+    /* NOTE(review): the "|| U_SUCCESS(status)" clause looks redundant; a success
+     * code can never equal U_UNSUPPORTED_ERROR, so the first comparison already
+     * covers it. */
+    if(status != U_UNSUPPORTED_ERROR || U_SUCCESS(status)) {
+        log_err("Expected U_UNSUPPORTED_ERROR when trying to instantiate a search object from a CODAN collator, got %s instead\n", u_errorName(status));
+        /* The open was expected to fail; release the object if one came back anyway. */
+        if(strsrch) {
+            usearch_close(strsrch);
+        }
+    }
+
+    ucol_close(coll);
+
+}
+
+/* This test is for ticket 4038 due to incorrect backward searching when certain patterns have a length > 1 */
+static void TestForwardBackward(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    /* Both handles start as NULL so the shared exit label can tell what was opened. */
+    UCollator *coll = NULL;
+    UStringSearch *search = NULL;
+    UChar usrcstr[32], value[4];
+    int32_t pos= -1;
+    /* Index at which both the forward and the backward search must report the match. */
+    int32_t expectedPos = 9;
+
+    coll = ucol_open("en_GB", &status);
+    if (U_FAILURE(status)) {
+        log_err_status(status, "ucol_open failed: %s\n", u_errorName(status));
+        goto exitTestForwardBackward;
+    }
+    ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
+    ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
+
ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
+
+    u_uastrcpy(usrcstr, "QBitArray::bitarr_data"); /* text */
+    u_uastrcpy(value, "::"); /* pattern */
+
+    search = usearch_openFromCollator(value, 2, usrcstr, 22, coll, NULL, &status);
+    if (U_FAILURE(status)) {
+        log_err("usearch_openFromCollator failed: %s\n", u_errorName(status));
+        goto exitTestForwardBackward;
+    }
+
+    usearch_reset(search);
+    /* forward search */
+    pos = usearch_first(search, &status);
+    if (pos != expectedPos) {
+        log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
+        goto exitTestForwardBackward;
+    }
+
+    pos = -1;
+    usearch_reset(search);
+    /* backward search */
+    pos = usearch_last(search, &status);
+    if (pos != expectedPos) {
+        log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
+    }
+
+exitTestForwardBackward :
+    /* Release the search before the collator it was opened from, matching the
+     * order used by the other tests in this file. */
+    if (search != NULL) {
+        usearch_close(search);
+    }
+    if (coll != NULL) {
+        ucol_close(coll);
+    }
+}
+
+/*
+ * Logs a failure tagged with the current file and line when the UErrorCode x
+ * holds a failure value. Wrapped in do/while(0) so that "TEST_ASSERT(ec);"
+ * behaves as a single statement, including under an unbraced if/else.
+ */
+#define TEST_ASSERT(x) \
+    do {if (U_FAILURE(x)) {log_err_status(x, "%s:%d: FAIL: test assertion failure \n", __FILE__, __LINE__);\
+    }} while (0)
+
+/*
+ * Builds a pattern and a text in which every '0' digit has been replaced by
+ * U+0000, then searches for the pattern; one match is expected, at offset 2
+ * with length 4.
+ */
+static void TestSearchForNull(void) {
+    UCollator *coll;
+    UErrorCode ec;
+    UStringSearch *search;
+    int pos;
+    int len;
+    int expectedPos;
+    int expectedLen;
+    int expectedNum;
+    int count = 0;
+    const UChar zerodigit = 0x0030; /* 0 */
+    const UChar nulldigit = 0x0000; /* null */
+
+    /* static const UChar var[(length)+1]=U_DECLARE_UTF16(cs) */
+#define PATTERN_LEN 4
+#define TEXT_LEN 10
+
+    U_STRING_DECL(_pattern, "IS 0", PATTERN_LEN);
+    U_STRING_DECL(_text, "_0IS 0 OK?", TEXT_LEN);
+    UChar pattern[PATTERN_LEN + 1], text[TEXT_LEN + 1];
+
+    U_STRING_INIT(_pattern, "IS 0", PATTERN_LEN);
+    U_STRING_INIT(_text, "_0IS 0 OK?", TEXT_LEN);
+    expectedPos = 2;
+    expectedLen = 4;
+    expectedNum = 1;
+
+    /* Copy the pattern, swapping each '0' digit for U+0000. */
+    for (pos = 0; pos < PATTERN_LEN; pos++) {
+        if (_pattern[pos] == zerodigit) {
+            pattern[pos] = nulldigit;
+        } else {
+            pattern[pos] = _pattern[pos];
+        }
+    }
+    pattern[PATTERN_LEN] =
0x0000;
+
+    /* Copy the text the same way, swapping each '0' digit for U+0000. */
+    for (pos = 0; pos < TEXT_LEN; pos++) {
+        if (_text[pos] == zerodigit) {
+            text[pos] = nulldigit;
+        } else {
+            text[pos] = _text[pos];
+        }
+    }
+    text[TEXT_LEN] = 0x0000;
+
+    ec = U_ZERO_ERROR;
+
+    /* create a US-English collator */
+    coll = ucol_open("en_US", &ec);
+
+    /* make sure we didn't fail. */
+    TEST_ASSERT (ec);
+
+    ucol_setStrength(coll, UCOL_IDENTICAL);
+
+    /* open a search for the pattern, which contains an embedded U+0000;
+     * explicit lengths are passed so the NUL does not end the strings */
+    search = usearch_openFromCollator(pattern, PATTERN_LEN, text,
+            TEXT_LEN, coll, NULL, &ec);
+    TEST_ASSERT (ec);
+
+    if (coll != NULL && search != NULL) {
+        pos = usearch_first(search, &ec);
+        len = usearch_getMatchedLength(search);
+        if (pos != expectedPos) {
+            log_err("Expected search result: %d; Got instead: %d\n", expectedPos,
+                    pos);
+        }
+
+        if (len != expectedLen) {
+            log_err("Expected search result length: %d; Got instead: %d\n",
+                    expectedLen, len);
+        }
+
+        /* Count every match from the start of the text. */
+        for (pos = usearch_first(search, &ec); pos != USEARCH_DONE; pos
+                = usearch_next(search, &ec)) {
+            log_verbose("Match at %d\n", pos);
+            count += 1;
+        }
+
+        if (count != expectedNum) {
+            log_err("Expected %d search hits, found %d\n", expectedNum, count);
+        }
+    }
+
+    /* Release the search before the collator it was opened from, matching
+     * TestStrengthIdentical below. */
+    usearch_close(search);
+    ucol_close(coll);
+}
+
+/*
+ * Pattern and text differ only in their middle code unit (0x0591 vs 0x0592).
+ * The test first searches at UCOL_TERTIARY strength, then repeats the search
+ * at UCOL_IDENTICAL strength.
+ */
+static void TestStrengthIdentical(void)
+{
+    UCollator *coll;
+    UErrorCode ec = U_ZERO_ERROR;
+    UStringSearch *search;
+
+    UChar pattern[] = {0x05E9, 0x0591, 0x05E9};
+    UChar text[] = {0x05E9, 0x0592, 0x05E9};
+    int32_t pLen = sizeof (pattern) / sizeof(pattern[0]);
+    int32_t tLen = sizeof(text) / sizeof (text[0]);
+    int32_t expectedPos = 0;
+    int32_t expectedLen = 3;
+
+    int32_t pos;
+    int32_t len;
+
+    /* create a US-English collator */
+    coll = ucol_open ("en_US", &ec);
+
+    /* make sure we didn't fail.
*/
+    TEST_ASSERT (ec);
+
+    ucol_setStrength( coll, UCOL_TERTIARY);
+
+    /* open a search for the pattern in the text, using the tertiary-strength collator */
+    search = usearch_openFromCollator (pattern, pLen, text, tLen, coll, NULL, &ec);
+    TEST_ASSERT (ec);
+
+    if (coll != NULL && search != NULL) {
+        /* At tertiary strength a match at expectedPos with length expectedLen is required. */
+        pos = usearch_first(search, &ec);
+        len = usearch_getMatchedLength(search);
+
+        if(pos != expectedPos) {
+            log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
+        }
+
+        if(len != expectedLen) {
+            log_err("Expected search result length: %d; Got instead: %d\n", expectedLen, len);
+        }
+
+        /* Now try it at strength == UCOL_IDENTICAL */
+        ucol_setStrength(coll, UCOL_IDENTICAL);
+        usearch_reset(search);
+
+        pos = usearch_first(search, &ec);
+        len = usearch_getMatchedLength(search);
+
+        /* -1 is the value this test treats as "no match". */
+        if(pos != -1) {
+            log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos);
+        }
+    }
+
+    usearch_close(search);
+    ucol_close(coll);
+}
+
+/**
+* TestUsingSearchCollator
+*/
+
+/* Number of elements in a true array (not a pointer). */
+#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))
+
+/* One search pattern together with the match offsets expected for it. */
+typedef struct {
+    const UChar * pattern;      /* NUL-terminated pattern; NULL marks the end of a table */
+    const int32_t * offsets;    /* expected match offsets, in ascending order */
+    int32_t offsetsLen;         /* number of entries in offsets */
+} PatternAndOffsets;
+
+/* Text that is searched; the leading slash-star NN markers give the index of the first code unit on each row. */
+static const UChar scKoText[] = {
+    0x0020,
+/*01*/ 0xAC00, 0x0020, /* simple LV Hangul */
+/*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */
+/*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */
+/*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */
+/*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */
+/*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */
+/*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */
+/*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */
+/*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */
+/*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */
+/*34*/ 0x00E6, 0x0020, /* small letter
ae, expands */
+/*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */
+    0
+};
+
+/* Search patterns, paired up: each precomposed syllable is followed by a jamo spelling of it. */
+static const UChar scKoPat0[] = { 0xAC01, 0 };
+static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */
+static const UChar scKoPat2[] = { 0xAC0F, 0 };
+static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */
+static const UChar scKoPat4[] = { 0xAFFF, 0 };
+static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */
+
+/* Expected match offsets into scKoText for the "@collation=search" locales. */
+static const int32_t scKoSrchOff01[] = { 3, 9, 13 };
+static const int32_t scKoSrchOff23[] = { 5, 21, 25 };
+static const int32_t scKoSrchOff45[] = { 7, 30 };
+
+/* Pattern/offset table used with the search collators; ends at the NULL pattern. */
+static const PatternAndOffsets scKoSrchPatternsOffsets[] = {
+    { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
+    { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
+    { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
+    { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
+    { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
+    { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
+    { NULL, NULL, 0 }
+};
+
+/* Expected match offsets into scKoText for the plain "root" locale. */
+static const int32_t scKoStndOff01[] = { 3, 9 };
+static const int32_t scKoStndOff2[] = { 5, 21 };
+static const int32_t scKoStndOff3[] = { 25 };
+static const int32_t scKoStndOff45[] = { 7, 30 };
+
+/* Pattern/offset table used with the plain "root" collator; ends at the NULL pattern. */
+static const PatternAndOffsets scKoStndPatternsOffsets[] = {
+    { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
+    { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
+    { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) },
+    { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) },
+    { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
+    { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
+    { NULL, NULL, 0 }
+};
+
+/* One test case: a collator locale, the text to search and the patterns to try. */
+typedef struct {
+    const char * locale;                            /* locale ID; NULL marks the end of the table */
+    const UChar * text;                             /* NUL-terminated text to search */
+    const PatternAndOffsets * patternsAndOffsets;   /* table ending at a NULL pattern */
+} TUSCItem;
+
+/* Test cases walked by TestUsingSearchCollator. */
+static const TUSCItem tuscItems[] = {
+    { "root", scKoText,
scKoStndPatternsOffsets },
+    { "root@collation=search", scKoText, scKoSrchPatternsOffsets },
+    { "ko@collation=search", scKoText, scKoSrchPatternsOffsets },
+    { NULL, NULL, NULL }
+};
+
+/* Placeholder pattern used only to open the search; the real patterns are set afterwards. */
+static const UChar dummyPat[] = { 0x0061, 0 };
+
+/*
+ * For every entry of tuscItems: opens a collator for the entry's locale and a
+ * search over the entry's text, then, for each pattern of the entry, steps
+ * through the matches forwards and backwards and compares every reported
+ * offset with the expected list.
+ */
+static void TestUsingSearchCollator(void)
+{
+    const TUSCItem * tuscItemPtr;
+    for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) {
+        UErrorCode status = U_ZERO_ERROR;
+        UCollator* ucol = ucol_open(tuscItemPtr->locale, &status);
+        if ( U_SUCCESS(status) ) {
+            UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status);
+            if ( U_SUCCESS(status) ) {
+                const PatternAndOffsets * patternsOffsetsPtr;
+                for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {
+                    usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status);
+                    if ( U_SUCCESS(status) ) {
+                        int32_t offset;
+                        const int32_t * nextOffsetPtr;
+                        const int32_t * limitOffsetPtr;
+
+                        /* Forward pass: nextOffsetPtr walks the expected offsets from first to last. */
+                        usearch_reset(usrch);
+                        nextOffsetPtr = patternsOffsetsPtr->offsets;
+                        limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
+                        while (TRUE) {
+                            offset = usearch_next(usrch, &status);
+                            if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
+                                break;
+                            }
+                            if ( nextOffsetPtr < limitOffsetPtr ) {
+                                if (offset != *nextOffsetPtr) {
+                                    log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
+                                    /* Jump to the limit so the "fewer matches" check below stays quiet. */
+                                    nextOffsetPtr = limitOffsetPtr;
+                                    break;
+                                }
+                                nextOffsetPtr++;
+                            } else {
+                                /* No break here: this is logged once for every surplus match. */
+                                log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale );
+                            }
+                        }
+                        if ( U_FAILURE(status) ) {
+                            log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+                        } else if ( nextOffsetPtr < limitOffsetPtr ) {
+                            log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale );
+                        }
+
+                        /* Clear any forward-pass error before the next step. */
+                        status = U_ZERO_ERROR;
+
usearch_reset(usrch);
+                        /* Backward pass: start one past the last expected offset and step down towards the first. */
+                        nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
+                        limitOffsetPtr = patternsOffsetsPtr->offsets;
+                        while (TRUE) {
+                            offset = usearch_previous(usrch, &status);
+                            if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
+                                break;
+                            }
+                            if ( nextOffsetPtr > limitOffsetPtr ) {
+                                /* Pre-decrement: the pointer sits one past the entry to compare. */
+                                nextOffsetPtr--;
+                                if (offset != *nextOffsetPtr) {
+                                    log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
+                                    /* Jump to the limit so the "fewer matches" check below stays quiet. */
+                                    nextOffsetPtr = limitOffsetPtr;
+                                    break;
+                                }
+                            } else {
+                                log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale );
+                            }
+                        }
+                        if ( U_FAILURE(status) ) {
+                            log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+                        } else if ( nextOffsetPtr > limitOffsetPtr ) {
+                            log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale );
+                        }
+
+                    } else {
+                        /* NOTE(review): status is not reset on this path, so it is still a
+                         * failure code when the next pattern is set -- confirm that is intended. */
+                        log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+                    }
+                }
+                usearch_close(usrch);
+            } else {
+                log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+            }
+            ucol_close(ucol);
+        } else {
+            log_data_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+        }
+    }
+}
+
+
+/*
+ * Shared body of the TestPCEBuffer_* tests: opens a collator from a short
+ * definition string, a character break iterator for that collator's valid
+ * locale, and a search for `search` (searchLen code units) inside `source`
+ * (sourceLen code units).
+ */
+static void TestPCEBuffer_with(const UChar *search, uint32_t searchLen, const UChar *source, uint32_t sourceLen) {
+    UErrorCode icuStatus = U_ZERO_ERROR;
+    UCollator *coll;
+    const char *locale;
+    UBreakIterator *ubrk;
+    UStringSearch *usearch;
+    int32_t match = 0;
+
+
+    coll = ucol_openFromShortString( "LSK_AS_CX_EX_FX_HX_NX_S4",
+                                    FALSE,
+                                    NULL,
+                                    &icuStatus );
+    if ( U_FAILURE(icuStatus) )
+    {
+        log_data_err( "ucol_openFromShortString error %s\n" , u_errorName(icuStatus));
+        goto exit;
+    }
+
+    locale = ucol_getLocaleByType( coll,
+                                  ULOC_VALID_LOCALE,
+                                  &icuStatus );
+    if ( U_FAILURE(icuStatus) )
+    {
+        log_err( "ucol_getLocaleByType error %s\n", u_errorName(icuStatus) );
+        /* The collator is already open at this point and must be released. */
+        goto closeCollator;
+    }
+
+    log_verbose("locale=%s\n", locale);
+
+    ubrk = ubrk_open( UBRK_CHARACTER,
+                      locale,
+                      source,
+                      sourceLen,
+                      &icuStatus );
+    if ( U_FAILURE(icuStatus) )
+    {
+        log_err( "ubrk_open error %s\n", u_errorName(icuStatus) );
+        goto closeCollator;
+    }
+
+    usearch = usearch_openFromCollator( search,
+                                        searchLen,
+                                        source,
+                                        sourceLen,
+                                        coll,
+                                        ubrk,
+                                        &icuStatus );
+    if ( U_FAILURE(icuStatus) )
+    {
+        log_err( "usearch_openFromCollator error %s\n", u_errorName(icuStatus) );
+        goto closeBreakIterator;
+    }
+
+    match = usearch_first( usearch,
+                           &icuStatus );
+    if ( U_FAILURE(icuStatus) )
+    {
+        log_err( "usearch_first error %s\n", u_errorName(icuStatus) );
+        goto closeSearch;
+    }
+
+    /* The pattern equals the text in both callers, so the match must start at offset 0. */
+    if(match==0) {
+        log_verbose("OK: match=%d\n", match);
+    } else {
+        log_err("Err: match expected 0 got %d\n", match);
+    }
+
+    /* Staged cleanup: each label releases one resource and falls through to the
+     * next, so an error path frees exactly what had been opened before it. */
+closeSearch:
+    usearch_close(usearch);
+closeBreakIterator:
+    ubrk_close(ubrk);
+closeCollator:
+    ucol_close(coll);
+
+exit:
+    return;
+}
+
+
+/* Runs TestPCEBuffer_with() on a string searched for inside an identical copy of itself. */
+static void TestPCEBuffer_100df(void) {
+    UChar search[] =
+        { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df }; /* 37 code units; 0x00df occurs 11 times and is not a surrogate */
+    UChar source[] =
+        { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df };
+    uint32_t searchLen = sizeof(search)/sizeof(UChar);
+    uint32_t sourceLen = sizeof(source)/sizeof(UChar);
+    TestPCEBuffer_with(search,searchLen,source,sourceLen);
+ }
+
+
+/* Same layout as TestPCEBuffer_100df, with 0xdfff in place of 0x00df. */
+static void TestPCEBuffer_2surr(void) {
+    UChar search[] =
+        { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff,
0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff }; /* 38 cp, 9 of them unpaired surrogates -- NOTE(review): the initializer appears to hold 37 code units with 0xdfff occurring 11 times; confirm the counts */
+    UChar source[] =
+        { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff };
+    uint32_t searchLen = sizeof(search)/sizeof(UChar);
+    uint32_t sourceLen = sizeof(source)/sizeof(UChar);
+    TestPCEBuffer_with(search,searchLen,source,sourceLen);
+}
+
+/*
+ * Searches for the one-unit pattern 0x00c9 in the text { 0x00c9, 0x0000, 0x0041 }
+ * and expects the matched length to be 1 (expectedMatchLength).
+ *
+ * NOTE(review): each failure below is only logged; the function keeps going and
+ * hands the same failing icuStatus to the following calls. This presumably relies
+ * on those calls doing nothing once the status is a failure -- confirm.
+ */
+static void TestMatchFollowedByIgnorables(void) {
+    /* test case for ticket#8482 */
+    UChar search[] = { 0x00c9 };
+    UChar source[] = { 0x00c9, 0x0000, 0x0041 };
+    int32_t searchLen;
+    int32_t sourceLen;
+    UErrorCode icuStatus = U_ZERO_ERROR;
+    UCollator *coll;
+    const char *locale;
+    UBreakIterator *ubrk;
+    UStringSearch *usearch;
+    int32_t match = 0;
+    int32_t matchLength = 0;
+    const int32_t expectedMatchLength = 1;
+
+    /* Lengths are given in code units, taken from the array sizes. */
+    searchLen = sizeof(search)/sizeof(UChar);
+    sourceLen = sizeof(source)/sizeof(UChar);
+
+    coll = ucol_openFromShortString("LHR_AN_CX_EX_FX_HX_NX_S3",
+                                    FALSE,
+                                    NULL,
+                                    &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("ucol_openFromShortString error - %s\n", u_errorName(icuStatus));
+    }
+
+    locale = ucol_getLocaleByType(coll,
+                                  ULOC_VALID_LOCALE,
+                                  &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("ucol_getLocaleByType error - %s\n", u_errorName(icuStatus));
+    }
+
+    ubrk = ubrk_open(UBRK_CHARACTER,
+                     locale,
+                     source,
+                     sourceLen,
+                     &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("ubrk_open error - %s\n", u_errorName(icuStatus));
+    }
+
+    usearch = usearch_openFromCollator(search,
+                                       searchLen,
+                                       source,
+                                       sourceLen,
+                                       coll,
+                                       ubrk,
+                                       &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("usearch_openFromCollator error - %s\n", u_errorName(icuStatus));
+    }
+
+    match = usearch_first(usearch,
+                          &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("usearch_first error - %s\n", u_errorName(icuStatus));
+    } else {
+
+        log_verbose("match=%d\n", match);
+
+        /* Only the matched length is checked; the match offset is just logged. */
+        matchLength = usearch_getMatchedLength(usearch);
+
+        if (matchLength != expectedMatchLength) {
+            log_err("Error: matchLength=%d, expected=%d\n", matchLength, expectedMatchLength);
+        }
+    }
+
+    /* Reached on every path, including after a logged failure above. */
+    usearch_close(usearch);
+    ubrk_close(ubrk);
+    ucol_close(coll);
+}
+
+/**
+* addSearchTest
+* Registers the string-search tests under the "tscoll/usrchtst/" path of the
+* test tree rooted at root.
+*/
+
 void addSearchTest(TestNode** root)
 {
     addTest(root, &TestStart, "tscoll/usrchtst/TestStart");
@@ -2171,6 +3040,15 @@ void addSearchTest(TestNode** root)
     addTest(root, &TestContractionCanonical, "tscoll/usrchtst/TestContractionCanonical");
     addTest(root, &TestEnd, "tscoll/usrchtst/TestEnd");
+    /* The tests below are registered after TestEnd; each one visible in this patch opens and closes its own collator. */
+    addTest(root, &TestNumeric, "tscoll/usrchtst/TestNumeric");
+    addTest(root, &TestDiacriticMatch, "tscoll/usrchtst/TestDiacriticMatch");
+    addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
+    addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
+    addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");
+    addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator");
+    addTest(root, &TestPCEBuffer_100df, "tscoll/usrchtst/TestPCEBuffer/1_00df");
+    addTest(root, &TestPCEBuffer_2surr, "tscoll/usrchtst/TestPCEBuffer/2_dfff");
+    addTest(root, &TestMatchFollowedByIgnorables, "tscoll/usrchtst/TestMatchFollowedByIgnorables");
 }
 #endif /* #if !UCONFIG_NO_COLLATION */