+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
- * Copyright (c) 2001-2011 International Business Machines
+ * Copyright (c) 2001-2016 International Business Machines
* Corporation and others. All Rights Reserved.
********************************************************************
* File usrchtst.c
const SearchData search)
{
int count = 0;
- int matchlimit = 0;
UErrorCode status = U_ZERO_ERROR;
int32_t matchindex = search.offset[count];
int32_t textlength;
UChar matchtext[128];
+ int32_t matchlength;
+ int32_t nextStart;
+ UBool isOverlap;
usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, search.elemCompare, &status);
if (U_FAILURE(status)) {
log_err("Error setting USEARCH_ELEMENT_COMPARISON attribute %s\n", u_errorName(status));
return FALSE;
- }
+ }
if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
usearch_getMatchedLength(strsrch) != 0) {
log_err("Error with the initialization of match start and length\n");
}
- /* start of following matches */
+ /* start of next matches */
while (U_SUCCESS(status) && matchindex >= 0) {
- uint32_t matchlength = search.size[count];
+ matchlength = search.size[count];
usearch_next(strsrch, &status);
if (matchindex != usearch_getMatchedStart(strsrch) ||
matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
log_err("Text: %s\n", str);
str = toCharString(usearch_getPattern(strsrch, &textlength));
log_err("Pattern: %s\n", str);
- log_err("Error following match found at idx,len %d,%d; expected %d,%d\n",
+ log_err("Error next match found at idx %d (len:%d); expected %d (len:%d)\n",
usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
matchindex, matchlength);
return FALSE;
memcmp(matchtext,
usearch_getText(strsrch, &textlength) + matchindex,
matchlength * sizeof(UChar)) != 0) {
- log_err("Error getting following matched text\n");
+ log_err("Error getting next matched text\n");
}
matchindex = search.offset[count];
log_err("Text: %s\n", str);
str = toCharString(usearch_getPattern(strsrch, &textlength));
log_err("Pattern: %s\n", str);
- log_err("Error following match found at %d %d\n",
+ log_err("Error next match found at %d (len:%d); expected <NO MATCH>\n",
usearch_getMatchedStart(strsrch),
usearch_getMatchedLength(strsrch));
return FALSE;
}
- /* start of preceding matches */
+ /* start of previous matches */
count = count == 0 ? 0 : count - 1;
- matchlimit = count;
matchindex = search.offset[count];
while (U_SUCCESS(status) && matchindex >= 0) {
- uint32_t matchlength = search.size[count];
+ matchlength = search.size[count];
usearch_previous(strsrch, &status);
if (matchindex != usearch_getMatchedStart(strsrch) ||
matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
log_err("Text: %s\n", str);
str = toCharString(usearch_getPattern(strsrch, &textlength));
log_err("Pattern: %s\n", str);
- log_err("Error preceding match found at %d %d\n",
- usearch_getMatchedStart(strsrch),
- usearch_getMatchedLength(strsrch));
+ log_err("Error previous match found at %d (len:%d); expected %d (len:%d)\n",
+ usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
+ matchindex, matchlength);
return FALSE;
}
memcmp(matchtext,
usearch_getText(strsrch, &textlength) + matchindex,
matchlength * sizeof(UChar)) != 0) {
- log_err("Error getting preceding matched text\n");
+ log_err("Error getting previous matched text\n");
}
matchindex = count > 0 ? search.offset[count - 1] : -1;
log_err("Text: %s\n", str);
str = toCharString(usearch_getPattern(strsrch, &textlength));
log_err("Pattern: %s\n", str);
- log_err("Error preceding match found at %d %d\n",
+ log_err("Error previous match found at %d (len:%d); expected <NO MATCH>\n",
usearch_getMatchedStart(strsrch),
usearch_getMatchedLength(strsrch));
return FALSE;
}
+
+ isOverlap = (usearch_getAttribute(strsrch, USEARCH_OVERLAP) == USEARCH_ON);
+
+ /* start of following matches */
+ count = 0;
+ matchindex = search.offset[count];
+ nextStart = 0;
+
+ while (TRUE) {
+ usearch_following(strsrch, nextStart, &status);
+
+ if (matchindex < 0) {
+ if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) {
+ char *str = toCharString(usearch_getText(strsrch, &textlength));
+ log_err("Text: %s\n", str);
+ str = toCharString(usearch_getPattern(strsrch, &textlength));
+ log_err("Pattern: %s\n", str);
+ log_err("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>\n",
+ nextStart, isOverlap,
+ usearch_getMatchedStart(strsrch),
+ usearch_getMatchedLength(strsrch));
+ return FALSE;
+ }
+ /* no more matches */
+ break;
+ }
+
+ matchlength = search.size[count];
+ if (usearch_getMatchedStart(strsrch) != matchindex
+ || usearch_getMatchedLength(strsrch) != matchlength
+ || U_FAILURE(status)) {
+ char *str = toCharString(usearch_getText(strsrch, &textlength));
+ log_err("Text: %s\n", str);
+ str = toCharString(usearch_getPattern(strsrch, &textlength));
+ log_err("Pattern: %s\n", str);
+ log_err("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
+ nextStart, isOverlap,
+ usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
+ matchindex, matchlength);
+ return FALSE;
+ }
+
+ if (isOverlap || usearch_getMatchedLength(strsrch) == 0) {
+ nextStart = usearch_getMatchedStart(strsrch) + 1;
+ } else {
+ nextStart = usearch_getMatchedStart(strsrch) + usearch_getMatchedLength(strsrch);
+ }
+
+ count++;
+ matchindex = search.offset[count];
+ }
+
+ /* start of preceding matches */
+ count = -1; /* last non-negative offset index, could be -1 if no match */
+ while (search.offset[count + 1] >= 0) {
+ count++;
+ }
+ usearch_getText(strsrch, &nextStart);
+
+ while (TRUE) {
+ usearch_preceding(strsrch, nextStart, &status);
+
+ if (count < 0) {
+ if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) {
+ char *str = toCharString(usearch_getText(strsrch, &textlength));
+ log_err("Text: %s\n", str);
+ str = toCharString(usearch_getPattern(strsrch, &textlength));
+ log_err("Pattern: %s\n", str);
+ log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
+ nextStart, isOverlap,
+ usearch_getMatchedStart(strsrch),
+ usearch_getMatchedLength(strsrch));
+ return FALSE;
+ }
+ /* no more matches */
+ break;
+ }
+
+ matchindex = search.offset[count];
+ matchlength = search.size[count];
+ if (usearch_getMatchedStart(strsrch) != matchindex
+ || usearch_getMatchedLength(strsrch) != matchlength
+ || U_FAILURE(status)) {
+ char *str = toCharString(usearch_getText(strsrch, &textlength));
+ log_err("Text: %s\n", str);
+ str = toCharString(usearch_getPattern(strsrch, &textlength));
+ log_err("Pattern: %s\n", str);
+ log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
+ nextStart, isOverlap,
+ usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
+ matchindex, matchlength);
+ return FALSE;
+ }
+
+ nextStart = matchindex;
+ count--;
+ }
+
usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, &status);
return TRUE;
}
memset(text, 0, 128*sizeof(UChar));
strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL, &status);
- if (U_FAILURE(status)) {
+ if (U_FAILURE(status)) {
log_err_status(status, "Error opening string search %s\n", u_errorName(status));
return;
}
search = DIACRITICMATCH[count];
while (search.text != NULL) {
- if (search.collator != NULL) {
- coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status);
- } else {
+ if (search.collator != NULL) {
+ coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status);
+ } else {
/* Always use "en_US" because some of these tests fail in Danish locales. */
- coll = ucol_open("en_US"/*uloc_getDefault()*/, &status);
- ucol_setStrength(coll, search.strength);
- }
- if (U_FAILURE(status)) {
- log_err("Error opening string search collator(\"%s\") %s\n", search.collator, u_errorName(status));
- return;
- }
-
- usearch_setCollator(strsrch, coll, &status);
- if (U_FAILURE(status)) {
- log_err("Error setting string search collator %s\n", u_errorName(status));
- return;
- }
+ coll = ucol_open("en_US"/*uloc_getDefault()*/, &status);
+ ucol_setStrength(coll, search.strength);
+ }
+ if (U_FAILURE(status)) {
+ log_err("Error opening string search collator(\"%s\") %s\n", search.collator, u_errorName(status));
+ return;
+ }
+
+ usearch_setCollator(strsrch, coll, &status);
+ if (U_FAILURE(status)) {
+ log_err("Error setting string search collator %s\n", u_errorName(status));
+ return;
+ }
u_unescape(search.text, text, 128);
u_unescape(search.pattern, pattern, 128);
static void TestStrengthIdentical(void)
{
- UCollator *coll;
- UErrorCode ec = U_ZERO_ERROR;
- UStringSearch *search;
-
+ UCollator *coll;
+ UErrorCode ec = U_ZERO_ERROR;
+ UStringSearch *search;
+
UChar pattern[] = {0x05E9, 0x0591, 0x05E9};
UChar text[] = {0x05E9, 0x0592, 0x05E9};
- int32_t pLen = sizeof (pattern) / sizeof(pattern[0]);
- int32_t tLen = sizeof(text) / sizeof (text[0]);
- int32_t expectedPos = 0;
- int32_t expectedLen = 3;
+ int32_t pLen = UPRV_LENGTHOF(pattern);
+ int32_t tLen = UPRV_LENGTHOF(text);
+ int32_t expectedPos = 0;
+ int32_t expectedLen = 3;
- int32_t pos;
- int32_t len;
+ int32_t pos;
+ int32_t len;
/* create a US-English collator */
- coll = ucol_open ("en_US", &ec);
+ coll = ucol_open ("en_US", &ec);
- /* make sure we didn't fail. */
- TEST_ASSERT (ec);
+ /* make sure we didn't fail. */
+ TEST_ASSERT (ec);
ucol_setStrength( coll, UCOL_TERTIARY);
- /* open a search looking for 0 */
- search = usearch_openFromCollator (pattern, pLen, text, tLen, coll, NULL, &ec);
- TEST_ASSERT (ec);
+ /* open a search looking for 0 */
+ search = usearch_openFromCollator (pattern, pLen, text, tLen, coll, NULL, &ec);
+ TEST_ASSERT (ec);
if (coll != NULL && search != NULL) {
- pos = usearch_first(search, &ec);
- len = usearch_getMatchedLength(search);
-
- if(pos != expectedPos) {
- log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
- }
-
- if(len != expectedLen) {
- log_err("Expected search result length: %d; Got instead: %d\n", expectedLen, len);
- }
-
+ pos = usearch_first(search, &ec);
+ len = usearch_getMatchedLength(search);
+
+ if(pos != expectedPos) {
+ log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
+ }
+
+ if(len != expectedLen) {
+ log_err("Expected search result length: %d; Got instead: %d\n", expectedLen, len);
+ }
+
/* Now try it at strength == UCOL_IDENTICAL */
ucol_setStrength(coll, UCOL_IDENTICAL);
- usearch_reset(search);
+ usearch_reset(search);
- pos = usearch_first(search, &ec);
- len = usearch_getMatchedLength(search);
+ pos = usearch_first(search, &ec);
+ len = usearch_getMatchedLength(search);
- if(pos != -1) {
- log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos);
- }
+ if(pos != -1) {
+ log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos);
+ }
}
usearch_close(search);
* TestUsingSearchCollator
*/
-#define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))
-
typedef struct {
const UChar * pattern; /* NUL-terminated pattern (measured with u_strlen); a NULL pattern ends a table */
const int32_t * offsets; /* expected match start offsets, in the order usearch_next reports them */
int32_t offsetsLen; /* number of entries in offsets */
+ const int32_t * matchLens; /* expected match length for each offset, or NULL to skip the length check */
} PatternAndOffsets;
static const UChar scKoText[] = {
static const int32_t scKoSrchOff45[] = { 7, 30 };
static const PatternAndOffsets scKoSrchPatternsOffsets[] = {
- { scKoPat0, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
- { scKoPat1, scKoSrchOff01, ARRAY_LENGTH(scKoSrchOff01) },
- { scKoPat2, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
- { scKoPat3, scKoSrchOff23, ARRAY_LENGTH(scKoSrchOff23) },
- { scKoPat4, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
- { scKoPat5, scKoSrchOff45, ARRAY_LENGTH(scKoSrchOff45) },
- { NULL, NULL, 0 }
+ { scKoPat0, scKoSrchOff01, UPRV_LENGTHOF(scKoSrchOff01), NULL },
+ { scKoPat1, scKoSrchOff01, UPRV_LENGTHOF(scKoSrchOff01), NULL },
+ { scKoPat2, scKoSrchOff23, UPRV_LENGTHOF(scKoSrchOff23), NULL },
+ { scKoPat3, scKoSrchOff23, UPRV_LENGTHOF(scKoSrchOff23), NULL },
+ { scKoPat4, scKoSrchOff45, UPRV_LENGTHOF(scKoSrchOff45), NULL },
+ { scKoPat5, scKoSrchOff45, UPRV_LENGTHOF(scKoSrchOff45), NULL },
+ { NULL, NULL, 0, NULL }
};
static const int32_t scKoStndOff01[] = { 3, 9 };
static const int32_t scKoStndOff45[] = { 7, 30 };
static const PatternAndOffsets scKoStndPatternsOffsets[] = {
- { scKoPat0, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
- { scKoPat1, scKoStndOff01, ARRAY_LENGTH(scKoStndOff01) },
- { scKoPat2, scKoStndOff2, ARRAY_LENGTH(scKoStndOff2) },
- { scKoPat3, scKoStndOff3, ARRAY_LENGTH(scKoStndOff3) },
- { scKoPat4, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
- { scKoPat5, scKoStndOff45, ARRAY_LENGTH(scKoStndOff45) },
- { NULL, NULL, 0 }
+ { scKoPat0, scKoStndOff01, UPRV_LENGTHOF(scKoStndOff01), NULL },
+ { scKoPat1, scKoStndOff01, UPRV_LENGTHOF(scKoStndOff01), NULL },
+ { scKoPat2, scKoStndOff2, UPRV_LENGTHOF(scKoStndOff2), NULL },
+ { scKoPat3, scKoStndOff3, UPRV_LENGTHOF(scKoStndOff3), NULL },
+ { scKoPat4, scKoStndOff45, UPRV_LENGTHOF(scKoStndOff45), NULL },
+ { scKoPat5, scKoStndOff45, UPRV_LENGTHOF(scKoStndOff45), NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static const UChar scJaText[] = {
+/*00*/ 0x304D,0x305F,0x0020,0x30AD,0x30BF,0x0020, /* kita, hiragana and katakana */
+/*06*/ 0x304D,0x3060,0x0020,0x30AD,0x30C0,0x0020, /* kida, hiragana and katakana */
+/*12*/ 0x306F,0x306D,0x0020,0x30CF,0x30CD,0x0020, /* hane, hiragana and katakana */
+/*18*/ 0x3070,0x306D,0x0020,0x30D0,0x30CD,0x0020, /* bane, hiragana and katakana */
+/*24*/ 0x3071,0x306D,0x0020,0x30D1,0x30CD,0x0020, /* pane, hiragana and katakana */
+/*30*/ 0
+};
+
+static const UChar scJaPatH0[] = { 0x304D,0x305F,0 }; /* kita, hiragana */
+static const UChar scJaPatK0[] = { 0x30AD,0x30BF,0 }; /* kita, katakana */
+static const UChar scJaPatH1[] = { 0x304D,0x3060,0 }; /* kida, hiragana */
+static const UChar scJaPatK1[] = { 0x30AD,0x30C0,0 }; /* kida, katakana */
+static const UChar scJaPatH2[] = { 0x306F,0x306D,0 }; /* hane, hiragana */
+static const UChar scJaPatK2[] = { 0x30CF,0x30CD,0 }; /* hane, katakana */
+static const UChar scJaPatH3[] = { 0x3070,0x306D,0 }; /* bane, hiragana */
+static const UChar scJaPatK3[] = { 0x30D0,0x30CD,0 }; /* bane, katakana */
+static const UChar scJaPatH4[] = { 0x3071,0x306D,0 }; /* pane, hiragana */
+static const UChar scJaPatK4[] = { 0x30D1,0x30CD,0 }; /* pane, katakana */
+
+static const int32_t scJaStndOff01[] = { 0, 3, 6, 9 };
+static const int32_t scJaStndOff234[] = { 12, 15, 18, 21, 24, 27 };
+
+static const int32_t scJaSrchOff0[] = { 0, 3 };
+static const int32_t scJaSrchOff1[] = { 6, 9 };
+static const int32_t scJaSrchOff2[] = { 12, 15 };
+static const int32_t scJaSrchOff3[] = { 18, 21 };
+static const int32_t scJaSrchOff4[] = { 24, 27 };
+
+static const PatternAndOffsets scJaStndPatternsOffsets[] = {
+ { scJaPatH0, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
+ { scJaPatK0, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
+ { scJaPatH1, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
+ { scJaPatK1, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
+ { scJaPatH2, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
+ { scJaPatK2, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
+ { scJaPatH3, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
+ { scJaPatK3, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
+ { scJaPatH4, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
+ { scJaPatK4, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static const PatternAndOffsets scJaSrchPatternsOffsets[] = {
+ { scJaPatH0, scJaSrchOff0, UPRV_LENGTHOF(scJaSrchOff0), NULL },
+ { scJaPatK0, scJaSrchOff0, UPRV_LENGTHOF(scJaSrchOff0), NULL },
+ { scJaPatH1, scJaSrchOff1, UPRV_LENGTHOF(scJaSrchOff1), NULL },
+ { scJaPatK1, scJaSrchOff1, UPRV_LENGTHOF(scJaSrchOff1), NULL },
+ { scJaPatH2, scJaSrchOff2, UPRV_LENGTHOF(scJaSrchOff2), NULL },
+ { scJaPatK2, scJaSrchOff2, UPRV_LENGTHOF(scJaSrchOff2), NULL },
+ { scJaPatH3, scJaSrchOff3, UPRV_LENGTHOF(scJaSrchOff3), NULL },
+ { scJaPatK3, scJaSrchOff3, UPRV_LENGTHOF(scJaSrchOff3), NULL },
+ { scJaPatH4, scJaSrchOff4, UPRV_LENGTHOF(scJaSrchOff4), NULL },
+ { scJaPatK4, scJaSrchOff4, UPRV_LENGTHOF(scJaSrchOff4), NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static const UChar scFaText[] = { // Apple <rdar://problem/34998959>
+/*00*/ 0x064A,0x0627,0x0649,0x0627,0x06CC,0x0627,
+/*06*/ 0
+};
+
+// Any of the following should match any of the others for fa search, primary strength
+static const UChar scFaPat1[] = { 0x064A,0 };
+static const UChar scFaPat2[] = { 0x0649,0 };
+static const UChar scFaPat3[] = { 0x06CC,0 };
+
+static const int32_t scFaSrchOff[] = { 0, 2, 4 };
+
+static const PatternAndOffsets scFaSrchPatternsOffsets[] = {
+ { scFaPat1, scFaSrchOff, UPRV_LENGTHOF(scFaSrchOff), NULL },
+ { scFaPat2, scFaSrchOff, UPRV_LENGTHOF(scFaSrchOff), NULL },
+ { scFaPat3, scFaSrchOff, UPRV_LENGTHOF(scFaSrchOff), NULL },
+ { NULL, NULL, 0, NULL }
+};
+
+static const UChar scModsText[] = {
+/*00*/ 0x0020,0xD83D,0xDC4D,
+/*03*/ 0x0020,0xD83D,0xDC4D,0xD83C,0xDFFC,
+/*08*/ 0x0020,0xD83D,0xDC4D,0xD83C,0xDFFF,
+/*13*/ 0x0020,0xD83D,0xDC4D,0x0300,
+/*17*/ 0x0020,0
+};
+
+static const UChar scMods0[] = { 0xD83D,0xDC4D,0 }; /* hand with no mods */
+static const UChar scMods1[] = { 0xD83D,0xDC4D,0xD83C,0xDFFC,0 }; /* hand with fitz 3 */
+static const UChar scMods2[] = { 0xD83D,0xDC4D,0xD83C,0xDFFF,0 }; /* hand with fitz 6 */
+static const UChar scMods3[] = { 0xD83D,0xDC4D,0x0300,0 }; /* hand with grave */
+
+static const int32_t scMods012[] = { 1, 4, 9, 14 }; /* expected match starts in scModsText; shared by all four scMods patterns */
+static const int32_t scModsLens012[] = { 2, 4, 4, 3 }; /* expected match lengths, parallel to scMods012 */
+
+static const PatternAndOffsets scModsPatternsOffsets[] = {
+ { scMods0, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
+ { scMods1, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
+ { scMods2, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
+ { scMods3, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
+ { NULL, NULL, 0, NULL }
+};
+
+static const UChar scFlagText[] = {
+/*00*/ 0xD83C,0xDDF3,0xD83C,0xDDFF, /*NZ*/
+/*04*/ 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0xDB40,0xDC7F, /*gbeng*/
+/*18*/ 0x0061,0x0062,0x0063,
+/*21*/ 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC73,0xDB40,0xDC63,0xDB40,0xDC74,0xDB40,0xDC7F, /*gbsct*/
+/*35*/ 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC77,0xDB40,0xDC6C,0xDB40,0xDC73,0xDB40,0xDC7F, /*gbwls*/
+/*49*/ 0x0020,0
+};
+
+static const UChar scFlag1[] = { 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC73,0xDB40,0xDC63,0xDB40,0xDC74,0xDB40,0xDC7F,0 }; /* gbsct */
+static const int32_t scOffFlag1[] = { 21 }; /* expected match start: the gbsct sequence begins at index 21 of scFlagText */
+static const int32_t scLenFlag1[] = { 14 }; /* expected match length: the 14 UTF-16 code units of scFlag1 */
+
+static const PatternAndOffsets scFlagPatternsOffsets[] = {
+ { scFlag1, scOffFlag1, UPRV_LENGTHOF(scOffFlag1), scLenFlag1 },
+ { NULL, NULL, 0, NULL }
};
+
typedef struct {
const char * locale;
const UChar * text;
static const TUSCItem tuscItems[] = {
{ "root", scKoText, scKoStndPatternsOffsets },
-#if 1
- /* No jamo tailorings in Apple version of search collator currently */
- { "root@collation=search", scKoText, scKoStndPatternsOffsets },
- { "ko@collation=search", scKoText, scKoStndPatternsOffsets },
-#else
- /* Use this when we do have jamo tailorings */
{ "root@collation=search", scKoText, scKoSrchPatternsOffsets },
{ "ko@collation=search", scKoText, scKoSrchPatternsOffsets },
-#endif
+ { "root@colStrength=primary", scJaText, scJaStndPatternsOffsets },
+ { "root@collation=search;colStrength=primary", scJaText, scJaSrchPatternsOffsets },
+ { "ja@colStrength=primary", scJaText, scJaStndPatternsOffsets },
+ { "ja@collation=search;colStrength=primary", scJaText, scJaSrchPatternsOffsets },
+ { "fa@collation=search;colStrength=primary", scFaText, scFaSrchPatternsOffsets }, // Apple <rdar://problem/34998959>
+ { "root@collation=search;colStrength=primary", scModsText, scModsPatternsOffsets },
+ { "root@collation=search;colStrength=primary", scFlagText, scFlagPatternsOffsets },
{ NULL, NULL, NULL }
};
if ( U_SUCCESS(status) ) {
const PatternAndOffsets * patternsOffsetsPtr;
for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {
- usearch_setPattern(usrch, patternsOffsetsPtr->pattern, -1, &status);
+ int32_t patLen = u_strlen(patternsOffsetsPtr->pattern);
+ usearch_setPattern(usrch, patternsOffsetsPtr->pattern, patLen, &status);
if ( U_SUCCESS(status) ) {
int32_t offset;
const int32_t * nextOffsetPtr;
const int32_t * limitOffsetPtr;
+ const int32_t * nextMatchLenPtr;
usearch_reset(usrch);
nextOffsetPtr = patternsOffsetsPtr->offsets;
limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
+ nextMatchLenPtr = patternsOffsetsPtr->matchLens;
while (TRUE) {
offset = usearch_next(usrch, &status);
if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
break;
}
if ( nextOffsetPtr < limitOffsetPtr ) {
- if (offset != *nextOffsetPtr) {
- log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
- nextOffsetPtr = limitOffsetPtr;
- break;
- }
- nextOffsetPtr++;
+ if (offset != *nextOffsetPtr) {
+ log_err("error, locale %s, patn (%d) %04X %04X..., expected usearch_next %d, got %d\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], *nextOffsetPtr, offset);
+ nextOffsetPtr = limitOffsetPtr;
+ break;
+ } else if (nextMatchLenPtr != NULL) {
+ int32_t matchLen = usearch_getMatchedLength(usrch);
+ if (matchLen != *nextMatchLenPtr) {
+ log_err("error, locale %s, patn (%d) %04X %04X..., offset %d, expected matchLen %d, got %d\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], offset, *nextMatchLenPtr, matchLen);
+ }
+ nextMatchLenPtr++;
+ }
+ nextOffsetPtr++;
} else {
- log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr->locale );
+ log_err("error, locale %s, patn (%d) %04X %04X..., usearch_next returned more matches than expected\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
}
}
if ( U_FAILURE(status) ) {
- log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+ log_err("error, locale %s, patn (%d) %04X %04X..., usearch_next failed: %s\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], u_errorName(status) );
} else if ( nextOffsetPtr < limitOffsetPtr ) {
- log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr->locale );
+ log_err("error, locale %s, patn (%d) %04X %04X..., usearch_next returned fewer matches than expected\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
}
status = U_ZERO_ERROR;
if ( nextOffsetPtr > limitOffsetPtr ) {
nextOffsetPtr--;
if (offset != *nextOffsetPtr) {
- log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr->locale, *nextOffsetPtr, offset);
- nextOffsetPtr = limitOffsetPtr;
- break;
+ log_err("error, locale %s, patn (%d) %04X %04X..., expected usearch_previous %d, got %d\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], *nextOffsetPtr, offset);
+ nextOffsetPtr = limitOffsetPtr;
+ break;
}
} else {
- log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr->locale );
+ log_err("error, locale %s, patn (%d) %04X %04X..., usearch_previous returned more matches than expected\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
}
}
if ( U_FAILURE(status) ) {
- log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+ log_err("error, locale %s, patn (%d) %04X %04X..., usearch_previous failed: %s\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], u_errorName(status) );
} else if ( nextOffsetPtr > limitOffsetPtr ) {
- log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr->locale );
+ log_err("error, locale %s, patn (%d) %04X %04X..., usearch_previous returned fewer matches than expected\n",
+ tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
}
} else {
- log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+ log_err("error, locale %s, usearch_setPattern failed: %s\n",
+ tuscItemPtr->locale, u_errorName(status) );
}
}
usearch_close(usrch);
}
ucol_close(ucol);
} else {
- log_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
+ log_data_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
}
}
}
+
+/**
+ * Shared driver for the TestPCEBuffer_* cases.
+ *
+ * Opens a collator from the short definition string
+ * "LSK_AS_CX_EX_FX_HX_NX_S4", a character break iterator over source using
+ * the collator's valid locale, and a string search for search within source,
+ * then expects usearch_first() to report a match at offset 0.
+ * Any failing step is logged and ends the test; whatever was opened up to
+ * that point is released before returning.
+ *
+ * @param search    pattern text (UTF-16 code units)
+ * @param searchLen number of code units in search
+ * @param source    text to be searched (UTF-16 code units)
+ * @param sourceLen number of code units in source
+ */
+static void TestPCEBuffer_with(const UChar *search, uint32_t searchLen, const UChar *source, uint32_t sourceLen) {
+    UErrorCode icuStatus = U_ZERO_ERROR;
+    /* Handles start out NULL so the shared cleanup can tell what was opened. */
+    UCollator *coll = NULL;
+    UBreakIterator *ubrk = NULL;
+    UStringSearch *usearch = NULL;
+    const char *locale;
+    int32_t match = 0;
+
+    coll = ucol_openFromShortString("LSK_AS_CX_EX_FX_HX_NX_S4", FALSE, NULL, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err( "ucol_openFromShortString error %s\n" , u_errorName(icuStatus));
+        goto exit;
+    }
+
+    locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_err( "ucol_getLocaleByType error %s\n", u_errorName(icuStatus) );
+        goto exit;
+    }
+
+    log_verbose("locale=%s\n", locale);
+
+    ubrk = ubrk_open(UBRK_CHARACTER, locale, source, sourceLen, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_err( "ubrk_open error %s\n", u_errorName(icuStatus) );
+        goto exit;
+    }
+
+    usearch = usearch_openFromCollator(search, searchLen, source, sourceLen, coll, ubrk, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_err( "usearch_openFromCollator error %s\n", u_errorName(icuStatus) );
+        goto exit;
+    }
+
+    match = usearch_first(usearch, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_err( "usearch_first error %s\n", u_errorName(icuStatus) );
+        goto exit;
+    }
+
+    if (match == 0) {
+        log_verbose("OK: match=%d\n", match);
+    } else {
+        log_err("Err: match expected 0 got %d\n", match);
+    }
+
+exit:
+    /*
+     * Single cleanup point for both the success and the failure paths.
+     * Objects are closed in reverse order of creation; handles that were
+     * never assigned a non-NULL value are skipped.
+     */
+    if (usearch != NULL) {
+        usearch_close(usearch);
+    }
+    if (ubrk != NULL) {
+        ubrk_close(ubrk);
+    }
+    if (coll != NULL) {
+        ucol_close(coll);
+    }
+}
+
+
+/**
+ * Runs TestPCEBuffer_with() with a pattern and a text that are the same
+ * 37-unit string, in which U+00DF follows each short run of other characters.
+ */
+static void TestPCEBuffer_100df(void) {
+    /* 37 UTF-16 code units; U+00DF occurs 11 times; D8FA DEAE is a well-formed surrogate pair (no unpaired surrogates here) */
+    UChar search[] =
+        { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df };
+    /* The text is the same sequence as the pattern, so the match is expected at offset 0. */
+    UChar source[] =
+        { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df };
+    uint32_t searchLen = UPRV_LENGTHOF(search);
+    uint32_t sourceLen = UPRV_LENGTHOF(source);
+    TestPCEBuffer_with(search,searchLen,source,sourceLen);
+}
+
+
+/**
+ * Runs TestPCEBuffer_with() with a pattern and a text that are the same
+ * 37-unit string, in which a lone 0xDFFF follows each short run of other
+ * characters.
+ */
+static void TestPCEBuffer_2surr(void) {
+    /* 37 UTF-16 code units; 0xDFFF occurs 11 times, each an unpaired trail surrogate; D8FA DEAE is a well-formed surrogate pair */
+    UChar search[] =
+        { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff };
+    /* The text is the same sequence as the pattern, so the match is expected at offset 0. */
+    UChar source[] =
+        { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff };
+    uint32_t searchLen = UPRV_LENGTHOF(search);
+    uint32_t sourceLen = UPRV_LENGTHOF(source);
+    TestPCEBuffer_with(search,searchLen,source,sourceLen);
+}
+
+/**
+ * Test case for ticket#8482.
+ *
+ * Searches for U+00C9 in the text U+00C9 U+0000 U+0041 with a collator opened
+ * from "LHR_AN_CX_EX_FX_HX_NX_S3" and a character break iterator, and checks
+ * that the first match has length 1.
+ * Each setup failure is logged and the test keeps going with the same status
+ * variable; the three objects are closed unconditionally at the end.
+ */
+static void TestMatchFollowedByIgnorables(void) {
+    UChar search[] = { 0x00c9 };
+    UChar source[] = { 0x00c9, 0x0000, 0x0041 };
+    int32_t searchLen = UPRV_LENGTHOF(search);
+    int32_t sourceLen = UPRV_LENGTHOF(source);
+    const int32_t expectedMatchLength = 1;
+    int32_t matchLength = 0;
+    int32_t match = 0;
+    UErrorCode icuStatus = U_ZERO_ERROR;
+    UCollator *coll;
+    UBreakIterator *ubrk;
+    UStringSearch *usearch;
+    const char *locale;
+
+    /* Collator built from a short definition string. */
+    coll = ucol_openFromShortString("LHR_AN_CX_EX_FX_HX_NX_S3", FALSE, NULL, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("ucol_openFromShortString error - %s\n", u_errorName(icuStatus));
+    }
+
+    /* The collator's valid locale selects the break iterator rules. */
+    locale = ucol_getLocaleByType(coll, ULOC_VALID_LOCALE, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("ucol_getLocaleByType error - %s\n", u_errorName(icuStatus));
+    }
+
+    ubrk = ubrk_open(UBRK_CHARACTER, locale, source, sourceLen, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("ubrk_open error - %s\n", u_errorName(icuStatus));
+    }
+
+    usearch = usearch_openFromCollator(search, searchLen, source, sourceLen, coll, ubrk, &icuStatus);
+    if (U_FAILURE(icuStatus)) {
+        log_data_err("usearch_openFromCollator error - %s\n", u_errorName(icuStatus));
+    }
+
+    match = usearch_first(usearch, &icuStatus);
+    if (U_SUCCESS(icuStatus)) {
+        log_verbose("match=%d\n", match);
+
+        /* Only the length of the first match is verified. */
+        matchLength = usearch_getMatchedLength(usearch);
+        if (matchLength != expectedMatchLength) {
+            log_err("Error: matchLength=%d, expected=%d\n", matchLength, expectedMatchLength);
+        }
+    } else {
+        log_data_err("usearch_first error - %s\n", u_errorName(icuStatus));
+    }
+
+    usearch_close(usearch);
+    ubrk_close(ubrk);
+    ucol_close(coll);
+}
+
+/**
+ * Runs assertEqual() on every entry of the INDICPREFIXMATCH table, stopping
+ * at the entry whose text is NULL, and logs the index of each entry that
+ * fails. Returns early if open() reports a failure.
+ */
+static void TestIndicPrefixMatch(void)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int testIndex;
+
+    open(&status);
+    if (U_FAILURE(status)) {
+        log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
+        return;
+    }
+
+    for (testIndex = 0; INDICPREFIXMATCH[testIndex].text != NULL; testIndex++) {
+        if (!assertEqual(INDICPREFIXMATCH[testIndex])) {
+            log_err("Error at test number %d\n", testIndex);
+        }
+    }
+
+    close();
+}
+
/**
* addSearchTest
*/
addTest(root, &TestNumeric, "tscoll/usrchtst/TestNumeric");
addTest(root, &TestDiacriticMatch, "tscoll/usrchtst/TestDiacriticMatch");
addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
- addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
+ addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");
addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator");
+ addTest(root, &TestPCEBuffer_100df, "tscoll/usrchtst/TestPCEBuffer/1_00df");
+ addTest(root, &TestPCEBuffer_2surr, "tscoll/usrchtst/TestPCEBuffer/2_dfff");
+ addTest(root, &TestMatchFollowedByIgnorables, "tscoll/usrchtst/TestMatchFollowedByIgnorables");
+ addTest(root, &TestIndicPrefixMatch, "tscoll/usrchtst/TestIndicPrefixMatch");
}
#endif /* #if !UCONFIG_NO_COLLATION */