+ }
+#endif
+}
+
+struct UdhrTestCase // One udhrTest() input: a locale paired with the text file searched under that locale.
+{
+ const char *locale; // Locale ID; udhrTest() passes it to ucol_open().
+ const char *file; // File name; udhrTest() resolves it against the "udhr" path from getPath().
+};
+
+/**
+ * Runs the bms_* search API over each (locale, file) pair in testCases.
+ *
+ * For every pair the file name is resolved against the "udhr" path and the
+ * file is loaded with ucbuf_open(). The first line of the file is used as the
+ * pattern and the whole buffer as the target. A collator, a CollData object
+ * and a search object are opened for the locale, then bms_search() is called
+ * repeatedly, restarting at the end of each match, until it reports no match.
+ * The case is reported as failed if the search offset never moved off 0.
+ *
+ * A file that cannot be opened is skipped with an informational message;
+ * every other failure is reported through errln(). Each case starts from a
+ * clean UErrorCode so that one failing case does not take the rest with it.
+ */
+void SSearchTest::udhrTest()
+{
+    UErrorCode status = U_ZERO_ERROR;
+    char path[PATH_BUFFER_SIZE];
+    const char *udhrPath = getPath(path, "udhr");
+
+    if (udhrPath == NULL) {
+        // couldn't get path: error message already output...
+        return;
+    }
+
+    UdhrTestCase testCases[] = {
+        {"en", "udhr_eng.txt"},
+        {"de", "udhr_deu_1996.txt"},
+        {"fr", "udhr_fra.txt"},
+        {"ru", "udhr_rus.txt"},
+        {"th", "udhr_tha.txt"},
+        {"ja", "udhr_jpn.txt"},
+        {"ko", "udhr_kor.txt"},
+        {"zh", "udhr_cmn_hans.txt"},
+        {"zh_Hant", "udhr_cmn_hant.txt"}
+    };
+
+    int32_t testCount = ARRAY_SIZE(testCases);
+
+    for (int32_t t = 0; t < testCount; t += 1) {
+        int32_t len = 0;
+        char *resolvedFileName = NULL;
+        const char *encoding = NULL;
+        UCHARBUF *ucharBuf = NULL;
+
+        // Start every case from a clean status. A failure left over from an
+        // earlier case would still be set at the U_FAILURE checks below and
+        // would wrongly skip or fail this case as well.
+        status = U_ZERO_ERROR;
+
+        // First call passes no output buffer: it is only used to obtain, via
+        // len, the size to allocate for the resolved name.
+        ucbuf_resolveFileName(udhrPath, testCases[t].file, NULL, &len, &status);
+        resolvedFileName = NEW_ARRAY(char, len);
+
+        if(resolvedFileName == NULL){
+            continue;
+        }
+
+        // A buffer-overflow status from the length-only call is not an error.
+        if(status == U_BUFFER_OVERFLOW_ERROR){
+            status = U_ZERO_ERROR;
+        }
+
+        ucbuf_resolveFileName(udhrPath, testCases[t].file, resolvedFileName, &len, &status);
+        ucharBuf = ucbuf_open(resolvedFileName, &encoding, TRUE, FALSE, &status);
+
+        DELETE_ARRAY(resolvedFileName);
+
+        if(U_FAILURE(status)){
+            infoln("Could not open the input file %s. Test skipped\n", testCases[t].file);
+            continue;
+        }
+
+        int32_t targetLen = 0;
+        const UChar *target = ucbuf_getBuffer(ucharBuf, &targetLen, &status);
+
+        // Do not touch the buffer unless it was actually handed back.
+        if (U_FAILURE(status) || target == NULL) {
+            errln("Could not read the input file %s", testCases[t].file);
+            ucbuf_close(ucharBuf);
+            continue;
+        }
+
+        /* The first line of the file contains the pattern */
+        int32_t start = 0, end = 0, plen = 0;
+
+        // Stop at targetLen as well as at a line terminator, so a file with
+        // no LF / CR / U+2028 cannot run the scan past the end of the buffer.
+        for(end = start; end < targetLen; end += 1) {
+            UChar ch = target[end];
+
+            if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
+                break;
+            }
+        }
+
+        plen = end - start;
+
+        UChar *pattern = NEW_ARRAY(UChar, plen);
+
+        if (pattern == NULL) {
+            errln("Could not allocate the pattern buffer for %s", testCases[t].file);
+            ucbuf_close(ucharBuf);
+            continue;
+        }
+
+        for (int32_t i = 0; i < plen; i += 1) {
+            pattern[i] = target[start++];
+        }
+
+        // Everything the cleanup labels use is declared before the first
+        // goto, so no jump crosses an initialization.
+        int32_t offset = 0;
+        UCollator *coll = ucol_open(testCases[t].locale, &status);
+        UCD *ucd = NULL;
+        BMS *bms = NULL;
+
+        if (U_FAILURE(status)) {
+            errln("Could not open collator for %s", testCases[t].locale);
+            goto delete_collator;
+        }
+
+        ucd = ucd_open(coll, &status);
+
+        if (U_FAILURE(status)) {
+            errln("Could not open CollData object for %s", testCases[t].locale);
+            goto delete_ucd;
+        }
+
+        bms = bms_open(ucd, pattern, plen, target, targetLen, &status);
+
+        if (U_FAILURE(status)) {
+            errln("Could not open search object for %s", testCases[t].locale);
+            goto delete_bms;
+        }
+
+        // Walk through the matches: each search restarts where the previous
+        // match ended, so offset stays 0 only if nothing was ever found.
+        start = end = -1;
+        while (bms_search(bms, offset, &start, &end)) {
+            offset = end;
+        }
+
+        if (offset == 0) {
+            errln("Could not find pattern - locale: %s, file: %s ", testCases[t].locale, testCases[t].file);
+        }
+
+        // Cleanup runs in reverse order of acquisition; each failure above
+        // enters at the label matching what had been attempted so far.
+delete_bms:
+        bms_close(bms);
+
+delete_ucd:
+        ucd_close(ucd);
+
+delete_collator:
+        ucol_close(coll);
+
+        DELETE_ARRAY(pattern);
+        ucbuf_close(ucharBuf);
+    }
+
+    ucd_flushCache();
+}
+
+void SSearchTest::bmSearchTest()
+{
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+ UErrorCode status = U_ZERO_ERROR;
+ char path[PATH_BUFFER_SIZE];
+ const char *testFilePath = getPath(path, "ssearch.xml");
+
+ if (testFilePath == NULL) {
+ return; /* Couldn't get path: error message already output. */
+ }
+
+ UXMLParser *parser = UXMLParser::createParser(status);
+ TEST_ASSERT_SUCCESS(status);
+ UXMLElement *root = parser->parseFile(testFilePath, status);
+ TEST_ASSERT_SUCCESS(status);
+ if (U_FAILURE(status)) {
+ return;
+ }
+
+ const UnicodeString *debugTestCase = root->getAttribute("debug");
+ if (debugTestCase != NULL) {
+// setenv("USEARCH_DEBUG", "1", 1);
+ }
+
+
+ const UXMLElement *testCase;
+ int32_t tc = 0;
+
+ while((testCase = root->nextChildElement(tc)) != NULL) {
+
+ if (testCase->getTagName().compare("test-case") != 0) {
+ errln("ssearch, unrecognized XML Element in test file");
+ continue;
+ }
+ const UnicodeString *id = testCase->getAttribute("id");
+ *testId = 0;
+ if (id != NULL) {
+ id->extract(0, id->length(), testId, sizeof(testId), US_INV);
+ }
+
+ // If debugging test case has been specified and this is not it, skip to next.
+ if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
+ continue;
+ }
+ //
+ // Get the requested collation strength.
+ // Default is tertiary if the XML attribute is missing from the test case.
+ //
+ const UnicodeString *strength = testCase->getAttribute("strength");
+ UColAttributeValue collatorStrength = UCOL_PRIMARY;
+ if (strength==NULL) { collatorStrength = UCOL_TERTIARY;}
+ else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;}
+ else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;}
+ else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;}
+ else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
+ else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;}
+ else {
+ // Bogus value supplied for strength. Shouldn't happen, even from
+ // typos, if the XML source has been validated.
+ // This assert is a little deceiving in that strength can be
+ // any of the allowed values, not just TERTIARY, but it will
+ // do the job of getting the error output.
+ TEST_ASSERT(*strength=="TERTIARY")
+ }
+
+ //
+ // Get the collator normalization flag. Default is UCOL_OFF.
+ //
+ UColAttributeValue normalize = UCOL_OFF;
+ const UnicodeString *norm = testCase->getAttribute("norm");
+ TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
+ if (norm!=NULL && *norm=="ON") {
+ normalize = UCOL_ON;
+ }
+
+ //
+ // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
+ //
+ UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
+ const UnicodeString *alt = testCase->getAttribute("alternate_handling");
+ TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
+ if (alt != NULL && *alt == "SHIFTED") {
+ alternateHandling = UCOL_SHIFTED;
+ }
+
+ const UnicodeString defLocale("en");
+ char clocale[100];
+ const UnicodeString *locale = testCase->getAttribute("locale");
+ if (locale == NULL || locale->length()==0) {
+ locale = &defLocale;
+ };
+ locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);
+
+
+ UnicodeString text;
+ UnicodeString target;
+ UnicodeString pattern;
+ int32_t expectedMatchStart = -1;
+ int32_t expectedMatchLimit = -1;
+ const UXMLElement *n;
+ int32_t nodeCount = 0;
+
+ n = testCase->getChildElement("pattern");
+ TEST_ASSERT(n != NULL);
+ if (n==NULL) {
+ continue;
+ }
+ text = n->getText(FALSE);
+ text = text.unescape();
+ pattern.append(text);
+ nodeCount++;
+
+ n = testCase->getChildElement("pre");
+ if (n!=NULL) {
+ text = n->getText(FALSE);
+ text = text.unescape();
+ target.append(text);
+ nodeCount++;
+ }
+
+ n = testCase->getChildElement("m");
+ if (n!=NULL) {
+ expectedMatchStart = target.length();
+ text = n->getText(FALSE);
+ text = text.unescape();
+ target.append(text);
+ expectedMatchLimit = target.length();
+ nodeCount++;
+ }
+
+ n = testCase->getChildElement("post");
+ if (n!=NULL) {
+ text = n->getText(FALSE);
+ text = text.unescape();
+ target.append(text);
+ nodeCount++;
+ }
+
+ // Check that there weren't extra things in the XML
+ TEST_ASSERT(nodeCount == testCase->countChildren());
+
+ // Open a collator and StringSearch based on the parameters
+ // obtained from the XML.
+ //
+ status = U_ZERO_ERROR;
+ UCollator *collator = ucol_open(clocale, &status);
+ ucol_setStrength(collator, collatorStrength);
+ ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, normalize, &status);
+ ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
+ UCD *ucd = ucd_open(collator, &status);
+ BMS *bms = bms_open(ucd, pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), &status);
+
+ TEST_ASSERT_SUCCESS(status);
+ if (U_FAILURE(status)) {
+ bms_close(bms);
+ ucd_close(ucd);
+ ucol_close(collator);
+ continue;
+ }
+
+ int32_t foundStart = 0;
+ int32_t foundLimit = 0;
+ UBool foundMatch;
+
+ //
+ // Do the search, check the match result against the expected results.
+ //
+ foundMatch = bms_search(bms, 0, &foundStart, &foundLimit);
+ //TEST_ASSERT_SUCCESS(status);
+ if ((foundMatch && expectedMatchStart < 0) ||
+ (foundStart != expectedMatchStart) ||
+ (foundLimit != expectedMatchLimit)) {
+ TEST_ASSERT(FALSE); // ouput generic error position
+ infoln("Found, expected match start = %d, %d \n"
+ "Found, expected match limit = %d, %d",
+ foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
+ }