/********************************************************************
* COPYRIGHT:
- * Copyright (c) 2012-2014, International Business Machines Corporation
+ * Copyright (c) 2012-2016, International Business Machines Corporation
* and others. All Rights Reserved.
********************************************************************/
//
#include "intltest.h"
#include "alphaindextst.h"
+#include "cmemory.h"
#include "unicode/alphaindex.h"
#include "unicode/coll.h"
// #include <string>
// #include <iostream>
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
namespace {
UnicodeString joinLabelsAndAppend(AlphabeticIndex::ImmutableIndex &index, UnicodeString &dest) {
TESTCASE_AUTO(TestNoLabels);
TESTCASE_AUTO(TestChineseZhuyin);
TESTCASE_AUTO(TestJapaneseKanji);
+ TESTCASE_AUTO(TestChineseUnihan);
TESTCASE_AUTO_END;
}
// if Russian sorts Cyrillic first.
int32_t reorderCodes[20];
int32_t expectedLatinIndex = 0;
- if (index->getCollator().getReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status) > 0) {
+ if (index->getCollator().getReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status) > 0) {
expectedLatinIndex = index->getBucketCount(status) - 1;
}
n = index->getBucketIndex(adam, status);
AlphabeticIndex aindex(Locale::createFromName("zh"), status);
TEST_CHECK_STATUS;
- UnicodeString names[sizeof(pinyinTestData) / sizeof(pinyinTestData[0])];
+ UnicodeString names[UPRV_LENGTHOF(pinyinTestData)];
int32_t nameCount;
for (nameCount=0; pinyinTestData[nameCount] != NULL; nameCount++) {
names[nameCount] = UnicodeString(pinyinTestData[nameCount], -1, UnicodeString::kInvariant).unescape();
/* English*/ {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
/* Spanish*/ {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
/* Estonian*/ {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\\u0160:Z:\\u017D:T:U:V:\\u00D5:\\u00C4:\\u00D6:\\u00DC:X:Y"},
+ /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
/* Finnish*/ {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C5:\\u00C4:\\u00D6"},
/* Filipino*/ {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
/* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
/* Vietnamese*/ {"vi", "A:\\u0102:\\u00C2:B:C:D:\\u0110:E:\\u00CA:F:G:H:I:J:K:L:M:N:O:\\u00D4:\\u01A0:P:Q:R:S:T:U:\\u01AF:V:W:X:Y:Z"},
/* Chinese*/ {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
/* Chinese (Traditional Han)*/ {"zh_Hant", "1\\u5283:2\\u5283:3\\u5283:4\\u5283:5\\u5283:6\\u5283:7\\u5283:8\\u5283:9\\u5283:10\\u5283:11\\u5283:12\\u5283:13\\u5283:14\\u5283:15\\u5283:16\\u5283:17\\u5283:18\\u5283:19\\u5283:20\\u5283:21\\u5283:22\\u5283:23\\u5283:24\\u5283:25\\u5283:26\\u5283:27\\u5283:28\\u5283:29\\u5283:30\\u5283:31\\u5283:32\\u5283:33\\u5283:35\\u5283:36\\u5283:39\\u5283:48\\u5283"},
-
- // As of ICU 52, ICU does not have collation data for the following language.
- // Therefore, constructing an AlphabeticIndex for it
- // ends up with a collator for the default locale
- // which makes the test unreliable. (see ticket #10277)
- // It exposes a bigger problem in that it may not be desirable for collation
- // to fall back to the default locale.
-
- // /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
};
void AlphabeticIndexTest::TestIndexCharactersList() {
UErrorCode status = U_ZERO_ERROR;
- for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) {
+ for (int32_t i = 0; i < UPRV_LENGTHOF(localeAndIndexCharactersLists); ++i) {
const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i];
const char *locale = localeAndIndexCharacters[0];
UnicodeString expectedIndexCharacters
return;
}
int32_t reorderCodes[] = { USCRIPT_HAN };
- coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status);
+ coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status);
TEST_CHECK_STATUS;
AlphabeticIndex index(coll.orphan(), status);
TEST_CHECK_STATUS;
return;
}
int32_t reorderCodes[] = { USCRIPT_HAN };
- coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status);
+ coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status);
TEST_CHECK_STATUS;
AlphabeticIndex index(coll.orphan(), status);
TEST_CHECK_STATUS;
index.addLabels(Locale::getChinese(), status);
assertEquals("getBucketCount()", 28, index.getBucketCount(status)); // ... A-Z ...
int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x897f), status);
- assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), (int32_t)bucketIndex);
+ assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), bucketIndex);
bucketIndex = index.getBucketIndex("i", status);
assertEquals("getBucketIndex(i)", 9, bucketIndex);
bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status);
{ "Steiff", 22, "St" },
{ "Thomas", 23, "T" }
};
- for (int32_t i = 0; i < LENGTHOF(testCases); ++i) {
+ for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); ++i) {
const TestCase &testCase = testCases[i];
UnicodeString name = UnicodeString(testCase.name).unescape();
UnicodeString label = UnicodeString(testCase.bucketLabel).unescape();
void AlphabeticIndexTest::TestChineseZhuyin() {
UErrorCode status = U_ZERO_ERROR;
char loc[100];
- uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status);
+ uloc_forLanguageTag("zh-u-co-zhuyin", loc, UPRV_LENGTHOF(loc), NULL, &status);
AlphabeticIndex index(loc, status);
LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
TEST_CHECK_STATUS;
// They should all go into the overflow bucket.
static const UChar32 kanji[] = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
int32_t overflowIndex = immIndex->getBucketCount() - 1;
- for(int32_t i = 0; i < LENGTHOF(kanji); ++i) {
+ for(int32_t i = 0; i < UPRV_LENGTHOF(kanji); ++i) {
char msg[40];
sprintf(msg, "kanji[%d]=U+%04lX in overflow bucket", (int)i, (long)kanji[i]);
assertEquals(msg, overflowIndex, immIndex->getBucketIndex(UnicodeString(kanji[i]), status));
}
}
+void AlphabeticIndexTest::TestChineseUnihan() {  // Test: build an index for "zh-u-co-unihan", then check its bucket count and the bucket index of two Han characters.
+ UErrorCode status = U_ZERO_ERROR;
+ AlphabeticIndex index("zh-u-co-unihan", status);
+ if(U_FAILURE(status)) {  // construction failed: report through dataerrln and skip the remaining checks
+ dataerrln("unable create an AlphabeticIndex for Chinese/unihan: %s", u_errorName(status));
+ return;
+ }
+ index.setMaxLabelCount(500, status); // ICU 54 default is 99. Raised to 500 here; the check below needs at least 216 buckets.
+ LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));  // the immutable index is only used for the bucket count below
+ TEST_CHECK_STATUS;  // NOTE(review): macro defined elsewhere; presumably reports and returns when status is a failure -- confirm
+ int32_t bucketCount = immIndex->getBucketCount();
+ if(bucketCount < 216) {  // 216 = 214 radicals + underflow + overflow, as itemized in the comment below
+ // There should be at least an underflow and overflow label,
+ // and one for each of 214 radicals,
+ // and maybe additional labels for simplified radicals.
+ dataerrln("too few buckets/labels for Chinese/unihan: %d (is zh/unihan data available?)",
+ bucketCount);
+ return;
+ } else {
+ logln("Chinese/unihan has %d buckets/labels", bucketCount);
+ }
+ // bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
+ int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x4e5d), status);  // looked up on the mutable index, not on immIndex
+ assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);  // U+4E5D is expected in bucket 5
+ // radical 100, and there is a 90' since Unicode 8
+ bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x7527), status);
+ assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);  // 101 rather than 100: shifted by one by the extra 90' bucket noted above
+}
+
#endif