X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/57a6839dcb3bba09e8228b822b290604668416fe..f59164e3d128c7675a4d3934206346a3384e53a5:/icuSources/test/intltest/alphaindextst.cpp diff --git a/icuSources/test/intltest/alphaindextst.cpp b/icuSources/test/intltest/alphaindextst.cpp index 3daae4b1..3d74efc8 100644 --- a/icuSources/test/intltest/alphaindextst.cpp +++ b/icuSources/test/intltest/alphaindextst.cpp @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 2012-2014, International Business Machines Corporation + * Copyright (c) 2012-2016, International Business Machines Corporation * and others. All Rights Reserved. ********************************************************************/ // @@ -13,6 +13,7 @@ #include "intltest.h" #include "alphaindextst.h" +#include "cmemory.h" #include "unicode/alphaindex.h" #include "unicode/coll.h" @@ -25,8 +26,6 @@ // #include // #include -#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) - namespace { UnicodeString joinLabelsAndAppend(AlphabeticIndex::ImmutableIndex &index, UnicodeString &dest) { @@ -64,6 +63,7 @@ void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char* TESTCASE_AUTO(TestNoLabels); TESTCASE_AUTO(TestChineseZhuyin); TESTCASE_AUTO(TestJapaneseKanji); + TESTCASE_AUTO(TestChineseUnihan); TESTCASE_AUTO_END; } @@ -316,7 +316,7 @@ void AlphabeticIndexTest::APITest() { // if Russian sorts Cyrillic first. int32_t reorderCodes[20]; int32_t expectedLatinIndex = 0; - if (index->getCollator().getReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status) > 0) { + if (index->getCollator().getReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status) > 0) { expectedLatinIndex = index->getBucketCount(status) - 1; } n = index->getBucketIndex(adam, status); @@ -429,7 +429,7 @@ void AlphabeticIndexTest::HackPinyinTest() { AlphabeticIndex aindex(Locale::createFromName("zh"), status); TEST_CHECK_STATUS; - UnicodeString names[sizeof(pinyinTestData) / sizeof(pinyinTestData[0])]; + UnicodeString names[UPRV_LENGTHOF(pinyinTestData)]; int32_t nameCount; for (nameCount=0; pinyinTestData[nameCount] != NULL; nameCount++) { names[nameCount] = UnicodeString(pinyinTestData[nameCount], -1, UnicodeString::kInvariant).unescape(); @@ -494,6 +494,7 @@ static const char *localeAndIndexCharactersLists[][2] = { /* English*/ {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, /* Spanish*/ {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, /* Estonian*/ {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\\u0160:Z:\\u017D:T:U:V:\\u00D5:\\u00C4:\\u00D6:\\u00DC:X:Y"}, + /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, /* Finnish*/ {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C5:\\u00C4:\\u00D6"}, /* Filipino*/ {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, @@ -519,20 +520,11 @@ static const char *localeAndIndexCharactersLists[][2] = { /* Vietnamese*/ {"vi", "A:\\u0102:\\u00C2:B:C:D:\\u0110:E:\\u00CA:F:G:H:I:J:K:L:M:N:O:\\u00D4:\\u01A0:P:Q:R:S:T:U:\\u01AF:V:W:X:Y:Z"}, /* Chinese*/ {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, /* Chinese (Traditional Han)*/ {"zh_Hant", "1\\u5283:2\\u5283:3\\u5283:4\\u5283:5\\u5283:6\\u5283:7\\u5283:8\\u5283:9\\u5283:10\\u5283:11\\u5283:12\\u5283:13\\u5283:14\\u5283:15\\u5283:16\\u5283:17\\u5283:18\\u5283:19\\u5283:20\\u5283:21\\u5283:22\\u5283:23\\u5283:24\\u5283:25\\u5283:26\\u5283:27\\u5283:28\\u5283:29\\u5283:30\\u5283:31\\u5283:32\\u5283:33\\u5283:35\\u5283:36\\u5283:39\\u5283:48\\u5283"}, - - // As of ICU 52, ICU does not have collation data for the following language. - // Therefore, constructing an AlphabeticIndex for it - // ends up with a collator for the default locale - // which makes the test unreliable. (see ticket #10277) - // It exposes a bigger problem in that it may not be desirable for collation - // to fall back to the default locale. - - // /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"}, }; void AlphabeticIndexTest::TestIndexCharactersList() { UErrorCode status = U_ZERO_ERROR; - for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) { + for (int32_t i = 0; i < UPRV_LENGTHOF(localeAndIndexCharactersLists); ++i) { const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i]; const char *locale = localeAndIndexCharacters[0]; UnicodeString expectedIndexCharacters @@ -561,7 +553,7 @@ void AlphabeticIndexTest::TestHaniFirst() { return; } int32_t reorderCodes[] = { USCRIPT_HAN }; - coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status); + coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status); TEST_CHECK_STATUS; AlphabeticIndex index(coll.orphan(), status); TEST_CHECK_STATUS; @@ -590,7 +582,7 @@ void AlphabeticIndexTest::TestPinyinFirst() { return; } int32_t reorderCodes[] = { USCRIPT_HAN }; - coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status); + coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status); TEST_CHECK_STATUS; AlphabeticIndex index(coll.orphan(), status); TEST_CHECK_STATUS; @@ -598,7 +590,7 @@ void AlphabeticIndexTest::TestPinyinFirst() { index.addLabels(Locale::getChinese(), status); assertEquals("getBucketCount()", 28, index.getBucketCount(status)); // ... A-Z ... int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x897f), status); - assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), (int32_t)bucketIndex); + assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), bucketIndex); bucketIndex = index.getBucketIndex("i", status); assertEquals("getBucketIndex(i)", 9, bucketIndex); bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status); @@ -639,7 +631,7 @@ void AlphabeticIndexTest::TestSchSt() { { "Steiff", 22, "St" }, { "Thomas", 23, "T" } }; - for (int32_t i = 0; i < LENGTHOF(testCases); ++i) { + for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); ++i) { const TestCase &testCase = testCases[i]; UnicodeString name = UnicodeString(testCase.name).unescape(); UnicodeString label = UnicodeString(testCase.bucketLabel).unescape(); @@ -672,7 +664,7 @@ void AlphabeticIndexTest::TestNoLabels() { void AlphabeticIndexTest::TestChineseZhuyin() { UErrorCode status = U_ZERO_ERROR; char loc[100]; - uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status); + uloc_forLanguageTag("zh-u-co-zhuyin", loc, UPRV_LENGTHOF(loc), NULL, &status); AlphabeticIndex index(loc, status); LocalPointer immIndex(index.buildImmutableIndex(status)); TEST_CHECK_STATUS; @@ -693,7 +685,7 @@ void AlphabeticIndexTest::TestJapaneseKanji() { // They should all go into the overflow bucket. static const UChar32 kanji[] = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 }; int32_t overflowIndex = immIndex->getBucketCount() - 1; - for(int32_t i = 0; i < LENGTHOF(kanji); ++i) { + for(int32_t i = 0; i < UPRV_LENGTHOF(kanji); ++i) { char msg[40]; sprintf(msg, "kanji[%d]=U+%04lX in overflow bucket", (int)i, (long)kanji[i]); assertEquals(msg, overflowIndex, immIndex->getBucketIndex(UnicodeString(kanji[i]), status)); @@ -701,4 +693,33 @@ void AlphabeticIndexTest::TestJapaneseKanji() { } } +void AlphabeticIndexTest::TestChineseUnihan() { + UErrorCode status = U_ZERO_ERROR; + AlphabeticIndex index("zh-u-co-unihan", status); + if(U_FAILURE(status)) { + dataerrln("unable create an AlphabeticIndex for Chinese/unihan: %s", u_errorName(status)); + return; + } + index.setMaxLabelCount(500, status); // ICU 54 default is 99. + LocalPointer immIndex(index.buildImmutableIndex(status)); + TEST_CHECK_STATUS; + int32_t bucketCount = immIndex->getBucketCount(); + if(bucketCount < 216) { + // There should be at least an underflow and overflow label, + // and one for each of 214 radicals, + // and maybe additional labels for simplified radicals. + dataerrln("too few buckets/labels for Chinese/unihan: %d (is zh/unihan data available?)", + bucketCount); + return; + } else { + logln("Chinese/unihan has %d buckets/labels", bucketCount); + } + // bucketIndex = radical number, adjusted for simplified radicals in lower buckets. + int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x4e5d), status); + assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex); + // radical 100, and there is a 90' since Unicode 8 + bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x7527), status); + assertEquals("getBucketIndex(U+7527)", 101, bucketIndex); +} + #endif