ICU-57166.0.1.tar.gz

[apple/icu.git] / icuSources / test / intltest / alphaindextst.cpp
diff --git a/icuSources/test/intltest/alphaindextst.cpp b/icuSources/test/intltest/alphaindextst.cpp

index 3daae4b1173fab84e877707884ccf066a5ad8edc..3d74efc8ebabc7a8e4ed3d6a13bc63d3daed4b1b 100644 (file)
--- a/icuSources/test/intltest/alphaindextst.cpp
+++ b/icuSources/test/intltest/alphaindextst.cpp
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT:
- * Copyright (c) 2012-2014, International Business Machines Corporation
+ * Copyright (c) 2012-2016, International Business Machines Corporation
   * and others. All Rights Reserved.
   ********************************************************************/
  //
@@ -13,6 +13,7 @@
  
  #include "intltest.h"
  #include "alphaindextst.h"
+#include "cmemory.h"
  
  #include "unicode/alphaindex.h"
  #include "unicode/coll.h"
@@ -25,8 +26,6 @@
  // #include <string>
  // #include <iostream>
  
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
  namespace {
  
  UnicodeString joinLabelsAndAppend(AlphabeticIndex::ImmutableIndex &index, UnicodeString &dest) {
@@ -64,6 +63,7 @@ void AlphabeticIndexTest::runIndexedTest( int32_t index, UBool exec, const char*
      TESTCASE_AUTO(TestNoLabels);
      TESTCASE_AUTO(TestChineseZhuyin);
      TESTCASE_AUTO(TestJapaneseKanji);
+    TESTCASE_AUTO(TestChineseUnihan);
      TESTCASE_AUTO_END;
  }
  
@@ -316,7 +316,7 @@ void AlphabeticIndexTest::APITest() {
      // if Russian sorts Cyrillic first.
      int32_t reorderCodes[20];
      int32_t expectedLatinIndex = 0;
-    if (index->getCollator().getReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status) > 0) {
+    if (index->getCollator().getReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status) > 0) {
          expectedLatinIndex = index->getBucketCount(status) - 1;
      }
      n = index->getBucketIndex(adam, status);
@@ -429,7 +429,7 @@ void AlphabeticIndexTest::HackPinyinTest() {
      AlphabeticIndex aindex(Locale::createFromName("zh"), status);
      TEST_CHECK_STATUS; 
  
-    UnicodeString names[sizeof(pinyinTestData) / sizeof(pinyinTestData[0])];
+    UnicodeString names[UPRV_LENGTHOF(pinyinTestData)];
      int32_t  nameCount;
      for (nameCount=0; pinyinTestData[nameCount] != NULL; nameCount++) {
          names[nameCount] = UnicodeString(pinyinTestData[nameCount], -1, UnicodeString::kInvariant).unescape();
@@ -494,6 +494,7 @@ static const char *localeAndIndexCharactersLists[][2] = {
      /* English*/    {"en", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
      /* Spanish*/    {"es", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:\\u00D1:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
      /* Estonian*/   {"et", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:\\u0160:Z:\\u017D:T:U:V:\\u00D5:\\u00C4:\\u00D6:\\u00DC:X:Y"},
+    /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
      /* Finnish*/    {"fi", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z:\\u00C5:\\u00C4:\\u00D6"},
      /* Filipino*/   {"fil", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
      /* French*/ {"fr", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
@@ -519,20 +520,11 @@ static const char *localeAndIndexCharactersLists[][2] = {
      /* Vietnamese*/ {"vi", "A:\\u0102:\\u00C2:B:C:D:\\u0110:E:\\u00CA:F:G:H:I:J:K:L:M:N:O:\\u00D4:\\u01A0:P:Q:R:S:T:U:\\u01AF:V:W:X:Y:Z"},
      /* Chinese*/    {"zh", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
      /* Chinese (Traditional Han)*/  {"zh_Hant", "1\\u5283:2\\u5283:3\\u5283:4\\u5283:5\\u5283:6\\u5283:7\\u5283:8\\u5283:9\\u5283:10\\u5283:11\\u5283:12\\u5283:13\\u5283:14\\u5283:15\\u5283:16\\u5283:17\\u5283:18\\u5283:19\\u5283:20\\u5283:21\\u5283:22\\u5283:23\\u5283:24\\u5283:25\\u5283:26\\u5283:27\\u5283:28\\u5283:29\\u5283:30\\u5283:31\\u5283:32\\u5283:33\\u5283:35\\u5283:36\\u5283:39\\u5283:48\\u5283"},
-
-    // As of ICU 52, ICU does not have collation data for the following language.
-    // Therefore, constructing an AlphabeticIndex for it
-    // ends up with a collator for the default locale
-    // which makes the test unreliable. (see ticket #10277)
-    // It exposes a bigger problem in that it may not be desirable for collation
-    // to fall back to the default locale.
-
-    // /* Basque*/ {"eu", "A:B:C:D:E:F:G:H:I:J:K:L:M:N:O:P:Q:R:S:T:U:V:W:X:Y:Z"},
  };
  
  void AlphabeticIndexTest::TestIndexCharactersList() {
      UErrorCode status = U_ZERO_ERROR;
-    for (int32_t i = 0; i < LENGTHOF(localeAndIndexCharactersLists); ++i) {
+    for (int32_t i = 0; i < UPRV_LENGTHOF(localeAndIndexCharactersLists); ++i) {
          const char *(&localeAndIndexCharacters)[2] = localeAndIndexCharactersLists[i];
          const char *locale = localeAndIndexCharacters[0];
          UnicodeString expectedIndexCharacters
@@ -561,7 +553,7 @@ void AlphabeticIndexTest::TestHaniFirst() {
          return;
      }
      int32_t reorderCodes[] = { USCRIPT_HAN };
-    coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status);
+    coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status);
      TEST_CHECK_STATUS; 
      AlphabeticIndex index(coll.orphan(), status);
      TEST_CHECK_STATUS; 
@@ -590,7 +582,7 @@ void AlphabeticIndexTest::TestPinyinFirst() {
          return;
      }
      int32_t reorderCodes[] = { USCRIPT_HAN };
-    coll->setReorderCodes(reorderCodes, LENGTHOF(reorderCodes), status);
+    coll->setReorderCodes(reorderCodes, UPRV_LENGTHOF(reorderCodes), status);
      TEST_CHECK_STATUS;
      AlphabeticIndex index(coll.orphan(), status);
      TEST_CHECK_STATUS;
@@ -598,7 +590,7 @@ void AlphabeticIndexTest::TestPinyinFirst() {
      index.addLabels(Locale::getChinese(), status);
      assertEquals("getBucketCount()", 28, index.getBucketCount(status));  // ... A-Z ...
      int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x897f), status);
-    assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), (int32_t)bucketIndex);
+    assertEquals("getBucketIndex(U+897F)", (int32_t)((UChar)0x0058/*X*/ - (UChar)0x0041/*A*/ + 1), bucketIndex);
      bucketIndex = index.getBucketIndex("i", status);
      assertEquals("getBucketIndex(i)", 9, bucketIndex);
      bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x03B1), status);
@@ -639,7 +631,7 @@ void AlphabeticIndexTest::TestSchSt() {
          { "Steiff", 22, "St" },
          { "Thomas", 23, "T" }
      };
-    for (int32_t i = 0; i < LENGTHOF(testCases); ++i) {
+    for (int32_t i = 0; i < UPRV_LENGTHOF(testCases); ++i) {
          const TestCase &testCase = testCases[i];
          UnicodeString name = UnicodeString(testCase.name).unescape();
          UnicodeString label = UnicodeString(testCase.bucketLabel).unescape();
@@ -672,7 +664,7 @@ void AlphabeticIndexTest::TestNoLabels() {
  void AlphabeticIndexTest::TestChineseZhuyin() {
      UErrorCode status = U_ZERO_ERROR;
      char loc[100];
-    uloc_forLanguageTag("zh-u-co-zhuyin", loc, LENGTHOF(loc), NULL, &status);
+    uloc_forLanguageTag("zh-u-co-zhuyin", loc, UPRV_LENGTHOF(loc), NULL, &status);
      AlphabeticIndex index(loc, status);
      LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
      TEST_CHECK_STATUS; 
@@ -693,7 +685,7 @@ void AlphabeticIndexTest::TestJapaneseKanji() {
      // They should all go into the overflow bucket.
      static const UChar32 kanji[] = { 0x4E9C, 0x95C7, 0x4E00, 0x58F1 };
      int32_t overflowIndex = immIndex->getBucketCount() - 1;
-    for(int32_t i = 0; i < LENGTHOF(kanji); ++i) {
+    for(int32_t i = 0; i < UPRV_LENGTHOF(kanji); ++i) {
          char msg[40];
          sprintf(msg, "kanji[%d]=U+%04lX in overflow bucket", (int)i, (long)kanji[i]);
          assertEquals(msg, overflowIndex, immIndex->getBucketIndex(UnicodeString(kanji[i]), status));
@@ -701,4 +693,40 @@ void AlphabeticIndexTest::TestJapaneseKanji() {
      }
  }
  
+// Tests the AlphabeticIndex built for the Chinese unihan collation ("zh-u-co-unihan").
+// Requires at least 216 buckets, then checks the bucket indexes of two sample
+// Han characters. Reports a data error and returns early if the index cannot be
+// created or has too few buckets.
+void AlphabeticIndexTest::TestChineseUnihan() {
+    UErrorCode status = U_ZERO_ERROR;
+    AlphabeticIndex index("zh-u-co-unihan", status);
+    if(U_FAILURE(status)) {
+        dataerrln("unable to create an AlphabeticIndex for Chinese/unihan: %s", u_errorName(status));
+        return;
+    }
+    index.setMaxLabelCount(500, status);  // ICU 54 default is 99.
+    LocalPointer<AlphabeticIndex::ImmutableIndex> immIndex(index.buildImmutableIndex(status));
+    // The same status is passed to setMaxLabelCount() and buildImmutableIndex(); one check follows both.
+    TEST_CHECK_STATUS;
+    int32_t bucketCount = immIndex->getBucketCount();
+    if(bucketCount < 216) {
+        // There should be at least an underflow and overflow label,
+        // and one for each of 214 radicals,
+        // and maybe additional labels for simplified radicals.
+        dataerrln("too few buckets/labels for Chinese/unihan: %d (is zh/unihan data available?)",
+                  bucketCount);
+        return;
+    }
+    logln("Chinese/unihan has %d buckets/labels", bucketCount);
+    // bucketIndex = radical number, adjusted for simplified radicals in lower buckets.
+    int32_t bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x4e5d), status);
+    // Check the lookup status explicitly so a failure is not reported only as a bucket mismatch.
+    TEST_CHECK_STATUS;
+    assertEquals("getBucketIndex(U+4E5D)", 5, bucketIndex);
+    // radical 100, and there is a 90' since Unicode 8
+    bucketIndex = index.getBucketIndex(UnicodeString((UChar)0x7527), status);
+    TEST_CHECK_STATUS;
+    assertEquals("getBucketIndex(U+7527)", 101, bucketIndex);
+}
+
  #endif