ICU-62135.0.1.tar.gz

[apple/icu.git] / icuSources / test / intltest / colldata.cpp
diff --git a/icuSources/test/intltest/colldata.cpp b/icuSources/test/intltest/colldata.cpp

index 2f561c67086730eaad2cae27ec5d898b960d4828..1e37c475ba10b22863bca42836a991b57690dd24 100644 (file)
--- a/icuSources/test/intltest/colldata.cpp
+++ b/icuSources/test/intltest/colldata.cpp
@@ -1,7 +1,9 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
   ******************************************************************************
- *   Copyright (C) 1996-2012, International Business Machines                 *
- *   Corporation and others.  All Rights Reserved.                            *
+ *   Copyright (C) 1996-2016, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
   ******************************************************************************
   */
  
@@ -22,18 +24,19 @@
  
  #include "unicode/uniset.h"
  #include "unicode/uset.h"
+#include "unicode/usetiter.h"
  #include "unicode/ustring.h"
  #include "hash.h"
+#include "normalizer2impl.h"
  #include "uhash.h"
-#include "ucol_imp.h"
+#include "usrchimp.h"
  #include "uassert.h"
  
  #include "colldata.h"
  
-#define ARRAY_SIZE(array) (sizeof(array)/sizeof(array[0]))
-#define NEW_ARRAY(type, count) (type *) uprv_malloc((count) * sizeof(type))
+#define NEW_ARRAY(type, count) (type *) uprv_malloc((size_t)(count) * sizeof(type))
  #define DELETE_ARRAY(array) uprv_free((void *) (array))
-#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (count) * sizeof (src)[0])
+#define ARRAY_COPY(dst, src, count) uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof (src)[0])
  
  CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
      : ces(NULL), listMax(CELIST_BUFFER_SIZE), listSize(0)
@@ -50,18 +53,16 @@ CEList::CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status)
      }
  
      // **** only set flag if string has Han(gul) ****
-    ucol_forceHanImplicit(elems, &status);
+    // ucol_forceHanImplicit(elems, &status); -- removed for ticket #10476
  
      switch (strength)
      {
      default:
          strengthMask |= UCOL_TERTIARYORDERMASK;
-        /* fall through */
-
+        U_FALLTHROUGH;
      case UCOL_SECONDARY:
          strengthMask |= UCOL_SECONDARYORDERMASK;
-        /* fall through */
-
+        U_FALLTHROUGH;
      case UCOL_PRIMARY:
          strengthMask |= UCOL_PRIMARYORDERMASK;
      }
@@ -410,11 +411,23 @@ bail:
          return;
      }
  
-     UChar32 hanRanges[] = {UCOL_FIRST_HAN, UCOL_LAST_HAN, UCOL_FIRST_HAN_COMPAT, UCOL_LAST_HAN_COMPAT, UCOL_FIRST_HAN_A, UCOL_LAST_HAN_A,
-                            UCOL_FIRST_HAN_B, UCOL_LAST_HAN_B};
-     UChar  jamoRanges[] = {UCOL_FIRST_L_JAMO, UCOL_FIRST_V_JAMO, UCOL_FIRST_T_JAMO, UCOL_LAST_T_JAMO};
-     UnicodeString hanString = UnicodeString::fromUTF32(hanRanges, ARRAY_SIZE(hanRanges));
-     UnicodeString jamoString(FALSE, jamoRanges, ARRAY_SIZE(jamoRanges));
+    UnicodeSet hanRanges(UNICODE_STRING_SIMPLE("[:Unified_Ideograph:]"), status);
+    if (U_FAILURE(status)) {
+        return;
+    }
+    UnicodeSetIterator hanIter(hanRanges);
+    UnicodeString hanString;
+    while(hanIter.nextRange()) {
+        hanString.append(hanIter.getCodepoint());
+        hanString.append(hanIter.getCodepointEnd());
+    }
+    // TODO: Why U+11FF? The old code had an outdated UCOL_LAST_T_JAMO=0x11F9,
+    // but as of Unicode 6.3 the 11xx block is filled,
+    // and there are also more Jamo T at U+D7CB..U+D7FB.
+    // Maybe use [:HST=T:] and look for the end of the last range?
+    // Maybe use script boundary mappings instead of this code??
+    UChar  jamoRanges[] = {Hangul::JAMO_L_BASE, Hangul::JAMO_V_BASE, Hangul::JAMO_T_BASE + 1, 0x11FF};
+     UnicodeString jamoString(FALSE, jamoRanges, UPRV_LENGTHOF(jamoRanges));
       CEList hanList(coll, hanString, status);
       CEList jamoList(coll, jamoString, status);
       int32_t j = 0;