ICU-62123.0.1.tar.gz

[apple/icu.git] / icuSources / test / intltest / csdetest.cpp
diff --git a/icuSources/test/intltest/csdetest.cpp b/icuSources/test/intltest/csdetest.cpp

index cdb39cefeaeed1ef992ecf4765a3be122ea0d97e..4edacad64354f375e5457e622d95f6e99452843a 100644 (file)
--- a/icuSources/test/intltest/csdetest.cpp
+++ b/icuSources/test/intltest/csdetest.cpp
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
   **********************************************************************
- *   Copyright (C) 2005-2012, International Business Machines
+ *   Copyright (C) 2005-2016, International Business Machines
   *   Corporation and others.  All Rights Reserved.
   **********************************************************************
   */
@@ -25,8 +27,6 @@
  #include <stdio.h>
  #endif
  
-#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
-
  #define NEW_ARRAY(type,count) (type *) /*uprv_*/malloc((count) * sizeof(type))
  #define DELETE_ARRAY(array) /*uprv_*/free((void *) (array))
  
@@ -275,6 +275,45 @@ void CharsetDetectionTest::ConstructionTest()
          printf("%s\n", name);
  #endif
      }
+
+    const char* defDisabled[] = {
+        "IBM420_rtl", "IBM420_ltr",
+        "IBM424_rtl", "IBM424_ltr",
+        0
+    };
+
+    LocalUEnumerationPointer eActive(ucsdet_getDetectableCharsets(csd.getAlias(), status));
+    const char *activeName = NULL;
+
+    while ((activeName = uenum_next(eActive.getAlias(), NULL, status))) {
+        // every active charset must also be included in the list of all charsets
+        UBool found = FALSE;
+
+        const char *name = NULL;
+        uenum_reset(e.getAlias(), status);
+        while ((name = uenum_next(e.getAlias(), NULL, status))) {
+            if (strcmp(activeName, name) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+
+        if (!found) {
+            errln(UnicodeString(activeName) + " is not included in the all charset list.");
+        }
+
+        // some charsets are disabled by default
+        found = FALSE;
+        for (int32_t i = 0; defDisabled[i] != 0; i++) {
+            if (strcmp(activeName, defDisabled[i]) == 0) {
+                found = TRUE;
+                break;
+            }
+        }
+        if (found) {
+            errln(UnicodeString(activeName) + " should not be included in the default charset list.");
+        }
+    }
  }
  
  void CharsetDetectionTest::UTF8Test()
@@ -545,6 +584,7 @@ void CharsetDetectionTest::DetectionTest()
  
  void CharsetDetectionTest::IBM424Test()
  {
+#if !UCONFIG_ONLY_HTML_CONVERSION
      UErrorCode status = U_ZERO_ERROR;
      
      static const UChar chars[] = {
@@ -597,6 +637,10 @@ void CharsetDetectionTest::IBM424Test()
      char *bytes_r = extractBytes(s2, "IBM424", brLength);
      
      UCharsetDetector *csd = ucsdet_open(&status);
+       ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
      if (U_FAILURE(status)) {
          errln("Error opening charset detector. - %s", u_errorName(status));
      }
@@ -633,10 +677,12 @@ bail:
      freeBytes(bytes);
      freeBytes(bytes_r);
      ucsdet_close(csd);
+#endif
  }
  
  void CharsetDetectionTest::IBM420Test()
  {
+#if !UCONFIG_ONLY_HTML_CONVERSION
      UErrorCode status = U_ZERO_ERROR;
      
      static const UChar chars[] = {
@@ -684,6 +730,10 @@ void CharsetDetectionTest::IBM420Test()
      if (U_FAILURE(status)) {
          errln("Error opening charset detector. - %s", u_errorName(status));
      }
+       ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status);
+       ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status);
      const UCharsetMatch *match;
      const char *name;
  
@@ -717,6 +767,7 @@ bail:
      freeBytes(bytes);
      freeBytes(bytes_r);
      ucsdet_close(csd);
+#endif
  }
  
  
@@ -767,7 +818,7 @@ void CharsetDetectionTest::Ticket6394Test() {
  //               similar Windows and non-Windows SBCS encodings. State was kept in the shared
  //               Charset Recognizer objects, and could be overwritten.
  void CharsetDetectionTest::Ticket6954Test() {
-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FORMATTING
      UErrorCode status = U_ZERO_ERROR;
      UnicodeString sISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly.";
      UnicodeString ssWindows("This is another small sample of some English text. Just enough to be sure that it detects correctly."