X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..4f1e1a09ce4daed860e35d359ce2fceccb0764e8:/icuSources/test/intltest/csdetest.cpp diff --git a/icuSources/test/intltest/csdetest.cpp b/icuSources/test/intltest/csdetest.cpp index cdb39cef..4edacad6 100644 --- a/icuSources/test/intltest/csdetest.cpp +++ b/icuSources/test/intltest/csdetest.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** - * Copyright (C) 2005-2012, International Business Machines + * Copyright (C) 2005-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -25,8 +27,6 @@ #include #endif -#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) - #define NEW_ARRAY(type,count) (type *) /*uprv_*/malloc((count) * sizeof(type)) #define DELETE_ARRAY(array) /*uprv_*/free((void *) (array)) @@ -275,6 +275,45 @@ void CharsetDetectionTest::ConstructionTest() printf("%s\n", name); #endif } + + const char* defDisabled[] = { + "IBM420_rtl", "IBM420_ltr", + "IBM424_rtl", "IBM424_ltr", + 0 + }; + + LocalUEnumerationPointer eActive(ucsdet_getDetectableCharsets(csd.getAlias(), status)); + const char *activeName = NULL; + + while ((activeName = uenum_next(eActive.getAlias(), NULL, status))) { + // the charset must be included in all list + UBool found = FALSE; + + const char *name = NULL; + uenum_reset(e.getAlias(), status); + while ((name = uenum_next(e.getAlias(), NULL, status))) { + if (strcmp(activeName, name) == 0) { + found = TRUE; + break; + } + } + + if (!found) { + errln(UnicodeString(activeName) + " is not included in the all charset list."); + } + + // some charsets are disabled by default + found = FALSE; + for (int32_t i = 0; defDisabled[i] != 0; i++) { + if (strcmp(activeName, defDisabled[i]) == 0) { + found = TRUE; + break; + } + } + if (found) { + errln(UnicodeString(activeName) + " should not be included in the default charset list."); + } + } } void CharsetDetectionTest::UTF8Test() @@ -545,6 +584,7 @@ void CharsetDetectionTest::DetectionTest() void CharsetDetectionTest::IBM424Test() { +#if !UCONFIG_ONLY_HTML_CONVERSION UErrorCode status = U_ZERO_ERROR; static const UChar chars[] = { @@ -597,6 +637,10 @@ void CharsetDetectionTest::IBM424Test() char *bytes_r = extractBytes(s2, "IBM424", brLength); UCharsetDetector *csd = ucsdet_open(&status); + ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status); if (U_FAILURE(status)) { errln("Error opening charset detector. - %s", u_errorName(status)); } @@ -633,10 +677,12 @@ bail: freeBytes(bytes); freeBytes(bytes_r); ucsdet_close(csd); +#endif } void CharsetDetectionTest::IBM420Test() { +#if !UCONFIG_ONLY_HTML_CONVERSION UErrorCode status = U_ZERO_ERROR; static const UChar chars[] = { @@ -684,6 +730,10 @@ void CharsetDetectionTest::IBM420Test() if (U_FAILURE(status)) { errln("Error opening charset detector. - %s", u_errorName(status)); } + ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status); const UCharsetMatch *match; const char *name; @@ -717,6 +767,7 @@ bail: freeBytes(bytes); freeBytes(bytes_r); ucsdet_close(csd); +#endif } @@ -767,7 +818,7 @@ void CharsetDetectionTest::Ticket6394Test() { // similar Windows and non-Windows SBCS encodings. State was kept in the shared // Charset Recognizer objects, and could be overwritten. void CharsetDetectionTest::Ticket6954Test() { -#if !UCONFIG_NO_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FORMATTING UErrorCode status = U_ZERO_ERROR; UnicodeString sISO = "This is a small sample of some English text. Just enough to be sure that it detects correctly."; UnicodeString ssWindows("This is another small sample of some English text. Just enough to be sure that it detects correctly."