X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/46f4442e9a5a4f3b98b7c1083586332f6a8a99a4..d25163bfc042dbef00577180ee21dd3460fc3715:/icuSources/test/intltest/csdetest.cpp diff --git a/icuSources/test/intltest/csdetest.cpp b/icuSources/test/intltest/csdetest.cpp index 1e3b64dd..110009c6 100644 --- a/icuSources/test/intltest/csdetest.cpp +++ b/icuSources/test/intltest/csdetest.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** - * Copyright (C) 2005-2008, International Business Machines + * Copyright (C) 2005-2015, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** */ @@ -11,6 +11,7 @@ #include "unicode/ucnv.h" #include "unicode/unistr.h" #include "unicode/putil.h" +#include "unicode/uniset.h" #include "intltest.h" #include "csdetest.h" @@ -32,6 +33,14 @@ #define CH_SPACE 0x0020 #define CH_SLASH 0x002F +#define TEST_ASSERT(x) {if (!(x)) { \ + errln("Failure in file %s, line %d", __FILE__, __LINE__);}} + +#define TEST_ASSERT_SUCCESS(errcode) { if (U_FAILURE(errcode)) { \ + errcheckln(errcode, "Failure in file %s, line %d, status = \"%s\"", __FILE__, __LINE__, u_errorName(errcode));\ + return;}} + + //--------------------------------------------------------------------------- // // Test class boilerplate @@ -75,6 +84,25 @@ void CharsetDetectionTest::runIndexedTest( int32_t index, UBool exec, const char case 5: name = "DetectionTest"; if (exec) DetectionTest(); break; +#if !UCONFIG_NO_LEGACY_CONVERSION + case 6: name = "IBM424Test"; + if (exec) IBM424Test(); + break; + + case 7: name = "IBM420Test"; + if (exec) IBM420Test(); + break; +#else + case 6: + case 7: name = "skip"; break; +#endif + case 8: name = "Ticket6394Test"; + if (exec) Ticket6394Test(); + break; + + case 9: name = "Ticket6954Test"; + if (exec) Ticket6954Test(); + break; default: name = ""; break; //needed to end loop @@ -138,22 +166,22 @@ void CharsetDetectionTest::checkEncoding(const UnicodeString &testString, const u_UCharsToChars(eSplit[0].getBuffer(), codepage, cpLength); codepage[cpLength] = '\0'; - UCharsetDetector *csd = ucsdet_open(&status); + LocalUCharsetDetectorPointer csd(ucsdet_open(&status)); int32_t byteLength = 0; char *bytes = extractBytes(testString, codepage, byteLength); if (bytes == NULL) { #if !UCONFIG_NO_LEGACY_CONVERSION - errln("Can't open a " + encoding + " converter for " + id); + dataerrln("Can't open a " + encoding + " converter for " + id); #endif return; } - ucsdet_setText(csd, bytes, byteLength, &status); + ucsdet_setText(csd.getAlias(), bytes, byteLength, &status); int32_t matchCount = 0; - const UCharsetMatch **matches = ucsdet_detectAll(csd, &matchCount, &status); + const UCharsetMatch **matches = ucsdet_detectAll(csd.getAlias(), &matchCount, &status); UnicodeString name(ucsdet_getName(matches[0], &status)); @@ -207,7 +235,6 @@ void CharsetDetectionTest::checkEncoding(const UnicodeString &testString, const bail: freeBytes(bytes); - ucsdet_close(csd); delete[] eSplit; } @@ -227,10 +254,10 @@ const char *CharsetDetectionTest::getPath(char buffer[2048], const char *filenam void CharsetDetectionTest::ConstructionTest() { - UErrorCode status = U_ZERO_ERROR; - UCharsetDetector *csd = ucsdet_open(&status); - UEnumeration *e = ucsdet_getAllDetectableCharsets(csd, &status); - int32_t count = uenum_count(e, &status); + IcuTestErrorCode status(*this, "ConstructionTest"); + LocalUCharsetDetectorPointer csd(ucsdet_open(status)); + LocalUEnumerationPointer e(ucsdet_getAllDetectableCharsets(csd.getAlias(), status)); + int32_t count = uenum_count(e.getAlias(), status); #ifdef DEBUG_DETECT printf("There are %d recognizers.\n", count); @@ -238,7 +265,7 @@ void CharsetDetectionTest::ConstructionTest() for(int32_t i = 0; i < count; i += 1) { int32_t length; - const char *name = uenum_next(e, &length, &status); + const char *name = uenum_next(e.getAlias(), &length, status); if(name == NULL || length <= 0) { errln("ucsdet_getAllDetectableCharsets() returned a null or empty name!"); @@ -249,8 +276,44 @@ void CharsetDetectionTest::ConstructionTest() #endif } - uenum_close(e); - ucsdet_close(csd); + const char* defDisabled[] = { + "IBM420_rtl", "IBM420_ltr", + "IBM424_rtl", "IBM424_ltr", + 0 + }; + + LocalUEnumerationPointer eActive(ucsdet_getDetectableCharsets(csd.getAlias(), status)); + const char *activeName = NULL; + + while ((activeName = uenum_next(eActive.getAlias(), NULL, status))) { + // the charset must be included in all list + UBool found = FALSE; + + const char *name = NULL; + uenum_reset(e.getAlias(), status); + while ((name = uenum_next(e.getAlias(), NULL, status))) { + if (strcmp(activeName, name) == 0) { + found = TRUE; + break; + } + } + + if (!found) { + errln(UnicodeString(activeName) + " is not included in the all charset list."); + } + + // some charsets are disabled by default + found = FALSE; + for (int32_t i = 0; defDisabled[i] != 0; i++) { + if (strcmp(activeName, defDisabled[i]) == 0) { + found = TRUE; + break; + } + } + if (found) { + errln(UnicodeString(activeName) + " should not be included in the default charset list."); + } + } } void CharsetDetectionTest::UTF8Test() @@ -439,7 +502,7 @@ void CharsetDetectionTest::C1BytesTest() match = ucsdet_detect(csd, &status); if (match == NULL) { - errln("English test with C1 bytes got no matches."); + errcheckln(status, "English test with C1 bytes got no matches. - %s", u_errorName(status)); goto bail; } @@ -483,7 +546,11 @@ void CharsetDetectionTest::DetectionTest() } UXMLParser *parser = UXMLParser::createParser(status); - if (!assertSuccess("UXMLParser::createParser",status)) return; + if (U_FAILURE(status)) { + dataerrln("FAIL: UXMLParser::createParser (%s)", u_errorName(status)); + return; + } + UXMLElement *root = parser->parseFile(testFilePath, status); if (!assertSuccess( "parseFile",status)) return; @@ -515,4 +582,280 @@ void CharsetDetectionTest::DetectionTest() #endif } +void CharsetDetectionTest::IBM424Test() +{ +#if !UCONFIG_ONLY_HTML_CONVERSION + UErrorCode status = U_ZERO_ERROR; + + static const UChar chars[] = { + 0x05D4, 0x05E4, 0x05E8, 0x05E7, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05D4, 0x05E6, 0x05D1, 0x05D0, 0x05D9, 0x0020, 0x05D4, 0x05E8, + 0x05D0, 0x05E9, 0x05D9, 0x002C, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x05D0, 0x05DC, 0x05D5, 0x05E3, 0x0020, 0x05D0, 0x05D1, 0x05D9, + 0x05D7, 0x05D9, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x002C, 0x0020, 0x05D4, 0x05D5, 0x05E8, + 0x05D4, 0x0020, 0x05E2, 0x05DC, 0x0020, 0x05E4, 0x05EA, 0x05D9, 0x05D7, 0x05EA, 0x0020, 0x05D7, 0x05E7, 0x05D9, 0x05E8, 0x05EA, + 0x0020, 0x05DE, 0x05E6, 0x0022, 0x05D7, 0x0020, 0x05D1, 0x05E2, 0x05E7, 0x05D1, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D3, 0x05D5, + 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, 0x05D9, 0x0020, 0x05E6, 0x05D4, 0x0022, 0x05DC, 0x0020, 0x05DE, + 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, + 0x0020, 0x05D1, 0x002B, 0x0020, 0x05E8, 0x05E6, 0x05D5, 0x05E2, 0x05EA, 0x0020, 0x05E2, 0x05D6, 0x05D4, 0x002E, 0x0020, 0x05DC, + 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x0020, 0x05D4, 0x05E4, 0x05E6, 0x0022, 0x05E8, 0x002C, 0x0020, 0x05DE, 0x05D4, 0x05E2, 0x05D3, + 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0020, 0x05E2, 0x05D5, 0x05DC, 0x05D4, 0x0020, 0x05EA, 0x05DE, 0x05D5, 0x05E0, 0x05D4, 0x0020, + 0x05E9, 0x05DC, 0x0020, 0x0022, 0x05D4, 0x05EA, 0x05E0, 0x05D4, 0x05D2, 0x05D5, 0x05EA, 0x0020, 0x05E4, 0x05E1, 0x05D5, 0x05DC, + 0x05D4, 0x0020, 0x05DC, 0x05DB, 0x05D0, 0x05D5, 0x05E8, 0x05D4, 0x0020, 0x05E9, 0x05DC, 0x0020, 0x05D7, 0x05D9, 0x05D9, 0x05DC, + 0x05D9, 0x05DD, 0x0020, 0x05D1, 0x05DE, 0x05D4, 0x05DC, 0x05DA, 0x0020, 0x05DE, 0x05D1, 0x05E6, 0x05E2, 0x0020, 0x05E2, 0x05D5, + 0x05E4, 0x05E8, 0x05EA, 0x0020, 0x05D9, 0x05E6, 0x05D5, 0x05E7, 0x05D4, 0x0022, 0x002E, 0x0020, 0x05DE, 0x05E0, 0x05D3, 0x05DC, + 0x05D1, 0x05DC, 0x05D9, 0x05D8, 0x0020, 0x05E7, 0x05D9, 0x05D1, 0x05DC, 0x0020, 0x05D0, 0x05EA, 0x0020, 0x05D4, 0x05D7, 0x05DC, + 0x05D8, 0x05EA, 0x05D5, 0x0020, 0x05DC, 0x05D0, 0x05D7, 0x05E8, 0x0020, 0x05E9, 0x05E2, 0x05D9, 0x05D9, 0x05DF, 0x0020, 0x05D1, + 0x05EA, 0x05DE, 0x05DC, 0x05D9, 0x05DC, 0x0020, 0x05D4, 0x05E2, 0x05D3, 0x05D5, 0x05D9, 0x05D5, 0x05EA, 0x0000 + }; + + static const UChar chars_reverse[] = { + 0x05EA, 0x05D5, 0x05D9, 0x05D5, 0x05D3, 0x05E2, 0x05D4, 0x0020, 0x05DC, 0x05D9, 0x05DC, 0x05DE, 0x05EA, + 0x05D1, 0x0020, 0x05DF, 0x05D9, 0x05D9, 0x05E2, 0x05E9, 0x0020, 0x05E8, 0x05D7, 0x05D0, 0x05DC, 0x0020, 0x05D5, 0x05EA, 0x05D8, + 0x05DC, 0x05D7, 0x05D4, 0x0020, 0x05EA, 0x05D0, 0x0020, 0x05DC, 0x05D1, 0x05D9, 0x05E7, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05D1, + 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x002E, 0x0022, 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, + 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, 0x0020, 0x05DA, 0x05DC, 0x05D4, 0x05DE, 0x05D1, 0x0020, 0x05DD, 0x05D9, + 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05DC, 0x05E9, 0x0020, 0x05D4, 0x05E8, 0x05D5, 0x05D0, 0x05DB, 0x05DC, 0x0020, 0x05D4, + 0x05DC, 0x05D5, 0x05E1, 0x05E4, 0x0020, 0x05EA, 0x05D5, 0x05D2, 0x05D4, 0x05E0, 0x05EA, 0x05D4, 0x0022, 0x0020, 0x05DC, 0x05E9, + 0x0020, 0x05D4, 0x05E0, 0x05D5, 0x05DE, 0x05EA, 0x0020, 0x05D4, 0x05DC, 0x05D5, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D9, 0x05D5, + 0x05D3, 0x05E2, 0x05D4, 0x05DE, 0x0020, 0x002C, 0x05E8, 0x0022, 0x05E6, 0x05E4, 0x05D4, 0x0020, 0x05D9, 0x05E8, 0x05D1, 0x05D3, + 0x05DC, 0x0020, 0x002E, 0x05D4, 0x05D6, 0x05E2, 0x0020, 0x05EA, 0x05E2, 0x05D5, 0x05E6, 0x05E8, 0x0020, 0x002B, 0x05D1, 0x0020, + 0x05D4, 0x05E7, 0x05D5, 0x05E6, 0x05D9, 0x0020, 0x05EA, 0x05E8, 0x05E4, 0x05D5, 0x05E2, 0x0020, 0x05E2, 0x05E6, 0x05D1, 0x05DE, + 0x05DE, 0x0020, 0x05DC, 0x0022, 0x05D4, 0x05E6, 0x0020, 0x05D9, 0x05DC, 0x05D9, 0x05D9, 0x05D7, 0x0020, 0x05EA, 0x05D5, 0x05D9, + 0x05D5, 0x05D3, 0x05E2, 0x0020, 0x05EA, 0x05D5, 0x05D1, 0x05E7, 0x05E2, 0x05D1, 0x0020, 0x05D7, 0x0022, 0x05E6, 0x05DE, 0x0020, + 0x05EA, 0x05E8, 0x05D9, 0x05E7, 0x05D7, 0x0020, 0x05EA, 0x05D7, 0x05D9, 0x05EA, 0x05E4, 0x0020, 0x05DC, 0x05E2, 0x0020, 0x05D4, + 0x05E8, 0x05D5, 0x05D4, 0x0020, 0x002C, 0x05D8, 0x05D9, 0x05DC, 0x05D1, 0x05DC, 0x05D3, 0x05E0, 0x05DE, 0x0020, 0x05D9, 0x05D7, + 0x05D9, 0x05D1, 0x05D0, 0x0020, 0x05E3, 0x05D5, 0x05DC, 0x05D0, 0x0020, 0x05EA, 0x05EA, 0x0020, 0x002C, 0x05D9, 0x05E9, 0x05D0, + 0x05E8, 0x05D4, 0x0020, 0x05D9, 0x05D0, 0x05D1, 0x05E6, 0x05D4, 0x0020, 0x05D8, 0x05D9, 0x05DC, 0x05E7, 0x05E8, 0x05E4, 0x05D4, + 0x0000 + }; + + int32_t bLength = 0, brLength = 0; + + UnicodeString s1(chars); + UnicodeString s2(chars_reverse); + + char *bytes = extractBytes(s1, "IBM424", bLength); + char *bytes_r = extractBytes(s2, "IBM424", brLength); + + UCharsetDetector *csd = ucsdet_open(&status); + ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status); + if (U_FAILURE(status)) { + errln("Error opening charset detector. - %s", u_errorName(status)); + } + const UCharsetMatch *match; + const char *name; + + ucsdet_setText(csd, bytes, bLength, &status); + match = ucsdet_detect(csd, &status); + + if (match == NULL) { + errcheckln(status, "Encoding detection failure for IBM424_rtl: got no matches. - %s", u_errorName(status)); + goto bail; + } + + name = ucsdet_getName(match, &status); + if (strcmp(name, "IBM424_rtl") != 0) { + errln("Encoding detection failure for IBM424_rtl: got %s", name); + } + + ucsdet_setText(csd, bytes_r, brLength, &status); + match = ucsdet_detect(csd, &status); + + if (match == NULL) { + errln("Encoding detection failure for IBM424_ltr: got no matches."); + goto bail; + } + + name = ucsdet_getName(match, &status); + if (strcmp(name, "IBM424_ltr") != 0) { + errln("Encoding detection failure for IBM424_ltr: got %s", name); + } + +bail: + freeBytes(bytes); + freeBytes(bytes_r); + ucsdet_close(csd); +#endif +} + +void CharsetDetectionTest::IBM420Test() +{ +#if !UCONFIG_ONLY_HTML_CONVERSION + UErrorCode status = U_ZERO_ERROR; + + static const UChar chars[] = { + 0x0648, 0x064F, 0x0636, 0x0639, 0x062A, 0x0020, 0x0648, 0x0646, 0x064F, 0x0641, 0x0630, 0x062A, 0x0020, 0x0628, 0x0631, 0x0627, + 0x0645, 0x062C, 0x0020, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, 0x0639, 0x062F, 0x064A, 0x062F, 0x0629, 0x0020, 0x0641, + 0x064A, 0x0020, 0x0645, 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0627, 0x0644, 0x062A, 0x0623, 0x0645, 0x064A, 0x0646, 0x0020, + 0x0627, 0x0644, 0x0648, 0x0637, 0x0646, 0x064A, 0x002C, 0x0020, 0x0645, 0x0639, 0x0020, 0x0645, 0x0644, 0x0627, 0x0626, 0x0645, + 0x062A, 0x0647, 0x0627, 0x0020, 0x062F, 0x0627, 0x0626, 0x0645, 0x0627, 0x064B, 0x0020, 0x0644, 0x0644, 0x0627, 0x062D, 0x062A, + 0x064A, 0x0627, 0x062C, 0x0627, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, 0x062A, 0x063A, 0x064A, 0x0631, 0x0629, 0x0020, 0x0644, + 0x0644, 0x0645, 0x062C, 0x062A, 0x0645, 0x0639, 0x0020, 0x0648, 0x0644, 0x0644, 0x062F, 0x0648, 0x0644, 0x0629, 0x002E, 0x0020, + 0x062A, 0x0648, 0x0633, 0x0639, 0x062A, 0x0020, 0x0648, 0x062A, 0x0637, 0x0648, 0x0631, 0x062A, 0x0020, 0x0627, 0x0644, 0x0645, + 0x0624, 0x0633, 0x0633, 0x0629, 0x0020, 0x0628, 0x0647, 0x062F, 0x0641, 0x0020, 0x0636, 0x0645, 0x0627, 0x0646, 0x0020, 0x0634, + 0x0628, 0x0643, 0x0629, 0x0020, 0x0623, 0x0645, 0x0627, 0x0646, 0x0020, 0x0644, 0x0633, 0x0643, 0x0627, 0x0646, 0x0020, 0x062F, + 0x0648, 0x0644, 0x0629, 0x0020, 0x0627, 0x0633, 0x0631, 0x0627, 0x0626, 0x064A, 0x0644, 0x0020, 0x0628, 0x0648, 0x062C, 0x0647, + 0x0020, 0x0627, 0x0644, 0x0645, 0x062E, 0x0627, 0x0637, 0x0631, 0x0020, 0x0627, 0x0644, 0x0627, 0x0642, 0x062A, 0x0635, 0x0627, + 0x062F, 0x064A, 0x0629, 0x0020, 0x0648, 0x0627, 0x0644, 0x0627, 0x062C, 0x062A, 0x0645, 0x0627, 0x0639, 0x064A, 0x0629, 0x002E, + 0x0000 + }; + static const UChar chars_reverse[] = { + 0x002E, 0x0629, 0x064A, 0x0639, 0x0627, 0x0645, 0x062A, 0x062C, 0x0627, 0x0644, 0x0627, 0x0648, 0x0020, 0x0629, 0x064A, 0x062F, + 0x0627, 0x0635, 0x062A, 0x0642, 0x0627, 0x0644, 0x0627, 0x0020, 0x0631, 0x0637, 0x0627, 0x062E, 0x0645, 0x0644, 0x0627, 0x0020, + 0x0647, 0x062C, 0x0648, 0x0628, 0x0020, 0x0644, 0x064A, 0x0626, 0x0627, 0x0631, 0x0633, 0x0627, 0x0020, 0x0629, 0x0644, 0x0648, + 0x062F, 0x0020, 0x0646, 0x0627, 0x0643, 0x0633, 0x0644, 0x0020, 0x0646, 0x0627, 0x0645, 0x0623, 0x0020, 0x0629, 0x0643, 0x0628, + 0x0634, 0x0020, 0x0646, 0x0627, 0x0645, 0x0636, 0x0020, 0x0641, 0x062F, 0x0647, 0x0628, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, + 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0631, 0x0648, 0x0637, 0x062A, 0x0648, 0x0020, 0x062A, 0x0639, 0x0633, 0x0648, 0x062A, + 0x0020, 0x002E, 0x0629, 0x0644, 0x0648, 0x062F, 0x0644, 0x0644, 0x0648, 0x0020, 0x0639, 0x0645, 0x062A, 0x062C, 0x0645, 0x0644, + 0x0644, 0x0020, 0x0629, 0x0631, 0x064A, 0x063A, 0x062A, 0x0645, 0x0644, 0x0627, 0x0020, 0x062A, 0x0627, 0x062C, 0x0627, 0x064A, + 0x062A, 0x062D, 0x0627, 0x0644, 0x0644, 0x0020, 0x064B, 0x0627, 0x0645, 0x0626, 0x0627, 0x062F, 0x0020, 0x0627, 0x0647, 0x062A, + 0x0645, 0x0626, 0x0627, 0x0644, 0x0645, 0x0020, 0x0639, 0x0645, 0x0020, 0x002C, 0x064A, 0x0646, 0x0637, 0x0648, 0x0644, 0x0627, + 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0644, 0x0627, 0x0020, 0x0629, 0x0633, 0x0633, 0x0624, 0x0645, 0x0020, 0x064A, + 0x0641, 0x0020, 0x0629, 0x062F, 0x064A, 0x062F, 0x0639, 0x0020, 0x0646, 0x064A, 0x0645, 0x0623, 0x062A, 0x0020, 0x062C, 0x0645, + 0x0627, 0x0631, 0x0628, 0x0020, 0x062A, 0x0630, 0x0641, 0x064F, 0x0646, 0x0648, 0x0020, 0x062A, 0x0639, 0x0636, 0x064F, 0x0648, + 0x0000, + }; + + int32_t bLength = 0, brLength = 0; + + UnicodeString s1(chars); + UnicodeString s2(chars_reverse); + + char *bytes = extractBytes(s1, "IBM420", bLength); + char *bytes_r = extractBytes(s2, "IBM420", brLength); + + UCharsetDetector *csd = ucsdet_open(&status); + if (U_FAILURE(status)) { + errln("Error opening charset detector. - %s", u_errorName(status)); + } + ucsdet_setDetectableCharset(csd, "IBM424_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM424_ltr", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_rtl", TRUE, &status); + ucsdet_setDetectableCharset(csd, "IBM420_ltr", TRUE, &status); + const UCharsetMatch *match; + const char *name; + + ucsdet_setText(csd, bytes, bLength, &status); + match = ucsdet_detect(csd, &status); + + if (match == NULL) { + errcheckln(status, "Encoding detection failure for IBM420_rtl: got no matches. - %s", u_errorName(status)); + goto bail; + } + + name = ucsdet_getName(match, &status); + if (strcmp(name, "IBM420_rtl") != 0) { + errln("Encoding detection failure for IBM420_rtl: got %s\n", name); + } + + ucsdet_setText(csd, bytes_r, brLength, &status); + match = ucsdet_detect(csd, &status); + + if (match == NULL) { + errln("Encoding detection failure for IBM420_ltr: got no matches.\n"); + goto bail; + } + + name = ucsdet_getName(match, &status); + if (strcmp(name, "IBM420_ltr") != 0) { + errln("Encoding detection failure for IBM420_ltr: got %s\n", name); + } + +bail: + freeBytes(bytes); + freeBytes(bytes_r); + ucsdet_close(csd); +#endif +} + + +void CharsetDetectionTest::Ticket6394Test() { +#if !UCONFIG_NO_CONVERSION + const char charText[] = "Here is some random English text that should be detected as ISO-8859-1." + "Ticket 6394 claims that ISO-8859-1 will appear in the array of detected " + "encodings more than once. The hop through UnicodeString is for platforms " + "where this char * string is be EBCDIC and needs conversion to Latin1."; + char latin1Text[sizeof(charText)]; + UnicodeString(charText).extract(0, sizeof(charText)-2, latin1Text, sizeof(latin1Text), "ISO-8859-1"); + + UErrorCode status = U_ZERO_ERROR; + UCharsetDetector *csd = ucsdet_open(&status); + ucsdet_setText(csd, latin1Text, -1, &status); + if (U_FAILURE(status)) { + errln("Fail at file %s, line %d. status = %s", __FILE__, __LINE__, u_errorName(status)); + return; + } + + int32_t matchCount = 0; + const UCharsetMatch **matches = ucsdet_detectAll(csd, &matchCount, &status); + if (U_FAILURE(status)) { + errln("Fail at file %s, line %d. status = %s", __FILE__, __LINE__, u_errorName(status)); + return; + } + + UnicodeSet setOfCharsetNames; // UnicodSets can hold strings. + int32_t i; + for (i=0; i