X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..4f1e1a09ce4daed860e35d359ce2fceccb0764e8:/icuSources/test/intltest/transtst.cpp diff --git a/icuSources/test/intltest/transtst.cpp b/icuSources/test/intltest/transtst.cpp index 42a061f0..0a162acb 100644 --- a/icuSources/test/intltest/transtst.cpp +++ b/icuSources/test/intltest/transtst.cpp @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** -* Copyright (C) 1999-2010, International Business Machines +* Copyright (C) 1999-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -23,6 +25,7 @@ #include "unicode/ustring.h" #include "unicode/usetiter.h" #include "unicode/uscript.h" +#include "unicode/utf16.h" #include "cpdtrans.h" #include "nultrans.h" #include "rbt.h" @@ -182,7 +185,7 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(71,TestAnyX); TESTCASE(72,TestSourceTargetSet); TESTCASE(73,TestGurmukhiDevanagari); - TESTCASE(74,TestRuleWhitespace); + TESTCASE(74,TestPatternWhiteSpace); TESTCASE(75,TestAllCodepoints); TESTCASE(76,TestBoilerplate); TESTCASE(77,TestAlternateSyntax); @@ -197,7 +200,6 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, } } -static const UVersionInfo ICU_39 = {3,9,4,0}; /** * Make sure every system transliterator can be instantiated. * @@ -241,7 +243,8 @@ void TransliteratorTest::TestInstantiation() { if (t == 0) { #if UCONFIG_NO_BREAK_ITERATION // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. - if (id.compare((UnicodeString)"Thai-Latin") != 0) + if (id.compare((UnicodeString)"Thai-Latn") != 0 && + id.compare((UnicodeString)"Thai-Latin") != 0) #endif dataerrln(UnicodeString("FAIL: Couldn't create ") + id + /*", parse error " + parseError.code +*/ @@ -409,7 +412,7 @@ void TransliteratorTest::TestRuleBasedInverse(void) { "caccb", "xyzzy", }; - int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); + int32_t DATA_length = UPRV_LENGTHOF(DATA); UErrorCode status = U_ZERO_ERROR; UParseError parseError; @@ -457,7 +460,7 @@ void TransliteratorTest::TestKeyboard(void) { 0, "AycAY", // null means finishKeyboardTransliteration }; - keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA)); delete t; } @@ -492,7 +495,7 @@ void TransliteratorTest::TestKeyboard2(void) { 0, "AycAY", // null means finishKeyboardTransliteration }; - keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA)); delete t; } @@ -526,7 +529,7 @@ void TransliteratorTest::TestKeyboard3(void) { errln("FAIL: RBT constructor failed"); return; } - keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA)); delete t; } @@ -877,7 +880,7 @@ void TransliteratorTest::TestJ329(void) { { FALSE, "a > b; c > d" }, { TRUE, "a > b; no operator; c > d" }, }; - int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); + int32_t DATA_length = UPRV_LENGTHOF(DATA); for (int32_t i=0; i "N"; use "\\\\N" for \N expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), - CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{}\\\\N{}")); - expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{}\\N{}\\N{}\\N{"), + CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{}\\\\N{}\\\\N{}\\\\N{}")); + expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{}\\N{}\\N{}\\N{}\\N{}\\N{"), CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); delete uni2name; @@ -1313,7 +1316,7 @@ void TransliteratorTest::TestLiberalizedID(void) { " Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter", " null ; latin-greek ", NULL /*"Null;Latin-Greek"*/, "compound whitespace", }; - const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]); + const int32_t DATA_length = UPRV_LENGTHOF(DATA); UParseError parseError; UErrorCode status= U_ZERO_ERROR; for (int32_t i=0; i | $1 \\u0345;", "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", }; - static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); + static const int32_t DATA_length = UPRV_LENGTHOF(DATA); for (int32_t d=0; d < DATA_length; d+=3) { if (DATA[d] == RBT) { @@ -2786,7 +2789,7 @@ void TransliteratorTest::TestCompoundLatinRT(){ "kimakurvata", "san\\u0304java" }; - const int MAX_LEN = sizeof(source)/sizeof(source[0]); + const int MAX_LEN = UPRV_LENGTHOF(source); const char* const expected[MAX_LEN] = { "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", @@ -2806,7 +2809,7 @@ void TransliteratorTest::TestCompoundLatinRT(){ "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", "\\u0938\\u0902\\u091c\\u0935" }; - if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) { + if(MAX_LEN != UPRV_LENGTHOF(expected)) { errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!"); return; } @@ -2932,7 +2935,7 @@ void TransliteratorTest::TestLocaleResource() { "el-Latin", "\\u03B2", "v", "Greek-Latin", "\\u03B2", "b", }; - const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]); + const int32_t DATA_length = UPRV_LENGTHOF(DATA); for (int32_t i=0; icreateInverse(status); if (U_FAILURE(status)) { + // The following are forward-only, it is OK that creating an inverse will not work: + // 1. Devanagari-Arabic + // 2. Any-*/BGN + // 2a. Any-*/BGN_1981 + // 3. Any-*/UNGEGN + // 4. Any-*/MNS + // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work. + if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0 + && !(id.startsWith((UnicodeString)"Any-") && + (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS")) + ) #if UCONFIG_NO_BREAK_ITERATION - // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. - if (id.compare((UnicodeString)"Latin-Thai/") != 0) + && id.compare((UnicodeString)"Latin-Thai/") != 0 #endif + ) + { errln((UnicodeString)"FAIL: Could not create inverse of " + id); - + } delete t; delete inv; continue; @@ -3607,6 +3622,7 @@ void TransliteratorTest::CheckIncrementalAux(const Transliterator* t, return; } UBool gotError = FALSE; + (void)gotError; // Suppress set but not used warning. // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? @@ -3861,6 +3877,21 @@ void TransliteratorTest::TestAnyX(void) { CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc")); delete anyLatin; + + status = U_ZERO_ERROR; + Transliterator* anyASCII = + Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status); + if (U_FAILURE(status) || anyASCII==0) { + dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status)); + delete anyASCII; + return; + } + + expect(*anyASCII, + CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"), + CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149")); + + delete anyASCII; } /** @@ -3905,7 +3936,7 @@ void TransliteratorTest::TestAny(void) { Transliterator* anyLatin = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { - errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); + dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status)); return; } @@ -3964,9 +3995,9 @@ void TransliteratorTest::TestSourceTargetSet() { } /** - * Test handling of rule whitespace, for both RBT and UnicodeSet. + * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. */ -void TransliteratorTest::TestRuleWhitespace() { +void TransliteratorTest::TestPatternWhiteSpace() { // Rules const char* r = "a > \\u200E b;"; @@ -4011,7 +4042,7 @@ void TransliteratorTest::TestAllCodepoints(){ for(uint32_t i = 0; i<=0x10ffff; i++){ code = uscript_getScript(i,&status); if(code == USCRIPT_INVALID_CODE){ - errln("uscript_getScript for codepoint \\U%08X failed.\n", i); + dataerrln("uscript_getScript for codepoint \\U%08X failed.", i); } const char* myId = uscript_getName(code); if(!myId) { @@ -4032,14 +4063,14 @@ void TransliteratorTest::TestAllCodepoints(){ if(uprv_strcmp(newId,oldId)!=0){ Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status); if(t==NULL || U_FAILURE(status)){ - errln((UnicodeString)"FAIL: Could not create " + id); + dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status)); } delete t; } if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){ Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status); if(t==NULL || U_FAILURE(status)){ - errln((UnicodeString)"FAIL: Could not create " + id); + dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status)); } delete t; } @@ -4281,7 +4312,6 @@ static const char* BEGIN_END_RULES[] = { "::Upper(Lower);" "::([XYZ]);" }; -static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0])); /* (This entire test is commented out below and will need some heavy revision when we re-add @@ -4305,7 +4335,7 @@ static const char* BOGUS_BEGIN_END_RULES[] = { "::Upper;" "::END;" }; -static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0])); +static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES); */ static const char* BEGIN_END_TEST_CASES[] = { @@ -4335,7 +4365,7 @@ static const char* BEGIN_END_TEST_CASES[] = { // BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" }; -static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0])); +static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES); void TransliteratorTest::TestBeginEnd() { // run through the list of test cases above @@ -4522,9 +4552,9 @@ void TransliteratorTest::TestRuleStripping() { static const UChar expectedRule[] = { 0xE001,0x003E,0x0C01,0x003B,0 }; - UChar result[sizeof(rule)/sizeof(rule[0])]; + UChar result[UPRV_LENGTHOF(rule)]; UErrorCode status = U_ZERO_ERROR; - int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status); + int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status); if (len != u_strlen(expectedRule)) { errln("utrans_stripRules return len = %d", len); } @@ -4558,7 +4588,7 @@ void TransliteratorTest::TestHalfwidthFullwidth(void) { "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020", "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000", }; - int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); + int32_t DATA_length = UPRV_LENGTHOF(DATA); for (int32_t i=0; i