X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/48b980fed3435926e0b3a8d72ecb58be703a1c7a..729e4ab9bc6618bc3d8a898e575df7f4019e29ca:/icuSources/test/intltest/ustrtest.cpp?ds=sidebyside diff --git a/icuSources/test/intltest/ustrtest.cpp b/icuSources/test/intltest/ustrtest.cpp index c40b4c4a..cd7e5f73 100644 --- a/icuSources/test/intltest/ustrtest.cpp +++ b/icuSources/test/intltest/ustrtest.cpp @@ -1,15 +1,17 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2008, International Business Machines Corporation and + * Copyright (c) 1997-2010, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ #include "ustrtest.h" +#include "unicode/std_string.h" #include "unicode/unistr.h" #include "unicode/uchar.h" #include "unicode/ustring.h" #include "unicode/locid.h" #include "unicode/ucnv.h" +#include "unicode/uenum.h" #include "cmemory.h" #include "charstr.h" @@ -56,8 +58,10 @@ void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* & case 13: name = "TestUnescape"; if (exec) TestUnescape(); break; case 14: name = "TestCountChar32"; if (exec) TestCountChar32(); break; case 15: name = "TestStringEnumeration"; if (exec) TestStringEnumeration(); break; - case 16: name = "TestCharString"; if (exec) TestCharString(); break; - case 17: name = "TestNameSpace"; if (exec) TestNameSpace(); break; + case 16: name = "TestNameSpace"; if (exec) TestNameSpace(); break; + case 17: name = "TestUTF32"; if (exec) TestUTF32(); break; + case 18: name = "TestUTF8"; if (exec) TestUTF8(); break; + case 19: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break; default: name = ""; break; //needed to end loop } @@ -229,6 +233,29 @@ UnicodeStringTest::TestBasicManipulation() errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1"); } } + +#if U_CHARSET_IS_UTF8 + { + // Test the hardcoded-UTF-8 UnicodeString optimizations. + static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 }; + static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 }; + UnicodeString from8a = UnicodeString((const char *)utf8); + UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1); + UnicodeString from16(FALSE, utf16, LENGTHOF(utf16)); + if(from8a != from16 || from8b != from16) { + errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed"); + } + char buffer[16]; + int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer)); + if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) { + errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed"); + } + length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer)); + if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) { + errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed"); + } + } +#endif } void @@ -979,6 +1006,16 @@ UnicodeStringTest::TestReverse() if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) { errln("reverse() failed with supplementary characters"); } + + // Test case for ticket #8091: + // UnicodeString::reverse() failed to see a lead surrogate in the middle of + // an odd-length string that contains no other lead surrogates. + test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape(); + UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape(); + test.reverse(); + if(test!=expected) { + errln("reverse() failed with only lead surrogate in the middle"); + } } void @@ -1093,6 +1130,30 @@ UnicodeStringTest::TestMiscellaneous() if(test1.hasMetaData() || UnicodeString().hasMetaData()) { errln("UnicodeString::hasMetaData() returns TRUE"); } + + // test getTerminatedBuffer() on a truncated, shared, heap-allocated string + test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789."); + test1.truncate(36); // ensure length()