/********************************************************************
* COPYRIGHT:
- * Copyright (c) 1997-2004, International Business Machines Corporation and
+ * Copyright (c) 1997-2012, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#include "ustrtest.h"
+#include "unicode/appendable.h"
+#include "unicode/std_string.h"
#include "unicode/unistr.h"
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/locid.h"
#include "unicode/ucnv.h"
+#include "unicode/uenum.h"
+#include "unicode/utf16.h"
#include "cmemory.h"
+#include "charstr.h"
#if 0
#include "unicode/ustream.h"
-#if U_IOSTREAM_SOURCE >= 199711
#include <iostream>
using namespace std;
-#elif U_IOSTREAM_SOURCE >= 198506
-#include <iostream.h>
-#endif
#endif
case 13: name = "TestUnescape"; if (exec) TestUnescape(); break;
case 14: name = "TestCountChar32"; if (exec) TestCountChar32(); break;
case 15: name = "TestStringEnumeration"; if (exec) TestStringEnumeration(); break;
+ case 16: name = "TestNameSpace"; if (exec) TestNameSpace(); break;
+ case 17: name = "TestUTF32"; if (exec) TestUTF32(); break;
+ case 18: name = "TestUTF8"; if (exec) TestUTF8(); break;
+ case 19: name = "TestReadOnlyAlias"; if (exec) TestReadOnlyAlias(); break;
+ case 20: name = "TestAppendable"; if (exec) TestAppendable(); break;
+ case 21: name = "TestUnicodeStringImplementsAppendable"; if (exec) TestUnicodeStringImplementsAppendable(); break;
+ case 22: name = "TestSizeofUnicodeString"; if (exec) TestSizeofUnicodeString(); break;
+ case 23: name = "TestStartsWithAndEndsWithNulTerminated"; if (exec) TestStartsWithAndEndsWithNulTerminated(); break;
default: name = ""; break; //needed to end loop
}
errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
if (test1.length() != 70)
- errln("length() failed: expected 70, got " + test1.length());
+ errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
if (test2.length() != 30)
- errln("length() failed: expected 30, got " + test2.length());
+ errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
UnicodeString test3;
test3.append((UChar32)0x20402);
errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
}
if(test3.length() != 2){
- errln("append or length failed for UChar32, expected 2, got " + test3.length());
+ errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
}
test3.append((UChar32)0x0074);
if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
}
+ if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
+ errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
+ }
buffer[u_strlen(buffer)]=0xe4;
UnicodeString u(buffer, -1, LENGTHOF(buffer));
cnv=ucnv_open("ISO-8859-1", &errorCode);
UnicodeString v(cs, -1, cnv, errorCode);
ucnv_close(cnv);
- if(v!=UnicodeString("a\\xe4\\x85").unescape()) {
+ if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
}
}
+
+#if U_CHARSET_IS_UTF8
+ {
+ // Test the hardcoded-UTF-8 UnicodeString optimizations.
+ static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
+ static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
+ UnicodeString from8a = UnicodeString((const char *)utf8);
+ UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
+ UnicodeString from16(FALSE, utf16, LENGTHOF(utf16));
+ if(from8a != from16 || from8b != from16) {
+ errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
+ }
+ char buffer[16];
+ int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
+ if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
+ errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
+ }
+ length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
+ if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
+ errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
+ }
+ }
+#endif
}
void
errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
length, u_errorName(errorCode));
}
+ // Test again with just the converter name.
+ if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
+ uprv_memcmp(buffer, expect, 13)!=0 ||
+ buffer[13]!=0 ||
+ U_FAILURE(errorCode)
+ ) {
+ errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
+ length, u_errorName(errorCode));
+ }
// try the constructor
UnicodeString t(expect, sizeof(expect), cnv, errorCode);
(startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
;
if (occurrences != 6)
- errln("indexOf failed: expected to find 6 occurrences, found " + occurrences);
-
+ errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
+
for ( occurrences = 0, startPos = 10;
startPos != -1 && startPos < test1.length();
(startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
;
if (occurrences != 4)
- errln("indexOf with starting offset failed: expected to find 4 occurrences, found " + occurrences);
+ errln(UnicodeString("indexOf with starting offset failed: "
+ "expected to find 4 occurrences, found ") + occurrences);
int32_t endPos = 28;
for ( occurrences = 0, startPos = 5;
(startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
;
if (occurrences != 4)
- errln("indexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences);
+ errln(UnicodeString("indexOf with starting and ending offsets failed: "
+ "expected to find 4 occurrences, found ") + occurrences);
//using UChar32 string
for ( startPos=0, occurrences=0;
(startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
;
if (occurrences != 2)
- errln("indexOf failed: expected to find 2 occurrences, found " + occurrences);
+ errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
//---
for ( occurrences = 0, startPos = 0;
(startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
;
if (occurrences != 16)
- errln("indexOf with character failed: expected to find 16 occurrences, found " + occurrences);
+ errln(UnicodeString("indexOf with character failed: "
+ "expected to find 16 occurrences, found ") + occurrences);
for ( occurrences = 0, startPos = 10;
startPos != -1 && startPos < test1.length();
(startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
;
if (occurrences != 12)
- errln("indexOf with character & start offset failed: expected to find 12 occurrences, found " + occurrences);
+ errln(UnicodeString("indexOf with character & start offset failed: "
+ "expected to find 12 occurrences, found ") + occurrences);
for ( occurrences = 0, startPos = 5, endPos = 28;
startPos != -1 && startPos < test1.length();
(startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
;
if (occurrences != 10)
- errln("indexOf with character & start & end offsets failed: expected to find 10 occurrences, found " + occurrences);
+ errln(UnicodeString("indexOf with character & start & end offsets failed: "
+ "expected to find 10 occurrences, found ") + occurrences);
//testing for UChar32
UnicodeString subString;
(startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
;
if (occurrences != 4)
- errln("lastIndexOf with starting and ending offsets failed: expected to find 4 occurrences, found " + occurrences);
+ errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
+ "expected to find 4 occurrences, found ") + occurrences);
for ( occurrences = 0, startPos = 32;
startPos != -1;
(startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
;
if (occurrences != 11)
- errln("lastIndexOf with character & start & end offsets failed: expected to find 11 occurrences, found " + occurrences);
+ errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
+ "expected to find 11 occurrences, found ") + occurrences);
//testing UChar32
startPos=test3.length();
errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
}
- if (!test1.endsWith(test3)) {
- errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
- }
- if (!test1.endsWith(test3, 0, INT32_MAX)) {
- errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
- }
-
- if(!test1.endsWith(test3.getBuffer(), test3.length())) {
- errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
- }
- if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
- errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
- }
-
- if (!test3.startsWith(test4)) {
- errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
- }
-
- if (test4.startsWith(test3)) {
- errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
- }
+    // test3 must be reported as a suffix of test1 by every endsWith() overload:
+    // whole string, (string, start, length), (buffer, length) and
+    // (buffer, start, length) with length -1.
+    if (!test1.endsWith(test3)) {
+        errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
+    }
+    if (!test1.endsWith(test3, 0, INT32_MAX)) {
+        errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
+    }
+
+    if(!test1.endsWith(test3.getBuffer(), test3.length())) {
+        errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
+    }
+    if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
+        errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
+    }
+
+    // test4 must be a prefix of test3, but not the other way around.
+    // The message names startsWith() because that is the call being checked
+    // (it previously said endsWith(), which sent readers to the wrong API).
+    if (!test3.startsWith(test4)) {
+        errln("startsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
+    }
+
+    if (test4.startsWith(test3)) {
+        errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
+    }
+}
+
+void
+UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
+    // Both probes end in a 0 code unit and are always passed with length -1,
+    // once through the (buffer, length) overload and once through the
+    // (buffer, start, length) overload.
+    static const UChar leading[] = { 0x61, 0x62, 0 };   // "ab"
+    static const UChar trailing[] = { 0x64, 0x65, 0 };  // "de"
+    UnicodeString subject("abcde");
+
+    assertTrue("abcde.startsWith(ab, -1)", subject.startsWith(leading, -1));
+    assertTrue("abcde.startsWith(ab, 0, -1)", subject.startsWith(leading, 0, -1));
+    assertTrue("abcde.endsWith(de, -1)", subject.endsWith(trailing, -1));
+    assertTrue("abcde.endsWith(de, 0, -1)", subject.endsWith(trailing, 0, -1));
}
void
if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
errln("reverse() failed with supplementary characters");
}
+
+ // Test case for ticket #8091:
+ // UnicodeString::reverse() failed to see a lead surrogate in the middle of
+ // an odd-length string that contains no other lead surrogates.
+ test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
+ UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
+ test.reverse();
+ if(test!=expected) {
+ errln("reverse() failed with only lead surrogate in the middle");
+ }
}
void
if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
errln("UnicodeString::hasMetaData() returns TRUE");
}
+
+ // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
+ test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
+ test1.truncate(36); // ensure length()<getCapacity()
+ test2=test1; // share the buffer
+ test1.truncate(5);
+ if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
+ errln("UnicodeString(shared buffer).truncate() failed");
+ }
+ if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
+ errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
+ "modified another copy of the string!");
+ }
+ test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
+ test1.truncate(36); // ensure length()<getCapacity()
+ test2=test1; // share the buffer
+ test1.remove();
+ if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
+ errln("UnicodeString(shared buffer).remove() failed");
+ }
+ if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
+ errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
+ "modified another copy of the string!");
+ }
}
void
// test the UChar32 constructor
UnicodeString c32Test((UChar32)0x10ff2a);
- if( c32Test.length() != UTF_CHAR_LENGTH(0x10ff2a) ||
+ if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
) {
errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
// test the (new) capacity constructor
UnicodeString capTest(5, (UChar32)0x2a, 5);
- if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x2a) ||
+ if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
capTest.char32At(0) != 0x2a ||
capTest.char32At(4) != 0x2a
) {
}
capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
- if( capTest.length() != 5 * UTF_CHAR_LENGTH(0x10ff2a) ||
+ if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
capTest.char32At(0) != 0x10ff2a ||
capTest.char32At(4) != 0x10ff2a
) {
* Test the unescape() function.
*/
void UnicodeStringTest::TestUnescape(void) {
- UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b");
+ UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
UnicodeString OUT("abc");
OUT.append((UChar)0x4567);
OUT.append(" ");
if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
errln("bogus.getBuffer()!=0");
}
+ if (test1.indexOf(test3) != -1) {
+ errln("bogus.indexOf() != -1");
+ }
+ if (test1.lastIndexOf(test3) != -1) {
+ errln("bogus.lastIndexOf() != -1");
+ }
+ if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
+ errln("caseCompare() doesn't work with bogus strings");
+ }
+ if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
+ errln("compareCodePointOrder() doesn't work with bogus strings");
+ }
// verify that non-assignment modifications fail and do not revive a bogus string
test3.setToBogus();
if(ten.clone()!=NULL) {
errln("StringEnumeration.clone()!=NULL");
}
+
+    // test that uenum_openFromStringEnumeration() works
+    // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
+    StringEnumeration *newTen = new TestEnumeration;
+    status=U_ZERO_ERROR;
+    UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
+    if (uten==NULL || U_FAILURE(status)) {
+        errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
+        return;
+    }
+
+    // test uenum_next(): every entry of testEnumStrings must come back in order
+    for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
+        status=U_ZERO_ERROR;
+        pc=uenum_next(uten, &length, &status);
+        if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
+            // Name the function actually under test in the report.
+            errln("File %s, line %d, uenum_next(%d) failed", __FILE__, __LINE__, i);
+        }
+    }
+    // one call past the last element must return NULL
+    status=U_ZERO_ERROR;
+    if(uenum_next(uten, &length, &status)!=NULL) {
+        // The format consumes a file name and a line number, so both must be passed.
+        errln("File %s, line %d, uenum_next(done)!=NULL", __FILE__, __LINE__);
+    }
+
+    // test the uenum_unext(): same walk after a reset, comparing UChar results
+    uenum_reset(uten, &status);
+    for(i=0; i<LENGTHOF(testEnumStrings); ++i) {
+        status=U_ZERO_ERROR;
+        pu=uenum_unext(uten, &length, &status);
+        s=UnicodeString(testEnumStrings[i], "");
+        if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
+            errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
+        }
+    }
+    status=U_ZERO_ERROR;
+    if(uenum_unext(uten, &length, &status)!=NULL) {
+        // The comma after the format string is required: without it __FILE__ is
+        // pasted into the format and __LINE__ is read as the "%s" argument.
+        errln("File %s, Line %d, uenum_unext(done)!=NULL", __FILE__, __LINE__);
+    }
+
+    uenum_close(uten);
+}
+
+/*
+ * Namespace test, to make sure that macros like UNICODE_STRING include the
+ * namespace qualifier.
+ *
+ * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
+ */
+namespace bogus {
+    // Stand-in class that deliberately shares its name with icu::UnicodeString.
+    // It offers the constructor shapes used in TestNameSpace() and merely
+    // records the length argument it was given.
+    class UnicodeString {
+    public:
+        enum EInvariant { kInvariant };
+
+        UnicodeString()
+                : fRecorded(1) {}
+
+        UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength)
+                : fRecorded(textLength) {}
+
+        UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/)
+                : fRecorded(length) {}
+
+    private:
+        int32_t fRecorded;
+    };
+}
+
+void
+UnicodeStringTest::TestNameSpace() {
+    // Bring bogus::UnicodeString into scope. Any UnicodeString macro that does
+    // not qualify the icu class would now hit a name collision.
+    using namespace bogus;
+
+    // One string per UnicodeString macro from unistr.h.
+    icu::UnicodeString viaInvariant=icu::UnicodeString("abc", 3, US_INV);
+    icu::UnicodeString viaMacro=UNICODE_STRING("def", 3);
+    icu::UnicodeString viaSimpleMacro=UNICODE_STRING_SIMPLE("ghi");
+
+    // Concatenate all three so the compiler has to keep them alive.
+    icu::UnicodeString joined=viaInvariant+viaMacro+viaSimpleMacro;
+    if(joined.length()==9) {
+        return;
+    }
+    errln("Something wrong with UnicodeString::operator+().");
+}
+
+void
+UnicodeStringTest::TestUTF32() {
+    // Part 1: fromUTF32().
+    // Input string length US_STACKBUF_SIZE to cause overflow of the
+    // initially chosen fStackBuffer due to supplementary characters.
+    static const UChar32 utf32[] = {
+        0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
+        0x10000, 0x20000, 0xe0000, 0x10ffff
+    };
+    // Surrogate code points, -1 and 0x110000 are expected to come out as U+FFFD;
+    // supplementary code points are expected as surrogate pairs.
+    static const UChar expected_utf16[] = {
+        0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
+        0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
+    };
+    UnicodeString from32 = UnicodeString::fromUTF32(utf32, LENGTHOF(utf32));
+    UnicodeString expected(FALSE, expected_utf16, LENGTHOF(expected_utf16));
+    if(from32 != expected) {
+        errln("UnicodeString::fromUTF32() did not create the expected string.");
+    }
+
+    // Part 2: toUTF32().
+    // Unpaired surrogates are expected as U+FFFD, valid pairs as one code point.
+    static const UChar utf16[] = {
+        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
+    };
+    static const UChar32 expected_utf32[] = {
+        0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
+    };
+    UChar32 result32[16];
+    UErrorCode errorCode = U_ZERO_ERROR;
+    int32_t length32 =
+        UnicodeString(FALSE, utf16, LENGTHOF(utf16)).
+        toUTF32(result32, LENGTHOF(result32), errorCode);
+    // Check the error code first, then the length. The later operands index
+    // result32 with length32, so they are only evaluated once the length is
+    // known to equal LENGTHOF(expected_utf32), which is below the capacity.
+    if( U_FAILURE(errorCode) ||
+        length32 != LENGTHOF(expected_utf32) ||
+        0 != uprv_memcmp(result32, expected_utf32, (size_t)length32*sizeof(UChar32)) ||
+        result32[length32] != 0
+    ) {
+        errln("UnicodeString::toUTF32() did not create the expected string.");
+    }
+}
+
+// CheckedArrayByteSink variant that remembers whether Flush() was ever invoked.
+class TestCheckedArrayByteSink : public CheckedArrayByteSink {
+public:
+    // TRUE once Flush() has been called; starts out FALSE.
+    UBool calledFlush;
+
+    TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
+            : CheckedArrayByteSink(outbuf, capacity),
+              calledFlush(FALSE) {
+    }
+
+    virtual void Flush() {
+        calledFlush = TRUE;
+    }
+};
+
+void
+UnicodeStringTest::TestUTF8() {
+    // ---- UTF-8 to UTF-16 ----
+    static const uint8_t utf8[] = {
+        // Code points:
+        // 0x41, 0xd900,
+        // 0x61, 0xdc00,
+        // 0x110000, 0x5a,
+        // 0x50000, 0x7a,
+        // 0x10000, 0x20000,
+        // 0xe0000, 0x10ffff
+        0x41, 0xed, 0xa4, 0x80,
+        0x61, 0xed, 0xb0, 0x80,
+        0xf4, 0x90, 0x80, 0x80, 0x5a,
+        0xf1, 0x90, 0x80, 0x80, 0x7a,
+        0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
+        0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
+    };
+    static const UChar expected_utf16[] = {
+        0x41, 0xfffd,
+        0x61, 0xfffd,
+        0xfffd, 0x5a,
+        0xd900, 0xdc00, 0x7a,
+        0xd800, 0xdc00, 0xd840, 0xdc00,
+        0xdb40, 0xdc00, 0xdbff, 0xdfff
+    };
+    StringPiece rawBytes((const char *)utf8, (int32_t)sizeof(utf8));
+    UnicodeString decoded = UnicodeString::fromUTF8(rawBytes);
+    UnicodeString wanted16(FALSE, expected_utf16, LENGTHOF(expected_utf16));
+
+    if(decoded != wanted16) {
+        errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
+    }
+#if U_HAVE_STD_STRING
+    // Same input, handed over as a std::string.
+    std::string utf8_string((const char *)utf8, sizeof(utf8));
+    UnicodeString decodedFromStd = UnicodeString::fromUTF8(utf8_string);
+    if(decodedFromStd != wanted16) {
+        errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
+    }
+#endif
+
+    // ---- UTF-16 to UTF-8 ----
+    static const UChar utf16[] = {
+        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
+    };
+    static const uint8_t expected_utf8[] = {
+        0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
+        0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
+    };
+    UnicodeString source16(FALSE, utf16, LENGTHOF(utf16));
+
+    char out8[64];
+    TestCheckedArrayByteSink recorder(out8, (int32_t)sizeof(out8));
+    source16.toUTF8(recorder);
+    // The byte count is compared first; the contents only if the count matches.
+    const UBool countMatches =
+        recorder.NumberOfBytesWritten() == (int32_t)sizeof(expected_utf8);
+    if(!countMatches || 0 != uprv_memcmp(out8, expected_utf8, sizeof(expected_utf8))) {
+        errln("UnicodeString::toUTF8() did not create the expected string.");
+    }
+    if(!recorder.calledFlush) {
+        errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
+    }
+#if U_HAVE_STD_STRING
+    // Start from a non-empty std::string to verify that toUTF8String() appends.
+    std::string appended = "-->";
+    std::string wanted8("-->");
+    wanted8.append((const char *)expected_utf8, sizeof(expected_utf8));
+    // The returned reference must be the very string that was passed in.
+    std::string &returned = source16.toUTF8String(appended);
+    if(returned != wanted8 || &returned != &appended) {
+        errln("UnicodeString::toUTF8String() did not create the expected string.");
+    }
+#endif
+}
+
+// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
+// The UnicodeString is built directly in the return statement from
+// (TRUE, uchars, -1), with no named local in between. TestReadOnlyAlias()
+// compares getBuffer() of the returned object with the input pointer and
+// stores the outcome as hasRVO.
+static UnicodeString wrapUChars(const UChar *uchars) {
+ return UnicodeString(TRUE, uchars, -1);
+}
+
+// Checks UnicodeString objects that alias an external UChar buffer
+// (constructor and setTo() taking a buffer pointer), and the tempSubString()
+// and tempSubStringBetween() helpers. The statements depend on each other's
+// side effects on "alias" and "temp", so their order matters.
+void
+UnicodeStringTest::TestReadOnlyAlias() {
+ UChar uchars[]={ 0x61, 0x62, 0 };
+ // Freshly aliased: both buffer accessors must hand back the caller's array.
+ UnicodeString alias(TRUE, uchars, 2);
+ if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
+ errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
+ return;
+ }
+ // After truncate(1): getBuffer() must still be the original array, while
+ // getTerminatedBuffer() must be a different buffer and uchars[1] must be intact.
+ alias.truncate(1);
+ if(alias.length()!=1 || alias.getBuffer()!=uchars) {
+ errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
+ }
+ if(alias.getTerminatedBuffer()==uchars) {
+ errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+ "did not allocate and copy as expected.");
+ }
+ if(uchars[1]!=0x62) {
+ errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+ "modified the original buffer.");
+ }
+ if(1!=u_strlen(alias.getTerminatedBuffer())) {
+ errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+ "does not return a buffer terminated at the proper length.");
+ }
+
+ // Same scenario through setTo(), followed by remove() instead of truncate().
+ alias.setTo(TRUE, uchars, 2);
+ if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
+ errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
+ return;
+ }
+ alias.remove();
+ if(alias.length()!=0) {
+ errln("UnicodeString(read-only-alias).remove() did not work.");
+ }
+ if(alias.getTerminatedBuffer()==uchars) {
+ errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
+ "did not un-alias as expected.");
+ }
+ if(uchars[0]!=0x61) {
+ errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
+ "modified the original buffer.");
+ }
+ if(0!=u_strlen(alias.getTerminatedBuffer())) {
+ errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
+ "does not return a buffer terminated at length 0.");
+ }
+
+ // Alias a 36-character string and cut pieces off the front, the back and
+ // both ends; each time getBuffer() must point into longString's buffer at
+ // the expected offset.
+ UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
+ alias.setTo(FALSE, longString.getBuffer(), longString.length());
+ alias.remove(0, 10);
+ if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
+ errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
+ }
+ alias.setTo(FALSE, longString.getBuffer(), longString.length());
+ alias.remove(27, 99);
+ if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
+ errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
+ }
+ alias.setTo(FALSE, longString.getBuffer(), longString.length());
+ alias.retainBetween(6, 30);
+ if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
+ errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
+ }
+
+ // hasRVO is TRUE when the object returned by wrapUChars() still points at abc.
+ // The buffer-identity checks below are only enforced in that case.
+ UChar abc[]={ 0x61, 0x62, 0x63, 0 };
+ UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
+
+ // tempSubString() / tempSubStringBetween(): contents are always compared
+ // against the matching range of longString.
+ UnicodeString temp;
+ temp.fastCopyFrom(longString.tempSubString());
+ if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
+ errln("UnicodeString.tempSubString() failed");
+ }
+ // A start of -3 is expected to yield the first 5 code units.
+ temp.fastCopyFrom(longString.tempSubString(-3, 5));
+ if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
+ errln("UnicodeString.tempSubString(-3, 5) failed");
+ }
+ temp.fastCopyFrom(longString.tempSubString(17));
+ if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
+ errln("UnicodeString.tempSubString(17) failed");
+ }
+ // A start beyond the end of the string is expected to yield an empty string.
+ temp.fastCopyFrom(longString.tempSubString(99));
+ if(!temp.isEmpty()) {
+ errln("UnicodeString.tempSubString(99) failed");
+ }
+ temp.fastCopyFrom(longString.tempSubStringBetween(6));
+ if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
+ errln("UnicodeString.tempSubStringBetween(6) failed");
+ }
+ temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
+ if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
+ errln("UnicodeString.tempSubStringBetween(8, 18) failed");
+ }
+ // A substring of a bogus string is expected to be bogus as well.
+ UnicodeString bogusString;
+ bogusString.setToBogus();
+ temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
+ if(!temp.isBogus()) {
+ errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
+ }
+}
+
+// Shared driver: pushes a fixed sequence through app and then checks that dest,
+// the string the caller wired app to, holds the expected result.
+void
+UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
+    static const UChar cde[3]={ 0x63, 0x64, 0x65 };
+    static const UChar fg[3]={ 0x66, 0x67, 0 };
+    static const UChar hij[3]={ 0x68, 0x69, 0x6a };
+
+    const UBool reserved=app.reserveAppendCapacity(12);
+    if(!reserved) {
+        errln("Appendable.reserve(12) failed");
+    }
+
+    // "a", "b", U+50000, "cde" with explicit length, "fg" with length -1.
+    app.appendCodeUnit(0x61);
+    app.appendCodePoint(0x62);
+    app.appendCodePoint(0x50000);
+    app.appendString(cde, 3);
+    app.appendString(fg, -1);
+
+    // Ask for an append buffer of at least 3 units, fill it with "hij" and append it.
+    UChar spare[3];
+    int32_t granted=-1;
+    UChar *window=app.getAppendBuffer(3, 3, spare, 3, &granted);
+    if(granted<3) {
+        errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)granted);
+        return;
+    }
+    u_memcpy(window, hij, 3);
+    app.appendString(window, 3);
+    if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
+        errln("Appendable.append(...) failed");
+    }
+
+    // Invalid requests: both must yield a NULL buffer and a capacity of 0.
+    window=app.getAppendBuffer(0, 3, spare, 3, &granted);
+    if(!(window==NULL && granted==0)) {
+        errln("Appendable.getAppendBuffer(min=0) failed");
+    }
+    granted=1;
+    window=app.getAppendBuffer(3, 3, spare, 2, &granted);
+    if(!(window==NULL && granted==0)) {
+        errln("Appendable.getAppendBuffer(scratch<min) failed");
+    }
+}
+
+// Minimal Appendable that overrides only appendCodeUnit() and forwards every
+// code unit to a UnicodeString held by reference.
+class SimpleAppendable : public Appendable {
+public:
+    explicit SimpleAppendable(UnicodeString &dest)
+            : target(dest) {
+    }
+
+    // Appends c to the wrapped string and always returns TRUE.
+    virtual UBool appendCodeUnit(UChar c) {
+        target.append(c);
+        return TRUE;
+    }
+
+    // Calls remove() on the wrapped string and returns this object.
+    SimpleAppendable &reset() {
+        target.remove();
+        return *this;
+    }
+
+private:
+    UnicodeString &target;
+};
+
+void
+UnicodeStringTest::TestAppendable() {
+    // Run the shared checks through the SimpleAppendable defined in this file.
+    UnicodeString collected;
+    SimpleAppendable writer(collected);
+    doTestAppendable(collected, writer);
+}
+
+void
+UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
+    // Run the shared checks through a UnicodeStringAppendable.
+    UnicodeString collected;
+    UnicodeStringAppendable writer(collected);
+    doTestAppendable(collected, writer);
+}
+
+void
+UnicodeStringTest::TestSizeofUnicodeString() {
+    // See the comments in unistr.h near the declaration of UnicodeString's fields.
+    // Expected object size: 32 bytes with 4-byte pointers, 40 bytes with 8-byte pointers.
+    const size_t pointerSize=sizeof(void *);
+    size_t expected;
+    if(pointerSize==4) {
+        expected=32;
+    } else if(pointerSize==8) {
+        expected=40;
+    } else {
+        // Nothing to compare against on any other pointer width.
+        logln("This platform has neither 32-bit nor 64-bit pointers.");
+        return;
+    }
+
+    const size_t actual=sizeof(UnicodeString);
+    if(expected!=actual) {
+        errln("sizeof(UnicodeString)=%d, expected %d", (int)actual, (int)expected);
+    }
}