+}
+
+/**
+ * Checks UnicodeString::fromUTF32() and UnicodeString::toUTF32().
+ *
+ * The UTF-32 input mixes valid code points with values that the expected
+ * output maps to U+FFFD: 0xd900 and 0xdc00 (surrogate code points), -1 and
+ * 0x110000. The UTF-16 input for the reverse direction contains unpaired
+ * surrogates, which the expected output likewise maps to U+FFFD.
+ */
+void
+UnicodeStringTest::TestUTF32() {
+    // Input string length US_STACKBUF_SIZE to cause overflow of the
+    // initially chosen fStackBuffer due to supplementary characters.
+    static const UChar32 utf32[] = {
+        0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
+        0x10000, 0x20000, 0xe0000, 0x10ffff
+    };
+    static const UChar expected_utf16[] = {
+        0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
+        0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
+    };
+    UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
+    UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
+    if(from32 != expected) {
+        errln("UnicodeString::fromUTF32() did not create the expected string.");
+    }
+
+    static const UChar utf16[] = {
+        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
+    };
+    static const UChar32 expected_utf32[] = {
+        0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
+    };
+    // Larger than expected_utf32 so that the terminating 0 checked below fits.
+    UChar32 result32[16];
+    UErrorCode errorCode = U_ZERO_ERROR;
+    int32_t length32 =
+        UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
+        toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
+    // The error code is tested first, and the length before the contents:
+    // the later operands index result32 by length32 and are only evaluated
+    // once the call succeeded and length32 is known to be the expected
+    // (in-range) value.
+    if( U_FAILURE(errorCode) ||
+        length32 != UPRV_LENGTHOF(expected_utf32) ||
+        0 != uprv_memcmp(result32, expected_utf32, length32*sizeof(UChar32)) ||
+        result32[length32] != 0
+    ) {
+        errln("UnicodeString::toUTF32() did not create the expected string.");
+    }
+}
+
+/**
+ * CheckedArrayByteSink that records whether Flush() was called on it,
+ * so that a test can verify that a producer flushes its sink.
+ */
+class TestCheckedArrayByteSink : public CheckedArrayByteSink {
+public:
+    /** Forwards the output buffer and its capacity to the base class. */
+    TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
+            : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
+    // "override" makes the compiler reject this class if the signature ever
+    // stops matching the base-class virtual; without it the method would
+    // silently become an unrelated function and calledFlush would never be set.
+    virtual void Flush() override { calledFlush = TRUE; }
+    /** TRUE once Flush() has been called at least once. */
+    UBool calledFlush;
+};
+
+/**
+ * Checks conversion between UTF-8 and UnicodeString in both directions:
+ * fromUTF8() fed from a StringPiece and from a std::string, toUTF8() writing
+ * into a ByteSink (including the Flush() call), and toUTF8String() appending
+ * to an existing std::string.
+ */
+void
+UnicodeStringTest::TestUTF8() {
+    // Byte sequences for these code points, in this order:
+    //   0x41, 0xd900, 0x61, 0xdc00, 0x110000, 0x5a,
+    //   0x50000, 0x7a, 0x10000, 0x20000, 0xe0000, 0x10ffff
+    static const uint8_t inputBytes[] = {
+        0x41, 0xed, 0xa4, 0x80,
+        0x61, 0xed, 0xb0, 0x80,
+        0xf4, 0x90, 0x80, 0x80, 0x5a,
+        0xf1, 0x90, 0x80, 0x80, 0x7a,
+        0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
+        0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
+    };
+    static const UChar wantedUnits[] = {
+        0x41, 0xfffd,
+        0x61, 0xfffd,
+        0xfffd, 0x5a,
+        0xd900, 0xdc00, 0x7a,
+        0xd800, 0xdc00, 0xd840, 0xdc00,
+        0xdb40, 0xdc00, 0xdbff, 0xdfff
+    };
+    const char *const inputChars = (const char *)inputBytes;
+    const UnicodeString wanted(FALSE, wantedUnits, UPRV_LENGTHOF(wantedUnits));
+
+    // UTF-8 -> UTF-16, input supplied as a StringPiece.
+    const UnicodeString viaPiece =
+        UnicodeString::fromUTF8(StringPiece(inputChars, (int32_t)sizeof(inputBytes)));
+    if(viaPiece != wanted) {
+        errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
+    }
+
+    // UTF-8 -> UTF-16, input supplied as a std::string.
+    const std::string inputString(inputChars, sizeof(inputBytes));
+    const UnicodeString viaStdString = UnicodeString::fromUTF8(inputString);
+    if(viaStdString != wanted) {
+        errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
+    }
+
+    // UTF-16 (with unpaired surrogates) -> UTF-8.
+    static const UChar sourceUnits[] = {
+        0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
+    };
+    static const uint8_t wantedBytes[] = {
+        0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
+        0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
+    };
+    const size_t wantedLength = sizeof(wantedBytes);
+    UnicodeString source(FALSE, sourceUnits, UPRV_LENGTHOF(sourceUnits));
+
+    char sinkStorage[64];
+    TestCheckedArrayByteSink sink(sinkStorage, (int32_t)sizeof(sinkStorage));
+    source.toUTF8(sink);
+    // Compare the contents only when the length already matches.
+    const UBool lengthMatches = sink.NumberOfBytesWritten() == (int32_t)wantedLength;
+    if(!lengthMatches || 0 != uprv_memcmp(sinkStorage, wantedBytes, wantedLength)) {
+        errln("UnicodeString::toUTF8() did not create the expected string.");
+    }
+    if(!sink.calledFlush) {
+        errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
+    }
+
+    // Start from a non-empty string to verify that toUTF8String() appends.
+    std::string appended("-->");
+    std::string wantedAppended("-->");
+    wantedAppended.append((const char *)wantedBytes, wantedLength);
+    // The return value is captured only to verify that it refers to the argument.
+    std::string &returned = source.toUTF8String(appended);
+    if(&returned != &appended || returned != wantedAppended) {
+        errln("UnicodeString::toUTF8String() did not create the expected string.");
+    }
+}
+
+// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
+//
+// Returns, by value, a UnicodeString built directly in the return statement
+// from uchars with length -1 (presumably meaning "NUL-terminated" -- confirm
+// against the UnicodeString constructor documentation).
+// TestReadOnlyAlias() compares getBuffer() of the returned object with the
+// pointer passed in and only performs its tempSubString() pointer checks
+// when the two are equal.
+static UnicodeString wrapUChars(const UChar *uchars) {
+ // Must remain an unnamed temporary: that is the case being probed.
+ return UnicodeString(TRUE, uchars, -1);
+}
+
+// Exercises UnicodeString objects that alias a caller-owned UChar buffer
+// (via the aliasing constructor and setTo()) and the tempSubString() /
+// tempSubStringBetween() helpers.
+//
+// The checks expect that shrinking operations keep pointing into the aliased
+// buffer, that the aliased buffer itself is never written to, and that
+// getTerminatedBuffer() returns a different buffer whenever the aliased text
+// has no NUL at the string's current length.
+void
+UnicodeStringTest::TestReadOnlyAlias() {
+ // --- Aliasing constructor, then truncate() ---
+ // uchars has its NUL at index 2, i.e. exactly at the aliased length.
+ UChar uchars[]={ 0x61, 0x62, 0 };
+ UnicodeString alias(TRUE, uchars, 2);
+ if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
+ errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
+ // The remaining checks assume a working alias; stop here.
+ return;
+ }
+ alias.truncate(1);
+ if(alias.length()!=1 || alias.getBuffer()!=uchars) {
+ errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
+ }
+ // uchars[1] is 0x62, not NUL, so a terminated buffer of length 1
+ // cannot be the original array.
+ if(alias.getTerminatedBuffer()==uchars) {
+ errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+ "did not allocate and copy as expected.");
+ }
+ if(uchars[1]!=0x62) {
+ errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+ "modified the original buffer.");
+ }
+ if(1!=u_strlen(alias.getTerminatedBuffer())) {
+ errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
+ "does not return a buffer terminated at the proper length.");
+ }
+
+ // --- Aliasing setTo(), then remove() of the whole contents ---
+ alias.setTo(TRUE, uchars, 2);
+ if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
+ errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
+ return;
+ }
+ alias.remove();
+ if(alias.length()!=0) {
+ errln("UnicodeString(read-only-alias).remove() did not work.");
+ }
+ // uchars[0] is 0x61, so an empty terminated buffer cannot be the original array.
+ if(alias.getTerminatedBuffer()==uchars) {
+ errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
+ "did not un-alias as expected.");
+ }
+ if(uchars[0]!=0x61) {
+ errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
+ "modified the original buffer.");
+ }
+ if(0!=u_strlen(alias.getTerminatedBuffer())) {
+ errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
+ "does not return a buffer terminated at length 0.");
+ }
+
+ // --- Removing a prefix, a suffix, or both from an alias of longString ---
+ // Each case re-aliases longString's buffer first, then checks both the
+ // resulting contents and that the buffer pointer still lies inside
+ // longString's buffer at the expected offset.
+ UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
+ alias.setTo(FALSE, longString.getBuffer(), longString.length());
+ alias.remove(0, 10);
+ if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
+ errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
+ }
+ alias.setTo(FALSE, longString.getBuffer(), longString.length());
+ alias.remove(27, 99);
+ if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
+ errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
+ }
+ alias.setTo(FALSE, longString.getBuffer(), longString.length());
+ alias.retainBetween(6, 30);
+ if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
+ errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
+ }
+
+ // --- tempSubString() / tempSubStringBetween() ---
+ // hasRVO records whether a UnicodeString returned by value from wrapUChars()
+ // still points at the caller's array. The buffer-pointer comparisons below
+ // are only enforced when it is TRUE; the content comparisons always are.
+ UChar abc[]={ 0x61, 0x62, 0x63, 0 };
+ UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
+
+ UnicodeString temp;
+ temp.fastCopyFrom(longString.tempSubString());
+ if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
+ errln("UnicodeString.tempSubString() failed");
+ }
+ // Negative start: the expected result is the first 5 code units.
+ temp.fastCopyFrom(longString.tempSubString(-3, 5));
+ if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
+ errln("UnicodeString.tempSubString(-3, 5) failed");
+ }
+ temp.fastCopyFrom(longString.tempSubString(17));
+ if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
+ errln("UnicodeString.tempSubString(17) failed");
+ }
+ // Start beyond the end of the 36-unit string: the expected result is empty.
+ temp.fastCopyFrom(longString.tempSubString(99));
+ if(!temp.isEmpty()) {
+ errln("UnicodeString.tempSubString(99) failed");
+ }
+ temp.fastCopyFrom(longString.tempSubStringBetween(6));
+ if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
+ errln("UnicodeString.tempSubStringBetween(6) failed");
+ }
+ temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
+ if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
+ errln("UnicodeString.tempSubStringBetween(8, 18) failed");
+ }
+ // A substring of a bogus string is expected to be bogus as well.
+ UnicodeString bogusString;
+ bogusString.setToBogus();
+ temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
+ if(!temp.isBogus()) {
+ errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
+ }
+}
+
+/**
+ * Shared driver for Appendable tests: pushes code units, code points and
+ * strings through app, then verifies what arrived in dest and how
+ * getAppendBuffer() responds to different capacity requests.
+ *
+ * @param dest the string whose contents are compared with the appended text
+ * @param app  the Appendable under test
+ */
+void
+UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
+    static const UChar cde[3]={ 0x63, 0x64, 0x65 };
+    static const UChar fg[3]={ 0x66, 0x67, 0 };  // ends with NUL; appended with length -1
+    static const UChar hij[3]={ 0x68, 0x69, 0x6a };
+
+    const UBool reserved=app.reserveAppendCapacity(12);
+    if(!reserved) {
+        errln("Appendable.reserve(12) failed");
+    }
+
+    // One call per append flavor.
+    app.appendCodeUnit(0x61);
+    app.appendCodePoint(0x62);
+    app.appendCodePoint(0x50000);
+    app.appendString(cde, 3);
+    app.appendString(fg, -1);
+
+    // Request a buffer of at least 3 units, fill it, and append from it.
+    UChar scratch[3];
+    int32_t granted=-1;
+    UChar *out=app.getAppendBuffer(3, 3, scratch, 3, &granted);
+    if(granted<3) {
+        errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)granted);
+        return;
+    }
+    u_memcpy(out, hij, 3);
+    app.appendString(out, 3);
+
+    const UnicodeString wanted=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape();
+    if(dest!=wanted) {
+        errln("Appendable.append(...) failed");
+    }
+
+    // A minimum capacity of 0 must yield no buffer and capacity 0.
+    out=app.getAppendBuffer(0, 3, scratch, 3, &granted);
+    if(!(out==NULL && granted==0)) {
+        errln("Appendable.getAppendBuffer(min=0) failed");
+    }
+
+    // A scratch capacity (2) below the minimum (3) must yield the same;
+    // granted is preset to a non-zero value so that the reset is observable.
+    granted=1;
+    out=app.getAppendBuffer(3, 3, scratch, 2, &granted);
+    if(!(out==NULL && granted==0)) {
+        errln("Appendable.getAppendBuffer(scratch<min) failed");
+    }
+}
+
+/**
+ * Minimal Appendable that implements only appendCodeUnit(), forwarding each
+ * code unit to a caller-owned UnicodeString. Every other Appendable method
+ * is inherited from the base class.
+ */
+class SimpleAppendable : public Appendable {
+public:
+    /** Binds to dest; the referenced string must outlive this object. */
+    explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
+    // "override" makes the compiler verify that this really implements the
+    // base-class virtual instead of silently declaring a new overload.
+    virtual UBool appendCodeUnit(UChar c) override { str.append(c); return TRUE; }
+    /** Empties the bound string and returns this object for chaining. */
+    SimpleAppendable &reset() { str.remove(); return *this; }
+private:
+    UnicodeString &str;
+};
+
+/**
+ * Runs the shared Appendable checks against SimpleAppendable,
+ * which implements only appendCodeUnit() itself.
+ */
+void
+UnicodeStringTest::TestAppendable() {
+    UnicodeString target;
+    SimpleAppendable appendable(target);
+    doTestAppendable(target, appendable);
+}
+
+/**
+ * Runs the shared Appendable checks against UnicodeStringAppendable
+ * bound to a fresh UnicodeString.
+ */
+void
+UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
+    UnicodeString target;
+    UnicodeStringAppendable appendable(target);
+    doTestAppendable(target, appendable);
+}
+
+/**
+ * Verifies that sizeof(UnicodeString) equals UNISTR_OBJECT_SIZE, and that
+ * the number of UChars that can be appended before the string leaves its
+ * initial buffer matches the capacity derived from that constant.
+ */
+void
+UnicodeStringTest::TestSizeofUnicodeString() {
+    // See the comments in unistr.h near the declaration of UnicodeString's fields.
+    // See the API comments for UNISTR_OBJECT_SIZE.
+    size_t sizeofUniStr=sizeof(UnicodeString);
+    size_t expected=UNISTR_OBJECT_SIZE;
+    if(expected!=sizeofUniStr) {
+        // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
+        // or the compiler might add more internal padding than expected.
+        errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
+              (int)sizeofUniStr, (int)expected);
+    }
+    if(sizeofUniStr<32) {
+        errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
+    }
+    // We assume that the entire UnicodeString object,
+    // minus the vtable pointer and 2 bytes for flags and short length,
+    // is available for internal storage of UChars.
+    // All operands are converted to int32_t first so that the subtraction
+    // is not carried out in size_t and then implicitly narrowed.
+    int32_t expectedStackBufferLength=
+        ((int32_t)UNISTR_OBJECT_SIZE-(int32_t)sizeof(void *)-2)/U_SIZEOF_UCHAR;
+    UnicodeString s;
+    const UChar *emptyBuffer=s.getBuffer();
+    for(int32_t i=0; i<expectedStackBufferLength; ++i) {
+        s.append((UChar)0x2e);
+    }
+    // Filling exactly the assumed capacity must not move the buffer...
+    const UChar *fullBuffer=s.getBuffer();
+    if(fullBuffer!=emptyBuffer) {
+        errln("unexpected reallocation when filling with assumed stack buffer size of %d",
+              (int)expectedStackBufferLength);
+    }
+    // ...but requesting a terminated buffer needs one more unit for the NUL,
+    // so the buffer is expected to move now.
+    const UChar *terminatedBuffer=s.getTerminatedBuffer();
+    if(terminatedBuffer==emptyBuffer) {
+        errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
+              (int)expectedStackBufferLength);
+    }
+}
+
+// Exercises swap() (member and free function), moveFrom() and -- when
+// U_HAVE_RVALUE_REFERENCES is set -- the move assignment operator and the
+// move constructor.
+//
+// Three source strings are used, described by their inline comments as a
+// read-only alias, a heap-allocated string and a stack-buffer string.
+// For the alias and the heap string the checks compare buffer pointers, so a
+// copy instead of a transfer would be detected; the stack-buffer string is
+// compared by value only.
+// The statements depend on each other's side effects; keep their order.
+void
+UnicodeStringTest::TestMoveSwap() {
+ static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
+ UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc)); // read-only alias
+ UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
+ UnicodeString s3("defg", 4, US_INV); // in stack buffer
+ // Remember s2's buffer so that later checks can follow it from string to string.
+ const UChar *p = s2.getBuffer();
+ s1.swap(s2);
+ if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
+ errln("UnicodeString.swap() did not swap");
+ }
+ // Free-function swap: afterwards s2 holds "defg" and s3 aliases abc.
+ swap(s2, s3);
+ if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
+ errln("swap(UnicodeString) did not swap back");
+ }
+ // moveFrom() a heap string: the target takes over buffer p and the
+ // source is expected to be left bogus.
+ UnicodeString s4;
+ s4.moveFrom(s1);
+ if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
+ errln("UnicodeString.moveFrom(heap) did not move");
+ }
+ // moveFrom() a stack-buffer string: only the value is checked.
+ UnicodeString s5;
+ s5.moveFrom(s2);
+ if(s5 != UNICODE_STRING_SIMPLE("defg")) {
+ errln("UnicodeString.moveFrom(stack) did not move");
+ }
+ // moveFrom() an alias: the target must alias the same array.
+ UnicodeString s6;
+ s6.moveFrom(s3);
+ if(s6.getBuffer() != abc || s6.length() != 3) {
+ errln("UnicodeString.moveFrom(alias) did not move");
+ }
+#if U_HAVE_RVALUE_REFERENCES
+ infoln("TestMoveSwap() with rvalue references");
+ // static_cast to an rvalue reference selects the move overloads.
+ s1 = static_cast<UnicodeString &&>(s6);
+ if(s1.getBuffer() != abc || s1.length() != 3) {
+ errln("UnicodeString move assignment operator did not move");
+ }
+ UnicodeString s7(static_cast<UnicodeString &&>(s4));
+ if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
+ errln("UnicodeString move constructor did not move");
+ }
+#else
+ infoln("TestMoveSwap() without rvalue references");
+ // Declared so that the code after the #endif compiles in both configurations.
+ UnicodeString s7;
+#endif
+
+ // Move self assignment leaves the object valid but in an undefined state.
+ // Do it to make sure there is no crash,
+ // but do not check for any particular resulting value.
+ s1.moveFrom(s1);
+ s2.moveFrom(s2);
+ s3.moveFrom(s3);
+ s4.moveFrom(s4);
+ s5.moveFrom(s5);
+ s6.moveFrom(s6);
+ s7.moveFrom(s7);
+ // Simple copy assignment must work.
+ UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
+ s1 = s6 = s4 = s7 = simple;
+ if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
+ errln("UnicodeString copy after self-move did not work");
+ }
+}
+
+/**
+ * Checks that the UnicodeString functions taking UTF-16 buffers accept
+ * uint16_t pointers: the copying constructors, the read-only-alias and
+ * writable-alias constructors, and extract().
+ */
+void
+UnicodeStringTest::TestUInt16Pointers() {
+    static const uint16_t constUnits[] = { 0x61, 0x62, 0x63, 0 };
+    uint16_t units[4];
+
+    // Constructors reading from a const uint16_t buffer.
+    UnicodeString abc(u"abc");
+    assertEquals("abc from pointer", abc, UnicodeString(constUnits));
+    assertEquals("abc from pointer+length", abc, UnicodeString(constUnits, 3));
+    assertEquals("abc from read-only-alias pointer", abc, UnicodeString(TRUE, constUnits, 3));
+
+    // Writable alias over units: the appended text is checked both through
+    // the string and by reading the array back.
+    UnicodeString writable(units, 0, 4);
+    writable.append(u'a');
+    writable.append(u'b');
+    writable.append(u'c');
+    assertEquals("abc from writable alias", abc, writable);
+    assertEquals("buffer=abc from writable alias", abc, UnicodeString(units, 3));
+
+    // extract() into the same array.
+    UnicodeString def(u"def");
+    UErrorCode errorCode = U_ZERO_ERROR;
+    int32_t extracted = def.extract(units, 4, errorCode);
+    TEST_ASSERT_STATUS(errorCode);
+    assertEquals("def from extract()", def, UnicodeString(units, extracted));
+}
+
+void
+UnicodeStringTest::TestWCharPointers() {
+#if U_SIZEOF_WCHAR_T==2
+ static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
+ wchar_t arr[4];
+
+ UnicodeString expected(u"abc");
+ assertEquals("abc from pointer", expected, UnicodeString(carr));
+ assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
+ assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
+
+ UnicodeString alias(arr, 0, 4);
+ alias.append(u'a').append(u'b').append(u'c');
+ assertEquals("abc from writable alias", expected, alias);
+ assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
+
+ UErrorCode errorCode = U_ZERO_ERROR;
+ int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
+ TEST_ASSERT_STATUS(errorCode);
+ assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));