1 /********************************************************************
3 * Copyright (c) 1997-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
8 #include "unicode/appendable.h"
9 #include "unicode/std_string.h"
10 #include "unicode/unistr.h"
11 #include "unicode/uchar.h"
12 #include "unicode/ustring.h"
13 #include "unicode/locid.h"
14 #include "unicode/ucnv.h"
15 #include "unicode/uenum.h"
16 #include "unicode/utf16.h"
21 #include "unicode/ustream.h"
// LENGTHOF(array): element count of a fixed-size array, computed as
// sizeof(array)/sizeof(array[0]) and cast to int32_t.
// Only meaningful for true arrays: for a pointer argument the sizeof
// expression would use the pointer's size, not an element count.
28 #define LENGTHOF(array) (int32_t)((sizeof(array)/sizeof((array)[0])))
// Destructor with an empty body; no explicit cleanup is performed here.
30 UnicodeStringTest::~UnicodeStringTest() {}
// Test dispatcher for this suite.
//   index - selects which test entry is addressed
//   exec  - when true the selected test is actually run (and a suite banner is logged)
//   name  - output: receives the name string of the selected test
//   par   - not used by any line visible in this listing
// Every visible `case` stores the test's name in `name` and, only if `exec`
// is set, calls the matching Test*() member. The default branch sets `name`
// to "" (the original comment says this is "needed to end loop").
// NOTE(review): this listing looks lossy - the `switch` statement, the
// `case 0:` label that owns the "StringCaseTest" entry, and all braces are
// not visible here; confirm against the upstream file.
32 void UnicodeStringTest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char *par
)
34 if (exec
) logln("TestSuite UnicodeStringTest: ");
// Entry named "StringCaseTest"; only its name and log lines are visible here.
37 name
= "StringCaseTest";
39 logln("StringCaseTest---"); logln("");
// One line per test below: publish the name, run it only when exec is set.
44 case 1: name
= "TestBasicManipulation"; if (exec
) TestBasicManipulation(); break;
45 case 2: name
= "TestCompare"; if (exec
) TestCompare(); break;
46 case 3: name
= "TestExtract"; if (exec
) TestExtract(); break;
47 case 4: name
= "TestRemoveReplace"; if (exec
) TestRemoveReplace(); break;
48 case 5: name
= "TestSearching"; if (exec
) TestSearching(); break;
49 case 6: name
= "TestSpacePadding"; if (exec
) TestSpacePadding(); break;
50 case 7: name
= "TestPrefixAndSuffix"; if (exec
) TestPrefixAndSuffix(); break;
51 case 8: name
= "TestFindAndReplace"; if (exec
) TestFindAndReplace(); break;
52 case 9: name
= "TestBogus"; if (exec
) TestBogus(); break;
53 case 10: name
= "TestReverse"; if (exec
) TestReverse(); break;
54 case 11: name
= "TestMiscellaneous"; if (exec
) TestMiscellaneous(); break;
55 case 12: name
= "TestStackAllocation"; if (exec
) TestStackAllocation(); break;
56 case 13: name
= "TestUnescape"; if (exec
) TestUnescape(); break;
57 case 14: name
= "TestCountChar32"; if (exec
) TestCountChar32(); break;
58 case 15: name
= "TestStringEnumeration"; if (exec
) TestStringEnumeration(); break;
59 case 16: name
= "TestNameSpace"; if (exec
) TestNameSpace(); break;
60 case 17: name
= "TestUTF32"; if (exec
) TestUTF32(); break;
61 case 18: name
= "TestUTF8"; if (exec
) TestUTF8(); break;
62 case 19: name
= "TestReadOnlyAlias"; if (exec
) TestReadOnlyAlias(); break;
63 case 20: name
= "TestAppendable"; if (exec
) TestAppendable(); break;
64 case 21: name
= "TestUnicodeStringImplementsAppendable"; if (exec
) TestUnicodeStringImplementsAppendable(); break;
65 case 22: name
= "TestSizeofUnicodeString"; if (exec
) TestSizeofUnicodeString(); break;
66 case 23: name
= "TestStartsWithAndEndsWithNulTerminated"; if (exec
) TestStartsWithAndEndsWithNulTerminated(); break;
68 default: name
= ""; break; //needed to end loop
// Exercises basic UnicodeString editing and construction: clone(), insert(),
// replace(), extract(), operator+=, length(), the UChar32 overloads of
// append/setTo/insert/replace/operator=, moveIndex32(), getChar32Start() and
// getChar32Limit(), the substring-style constructors and setTo(), operator+,
// and the "length == -1" handling of pointer+length APIs.
// Every mismatch is reported through errln(); nothing is returned.
// NOTE(review): this listing appears to have lost lines (several declarations,
// braces, and the statement that the "remove() failed" check verifies are not
// visible), so it has to be restored from the upstream file before it compiles.
73 UnicodeStringTest::TestBasicManipulation()
75 UnicodeString
test1("Now is the time for all men to come swiftly to the aid of the party.\n");
76 UnicodeString expectedValue
;
// Work on a clone as well, to check that edits on the copy behave like edits on the original.
79 c
=(UnicodeString
*)test1
.clone();
80 test1
.insert(24, "good ");
81 expectedValue
= "Now is the time for all good men to come swiftly to the aid of the party.\n";
82 if (test1
!= expectedValue
)
83 errln("insert() failed: expected \"" + expectedValue
+ "\"\n,got \"" + test1
+ "\"");
85 c
->insert(24, "good ");
86 if(*c
!= expectedValue
) {
87 errln("clone()->insert() failed: expected \"" + expectedValue
+ "\"\n,got \"" + *c
+ "\"");
92 expectedValue
= "Now is the time for all good men to come to the aid of the party.\n";
93 if (test1
!= expectedValue
)
94 errln("remove() failed: expected \"" + expectedValue
+ "\"\n,got \"" + test1
+ "\"");
96 test1
.replace(58, 6, "ir country");
97 expectedValue
= "Now is the time for all good men to come to the aid of their country.\n";
98 if (test1
!= expectedValue
)
99 errln("replace() failed: expected \"" + expectedValue
+ "\"\n,got \"" + test1
+ "\"");
// Round-trip the first 15 code units through a raw buffer and back into a string.
102 test1
.extract(0, 15, temp
);
104 UnicodeString
test2(temp
, 15);
106 expectedValue
= "Now is the time";
107 if (test2
!= expectedValue
)
108 errln("extract() failed: expected \"" + expectedValue
+ "\"\n,got \"" + test2
+ "\"");
110 test2
+= " for me to go!\n";
111 expectedValue
= "Now is the time for me to go!\n";
112 if (test2
!= expectedValue
)
113 errln("operator+=() failed: expected \"" + expectedValue
+ "\"\n,got \"" + test2
+ "\"");
115 if (test1
.length() != 70)
116 errln(UnicodeString("length() failed: expected 70, got ") + test1
.length());
117 if (test2
.length() != 30)
118 errln(UnicodeString("length() failed: expected 30, got ") + test2
.length());
// Appending a supplementary code point must yield a surrogate pair (2 code units).
121 test3
.append((UChar32
)0x20402);
122 if(test3
!= CharsToUnicodeString("\\uD841\\uDC02")){
123 errln((UnicodeString
)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3
));
125 if(test3
.length() != 2){
126 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3
.length());
128 test3
.append((UChar32
)0x0074);
129 if(test3
!= CharsToUnicodeString("\\uD841\\uDC02t")){
130 errln((UnicodeString
)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3
));
// The guard checks for length 3 (pair + 't'); the message reports 3 to match it
// (it previously said "expected 2").
132 if(test3
.length() != 3){
133 errln((UnicodeString
)"append or length failed for UChar32, expected 3, got " + test3
.length());
136 // test some UChar32 overloads
137 if( test3
.setTo((UChar32
)0x10330).length() != 2 ||
138 test3
.insert(0, (UChar32
)0x20100).length() != 4 ||
139 test3
.replace(2, 2, (UChar32
)0xe0061).length() != 4 ||
140 (test3
= (UChar32
)0x14001).length() != 2
142 errln((UnicodeString
)"simple UChar32 overloads for replace, insert, setTo or = failed");
146 // test moveIndex32()
147 UnicodeString s
=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150 s
.moveIndex32(2, -1)!=0 ||
151 s
.moveIndex32(2, 1)!=4 ||
152 s
.moveIndex32(2, 2)!=5 ||
153 s
.moveIndex32(5, -2)!=2 ||
154 s
.moveIndex32(0, -1)!=0 ||
155 s
.moveIndex32(6, 1)!=6
157 errln("UnicodeString::moveIndex32() failed");
160 if(s
.getChar32Start(1)!=0 || s
.getChar32Start(2)!=2) {
161 errln("UnicodeString::getChar32Start() failed");
164 if(s
.getChar32Limit(1)!=2 || s
.getChar32Limit(2)!=2) {
165 errln("UnicodeString::getChar32Limit() failed");
170 // test new 2.2 constructors and setTo function that parallel Java's substring function.
171 UnicodeString
src("Hello folks how are you?");
172 UnicodeString
target1("how are you?");
173 if (target1
!= UnicodeString(src
, 12)) {
174 errln("UnicodeString(const UnicodeString&, int32_t) failed");
176 UnicodeString
target2("folks");
177 if (target2
!= UnicodeString(src
, 6, 5)) {
178 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
180 if (target1
!= target2
.setTo(src
, 12)) {
181 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
186 // op+ is new in ICU 2.8
187 UnicodeString s
=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
188 if(s
!=UnicodeString("abcdefghi", "")) {
189 errln("operator+(UniStr, UniStr) failed");
194 // tests for Jitterbug 2360
195 // verify that APIs with source pointer + length accept length == -1
196 // mostly test only where modified, only few functions did not already do this
197 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
198 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
// The buffer holds a NUL at index 5, so length==-1 calls are compared against u_strlen(buffer).
201 UChar buffer
[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
202 UnicodeString s
, t(buffer
, -1, LENGTHOF(buffer
));
204 if(s
.setTo(buffer
, -1, LENGTHOF(buffer
)).length()!=u_strlen(buffer
)) {
205 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
207 if(t
.length()!=u_strlen(buffer
)) {
208 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211 if(0!=s
.caseCompare(buffer
, -1, U_FOLD_CASE_DEFAULT
)) {
212 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
214 if(0!=s
.caseCompare(0, s
.length(), buffer
, U_FOLD_CASE_DEFAULT
)) {
215 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
// Overwrite the embedded NUL so the buffer has no terminator; the expected
// length then becomes the full capacity LENGTHOF(buffer).
218 buffer
[u_strlen(buffer
)]=0xe4;
219 UnicodeString
u(buffer
, -1, LENGTHOF(buffer
));
220 if(s
.setTo(buffer
, -1, LENGTHOF(buffer
)).length()!=LENGTHOF(buffer
)) {
221 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
223 if(u
.length()!=LENGTHOF(buffer
)) {
224 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227 static const char cs
[]={ 0x61, (char)0xe4, (char)0x85, 0 };
229 UErrorCode errorCode
=U_ZERO_ERROR
;
// NOTE(review): the converter opened here has no visible ucnv_close() in this
// listing - confirm it is closed in the upstream file.
231 cnv
=ucnv_open("ISO-8859-1", &errorCode
);
232 UnicodeString
v(cs
, -1, cnv
, errorCode
);
234 if(v
!=CharsToUnicodeString("a\\xe4\\x85")) {
235 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
239 #if U_CHARSET_IS_UTF8
241 // Test the hardcoded-UTF-8 UnicodeString optimizations.
242 static const uint8_t utf8
[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
243 static const UChar utf16
[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
244 UnicodeString from8a
= UnicodeString((const char *)utf8
);
245 UnicodeString from8b
= UnicodeString((const char *)utf8
, (int32_t)sizeof(utf8
)-1);
246 UnicodeString
from16(FALSE
, utf16
, LENGTHOF(utf16
));
247 if(from8a
!= from16
|| from8b
!= from16
) {
248 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251 int32_t length8
=from16
.extract(0, 0x7fffffff, buffer
, (uint32_t)sizeof(buffer
));
252 if(length8
!=((int32_t)sizeof(utf8
)-1) || 0!=uprv_memcmp(buffer
, utf8
, sizeof(utf8
))) {
253 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
255 length8
=from16
.extract(1, 2, buffer
, (uint32_t)sizeof(buffer
));
256 if(length8
!=4 || buffer
[length8
]!=0 || 0!=uprv_memcmp(buffer
, utf8
+1, length8
)) {
257 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
// Exercises UnicodeString comparison: the relational operators, compare() in
// its UnicodeString / UChar* / char* / offset+length forms, compareBetween(),
// strings that share a buffer but differ in length, compareCodePointOrder(),
// caseCompare()/caseCompareBetween() with both folding options, and the
// srcLength == -1 convention for compare/indexOf/lastIndexOf/replace.
// Every mismatch is reported through errln(); nothing is returned.
// NOTE(review): this listing appears to have lost lines (declarations such as
// the loop index and `result`, several `if` headers and all closing braces are
// not visible); restore them from the upstream file before building.
264 UnicodeStringTest::TestCompare()
266 UnicodeString
test1("this is a test");
267 UnicodeString
test2("this is a test");
268 UnicodeString
test3("this is a test of the emergency broadcast system");
269 UnicodeString
test4("never say, \"this is a test\"!!");
271 UnicodeString
test5((UChar
)0x5000);
272 UnicodeString
test6((UChar
)0x5100);
// Same text as test1/test2, once as NUL-terminated UChar data and once as char data.
274 UChar uniChars
[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
275 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
276 char chars
[] = "this is a test";
278 // test operator== and operator!=
279 if (test1
!= test2
|| test1
== test3
|| test1
== test4
)
280 errln("operator== or operator!= failed");
282 // test operator> and operator<
283 if (test1
> test2
|| test1
< test2
|| !(test1
< test3
) || !(test1
> test4
) ||
286 errln("operator> or operator< failed");
289 // test operator>= and operator<=
290 if (!(test1
>= test2
) || !(test1
<= test2
) || !(test1
<= test3
) || !(test1
>= test4
))
291 errln("operator>= or operator<= failed");
293 // test compare(UnicodeString)
294 if (test1
.compare(test2
) != 0 || test1
.compare(test3
) >= 0 || test1
.compare(test4
) <= 0)
295 errln("compare(UnicodeString) failed");
297 //test compare(offset, length, UnicodeString)
298 if(test1
.compare(0, 14, test2
) != 0 ||
299 test3
.compare(0, 14, test2
) != 0 ||
300 test4
.compare(12, 14, test2
) != 0 ||
301 test3
.compare(0, 18, test1
) <=0 )
302 errln("compare(offset, length, UnicodeString) failed");
304 // test compare(UChar*)
305 if (test2
.compare(uniChars
) != 0 || test3
.compare(uniChars
) <= 0 || test4
.compare(uniChars
) >= 0)
306 errln("compare(UChar*) failed");
308 // test compare(char*)
309 if (test2
.compare(chars
) != 0 || test3
.compare(chars
) <= 0 || test4
.compare(chars
) >= 0)
310 errln("compare(char*) failed");
312 // test compare(UChar*, length)
// NOTE(review): both operands of this || are the same expression, so the
// second adds no coverage; presumably a different receiver or length was
// intended - confirm before changing.
313 if (test1
.compare(uniChars
, 4) <= 0 || test1
.compare(uniChars
, 4) <= 0)
314 errln("compare(UChar*, length) failed");
316 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
317 if (test1
.compare(0, 14, test2
, 0, 14) != 0
318 || test1
.compare(0, 14, test3
, 0, 14) != 0
319 || test1
.compare(0, 14, test4
, 12, 14) != 0)
320 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
322 if (test1
.compare(10, 4, test2
, 0, 4) >= 0
323 || test1
.compare(10, 4, test3
, 22, 9) <= 0
324 || test1
.compare(10, 4, test4
, 22, 4) != 0)
325 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
327 // test compareBetween
328 if (test1
.compareBetween(0, 14, test2
, 0, 14) != 0 || test1
.compareBetween(0, 14, test3
, 0, 14) != 0
329 || test1
.compareBetween(0, 14, test4
, 12, 26) != 0)
330 errln("compareBetween failed");
332 if (test1
.compareBetween(10, 14, test2
, 0, 4) >= 0 || test1
.compareBetween(10, 14, test3
, 22, 31) <= 0
333 || test1
.compareBetween(10, 14, test4
, 22, 26) != 0)
334 errln("compareBetween failed");
336 // test compare() etc. with strings that share a buffer but are not equal
337 test2
=test1
; // share the buffer, length() too large for the stackBuffer
338 test2
.truncate(1); // change only the length, not the buffer
339 if( test1
==test2
|| test1
<=test2
||
340 test1
.compare(test2
)<=0 ||
341 test1
.compareCodePointOrder(test2
)<=0 ||
342 test1
.compareCodePointOrder(0, INT32_MAX
, test2
)<=0 ||
343 test1
.compareCodePointOrder(0, INT32_MAX
, test2
, 0, INT32_MAX
)<=0 ||
344 test1
.compareCodePointOrderBetween(0, INT32_MAX
, test2
, 0, INT32_MAX
)<=0 ||
345 test1
.caseCompare(test2
, U_FOLD_CASE_DEFAULT
)<=0
347 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350 /* test compareCodePointOrder() */
352 /* these strings are in ascending order */
353 static const UChar strings
[][4]={
354 { 0x61, 0 }, /* U+0061 */
355 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
356 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
357 { 0xd800, 0 }, /* U+d800 */
358 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
359 { 0xdfff, 0 }, /* U+dfff */
360 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
361 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
362 { 0xd800, 0xdc02, 0 }, /* U+10002 */
363 { 0xd84d, 0xdc56, 0 } /* U+23456 */
365 UnicodeString u
[20]; // must be at least as long as strings[]
// Wrap each table row in a UnicodeString, then require every neighbour pair
// to compare strictly ascending in code point order.
368 for(i
=0; i
<(int32_t)(sizeof(strings
)/sizeof(strings
[0])); ++i
) {
369 u
[i
]=UnicodeString(TRUE
, strings
[i
], -1);
372 for(i
=0; i
<(int32_t)(sizeof(strings
)/sizeof(strings
[0])-1); ++i
) {
373 if(u
[i
].compareCodePointOrder(u
[i
+1])>=0 || u
[i
].compareCodePointOrder(0, INT32_MAX
, u
[i
+1].getBuffer())>=0) {
374 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i
);
379 /* test caseCompare() */
382 _mixed
[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
383 _otherDefault
[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
384 _otherExcludeSpecialI
[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
385 _different
[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388 mixed(TRUE
, _mixed
, -1),
389 otherDefault(TRUE
, _otherDefault
, -1),
390 otherExcludeSpecialI(TRUE
, _otherExcludeSpecialI
, -1),
391 different(TRUE
, _different
, -1);
395 /* test caseCompare() */
// NOTE(review): the errln() calls below print `result` with %ld, but the
// declaration of `result` is not visible here - confirm the argument really
// has type long (or cast it), since a narrower type would mismatch the format.
396 result
=mixed
.caseCompare(otherDefault
, U_FOLD_CASE_DEFAULT
);
397 if(result
!=0 || 0!=mixed
.caseCompareBetween(0, INT32_MAX
, otherDefault
, 0, INT32_MAX
, U_FOLD_CASE_DEFAULT
)) {
398 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result
);
400 result
=mixed
.caseCompare(otherExcludeSpecialI
, U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
402 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result
);
404 result
=mixed
.caseCompare(otherDefault
, U_FOLD_CASE_EXCLUDE_SPECIAL_I
);
405 if(result
==0 || 0==mixed
.caseCompareBetween(0, INT32_MAX
, otherDefault
, 0, INT32_MAX
, U_FOLD_CASE_EXCLUDE_SPECIAL_I
)) {
406 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409 /* test caseCompare() */
410 result
=mixed
.caseCompare(different
, U_FOLD_CASE_DEFAULT
);
412 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result
);
415 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
416 result
=mixed
.caseCompare(1, 4, different
, 1, 5, U_FOLD_CASE_DEFAULT
);
417 if(result
!=0 || 0!=mixed
.caseCompareBetween(1, 5, different
, 1, 6, U_FOLD_CASE_DEFAULT
)) {
418 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result
);
421 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
422 result
=mixed
.caseCompare(1, 4, different
, 1, 4, U_FOLD_CASE_DEFAULT
);
424 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result
);
428 // test that srcLength=-1 is handled in functions that
429 // take input const UChar */int32_t srcLength (j785)
431 static const UChar u
[]={ 0x61, 0x308, 0x62, 0 };
432 UnicodeString s
=UNICODE_STRING("a\\u0308b", 8).unescape();
434 if(s
.compare(u
, -1)!=0 || s
.compare(0, 999, u
, 0, -1)!=0) {
435 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438 if(s
.compareCodePointOrder(u
, -1)!=0 || s
.compareCodePointOrder(0, 999, u
, 0, -1)!=0) {
439 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442 if(s
.caseCompare(u
, -1, U_FOLD_CASE_DEFAULT
)!=0 || s
.caseCompare(0, 999, u
, 0, -1, U_FOLD_CASE_DEFAULT
)!=0) {
443 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446 if(s
.indexOf(u
, 1, -1, 0, 999)!=1 || s
.indexOf(u
+1, -1, 0, 999)!=1 || s
.indexOf(u
+1, -1, 0)!=1) {
447 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450 if(s
.lastIndexOf(u
, 1, -1, 0, 999)!=1 || s
.lastIndexOf(u
+1, -1, 0, 999)!=1 || s
.lastIndexOf(u
+1, -1, 0)!=1) {
451 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
// Build the tail of `u` (from index 1) two ways and require both to match s from index 1.
454 UnicodeString s2
, s3
;
455 s2
.replace(0, 0, u
+1, -1);
456 s3
.replace(0, 0, u
, 1, -1);
457 if(s
.compare(1, 999, s2
)!=0 || s2
!=s3
) {
458 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
// Exercises the UnicodeString::extract() family: extraction into a
// UnicodeString, a UChar array and char arrays, pinning in extractBetween(),
// preflighting with a NULL destination, overflow behaviour with the invariant
// ("") conversion, the NUL-terminating extract(UChar *, capacity, errorCode)
// overload, and extract()/construction through a UTF-8 UConverter.
// Every mismatch is reported through errln(); nothing is returned.
// NOTE(review): this listing appears to have lost lines (declarations of
// test2, test5, dest, length and buffer, several `if` tails and all closing
// braces are not visible); restore them from the upstream file before building.
464 UnicodeStringTest::TestExtract()
466 UnicodeString
test1("Now is the time for all good men to come to the aid of their country.", "");
// Destination arrays are pre-filled with recognisable values so that the
// checks below can tell which elements extract() actually wrote.
468 UChar test3
[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
469 char test4
[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471 char test6
[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
473 test1
.extract(11, 12, test2
);
474 test1
.extract(11, 12, test3
);
475 if (test1
.extract(11, 12, test4
) != 12 || test4
[12] != 0) {
476 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479 // test proper pinning in extractBetween()
480 test1
.extractBetween(-3, 7, test5
);
481 if(test5
!=UNICODE_STRING("Now is ", 7)) {
482 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485 test1
.extractBetween(11, 23, test5
);
486 if (test1
.extract(60, 71, test6
) != 9) {
487 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
489 if (test1
.extract(11, 12, test6
) != 12) {
490 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493 // convert test4 back to Unicode for comparison
494 UnicodeString
test4b(test4
, 12);
496 if (test1
.extract(11, 12, (char *)NULL
) != 12) {
497 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
499 if (test1
.extract(11, -1, test6
) != 0) {
500 errln("UnicodeString.extract(-1) failed to stop reading the string.");
// Compare all four extraction results against the source, one code unit at a time.
503 for (int32_t i
= 0; i
< 12; i
++) {
504 if (test1
.charAt((int32_t)(11 + i
)) != test2
.charAt(i
)) {
505 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i
);
508 if (test1
.charAt((int32_t)(11 + i
)) != test3
[i
]) {
509 errln(UnicodeString("extracting into an array of UChar failed at position ") + i
);
512 if (((char)test1
.charAt((int32_t)(11 + i
))) != test4b
.charAt(i
)) {
513 errln(UnicodeString("extracting into an array of char failed at position ") + i
);
516 if (test1
.charAt((int32_t)(11 + i
)) != test5
.charAt(i
)) {
517 errln(UnicodeString("extracting with extractBetween failed at position ") + i
);
522 // test preflighting and overflows with invariant conversion
523 if (test1
.extract(0, 10, (char *)NULL
, "") != 10) {
524 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
// Plant a sentinel just past the 2-byte capacity to detect an overrun.
527 test4
[2] = (char)0xff;
528 if (test1
.extract(0, 10, test4
, 2, "") != 10) {
529 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
531 if (test4
[2] != (char)0xff) {
532 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
536 // test new, NUL-terminating extract() function
537 UnicodeString
s("terminate", "");
539 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
540 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
542 UErrorCode errorCode
;
545 errorCode
=U_ZERO_ERROR
;
546 length
=s
.extract((UChar
*)NULL
, 0, errorCode
);
547 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=s
.length()) {
// Argument order follows the format "%d (%s) expected %d": actual length,
// error name, expected length. The previous order fed an integer to %s.
548 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length
, u_errorName(errorCode
), s
.length());
551 errorCode
=U_ZERO_ERROR
;
552 length
=s
.extract(dest
, s
.length()-1, errorCode
);
553 if(errorCode
!=U_BUFFER_OVERFLOW_ERROR
|| length
!=s
.length()) {
554 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
555 length
, u_errorName(errorCode
), s
.length());
558 errorCode
=U_ZERO_ERROR
;
559 length
=s
.extract(dest
, s
.length(), errorCode
);
560 if(errorCode
!=U_STRING_NOT_TERMINATED_WARNING
|| length
!=s
.length()) {
561 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
562 length
, u_errorName(errorCode
), s
.length());
// 0xa5 is the fill value listed above; finding it at dest[length] shows no terminator was written.
564 if(dest
[length
-1]!=s
[length
-1] || dest
[length
]!=0xa5) {
565 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568 errorCode
=U_ZERO_ERROR
;
569 length
=s
.extract(dest
, s
.length()+1, errorCode
);
570 if(errorCode
!=U_ZERO_ERROR
|| length
!=s
.length()) {
571 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
572 length
, u_errorName(errorCode
), s
.length());
574 if(dest
[length
-1]!=s
[length
-1] || dest
[length
]!=0 || dest
[length
+1]!=0xa5) {
575 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
580 // test new UConverter extract() and constructor
581 UnicodeString s
=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
// The 13 bytes that the checks below expect from converting `s` with the "UTF-8" converter.
583 static const char expect
[]={
584 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
585 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
586 (char)0xc3, (char)0x84,
587 (char)0xe1, (char)0xbb, (char)0x90
589 UErrorCode errorCode
=U_ZERO_ERROR
;
// NOTE(review): no ucnv_close() for this converter is visible in this listing - confirm upstream.
590 UConverter
*cnv
=ucnv_open("UTF-8", &errorCode
);
593 if(U_SUCCESS(errorCode
)) {
// NOTE(review): `length` is printed with %ld below but its declaration is not
// visible here - confirm the argument type matches the format.
595 if( (length
=s
.extract(NULL
, 0, cnv
, errorCode
))!=13 ||
596 errorCode
!=U_BUFFER_OVERFLOW_ERROR
598 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
599 length
, u_errorName(errorCode
));
601 errorCode
=U_ZERO_ERROR
;
602 if( (length
=s
.extract(buffer
, 2, cnv
, errorCode
))!=13 ||
603 errorCode
!=U_BUFFER_OVERFLOW_ERROR
605 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
606 length
, u_errorName(errorCode
));
610 errorCode
=U_ZERO_ERROR
;
611 if( s
.extract(NULL
, 2, cnv
, errorCode
)==13 || U_SUCCESS(errorCode
)) {
612 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
// A failure code set before the call must make extract() refuse to do the work.
614 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
615 if( s
.extract(NULL
, 0, cnv
, errorCode
)==13 || U_SUCCESS(errorCode
)) {
616 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
618 errorCode
=U_ZERO_ERROR
;
621 if( (length
=s
.extract(buffer
, sizeof(buffer
), cnv
, errorCode
))!=13 ||
622 uprv_memcmp(buffer
, expect
, 13)!=0 ||
626 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
627 length
, u_errorName(errorCode
));
629 // Test again with just the converter name.
630 if( (length
=s
.extract(0, s
.length(), buffer
, sizeof(buffer
), "UTF-8"))!=13 ||
631 uprv_memcmp(buffer
, expect
, 13)!=0 ||
635 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
636 length
, u_errorName(errorCode
));
639 // try the constructor
640 UnicodeString
t(expect
, sizeof(expect
), cnv
, errorCode
);
641 if(U_FAILURE(errorCode
) || s
!=t
) {
642 errln("UnicodeString(UConverter) conversion failed (%s)",
643 u_errorName(errorCode
));
// Exercises replace()/replaceBetween() with UnicodeString, UChar* and char*
// sources, removeBetween(), indexed read access through a reference, and
// setCharAt(). Every mismatch is reported through errln(); nothing is returned.
// NOTE(review): this listing appears to have lost lines - the statements that
// the "remove" checks and the final "expected empty string" check verify, and
// all closing braces, are not visible; restore them from the upstream file.
652 UnicodeStringTest::TestRemoveReplace()
654 UnicodeString
test1("The rain in Spain stays mainly on the plain");
655 UnicodeString
test2("eat SPAMburgers!");
656 UChar test3
[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
657 char test4
[] = "SPAM";
// test5 is a reference to test1, so reads through test5 observe edits made via test1.
658 UnicodeString
& test5
= test1
;
// Each call below uses a different source type or overload of replace().
660 test1
.replace(4, 4, test2
, 4, 4);
661 test1
.replace(12, 5, test3
, 4);
663 test1
.replace(17, 4, test3
);
664 test1
.replace(23, 4, test4
);
665 test1
.replaceBetween(37, 42, test2
, 4, 8);
667 if (test1
!= "The SPAM in SPAM SPAMs SPAMly on the SPAM")
668 errln("One of the replace methods failed:\n"
669 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
670 " got \"" + test1
+ "\"");
673 test1
.removeBetween(26, 28);
675 if (test1
!= "The SPAM in SPAM SPAM SPAM on the SPAM")
676 errln("One of the remove methods failed:\n"
677 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
678 " got \"" + test1
+ "\"");
// Replace every code unit that is not 'S', 'P', 'A', 'M' or a space with 'x' (0x78).
680 for (int32_t i
= 0; i
< test1
.length(); i
++) {
681 if (test5
[i
] != 0x53 && test5
[i
] != 0x50 && test5
[i
] != 0x41 && test5
[i
] != 0x4d && test5
[i
] != 0x20) {
682 test1
.setCharAt(i
, 0x78);
// NOTE(review): this check follows the setCharAt() loop, yet its message
// still says "remove methods" - looks like a copied message; confirm.
686 if (test1
!= "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
687 errln("One of the remove methods failed:\n"
688 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
689 " got \"" + test1
+ "\"");
692 if (test1
.length() != 0)
693 errln("Remove() failed: expected empty string, got \"" + test1
+ "\"");
// Exercises indexOf() and lastIndexOf() with UnicodeString, UChar and UChar32
// needles, with and without start/length limits, by counting occurrences and
// comparing against fixed expected counts. The last part checks that surrogate
// code units that belong to a matched pair are not reported as matches.
// Every mismatch is reported through errln(); nothing is returned.
// NOTE(review): this listing appears to have lost lines (the testData
// declaration, endPos declaration, several `for` headers and conditions, the
// occurrence increments inside two loops, and all closing braces are not
// visible); restore them from the upstream file before building.
697 UnicodeStringTest::TestSearching()
699 UnicodeString
test1("test test ttest tetest testesteststt");
700 UnicodeString
test2("test");
701 UChar testChar
= 0x74;
703 UChar32 testChar32
= 0x20402;
// Test data mixing the surrogate pair d841/dc02 (= U+20402, see test4 below)
// with reversed and unpaired surrogates.
706 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
708 // 8 9 10 11 12 13 14 15
709 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712 0xdc02, 0xd841, 0x0073, 0x0000
714 UnicodeString
test3(testData
);
715 UnicodeString
test4(testChar32
);
717 uint16_t occurrences
= 0;
718 int32_t startPos
= 0;
// Counting idiom used throughout: the loop's third clause performs the search,
// bumps the counter on a hit and advances startPos past the match.
720 startPos
!= -1 && startPos
< test1
.length();
721 (startPos
= test1
.indexOf(test2
, startPos
)) != -1 ? (++occurrences
, startPos
+= 4) : 0)
723 if (occurrences
!= 6)
724 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences
);
726 for ( occurrences
= 0, startPos
= 10;
727 startPos
!= -1 && startPos
< test1
.length();
728 (startPos
= test1
.indexOf(test2
, startPos
)) != -1 ? (++occurrences
, startPos
+= 4) : 0)
730 if (occurrences
!= 4)
731 errln(UnicodeString("indexOf with starting offset failed: "
732 "expected to find 4 occurrences, found ") + occurrences
);
735 for ( occurrences
= 0, startPos
= 5;
736 startPos
!= -1 && startPos
< test1
.length();
737 (startPos
= test1
.indexOf(test2
, startPos
, endPos
- startPos
)) != -1 ? (++occurrences
, startPos
+= 4) : 0)
739 if (occurrences
!= 4)
740 errln(UnicodeString("indexOf with starting and ending offsets failed: "
741 "expected to find 4 occurrences, found ") + occurrences
);
743 //using UChar32 string
744 for ( startPos
=0, occurrences
=0;
745 startPos
!= -1 && startPos
< test3
.length();
746 (startPos
= test3
.indexOf(test4
, startPos
)) != -1 ? (++occurrences
, startPos
+= 2) : 0)
748 if (occurrences
!= 4)
749 errln((UnicodeString
)"indexOf failed: expected to find 4 occurrences, found " + occurrences
);
751 for ( startPos
=10, occurrences
=0;
752 startPos
!= -1 && startPos
< test3
.length();
753 (startPos
= test3
.indexOf(test4
, startPos
)) != -1 ? (++occurrences
, startPos
+= 2) : 0)
755 if (occurrences
!= 2)
756 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences
);
759 for ( occurrences
= 0, startPos
= 0;
760 startPos
!= -1 && startPos
< test1
.length();
761 (startPos
= test1
.indexOf(testChar
, startPos
)) != -1 ? (++occurrences
, startPos
+= 1) : 0)
763 if (occurrences
!= 16)
764 errln(UnicodeString("indexOf with character failed: "
765 "expected to find 16 occurrences, found ") + occurrences
);
767 for ( occurrences
= 0, startPos
= 10;
768 startPos
!= -1 && startPos
< test1
.length();
769 (startPos
= test1
.indexOf(testChar
, startPos
)) != -1 ? (++occurrences
, startPos
+= 1) : 0)
771 if (occurrences
!= 12)
772 errln(UnicodeString("indexOf with character & start offset failed: "
773 "expected to find 12 occurrences, found ") + occurrences
);
775 for ( occurrences
= 0, startPos
= 5, endPos
= 28;
776 startPos
!= -1 && startPos
< test1
.length();
777 (startPos
= test1
.indexOf(testChar
, startPos
, endPos
- startPos
)) != -1 ? (++occurrences
, startPos
+= 1) : 0)
779 if (occurrences
!= 10)
780 errln(UnicodeString("indexOf with character & start & end offsets failed: "
781 "expected to find 10 occurrences, found ") + occurrences
);
783 //testing for UChar32
784 UnicodeString subString
;
785 for( occurrences
=0, startPos
=0; startPos
< test3
.length(); startPos
+=1){
786 subString
.append(test3
, startPos
, test3
.length());
787 if(subString
.indexOf(testChar32
) != -1 ){
792 if (occurrences
!= 14)
793 errln((UnicodeString
)"indexOf failed: expected to find 14 occurrences, found " + occurrences
);
795 for ( occurrences
= 0, startPos
= 0;
796 startPos
!= -1 && startPos
< test3
.length();
797 (startPos
= test3
.indexOf(testChar32
, startPos
)) != -1 ? (++occurrences
, startPos
+= 1) : 0)
799 if (occurrences
!= 4)
800 errln((UnicodeString
)"indexOf failed: expected to find 4 occurrences, found " + occurrences
);
802 endPos
=test3
.length();
803 for ( occurrences
= 0, startPos
= 5;
804 startPos
!= -1 && startPos
< test3
.length();
805 (startPos
= test3
.indexOf(testChar32
, startPos
, endPos
- startPos
)) != -1 ? (++occurrences
, startPos
+= 1) : 0)
// The guard requires 3 hits, so the message reports 3 (it previously said 2).
807 if (occurrences
!= 3)
808 errln((UnicodeString
)"indexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences
);
811 if(test1
.lastIndexOf(test2
)!=29) {
812 errln("test1.lastIndexOf(test2)!=29");
815 if(test1
.lastIndexOf(test2
, 15)!=29 || test1
.lastIndexOf(test2
, 29)!=29 || test1
.lastIndexOf(test2
, 30)!=-1) {
816 errln("test1.lastIndexOf(test2, start) failed");
// Backward counting: each hit becomes the new upper bound, searching the range [5, startPos).
819 for ( occurrences
= 0, startPos
= 32;
821 (startPos
= test1
.lastIndexOf(test2
, 5, startPos
- 5)) != -1 ? ++occurrences
: 0)
823 if (occurrences
!= 4)
824 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
825 "expected to find 4 occurrences, found ") + occurrences
);
827 for ( occurrences
= 0, startPos
= 32;
829 (startPos
= test1
.lastIndexOf(testChar
, 5, startPos
- 5)) != -1 ? ++occurrences
: 0)
831 if (occurrences
!= 11)
832 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
833 "expected to find 11 occurrences, found ") + occurrences
);
836 startPos
=test3
.length();
837 for ( occurrences
= 0;
839 (startPos
= test3
.lastIndexOf(testChar32
, 5, startPos
- 5)) != -1 ? ++occurrences
: 0)
841 if (occurrences
!= 3)
842 errln((UnicodeString
)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences
);
845 for ( occurrences
= 0, endPos
= test3
.length(); endPos
> 0; endPos
-= 1){
847 subString
.append(test3
, 0, endPos
);
848 if(subString
.lastIndexOf(testChar32
) != -1 ){
// This loop counts lastIndexOf() hits, so the message names lastIndexOf
// (it previously said "indexOf failed").
852 if (occurrences
!= 18)
853 errln((UnicodeString
)"lastIndexOf failed: expected to find 18 occurrences, found " + occurrences
);
856 // test that indexOf(UChar32) and lastIndexOf(UChar32)
857 // do not find surrogate code points when they are part of matched pairs
858 // (= part of supplementary code points)
860 if(test3
.indexOf((UChar32
)0xd841) != 4 || test3
.indexOf((UChar32
)0xdc02) != 3) {
861 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
863 if( UnicodeString(test3
, 0, 17).lastIndexOf((UChar
)0xd841, 0) != 4 ||
864 UnicodeString(test3
, 0, 17).lastIndexOf((UChar32
)0xd841, 2) != 4 ||
865 test3
.lastIndexOf((UChar32
)0xd841, 0, 17) != 4 || test3
.lastIndexOf((UChar32
)0xdc02, 0, 17) != 16
867 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
// Exercises UnicodeString::padLeading(), padTrailing(), trim() and truncate(),
// checking both the UBool each call returns and the resulting string contents.
// NOTE(review): the embedded original line numbers skip several lines here
// (e.g. 877-878, 902, 906, 911); the declarations of test4/returnVal and any
// statements between the trim() checks are not visible in this listing.
// NOTE(review): runs of spaces inside the string literals look collapsed in this
// copy (the padLeading(15) expectation below is only 6 characters long) --
// confirm the literals against the upstream file before relying on them.
872 UnicodeStringTest::TestSpacePadding()
874 UnicodeString
test1("hello");
875 UnicodeString
test2(" there");
876 UnicodeString
test3("Hi! How ya doin'? Beautiful day, isn't it?");
879 UnicodeString expectedValue
;
// padLeading(15): must report TRUE and yield the expected padded value.
881 returnVal
= test1
.padLeading(15);
882 expectedValue
= " hello";
883 if (returnVal
== FALSE
|| test1
!= expectedValue
)
884 errln("padLeading() failed: expected \"" + expectedValue
+ "\", got \"" + test1
+ "\".");
// padTrailing(15) on a short string: must report TRUE and change the string.
886 returnVal
= test2
.padTrailing(15);
887 expectedValue
= " there ";
888 if (returnVal
== FALSE
|| test2
!= expectedValue
)
889 errln("padTrailing() failed: expected \"" + expectedValue
+ "\", got \"" + test2
+ "\".");
// padTrailing(15) on test3: must report FALSE and leave the string equal to
// the snapshot taken just before the call.
891 expectedValue
= test3
;
892 returnVal
= test3
.padTrailing(15);
893 if (returnVal
== TRUE
|| test3
!= expectedValue
)
894 errln("padTrailing() failed: expected \"" + expectedValue
+ "\", got \"" + test3
+ "\".");
// trim() on a copy: test4 receives test1's contents and is trimmed; test1
// itself must still differ from the trimmed value at this point.
896 expectedValue
= "hello";
897 test4
.setTo(test1
).trim();
// NOTE(review): "test4 != expectedValue" appears twice in this condition; the
// second occurrence is redundant.
899 if (test4
!= expectedValue
|| test1
== expectedValue
|| test4
!= expectedValue
)
900 errln("trim(UnicodeString&) failed");
// The next three checks compare test1/test2/test3 with their trimmed forms.
903 if (test1
!= expectedValue
)
904 errln("trim() failed: expected \"" + expectedValue
+ "\", got \"" + test1
+ "\".");
907 expectedValue
= "there";
908 if (test2
!= expectedValue
)
909 errln("trim() failed: expected \"" + expectedValue
+ "\", got \"" + test2
+ "\".");
912 expectedValue
= "Hi! How ya doin'? Beautiful day, isn't it?";
913 if (test3
!= expectedValue
)
914 errln("trim() failed: expected \"" + expectedValue
+ "\", got \"" + test3
+ "\".");
// truncate(15) on strings expected to stay unchanged: must report FALSE.
916 returnVal
= test1
.truncate(15);
917 expectedValue
= "hello";
918 if (returnVal
== TRUE
|| test1
!= expectedValue
)
919 errln("truncate() failed: expected \"" + expectedValue
+ "\", got \"" + test1
+ "\".");
921 returnVal
= test2
.truncate(15);
922 expectedValue
= "there";
923 if (returnVal
== TRUE
|| test2
!= expectedValue
)
924 errln("truncate() failed: expected \"" + expectedValue
+ "\", got \"" + test2
+ "\".");
// truncate(15) on the long string: must report TRUE and cut the text.
926 returnVal
= test3
.truncate(15);
927 expectedValue
= "Hi! How ya doi";
928 if (returnVal
== FALSE
|| test3
!= expectedValue
)
929 errln("truncate() failed: expected \"" + expectedValue
+ "\", got \"" + test3
+ "\".");
933 UnicodeStringTest::TestPrefixAndSuffix()
935 UnicodeString
test1("Now is the time for all good men to come to the aid of their country.");
936 UnicodeString
test2("Now");
937 UnicodeString
test3("country.");
938 UnicodeString
test4("count");
940 if (!test1
.startsWith(test2
) || !test1
.startsWith(test2
, 0, test2
.length())) {
941 errln("startsWith() failed: \"" + test2
+ "\" should be a prefix of \"" + test1
+ "\".");
944 if (test1
.startsWith(test3
) ||
945 test1
.startsWith(test3
.getBuffer(), test3
.length()) ||
946 test1
.startsWith(test3
.getTerminatedBuffer(), 0, -1)
948 errln("startsWith() failed: \"" + test3
+ "\" shouldn't be a prefix of \"" + test1
+ "\".");
951 if (test1
.endsWith(test2
)) {
952 errln("endsWith() failed: \"" + test2
+ "\" shouldn't be a suffix of \"" + test1
+ "\".");
955 if (!test1
.endsWith(test3
)) {
956 errln("endsWith(test3) failed: \"" + test3
+ "\" should be a suffix of \"" + test1
+ "\".");
958 if (!test1
.endsWith(test3
, 0, INT32_MAX
)) {
959 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3
+ "\" should be a suffix of \"" + test1
+ "\".");
962 if(!test1
.endsWith(test3
.getBuffer(), test3
.length())) {
963 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3
+ "\" should be a suffix of \"" + test1
+ "\".");
965 if(!test1
.endsWith(test3
.getTerminatedBuffer(), 0, -1)) {
966 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3
+ "\" should be a suffix of \"" + test1
+ "\".");
969 if (!test3
.startsWith(test4
)) {
970 errln("endsWith(test4) failed: \"" + test4
+ "\" should be a prefix of \"" + test3
+ "\".");
973 if (test4
.startsWith(test3
)) {
974 errln("startsWith(test3) failed: \"" + test3
+ "\" shouldn't be a prefix of \"" + test4
+ "\".");
979 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
980 UnicodeString
test("abcde");
981 const UChar ab
[] = { 0x61, 0x62, 0 };
982 const UChar de
[] = { 0x64, 0x65, 0 };
983 assertTrue("abcde.startsWith(ab, -1)", test
.startsWith(ab
, -1));
984 assertTrue("abcde.startsWith(ab, 0, -1)", test
.startsWith(ab
, 0, -1));
985 assertTrue("abcde.endsWith(de, -1)", test
.endsWith(de
, -1));
986 assertTrue("abcde.endsWith(de, 0, -1)", test
.endsWith(de
, 0, -1));
990 UnicodeStringTest::TestFindAndReplace()
992 UnicodeString
test1("One potato, two potato, three potato, four\n");
993 UnicodeString
test2("potato");
994 UnicodeString
test3("MISSISSIPPI");
996 UnicodeString expectedValue
;
998 test1
.findAndReplace(test2
, test3
);
999 expectedValue
= "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1000 if (test1
!= expectedValue
)
1001 errln("findAndReplace failed: expected \"" + expectedValue
+ "\", got \"" + test1
+ "\".");
1002 test1
.findAndReplace(2, 32, test3
, test2
);
1003 expectedValue
= "One potato, two potato, three MISSISSIPPI, four\n";
1004 if (test1
!= expectedValue
)
1005 errln("findAndReplace failed: expected \"" + expectedValue
+ "\", got \"" + test1
+ "\".");
// Exercises UnicodeString::reverse(): ranged reversal on an ASCII sentence,
// then reversal of strings containing supplementary code points.
// NOTE(review): original lines 1012-1015, 1022, 1025 and 1035 are absent from
// this listing, so some of the reverse() calls that the checks below depend on
// (and the tail of the first errln() expression) cannot be seen here.
1009 UnicodeStringTest::TestReverse()
1011 UnicodeString
test("backwards words say to used I");
// Ranged reverse(start, length) calls that are visible in this listing.
1016 test
.reverse(10, 3);
1017 test
.reverse(14, 5);
1018 test
.reverse(20, 9);
// After all reversals the sentence must read in forward word order.
1020 if (test
!= "I used to say words backwards")
1021 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
// Supplementary characters: the escaped literal holds U+2F999, U+1D15F, U+00C4
// and U+1ED0; the check expects them in the opposite order, with U+1D15F at
// index 2 and U+2F999 at index 4.
1024 test
=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1026 if(test
.char32At(0)!=0x1ed0 || test
.char32At(1)!=0xc4 || test
.char32At(2)!=0x1d15f || test
.char32At(4)!=0x2f999) {
1027 errln("reverse() failed with supplementary characters");
1030 // Test case for ticket #8091:
1031 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1032 // an odd-length string that contains no other lead surrogates.
1033 test
=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1034 UnicodeString expected
=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1036 if(test
!=expected
) {
1037 errln("reverse() failed with only lead surrogate in the middle");
// Miscellaneous buffer-access checks: getBuffer(minCapacity)/releaseBuffer(),
// the const getBuffer(), getTerminatedBuffer(), aliasing setTo(), append/insert/
// replace with raw UChar input, hasMetaData(), and strings sharing one buffer.
// NOTE(review): original lines 1064-1067, 1158 and 1169 (among others) are
// absent from this listing; the checks that follow those gaps rely on
// statements that cannot be seen here.
1042 UnicodeStringTest::TestMiscellaneous()
1044 UnicodeString
test1("This is a test");
1045 UnicodeString
test2("This is a test");
1046 UnicodeString
test3("Me too!");
1048 // test getBuffer(minCapacity) and releaseBuffer()
1049 test1
=UnicodeString(); // make sure that it starts with its stackBuffer
1050 UChar
*p
=test1
.getBuffer(20);
1051 if(test1
.getCapacity()<20) {
1052 errln("UnicodeString::getBuffer(20).getCapacity()<20");
// While the writable buffer is open, these modifications must have no effect.
1055 test1
.append((UChar
)7); // must not be able to modify the string here
1056 test1
.setCharAt(3, 7);
// Expected view of the open string: length 0, charAt() yields 0xffff, and
// further getBuffer() calls return 0.
1058 if( test1
.length()!=0 ||
1059 test1
.charAt(0)!=0xffff || test1
.charAt(3)!=0xffff ||
1060 test1
.getBuffer(10)!=0 || test1
.getBuffer()!=0
1062 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
// Close the buffer with length 3, then append a fourth unit; the check expects
// the contents 1, 2, 3, 4.
1068 test1
.releaseBuffer(3);
1069 test1
.append((UChar
)4);
1071 if(test1
.length()!=4 || test1
.charAt(0)!=1 || test1
.charAt(1)!=2 || test1
.charAt(2)!=3 || test1
.charAt(3)!=4) {
1072 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1075 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1076 test1
.releaseBuffer(1);
1077 if(test1
.length()!=4 || test1
.charAt(0)!=1 || test1
.charAt(1)!=2 || test1
.charAt(2)!=3 || test1
.charAt(3)!=4) {
1078 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1081 // test getBuffer(const)
1082 const UChar
*q
=test1
.getBuffer(), *r
=test1
.getBuffer();
1083 if( test1
.length()!=4 ||
1084 q
[0]!=1 || q
[1]!=2 || q
[2]!=3 || q
[3]!=4 ||
1085 r
[0]!=1 || r
[1]!=2 || r
[2]!=3 || r
[3]!=4
1087 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1090 // test releaseBuffer() with a NUL-terminated buffer
1091 test1
.getBuffer(20)[2]=0;
1092 test1
.releaseBuffer(); // implicit -1
1093 if(test1
.length()!=2 || test1
.charAt(0)!=1 || test1
.charAt(1) !=2) {
1094 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1097 // test releaseBuffer() with a non-NUL-terminated buffer
1098 p
=test1
.getBuffer(256);
1099 for(int32_t i
=0; i
<test1
.getCapacity(); ++i
) {
1100 p
[i
]=(UChar
)1; // fill the buffer with all non-NUL code units
// With no NUL in the buffer, the length is expected to equal the capacity.
1102 test1
.releaseBuffer(); // implicit -1
1103 if(test1
.length()!=test1
.getCapacity() || test1
.charAt(1)!=1 || test1
.charAt(100)!=1 || test1
.charAt(test1
.getCapacity()-1)!=1) {
1104 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1107 // test getTerminatedBuffer()
1108 test1
=UnicodeString("This is another test.", "");
1109 test2
=UnicodeString("This is another test.", "");
1110 q
=test1
.getTerminatedBuffer();
1111 if(q
[test1
.length()]!=0 || test1
!=test2
|| test2
.compare(q
, -1)!=0) {
1112 errln("getTerminatedBuffer()[length]!=0");
// setTo(FALSE, u, 3): u[3] is not NUL, so getTerminatedBuffer() is expected to
// return a different, terminated buffer (q != u, q[3] == 0).
1115 const UChar u
[]={ 5, 6, 7, 8, 0 };
1116 test1
.setTo(FALSE
, u
, 3);
1117 q
=test1
.getTerminatedBuffer();
1118 if(q
==u
|| q
[0]!=5 || q
[1]!=6 || q
[2]!=7 || q
[3]!=0) {
1119 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
// setTo(TRUE, u, -1): the check expects getTerminatedBuffer() to hand back u
// itself with length 4.
1122 test1
.setTo(TRUE
, u
, -1);
1123 q
=test1
.getTerminatedBuffer();
1124 if(q
!=u
|| test1
.length()!=4 || q
[3]!=8 || q
[4]!=0) {
1125 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
// append/insert/replace taking raw UChar data or sub-ranges.
1128 test1
=UNICODE_STRING("la", 2);
1129 test1
.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1130 if(test1
!=UNICODE_STRING("la lila", 7)) {
1131 errln("UnicodeString::append(const UChar *, start, length) failed");
1134 test1
.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX
);
1135 if(test1
!=UNICODE_STRING("la dudum lila", 13)) {
1136 errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1139 static const UChar ucs
[]={ 0x68, 0x6d, 0x20, 0 };
1140 test1
.insert(9, ucs
, -1);
1141 if(test1
!=UNICODE_STRING("la dudum hm lila", 16)) {
1142 errln("UnicodeString::insert(start, const UChar *, length) failed");
1145 test1
.replace(9, 2, (UChar
)0x2b);
1146 if(test1
!=UNICODE_STRING("la dudum + lila", 15)) {
1147 errln("UnicodeString::replace(start, length, UChar) failed");
1150 if(test1
.hasMetaData() || UnicodeString().hasMetaData()) {
1151 errln("UnicodeString::hasMetaData() returns TRUE");
1154 // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155 test1
=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156 test1
.truncate(36); // ensure length()<getCapacity()
1157 test2
=test1
; // share the buffer
// NOTE(review): the statement that shortens test1 to length 5 (original line
// 1158) is not visible here. test2 must keep all 36 units.
1159 if(test1
.length()!=5 || test1
.getTerminatedBuffer()[5]!=0) {
1160 errln("UnicodeString(shared buffer).truncate() failed");
1162 if(test2
.length()!=36 || test2
[5]!=0x66 || u_strlen(test2
.getTerminatedBuffer())!=36) {
1163 errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164 "modified another copy of the string!");
1166 test1
=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167 test1
.truncate(36); // ensure length()<getCapacity()
1168 test2
=test1
; // share the buffer
// NOTE(review): the statement that empties test1 (original line 1169) is not
// visible here. Again test2 must be unaffected.
1170 if(test1
.length()!=0 || test1
.getTerminatedBuffer()[0]!=0) {
1171 errln("UnicodeString(shared buffer).remove() failed");
1173 if(test2
.length()!=36 || test2
[0]!=0x61 || u_strlen(test2
.getTerminatedBuffer())!=36) {
1174 errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175 "modified another copy of the string!");
// Exercises UnicodeString objects built from caller-supplied UChar arrays:
// a copy of a NUL-terminated array, the writable-alias constructor
// (buffer, length, capacity), read-only aliasing through setTo(), clone(),
// and the UChar32 and capacity constructors. Guard words declared next to the
// arrays are checked after each step to detect overwrites.
// NOTE(review): several original lines are absent from this listing (e.g.
// 1202, 1232-1234, 1251, 1259-1261, 1270-1273, 1280-1283); statements such as
// the ones releasing the heap objects cannot be seen here. The extraction
// layout is therefore kept as-is.
1180 UnicodeStringTest::TestStackAllocation()
1182 UChar testString
[] ={
1183 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1184 UChar guardWord
= 0x4DED;
1185 UnicodeString
* test
= 0;
// Part 1: string constructed from the NUL-terminated array above.
1187 test
= new UnicodeString(testString
);
1188 if (*test
!= "This is a crazy test.")
1189 errln("Test string failed to initialize properly.");
1190 if (guardWord
!= 0x04DED)
1191 errln("Test string initialization overwrote guard word!");
1193 test
->insert(8, "only ");
1194 test
->remove(15, 6);
1195 if (*test
!= "This is only a test.")
1196 errln("Manipulation of test string failed to work right.");
1197 if (guardWord
!= 0x4DED)
1198 errln("Manipulation of test string overwrote guard word!");
1200 // we have to deinitialize and release the backing store by calling the destructor
1201 // explicitly, since we can't overload operator delete
// Part 2: writable alias over workingBuffer (35 units used, capacity 100).
1204 UChar workingBuffer
[] = {
1205 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1206 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1207 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1210 UChar guardWord2
= 0x4DED;
1212 test
= new UnicodeString(workingBuffer
, 35, 100);
1213 if (*test
!= "Now is the time for all men to come")
1214 errln("Stack-allocated backing store failed to initialize correctly.");
1215 if (guardWord2
!= 0x4DED)
1216 errln("Stack-allocated backing store overwrote guard word!");
// An insert that still fits must be visible in workingBuffer itself.
1218 test
->insert(24, "good ");
1219 if (*test
!= "Now is the time for all good men to come")
1220 errln("insert() on stack-allocated UnicodeString didn't work right");
1221 if (guardWord2
!= 0x4DED)
1222 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1224 if (workingBuffer
[24] != 0x67)
1225 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
// Appending more text; per the messages below this is the overflow case.
1227 *test
+= " to the aid of their country.";
1228 if (*test
!= "Now is the time for all good men to come to the aid of their country.")
1229 errln("Stack-allocated UnicodeString overflow didn't work");
1230 if (guardWord2
!= 0x4DED)
1231 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
// NOTE(review): the assignment and condition guarding the next errln()
// (original lines 1232-1234) are not visible in this listing.
1235 errln("Assignment to stack-allocated UnicodeString didn't work");
1236 if (workingBuffer
[0] != 0x4e)
1237 errln("Change to UnicodeString after overflow are still affecting original buffer");
1238 if (guardWord2
!= 0x4DED)
1239 errln("Change to UnicodeString after overflow overwrote guard word!");
1241 // test read-only aliasing with setTo()
1242 workingBuffer
[0] = 0x20ac;
1243 workingBuffer
[1] = 0x125;
1244 workingBuffer
[2] = 0;
1245 test
->setTo(TRUE
, workingBuffer
, 2);
1246 if(test
->length() != 2 || test
->charAt(0) != 0x20ac || test
->charAt(1) != 0x125) {
1247 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
// clone() must own its data: a later write to workingBuffer shows through the
// alias (test) but must not show through the clone (c).
1250 UnicodeString
*c
=(UnicodeString
*)test
->clone();
1252 workingBuffer
[1] = 0x109;
1253 if(test
->charAt(1) != 0x109) {
1254 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1257 if(c
->length() != 2 || c
->charAt(1) != 0x125) {
1258 errln("clone(alias) did not copy the buffer");
// Length -1 with isTerminated=TRUE: length is taken from the NUL at index 2.
1262 test
->setTo(TRUE
, workingBuffer
, -1);
1263 if(test
->length() != 2 || test
->charAt(0) != 0x20ac || test
->charAt(1) != 0x109) {
1264 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
// Length -1 with isTerminated=FALSE is contradictory and must yield bogus.
1267 test
->setTo(FALSE
, workingBuffer
, -1);
1268 if(!test
->isBogus()) {
1269 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
// Part 3: setTo(buffer, length, capacity) on a fresh string.
1274 test
=new UnicodeString();
1275 UChar buffer
[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1276 test
->setTo(buffer
, 4, 10);
1277 if(test
->length() !=4 || test
->charAt(0) != 0x0061 || test
->charAt(1) != 0x0062 ||
1278 test
->charAt(2) != 0x20ac || test
->charAt(3) != 0x0043){
1279 errln((UnicodeString
)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test
));
1284 // test the UChar32 constructor
1285 UnicodeString
c32Test((UChar32
)0x10ff2a);
1286 if( c32Test
.length() != U16_LENGTH(0x10ff2a) ||
1287 c32Test
.char32At(c32Test
.length() - 1) != 0x10ff2a
1289 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1292 // test the (new) capacity constructor
1293 UnicodeString
capTest(5, (UChar32
)0x2a, 5);
1294 if( capTest
.length() != 5 * U16_LENGTH(0x2a) ||
1295 capTest
.char32At(0) != 0x2a ||
1296 capTest
.char32At(4) != 0x2a
1298 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1301 capTest
= UnicodeString(5, (UChar32
)0x10ff2a, 5);
1302 if( capTest
.length() != 5 * U16_LENGTH(0x10ff2a) ||
1303 capTest
.char32At(0) != 0x10ff2a ||
1304 capTest
.char32At(4) != 0x10ff2a
1306 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
// Filler 0 with count 0 must produce an empty string; the message names this
// case so a failure is not confused with the 0x10ff2a case above.
1309 capTest
= UnicodeString(5, (UChar32
)0, 0);
1310 if(capTest
.length() != 0) {
1311 errln("The UnicodeString capacity constructor does not work with a 0 filler and a count of 0");
1316 * Test the unescape() function.
// Builds an escaped input string, assembles the expected result by appending
// code units/code points one at a time, and compares it with unescape().
// NOTE(review): original lines 1322, 1325, 1327 and 1333 are absent from this
// listing, so some of the appends that build OUT (and the tail of the errln()
// expression) cannot be seen here.
1318 void UnicodeStringTest::TestUnescape(void) {
1319 UnicodeString
IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV
);
1320 UnicodeString
OUT("abc");
1321 OUT
.append((UChar
)0x4567);
// \n and \r in the input correspond to U+000A and U+000D.
1323 OUT
.append((UChar
)0xA);
1324 OUT
.append((UChar
)0xD);
1326 OUT
.append((UChar32
)0x00101234);
// \x1, \x{5289} and \x1b in the input.
1328 OUT
.append((UChar32
)1).append((UChar32
)0x5289).append((UChar
)0x1b);
1329 UnicodeString result
= IN
.unescape();
1330 if (result
!= OUT
) {
1331 errln("FAIL: " + prettify(IN
) + ".unescape() -> " +
1332 prettify(result
) + ", expected " +
1336 // test that an empty string is returned in case of an error
1337 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1338 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1342 /* test code point counting functions --------------------------------------- */
1344 /* reference implementation of UnicodeString::hasMoreChar32Than() */
// Reference oracle for hasMoreChar32Than(): counts the code points with
// s.countChar32(start, length) and reports whether that count exceeds number.
// NOTE(review): the line carrying the return type (original line 1345) is not
// visible in this listing.
1346 _refUnicodeStringHasMoreChar32Than(const UnicodeString
&s
, int32_t start
, int32_t length
, int32_t number
) {
1347 int32_t count
=s
.countChar32(start
, length
);
1348 return count
>number
;
1351 /* compare the real function against the reference */
1353 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString
&s
, int32_t start
, int32_t length
, int32_t number
) {
1354 if(s
.hasMoreChar32Than(start
, length
, number
)!=_refUnicodeStringHasMoreChar32Than(s
, start
, length
, number
)) {
1355 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1356 start
, length
, number
, s
.hasMoreChar32Than(start
, length
, number
));
// Tests code point counting: UnicodeString::countChar32(), u_countChar32() on
// a NUL-terminated buffer and with bad input, and hasMoreChar32Than() compared
// exhaustively against the reference helper.
// NOTE(review): original lines 1367, 1373, 1379, 1387 and 1403-1404 (among
// others) are absent from this listing; the "if(" openers of the two condition
// lists and the end of the str[] initializer cannot be seen here.
1361 UnicodeStringTest::TestCountChar32(void) {
// s holds two supplementary code points followed by two BMP code points
// (six UTF-16 code units, four code points).
1363 UnicodeString s
=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1365 // test countChar32()
1366 // note that this also calls and tests u_countChar32(length>=0)
// Starting at index 1 (inside the first surrogate pair) is still expected to
// count 4.
1368 s
.countChar32()!=4 ||
1369 s
.countChar32(1)!=4 ||
1370 s
.countChar32(2)!=3 ||
1371 s
.countChar32(2, 3)!=2 ||
1372 s
.countChar32(2, 0)!=0
1374 errln("UnicodeString::countChar32() failed");
1377 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1378 const UChar
*buffer
=s
.getTerminatedBuffer();
1380 u_countChar32(buffer
, -1)!=4 ||
1381 u_countChar32(buffer
+1, -1)!=4 ||
1382 u_countChar32(buffer
+2, -1)!=3 ||
1383 u_countChar32(buffer
+3, -1)!=3 ||
1384 u_countChar32(buffer
+4, -1)!=2 ||
1385 u_countChar32(buffer
+5, -1)!=1 ||
1386 u_countChar32(buffer
+6, -1)!=0
1388 errln("u_countChar32(length=-1) failed");
1391 // test u_countChar32() with bad input
1392 if(u_countChar32(NULL
, 5)!=0 || u_countChar32(buffer
, -2)!=0) {
1393 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1397 /* test data and variables for hasMoreChar32Than() */
// Mix of BMP units, well-formed surrogate pairs and unpaired surrogates
// (0xd802 and 0xdc03 are each followed/preceded by a non-surrogate).
1398 static const UChar str
[]={
1399 0x61, 0x62, 0xd800, 0xdc00,
1400 0xd801, 0xdc01, 0x63, 0xd802,
1401 0x64, 0xdc03, 0x65, 0x66,
1402 0xd804, 0xdc04, 0xd805, 0xdc05,
1405 UnicodeString
string(str
, LENGTHOF(str
));
1406 int32_t start
, length
, number
;
// Every (start, end) sub-range, with number running from -1 to two past the
// number of code units in the range.
1408 /* test hasMoreChar32Than() */
1409 for(length
=string
.length(); length
>=0; --length
) {
1410 for(start
=0; start
<=length
; ++start
) {
1411 for(number
=-1; number
<=((length
-start
)+2); ++number
) {
1412 _testUnicodeStringHasMoreChar32Than(string
, start
, length
-start
, number
);
// Out-of-range start values (-1 and length()+1) and a huge length are passed
// on purpose so that the function has to pin them.
1417 /* test hasMoreChar32Than() with pinning */
1418 for(start
=-1; start
<=string
.length()+1; ++start
) {
1419 for(number
=-1; number
<=((string
.length()-start
)+2); ++number
) {
1420 _testUnicodeStringHasMoreChar32Than(string
, start
, 0x7fffffff, number
);
1424 /* test hasMoreChar32Than() with a bogus string */
1425 string
.setToBogus();
1426 for(length
=-1; length
<=1; ++length
) {
1427 for(start
=-1; start
<=length
; ++start
) {
1428 for(number
=-1; number
<=((length
-start
)+2); ++number
) {
1429 _testUnicodeStringHasMoreChar32Than(string
, start
, length
-start
, number
);
// Tests the "bogus" state of UnicodeString: how a string becomes bogus, that
// non-assignment modifications neither work on nor revive a bogus string, that
// assignments and the documented reset calls do revive it, and that NULL
// primitive inputs produce empty rather than bogus strings.
// NOTE(review): many original lines between the checks are absent from this
// listing (e.g. 1472, 1478, 1484-1485, 1507, 1512, ...). Presumably they put
// test3 back into the bogus state (most checks begin with !test3.isBogus());
// confirm against the upstream file.
1436 UnicodeStringTest::TestBogus() {
1437 UnicodeString
test1("This is a test");
1438 UnicodeString
test2("This is a test");
1439 UnicodeString
test3("Me too!");
1441 // test isBogus() and setToBogus()
1442 if (test1
.isBogus() || test2
.isBogus() || test3
.isBogus()) {
1443 errln("A string returned TRUE for isBogus()!");
1446 // NULL pointers are treated like empty strings
1447 // use other illegal arguments to make a bogus string
1448 test3
.setTo(FALSE
, test1
.getBuffer(), -2);
1449 if(!test3
.isBogus()) {
1450 errln("A bogus string returned FALSE for isBogus()!");
// Equal strings must hash equally; the bogus string must hash differently.
1452 if (test1
.hashCode() != test2
.hashCode() || test1
.hashCode() == test3
.hashCode()) {
1453 errln("hashCode() failed");
// No buffer accessor may return a pointer for a bogus string.
1455 if(test3
.getBuffer()!=0 || test3
.getBuffer(20)!=0 || test3
.getTerminatedBuffer()!=0) {
1456 errln("bogus.getBuffer()!=0");
1458 if (test1
.indexOf(test3
) != -1) {
1459 errln("bogus.indexOf() != -1");
1461 if (test1
.lastIndexOf(test3
) != -1) {
1462 errln("bogus.lastIndexOf() != -1");
// Comparisons: a normal string is expected to compare greater than bogus.
1464 if (test1
.caseCompare(test3
, U_FOLD_CASE_DEFAULT
) != 1 || test3
.caseCompare(test1
, U_FOLD_CASE_DEFAULT
) != -1) {
1465 errln("caseCompare() doesn't work with bogus strings");
1467 if (test1
.compareCodePointOrder(test3
) != 1 || test3
.compareCodePointOrder(test1
) != -1) {
1468 errln("compareCodePointOrder() doesn't work with bogus strings");
1471 // verify that non-assignment modifications fail and do not revive a bogus string
1473 test3
.append((UChar
)0x61);
1474 if(!test3
.isBogus() || test3
.getBuffer()!=0) {
1475 errln("bogus.append('a') worked but must not");
1479 test3
.findAndReplace(UnicodeString((UChar
)0x61), test2
);
1480 if(!test3
.isBogus() || test3
.getBuffer()!=0) {
1481 errln("bogus.findAndReplace() worked but must not");
1486 if(!test3
.isBogus() || test3
.getBuffer()!=0) {
1487 errln("bogus.trim() revived bogus but must not");
1492 if(!test3
.isBogus() || test3
.getBuffer()!=0) {
1493 errln("bogus.remove(1) revived bogus but must not");
1497 if(!test3
.setCharAt(0, 0x62).isBogus() || !test3
.isEmpty()) {
1498 errln("bogus.setCharAt(0, 'b') worked but must not");
1502 if(test3
.truncate(1) || !test3
.isBogus() || !test3
.isEmpty()) {
1503 errln("bogus.truncate(1) revived bogus but must not");
1506 // verify that assignments revive a bogus string
// Each check below requires: bogus before, not bogus after the assignment,
// and contents equal to the assigned value.
1508 if(!test3
.isBogus() || (test3
=test1
).isBogus() || test3
!=test1
) {
1509 errln("bogus.operator=() failed");
1513 if(!test3
.isBogus() || test3
.fastCopyFrom(test1
).isBogus() || test3
!=test1
) {
1514 errln("bogus.fastCopyFrom() failed");
1518 if(!test3
.isBogus() || test3
.setTo(test1
).isBogus() || test3
!=test1
) {
1519 errln("bogus.setTo(UniStr) failed");
1523 if(!test3
.isBogus() || test3
.setTo(test1
, 0).isBogus() || test3
!=test1
) {
1524 errln("bogus.setTo(UniStr, 0) failed");
1528 if(!test3
.isBogus() || test3
.setTo(test1
, 0, 0x7fffffff).isBogus() || test3
!=test1
) {
1529 errln("bogus.setTo(UniStr, 0, len) failed");
1533 if(!test3
.isBogus() || test3
.setTo(test1
.getBuffer(), test1
.length()).isBogus() || test3
!=test1
) {
1534 errln("bogus.setTo(const UChar *, len) failed");
1538 if(!test3
.isBogus() || test3
.setTo((UChar
)0x2028).isBogus() || test3
!=UnicodeString((UChar
)0x2028)) {
1539 errln("bogus.setTo(UChar) failed");
1543 if(!test3
.isBogus() || test3
.setTo((UChar32
)0x1d157).isBogus() || test3
!=UnicodeString((UChar32
)0x1d157)) {
1544 errln("bogus.setTo(UChar32) failed");
1548 if(!test3
.isBogus() || test3
.setTo(FALSE
, test1
.getBuffer(), test1
.length()).isBogus() || test3
!=test1
) {
1549 errln("bogus.setTo(readonly alias) failed");
1552 // writable alias to another string's buffer: very bad idea, just convenient for this test
1554 if(!test3
.isBogus() || test3
.setTo((UChar
*)test1
.getBuffer(), test1
.length(), test1
.getCapacity()).isBogus() || test3
!=test1
) {
1555 errln("bogus.setTo(writable alias) failed");
1558 // verify simple, documented ways to turn a bogus string into an empty one
1560 if(!test3
.isBogus() || (test3
=UnicodeString()).isBogus() || !test3
.isEmpty()) {
1561 errln("bogus.operator=(UnicodeString()) failed");
1565 if(!test3
.isBogus() || test3
.setTo(UnicodeString()).isBogus() || !test3
.isEmpty()) {
1566 errln("bogus.setTo(UnicodeString()) failed");
1570 if(test3
.remove().isBogus() || test3
.getBuffer()==0 || !test3
.isEmpty()) {
1571 errln("bogus.remove() failed");
1575 if(test3
.remove(0, INT32_MAX
).isBogus() || test3
.getBuffer()==0 || !test3
.isEmpty()) {
1576 errln("bogus.remove(0, INT32_MAX) failed");
1580 if(test3
.truncate(0) || test3
.isBogus() || !test3
.isEmpty()) {
1581 errln("bogus.truncate(0) failed");
1585 if(!test3
.isBogus() || test3
.setTo((UChar32
)-1).isBogus() || !test3
.isEmpty()) {
1586 errln("bogus.setTo((UChar32)-1) failed");
1589 static const UChar nul
=0;
1592 if(!test3
.isBogus() || test3
.setTo(&nul
, 0).isBogus() || !test3
.isEmpty()) {
1593 errln("bogus.setTo(&nul, 0) failed");
1597 if(!test3
.isBogus() || test3
.getBuffer()!=0) {
1598 errln("setToBogus() failed to make a string bogus");
// Assigning a bogus string must make the left-hand side bogus as well.
1602 if(test1
.isBogus() || !(test1
=test3
).isBogus()) {
1603 errln("normal=bogus failed to make the left string bogus");
1606 // test that NULL primitive input string values are treated like
1607 // empty strings, not errors (bogus)
// U+10005 occupies two UTF-16 code units, hence the expected length of 2.
1608 test2
.setTo((UChar32
)0x10005);
1609 if(test2
.insert(1, NULL
, 1).length()!=2) {
1610 errln("UniStr.insert(...NULL...) should not modify the string but does");
// Constructors given NULL input pointers.
1613 UErrorCode errorCode
=U_ZERO_ERROR
;
1615 test4((const UChar
*)NULL
),
1616 test5(TRUE
, (const UChar
*)NULL
, 1),
1617 test6((UChar
*)NULL
, 5, 5),
1618 test7((const char *)NULL
, 3, NULL
, errorCode
);
1619 if(test4
.isBogus() || test5
.isBogus() || test6
.isBogus() || test7
.isBogus()) {
1620 errln("a constructor set to bogus for a NULL input string, should be empty");
// The same NULL inputs through setTo().
1623 test4
.setTo(NULL
, 3);
1624 test5
.setTo(TRUE
, (const UChar
*)NULL
, 1);
1625 test6
.setTo((UChar
*)NULL
, 5, 5);
1626 if(test4
.isBogus() || test5
.isBogus() || test6
.isBogus()) {
1627 errln("a setTo() set to bogus for a NULL input string, should be empty");
1630 // test that bogus==bogus<any
1631 if(test1
!=test3
|| test1
.compare(test3
)!=0) {
1632 errln("bogus==bogus failed");
1636 if(test1
>=test2
|| !(test2
>test1
) || test1
.compare(test2
)>=0 || !(test2
.compare(test1
)>0)) {
1637 errln("bogus<empty failed");
1641 // StringEnumeration ------------------------------------------------------- ***
1642 // most of StringEnumeration is tested elsewhere
1643 // this test improves code coverage
// Table of C strings served by TestEnumeration and compared against in
// TestStringEnumeration.
// NOTE(review): the declarator and the first entries (original lines
// 1646-1649) and the closing lines are absent from this listing; the array
// name testEnumStrings is known only from its uses further down.
1645 static const char *const
1650 "this is a long string which helps us test some buffer limits",
1651 "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
// Minimal StringEnumeration subclass over testEnumStrings. It overrides
// count(), snext() and reset() plus the class-ID accessors, so the base-class
// default implementations of next()/unext()/clone() get exercised by the test.
// NOTE(review): access specifiers, the closing braces, the rest of snext()
// and the body of reset() are absent from this listing (original lines
// 1655, 1665-1669, 1672, 1681-1682, ...).
1654 class TestEnumeration
: public StringEnumeration
{
// Start at the first table entry.
1656 TestEnumeration() : i(0) {}
// Number of entries in the table.
1658 virtual int32_t count(UErrorCode
& /*status*/) const {
1659 return LENGTHOF(testEnumStrings
);
// While status is a success code and entries remain, loads the next table
// entry into unistr and advances the index i.
1662 virtual const UnicodeString
*snext(UErrorCode
&status
) {
1663 if(U_SUCCESS(status
) && i
<LENGTHOF(testEnumStrings
)) {
1664 unistr
=UnicodeString(testEnumStrings
[i
++], "");
1671 virtual void reset(UErrorCode
& /*status*/) {
// The class ID is the address of the static fgClassID member.
1675 static inline UClassID
getStaticClassID() {
1676 return (UClassID
)&fgClassID
;
1678 virtual UClassID
getDynamicClassID() const {
1679 return getStaticClassID();
1683 static const char fgClassID
;
// Out-of-class definition of the static member whose address is the class ID.
1688 const char TestEnumeration::fgClassID
=0;
// Drives TestEnumeration through the StringEnumeration default implementations
// (next(), unext(), clone()) and then through the C wrapper obtained from
// uenum_openFromStringEnumeration() (uenum_next(), uenum_unext()).
// Every errln() call that uses %s/%d placeholders passes matching arguments;
// a placeholder without an argument is undefined behavior in a printf-style
// call.
// NOTE(review): the local declarations (original lines 1694-1699) and several
// other lines (e.g. 1715, 1741-1743, 1770-1773) are absent from this listing,
// so the extraction layout is kept as-is.
1691 UnicodeStringTest::TestStringEnumeration() {
1693 TestEnumeration ten
;
1700 // test the next() default implementation and ensureCharsCapacity()
1701 for(i
=0; i
<LENGTHOF(testEnumStrings
); ++i
) {
1702 status
=U_ZERO_ERROR
;
1703 pc
=ten
.next(&length
, status
);
1704 s
=UnicodeString(testEnumStrings
[i
], "");
1705 if(U_FAILURE(status
) || pc
==NULL
|| length
!=s
.length() || UnicodeString(pc
, length
, "")!=s
) {
1706 errln("StringEnumeration.next(%d) failed", i
);
// One more call after the last element must return NULL.
1709 status
=U_ZERO_ERROR
;
1710 if(ten
.next(&length
, status
)!=NULL
) {
1711 errln("StringEnumeration.next(done)!=NULL");
1714 // test the unext() default implementation
1716 for(i
=0; i
<LENGTHOF(testEnumStrings
); ++i
) {
1717 status
=U_ZERO_ERROR
;
1718 pu
=ten
.unext(&length
, status
);
1719 s
=UnicodeString(testEnumStrings
[i
], "");
1720 if(U_FAILURE(status
) || pu
==NULL
|| length
!=s
.length() || UnicodeString(TRUE
, pu
, length
)!=s
) {
1721 errln("StringEnumeration.unext(%d) failed", i
);
1724 status
=U_ZERO_ERROR
;
1725 if(ten
.unext(&length
, status
)!=NULL
) {
1726 errln("StringEnumeration.unext(done)!=NULL");
1729 // test that the default clone() implementation works, and returns NULL
1730 if(ten
.clone()!=NULL
) {
1731 errln("StringEnumeration.clone()!=NULL");
1734 // test that uenum_openFromStringEnumeration() works
1735 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1736 StringEnumeration
*newTen
= new TestEnumeration
;
1737 status
=U_ZERO_ERROR
;
1738 UEnumeration
*uten
= uenum_openFromStringEnumeration(newTen
, &status
);
1739 if (uten
==NULL
|| U_FAILURE(status
)) {
1740 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__
, __LINE__
, u_errorName(status
));
1744 // test uenum_next()
1745 for(i
=0; i
<LENGTHOF(testEnumStrings
); ++i
) {
1746 status
=U_ZERO_ERROR
;
1747 pc
=uenum_next(uten
, &length
, &status
);
1748 if(U_FAILURE(status
) || pc
==NULL
|| strcmp(pc
, testEnumStrings
[i
]) != 0) {
1749 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__
, __LINE__
, i
);
1752 status
=U_ZERO_ERROR
;
1753 if(uenum_next(uten
, &length
, &status
)!=NULL
) {
// The format has %s and %d, so the file name and line number are passed.
1754 errln("File %s, line %d, uenum_next(done)!=NULL", __FILE__
, __LINE__
);
1757 // test the uenum_unext()
1758 uenum_reset(uten
, &status
);
1759 for(i
=0; i
<LENGTHOF(testEnumStrings
); ++i
) {
1760 status
=U_ZERO_ERROR
;
1761 pu
=uenum_unext(uten
, &length
, &status
);
1762 s
=UnicodeString(testEnumStrings
[i
], "");
1763 if(U_FAILURE(status
) || pu
==NULL
|| length
!=s
.length() || UnicodeString(TRUE
, pu
, length
)!=s
) {
1764 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__
, __LINE__
, i
);
1767 status
=U_ZERO_ERROR
;
1768 if(uenum_unext(uten
, &length
, &status
)!=NULL
) {
// The comma after the format keeps __FILE__ a separate argument instead of
// being concatenated into the format string.
1769 errln("File %s, Line %d, uenum_unext(done)!=NULL", __FILE__
, __LINE__
);
1776 * Namespace test, to make sure that macros like UNICODE_STRING include the
1777 * namespace qualifier.
1779 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1782 class UnicodeString
{
1784 enum EInvariant
{ kInvariant
};
1785 UnicodeString() : i(1) {}
1786 UnicodeString(UBool
/*isTerminated*/, const UChar
* /*text*/, int32_t textLength
) : i(textLength
) {}
1787 UnicodeString(const char * /*src*/, int32_t length
, enum EInvariant
/*inv*/
1795 UnicodeStringTest::TestNameSpace() {
1796 // Provoke name collision unless the UnicodeString macros properly
1797 // qualify the icu::UnicodeString class.
1798 using namespace bogus
;
1800 // Use all UnicodeString macros from unistr.h.
1801 icu::UnicodeString s1
=icu::UnicodeString("abc", 3, US_INV
);
1802 icu::UnicodeString s2
=UNICODE_STRING("def", 3);
1803 icu::UnicodeString s3
=UNICODE_STRING_SIMPLE("ghi");
1805 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1806 icu::UnicodeString s4
=s1
+s2
+s3
;
1807 if(s4
.length()!=9) {
1808 errln("Something wrong with UnicodeString::operator+().");
1813 UnicodeStringTest::TestUTF32() {
1814 // Input string length US_STACKBUF_SIZE to cause overflow of the
1815 // initially chosen fStackBuffer due to supplementary characters.
1816 static const UChar32 utf32
[] = {
1817 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1818 0x10000, 0x20000, 0xe0000, 0x10ffff
1820 static const UChar expected_utf16
[] = {
1821 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1822 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1824 UnicodeString from32
= UnicodeString::fromUTF32(utf32
, LENGTHOF(utf32
));
1825 UnicodeString
expected(FALSE
, expected_utf16
, LENGTHOF(expected_utf16
));
1826 if(from32
!= expected
) {
1827 errln("UnicodeString::fromUTF32() did not create the expected string.");
1830 static const UChar utf16
[] = {
1831 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1833 static const UChar32 expected_utf32
[] = {
1834 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1836 UChar32 result32
[16];
1837 UErrorCode errorCode
= U_ZERO_ERROR
;
1839 UnicodeString(FALSE
, utf16
, LENGTHOF(utf16
)).
1840 toUTF32(result32
, LENGTHOF(result32
), errorCode
);
1841 if( length32
!= LENGTHOF(expected_utf32
) ||
1842 0 != uprv_memcmp(result32
, expected_utf32
, length32
*4) ||
1843 result32
[length32
] != 0
1845 errln("UnicodeString::toUTF32() did not create the expected string.");
1849 class TestCheckedArrayByteSink
: public CheckedArrayByteSink
{
1851 TestCheckedArrayByteSink(char* outbuf
, int32_t capacity
)
1852 : CheckedArrayByteSink(outbuf
, capacity
), calledFlush(FALSE
) {}
1853 virtual void Flush() { calledFlush
= TRUE
; }
1858 UnicodeStringTest::TestUTF8() {
1859 static const uint8_t utf8
[] = {
1865 // 0x10000, 0x20000,
1866 // 0xe0000, 0x10ffff
1867 0x41, 0xed, 0xa4, 0x80,
1868 0x61, 0xed, 0xb0, 0x80,
1869 0xf4, 0x90, 0x80, 0x80, 0x5a,
1870 0xf1, 0x90, 0x80, 0x80, 0x7a,
1871 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1872 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1874 static const UChar expected_utf16
[] = {
1878 0xd900, 0xdc00, 0x7a,
1879 0xd800, 0xdc00, 0xd840, 0xdc00,
1880 0xdb40, 0xdc00, 0xdbff, 0xdfff
1882 UnicodeString from8
= UnicodeString::fromUTF8(StringPiece((const char *)utf8
, (int32_t)sizeof(utf8
)));
1883 UnicodeString
expected(FALSE
, expected_utf16
, LENGTHOF(expected_utf16
));
1885 if(from8
!= expected
) {
1886 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1888 #if U_HAVE_STD_STRING
1889 std::string
utf8_string((const char *)utf8
, sizeof(utf8
));
1890 UnicodeString from8b
= UnicodeString::fromUTF8(utf8_string
);
1891 if(from8b
!= expected
) {
1892 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1896 static const UChar utf16
[] = {
1897 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1899 static const uint8_t expected_utf8
[] = {
1900 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1901 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1903 UnicodeString
us(FALSE
, utf16
, LENGTHOF(utf16
));
1906 TestCheckedArrayByteSink
sink(buffer
, (int32_t)sizeof(buffer
));
1908 if( sink
.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8
) ||
1909 0 != uprv_memcmp(buffer
, expected_utf8
, sizeof(expected_utf8
))
1911 errln("UnicodeString::toUTF8() did not create the expected string.");
1913 if(!sink
.calledFlush
) {
1914 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1916 #if U_HAVE_STD_STRING
1917 // Initial contents for testing that toUTF8String() appends.
1918 std::string result8
= "-->";
1919 std::string expected8
= "-->" + std::string((const char *)expected_utf8
, sizeof(expected_utf8
));
1920 // Use the return value just for testing.
1921 std::string
&result8r
= us
.toUTF8String(result8
);
1922 if(result8r
!= expected8
|| &result8r
!= &result8
) {
1923 errln("UnicodeString::toUTF8String() did not create the expected string.");
1928 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
1929 static UnicodeString
wrapUChars(const UChar
*uchars
) {
1930 return UnicodeString(TRUE
, uchars
, -1);
1934 UnicodeStringTest::TestReadOnlyAlias() {
1935 UChar uchars
[]={ 0x61, 0x62, 0 };
1936 UnicodeString
alias(TRUE
, uchars
, 2);
1937 if(alias
.length()!=2 || alias
.getBuffer()!=uchars
|| alias
.getTerminatedBuffer()!=uchars
) {
1938 errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1942 if(alias
.length()!=1 || alias
.getBuffer()!=uchars
) {
1943 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1945 if(alias
.getTerminatedBuffer()==uchars
) {
1946 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1947 "did not allocate and copy as expected.");
1949 if(uchars
[1]!=0x62) {
1950 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1951 "modified the original buffer.");
1953 if(1!=u_strlen(alias
.getTerminatedBuffer())) {
1954 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1955 "does not return a buffer terminated at the proper length.");
1958 alias
.setTo(TRUE
, uchars
, 2);
1959 if(alias
.length()!=2 || alias
.getBuffer()!=uchars
|| alias
.getTerminatedBuffer()!=uchars
) {
1960 errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1964 if(alias
.length()!=0) {
1965 errln("UnicodeString(read-only-alias).remove() did not work.");
1967 if(alias
.getTerminatedBuffer()==uchars
) {
1968 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1969 "did not un-alias as expected.");
1971 if(uchars
[0]!=0x61) {
1972 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1973 "modified the original buffer.");
1975 if(0!=u_strlen(alias
.getTerminatedBuffer())) {
1976 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1977 "does not return a buffer terminated at length 0.");
1980 UnicodeString longString
=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1981 alias
.setTo(FALSE
, longString
.getBuffer(), longString
.length());
1982 alias
.remove(0, 10);
1983 if(longString
.compare(10, INT32_MAX
, alias
)!=0 || alias
.getBuffer()!=longString
.getBuffer()+10) {
1984 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1986 alias
.setTo(FALSE
, longString
.getBuffer(), longString
.length());
1987 alias
.remove(27, 99);
1988 if(longString
.compare(0, 27, alias
)!=0 || alias
.getBuffer()!=longString
.getBuffer()) {
1989 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1991 alias
.setTo(FALSE
, longString
.getBuffer(), longString
.length());
1992 alias
.retainBetween(6, 30);
1993 if(longString
.compare(6, 24, alias
)!=0 || alias
.getBuffer()!=longString
.getBuffer()+6) {
1994 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
1997 UChar abc
[]={ 0x61, 0x62, 0x63, 0 };
1998 UBool hasRVO
= wrapUChars(abc
).getBuffer()==abc
;
2001 temp
.fastCopyFrom(longString
.tempSubString());
2002 if(temp
!=longString
|| (hasRVO
&& temp
.getBuffer()!=longString
.getBuffer())) {
2003 errln("UnicodeString.tempSubString() failed");
2005 temp
.fastCopyFrom(longString
.tempSubString(-3, 5));
2006 if(longString
.compare(0, 5, temp
)!=0 || (hasRVO
&& temp
.getBuffer()!=longString
.getBuffer())) {
2007 errln("UnicodeString.tempSubString(-3, 5) failed");
2009 temp
.fastCopyFrom(longString
.tempSubString(17));
2010 if(longString
.compare(17, INT32_MAX
, temp
)!=0 || (hasRVO
&& temp
.getBuffer()!=longString
.getBuffer()+17)) {
2011 errln("UnicodeString.tempSubString(17) failed");
2013 temp
.fastCopyFrom(longString
.tempSubString(99));
2014 if(!temp
.isEmpty()) {
2015 errln("UnicodeString.tempSubString(99) failed");
2017 temp
.fastCopyFrom(longString
.tempSubStringBetween(6));
2018 if(longString
.compare(6, INT32_MAX
, temp
)!=0 || (hasRVO
&& temp
.getBuffer()!=longString
.getBuffer()+6)) {
2019 errln("UnicodeString.tempSubStringBetween(6) failed");
2021 temp
.fastCopyFrom(longString
.tempSubStringBetween(8, 18));
2022 if(longString
.compare(8, 10, temp
)!=0 || (hasRVO
&& temp
.getBuffer()!=longString
.getBuffer()+8)) {
2023 errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2025 UnicodeString bogusString
;
2026 bogusString
.setToBogus();
2027 temp
.fastCopyFrom(bogusString
.tempSubStringBetween(8, 18));
2028 if(!temp
.isBogus()) {
2029 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2034 UnicodeStringTest::doTestAppendable(UnicodeString
&dest
, Appendable
&app
) {
2035 static const UChar cde
[3]={ 0x63, 0x64, 0x65 };
2036 static const UChar fg
[3]={ 0x66, 0x67, 0 };
2037 if(!app
.reserveAppendCapacity(12)) {
2038 errln("Appendable.reserve(12) failed");
2040 app
.appendCodeUnit(0x61);
2041 app
.appendCodePoint(0x62);
2042 app
.appendCodePoint(0x50000);
2043 app
.appendString(cde
, 3);
2044 app
.appendString(fg
, -1);
2046 int32_t capacity
=-1;
2047 UChar
*buffer
=app
.getAppendBuffer(3, 3, scratch
, 3, &capacity
);
2049 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity
);
2052 static const UChar hij
[3]={ 0x68, 0x69, 0x6a };
2053 u_memcpy(buffer
, hij
, 3);
2054 app
.appendString(buffer
, 3);
2055 if(dest
!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2056 errln("Appendable.append(...) failed");
2058 buffer
=app
.getAppendBuffer(0, 3, scratch
, 3, &capacity
);
2059 if(buffer
!=NULL
|| capacity
!=0) {
2060 errln("Appendable.getAppendBuffer(min=0) failed");
2063 buffer
=app
.getAppendBuffer(3, 3, scratch
, 2, &capacity
);
2064 if(buffer
!=NULL
|| capacity
!=0) {
2065 errln("Appendable.getAppendBuffer(scratch<min) failed");
2069 class SimpleAppendable
: public Appendable
{
2071 explicit SimpleAppendable(UnicodeString
&dest
) : str(dest
) {}
2072 virtual UBool
appendCodeUnit(UChar c
) { str
.append(c
); return TRUE
; }
2073 SimpleAppendable
&reset() { str
.remove(); return *this; }
2079 UnicodeStringTest::TestAppendable() {
2081 SimpleAppendable
app(dest
);
2082 doTestAppendable(dest
, app
);
2086 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2088 UnicodeStringAppendable
app(dest
);
2089 doTestAppendable(dest
, app
);
2093 UnicodeStringTest::TestSizeofUnicodeString() {
2094 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2095 size_t sizeofUniStr
=sizeof(UnicodeString
);
2097 switch(sizeof(void *)) {
2105 logln("This platform has neither 32-bit nor 64-bit pointers.");
2108 if(expected
!=sizeofUniStr
) {
2109 errln("sizeof(UnicodeString)=%d, expected %d", (int)sizeofUniStr
, (int)expected
);