1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
5 * Copyright (c) 1997-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /* file name: strtest.cpp
10 * tab size: 8 (not used)
13 * created on: 1999nov22
14 * created by: Markus W. Scherer
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/std_string.h"
22 #include "unicode/stringpiece.h"
23 #include "unicode/unistr.h"
24 #include "unicode/ustring.h"
25 #include "unicode/utf_old.h" // for UTF8_COUNT_TRAIL_BYTES
26 #include "unicode/utf8.h"
// Destructor. The body is empty, so nothing is released explicitly here.
32 StringTest::~StringTest() {}
// Compares the compile-time U_IS_BIG_ENDIAN setting with the run-time value
// read from u.byte and reports an error when the two disagree.
// NOTE(review): the declaration and initialization of `u` are not visible in
// this listing - confirm how u.byte is populated before relying on this check.
34 void StringTest::TestEndian(void) {
40 if(U_IS_BIG_ENDIAN
!=u
.byte
) {
41 errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
// Checks the sizes of the basic types this code relies on and reports every
// mismatch through errln():
//   wchar_t must match U_SIZEOF_WCHAR_T; int64_t/uint64_t/double must be 8
//   bytes; int32_t/uint32_t 4 bytes; int16_t/uint16_t/UChar 2 bytes;
//   int8_t/uint8_t/UBool 1 byte.
45 void StringTest::TestSizeofTypes(void) {
46 if(U_SIZEOF_WCHAR_T
!=sizeof(wchar_t)) {
47 errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
// When the platform flags the 64-bit integer types as unavailable, that is
// reported as an error in its own right.
49 #ifdef U_INT64_T_UNAVAILABLE
50 errln("int64_t and uint64_t are undefined.");
52 if(8!=sizeof(int64_t)) {
53 errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
55 if(8!=sizeof(uint64_t)) {
56 errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
// The message below points at putil.c as the code that depends on an 8-byte double.
59 if(8!=sizeof(double)) {
60 errln("8!=sizeof(double) - putil.c code may not work");
62 if(4!=sizeof(int32_t)) {
63 errln("4!=sizeof(int32_t)");
65 if(4!=sizeof(uint32_t)) {
66 errln("4!=sizeof(uint32_t)");
68 if(2!=sizeof(int16_t)) {
69 errln("2!=sizeof(int16_t)");
71 if(2!=sizeof(uint16_t)) {
72 errln("2!=sizeof(uint16_t)");
74 if(2!=sizeof(UChar
)) {
75 errln("2!=sizeof(UChar)");
77 if(1!=sizeof(int8_t)) {
78 errln("1!=sizeof(int8_t)");
80 if(1!=sizeof(uint8_t)) {
81 errln("1!=sizeof(uint8_t)");
83 if(1!=sizeof(UBool
)) {
84 errln("1!=sizeof(UBool)");
// Checks that U_CHARSET_FAMILY is consistent with the value of `c`:
// 0x41 is required when the family is U_ASCII_FAMILY and 0xc1 when it is
// U_EBCDIC_FAMILY; any other combination is reported as an error.
// NOTE(review): the definition of `c` is not visible in this listing -
// presumably the character literal 'A' (0x41 in ASCII, 0xc1 in EBCDIC); confirm.
88 void StringTest::TestCharsetFamily(void) {
90 if( (U_CHARSET_FAMILY
==U_ASCII_FAMILY
&& c
!=0x41) ||
91 (U_CHARSET_FAMILY
==U_EBCDIC_FAMILY
&& c
!=0xc1)
93 errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
// File-scope declaration of ustringVar for the 5-character string "aZ0 -";
// Test_U_STRING() runs the matching U_STRING_INIT before reading it.
97 U_STRING_DECL(ustringVar
, "aZ0 -", 5);
// Initializes ustringVar with U_STRING_INIT and verifies its length (5) and
// its code units against "aZ0 -": 0x61 'a', 0x5a 'Z', 0x30 '0', 0x20 ' ',
// 0x2d '-'. Any mismatch is reported through errln().
// NOTE(review): the tail of the condition is not visible in this listing.
100 StringTest::Test_U_STRING() {
101 U_STRING_INIT(ustringVar
, "aZ0 -", 5);
102 if( u_strlen(ustringVar
)!=5 ||
103 ustringVar
[0]!=0x61 ||
104 ustringVar
[1]!=0x5a ||
105 ustringVar
[2]!=0x30 ||
106 ustringVar
[3]!=0x20 ||
107 ustringVar
[4]!=0x2d ||
110 errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
111 "See putil.h and utypes.h with platform.h.");
// Builds a UnicodeString with the UNICODE_STRING macro from "aZ0 -" and
// verifies its length (5) and leading code units: 0x61 'a', 0x5a 'Z',
// 0x30 '0', 0x20 ' '. Any mismatch is reported through errln().
// Note: this local ustringVar is a UnicodeString and hides the file-scope
// ustringVar declared with U_STRING_DECL.
// NOTE(review): the tail of the condition is not visible in this listing.
116 StringTest::Test_UNICODE_STRING() {
117 UnicodeString ustringVar
=UNICODE_STRING("aZ0 -", 5);
118 if( ustringVar
.length()!=5 ||
119 ustringVar
[0]!=0x61 ||
120 ustringVar
[1]!=0x5a ||
121 ustringVar
[2]!=0x30 ||
122 ustringVar
[3]!=0x20 ||
125 errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
126 "See unistr.h and utypes.h with platform.h.");
// Same check as Test_UNICODE_STRING, but using UNICODE_STRING_SIMPLE, which
// takes only the literal (no explicit length argument): the resulting
// UnicodeString must have length 5 and start with 0x61 'a', 0x5a 'Z',
// 0x30 '0', 0x20 ' '.
// NOTE(review): the tail of the condition is not visible in this listing.
131 StringTest::Test_UNICODE_STRING_SIMPLE() {
132 UnicodeString ustringVar
=UNICODE_STRING_SIMPLE("aZ0 -");
133 if( ustringVar
.length()!=5 ||
134 ustringVar
[0]!=0x61 ||
135 ustringVar
[1]!=0x5a ||
136 ustringVar
[2]!=0x30 ||
137 ustringVar
[3]!=0x20 ||
140 errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
141 "See unistr.h and utypes.h with platform.h.");
// Verifies the trail-byte count reported for four lead bytes:
//   0x7F -> 0, 0xC2 -> 1, 0xE0 -> 2, 0xF0 -> 3.
// The same expectations are applied to both macros exercised below.
146 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
// The deprecated macro is only tested when the obsolete utf_old.h API is not hidden.
147 #if !U_HIDE_OBSOLETE_UTF_OLD_H
148 if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
149 || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
150 || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
151 || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
152 errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
155 // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
156 // have completely different implementations.
157 if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
158 || U8_COUNT_TRAIL_BYTES(0xC2) != 1
159 || U8_COUNT_TRAIL_BYTES(0xE0) != 2
160 || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
161 errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
// Test dispatcher for this suite: logs the suite title and lists every test
// method of the class through TESTCASE_AUTO, in the order shown below.
// The fourth parameter is unused (its name is commented out).
// NOTE(review): TESTCASE_AUTO is defined elsewhere; presumably it matches
// `index` against each entry, sets `name`, and runs the method when `exec`
// is true - confirm against the macro definition.
165 void StringTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) {
167 logln("TestSuite Character and String Test: ");
170 TESTCASE_AUTO(TestEndian
);
171 TESTCASE_AUTO(TestSizeofTypes
);
172 TESTCASE_AUTO(TestCharsetFamily
);
173 TESTCASE_AUTO(Test_U_STRING
);
174 TESTCASE_AUTO(Test_UNICODE_STRING
);
175 TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE
);
176 TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES
);
177 TESTCASE_AUTO(TestSTLCompatibility
);
178 TESTCASE_AUTO(TestStringPiece
);
179 TESTCASE_AUTO(TestStringPieceComparisons
);
180 TESTCASE_AUTO(TestByteSink
);
181 TESTCASE_AUTO(TestCheckedArrayByteSink
);
182 TESTCASE_AUTO(TestStringByteSink
);
183 TESTCASE_AUTO(TestCharString
);
184 TESTCASE_AUTO(TestCStr
);
185 TESTCASE_AUTO(Testctou
);
// Exercises StringPiece: the constructors (default, NULL pointer, C string,
// pointer+length, std::string, substring with pos / pos+len), the npos
// constant, substr(), clear(), remove_prefix() and remove_suffix().
// Each check inspects empty(), data(), length() and size() and reports a
// failure through errln(). Every failure message names the exact call whose
// result is being checked, so a failing log line identifies the broken case.
// Expectations visible in the checks: a negative pos is treated as 0, a pos
// past the end yields an empty piece, a len larger than the remainder is
// clipped, and a negative len yields an empty piece.
190 StringTest::TestStringPiece() {
191 // Default constructor.
193 if(!empty
.empty() || empty
.data()!=NULL
|| empty
.length()!=0 || empty
.size()!=0) {
194 errln("StringPiece() failed");
196 // Construct from NULL const char * pointer.
197 StringPiece
null(NULL
);
198 if(!null
.empty() || null
.data()!=NULL
|| null
.length()!=0 || null
.size()!=0) {
199 errln("StringPiece(NULL) failed");
201 // Construct from const char * pointer.
202 static const char *abc_chars
="abc";
203 StringPiece
abc(abc_chars
);
204 if(abc
.empty() || abc
.data()!=abc_chars
|| abc
.length()!=3 || abc
.size()!=3) {
205 errln("StringPiece(abc_chars) failed");
207 // Construct from const char * pointer and length.
208 static const char *abcdefg_chars
="abcdefg";
209 StringPiece
abcd(abcdefg_chars
, 4);
210 if(abcd
.empty() || abcd
.data()!=abcdefg_chars
|| abcd
.length()!=4 || abcd
.size()!=4) {
211 errln("StringPiece(abcdefg_chars, 4) failed");
213 // Construct from std::string.
214 std::string
uvwxyz_string("uvwxyz");
215 StringPiece
uvwxyz(uvwxyz_string
);
216 if(uvwxyz
.empty() || uvwxyz
.data()!=uvwxyz_string
.data() || uvwxyz
.length()!=6 || uvwxyz
.size()!=6) {
217 errln("StringPiece(uvwxyz_string) failed");
219 // Substring constructor with pos.
220 StringPiece
sp(abcd
, -1);
221 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=4 || sp
.size()!=4) {
222 errln("StringPiece(abcd, -1) failed");
224 sp
=StringPiece(abcd
, 5);
225 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
226 errln("StringPiece(abcd, 5) failed");
228 sp
=StringPiece(abcd
, 2);
229 if(sp
.empty() || sp
.data()!=abcdefg_chars
+2 || sp
.length()!=2 || sp
.size()!=2) {
// Message names the pos=2 construction checked here (was a copy of the -1 case).
230 errln("StringPiece(abcd, 2) failed");
232 // Substring constructor with pos and len.
233 sp
=StringPiece(abcd
, -1, 8);
234 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=4 || sp
.size()!=4) {
235 errln("StringPiece(abcd, -1, 8) failed");
237 sp
=StringPiece(abcd
, 5, 8);
238 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
239 errln("StringPiece(abcd, 5, 8) failed");
241 sp
=StringPiece(abcd
, 2, 8);
242 if(sp
.empty() || sp
.data()!=abcdefg_chars
+2 || sp
.length()!=2 || sp
.size()!=2) {
// Message names the (2, 8) construction checked here.
243 errln("StringPiece(abcd, 2, 8) failed");
245 sp
=StringPiece(abcd
, 2, -1);
246 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
// Message names the (2, -1) construction checked here.
247 errln("StringPiece(abcd, 2, -1) failed");
// npos is checked both as a value and through a pointer to it.
250 const int32_t *ptr_npos
=&StringPiece::npos
;
251 if(StringPiece::npos
!=0x7fffffff || *ptr_npos
!=0x7fffffff) {
252 errln("StringPiece::npos!=0x7fffffff");
254 // substr() method with pos, using len=npos.
256 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=4 || sp
.size()!=4) {
257 errln("abcd.substr(-1) failed");
260 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
261 errln("abcd.substr(5) failed");
264 if(sp
.empty() || sp
.data()!=abcdefg_chars
+2 || sp
.length()!=2 || sp
.size()!=2) {
// Expected result (data+2, length 2 of a 4-byte piece) corresponds to pos=2.
265 errln("abcd.substr(2) failed");
267 // substr() method with pos and len.
268 sp
=abcd
.substr(-1, 8);
269 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=4 || sp
.size()!=4) {
270 errln("abcd.substr(-1, 8) failed");
272 sp
=abcd
.substr(5, 8);
273 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
274 errln("abcd.substr(5, 8) failed");
276 sp
=abcd
.substr(2, 8);
277 if(sp
.empty() || sp
.data()!=abcdefg_chars
+2 || sp
.length()!=2 || sp
.size()!=2) {
// Message names the substr(2, 8) call checked here.
278 errln("abcd.substr(2, 8) failed");
280 sp
=abcd
.substr(2, -1);
281 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
// Message names the substr(2, -1) call checked here.
282 errln("abcd.substr(2, -1) failed");
// After clear() the piece must be empty with a NULL data pointer.
287 if(!sp
.empty() || sp
.data()!=NULL
|| sp
.length()!=0 || sp
.size()!=0) {
288 errln("abcd.clear() failed");
// remove_prefix(): a negative count must leave the piece unchanged.
292 sp
.remove_prefix(-1);
293 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=4 || sp
.size()!=4) {
294 errln("abcd.remove_prefix(-1) failed");
298 if(sp
.empty() || sp
.data()!=abcdefg_chars
+2 || sp
.length()!=2 || sp
.size()!=2) {
299 errln("abcd.remove_prefix(2) failed");
303 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
304 errln("abcd.remove_prefix(5) failed");
// remove_suffix(): a negative count must leave the piece unchanged.
308 sp
.remove_suffix(-1);
309 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=4 || sp
.size()!=4) {
310 errln("abcd.remove_suffix(-1) failed");
314 if(sp
.empty() || sp
.data()!=abcdefg_chars
|| sp
.length()!=2 || sp
.size()!=2) {
315 errln("abcd.remove_suffix(2) failed");
319 if(!sp
.empty() || sp
.length()!=0 || sp
.size()!=0) {
320 errln("abcd.remove_suffix(5) failed");
// Sets up four StringPiece fixtures (a NULL piece, "abc", the first four
// bytes of "abcdefg", and "abx") and reports comparison failures via errln().
// NOTE(review): most of the comparison conditions are not visible in this
// listing; judging by the messages they cover empty vs. null equality and
// "abc" vs. abcd after remove_suffix(1) - confirm against the full source.
325 StringTest::TestStringPieceComparisons() {
327 StringPiece
null(NULL
);
328 StringPiece
abc("abc");
329 StringPiece
abcd("abcdefg", 4);
330 StringPiece
abx("abx");
332 errln("empty!=null");
// Dropping the last byte turns the 4-byte piece "abcd" into "abc".
340 abcd
.remove_suffix(1);
342 errln("abc!=abcd.remove_suffix(1)");
349 // Verify that ByteSink is subclassable and Flush() overridable.
// Minimal ByteSink subclass writing into a caller-supplied char buffer.
// fOutbuf starts at the buffer passed to the constructor; fLength starts at 0
// and is exposed through length().
// NOTE(review): the statements that advance fOutbuf/fLength after the copy
// and the member declarations are not visible in this listing.
350 class SimpleByteSink
: public ByteSink
{
352 SimpleByteSink(char *outbuf
) : fOutbuf(outbuf
), fLength(0) {}
353 virtual void Append(const char *bytes
, int32_t n
) {
// Copy only when the caller's bytes are not already at the current output
// position (i.e. the caller did not write directly into fOutbuf).
354 if(fOutbuf
!= bytes
) {
355 memcpy(fOutbuf
, bytes
, n
);
// Flush() is overridden to append a single "z" so the test can observe the call.
360 virtual void Flush() { Append("z", 1); }
361 int32_t length() { return fLength
; }
367 // Test the ByteSink base class.
// Uses SimpleByteSink to check Append()/Flush() and the base-class
// GetAppendBuffer() contract:
//   - after Append("abc") and a flush the sink must hold "abcz" (length 4)
//     with buffer[4] still '!';
//   - min_capacity < 1 must yield NULL with capacity 0;
//   - a scratch capacity below min_capacity must yield NULL with capacity 0;
//   - otherwise the scratch buffer itself and its full size are returned.
// NOTE(review): the declarations of `buffer` and `scratch`, the pre-fill of
// `buffer` (presumably with '!') and the Flush() call are not visible here.
369 StringTest::TestByteSink() {
372 SimpleByteSink
sink(buffer
);
373 sink
.Append("abc", 3);
375 if(!(sink
.length() == 4 && 0 == memcmp("abcz", buffer
, 4) && buffer
[4] == '!')) {
376 errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
// capacity starts at -1 so the test can tell that GetAppendBuffer() wrote it.
380 int32_t capacity
= -1;
381 char *dest
= sink
.GetAppendBuffer(0, 50, scratch
, (int32_t)sizeof(scratch
), &capacity
);
382 if(dest
!= NULL
|| capacity
!= 0) {
383 errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
// Scratch capacity 9 is deliberately below the requested minimum of 10.
386 dest
= sink
.GetAppendBuffer(10, 50, scratch
, 9, &capacity
);
387 if(dest
!= NULL
|| capacity
!= 0) {
388 errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
391 dest
= sink
.GetAppendBuffer(5, 50, scratch
, (int32_t)sizeof(scratch
), &capacity
);
392 if(dest
!= scratch
|| capacity
!= (int32_t)sizeof(scratch
)) {
393 errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
// Exercises CheckedArrayByteSink over a 20-byte buffer:
//   1. Append("abc") - 3 bytes appended and written.
//   2. GetAppendBuffer() argument checks (min_capacity < 1, scratch too small).
//   3. GetAppendBuffer() returning the sink's own buffer tail (buffer + 3),
//      followed by an Append() from that very pointer.
//   4. GetAppendBuffer() falling back to the scratch buffer once the remaining
//      room is below the requested minimum; the following Append() brings the
//      appended count to 23 while the written count stops at sizeof(buffer).
//   5. Reset().Append("123") - counters restart at 3 and the first three
//      bytes are replaced.
// NOTE(review): the declaration of `scratch`, the pre-fill of `buffer`
// (presumably with '!') and the tails of several conditions are not visible
// in this listing.
398 StringTest::TestCheckedArrayByteSink() {
399 char buffer
[20]; // < 26 for the test code to work
401 CheckedArrayByteSink
sink(buffer
, (int32_t)sizeof(buffer
));
402 sink
.Append("abc", 3);
403 if(!(sink
.NumberOfBytesAppended() == 3 && sink
.NumberOfBytesWritten() == 3 &&
404 0 == memcmp("abc", buffer
, 3) && buffer
[3] == '!') &&
407 errln("CheckedArrayByteSink did not Append() as expected");
// capacity starts at -1 so the test can tell that GetAppendBuffer() wrote it.
411 int32_t capacity
= -1;
412 char *dest
= sink
.GetAppendBuffer(0, 50, scratch
, (int32_t)sizeof(scratch
), &capacity
);
413 if(dest
!= NULL
|| capacity
!= 0) {
414 errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
// Scratch capacity 9 is deliberately below the requested minimum of 10.
417 dest
= sink
.GetAppendBuffer(10, 50, scratch
, 9, &capacity
);
418 if(dest
!= NULL
|| capacity
!= 0) {
419 errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
// 17 bytes remain in the sink's buffer, enough for min_capacity 10, so the
// sink is expected to hand out its own storage right after "abc".
422 dest
= sink
.GetAppendBuffer(10, 50, scratch
, (int32_t)sizeof(scratch
), &capacity
);
423 if(dest
!= buffer
+ 3 || capacity
!= (int32_t)sizeof(buffer
) - 3) {
424 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
// Write directly into the returned buffer, then Append() from the same pointer.
427 memcpy(dest
, "defghijklm", 10);
428 sink
.Append(dest
, 10);
429 if(!(sink
.NumberOfBytesAppended() == 13 && sink
.NumberOfBytesWritten() == 13 &&
430 0 == memcmp("abcdefghijklm", buffer
, 13) &&
433 errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
// Only 7 bytes remain now, fewer than min_capacity 10, so the scratch buffer
// is expected instead of the sink's own storage.
436 dest
= sink
.GetAppendBuffer(10, 50, scratch
, (int32_t)sizeof(scratch
), &capacity
);
437 if(dest
!= scratch
|| capacity
!= (int32_t)sizeof(scratch
)) {
438 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
// Appending 10 more bytes exceeds the 20-byte buffer: all 23 bytes count as
// appended, but only sizeof(buffer) bytes are expected to be written.
440 memcpy(dest
, "nopqrstuvw", 10);
441 sink
.Append(dest
, 10);
442 if(!(sink
.NumberOfBytesAppended() == 23 &&
443 sink
.NumberOfBytesWritten() == (int32_t)sizeof(buffer
) &&
444 0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer
, (int32_t)sizeof(buffer
)) &&
447 errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
// After Reset() the sink writes from the start again; bytes beyond the new
// content keep their previous values.
450 sink
.Reset().Append("123", 3);
451 if(!(sink
.NumberOfBytesAppended() == 3 && sink
.NumberOfBytesWritten() == 3 &&
452 0 == memcmp("123defghijklmnopqrstuvwxyz", buffer
, (int32_t)sizeof(buffer
)) &&
455 errln("CheckedArrayByteSink did not Reset().Append() as expected");
// Checks StringByteSink<std::string>: Append() must extend the target string,
// and the two-argument constructor must leave at least the requested 20 bytes
// of spare capacity in the target string.
461 StringTest::TestStringByteSink() {
462 // Not much to test because only the constructors and Append()
463 // are implemented, and trivially so.
464 std::string
result("abc"); // std::string
465 StringByteSink
<std::string
> sink(&result
);
466 sink
.Append("def", 3);
467 if(result
!= "abcdef") {
468 errln("StringByteSink did not Append() as expected");
// Second sink over the same string, constructed with an initial append
// capacity of 20.
470 StringByteSink
<std::string
> sink2(&result
, 20);
471 if(result
.capacity() < (result
.length() + 20)) {
472 errln("StringByteSink should have 20 append capacity, has only %d",
473 (int)(result
.capacity() - result
.length()));
// NOTE(review): this appends through `sink`, not the freshly built `sink2`.
// Both target `result`, so the check below passes either way - confirm
// whether sink2.Append() was intended.
475 sink
.Append("ghi", 3);
476 if(result
!= "abcdefghi") {
477 errln("StringByteSink did not Append() as expected");
481 #if defined(_MSC_VER)
// Compile-time smoke test, active only for MSVC builds: instantiates a
// std::vector<UnicodeString> and pushes one element so that the container's
// placement-new usage is compiled against UnicodeString.
486 StringTest::TestSTLCompatibility() {
487 #if defined(_MSC_VER)
488 /* Just make sure that it compiles with STL's placement new usage. */
489 std::vector
<UnicodeString
> myvect
;
490 myvect
.push_back(UnicodeString("blah"));
// Exercises CharString: construction from a long C string, copyFrom(),
// append() of a StringPiece and of the string itself, clear(), append() with
// an explicit length, appendInvariantChars(), getAppendBuffer(), truncate(),
// the error path for non-invariant characters, and move construction for both
// a short and a long (over 40 characters) string.
// Errors from the CharString calls are collected in `errorCode`.
// NOTE(review): several statements are not visible in this listing (the
// declaration of `expected`, the adjustment of `sp`, the move assignments to
// s3, and the tails of some calls); comments below describe visible code only.
495 StringTest::TestCharString() {
496 IcuTestErrorCode
errorCode(*this, "TestCharString()");
498 static const char longStr
[] =
499 "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
500 CharString
chStr(longStr
, errorCode
);
501 if (0 != strcmp(longStr
, chStr
.data()) || (int32_t)strlen(longStr
) != chStr
.length()) {
502 errln("CharString(longStr) failed.");
// `copy` starts as a copy of the short string "Test" and is then overwritten
// with the long string via copyFrom().
504 CharString
test("Test", errorCode
);
505 CharString
copy(test
,errorCode
);
506 copy
.copyFrom(chStr
, errorCode
);
507 if (0 != strcmp(longStr
, copy
.data()) || (int32_t)strlen(longStr
) != copy
.length()) {
508 errln("CharString.copyFrom() failed.");
// `sp` views chStr's own storage, so the appends below read from the string
// that is being modified.
510 StringPiece
sp(chStr
.toStringPiece());
512 chStr
.append(sp
, errorCode
).append(chStr
, errorCode
);
// Expected value: longStr + longStr[4..] followed by the same pair again,
// i.e. the result of appending the piece and then the whole string to itself.
// NOTE(review): this implies `sp` was advanced by 4 bytes on a line not
// shown here - confirm.
513 strcpy(expected
, longStr
);
514 strcat(expected
, longStr
+4);
515 strcat(expected
, longStr
);
516 strcat(expected
, longStr
+4);
517 if (0 != strcmp(expected
, chStr
.data()) || (int32_t)strlen(expected
) != chStr
.length()) {
518 errln("CharString(longStr).append(substring of self).append(self) failed.");
// append(s, 3, ...) must take only the first three bytes ("def") of "defghij".
520 chStr
.clear().append("abc", errorCode
).append("defghij", 3, errorCode
);
521 if (0 != strcmp("abcdef", chStr
.data()) || 6 != chStr
.length()) {
522 errln("CharString.clear().append(abc).append(defghij, 3) failed.");
// The literal passed here has the same text as longStr.
524 chStr
.appendInvariantChars(UNICODE_STRING_SIMPLE(
525 "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
527 strcpy(expected
, "abcdef");
528 strcat(expected
, longStr
);
529 if (0 != strcmp(expected
, chStr
.data()) || (int32_t)strlen(expected
) != chStr
.length()) {
530 errln("CharString.appendInvariantChars(longStr) failed.");
// Request an append buffer (minimum 5, desired 10 bytes), fill five '*' into
// it, append them from that buffer, then truncate by 3 so that only "**"
// remains appended.
532 int32_t appendCapacity
= 0;
533 char *buffer
= chStr
.getAppendBuffer(5, 10, appendCapacity
, errorCode
);
534 if (errorCode
.isFailure()) {
537 memcpy(buffer
, "*****", 5);
538 chStr
.append(buffer
, 5, errorCode
);
539 chStr
.truncate(chStr
.length()-3);
540 strcat(expected
, "**");
541 if (0 != strcmp(expected
, chStr
.data()) || (int32_t)strlen(expected
) != chStr
.length()) {
542 errln("CharString.getAppendBuffer().append(**) failed.");
// Error path: '@' is not an invariant character, so the call must set
// U_INVARIANT_CONVERSION_ERROR; the length is then expected to be 0.
// A separate plain UErrorCode is used for this expected failure.
545 UErrorCode ec
= U_ZERO_ERROR
;
547 chStr
.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec
);
548 if (ec
!= U_INVARIANT_CONVERSION_ERROR
) {
549 errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__
, __LINE__
, u_errorName(ec
));
551 if (chStr
.length() != 0) {
552 errln("%s:%d expected length() = 0, got %d", __FILE__
, __LINE__
, chStr
.length());
// Move semantics with a short string.
556 CharString
s1("Short string", errorCode
);
557 CharString
s2(std::move(s1
));
558 assertEquals("s2 should have content of s1", "Short string", s2
.data());
559 CharString
s3("Dummy", errorCode
);
561 assertEquals("s3 should have content of s2", "Short string", s3
.data());
// Move semantics with strings long enough (per their own text, over 40
// characters) to be heap-allocated.
565 CharString
s1("Long string over 40 characters to trigger heap allocation", errorCode
);
566 CharString
s2(std::move(s1
));
567 assertEquals("s2 should have content of s1",
568 "Long string over 40 characters to trigger heap allocation",
570 CharString
s3("Dummy string with over 40 characters to trigger heap allocation", errorCode
);
572 assertEquals("s3 should have content of s2",
573 "Long string over 40 characters to trigger heap allocation",
// Builds a UnicodeString from a C string and checks that CStr(us)() yields
// text equal to the original; on mismatch the error reports file, line, the
// expected text and the text actually produced.
579 StringTest::TestCStr() {
580 const char *cs
= "This is a test string.";
581 UnicodeString
us(cs
);
582 if (0 != strcmp(CStr(us
)(), cs
)) {
583 errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__
, __LINE__
, cs
, CStr(us
)());
// Checks ctou() on a C string containing a backslash-u escape sequence
// (the C++ literal holds the six characters \u0127, not the character itself):
// index 0 must be 0x0046 ('F') and index 2 must be 295 (0x0127), i.e. the
// escape has been turned into a single code unit.
588 StringTest::Testctou() {
589 const char *cs
= "Fa\\u0127mu";
590 UnicodeString u
= ctou(cs
);
591 assertEquals("Testing unescape@0", (int32_t)0x0046, u
.charAt(0));
592 assertEquals("Testing unescape@2", (int32_t)295, u
.charAt(2));