2 ******************************************************************************
4 * Copyright (C) 2002-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 * file name: custrtst.c
10 * tab size: 8 (not used)
13 * created on: 2002oct09
14 * created by: Markus W. Scherer
16 * Tests of ustring.h Unicode string API functions.
19 #include "unicode/ustring.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/uiter.h"
/*
 * Get the sign of an integer: evaluates to -1, 0 or +1.
 *
 * The argument is narrowed to int32_t first (as the original macro did) and is
 * evaluated twice, so it must be free of side effects.
 * Comparisons are used instead of right-shifting a possibly negative value,
 * because the result of >> on a negative signed integer is
 * implementation-defined in C.
 */
#define _SIGN(value) (((int32_t)(value)>0)-((int32_t)(value)<0))
29 /* test setup --------------------------------------------------------------- */
/*
 * Forward declarations of file-local functions. The Test*() functions are
 * referenced by addUStringTest() before their definitions appear.
 */
31 static void setUpDataTable(void);
32 static void TestStringCopy(void);
33 static void TestStringFunctions(void);
34 static void TestStringSearching(void);
35 static void TestSurrogateSearching(void);
36 static void TestUnescape(void);
37 static void TestCountChar32(void);
38 static void TestUCharIterator(void);
40 void addUStringTest(TestNode
** root
);
42 void addUStringTest(TestNode
** root
)
44 addTest(root
, &TestStringCopy
, "tsutil/custrtst/TestStringCopy");
45 addTest(root
, &TestStringFunctions
, "tsutil/custrtst/TestStringFunctions");
46 addTest(root
, &TestStringSearching
, "tsutil/custrtst/TestStringSearching");
47 addTest(root
, &TestSurrogateSearching
, "tsutil/custrtst/TestSurrogateSearching");
48 addTest(root
, &TestUnescape
, "tsutil/custrtst/TestUnescape");
49 addTest(root
, &TestCountChar32
, "tsutil/custrtst/TestCountChar32");
50 addTest(root
, &TestUCharIterator
, "tsutil/custrtst/TestUCharIterator");
53 /* test data for TestStringFunctions ---------------------------------------- */
55 UChar
*** dataTable
= NULL
;
57 static const char* raw
[3][4] = {
60 { "English_", "French_", "Croatian_", "English_"},
62 { "United States", "France", "Croatia", "Unites States"},
64 /* Concatenated string */
65 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
68 static void setUpDataTable()
71 if(dataTable
== NULL
) {
72 dataTable
= (UChar
***)calloc(sizeof(UChar
**),3);
74 for (i
= 0; i
< 3; i
++) {
75 dataTable
[i
] = (UChar
**)calloc(sizeof(UChar
*),4);
76 for (j
= 0; j
< 4; j
++){
77 dataTable
[i
][j
] = (UChar
*) malloc(sizeof(UChar
)*(strlen(raw
[i
][j
])+1));
78 u_uastrcpy(dataTable
[i
][j
],raw
[i
][j
]);
84 static void cleanUpDataTable()
87 if(dataTable
!= NULL
) {
89 for(j
= 0; j
<4; j
++) {
90 free(dataTable
[i
][j
]);
/*
 * TestStringFunctions: checks the basic ustring.h string and memory functions
 * against the strings in dataTable and reports every mismatch via log_err().
 * Besides the functions named in the original comment below, the visible
 * checks also cover u_memcpy(), u_memcmp(), u_memset(), u_memmove(),
 * u_strchr()/u_strchr32(), u_memchr()/u_memchr32(), u_strtok_r(),
 * u_strcmpCodePointOrder(), u_strncmpCodePointOrder(),
 * u_memcmpCodePointOrder(), u_strCompare() and u_strCompareIter().
 *
 * NOTE(review): local declarations, braces and some conditions of this
 * function are not present in this copy of the file; the code below is kept
 * exactly as found.
 */
99 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
100 static void TestStringFunctions()
110 log_verbose("Testing u_strlen()\n");
111 if( u_strlen(dataTable
[0][0])!= u_strlen(dataTable
[0][3]) || u_strlen(dataTable
[0][0]) == u_strlen(dataTable
[0][2]))
112 log_err("There is an error in u_strlen()");
114 log_verbose("Testing u_memcpy() and u_memcmp()\n");
120 log_verbose("Testing %s\n", u_austrcpy(tempOut
, dataTable
[i
][j
]));
122 temp
[7] = 0xA4; /* Mark the end */
123 u_memcpy(temp
,dataTable
[i
][j
], 7);
126 log_err("an error occured in u_memcpy()\n");
127 if(u_memcmp(temp
, dataTable
[i
][j
], 7)!=0)
128 log_err("an error occured in u_memcpy() or u_memcmp()\n");
131 if(u_memcmp(dataTable
[0][0], dataTable
[1][1], 7)==0)
132 log_err("an error occured in u_memcmp()\n");
134 log_verbose("Testing u_memset()\n");
137 u_memset(nullTemp
, 0xa4, 7);
138 for (i
= 0; i
< 7; i
++) {
139 if(nullTemp
[i
] != 0xa4) {
140 log_err("an error occured in u_memset()\n");
143 if(nullTemp
[7] != 0) {
144 log_err("u_memset() went too far\n");
147 u_memset(nullTemp
, 0, 7);
150 u_memcpy(temp
,nullTemp
, 7);
151 if(u_memcmp(temp
, nullTemp
, 7)!=0 || temp
[7]!=0)
152 log_err("an error occured in u_memcpy() or u_memcmp()\n");
155 log_verbose("Testing u_memmove()\n");
156 for (i
= 0; i
< 7; i
++) {
159 u_memmove(temp
+ 1, temp
, 7);
161 log_err("an error occured in u_memmove()\n");
163 for (i
= 1; i
<= 7; i
++) {
164 if(temp
[i
] != (i
- 1)) {
165 log_err("an error occured in u_memmove()\n");
169 log_verbose("Testing u_strcpy() and u_strcmp()\n");
175 log_verbose("Testing %s\n", u_austrcpy(tempOut
, dataTable
[i
][j
]));
177 u_strcpy(temp
,dataTable
[i
][j
]);
179 if(u_strcmp(temp
,dataTable
[i
][j
])!=0)
180 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
183 if(u_strcmp(dataTable
[0][0], dataTable
[1][1])==0)
/* NOTE(review): the message below names u_memcmp(), but the check above calls u_strcmp(). */
184 log_err("an error occured in u_memcmp()\n");
186 log_verbose("testing u_strcat()\n");
190 u_uastrcpy(temp
, "");
191 u_strcpy(temp
,dataTable
[i
][j
]);
192 u_strcat(temp
,dataTable
[i
+1][j
]);
193 if(u_strcmp(temp
,dataTable
[i
+2][j
])!=0)
194 log_err("something threw an error in u_strcat()\n");
197 log_verbose("Testing u_strncmp()\n");
198 for(i
=0,j
=0;j
<4; ++j
)
200 k
=u_strlen(dataTable
[i
][j
]);
201 if(u_strncmp(dataTable
[i
][j
],dataTable
[i
+2][j
],k
)!=0)
202 log_err("Something threw an error in u_strncmp\n");
204 if(u_strncmp(dataTable
[0][0], dataTable
[1][1], 7)==0)
/* NOTE(review): the message below names u_memcmp(), but the check above calls u_strncmp(). */
205 log_err("an error occured in u_memcmp()\n");
208 log_verbose("Testing u_strncat\n");
209 for(i
=0,j
=0;j
<4; ++j
)
211 k
=u_strlen(dataTable
[i
][j
]);
215 if(u_strcmp(u_strncat(temp
,dataTable
[i
+2][j
],k
),dataTable
[i
][j
])!=0)
216 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
220 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
221 for(i
=2,j
=0;j
<4; ++j
)
223 k
=u_strlen(dataTable
[i
][j
]);
224 u_strncpy(temp
, dataTable
[i
][j
],k
);
227 if(u_strncmp(temp
, dataTable
[i
][j
],k
)!=0)
228 log_err("something threw an error in u_strncpy()\n");
231 log_err("something threw an error in u_strncpy()\n");
233 u_memset(temp
, 0x3F, UPRV_LENGTHOF(temp
) - 1);
234 u_uastrncpy(temp
, raw
[i
][j
], k
-1);
235 if(u_strncmp(temp
, dataTable
[i
][j
],k
-1)!=0)
236 log_err("something threw an error in u_uastrncpy(k-1)\n");
238 if(temp
[k
-1] != 0x3F)
239 log_err("something threw an error in u_uastrncpy(k-1)\n");
241 u_memset(temp
, 0x3F, UPRV_LENGTHOF(temp
) - 1);
242 u_uastrncpy(temp
, raw
[i
][j
], k
+1);
243 if(u_strcmp(temp
, dataTable
[i
][j
])!=0)
244 log_err("something threw an error in u_uastrncpy(k+1)\n");
247 log_err("something threw an error in u_uastrncpy(k+1)\n");
249 u_memset(temp
, 0x3F, UPRV_LENGTHOF(temp
) - 1);
250 u_uastrncpy(temp
, raw
[i
][j
], k
);
251 if(u_strncmp(temp
, dataTable
[i
][j
], k
)!=0)
252 log_err("something threw an error in u_uastrncpy(k)\n");
255 log_err("something threw an error in u_uastrncpy(k)\n");
258 log_verbose("Testing u_strchr() and u_memchr()\n");
262 UChar saveVal
= dataTable
[i
][j
][0];
263 UChar
*findPtr
= u_strchr(dataTable
[i
][j
], 0x005F);
264 int32_t dataSize
= (int32_t)(u_strlen(dataTable
[i
][j
]) + 1);
266 log_verbose("%s ", u_austrcpy(tempOut
, findPtr
));
268 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
269 log_err("u_strchr can't find '_' in the string\n");
272 findPtr
= u_strchr32(dataTable
[i
][j
], 0x005F);
273 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
274 log_err("u_strchr32 can't find '_' in the string\n");
277 findPtr
= u_strchr(dataTable
[i
][j
], 0);
278 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
279 log_err("u_strchr can't find NULL in the string\n");
282 findPtr
= u_strchr32(dataTable
[i
][j
], 0);
283 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
284 log_err("u_strchr32 can't find NULL in the string\n");
287 findPtr
= u_memchr(dataTable
[i
][j
], 0, dataSize
);
288 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
289 log_err("u_memchr can't find NULL in the string\n");
292 findPtr
= u_memchr32(dataTable
[i
][j
], 0, dataSize
);
293 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
294 log_err("u_memchr32 can't find NULL in the string\n");
/* The first UChar is temporarily overwritten with 0; saveVal restores it further down. */
297 dataTable
[i
][j
][0] = 0;
298 /* Make sure we skip over the NULL termination */
299 findPtr
= u_memchr(dataTable
[i
][j
], 0x005F, dataSize
);
300 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
301 log_err("u_memchr can't find '_' in the string\n");
304 findPtr
= u_memchr32(dataTable
[i
][j
], 0x005F, dataSize
);
305 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
306 log_err("u_memchr32 can't find '_' in the string\n");
308 findPtr
= u_memchr32(dataTable
[i
][j
], 0xFFFD, dataSize
);
309 if (findPtr
!= NULL
) {
310 log_err("Should have found NULL when the character is not there.\n");
312 dataTable
[i
][j
][0] = saveVal
; /* Put it back for the other tests */
316 * test that u_strchr32()
317 * does not find surrogate code points when they are part of matched pairs
318 * (= part of supplementary code points)
322 static const UChar s
[]={
323 /* 0 1 2 3 4 5 6 7 8 9 */
324 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
327 if(u_strchr32(s
, 0xd841)!=(s
+3) || u_strchr32(s
, 0xdc02)!=(s
+5)) {
328 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
330 if(u_memchr32(s
, 0xd841, 9)!=(s
+3) || u_memchr32(s
, 0xdc02, 9)!=(s
+5)) {
331 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
335 log_verbose("Testing u_austrcpy()");
336 u_austrcpy(test
,dataTable
[0][0]);
337 if(strcmp(test
,raw
[0][0])!=0)
338 log_err("There is an error in u_austrcpy()");
341 log_verbose("Testing u_strtok_r()");
343 const char tokString
[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
344 const char *tokens
[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
345 UChar delimBuf
[sizeof(test
)];
346 UChar currTokenBuf
[sizeof(tokString
)];
348 uint32_t currToken
= 0;
351 u_uastrcpy(temp
, tokString
);
352 u_uastrcpy(delimBuf
, " ");
354 ptr
= u_strtok_r(temp
, delimBuf
, &state
);
/* From here on the delimiter set is " ,": the first token was split on " " only. */
355 u_uastrcpy(delimBuf
, " ,");
356 while (ptr
!= NULL
) {
357 u_uastrcpy(currTokenBuf
, tokens
[currToken
]);
358 if (u_strcmp(ptr
, currTokenBuf
) != 0) {
/* NOTE(review): ptr is compared with u_strcmp() above, so it is presumably a UChar string; passing it for %s looks wrong - confirm against log_err(). */
359 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken
, ptr
, tokens
[currToken
]);
361 ptr
= u_strtok_r(NULL
, delimBuf
, &state
);
365 if (currToken
!= UPRV_LENGTHOF(tokens
)) {
366 log_err("Didn't get correct number of tokens\n");
368 state
= delimBuf
; /* Give it an "invalid" saveState */
369 u_uastrcpy(currTokenBuf
, "");
370 if (u_strtok_r(currTokenBuf
, delimBuf
, &state
) != NULL
) {
371 log_err("Didn't get NULL for empty string\n");
374 log_err("State should be NULL for empty string\n");
376 state
= delimBuf
; /* Give it an "invalid" saveState */
377 u_uastrcpy(currTokenBuf
, ", ,");
378 if (u_strtok_r(currTokenBuf
, delimBuf
, &state
) != NULL
) {
379 log_err("Didn't get NULL for a string of delimiters\n");
382 log_err("State should be NULL for a string of delimiters\n");
385 state
= delimBuf
; /* Give it an "invalid" saveState */
386 u_uastrcpy(currTokenBuf
, "q, ,");
387 if (u_strtok_r(currTokenBuf
, delimBuf
, &state
) == NULL
) {
388 log_err("Got NULL for a string that does not begin with delimiters\n");
390 if (u_strtok_r(NULL
, delimBuf
, &state
) != NULL
) {
391 log_err("Didn't get NULL for a string that ends in delimiters\n");
394 log_err("State should be NULL for empty string\n");
397 state
= delimBuf
; /* Give it an "invalid" saveState */
398 u_uastrcpy(currTokenBuf
, tokString
);
399 u_uastrcpy(temp
, tokString
);
400 u_uastrcpy(delimBuf
, "q"); /* Give it a delimiter that it can't find. */
401 ptr
= u_strtok_r(currTokenBuf
, delimBuf
, &state
);
402 if (ptr
== NULL
|| u_strcmp(ptr
, temp
) != 0) {
403 log_err("Should have recieved the same string when there are no delimiters\n");
405 if (u_strtok_r(NULL
, delimBuf
, &state
) != NULL
) {
406 log_err("Should not have found another token in a one token string\n");
410 /* test u_strcmpCodePointOrder() */
412 /* these strings are in ascending order */
413 static const UChar strings
[][4]={
414 { 0x61, 0 }, /* U+0061 */
415 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
416 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
417 { 0xd800, 0 }, /* U+d800 */
418 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
419 { 0xdfff, 0 }, /* U+dfff */
420 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
421 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
422 { 0xd800, 0xdc02, 0 }, /* U+10002 */
423 { 0xd84d, 0xdc56, 0 } /* U+23456 */
426 UCharIterator iter1
, iter2
;
427 int32_t len1
, len2
, r1
, r2
;
/* Each string is compared with its successor, so the loop stops one short of the last entry. */
429 for(i
=0; i
<(UPRV_LENGTHOF(strings
)-1); ++i
) {
430 if(u_strcmpCodePointOrder(strings
[i
], strings
[i
+1])>=0) {
431 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i
);
433 if(u_strncmpCodePointOrder(strings
[i
], strings
[i
+1], 10)>=0) {
434 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i
);
437 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
438 if(u_strncmpCodePointOrder(strings
[i
], strings
[i
+1], 2)!=u_memcmpCodePointOrder(strings
[i
], strings
[i
+1], 2)) {
439 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i
);
442 /* test u_strCompare(TRUE) */
443 len1
=u_strlen(strings
[i
]);
444 len2
=u_strlen(strings
[i
+1]);
445 if( u_strCompare(strings
[i
], -1, strings
[i
+1], -1, TRUE
)>=0 ||
446 u_strCompare(strings
[i
], -1, strings
[i
+1], len2
, TRUE
)>=0 ||
447 u_strCompare(strings
[i
], len1
, strings
[i
+1], -1, TRUE
)>=0 ||
448 u_strCompare(strings
[i
], len1
, strings
[i
+1], len2
, TRUE
)>=0
450 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i
);
453 /* test u_strCompare(FALSE) */
454 r1
=u_strCompare(strings
[i
], -1, strings
[i
+1], -1, FALSE
);
455 r2
=u_strcmp(strings
[i
], strings
[i
+1]);
/* Only the signs are compared, not the exact return values. */
456 if(_SIGN(r1
)!=_SIGN(r2
)) {
457 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i
);
460 /* test u_strCompareIter() */
461 uiter_setString(&iter1
, strings
[i
], len1
);
462 uiter_setString(&iter2
, strings
[i
+1], len2
);
463 if(u_strCompareIter(&iter1
, &iter2
, TRUE
)>=0) {
464 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i
);
466 r1
=u_strCompareIter(&iter1
, &iter2
, FALSE
);
467 if(_SIGN(r1
)!=_SIGN(u_strcmp(strings
[i
], strings
[i
+1]))) {
468 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i
);
/*
 * TestStringSearching: checks u_strpbrk(), u_strcspn() and u_strspn() against
 * a plain string (testString) and a string containing paired and unpaired
 * surrogates (testSurrogateString), using several match sets.
 * Every unexpected result is reported via log_err().
 *
 * NOTE(review): some declarations (for example the match sets a, f and empty)
 * and the braces of this function are not present in this copy of the file;
 * the code below is kept exactly as found.
 */
476 static void TestStringSearching()
478 const UChar testString
[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
479 const UChar testSurrogateString
[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
480 const UChar surrMatchSet1
[] = {0xdbff, 0xdfff, 0};
481 const UChar surrMatchSet2
[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
482 const UChar surrMatchSet3
[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
483 const UChar surrMatchSet4
[] = {0x0000};
484 const UChar surrMatchSetBad
[] = {0xdbff, 0x0061, 0};
485 const UChar surrMatchSetBad2
[] = {0x0061, 0xdbff, 0};
486 const UChar surrMatchSetBad3
[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
490 ab
[] = { 0x61, 0x62, 0 },
491 ba
[] = { 0x62, 0x61, 0 },
492 abcd
[] = { 0x61, 0x62, 0x63, 0x64, 0 },
493 cd
[] = { 0x63, 0x64, 0 },
494 dc
[] = { 0x64, 0x63, 0 },
495 cdh
[] = { 0x63, 0x64, 0x68, 0 },
497 fg
[] = { 0x66, 0x67, 0 },
498 gf
[] = { 0x67, 0x66, 0 };
500 log_verbose("Testing u_strpbrk()");
502 if (u_strpbrk(testString
, a
) != &testString
[0]) {
503 log_err("u_strpbrk couldn't find first letter a.\n");
505 if (u_strpbrk(testString
, dc
) != &testString
[2]) {
506 log_err("u_strpbrk couldn't find d or c.\n");
508 if (u_strpbrk(testString
, cd
) != &testString
[2]) {
509 log_err("u_strpbrk couldn't find c or d.\n");
511 if (u_strpbrk(testString
, cdh
) != &testString
[2]) {
512 log_err("u_strpbrk couldn't find c, d or h.\n");
514 if (u_strpbrk(testString
, f
) != NULL
) {
515 log_err("u_strpbrk didn't return NULL for \"f\".\n");
517 if (u_strpbrk(testString
, fg
) != NULL
) {
518 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
520 if (u_strpbrk(testString
, gf
) != NULL
) {
521 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
523 if (u_strpbrk(testString
, empty
) != NULL
) {
524 log_err("u_strpbrk didn't return NULL for \"\".\n");
527 log_verbose("Testing u_strpbrk() with surrogates");
529 if (u_strpbrk(testSurrogateString
, a
) != &testSurrogateString
[1]) {
530 log_err("u_strpbrk couldn't find first letter a.\n");
532 if (u_strpbrk(testSurrogateString
, dc
) != &testSurrogateString
[5]) {
533 log_err("u_strpbrk couldn't find d or c.\n");
535 if (u_strpbrk(testSurrogateString
, cd
) != &testSurrogateString
[5]) {
536 log_err("u_strpbrk couldn't find c or d.\n");
538 if (u_strpbrk(testSurrogateString
, cdh
) != &testSurrogateString
[5]) {
539 log_err("u_strpbrk couldn't find c, d or h.\n");
541 if (u_strpbrk(testSurrogateString
, f
) != NULL
) {
542 log_err("u_strpbrk didn't return NULL for \"f\".\n");
544 if (u_strpbrk(testSurrogateString
, fg
) != NULL
) {
545 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
547 if (u_strpbrk(testSurrogateString
, gf
) != NULL
) {
548 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
550 if (u_strpbrk(testSurrogateString
, surrMatchSet1
) != &testSurrogateString
[3]) {
551 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
553 if (u_strpbrk(testSurrogateString
, surrMatchSet2
) != &testSurrogateString
[1]) {
554 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
556 if (u_strpbrk(testSurrogateString
, surrMatchSet3
) != &testSurrogateString
[3]) {
557 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
559 if (u_strpbrk(testSurrogateString
, surrMatchSet4
) != NULL
) {
560 log_err("u_strpbrk should have returned NULL for empty string.\n");
562 if (u_strpbrk(testSurrogateString
, surrMatchSetBad
) != &testSurrogateString
[0]) {
563 log_err("u_strpbrk should have found bad surrogate.\n");
566 log_verbose("Testing u_strcspn()");
568 if (u_strcspn(testString
, a
) != 0) {
569 log_err("u_strcspn couldn't find first letter a.\n");
571 if (u_strcspn(testString
, dc
) != 2) {
572 log_err("u_strcspn couldn't find d or c.\n");
574 if (u_strcspn(testString
, cd
) != 2) {
575 log_err("u_strcspn couldn't find c or d.\n");
577 if (u_strcspn(testString
, cdh
) != 2) {
578 log_err("u_strcspn couldn't find c, d or h.\n");
/* NOTE(review): the u_strcspn messages for "f", "fg" and "gf" say "didn't return NULL", but the checks compare the result with u_strlen() of the searched string. */
580 if (u_strcspn(testString
, f
) != u_strlen(testString
)) {
581 log_err("u_strcspn didn't return NULL for \"f\".\n");
583 if (u_strcspn(testString
, fg
) != u_strlen(testString
)) {
584 log_err("u_strcspn didn't return NULL for \"fg\".\n");
586 if (u_strcspn(testString
, gf
) != u_strlen(testString
)) {
587 log_err("u_strcspn didn't return NULL for \"gf\".\n");
590 log_verbose("Testing u_strcspn() with surrogates");
592 if (u_strcspn(testSurrogateString
, a
) != 1) {
593 log_err("u_strcspn couldn't find first letter a.\n");
595 if (u_strcspn(testSurrogateString
, dc
) != 5) {
596 log_err("u_strcspn couldn't find d or c.\n");
598 if (u_strcspn(testSurrogateString
, cd
) != 5) {
599 log_err("u_strcspn couldn't find c or d.\n");
601 if (u_strcspn(testSurrogateString
, cdh
) != 5) {
602 log_err("u_strcspn couldn't find c, d or h.\n");
604 if (u_strcspn(testSurrogateString
, f
) != u_strlen(testSurrogateString
)) {
605 log_err("u_strcspn didn't return NULL for \"f\".\n");
607 if (u_strcspn(testSurrogateString
, fg
) != u_strlen(testSurrogateString
)) {
608 log_err("u_strcspn didn't return NULL for \"fg\".\n");
610 if (u_strcspn(testSurrogateString
, gf
) != u_strlen(testSurrogateString
)) {
611 log_err("u_strcspn didn't return NULL for \"gf\".\n");
613 if (u_strcspn(testSurrogateString
, surrMatchSet1
) != 3) {
614 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
616 if (u_strcspn(testSurrogateString
, surrMatchSet2
) != 1) {
617 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
619 if (u_strcspn(testSurrogateString
, surrMatchSet3
) != 3) {
620 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
622 if (u_strcspn(testSurrogateString
, surrMatchSet4
) != u_strlen(testSurrogateString
)) {
623 log_err("u_strcspn should have returned strlen for empty string.\n");
627 log_verbose("Testing u_strspn()");
629 if (u_strspn(testString
, a
) != 1) {
630 log_err("u_strspn couldn't skip first letter a.\n");
632 if (u_strspn(testString
, ab
) != 2) {
633 log_err("u_strspn couldn't skip a or b.\n");
635 if (u_strspn(testString
, ba
) != 2) {
636 log_err("u_strspn couldn't skip a or b.\n");
638 if (u_strspn(testString
, f
) != 0) {
639 log_err("u_strspn didn't return 0 for \"f\".\n");
641 if (u_strspn(testString
, dc
) != 0) {
642 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
644 if (u_strspn(testString
, abcd
) != u_strlen(testString
)) {
645 log_err("u_strspn couldn't skip over the whole string.\n");
647 if (u_strspn(testString
, empty
) != 0) {
648 log_err("u_strspn should have returned 0 for empty string.\n");
651 log_verbose("Testing u_strspn() with surrogates");
652 if (u_strspn(testSurrogateString
, surrMatchSetBad
) != 2) {
653 log_err("u_strspn couldn't skip 0xdbff or a.\n");
655 if (u_strspn(testSurrogateString
, surrMatchSetBad2
) != 2) {
656 log_err("u_strspn couldn't skip 0xdbff or a.\n");
658 if (u_strspn(testSurrogateString
, f
) != 0) {
659 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
661 if (u_strspn(testSurrogateString
, dc
) != 0) {
662 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
664 if (u_strspn(testSurrogateString
, cd
) != 0) {
665 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
667 if (u_strspn(testSurrogateString
, testSurrogateString
) != u_strlen(testSurrogateString
)) {
668 log_err("u_strspn couldn't skip whole string.\n");
670 if (u_strspn(testSurrogateString
, surrMatchSet1
) != 0) {
671 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
673 if (u_strspn(testSurrogateString
, surrMatchSetBad3
) != 5) {
674 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
676 if (u_strspn(testSurrogateString
, surrMatchSet4
) != 0) {
677 log_err("u_strspn should have returned 0 for empty string.\n");
/*
 * All binary Unicode string searches should behave the same for equivalent input.
 * See Jitterbug 2145.
 * There are some new functions, too - just test them all.
 */
/*
 * TestSurrogateSearching: searches the fixed UTF-16 array s - which mixes
 * 'a', the surrogate pair d801 dc02 (U+10402) and unpaired d801 / dc02 units -
 * with the u_strchr, u_memchr, u_strstr and u_strFindFirst families and their
 * reverse counterparts (u_strrchr, u_memrchr, u_strrstr, u_strFindLast).
 * Each group of calls is expected to agree on one position (or on NULL);
 * a disagreement is reported via log_err().
 *
 * NOTE(review): the sub_* search strings, the assignments to first/last and
 * the if( ... ) { } framing of each group are not present in this copy of the
 * file; the code below is kept exactly as found.
 */
687 TestSurrogateSearching() {
688 static const UChar s
[]={
689 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
690 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
710 static const UChar a
=0x61, b
=0x62, lead
=0xd801, trail
=0xdc02, nul
=0;
711 static const UChar32 supp
=0x10402, supp2
=0x10403, ill
=0x123456;
713 const UChar
*first
, *last
;
715 /* search for NUL code point: find end of string */
719 first
!=u_strchr(s
, nul
) ||
720 first
!=u_strchr32(s
, nul
) ||
721 first
!=u_memchr(s
, nul
, UPRV_LENGTHOF(s
)) ||
722 first
!=u_memchr32(s
, nul
, UPRV_LENGTHOF(s
)) ||
723 first
!=u_strrchr(s
, nul
) ||
724 first
!=u_strrchr32(s
, nul
) ||
725 first
!=u_memrchr(s
, nul
, UPRV_LENGTHOF(s
)) ||
726 first
!=u_memrchr32(s
, nul
, UPRV_LENGTHOF(s
))
728 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
731 /* search for empty substring: find beginning of string */
733 s
!=u_strstr(s
, &nul
) ||
734 s
!=u_strFindFirst(s
, -1, &nul
, -1) ||
735 s
!=u_strFindFirst(s
, -1, &nul
, 0) ||
736 s
!=u_strFindFirst(s
, UPRV_LENGTHOF(s
), &nul
, -1) ||
737 s
!=u_strFindFirst(s
, UPRV_LENGTHOF(s
), &nul
, 0) ||
738 s
!=u_strrstr(s
, &nul
) ||
739 s
!=u_strFindLast(s
, -1, &nul
, -1) ||
740 s
!=u_strFindLast(s
, -1, &nul
, 0) ||
741 s
!=u_strFindLast(s
, UPRV_LENGTHOF(s
), &nul
, -1) ||
742 s
!=u_strFindLast(s
, UPRV_LENGTHOF(s
), &nul
, 0)
744 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
747 /* find 'a' in s[1..10[ */
751 first
!=u_strchr(s
+1, a
) ||
752 first
!=u_strchr32(s
+1, a
) ||
753 first
!=u_memchr(s
+1, a
, 9) ||
754 first
!=u_memchr32(s
+1, a
, 9) ||
755 first
!=u_strstr(s
+1, sub_a
) ||
756 first
!=u_strFindFirst(s
+1, -1, sub_a
, -1) ||
757 first
!=u_strFindFirst(s
+1, -1, &a
, 1) ||
758 first
!=u_strFindFirst(s
+1, 9, sub_a
, -1) ||
759 first
!=u_strFindFirst(s
+1, 9, &a
, 1) ||
/* The NUL-terminated reverse searches are compared with s+10, the length-limited (9) ones with last. */
760 (s
+10)!=u_strrchr(s
+1, a
) ||
761 (s
+10)!=u_strrchr32(s
+1, a
) ||
762 last
!=u_memrchr(s
+1, a
, 9) ||
763 last
!=u_memrchr32(s
+1, a
, 9) ||
764 (s
+10)!=u_strrstr(s
+1, sub_a
) ||
765 (s
+10)!=u_strFindLast(s
+1, -1, sub_a
, -1) ||
766 (s
+10)!=u_strFindLast(s
+1, -1, &a
, 1) ||
767 last
!=u_strFindLast(s
+1, 9, sub_a
, -1) ||
768 last
!=u_strFindLast(s
+1, 9, &a
, 1)
770 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
773 /* do not find 'b' in s[1..10[ */
775 NULL
!=u_strchr(s
+1, b
) ||
776 NULL
!=u_strchr32(s
+1, b
) ||
777 NULL
!=u_memchr(s
+1, b
, 9) ||
778 NULL
!=u_memchr32(s
+1, b
, 9) ||
779 NULL
!=u_strstr(s
+1, sub_b
) ||
780 NULL
!=u_strFindFirst(s
+1, -1, sub_b
, -1) ||
781 NULL
!=u_strFindFirst(s
+1, -1, &b
, 1) ||
782 NULL
!=u_strFindFirst(s
+1, 9, sub_b
, -1) ||
783 NULL
!=u_strFindFirst(s
+1, 9, &b
, 1) ||
784 NULL
!=u_strrchr(s
+1, b
) ||
785 NULL
!=u_strrchr32(s
+1, b
) ||
786 NULL
!=u_memrchr(s
+1, b
, 9) ||
787 NULL
!=u_memrchr32(s
+1, b
, 9) ||
788 NULL
!=u_strrstr(s
+1, sub_b
) ||
789 NULL
!=u_strFindLast(s
+1, -1, sub_b
, -1) ||
790 NULL
!=u_strFindLast(s
+1, -1, &b
, 1) ||
791 NULL
!=u_strFindLast(s
+1, 9, sub_b
, -1) ||
792 NULL
!=u_strFindLast(s
+1, 9, &b
, 1)
794 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
797 /* do not find a non-code point in s[1..10[ */
799 NULL
!=u_strchr32(s
+1, ill
) ||
800 NULL
!=u_memchr32(s
+1, ill
, 9) ||
801 NULL
!=u_strrchr32(s
+1, ill
) ||
802 NULL
!=u_memrchr32(s
+1, ill
, 9)
804 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
807 /* find U+d801 in s[1..10[ */
810 first
!=u_strchr(s
+1, lead
) ||
811 first
!=u_strchr32(s
+1, lead
) ||
812 first
!=u_memchr(s
+1, lead
, 9) ||
813 first
!=u_memchr32(s
+1, lead
, 9) ||
814 first
!=u_strstr(s
+1, sub_lead
) ||
815 first
!=u_strFindFirst(s
+1, -1, sub_lead
, -1) ||
816 first
!=u_strFindFirst(s
+1, -1, &lead
, 1) ||
817 first
!=u_strFindFirst(s
+1, 9, sub_lead
, -1) ||
818 first
!=u_strFindFirst(s
+1, 9, &lead
, 1) ||
819 first
!=u_strrchr(s
+1, lead
) ||
820 first
!=u_strrchr32(s
+1, lead
) ||
821 first
!=u_memrchr(s
+1, lead
, 9) ||
822 first
!=u_memrchr32(s
+1, lead
, 9) ||
823 first
!=u_strrstr(s
+1, sub_lead
) ||
824 first
!=u_strFindLast(s
+1, -1, sub_lead
, -1) ||
825 first
!=u_strFindLast(s
+1, -1, &lead
, 1) ||
826 first
!=u_strFindLast(s
+1, 9, sub_lead
, -1) ||
827 first
!=u_strFindLast(s
+1, 9, &lead
, 1)
829 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
832 /* find U+dc02 in s[1..10[ */
835 first
!=u_strchr(s
+1, trail
) ||
836 first
!=u_strchr32(s
+1, trail
) ||
837 first
!=u_memchr(s
+1, trail
, 9) ||
838 first
!=u_memchr32(s
+1, trail
, 9) ||
839 first
!=u_strstr(s
+1, sub_trail
) ||
840 first
!=u_strFindFirst(s
+1, -1, sub_trail
, -1) ||
841 first
!=u_strFindFirst(s
+1, -1, &trail
, 1) ||
842 first
!=u_strFindFirst(s
+1, 9, sub_trail
, -1) ||
843 first
!=u_strFindFirst(s
+1, 9, &trail
, 1) ||
844 first
!=u_strrchr(s
+1, trail
) ||
845 first
!=u_strrchr32(s
+1, trail
) ||
846 first
!=u_memrchr(s
+1, trail
, 9) ||
847 first
!=u_memrchr32(s
+1, trail
, 9) ||
848 first
!=u_strrstr(s
+1, sub_trail
) ||
849 first
!=u_strFindLast(s
+1, -1, sub_trail
, -1) ||
850 first
!=u_strFindLast(s
+1, -1, &trail
, 1) ||
851 first
!=u_strFindLast(s
+1, 9, sub_trail
, -1) ||
852 first
!=u_strFindLast(s
+1, 9, &trail
, 1)
854 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
857 /* find U+10402 in s[1..10[ */
861 first
!=u_strchr32(s
+1, supp
) ||
862 first
!=u_memchr32(s
+1, supp
, 9) ||
863 first
!=u_strstr(s
+1, sub_supp
) ||
864 first
!=u_strFindFirst(s
+1, -1, sub_supp
, -1) ||
865 first
!=u_strFindFirst(s
+1, -1, sub_supp
, 2) ||
866 first
!=u_strFindFirst(s
+1, 9, sub_supp
, -1) ||
867 first
!=u_strFindFirst(s
+1, 9, sub_supp
, 2) ||
868 last
!=u_strrchr32(s
+1, supp
) ||
869 last
!=u_memrchr32(s
+1, supp
, 9) ||
870 last
!=u_strrstr(s
+1, sub_supp
) ||
871 last
!=u_strFindLast(s
+1, -1, sub_supp
, -1) ||
872 last
!=u_strFindLast(s
+1, -1, sub_supp
, 2) ||
873 last
!=u_strFindLast(s
+1, 9, sub_supp
, -1) ||
874 last
!=u_strFindLast(s
+1, 9, sub_supp
, 2)
876 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
879 /* do not find U+10402 in a single UChar */
881 NULL
!=u_memchr32(s
+1, supp
, 1) ||
882 NULL
!=u_strFindFirst(s
+1, 1, sub_supp
, -1) ||
883 NULL
!=u_strFindFirst(s
+1, 1, sub_supp
, 2) ||
884 NULL
!=u_memrchr32(s
+1, supp
, 1) ||
885 NULL
!=u_strFindLast(s
+1, 1, sub_supp
, -1) ||
886 NULL
!=u_strFindLast(s
+1, 1, sub_supp
, 2) ||
887 NULL
!=u_memrchr32(s
+2, supp
, 1) ||
888 NULL
!=u_strFindLast(s
+2, 1, sub_supp
, -1) ||
889 NULL
!=u_strFindLast(s
+2, 1, sub_supp
, 2)
891 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
894 /* do not find U+10403 in s[1..10[ */
896 NULL
!=u_strchr32(s
+1, supp2
) ||
897 NULL
!=u_memchr32(s
+1, supp2
, 9) ||
898 NULL
!=u_strstr(s
+1, sub_supp2
) ||
899 NULL
!=u_strFindFirst(s
+1, -1, sub_supp2
, -1) ||
900 NULL
!=u_strFindFirst(s
+1, -1, sub_supp2
, 2) ||
901 NULL
!=u_strFindFirst(s
+1, 9, sub_supp2
, -1) ||
902 NULL
!=u_strFindFirst(s
+1, 9, sub_supp2
, 2) ||
903 NULL
!=u_strrchr32(s
+1, supp2
) ||
904 NULL
!=u_memrchr32(s
+1, supp2
, 9) ||
905 NULL
!=u_strrstr(s
+1, sub_supp2
) ||
906 NULL
!=u_strFindLast(s
+1, -1, sub_supp2
, -1) ||
907 NULL
!=u_strFindLast(s
+1, -1, sub_supp2
, 2) ||
908 NULL
!=u_strFindLast(s
+1, 9, sub_supp2
, -1) ||
909 NULL
!=u_strFindLast(s
+1, 9, sub_supp2
, 2)
911 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
914 /* find <0061 d801> in s[1..10[ */
917 first
!=u_strstr(s
+1, sub_a_lead
) ||
918 first
!=u_strFindFirst(s
+1, -1, sub_a_lead
, -1) ||
919 first
!=u_strFindFirst(s
+1, -1, sub_a_lead
, 2) ||
920 first
!=u_strFindFirst(s
+1, 9, sub_a_lead
, -1) ||
921 first
!=u_strFindFirst(s
+1, 9, sub_a_lead
, 2) ||
922 first
!=u_strrstr(s
+1, sub_a_lead
) ||
923 first
!=u_strFindLast(s
+1, -1, sub_a_lead
, -1) ||
924 first
!=u_strFindLast(s
+1, -1, sub_a_lead
, 2) ||
925 first
!=u_strFindLast(s
+1, 9, sub_a_lead
, -1) ||
926 first
!=u_strFindLast(s
+1, 9, sub_a_lead
, 2)
928 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
931 /* find <dc02 0061> in s[1..10[ */
934 first
!=u_strstr(s
+1, sub_trail_a
) ||
935 first
!=u_strFindFirst(s
+1, -1, sub_trail_a
, -1) ||
936 first
!=u_strFindFirst(s
+1, -1, sub_trail_a
, 2) ||
937 first
!=u_strFindFirst(s
+1, 9, sub_trail_a
, -1) ||
938 first
!=u_strFindFirst(s
+1, 9, sub_trail_a
, 2) ||
939 first
!=u_strrstr(s
+1, sub_trail_a
) ||
940 first
!=u_strFindLast(s
+1, -1, sub_trail_a
, -1) ||
941 first
!=u_strFindLast(s
+1, -1, sub_trail_a
, 2) ||
942 first
!=u_strFindLast(s
+1, 9, sub_trail_a
, -1) ||
943 first
!=u_strFindLast(s
+1, 9, sub_trail_a
, 2)
945 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
948 /* do not find "aba" in s[1..10[ */
950 NULL
!=u_strstr(s
+1, sub_aba
) ||
951 NULL
!=u_strFindFirst(s
+1, -1, sub_aba
, -1) ||
952 NULL
!=u_strFindFirst(s
+1, -1, sub_aba
, 3) ||
953 NULL
!=u_strFindFirst(s
+1, 9, sub_aba
, -1) ||
954 NULL
!=u_strFindFirst(s
+1, 9, sub_aba
, 3) ||
955 NULL
!=u_strrstr(s
+1, sub_aba
) ||
956 NULL
!=u_strFindLast(s
+1, -1, sub_aba
, -1) ||
957 NULL
!=u_strFindLast(s
+1, -1, sub_aba
, 3) ||
958 NULL
!=u_strFindLast(s
+1, 9, sub_aba
, -1) ||
959 NULL
!=u_strFindLast(s
+1, 9, sub_aba
, 3)
961 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
/*
 * TestStringCopy: checks the char/UChar copy helpers u_uastrcpy(),
 * u_uastrncpy() and u_austrncpy(), then the search helpers u_strchr(),
 * u_strstr(), u_strchr32() and u_memchr32() on the scratch buffer temp.
 * Every failure is reported through log_err().
 *
 * NOTE(review): the declarations of temp, charOut, result and subString,
 * and the statements that fill temp and subString before the search tests,
 * are not visible in this excerpt - confirm against the full file.
 * NOTE(review): the search-test messages pass a pointer difference for %ld
 * and a pointer for 0x%lx; the argument types look mismatched with the
 * conversion specifiers - confirm and cast if needed.
 */
965 static void TestStringCopy()
970 UChar uchars
[]={0x61, 0x62, 0x63, 0x00};
972 char chars
[]="abc"; /* needs default codepage */
974 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
/* copy a char string into temp and compare with the UChar reference */
976 u_uastrcpy(temp
, "abc");
977 if(u_strcmp(temp
, uchars
) != 0) {
978 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars
), austrdup(temp
));
981 temp
[0] = 0xFB; /* load garbage into it */
/*
 * bounded copy of 3 units: the first 3 UChars must match uchars and
 * temp[3] must still hold the 0xFB marker (the statement that stores the
 * marker at index 3 is presumably among the lines not shown here)
 */
986 u_uastrncpy(temp
, "abcabcabc", 3);
987 if(u_strncmp(uchars
, temp
, 3) != 0){
988 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars
), austrdup(temp
));
990 if(temp
[3] != 0xFB) {
991 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
994 charOut
[0] = (char)0x7B; /* load garbage into it */
995 charOut
[1] = (char)0x7B;
996 charOut
[2] = (char)0x7B;
997 charOut
[3] = (char)0x7B;
/*
 * bounded copy in the other direction (UChar to char): the first 3 chars
 * must match chars and charOut[3] must keep its 0x7B marker.
 * NOTE(review): the failure message below prints uchars and temp although
 * the comparison is between chars and charOut - looks like a copy/paste
 * slip, confirm.
 */
1007 u_austrncpy(charOut
, temp
, 3);
1008 if(strncmp(chars
, charOut
, 3) != 0){
1009 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars
), austrdup(temp
));
1011 if(charOut
[3] != (char)0x7B) {
1012 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1015 /*Testing u_strchr()*/
1016 log_verbose("Testing u_strchr\n");
/* the first 0x62 is expected at index 1 of temp */
1025 result
=u_strchr(temp
, (UChar
)0x62);
1026 if(result
!= temp
+1){
1027 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1029 /*Testing u_strstr()*/
1030 log_verbose("Testing u_strstr\n");
/* subString is expected to be found at index 2 of temp */
1034 result
=u_strstr(temp
, subString
);
1035 if(result
!= temp
+2){
1036 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* per the message below, searching for an empty string must match at index 0 */
1038 result
=u_strstr(temp
, subString
+2); /* subString+2 is an empty string */
1040 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* per the message below, searching for temp inside subString must fail */
1042 result
=u_strstr(subString
, temp
);
1044 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1047 /*Testing u_strchr32*/
1048 log_verbose("Testing u_strchr32\n");
/* BMP code point: same expectation as the u_strchr() case above */
1049 result
=u_strchr32(temp
, (UChar32
)0x62);
1050 if(result
!= temp
+1){
1051 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* per the message below, 0xfb must not be found */
1053 result
=u_strchr32(temp
, (UChar32
)0xfb);
1055 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
/* supplementary code point U+20402 is expected to start at index 5 */
1057 result
=u_strchr32(temp
, (UChar32
)0x20402);
1058 if(result
!= temp
+5){
1059 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* u_memchr32() with an explicit length of 7: U+20402 still expected at index 5 */
1063 result
=u_memchr32(temp
, (UChar32
)0x20402, 7);
1064 if(result
!= temp
+5){
1065 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* per the messages below, lengths 6 and 1 must yield no match for U+20402 */
1067 result
=u_memchr32(temp
, (UChar32
)0x20402, 6);
1069 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1071 result
=u_memchr32(temp
, (UChar32
)0x20402, 1);
1073 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* the single code unit 0xfc00 is expected at index 7 when 8 units are searched */
1075 result
=u_memchr32(temp
, (UChar32
)0xfc00, 8);
1076 if(result
!= temp
+7){
1077 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1081 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
/*
 * Body of the u_unescape() test.
 * NOTE(review): the function header and the declaration of length are not
 * visible in this excerpt; presumably this is TestUnescape() - confirm.
 *
 * buffer receives the unescaped UChars, input is the escaped char string,
 * expect holds the expected UTF-16 code units including a terminating 0,
 * and explength is the expected length without that terminator.
 */
1085 static UChar buffer
[200];
/* input mixes \u, \U, \f, \e, \cC, \n, \x and \x{...} escape forms */
1087 static const char* input
=
1088 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1090 static const UChar expect
[]={
1091 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1092 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1093 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1094 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1096 static const int32_t explength
= UPRV_LENGTHOF(expect
)-1;
1099 /* test u_unescape() */
/* both the returned length and the buffer contents must match */
1100 length
=u_unescape(input
, buffer
, UPRV_LENGTHOF(buffer
));
1101 if(length
!=explength
|| u_strcmp(buffer
, expect
)!=0) {
1102 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length
,
1106 /* try preflighting */
/*
 * NULL destination: only the returned length is new information here.
 * NOTE(review): the u_strcmp() below re-checks buffer as left by the
 * previous call, which the message does not mention - confirm whether the
 * comparison is intended.
 */
1107 length
=u_unescape(input
, NULL
, UPRV_LENGTHOF(buffer
));
1108 if(length
!=explength
|| u_strcmp(buffer
, expect
)!=0) {
1109 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length
, explength
);
1112 /* ### TODO: test u_unescapeAt() */
1115 /* test code point counting functions --------------------------------------- */
1117 /* reference implementation of u_strHasMoreChar32Than() */
1119 _refStrHasMoreChar32Than(const UChar
*s
, int32_t length
, int32_t number
) {
1120 int32_t count
=u_countChar32(s
, length
);
1121 return count
>number
;
1124 /* compare the real function against the reference */
1126 _testStrHasMoreChar32Than(const UChar
*s
, int32_t i
, int32_t length
, int32_t number
) {
1127 if(u_strHasMoreChar32Than(s
, length
, number
)!=_refStrHasMoreChar32Than(s
, length
, number
)) {
1128 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1129 i
, length
, number
, u_strHasMoreChar32Than(s
, length
, number
));
/*
 * Body of the code point counting test.
 * NOTE(review): the function header, the declaration of buffer and some
 * loop lines are not visible in this excerpt; presumably this is
 * TestCountChar32() - confirm.
 *
 * The test string mixes BMP units, surrogate pairs (0xd800 0xdc00, ...),
 * and unpaired surrogates (0xd802 followed by 0x64, and 0xdc03 after 0x64).
 */
1135 static const UChar string
[]={
1136 0x61, 0x62, 0xd800, 0xdc00,
1137 0xd801, 0xdc01, 0x63, 0xd802,
1138 0x64, 0xdc03, 0x65, 0x66,
1139 0xd804, 0xdc04, 0xd805, 0xdc05,
1143 int32_t i
, length
, number
;
1145 /* test u_strHasMoreChar32Than() with length>=0 */
1146 length
=UPRV_LENGTHOF(string
);
/* every start offset i, with number ranging from -1 to 2 more than the remaining length */
1148 for(i
=0; i
<=length
; ++i
) {
1149 for(number
=-1; number
<=((length
-i
)+2); ++number
) {
1150 _testStrHasMoreChar32Than(string
+i
, i
, length
-i
, number
);
1156 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1157 length
=UPRV_LENGTHOF(string
);
/* work on a copy in buffer and pass -1 as the length to the function under test */
1158 u_memcpy(buffer
, string
, length
);
1161 for(i
=0; i
<=length
; ++i
) {
1162 for(number
=-1; number
<=((length
-i
)+2); ++number
) {
1163 _testStrHasMoreChar32Than(buffer
+i
, i
, -1, number
);
1169 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
/*
 * NOTE(review): i is not passed to the call below, and for length==-1 the
 * i loop condition 0<=-1 is false, so the NULL string is never tested with
 * length==-1; for length==1 the same calls are made twice - confirm whether
 * the i loop is intended.
 */
1170 for(length
=-1; length
<=1; ++length
) {
1171 for(i
=0; i
<=length
; ++i
) {
1172 for(number
=-2; number
<=2; ++number
) {
1173 _testStrHasMoreChar32Than(NULL
, 0, length
, number
);
1179 /* UCharIterator ------------------------------------------------------------ */
/*
 * Compare results from two iterators; they should be the same.
 * Assumes that the text is not empty and that
 * iteration start==0 and iteration limit==length.
 */
/*
 * compareIterators: drives two UCharIterators through the same sequence of
 * operations (getIndex, move, current, next, previous, hasNext,
 * hasPrevious) and reports differences through log_err().
 * n1 and n2 are the display names of iter1 and iter2 in the messages.
 *
 * NOTE(review): the declarations of c1 and c2, the assignment of middle,
 * most of the if guards in front of the log_err() calls and the loop
 * headers/terminators of the full-length iterations are not visible in
 * this excerpt - confirm against the full file.
 */
1187 compareIterators(UCharIterator
*iter1
, const char *n1
,
1188 UCharIterator
*iter2
, const char *n2
) {
1189 int32_t i
, pos1
, pos2
, middle
, length
;
1192 /* compare lengths */
1193 length
=iter1
->getIndex(iter1
, UITER_LENGTH
);
1194 pos2
=iter2
->getIndex(iter2
, UITER_LENGTH
);
1196 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1
, length
, pos2
, n2
);
1200 /* set into the middle */
/* each iterator is moved separately so that a failure names the right one */
1203 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
1205 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
1209 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
1211 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
1215 /* test current() */
1216 c1
=iter1
->current(iter1
);
1217 c2
=iter2
->current(iter2
);
1219 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1
, c1
, c2
, n2
, middle
);
1223 /* move forward 3 UChars */
1224 for(i
=0; i
<3; ++i
) {
1225 c1
=iter1
->next(iter1
);
1226 c2
=iter2
->next(iter2
);
1228 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
1233 /* move backward 5 UChars */
1234 for(i
=0; i
<5; ++i
) {
1235 c1
=iter1
->previous(iter1
);
1236 c2
=iter2
->previous(iter2
);
1238 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
1243 /* iterate forward from the beginning */
/* both iterators are reset to the start and must report hasNext() there */
1244 pos1
=iter1
->move(iter1
, 0, UITER_START
);
1246 log_err("%s->move(start) failed\n", n1
);
1249 if(!iter1
->hasNext(iter1
)) {
1250 log_err("%s->hasNext() at the start returns FALSE\n", n1
);
1254 pos2
=iter2
->move(iter2
, 0, UITER_START
);
1256 log_err("%s->move(start) failed\n", n2
);
1259 if(!iter2
->hasNext(iter2
)) {
1260 log_err("%s->hasNext() at the start returns FALSE\n", n2
);
/* step both iterators with next() and compare each pair of results */
1265 c1
=iter1
->next(iter1
);
1266 c2
=iter2
->next(iter2
);
1268 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
/* once the forward pass is done neither iterator may report hasNext() */
1273 if(iter1
->hasNext(iter1
)) {
1274 log_err("%s->hasNext() at the end returns TRUE\n", n1
);
1277 if(iter2
->hasNext(iter2
)) {
1278 log_err("%s->hasNext() at the end returns TRUE\n", n2
);
1282 /* back to the middle */
1283 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
1285 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
1289 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
1291 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
1295 /* move to index 1 */
1296 pos1
=iter1
->move(iter1
, 1, UITER_ZERO
);
1298 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1
, middle
, pos1
);
1302 pos2
=iter2
->move(iter2
, 1, UITER_ZERO
);
1304 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2
, middle
, pos2
);
1308 /* iterate backward from the end */
/* mirror image of the forward pass: start at the limit, require hasPrevious() */
1309 pos1
=iter1
->move(iter1
, 0, UITER_LIMIT
);
1311 log_err("%s->move(limit) failed\n", n1
);
1314 if(!iter1
->hasPrevious(iter1
)) {
1315 log_err("%s->hasPrevious() at the end returns FALSE\n", n1
);
1319 pos2
=iter2
->move(iter2
, 0, UITER_LIMIT
);
1321 log_err("%s->move(limit) failed\n", n2
);
1324 if(!iter2
->hasPrevious(iter2
)) {
1325 log_err("%s->hasPrevious() at the end returns FALSE\n", n2
);
/* step both iterators with previous() and compare each pair of results */
1330 c1
=iter1
->previous(iter1
);
1331 c2
=iter2
->previous(iter2
);
1333 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
/* once the backward pass is done neither iterator may report hasPrevious() */
1338 if(iter1
->hasPrevious(iter1
)) {
1339 log_err("%s->hasPrevious() at the start returns TRUE\n", n1
);
1342 if(iter2
->hasPrevious(iter2
)) {
1343 log_err("%s->hasPrevious() at the start returns TRUE\n", n2
);
/*
 * Test the iterator's getState() and setState() functions.
 * iter1 and iter2 must be set up for the same iterator type and the same string
 * but may be physically different structs (different addresses).
 *
 * Assumes that the text is not empty and that
 * iteration start==0 and iteration limit==length.
 * The caller must ensure that 2<=middle<=length-2.
 */
/*
 * testIteratorState: reads the state of iter1 at index middle with
 * uiter_getState(), applies it to iter2 with uiter_setState(), and then
 * checks that iter2 yields the same neighbouring code units and the same
 * current index and length as iter1.
 * n is the display name used in the log_err() messages.
 *
 * NOTE(review): the declarations of u, state, c, i and j, the statement
 * that stores each code unit into u, and most of the if guards in front of
 * the log_err() calls are not visible in this excerpt - confirm against
 * the full file.
 */
1358 testIteratorState(UCharIterator
*iter1
, UCharIterator
*iter2
, const char *n
, int32_t middle
) {
1361 UErrorCode errorCode
;
1366 /* get four UChars from the middle of the string */
/* reading starts two units before middle, so the four units span middle-2..middle+1 */
1367 iter1
->move(iter1
, middle
-2, UITER_ZERO
);
1368 for(i
=0; i
<4; ++i
) {
1369 c
=iter1
->next(iter1
);
1371 /* the test violates the assumptions, see comment above */
1372 log_err("test error: %s[%d]=%d\n", n
, middle
-2+i
, c
);
1378 /* move to the middle and get the state */
/* after four next() calls iter1 is two units past middle, hence the -2 */
1379 iter1
->move(iter1
, -2, UITER_CURRENT
);
1380 state
=uiter_getState(iter1
);
1382 /* set the state into the second iterator and compare the results */
1383 errorCode
=U_ZERO_ERROR
;
1384 uiter_setState(iter2
, state
, &errorCode
);
1385 if(U_FAILURE(errorCode
)) {
1386 log_err("%s->setState(0x%x) failed: %s\n", n
, state
, u_errorName(errorCode
));
/* per the messages below, iter2 is probed against the saved units u[0]..u[3] */
1390 c
=iter2
->current(iter2
);
1392 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n
, middle
, c
, u
[2]);
1395 c
=iter2
->previous(iter2
);
1397 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-1, c
, u
[1]);
1400 iter2
->move(iter2
, 2, UITER_CURRENT
);
1401 c
=iter2
->next(iter2
);
1403 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n
, middle
+1, c
, u
[3]);
1406 iter2
->move(iter2
, -3, UITER_CURRENT
);
1407 c
=iter2
->previous(iter2
);
1409 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-2, c
, u
[0]);
1412 /* move the second iterator back to the middle */
1413 iter2
->move(iter2
, 1, UITER_CURRENT
);
1416 /* check that both are in the middle */
1417 i
=iter1
->getIndex(iter1
, UITER_CURRENT
);
1418 j
=iter2
->getIndex(iter2
, UITER_CURRENT
);
1420 log_err("%s->getIndex(current)=%d!=%d as expected\n", n
, i
, middle
);
1423 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n
, j
, i
);
1426 /* compare lengths */
1427 i
=iter1
->getIndex(iter1
, UITER_LENGTH
);
1428 j
=iter2
->getIndex(iter2
, UITER_LENGTH
);
1430 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n
, i
, j
);
/*
 * TestUCharIterator: exercises the UCharIterator implementations set up by
 * uiter_setString(), uiter_setUTF8() and uiter_setUTF16BE() on the same
 * text, using compareIterators() and testIteratorState().
 *
 * NOTE(review): the declarations of bytes, cnv and length, and the
 * statements between some of the visible lines (for example after the
 * U_FAILURE checks), are not visible in this excerpt - confirm against the
 * full file.
 */
1435 TestUCharIterator() {
/* the text contains one surrogate pair (0xd801 0xdffd) and a terminating 0 */
1436 static const UChar text
[]={
1437 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1441 UCharIterator iter
, iter1
, iter2
;
1443 UErrorCode errorCode
;
1446 /* simple API/code coverage - test NOOP UCharIterator */
/*
 * with a NULL string, current/next/previous must return -1, and move,
 * getIndex, hasNext and hasPrevious must all return 0
 */
1447 uiter_setString(&iter
, NULL
, 0);
1448 if( iter
.current(&iter
)!=-1 || iter
.next(&iter
)!=-1 || iter
.previous(&iter
)!=-1 ||
1449 iter
.move(&iter
, 1, UITER_CURRENT
) || iter
.getIndex(&iter
, UITER_CURRENT
)!=0 ||
1450 iter
.hasNext(&iter
) || iter
.hasPrevious(&iter
)
1452 log_err("NOOP UCharIterator behaves unexpectedly\n");
1455 /* test get/set state */
/* iter1 uses NUL termination (-1), iter2 the explicit length; two middle positions are tried */
1456 length
=UPRV_LENGTHOF(text
)-1;
1457 uiter_setString(&iter1
, text
, -1);
1458 uiter_setString(&iter2
, text
, length
);
1459 testIteratorState(&iter1
, &iter2
, "UTF16IteratorState", length
/2);
1460 testIteratorState(&iter1
, &iter2
, "UTF16IteratorStatePlus1", length
/2+1);
1462 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1463 errorCode
=U_ZERO_ERROR
;
/* length is overwritten here with the UTF-8 length written by u_strToUTF8() */
1464 u_strToUTF8(bytes
, sizeof(bytes
), &length
, text
, -1, &errorCode
);
1465 if(U_FAILURE(errorCode
)) {
1466 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode
));
1470 uiter_setString(&iter1
, text
, -1);
1471 uiter_setUTF8(&iter2
, bytes
, length
);
1472 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF8Iterator");
1474 /* try again with length=-1 */
1475 uiter_setUTF8(&iter2
, bytes
, -1);
1476 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF8Iterator_1");
1478 /* test get/set state */
/*
 * length is reset to the UTF-16 length; only iter1 is re-initialized here,
 * iter2 is still the UTF-8 iterator set up with length=-1 just above
 */
1479 length
=UPRV_LENGTHOF(text
)-1;
1480 uiter_setUTF8(&iter1
, bytes
, -1);
1481 testIteratorState(&iter1
, &iter2
, "UTF8IteratorState", length
/2);
1482 testIteratorState(&iter1
, &iter2
, "UTF8IteratorStatePlus1", length
/2+1);
1484 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1485 errorCode
=U_ZERO_ERROR
;
/* convert text to UTF-16BE bytes; length becomes the value returned by ucnv_fromUChars() */
1486 cnv
=ucnv_open("UTF-16BE", &errorCode
);
1487 length
=ucnv_fromUChars(cnv
, bytes
, sizeof(bytes
), text
, -1, &errorCode
);
1489 if(U_FAILURE(errorCode
)) {
1490 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode
));
1494 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1495 bytes
[length
]=bytes
[length
+1]=0;
1497 uiter_setString(&iter1
, text
, -1);
1498 uiter_setUTF16BE(&iter2
, bytes
, length
);
1499 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF16BEIterator");
1501 /* try again with length=-1 */
1502 uiter_setUTF16BE(&iter2
, bytes
, -1);
1503 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF16BEIterator_1");
1505 /* try again after moving the bytes up one, and with length=-1 */
/*
 * length+2 bytes are moved so that the two terminating 0 bytes move too;
 * presumably this exercises a start address with different alignment -
 * confirm
 */
1506 memmove(bytes
+1, bytes
, length
+2);
1507 uiter_setUTF16BE(&iter2
, bytes
+1, -1);
1508 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF16BEIteratorMoved1");
1510 /* ### TODO test other iterators: CharacterIterator, Replaceable */