1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
10 * file name: custrtst.c
12 * tab size: 8 (not used)
15 * created on: 2002oct09
16 * created by: Markus W. Scherer
18 * Tests of ustring.h Unicode string API functions.
21 #include "unicode/ustring.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/uiter.h"
/*
 * Get the sign of an integer: yields -1, 0 or +1.
 * The argument is evaluated more than once, so pass an expression without
 * side effects. The value is narrowed to int32_t before its sign is tested.
 * An explicit comparison is used instead of an arithmetic right shift because
 * right-shifting a negative signed value is implementation-defined in C.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
31 /* test setup --------------------------------------------------------------- */
33 static void setUpDataTable(void);
34 static void TestStringCopy(void);
35 static void TestStringFunctions(void);
36 static void TestStringSearching(void);
37 static void TestSurrogateSearching(void);
38 static void TestUnescape(void);
39 static void TestCountChar32(void);
40 static void TestUCharIterator(void);
41 static void TestIsWellFormed(void);
43 void addUStringTest(TestNode
** root
);
45 void addUStringTest(TestNode
** root
)
47 addTest(root
, &TestStringCopy
, "tsutil/custrtst/TestStringCopy");
48 addTest(root
, &TestStringFunctions
, "tsutil/custrtst/TestStringFunctions");
49 addTest(root
, &TestStringSearching
, "tsutil/custrtst/TestStringSearching");
50 addTest(root
, &TestSurrogateSearching
, "tsutil/custrtst/TestSurrogateSearching");
51 addTest(root
, &TestUnescape
, "tsutil/custrtst/TestUnescape");
52 addTest(root
, &TestCountChar32
, "tsutil/custrtst/TestCountChar32");
53 addTest(root
, &TestUCharIterator
, "tsutil/custrtst/TestUCharIterator");
54 addTest(root
, &TestIsWellFormed
, "tsutil/custrtst/TestIsWellFormed");
57 /* test data for TestStringFunctions ---------------------------------------- */
/*
 * dataTable: UChar copies of the raw[][] strings, allocated on demand by
 * setUpDataTable(); NULL until then.
 */
59 UChar
*** dataTable
= NULL
;
/*
 * raw: the source strings as char literals, indexed [row][column].
 * For columns 0-2, row 2 is the row 0 string followed by the row 1 string.
 * NOTE(review): raw[1][3] reads "Unites States" while raw[2][3] ends in
 * "United States" - confirm whether that mismatch is intentional.
 */
61 static const char* raw
[3][4] = {
64 { "English_", "French_", "Croatian_", "English_"},
66 { "United States", "France", "Croatia", "Unites States"},
68 /* Concatenated string */
69 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
72 static void setUpDataTable()
75 if(dataTable
== NULL
) {
76 dataTable
= (UChar
***)calloc(sizeof(UChar
**),3);
78 for (i
= 0; i
< 3; i
++) {
79 dataTable
[i
] = (UChar
**)calloc(sizeof(UChar
*),4);
80 for (j
= 0; j
< 4; j
++){
81 dataTable
[i
][j
] = (UChar
*) malloc(sizeof(UChar
)*(strlen(raw
[i
][j
])+1));
82 u_uastrcpy(dataTable
[i
][j
],raw
[i
][j
]);
/*
 * Frees the strings stored in dataTable; the body is skipped when dataTable
 * is NULL.
 * NOTE(review): the loop that drives i is not visible in this excerpt -
 * confirm it covers the same rows that setUpDataTable() allocates.
 */
88 static void cleanUpDataTable()
91 if(dataTable
!= NULL
) {
93 for(j
= 0; j
<4; j
++) {
/* release one string of row i */
94 free(dataTable
[i
][j
]);
103 /* Tests for u_strcat(), u_strcmp(), u_strlen(), u_strcpy(), u_strncat(), u_strncmp(), u_strncpy(), u_uastrcpy(), u_austrcpy() and u_uastrncpy(). */
104 static void TestStringFunctions()
114 log_verbose("Testing u_strlen()\n");
115 if( u_strlen(dataTable
[0][0])!= u_strlen(dataTable
[0][3]) || u_strlen(dataTable
[0][0]) == u_strlen(dataTable
[0][2]))
116 log_err("There is an error in u_strlen()");
118 log_verbose("Testing u_memcpy() and u_memcmp()\n");
124 log_verbose("Testing %s\n", u_austrcpy(tempOut
, dataTable
[i
][j
]));
126 temp
[7] = 0xA4; /* Mark the end */
127 u_memcpy(temp
,dataTable
[i
][j
], 7);
130 log_err("an error occured in u_memcpy()\n");
131 if(u_memcmp(temp
, dataTable
[i
][j
], 7)!=0)
132 log_err("an error occured in u_memcpy() or u_memcmp()\n");
135 if(u_memcmp(dataTable
[0][0], dataTable
[1][1], 7)==0)
136 log_err("an error occured in u_memcmp()\n");
138 log_verbose("Testing u_memset()\n");
141 u_memset(nullTemp
, 0xa4, 7);
142 for (i
= 0; i
< 7; i
++) {
143 if(nullTemp
[i
] != 0xa4) {
144 log_err("an error occured in u_memset()\n");
147 if(nullTemp
[7] != 0) {
148 log_err("u_memset() went too far\n");
151 u_memset(nullTemp
, 0, 7);
154 u_memcpy(temp
,nullTemp
, 7);
155 if(u_memcmp(temp
, nullTemp
, 7)!=0 || temp
[7]!=0)
156 log_err("an error occured in u_memcpy() or u_memcmp()\n");
159 log_verbose("Testing u_memmove()\n");
160 for (i
= 0; i
< 7; i
++) {
163 u_memmove(temp
+ 1, temp
, 7);
165 log_err("an error occured in u_memmove()\n");
167 for (i
= 1; i
<= 7; i
++) {
168 if(temp
[i
] != (i
- 1)) {
169 log_err("an error occured in u_memmove()\n");
173 log_verbose("Testing u_strcpy() and u_strcmp()\n");
179 log_verbose("Testing %s\n", u_austrcpy(tempOut
, dataTable
[i
][j
]));
181 u_strcpy(temp
,dataTable
[i
][j
]);
183 if(u_strcmp(temp
,dataTable
[i
][j
])!=0)
184 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
187 if(u_strcmp(dataTable
[0][0], dataTable
[1][1])==0)
188 log_err("an error occured in u_memcmp()\n");
190 log_verbose("testing u_strcat()\n");
194 u_uastrcpy(temp
, "");
195 u_strcpy(temp
,dataTable
[i
][j
]);
196 u_strcat(temp
,dataTable
[i
+1][j
]);
197 if(u_strcmp(temp
,dataTable
[i
+2][j
])!=0)
198 log_err("something threw an error in u_strcat()\n");
201 log_verbose("Testing u_strncmp()\n");
202 for(i
=0,j
=0;j
<4; ++j
)
204 k
=u_strlen(dataTable
[i
][j
]);
205 if(u_strncmp(dataTable
[i
][j
],dataTable
[i
+2][j
],k
)!=0)
206 log_err("Something threw an error in u_strncmp\n");
208 if(u_strncmp(dataTable
[0][0], dataTable
[1][1], 7)==0)
209 log_err("an error occured in u_memcmp()\n");
212 log_verbose("Testing u_strncat\n");
213 for(i
=0,j
=0;j
<4; ++j
)
215 k
=u_strlen(dataTable
[i
][j
]);
219 if(u_strcmp(u_strncat(temp
,dataTable
[i
+2][j
],k
),dataTable
[i
][j
])!=0)
220 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
224 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
225 for(i
=2,j
=0;j
<4; ++j
)
227 k
=u_strlen(dataTable
[i
][j
]);
228 u_strncpy(temp
, dataTable
[i
][j
],k
);
231 if(u_strncmp(temp
, dataTable
[i
][j
],k
)!=0)
232 log_err("something threw an error in u_strncpy()\n");
235 log_err("something threw an error in u_strncpy()\n");
237 u_memset(temp
, 0x3F, UPRV_LENGTHOF(temp
) - 1);
238 u_uastrncpy(temp
, raw
[i
][j
], k
-1);
239 if(u_strncmp(temp
, dataTable
[i
][j
],k
-1)!=0)
240 log_err("something threw an error in u_uastrncpy(k-1)\n");
242 if(temp
[k
-1] != 0x3F)
243 log_err("something threw an error in u_uastrncpy(k-1)\n");
245 u_memset(temp
, 0x3F, UPRV_LENGTHOF(temp
) - 1);
246 u_uastrncpy(temp
, raw
[i
][j
], k
+1);
247 if(u_strcmp(temp
, dataTable
[i
][j
])!=0)
248 log_err("something threw an error in u_uastrncpy(k+1)\n");
251 log_err("something threw an error in u_uastrncpy(k+1)\n");
253 u_memset(temp
, 0x3F, UPRV_LENGTHOF(temp
) - 1);
254 u_uastrncpy(temp
, raw
[i
][j
], k
);
255 if(u_strncmp(temp
, dataTable
[i
][j
], k
)!=0)
256 log_err("something threw an error in u_uastrncpy(k)\n");
259 log_err("something threw an error in u_uastrncpy(k)\n");
262 log_verbose("Testing u_strchr() and u_memchr()\n");
266 UChar saveVal
= dataTable
[i
][j
][0];
267 UChar
*findPtr
= u_strchr(dataTable
[i
][j
], 0x005F);
268 int32_t dataSize
= (int32_t)(u_strlen(dataTable
[i
][j
]) + 1);
270 log_verbose("%s ", u_austrcpy(tempOut
, findPtr
));
272 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
273 log_err("u_strchr can't find '_' in the string\n");
276 findPtr
= u_strchr32(dataTable
[i
][j
], 0x005F);
277 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
278 log_err("u_strchr32 can't find '_' in the string\n");
281 findPtr
= u_strchr(dataTable
[i
][j
], 0);
282 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
283 log_err("u_strchr can't find NULL in the string\n");
286 findPtr
= u_strchr32(dataTable
[i
][j
], 0);
287 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
288 log_err("u_strchr32 can't find NULL in the string\n");
291 findPtr
= u_memchr(dataTable
[i
][j
], 0, dataSize
);
292 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
293 log_err("u_memchr can't find NULL in the string\n");
296 findPtr
= u_memchr32(dataTable
[i
][j
], 0, dataSize
);
297 if (findPtr
!= (&(dataTable
[i
][j
][dataSize
- 1]))) {
298 log_err("u_memchr32 can't find NULL in the string\n");
301 dataTable
[i
][j
][0] = 0;
302 /* Make sure we skip over the NULL termination */
303 findPtr
= u_memchr(dataTable
[i
][j
], 0x005F, dataSize
);
304 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
305 log_err("u_memchr can't find '_' in the string\n");
308 findPtr
= u_memchr32(dataTable
[i
][j
], 0x005F, dataSize
);
309 if (findPtr
== NULL
|| *findPtr
!= 0x005F) {
310 log_err("u_memchr32 can't find '_' in the string\n");
312 findPtr
= u_memchr32(dataTable
[i
][j
], 0xFFFD, dataSize
);
313 if (findPtr
!= NULL
) {
314 log_err("Should have found NULL when the character is not there.\n");
316 dataTable
[i
][j
][0] = saveVal
; /* Put it back for the other tests */
320 * test that u_strchr32()
321 * does not find surrogate code points when they are part of matched pairs
322 * (= part of supplementary code points)
326 static const UChar s
[]={
327 /* 0 1 2 3 4 5 6 7 8 9 */
328 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
331 if(u_strchr32(s
, 0xd841)!=(s
+3) || u_strchr32(s
, 0xdc02)!=(s
+5)) {
332 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
334 if(u_memchr32(s
, 0xd841, 9)!=(s
+3) || u_memchr32(s
, 0xdc02, 9)!=(s
+5)) {
335 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
339 log_verbose("Testing u_austrcpy()");
340 u_austrcpy(test
,dataTable
[0][0]);
341 if(strcmp(test
,raw
[0][0])!=0)
342 log_err("There is an error in u_austrcpy()");
345 log_verbose("Testing u_strtok_r()");
347 const char tokString
[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
348 const char *tokens
[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
349 UChar delimBuf
[sizeof(test
)];
350 UChar currTokenBuf
[sizeof(tokString
)];
352 uint32_t currToken
= 0;
355 u_uastrcpy(temp
, tokString
);
356 u_uastrcpy(delimBuf
, " ");
358 ptr
= u_strtok_r(temp
, delimBuf
, &state
);
359 u_uastrcpy(delimBuf
, " ,");
360 while (ptr
!= NULL
) {
361 u_uastrcpy(currTokenBuf
, tokens
[currToken
]);
362 if (u_strcmp(ptr
, currTokenBuf
) != 0) {
363 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken
, ptr
, tokens
[currToken
]);
365 ptr
= u_strtok_r(NULL
, delimBuf
, &state
);
369 if (currToken
!= UPRV_LENGTHOF(tokens
)) {
370 log_err("Didn't get correct number of tokens\n");
372 state
= delimBuf
; /* Give it an "invalid" saveState */
373 u_uastrcpy(currTokenBuf
, "");
374 if (u_strtok_r(currTokenBuf
, delimBuf
, &state
) != NULL
) {
375 log_err("Didn't get NULL for empty string\n");
378 log_err("State should be NULL for empty string\n");
380 state
= delimBuf
; /* Give it an "invalid" saveState */
381 u_uastrcpy(currTokenBuf
, ", ,");
382 if (u_strtok_r(currTokenBuf
, delimBuf
, &state
) != NULL
) {
383 log_err("Didn't get NULL for a string of delimiters\n");
386 log_err("State should be NULL for a string of delimiters\n");
389 state
= delimBuf
; /* Give it an "invalid" saveState */
390 u_uastrcpy(currTokenBuf
, "q, ,");
391 if (u_strtok_r(currTokenBuf
, delimBuf
, &state
) == NULL
) {
392 log_err("Got NULL for a string that does not begin with delimiters\n");
394 if (u_strtok_r(NULL
, delimBuf
, &state
) != NULL
) {
395 log_err("Didn't get NULL for a string that ends in delimiters\n");
398 log_err("State should be NULL for empty string\n");
401 state
= delimBuf
; /* Give it an "invalid" saveState */
402 u_uastrcpy(currTokenBuf
, tokString
);
403 u_uastrcpy(temp
, tokString
);
404 u_uastrcpy(delimBuf
, "q"); /* Give it a delimiter that it can't find. */
405 ptr
= u_strtok_r(currTokenBuf
, delimBuf
, &state
);
406 if (ptr
== NULL
|| u_strcmp(ptr
, temp
) != 0) {
407 log_err("Should have recieved the same string when there are no delimiters\n");
409 if (u_strtok_r(NULL
, delimBuf
, &state
) != NULL
) {
410 log_err("Should not have found another token in a one token string\n");
414 /* test u_strcmpCodePointOrder() */
416 /* these strings are in ascending order */
417 static const UChar strings
[][4]={
418 { 0x61, 0 }, /* U+0061 */
419 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
420 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
421 { 0xd800, 0 }, /* U+d800 */
422 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
423 { 0xdfff, 0 }, /* U+dfff */
424 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
425 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
426 { 0xd800, 0xdc02, 0 }, /* U+10002 */
427 { 0xd84d, 0xdc56, 0 } /* U+23456 */
430 UCharIterator iter1
, iter2
;
431 int32_t len1
, len2
, r1
, r2
;
433 for(i
=0; i
<(UPRV_LENGTHOF(strings
)-1); ++i
) {
434 if(u_strcmpCodePointOrder(strings
[i
], strings
[i
+1])>=0) {
435 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i
);
437 if(u_strncmpCodePointOrder(strings
[i
], strings
[i
+1], 10)>=0) {
438 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i
);
441 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
442 if(u_strncmpCodePointOrder(strings
[i
], strings
[i
+1], 2)!=u_memcmpCodePointOrder(strings
[i
], strings
[i
+1], 2)) {
443 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i
);
446 /* test u_strCompare(TRUE) */
447 len1
=u_strlen(strings
[i
]);
448 len2
=u_strlen(strings
[i
+1]);
449 if( u_strCompare(strings
[i
], -1, strings
[i
+1], -1, TRUE
)>=0 ||
450 u_strCompare(strings
[i
], -1, strings
[i
+1], len2
, TRUE
)>=0 ||
451 u_strCompare(strings
[i
], len1
, strings
[i
+1], -1, TRUE
)>=0 ||
452 u_strCompare(strings
[i
], len1
, strings
[i
+1], len2
, TRUE
)>=0
454 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i
);
457 /* test u_strCompare(FALSE) */
458 r1
=u_strCompare(strings
[i
], -1, strings
[i
+1], -1, FALSE
);
459 r2
=u_strcmp(strings
[i
], strings
[i
+1]);
460 if(_SIGN(r1
)!=_SIGN(r2
)) {
461 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i
);
464 /* test u_strCompareIter() */
465 uiter_setString(&iter1
, strings
[i
], len1
);
466 uiter_setString(&iter2
, strings
[i
+1], len2
);
467 if(u_strCompareIter(&iter1
, &iter2
, TRUE
)>=0) {
468 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i
);
470 r1
=u_strCompareIter(&iter1
, &iter2
, FALSE
);
471 if(_SIGN(r1
)!=_SIGN(u_strcmp(strings
[i
], strings
[i
+1]))) {
472 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i
);
480 static void TestStringSearching()
482 const UChar testString
[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
483 const UChar testSurrogateString
[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
484 const UChar surrMatchSet1
[] = {0xdbff, 0xdfff, 0};
485 const UChar surrMatchSet2
[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
486 const UChar surrMatchSet3
[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
487 const UChar surrMatchSet4
[] = {0x0000};
488 const UChar surrMatchSetBad
[] = {0xdbff, 0x0061, 0};
489 const UChar surrMatchSetBad2
[] = {0x0061, 0xdbff, 0};
490 const UChar surrMatchSetBad3
[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
494 ab
[] = { 0x61, 0x62, 0 },
495 ba
[] = { 0x62, 0x61, 0 },
496 abcd
[] = { 0x61, 0x62, 0x63, 0x64, 0 },
497 cd
[] = { 0x63, 0x64, 0 },
498 dc
[] = { 0x64, 0x63, 0 },
499 cdh
[] = { 0x63, 0x64, 0x68, 0 },
501 fg
[] = { 0x66, 0x67, 0 },
502 gf
[] = { 0x67, 0x66, 0 };
504 log_verbose("Testing u_strpbrk()");
506 if (u_strpbrk(testString
, a
) != &testString
[0]) {
507 log_err("u_strpbrk couldn't find first letter a.\n");
509 if (u_strpbrk(testString
, dc
) != &testString
[2]) {
510 log_err("u_strpbrk couldn't find d or c.\n");
512 if (u_strpbrk(testString
, cd
) != &testString
[2]) {
513 log_err("u_strpbrk couldn't find c or d.\n");
515 if (u_strpbrk(testString
, cdh
) != &testString
[2]) {
516 log_err("u_strpbrk couldn't find c, d or h.\n");
518 if (u_strpbrk(testString
, f
) != NULL
) {
519 log_err("u_strpbrk didn't return NULL for \"f\".\n");
521 if (u_strpbrk(testString
, fg
) != NULL
) {
522 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
524 if (u_strpbrk(testString
, gf
) != NULL
) {
525 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
527 if (u_strpbrk(testString
, empty
) != NULL
) {
528 log_err("u_strpbrk didn't return NULL for \"\".\n");
531 log_verbose("Testing u_strpbrk() with surrogates");
533 if (u_strpbrk(testSurrogateString
, a
) != &testSurrogateString
[1]) {
534 log_err("u_strpbrk couldn't find first letter a.\n");
536 if (u_strpbrk(testSurrogateString
, dc
) != &testSurrogateString
[5]) {
537 log_err("u_strpbrk couldn't find d or c.\n");
539 if (u_strpbrk(testSurrogateString
, cd
) != &testSurrogateString
[5]) {
540 log_err("u_strpbrk couldn't find c or d.\n");
542 if (u_strpbrk(testSurrogateString
, cdh
) != &testSurrogateString
[5]) {
543 log_err("u_strpbrk couldn't find c, d or h.\n");
545 if (u_strpbrk(testSurrogateString
, f
) != NULL
) {
546 log_err("u_strpbrk didn't return NULL for \"f\".\n");
548 if (u_strpbrk(testSurrogateString
, fg
) != NULL
) {
549 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
551 if (u_strpbrk(testSurrogateString
, gf
) != NULL
) {
552 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
554 if (u_strpbrk(testSurrogateString
, surrMatchSet1
) != &testSurrogateString
[3]) {
555 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
557 if (u_strpbrk(testSurrogateString
, surrMatchSet2
) != &testSurrogateString
[1]) {
558 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
560 if (u_strpbrk(testSurrogateString
, surrMatchSet3
) != &testSurrogateString
[3]) {
561 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
563 if (u_strpbrk(testSurrogateString
, surrMatchSet4
) != NULL
) {
564 log_err("u_strpbrk should have returned NULL for empty string.\n");
566 if (u_strpbrk(testSurrogateString
, surrMatchSetBad
) != &testSurrogateString
[0]) {
567 log_err("u_strpbrk should have found bad surrogate.\n");
570 log_verbose("Testing u_strcspn()");
572 if (u_strcspn(testString
, a
) != 0) {
573 log_err("u_strcspn couldn't find first letter a.\n");
575 if (u_strcspn(testString
, dc
) != 2) {
576 log_err("u_strcspn couldn't find d or c.\n");
578 if (u_strcspn(testString
, cd
) != 2) {
579 log_err("u_strcspn couldn't find c or d.\n");
581 if (u_strcspn(testString
, cdh
) != 2) {
582 log_err("u_strcspn couldn't find c, d or h.\n");
584 if (u_strcspn(testString
, f
) != u_strlen(testString
)) {
585 log_err("u_strcspn didn't return NULL for \"f\".\n");
587 if (u_strcspn(testString
, fg
) != u_strlen(testString
)) {
588 log_err("u_strcspn didn't return NULL for \"fg\".\n");
590 if (u_strcspn(testString
, gf
) != u_strlen(testString
)) {
591 log_err("u_strcspn didn't return NULL for \"gf\".\n");
594 log_verbose("Testing u_strcspn() with surrogates");
596 if (u_strcspn(testSurrogateString
, a
) != 1) {
597 log_err("u_strcspn couldn't find first letter a.\n");
599 if (u_strcspn(testSurrogateString
, dc
) != 5) {
600 log_err("u_strcspn couldn't find d or c.\n");
602 if (u_strcspn(testSurrogateString
, cd
) != 5) {
603 log_err("u_strcspn couldn't find c or d.\n");
605 if (u_strcspn(testSurrogateString
, cdh
) != 5) {
606 log_err("u_strcspn couldn't find c, d or h.\n");
608 if (u_strcspn(testSurrogateString
, f
) != u_strlen(testSurrogateString
)) {
609 log_err("u_strcspn didn't return NULL for \"f\".\n");
611 if (u_strcspn(testSurrogateString
, fg
) != u_strlen(testSurrogateString
)) {
612 log_err("u_strcspn didn't return NULL for \"fg\".\n");
614 if (u_strcspn(testSurrogateString
, gf
) != u_strlen(testSurrogateString
)) {
615 log_err("u_strcspn didn't return NULL for \"gf\".\n");
617 if (u_strcspn(testSurrogateString
, surrMatchSet1
) != 3) {
618 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
620 if (u_strcspn(testSurrogateString
, surrMatchSet2
) != 1) {
621 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
623 if (u_strcspn(testSurrogateString
, surrMatchSet3
) != 3) {
624 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
626 if (u_strcspn(testSurrogateString
, surrMatchSet4
) != u_strlen(testSurrogateString
)) {
627 log_err("u_strcspn should have returned strlen for empty string.\n");
631 log_verbose("Testing u_strspn()");
633 if (u_strspn(testString
, a
) != 1) {
634 log_err("u_strspn couldn't skip first letter a.\n");
636 if (u_strspn(testString
, ab
) != 2) {
637 log_err("u_strspn couldn't skip a or b.\n");
639 if (u_strspn(testString
, ba
) != 2) {
640 log_err("u_strspn couldn't skip a or b.\n");
642 if (u_strspn(testString
, f
) != 0) {
643 log_err("u_strspn didn't return 0 for \"f\".\n");
645 if (u_strspn(testString
, dc
) != 0) {
646 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
648 if (u_strspn(testString
, abcd
) != u_strlen(testString
)) {
649 log_err("u_strspn couldn't skip over the whole string.\n");
651 if (u_strspn(testString
, empty
) != 0) {
652 log_err("u_strspn should have returned 0 for empty string.\n");
655 log_verbose("Testing u_strspn() with surrogates");
656 if (u_strspn(testSurrogateString
, surrMatchSetBad
) != 2) {
657 log_err("u_strspn couldn't skip 0xdbff or a.\n");
659 if (u_strspn(testSurrogateString
, surrMatchSetBad2
) != 2) {
660 log_err("u_strspn couldn't skip 0xdbff or a.\n");
662 if (u_strspn(testSurrogateString
, f
) != 0) {
663 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
665 if (u_strspn(testSurrogateString
, dc
) != 0) {
666 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
668 if (u_strspn(testSurrogateString
, cd
) != 0) {
669 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
671 if (u_strspn(testSurrogateString
, testSurrogateString
) != u_strlen(testSurrogateString
)) {
672 log_err("u_strspn couldn't skip whole string.\n");
674 if (u_strspn(testSurrogateString
, surrMatchSet1
) != 0) {
675 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
677 if (u_strspn(testSurrogateString
, surrMatchSetBad3
) != 5) {
678 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
680 if (u_strspn(testSurrogateString
, surrMatchSet4
) != 0) {
681 log_err("u_strspn should have returned 0 for empty string.\n");
686 * All binary Unicode string searches should behave the same for equivalent input.
687 * See Jitterbug 2145.
688 * There are some new functions, too - just test them all.
691 TestSurrogateSearching() {
692 static const UChar s
[]={
693 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
694 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
714 static const UChar a
=0x61, b
=0x62, lead
=0xd801, trail
=0xdc02, nul
=0;
715 static const UChar32 supp
=0x10402, supp2
=0x10403, ill
=0x123456;
717 const UChar
*first
, *last
;
719 /* search for NUL code point: find end of string */
723 first
!=u_strchr(s
, nul
) ||
724 first
!=u_strchr32(s
, nul
) ||
725 first
!=u_memchr(s
, nul
, UPRV_LENGTHOF(s
)) ||
726 first
!=u_memchr32(s
, nul
, UPRV_LENGTHOF(s
)) ||
727 first
!=u_strrchr(s
, nul
) ||
728 first
!=u_strrchr32(s
, nul
) ||
729 first
!=u_memrchr(s
, nul
, UPRV_LENGTHOF(s
)) ||
730 first
!=u_memrchr32(s
, nul
, UPRV_LENGTHOF(s
))
732 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
735 /* search for empty substring: find beginning of string */
737 s
!=u_strstr(s
, &nul
) ||
738 s
!=u_strFindFirst(s
, -1, &nul
, -1) ||
739 s
!=u_strFindFirst(s
, -1, &nul
, 0) ||
740 s
!=u_strFindFirst(s
, UPRV_LENGTHOF(s
), &nul
, -1) ||
741 s
!=u_strFindFirst(s
, UPRV_LENGTHOF(s
), &nul
, 0) ||
742 s
!=u_strrstr(s
, &nul
) ||
743 s
!=u_strFindLast(s
, -1, &nul
, -1) ||
744 s
!=u_strFindLast(s
, -1, &nul
, 0) ||
745 s
!=u_strFindLast(s
, UPRV_LENGTHOF(s
), &nul
, -1) ||
746 s
!=u_strFindLast(s
, UPRV_LENGTHOF(s
), &nul
, 0)
748 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
751 /* find 'a' in s[1..10[ */
755 first
!=u_strchr(s
+1, a
) ||
756 first
!=u_strchr32(s
+1, a
) ||
757 first
!=u_memchr(s
+1, a
, 9) ||
758 first
!=u_memchr32(s
+1, a
, 9) ||
759 first
!=u_strstr(s
+1, sub_a
) ||
760 first
!=u_strFindFirst(s
+1, -1, sub_a
, -1) ||
761 first
!=u_strFindFirst(s
+1, -1, &a
, 1) ||
762 first
!=u_strFindFirst(s
+1, 9, sub_a
, -1) ||
763 first
!=u_strFindFirst(s
+1, 9, &a
, 1) ||
764 (s
+10)!=u_strrchr(s
+1, a
) ||
765 (s
+10)!=u_strrchr32(s
+1, a
) ||
766 last
!=u_memrchr(s
+1, a
, 9) ||
767 last
!=u_memrchr32(s
+1, a
, 9) ||
768 (s
+10)!=u_strrstr(s
+1, sub_a
) ||
769 (s
+10)!=u_strFindLast(s
+1, -1, sub_a
, -1) ||
770 (s
+10)!=u_strFindLast(s
+1, -1, &a
, 1) ||
771 last
!=u_strFindLast(s
+1, 9, sub_a
, -1) ||
772 last
!=u_strFindLast(s
+1, 9, &a
, 1)
774 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
777 /* do not find 'b' in s[1..10[ */
779 NULL
!=u_strchr(s
+1, b
) ||
780 NULL
!=u_strchr32(s
+1, b
) ||
781 NULL
!=u_memchr(s
+1, b
, 9) ||
782 NULL
!=u_memchr32(s
+1, b
, 9) ||
783 NULL
!=u_strstr(s
+1, sub_b
) ||
784 NULL
!=u_strFindFirst(s
+1, -1, sub_b
, -1) ||
785 NULL
!=u_strFindFirst(s
+1, -1, &b
, 1) ||
786 NULL
!=u_strFindFirst(s
+1, 9, sub_b
, -1) ||
787 NULL
!=u_strFindFirst(s
+1, 9, &b
, 1) ||
788 NULL
!=u_strrchr(s
+1, b
) ||
789 NULL
!=u_strrchr32(s
+1, b
) ||
790 NULL
!=u_memrchr(s
+1, b
, 9) ||
791 NULL
!=u_memrchr32(s
+1, b
, 9) ||
792 NULL
!=u_strrstr(s
+1, sub_b
) ||
793 NULL
!=u_strFindLast(s
+1, -1, sub_b
, -1) ||
794 NULL
!=u_strFindLast(s
+1, -1, &b
, 1) ||
795 NULL
!=u_strFindLast(s
+1, 9, sub_b
, -1) ||
796 NULL
!=u_strFindLast(s
+1, 9, &b
, 1)
798 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
801 /* do not find a non-code point in s[1..10[ */
803 NULL
!=u_strchr32(s
+1, ill
) ||
804 NULL
!=u_memchr32(s
+1, ill
, 9) ||
805 NULL
!=u_strrchr32(s
+1, ill
) ||
806 NULL
!=u_memrchr32(s
+1, ill
, 9)
808 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
811 /* find U+d801 in s[1..10[ */
814 first
!=u_strchr(s
+1, lead
) ||
815 first
!=u_strchr32(s
+1, lead
) ||
816 first
!=u_memchr(s
+1, lead
, 9) ||
817 first
!=u_memchr32(s
+1, lead
, 9) ||
818 first
!=u_strstr(s
+1, sub_lead
) ||
819 first
!=u_strFindFirst(s
+1, -1, sub_lead
, -1) ||
820 first
!=u_strFindFirst(s
+1, -1, &lead
, 1) ||
821 first
!=u_strFindFirst(s
+1, 9, sub_lead
, -1) ||
822 first
!=u_strFindFirst(s
+1, 9, &lead
, 1) ||
823 first
!=u_strrchr(s
+1, lead
) ||
824 first
!=u_strrchr32(s
+1, lead
) ||
825 first
!=u_memrchr(s
+1, lead
, 9) ||
826 first
!=u_memrchr32(s
+1, lead
, 9) ||
827 first
!=u_strrstr(s
+1, sub_lead
) ||
828 first
!=u_strFindLast(s
+1, -1, sub_lead
, -1) ||
829 first
!=u_strFindLast(s
+1, -1, &lead
, 1) ||
830 first
!=u_strFindLast(s
+1, 9, sub_lead
, -1) ||
831 first
!=u_strFindLast(s
+1, 9, &lead
, 1)
833 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
836 /* find U+dc02 in s[1..10[ */
839 first
!=u_strchr(s
+1, trail
) ||
840 first
!=u_strchr32(s
+1, trail
) ||
841 first
!=u_memchr(s
+1, trail
, 9) ||
842 first
!=u_memchr32(s
+1, trail
, 9) ||
843 first
!=u_strstr(s
+1, sub_trail
) ||
844 first
!=u_strFindFirst(s
+1, -1, sub_trail
, -1) ||
845 first
!=u_strFindFirst(s
+1, -1, &trail
, 1) ||
846 first
!=u_strFindFirst(s
+1, 9, sub_trail
, -1) ||
847 first
!=u_strFindFirst(s
+1, 9, &trail
, 1) ||
848 first
!=u_strrchr(s
+1, trail
) ||
849 first
!=u_strrchr32(s
+1, trail
) ||
850 first
!=u_memrchr(s
+1, trail
, 9) ||
851 first
!=u_memrchr32(s
+1, trail
, 9) ||
852 first
!=u_strrstr(s
+1, sub_trail
) ||
853 first
!=u_strFindLast(s
+1, -1, sub_trail
, -1) ||
854 first
!=u_strFindLast(s
+1, -1, &trail
, 1) ||
855 first
!=u_strFindLast(s
+1, 9, sub_trail
, -1) ||
856 first
!=u_strFindLast(s
+1, 9, &trail
, 1)
858 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
861 /* find U+10402 in s[1..10[ */
865 first
!=u_strchr32(s
+1, supp
) ||
866 first
!=u_memchr32(s
+1, supp
, 9) ||
867 first
!=u_strstr(s
+1, sub_supp
) ||
868 first
!=u_strFindFirst(s
+1, -1, sub_supp
, -1) ||
869 first
!=u_strFindFirst(s
+1, -1, sub_supp
, 2) ||
870 first
!=u_strFindFirst(s
+1, 9, sub_supp
, -1) ||
871 first
!=u_strFindFirst(s
+1, 9, sub_supp
, 2) ||
872 last
!=u_strrchr32(s
+1, supp
) ||
873 last
!=u_memrchr32(s
+1, supp
, 9) ||
874 last
!=u_strrstr(s
+1, sub_supp
) ||
875 last
!=u_strFindLast(s
+1, -1, sub_supp
, -1) ||
876 last
!=u_strFindLast(s
+1, -1, sub_supp
, 2) ||
877 last
!=u_strFindLast(s
+1, 9, sub_supp
, -1) ||
878 last
!=u_strFindLast(s
+1, 9, sub_supp
, 2)
880 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
883 /* do not find U+10402 in a single UChar */
885 NULL
!=u_memchr32(s
+1, supp
, 1) ||
886 NULL
!=u_strFindFirst(s
+1, 1, sub_supp
, -1) ||
887 NULL
!=u_strFindFirst(s
+1, 1, sub_supp
, 2) ||
888 NULL
!=u_memrchr32(s
+1, supp
, 1) ||
889 NULL
!=u_strFindLast(s
+1, 1, sub_supp
, -1) ||
890 NULL
!=u_strFindLast(s
+1, 1, sub_supp
, 2) ||
891 NULL
!=u_memrchr32(s
+2, supp
, 1) ||
892 NULL
!=u_strFindLast(s
+2, 1, sub_supp
, -1) ||
893 NULL
!=u_strFindLast(s
+2, 1, sub_supp
, 2)
895 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
898 /* do not find U+10403 in s[1..10[ */
900 NULL
!=u_strchr32(s
+1, supp2
) ||
901 NULL
!=u_memchr32(s
+1, supp2
, 9) ||
902 NULL
!=u_strstr(s
+1, sub_supp2
) ||
903 NULL
!=u_strFindFirst(s
+1, -1, sub_supp2
, -1) ||
904 NULL
!=u_strFindFirst(s
+1, -1, sub_supp2
, 2) ||
905 NULL
!=u_strFindFirst(s
+1, 9, sub_supp2
, -1) ||
906 NULL
!=u_strFindFirst(s
+1, 9, sub_supp2
, 2) ||
907 NULL
!=u_strrchr32(s
+1, supp2
) ||
908 NULL
!=u_memrchr32(s
+1, supp2
, 9) ||
909 NULL
!=u_strrstr(s
+1, sub_supp2
) ||
910 NULL
!=u_strFindLast(s
+1, -1, sub_supp2
, -1) ||
911 NULL
!=u_strFindLast(s
+1, -1, sub_supp2
, 2) ||
912 NULL
!=u_strFindLast(s
+1, 9, sub_supp2
, -1) ||
913 NULL
!=u_strFindLast(s
+1, 9, sub_supp2
, 2)
915 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
918 /* find <0061 d801> in s[1..10[ */
921 first
!=u_strstr(s
+1, sub_a_lead
) ||
922 first
!=u_strFindFirst(s
+1, -1, sub_a_lead
, -1) ||
923 first
!=u_strFindFirst(s
+1, -1, sub_a_lead
, 2) ||
924 first
!=u_strFindFirst(s
+1, 9, sub_a_lead
, -1) ||
925 first
!=u_strFindFirst(s
+1, 9, sub_a_lead
, 2) ||
926 first
!=u_strrstr(s
+1, sub_a_lead
) ||
927 first
!=u_strFindLast(s
+1, -1, sub_a_lead
, -1) ||
928 first
!=u_strFindLast(s
+1, -1, sub_a_lead
, 2) ||
929 first
!=u_strFindLast(s
+1, 9, sub_a_lead
, -1) ||
930 first
!=u_strFindLast(s
+1, 9, sub_a_lead
, 2)
932 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
935 /* find <dc02 0061> in s[1..10[ */
938 first
!=u_strstr(s
+1, sub_trail_a
) ||
939 first
!=u_strFindFirst(s
+1, -1, sub_trail_a
, -1) ||
940 first
!=u_strFindFirst(s
+1, -1, sub_trail_a
, 2) ||
941 first
!=u_strFindFirst(s
+1, 9, sub_trail_a
, -1) ||
942 first
!=u_strFindFirst(s
+1, 9, sub_trail_a
, 2) ||
943 first
!=u_strrstr(s
+1, sub_trail_a
) ||
944 first
!=u_strFindLast(s
+1, -1, sub_trail_a
, -1) ||
945 first
!=u_strFindLast(s
+1, -1, sub_trail_a
, 2) ||
946 first
!=u_strFindLast(s
+1, 9, sub_trail_a
, -1) ||
947 first
!=u_strFindLast(s
+1, 9, sub_trail_a
, 2)
949 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
952 /* do not find "aba" in s[1..10[ */
954 NULL
!=u_strstr(s
+1, sub_aba
) ||
955 NULL
!=u_strFindFirst(s
+1, -1, sub_aba
, -1) ||
956 NULL
!=u_strFindFirst(s
+1, -1, sub_aba
, 3) ||
957 NULL
!=u_strFindFirst(s
+1, 9, sub_aba
, -1) ||
958 NULL
!=u_strFindFirst(s
+1, 9, sub_aba
, 3) ||
959 NULL
!=u_strrstr(s
+1, sub_aba
) ||
960 NULL
!=u_strFindLast(s
+1, -1, sub_aba
, -1) ||
961 NULL
!=u_strFindLast(s
+1, -1, sub_aba
, 3) ||
962 NULL
!=u_strFindLast(s
+1, 9, sub_aba
, -1) ||
963 NULL
!=u_strFindLast(s
+1, 9, sub_aba
, 3)
965 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
/*
 * Exercises u_uastrcpy(), u_uastrncpy() and u_austrncpy(), then the search
 * functions u_strchr(), u_strstr(), u_strchr32() and u_memchr32().
 * Every mismatch is reported through log_err().
 * NOTE(review): this listing does not show every statement of the function
 * (for example the declarations of temp, result, subString and charOut, and
 * several `if` conditions), so the notes below describe only what is visible.
 */
969 static void TestStringCopy()
974 UChar uchars
[]={0x61, 0x62, 0x63, 0x00};
976 char chars
[]="abc"; /* needs default codepage */
978 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
980 u_uastrcpy(temp
, "abc");
981 if(u_strcmp(temp
, uchars
) != 0) {
982 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars
), austrdup(temp
));
985 temp
[0] = 0xFB; /* load garbage into it */
/* copy only the first 3 chars of a longer source; unit 3 of temp must stay untouched */
990 u_uastrncpy(temp
, "abcabcabc", 3);
991 if(u_strncmp(uchars
, temp
, 3) != 0){
992 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars
), austrdup(temp
));
/* presumably temp[3] was preset to 0xFB as well - only the temp[0] assignment is visible above; TODO confirm */
994 if(temp
[3] != 0xFB) {
995 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
998 charOut
[0] = (char)0x7B; /* load garbage into it */
999 charOut
[1] = (char)0x7B;
1000 charOut
[2] = (char)0x7B;
1001 charOut
[3] = (char)0x7B;
/* same bounds check in the other direction: UChar -> char, 3 units, charOut[3] keeps its 0x7B sentinel */
1011 u_austrncpy(charOut
, temp
, 3);
1012 if(strncmp(chars
, charOut
, 3) != 0){
1013 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars
), austrdup(temp
));
1015 if(charOut
[3] != (char)0x7B) {
1016 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1019 /*Testing u_strchr()*/
1020 log_verbose("Testing u_strchr\n");
1029 result
=u_strchr(temp
, (UChar
)0x62);
1030 if(result
!= temp
+1){
/*
 * NOTE(review): here and in the similar log_err() calls below, "%ld" receives
 * the pointer difference result-temp and "0x%lx" receives the pointer result.
 * Those argument types only match the specifiers where long, ptrdiff_t and
 * pointers have the same size; consider casting to long, or using %p for the pointer.
 */
1031 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1033 /*Testing u_strstr()*/
1034 log_verbose("Testing u_strstr\n");
1038 result
=u_strstr(temp
, subString
);
1039 if(result
!= temp
+2){
1040 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1042 result
=u_strstr(temp
, subString
+2); /* subString+2 is an empty string */
1044 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* arguments swapped: search for temp inside subString; the message below expects "not found" */
1046 result
=u_strstr(subString
, temp
);
1048 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1051 /*Testing u_strchr32*/
1052 log_verbose("Testing u_strchr32\n");
1053 result
=u_strchr32(temp
, (UChar32
)0x62);
1054 if(result
!= temp
+1){
1055 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1057 result
=u_strchr32(temp
, (UChar32
)0xfb);
1059 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
/* U+20402 is a supplementary code point, i.e. it occupies a surrogate pair in UTF-16; expected at temp+5 */
1061 result
=u_strchr32(temp
, (UChar32
)0x20402);
1062 if(result
!= temp
+5){
1063 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* same search with explicit lengths: 7 units must find it at temp+5 */
1067 result
=u_memchr32(temp
, (UChar32
)0x20402, 7);
1068 if(result
!= temp
+5){
1069 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* with only 6 (or 1) units the message expects no match - presumably because the pair starting at index 5 is then cut off */
1071 result
=u_memchr32(temp
, (UChar32
)0x20402, 6);
1073 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1075 result
=u_memchr32(temp
, (UChar32
)0x20402, 1);
1077 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
/* 0xfc00 is passed as a single code unit value; expected at index 7 within the first 8 units */
1079 result
=u_memchr32(temp
, (UChar32
)0xfc00, 8);
1080 if(result
!= temp
+7){
1081 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result
-temp
, result
);
1085 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
/*
 * Data and checks for u_unescape() (presumably the body of TestUnescape();
 * the function header is not visible in this listing).
 * `input` is a C string whose backslashes are doubled, so u_unescape() sees
 * the escapes \u00f6, \u20ac, \f, \U00102345, \e, \cC, \n, \x1b and \x{263a}.
 * `expect` is the expected UTF-16 result including its terminating 0, which is
 * why explength is the array length minus one.
 */
1089 static UChar buffer
[200];
1091 static const char* input
=
1092 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1094 static const UChar expect
[]={
1095 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1096 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1097 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1098 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1100 static const int32_t explength
= UPRV_LENGTHOF(expect
)-1;
1103 /* test u_unescape() */
1104 length
=u_unescape(input
, buffer
, UPRV_LENGTHOF(buffer
));
1105 if(length
!=explength
|| u_strcmp(buffer
, expect
)!=0) {
1106 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length
,
1110 /* try preflighting */
1111 length
=u_unescape(input
, NULL
, UPRV_LENGTHOF(buffer
));
/*
 * NOTE(review): the destination is NULL in the call above, so buffer is not
 * passed to it; the u_strcmp(buffer, expect) term below re-checks what the
 * first call left in buffer rather than anything the preflighting call produced.
 */
1112 if(length
!=explength
|| u_strcmp(buffer
, expect
)!=0) {
1113 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length
, explength
);
1116 /* ### TODO: test u_unescapeAt() */
1119 /* test code point counting functions --------------------------------------- */
1121 /* reference implementation of u_strHasMoreChar32Than() */
1123 _refStrHasMoreChar32Than(const UChar
*s
, int32_t length
, int32_t number
) {
1124 int32_t count
=u_countChar32(s
, length
);
1125 return count
>number
;
1128 /* compare the real function against the reference */
1130 _testStrHasMoreChar32Than(const UChar
*s
, int32_t i
, int32_t length
, int32_t number
) {
1131 if(u_strHasMoreChar32Than(s
, length
, number
)!=_refStrHasMoreChar32Than(s
, length
, number
)) {
1132 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1133 i
, length
, number
, u_strHasMoreChar32Than(s
, length
, number
));
/*
 * Drives _testStrHasMoreChar32Than() over every suffix of a fixed test string
 * (presumably the body of TestCountChar32(); the function header is not
 * visible in this listing).
 * The string mixes BMP characters, complete surrogate pairs, and two unpaired
 * surrogates (0xd802 followed by 0x64, and 0xdc03 preceded by 0x64).
 */
1139 static const UChar string
[]={
1140 0x61, 0x62, 0xd800, 0xdc00,
1141 0xd801, 0xdc01, 0x63, 0xd802,
1142 0x64, 0xdc03, 0x65, 0x66,
1143 0xd804, 0xdc04, 0xd805, 0xdc05,
1147 int32_t i
, length
, number
;
1149 /* test u_strHasMoreChar32Than() with length>=0 */
1150 length
=UPRV_LENGTHOF(string
);
/* for each start offset i, try every threshold from -1 up to two more than the number of remaining code units */
1152 for(i
=0; i
<=length
; ++i
) {
1153 for(number
=-1; number
<=((length
-i
)+2); ++number
) {
1154 _testStrHasMoreChar32Than(string
+i
, i
, length
-i
, number
);
1160 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1161 length
=UPRV_LENGTHOF(string
);
/* work on a copy; presumably buffer[length] is set to 0 on a line not visible here - TODO confirm */
1162 u_memcpy(buffer
, string
, length
);
1165 for(i
=0; i
<=length
; ++i
) {
1166 for(number
=-1; number
<=((length
-i
)+2); ++number
) {
1167 _testStrHasMoreChar32Than(buffer
+i
, i
, -1, number
);
1173 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
/*
 * NOTE(review): the inner loop over i runs zero times when length is -1, so
 * the (NULL, length=-1) combination is never exercised; i itself is not passed
 * to the call (a constant 0 is used instead).
 */
1174 for(length
=-1; length
<=1; ++length
) {
1175 for(i
=0; i
<=length
; ++i
) {
1176 for(number
=-2; number
<=2; ++number
) {
1177 _testStrHasMoreChar32Than(NULL
, 0, length
, number
);
1183 /* UCharIterator ------------------------------------------------------------ */
1186 * Compare results from two iterators, should be same.
1187 * Assume that the text is not empty and that
1188 * iteration start==0 and iteration limit==length.
/*
 * Runs the same sequence of operations on iter1 and iter2 and reports every
 * difference through log_err(); n1 and n2 are the labels used for the two
 * iterators in those messages.
 * Sequence: compare lengths, move both to the middle, compare current(),
 * step forward 3 and backward 5 units, iterate forward from the start and
 * check hasNext() at both ends, move to the middle and to index 1, then
 * iterate backward from the limit and check hasPrevious() at both ends.
 * NOTE(review): the conditions guarding most log_err() calls, the loop headers
 * of the full iterations and the assignment to middle are not visible in this
 * listing.
 */
1191 compareIterators(UCharIterator
*iter1
, const char *n1
,
1192 UCharIterator
*iter2
, const char *n2
) {
1193 int32_t i
, pos1
, pos2
, middle
, length
;
1196 /* compare lengths */
1197 length
=iter1
->getIndex(iter1
, UITER_LENGTH
);
1198 pos2
=iter2
->getIndex(iter2
, UITER_LENGTH
);
1200 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1
, length
, pos2
, n2
);
1204 /* set into the middle */
1207 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
1209 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
1213 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
1215 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
1219 /* test current() */
1220 c1
=iter1
->current(iter1
);
1221 c2
=iter2
->current(iter2
);
1223 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1
, c1
, c2
, n2
, middle
);
1227 /* move forward 3 UChars */
1228 for(i
=0; i
<3; ++i
) {
1229 c1
=iter1
->next(iter1
);
1230 c2
=iter2
->next(iter2
);
1232 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
1237 /* move backward 5 UChars */
1238 for(i
=0; i
<5; ++i
) {
1239 c1
=iter1
->previous(iter1
);
1240 c2
=iter2
->previous(iter2
);
1242 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
1247 /* iterate forward from the beginning */
1248 pos1
=iter1
->move(iter1
, 0, UITER_START
);
1250 log_err("%s->move(start) failed\n", n1
);
1253 if(!iter1
->hasNext(iter1
)) {
1254 log_err("%s->hasNext() at the start returns FALSE\n", n1
);
1258 pos2
=iter2
->move(iter2
, 0, UITER_START
);
1260 log_err("%s->move(start) failed\n", n2
);
1263 if(!iter2
->hasNext(iter2
)) {
1264 log_err("%s->hasNext() at the start returns FALSE\n", n2
);
/* presumably the next() pair below sits in a loop that runs to the end of the text, since hasNext() is then expected to be false - TODO confirm */
1269 c1
=iter1
->next(iter1
);
1270 c2
=iter2
->next(iter2
);
1272 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
1277 if(iter1
->hasNext(iter1
)) {
1278 log_err("%s->hasNext() at the end returns TRUE\n", n1
);
1281 if(iter2
->hasNext(iter2
)) {
1282 log_err("%s->hasNext() at the end returns TRUE\n", n2
);
1286 /* back to the middle */
1287 pos1
=iter1
->move(iter1
, middle
, UITER_ZERO
);
1289 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1
, middle
, pos1
);
1293 pos2
=iter2
->move(iter2
, middle
, UITER_ZERO
);
1295 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2
, middle
, pos2
);
1299 /* move to index 1 */
1300 pos1
=iter1
->move(iter1
, 1, UITER_ZERO
);
1302 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1
, middle
, pos1
);
1306 pos2
=iter2
->move(iter2
, 1, UITER_ZERO
);
1308 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2
, middle
, pos2
);
1312 /* iterate backward from the end */
1313 pos1
=iter1
->move(iter1
, 0, UITER_LIMIT
);
1315 log_err("%s->move(limit) failed\n", n1
);
1318 if(!iter1
->hasPrevious(iter1
)) {
1319 log_err("%s->hasPrevious() at the end returns FALSE\n", n1
);
1323 pos2
=iter2
->move(iter2
, 0, UITER_LIMIT
);
1325 log_err("%s->move(limit) failed\n", n2
);
1328 if(!iter2
->hasPrevious(iter2
)) {
1329 log_err("%s->hasPrevious() at the end returns FALSE\n", n2
);
/* presumably the previous() pair below sits in a loop that runs back to the start, mirroring the forward pass - TODO confirm */
1334 c1
=iter1
->previous(iter1
);
1335 c2
=iter2
->previous(iter2
);
1337 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1
, c1
, c2
, n2
, iter1
->getIndex(iter1
, UITER_CURRENT
));
1342 if(iter1
->hasPrevious(iter1
)) {
1343 log_err("%s->hasPrevious() at the start returns TRUE\n", n1
);
1346 if(iter2
->hasPrevious(iter2
)) {
1347 log_err("%s->hasPrevious() at the start returns TRUE\n", n2
);
1353 * Test the iterator's getState() and setState() functions.
1354 * iter1 and iter2 must be set up for the same iterator type and the same string
1355 * but may be physically different structs (different addresses).
1357 * Assume that the text is not empty and that
1358 * iteration start==0 and iteration limit==length.
1359 * It must be 2<=middle<=length-2.
/*
 * Checks uiter_getState()/uiter_setState(): the state is taken from iter1 at
 * index `middle`, applied to iter2, and iter2 is then expected to return the
 * same code units around `middle` that iter1 read before. n is the label used
 * in error messages.
 * NOTE(review): the declarations of c, u, state, i and j, the conditions
 * guarding the log_err() calls and the statement that stores each unit read
 * from iter1 are not visible in this listing.
 */
1362 testIteratorState(UCharIterator
*iter1
, UCharIterator
*iter2
, const char *n
, int32_t middle
) {
1365 UErrorCode errorCode
;
1370 /* get four UChars from the middle of the string */
1371 iter1
->move(iter1
, middle
-2, UITER_ZERO
);
1372 for(i
=0; i
<4; ++i
) {
1373 c
=iter1
->next(iter1
);
1375 /* the test violates the assumptions, see comment above */
1376 log_err("test error: %s[%d]=%d\n", n
, middle
-2+i
, c
);
1382 /* move to the middle and get the state */
/* the four next() calls started at middle-2, so stepping back 2 lands on middle */
1383 iter1
->move(iter1
, -2, UITER_CURRENT
);
1384 state
=uiter_getState(iter1
);
1386 /* set the state into the second iterator and compare the results */
1387 errorCode
=U_ZERO_ERROR
;
1388 uiter_setState(iter2
, state
, &errorCode
);
1389 if(U_FAILURE(errorCode
)) {
1390 log_err("%s->setState(0x%x) failed: %s\n", n
, state
, u_errorName(errorCode
));
/* per the messages below, u[0..3] presumably hold the units read from iter1 at indexes middle-2..middle+1 */
1394 c
=iter2
->current(iter2
);
1396 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n
, middle
, c
, u
[2]);
1399 c
=iter2
->previous(iter2
);
1401 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-1, c
, u
[1]);
1404 iter2
->move(iter2
, 2, UITER_CURRENT
);
1405 c
=iter2
->next(iter2
);
1407 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n
, middle
+1, c
, u
[3]);
1410 iter2
->move(iter2
, -3, UITER_CURRENT
);
1411 c
=iter2
->previous(iter2
);
1413 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n
, middle
-2, c
, u
[0]);
1416 /* move the second iterator back to the middle */
/* NOTE(review): the visible move advances by one unit only; presumably a further step follows on a line not shown here */
1417 iter2
->move(iter2
, 1, UITER_CURRENT
);
1420 /* check that both are in the middle */
1421 i
=iter1
->getIndex(iter1
, UITER_CURRENT
);
1422 j
=iter2
->getIndex(iter2
, UITER_CURRENT
);
1424 log_err("%s->getIndex(current)=%d!=%d as expected\n", n
, i
, middle
);
1427 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n
, j
, i
);
1430 /* compare lengths */
1431 i
=iter1
->getIndex(iter1
, UITER_LENGTH
);
1432 j
=iter2
->getIndex(iter2
, UITER_LENGTH
);
1434 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n
, i
, j
);
/*
 * Tests the UCharIterator implementations set up by uiter_setString(),
 * uiter_setUTF8() and uiter_setUTF16BE():
 *  - a string iterator created from a NULL string must behave as a no-op,
 *  - get/set state is checked with testIteratorState(),
 *  - UTF-8 and UTF-16BE iterators over conversions of `text` are compared
 *    against the UTF-16 string iterator with compareIterators().
 * NOTE(review): the declarations of bytes, cnv and length and the statements
 * following the failure messages are not visible in this listing.
 */
1439 TestUCharIterator() {
/* 8 code units plus a terminating 0; 0xd801 0xdffd form one surrogate pair in the middle */
1440 static const UChar text
[]={
1441 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1445 UCharIterator iter
, iter1
, iter2
;
1447 UErrorCode errorCode
;
1450 /* simple API/code coverage - test NOOP UCharIterator */
1451 uiter_setString(&iter
, NULL
, 0);
1452 if( iter
.current(&iter
)!=-1 || iter
.next(&iter
)!=-1 || iter
.previous(&iter
)!=-1 ||
1453 iter
.move(&iter
, 1, UITER_CURRENT
) || iter
.getIndex(&iter
, UITER_CURRENT
)!=0 ||
1454 iter
.hasNext(&iter
) || iter
.hasPrevious(&iter
)
1456 log_err("NOOP UCharIterator behaves unexpectedly\n");
1459 /* test get/set state */
1460 length
=UPRV_LENGTHOF(text
)-1;
/* iter1 uses NUL-termination (-1), iter2 the explicit length, over the same text */
1461 uiter_setString(&iter1
, text
, -1);
1462 uiter_setString(&iter2
, text
, length
);
1463 testIteratorState(&iter1
, &iter2
, "UTF16IteratorState", length
/2);
1464 testIteratorState(&iter1
, &iter2
, "UTF16IteratorStatePlus1", length
/2+1);
1466 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1467 errorCode
=U_ZERO_ERROR
;
/* length is overwritten here with the UTF-8 byte count written to bytes */
1468 u_strToUTF8(bytes
, sizeof(bytes
), &length
, text
, -1, &errorCode
);
1469 if(U_FAILURE(errorCode
)) {
1470 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode
));
1474 uiter_setString(&iter1
, text
, -1);
1475 uiter_setUTF8(&iter2
, bytes
, length
);
1476 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF8Iterator");
1478 /* try again with length=-1 */
1479 uiter_setUTF8(&iter2
, bytes
, -1);
1480 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF8Iterator_1");
1482 /* test get/set state */
/* back to the UTF-16 unit count; iter1 becomes a UTF-8 iterator as well, so both iterators are of the same type */
1483 length
=UPRV_LENGTHOF(text
)-1;
1484 uiter_setUTF8(&iter1
, bytes
, -1);
1485 testIteratorState(&iter1
, &iter2
, "UTF8IteratorState", length
/2);
1486 testIteratorState(&iter1
, &iter2
, "UTF8IteratorStatePlus1", length
/2+1);
1488 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1489 errorCode
=U_ZERO_ERROR
;
1490 cnv
=ucnv_open("UTF-16BE", &errorCode
);
/* length now holds the number of UTF-16BE bytes written to bytes */
1491 length
=ucnv_fromUChars(cnv
, bytes
, sizeof(bytes
), text
, -1, &errorCode
);
1493 if(U_FAILURE(errorCode
)) {
1494 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode
));
1498 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1499 bytes
[length
]=bytes
[length
+1]=0;
1501 uiter_setString(&iter1
, text
, -1);
1502 uiter_setUTF16BE(&iter2
, bytes
, length
);
1503 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF16BEIterator");
1505 /* try again with length=-1 */
1506 uiter_setUTF16BE(&iter2
, bytes
, -1);
1507 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF16BEIterator_1");
1509 /* try again after moving the bytes up one, and with length=-1 */
/* length+2 bytes are moved so that the two terminating 0 bytes travel along; the data then starts at bytes+1 */
1510 memmove(bytes
+1, bytes
, length
+2);
1511 uiter_setUTF16BE(&iter2
, bytes
+1, -1);
1512 compareIterators(&iter1
, "UTF16Iterator", &iter2
, "UTF16BEIteratorMoved1");
1514 /* ### TODO test other iterators: CharacterIterator, Replaceable */
/*
 * Input strings that u_strIsWellFormed() is expected to accept; each one is
 * paired with TRUE in wellFormedTests.
 * valid1: emoji sequences separated by ASCII letters.
 * valid2: one base letter followed by 29 combining marks (U+0300).
 * valid3: a run of emoji, most of them ZWJ sequences.
 * valid4: deeply nested U+202A embeddings with U+2066/U+2069 and U+202C pops;
 *         the per-row comments track the resulting nesting level.
 */
1517 static const UChar valid0
[] = { 0 }; // test empty string
1518 static const UChar valid1
[] = { 0x0061,0x270C,0xFE0E, // victory hand with text variation selector
1519 0x0062,0x270C,0xFE0F, // victory hand with emoji variation selector
1520 0x0063,0x270C,0xD83C,0xDFFD, // victory hand with skin tone modifier
1521 0x0064,0x270C,0xFE0F,0xD83C,0xDFFD, // victory hand with emoji variation selector and skin tone modifier (obsolete sequence)
1522 0x0065,0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83C,0xDFEB, // woman teacher (ZWJ sequence) with skin tone
1523 0x0066,0xD83D,0xDC69,0x200D,0xD83D,0xDC69,0x200D,0xD83D,0xDC67, // family (woman, woman, girl - ZWJ sequence)
1524 0x0067,0x0030,0xFE0F,0x20E3, // keypad 0 (emoji combining seq)
1525 0x0068,0xD83C,0xDDEC,0xD83C,0xDDE7, // flag of UK (regional indicator pair)
1526 0x0069,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0xDB40,0xDC7F, // flag of England (tag seq)
1528 static const UChar valid2
[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
1529 0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 29 combining marks
1531 static const UChar valid3
[] = { // sample from Bill Siegrist, 100 UTF16 units
1532 0xD83D,0xDC2E, // U+1F42E 🐮
1533 0xD83D,0xDC3C, // U+1F43C 🐼
1534 0xD83D,0xDC39, // U+1F439 🐹
1535 0xD83D,0xDC31, // U+1F431 🐱
1536 0xD83D,0xDE4B,0x200D,0x2640,0xFE0F, // U+1F64B U+200D U+2640 U+FE0F 🙋♀️
1537 0xD83D,0xDE47,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F647 U+1F3FC U+200D U+2642 U+FE0F 🙇🏼♂️
1538 0xD83D,0xDE46,0x200D,0x2642,0xFE0F, // U+1F646 U+200D U+2642 U+FE0F 🙆♂️
1539 0xD83E,0xDDDA,0xD83C,0xDFFF,0x200D,0x2640,0xFE0F, // U+1F9DA U+1F3FF U+200D U+2640 U+FE0F 🧚🏿♀️
1540 0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D U+2642 U+FE0F 🧖🏾♂️
1541 0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D U+2642 U+FE0F 🧖🏾♂️
1542 0xD83E,0xDDDB,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F9DB U+1F3FC U+200D U+2642 U+FE0F 🧛🏼♂️
1543 0xD83E,0xDDD9,0x200D,0x2640,0xFE0F, // U+1F9D9 U+200D U+2640 U+FE0F 🧙♀️
1544 0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0x2696,0xFE0F, // U+1F468 U+1F3FE U+200D U+2696 U+FE0F 👨🏾⚖️
1545 0xD83D,0xDC69,0xD83C,0xDFFC,0x200D,0xD83D,0xDD27, // U+1F469 U+1F3FC U+200D U+1F527 👩🏼🔧
1546 0xD83D,0xDC69,0xD83C,0xDFFF,0x200D,0xD83C,0xDFEB, // U+1F469 U+1F3FF U+200D U+1F3EB 👩🏿🏫
1547 0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0xD83D,0xDCBB, // U+1F468 U+1F3FE U+200D U+1F4BB 👨🏾💻
1548 0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83D,0xDD2C, // U+1F469 U+1F3FD U+200D U+1F52C 👩🏽🔬
1549 0xD83D,0xDC68,0xD83C,0xDFFC,0x200D,0xD83D,0xDE92, // U+1F468 U+1F3FC U+200D U+1F692 👨🏼🚒
1551 static const UChar valid4
[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
1552 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
1553 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
1554 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
1555 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
1556 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
1557 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
1558 0x0061,0x2066,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x2069, // to level 122 in LRI then pop to 112
1559 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066, // to level 122 again
1560 0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 90
1561 0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 58
1562 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 74
1563 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 90
1564 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 106
1566 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
1567 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
1568 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
1569 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
1570 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
1571 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
/*
 * Input strings that u_strIsWellFormed() is expected to reject; each one is
 * paired with FALSE in wellFormedTests. The trailing comment on each array
 * names the defect it carries.
 */
1574 static const UChar malformed1
[] = { 0x0061,0xFFFF,0 }; // non-character (BMP)
1575 static const UChar malformed2
[] = { 0x0062,0xD87F,0xDFFE,0 }; // non-character (supplemental)
1576 static const UChar malformed3
[] = { 0x0063,0xD7FC,0 }; // unassigned
1577 static const UChar malformed4
[] = { 0x0064,0xD800,0 }; // unpaired high surrogate
1578 static const UChar malformed5
[] = { 0x0065,0xDC00,0 }; // unpaired low surrogate
1579 static const UChar malformed6
[] = { 0x0066,0xFE0F,0 }; // emoji variation selector on non-emoji
1580 static const UChar malformed7
[] = { 0x0067,0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence on non-emoji
1581 static const UChar malformed8
[] = { 0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence with no base
1582 static const UChar malformed9
[] = { 0x0068,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0x0069,0 }; // tag sequence with no term
1583 static const UChar malformedA
[] = { 0x006A,0xD83C,0xDFF4,0xDB40,0xDC7F,0 }; // tag sequence with no tag_spec, just term
/* malformedB: same shape as valid2, but with 31 combining marks instead of 29 */
1584 static const UChar malformedB
[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
1585 0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 31 combining marks
/* malformedC: eight rows of 16 embeddings each, without any pop in between; the row comments mark level 128 as the error */
1587 static const UChar malformedC
[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
1588 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
1589 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
1590 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
1591 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
1592 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
1593 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
1594 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 128 (error)
1595 0x0061,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0 }; // start PDFs, too late
/*
 * Fields of the StringAndResult record used by wellFormedTests:
 * descrip is the label printed in failure messages and string is the
 * NUL-terminated UTF-16 input. TestIsWellFormed() also reads a `result`
 * member holding the expected outcome; its declaration and the enclosing
 * typedef lines are not visible in this listing.
 */
1598 const char* descrip
;
1599 const UChar
* string
;
/*
 * Test table: label, input string, expected u_strIsWellFormed() result.
 * TestIsWellFormed() iterates until it reaches an entry whose descrip is NULL;
 * that terminating entry is not visible in this listing.
 */
1603 static const StringAndResult wellFormedTests
[] = {
1604 { "valid0", valid0
, TRUE
},
1605 { "valid1", valid1
, TRUE
},
1606 { "valid2", valid2
, TRUE
},
1607 { "valid3", valid3
, TRUE
},
1608 { "valid4", valid4
, TRUE
},
1609 { "malformed1", malformed1
, FALSE
},
1610 { "malformed2", malformed2
, FALSE
},
1611 { "malformed3", malformed3
, FALSE
},
1612 { "malformed4", malformed4
, FALSE
},
1613 { "malformed5", malformed5
, FALSE
},
1614 { "malformed6", malformed6
, FALSE
},
1615 { "malformed7", malformed7
, FALSE
},
1616 { "malformed8", malformed8
, FALSE
},
1617 { "malformed9", malformed9
, FALSE
},
1618 { "malformedA", malformedA
, FALSE
},
1619 { "malformedB", malformedB
, FALSE
},
1620 { "malformedC", malformedC
, FALSE
},
/*
 * Runs u_strIsWellFormed() over every entry of wellFormedTests, twice per
 * entry: once with length -1 (NUL-terminated input) and once with the explicit
 * length from u_strlen(). A result that differs from the entry's expected
 * value is reported through log_err() together with the entry's label.
 */
1625 TestIsWellFormed() {
1626 const StringAndResult
* testPtr
;
/* the table is walked until an entry with a NULL descrip is reached */
1627 for (testPtr
= wellFormedTests
; testPtr
->descrip
!= NULL
; testPtr
++) {
1628 UBool result
= u_strIsWellFormed(testPtr
->string
, -1);
1629 if (result
!= testPtr
->result
) {
1630 log_err("test %s with length -1, expected %d, got %d\n", testPtr
->descrip
, testPtr
->result
, result
);
/* second pass: same string, but with its length passed explicitly */
1633 int32_t length
= u_strlen(testPtr
->string
);
1634 result
= u_strIsWellFormed(testPtr
->string
, length
);
1635 if (result
!= testPtr
->result
) {
1636 log_err("test %s with length %d, expected %d, got %d\n", testPtr
->descrip
, length
, testPtr
->result
, result
);