]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/custrtst.c
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / custrtst.c
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4******************************************************************************
5*
2ca993e8 6* Copyright (C) 2002-2016, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: custrtst.c
f3c0d7a5 11* encoding: UTF-8
b75a7d8f
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002oct09
16* created by: Markus W. Scherer
17*
18* Tests of ustring.h Unicode string API functions.
19*/
20
b75a7d8f 21#include "unicode/ustring.h"
b75a7d8f
A
22#include "unicode/ucnv.h"
23#include "unicode/uiter.h"
24#include "cintltst.h"
b331163b 25#include "cmemory.h"
b75a7d8f 26#include <string.h>
b75a7d8f 27
b75a7d8f
A
/*
 * Get the sign of an integer: yields -1 for negative values, 0 for zero and
 * +1 for positive values.
 *
 * Implemented with comparisons only, so it does not depend on the
 * implementation-defined result of right-shifting a negative value and it
 * does not truncate arguments that are wider than 32 bits.
 * Like the shift-based form, it may evaluate its argument twice; do not pass
 * expressions with side effects.
 */
#define _SIGN(value) ((value)<0 ? -1 : ((value)>0 ? 1 : 0))
30
31/* test setup --------------------------------------------------------------- */
32
/* helper that allocates and fills dataTable; called by TestStringFunctions() */
static void setUpDataTable(void);

/* test functions; each one is registered in addUStringTest() below */
static void TestStringCopy(void);
static void TestStringFunctions(void);
static void TestStringSearching(void);
static void TestSurrogateSearching(void);
static void TestUnescape(void);
static void TestCountChar32(void);
static void TestUCharIterator(void);
static void TestIsWellFormed(void);

/* registration function with external linkage; prototype precedes the definition */
void addUStringTest(TestNode** root);
44
45void addUStringTest(TestNode** root)
46{
47 addTest(root, &TestStringCopy, "tsutil/custrtst/TestStringCopy");
48 addTest(root, &TestStringFunctions, "tsutil/custrtst/TestStringFunctions");
49 addTest(root, &TestStringSearching, "tsutil/custrtst/TestStringSearching");
50 addTest(root, &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching");
51 addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
52 addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
53 addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
0f5d89e8 54 addTest(root, &TestIsWellFormed, "tsutil/custrtst/TestIsWellFormed");
b75a7d8f
A
55}
56
/* test data for TestStringFunctions ---------------------------------------- */

/*
 * UChar copies of the raw[][] strings, indexed [row][column] like raw.
 * Allocated by setUpDataTable() and released by cleanUpDataTable();
 * NULL while no table is allocated.
 */
UChar*** dataTable = NULL;

/*
 * Source strings for dataTable: 3 rows of 4 columns.
 * TestStringFunctions() concatenates row 0 and row 1 and compares the result
 * with row 2, but only for columns 0 and 1.
 * NOTE(review): "Unites States" in row 1, column 3 looks like a typo for
 * "United States"; it is not covered by that concatenation check - confirm
 * whether it is intentional before changing it.
 */
static const char* raw[3][4] = {

    /* First String */
    { "English_", "French_", "Croatian_", "English_"},
    /* Second String */
    { "United States", "France", "Croatia", "Unites States"},

    /* Concatenated string */
    { "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
};
71
72static void setUpDataTable()
73{
74 int32_t i,j;
75 if(dataTable == NULL) {
76 dataTable = (UChar***)calloc(sizeof(UChar**),3);
77
78 for (i = 0; i < 3; i++) {
79 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
80 for (j = 0; j < 4; j++){
81 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
82 u_uastrcpy(dataTable[i][j],raw[i][j]);
83 }
84 }
85 }
86}
87
88static void cleanUpDataTable()
89{
90 int32_t i,j;
91 if(dataTable != NULL) {
92 for (i=0; i<3; i++) {
93 for(j = 0; j<4; j++) {
94 free(dataTable[i][j]);
95 }
96 free(dataTable[i]);
97 }
98 free(dataTable);
99 }
100 dataTable = NULL;
101}
102
103/*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
104static void TestStringFunctions()
105{
106 int32_t i,j,k;
107 UChar temp[512];
108 UChar nullTemp[512];
109 char test[512];
110 char tempOut[512];
111
112 setUpDataTable();
113
114 log_verbose("Testing u_strlen()\n");
115 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
116 log_err("There is an error in u_strlen()");
117
118 log_verbose("Testing u_memcpy() and u_memcmp()\n");
119
120 for(i=0;i<3;++i)
121 {
122 for(j=0;j<4;++j)
123 {
124 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
125 temp[0] = 0;
126 temp[7] = 0xA4; /* Mark the end */
127 u_memcpy(temp,dataTable[i][j], 7);
128
129 if(temp[7] != 0xA4)
130 log_err("an error occured in u_memcpy()\n");
131 if(u_memcmp(temp, dataTable[i][j], 7)!=0)
132 log_err("an error occured in u_memcpy() or u_memcmp()\n");
133 }
134 }
135 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
136 log_err("an error occured in u_memcmp()\n");
137
138 log_verbose("Testing u_memset()\n");
139 nullTemp[0] = 0;
140 nullTemp[7] = 0;
141 u_memset(nullTemp, 0xa4, 7);
142 for (i = 0; i < 7; i++) {
143 if(nullTemp[i] != 0xa4) {
144 log_err("an error occured in u_memset()\n");
145 }
146 }
147 if(nullTemp[7] != 0) {
148 log_err("u_memset() went too far\n");
149 }
150
151 u_memset(nullTemp, 0, 7);
152 nullTemp[7] = 0xa4;
153 temp[7] = 0;
154 u_memcpy(temp,nullTemp, 7);
155 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
156 log_err("an error occured in u_memcpy() or u_memcmp()\n");
157
158
159 log_verbose("Testing u_memmove()\n");
160 for (i = 0; i < 7; i++) {
161 temp[i] = (UChar)i;
162 }
163 u_memmove(temp + 1, temp, 7);
164 if(temp[0] != 0) {
165 log_err("an error occured in u_memmove()\n");
166 }
167 for (i = 1; i <= 7; i++) {
168 if(temp[i] != (i - 1)) {
169 log_err("an error occured in u_memmove()\n");
170 }
171 }
172
173 log_verbose("Testing u_strcpy() and u_strcmp()\n");
174
175 for(i=0;i<3;++i)
176 {
177 for(j=0;j<4;++j)
178 {
179 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
180 temp[0] = 0;
181 u_strcpy(temp,dataTable[i][j]);
182
183 if(u_strcmp(temp,dataTable[i][j])!=0)
184 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
185 }
186 }
187 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
188 log_err("an error occured in u_memcmp()\n");
189
190 log_verbose("testing u_strcat()\n");
191 i=0;
192 for(j=0; j<2;++j)
193 {
194 u_uastrcpy(temp, "");
195 u_strcpy(temp,dataTable[i][j]);
196 u_strcat(temp,dataTable[i+1][j]);
197 if(u_strcmp(temp,dataTable[i+2][j])!=0)
198 log_err("something threw an error in u_strcat()\n");
199
200 }
201 log_verbose("Testing u_strncmp()\n");
202 for(i=0,j=0;j<4; ++j)
203 {
204 k=u_strlen(dataTable[i][j]);
205 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
206 log_err("Something threw an error in u_strncmp\n");
207 }
208 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
209 log_err("an error occured in u_memcmp()\n");
210
211
212 log_verbose("Testing u_strncat\n");
213 for(i=0,j=0;j<4; ++j)
214 {
215 k=u_strlen(dataTable[i][j]);
216
217 u_uastrcpy(temp,"");
218
219 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
220 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
221
222 }
223
224 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
225 for(i=2,j=0;j<4; ++j)
226 {
227 k=u_strlen(dataTable[i][j]);
228 u_strncpy(temp, dataTable[i][j],k);
229 temp[k] = 0xa4;
230
231 if(u_strncmp(temp, dataTable[i][j],k)!=0)
232 log_err("something threw an error in u_strncpy()\n");
233
234 if(temp[k] != 0xa4)
235 log_err("something threw an error in u_strncpy()\n");
236
2ca993e8 237 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
b75a7d8f
A
238 u_uastrncpy(temp, raw[i][j], k-1);
239 if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
240 log_err("something threw an error in u_uastrncpy(k-1)\n");
241
242 if(temp[k-1] != 0x3F)
243 log_err("something threw an error in u_uastrncpy(k-1)\n");
244
2ca993e8 245 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
b75a7d8f
A
246 u_uastrncpy(temp, raw[i][j], k+1);
247 if(u_strcmp(temp, dataTable[i][j])!=0)
248 log_err("something threw an error in u_uastrncpy(k+1)\n");
249
250 if(temp[k] != 0)
251 log_err("something threw an error in u_uastrncpy(k+1)\n");
252
2ca993e8 253 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
b75a7d8f
A
254 u_uastrncpy(temp, raw[i][j], k);
255 if(u_strncmp(temp, dataTable[i][j], k)!=0)
256 log_err("something threw an error in u_uastrncpy(k)\n");
257
258 if(temp[k] != 0x3F)
259 log_err("something threw an error in u_uastrncpy(k)\n");
260 }
261
262 log_verbose("Testing u_strchr() and u_memchr()\n");
263
264 for(i=2,j=0;j<4;j++)
265 {
266 UChar saveVal = dataTable[i][j][0];
267 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
268 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
269
270 log_verbose("%s ", u_austrcpy(tempOut, findPtr));
271
272 if (findPtr == NULL || *findPtr != 0x005F) {
273 log_err("u_strchr can't find '_' in the string\n");
274 }
275
276 findPtr = u_strchr32(dataTable[i][j], 0x005F);
277 if (findPtr == NULL || *findPtr != 0x005F) {
278 log_err("u_strchr32 can't find '_' in the string\n");
279 }
280
281 findPtr = u_strchr(dataTable[i][j], 0);
282 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
283 log_err("u_strchr can't find NULL in the string\n");
284 }
285
286 findPtr = u_strchr32(dataTable[i][j], 0);
287 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
288 log_err("u_strchr32 can't find NULL in the string\n");
289 }
290
291 findPtr = u_memchr(dataTable[i][j], 0, dataSize);
292 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
293 log_err("u_memchr can't find NULL in the string\n");
294 }
295
296 findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
297 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
298 log_err("u_memchr32 can't find NULL in the string\n");
299 }
300
301 dataTable[i][j][0] = 0;
302 /* Make sure we skip over the NULL termination */
303 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
304 if (findPtr == NULL || *findPtr != 0x005F) {
305 log_err("u_memchr can't find '_' in the string\n");
306 }
307
308 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
309 if (findPtr == NULL || *findPtr != 0x005F) {
310 log_err("u_memchr32 can't find '_' in the string\n");
311 }
312 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
313 if (findPtr != NULL) {
314 log_err("Should have found NULL when the character is not there.\n");
315 }
316 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */
317 }
318
319 /*
320 * test that u_strchr32()
321 * does not find surrogate code points when they are part of matched pairs
322 * (= part of supplementary code points)
323 * Jitterbug 1542
324 */
325 {
326 static const UChar s[]={
327 /* 0 1 2 3 4 5 6 7 8 9 */
328 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
329 };
330
331 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
332 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
333 }
334 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
335 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
336 }
337 }
338
339 log_verbose("Testing u_austrcpy()");
340 u_austrcpy(test,dataTable[0][0]);
341 if(strcmp(test,raw[0][0])!=0)
342 log_err("There is an error in u_austrcpy()");
343
344
345 log_verbose("Testing u_strtok_r()");
346 {
347 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
348 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
349 UChar delimBuf[sizeof(test)];
350 UChar currTokenBuf[sizeof(tokString)];
351 UChar *state;
352 uint32_t currToken = 0;
353 UChar *ptr;
354
355 u_uastrcpy(temp, tokString);
356 u_uastrcpy(delimBuf, " ");
357
358 ptr = u_strtok_r(temp, delimBuf, &state);
359 u_uastrcpy(delimBuf, " ,");
360 while (ptr != NULL) {
361 u_uastrcpy(currTokenBuf, tokens[currToken]);
362 if (u_strcmp(ptr, currTokenBuf) != 0) {
363 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
364 }
365 ptr = u_strtok_r(NULL, delimBuf, &state);
366 currToken++;
367 }
368
2ca993e8 369 if (currToken != UPRV_LENGTHOF(tokens)) {
b75a7d8f
A
370 log_err("Didn't get correct number of tokens\n");
371 }
372 state = delimBuf; /* Give it an "invalid" saveState */
373 u_uastrcpy(currTokenBuf, "");
374 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
375 log_err("Didn't get NULL for empty string\n");
376 }
377 if (state != NULL) {
378 log_err("State should be NULL for empty string\n");
379 }
380 state = delimBuf; /* Give it an "invalid" saveState */
381 u_uastrcpy(currTokenBuf, ", ,");
382 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
383 log_err("Didn't get NULL for a string of delimiters\n");
384 }
385 if (state != NULL) {
386 log_err("State should be NULL for a string of delimiters\n");
387 }
388
389 state = delimBuf; /* Give it an "invalid" saveState */
390 u_uastrcpy(currTokenBuf, "q, ,");
391 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
392 log_err("Got NULL for a string that does not begin with delimiters\n");
393 }
394 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
395 log_err("Didn't get NULL for a string that ends in delimiters\n");
396 }
397 if (state != NULL) {
398 log_err("State should be NULL for empty string\n");
399 }
400
401 state = delimBuf; /* Give it an "invalid" saveState */
402 u_uastrcpy(currTokenBuf, tokString);
403 u_uastrcpy(temp, tokString);
404 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */
405 ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
406 if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
407 log_err("Should have recieved the same string when there are no delimiters\n");
408 }
409 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
410 log_err("Should not have found another token in a one token string\n");
411 }
412 }
413
414 /* test u_strcmpCodePointOrder() */
415 {
416 /* these strings are in ascending order */
417 static const UChar strings[][4]={
418 { 0x61, 0 }, /* U+0061 */
419 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
420 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
421 { 0xd800, 0 }, /* U+d800 */
422 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
423 { 0xdfff, 0 }, /* U+dfff */
424 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
425 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
426 { 0xd800, 0xdc02, 0 }, /* U+10002 */
427 { 0xd84d, 0xdc56, 0 } /* U+23456 */
428 };
429
430 UCharIterator iter1, iter2;
431 int32_t len1, len2, r1, r2;
432
2ca993e8 433 for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
b75a7d8f
A
434 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
435 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
436 }
437 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
438 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
439 }
440
441 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
442 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
443 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
444 }
445
446 /* test u_strCompare(TRUE) */
447 len1=u_strlen(strings[i]);
448 len2=u_strlen(strings[i+1]);
449 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
450 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
451 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
452 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
453 ) {
454 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
455 }
456
457 /* test u_strCompare(FALSE) */
458 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
459 r2=u_strcmp(strings[i], strings[i+1]);
460 if(_SIGN(r1)!=_SIGN(r2)) {
461 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
462 }
463
464 /* test u_strCompareIter() */
465 uiter_setString(&iter1, strings[i], len1);
466 uiter_setString(&iter2, strings[i+1], len2);
467 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
468 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
469 }
470 r1=u_strCompareIter(&iter1, &iter2, FALSE);
471 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
472 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
473 }
474 }
475 }
476
477 cleanUpDataTable();
478}
479
480static void TestStringSearching()
481{
482 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
483 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
484 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
485 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
486 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
487 const UChar surrMatchSet4[] = {0x0000};
488 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
489 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
490 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
491 const UChar
492 empty[] = { 0 },
493 a[] = { 0x61, 0 },
494 ab[] = { 0x61, 0x62, 0 },
495 ba[] = { 0x62, 0x61, 0 },
496 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
497 cd[] = { 0x63, 0x64, 0 },
498 dc[] = { 0x64, 0x63, 0 },
499 cdh[] = { 0x63, 0x64, 0x68, 0 },
500 f[] = { 0x66, 0 },
501 fg[] = { 0x66, 0x67, 0 },
502 gf[] = { 0x67, 0x66, 0 };
503
504 log_verbose("Testing u_strpbrk()");
505
506 if (u_strpbrk(testString, a) != &testString[0]) {
507 log_err("u_strpbrk couldn't find first letter a.\n");
508 }
509 if (u_strpbrk(testString, dc) != &testString[2]) {
510 log_err("u_strpbrk couldn't find d or c.\n");
511 }
512 if (u_strpbrk(testString, cd) != &testString[2]) {
513 log_err("u_strpbrk couldn't find c or d.\n");
514 }
515 if (u_strpbrk(testString, cdh) != &testString[2]) {
516 log_err("u_strpbrk couldn't find c, d or h.\n");
517 }
518 if (u_strpbrk(testString, f) != NULL) {
519 log_err("u_strpbrk didn't return NULL for \"f\".\n");
520 }
521 if (u_strpbrk(testString, fg) != NULL) {
522 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
523 }
524 if (u_strpbrk(testString, gf) != NULL) {
525 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
526 }
527 if (u_strpbrk(testString, empty) != NULL) {
528 log_err("u_strpbrk didn't return NULL for \"\".\n");
529 }
530
531 log_verbose("Testing u_strpbrk() with surrogates");
532
533 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
534 log_err("u_strpbrk couldn't find first letter a.\n");
535 }
536 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
537 log_err("u_strpbrk couldn't find d or c.\n");
538 }
539 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
540 log_err("u_strpbrk couldn't find c or d.\n");
541 }
542 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
543 log_err("u_strpbrk couldn't find c, d or h.\n");
544 }
545 if (u_strpbrk(testSurrogateString, f) != NULL) {
546 log_err("u_strpbrk didn't return NULL for \"f\".\n");
547 }
548 if (u_strpbrk(testSurrogateString, fg) != NULL) {
549 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
550 }
551 if (u_strpbrk(testSurrogateString, gf) != NULL) {
552 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
553 }
554 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
555 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
556 }
557 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
558 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
559 }
560 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
561 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
562 }
563 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
564 log_err("u_strpbrk should have returned NULL for empty string.\n");
565 }
566 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
567 log_err("u_strpbrk should have found bad surrogate.\n");
568 }
569
570 log_verbose("Testing u_strcspn()");
571
572 if (u_strcspn(testString, a) != 0) {
573 log_err("u_strcspn couldn't find first letter a.\n");
574 }
575 if (u_strcspn(testString, dc) != 2) {
576 log_err("u_strcspn couldn't find d or c.\n");
577 }
578 if (u_strcspn(testString, cd) != 2) {
579 log_err("u_strcspn couldn't find c or d.\n");
580 }
581 if (u_strcspn(testString, cdh) != 2) {
582 log_err("u_strcspn couldn't find c, d or h.\n");
583 }
584 if (u_strcspn(testString, f) != u_strlen(testString)) {
585 log_err("u_strcspn didn't return NULL for \"f\".\n");
586 }
587 if (u_strcspn(testString, fg) != u_strlen(testString)) {
588 log_err("u_strcspn didn't return NULL for \"fg\".\n");
589 }
590 if (u_strcspn(testString, gf) != u_strlen(testString)) {
591 log_err("u_strcspn didn't return NULL for \"gf\".\n");
592 }
593
594 log_verbose("Testing u_strcspn() with surrogates");
595
596 if (u_strcspn(testSurrogateString, a) != 1) {
597 log_err("u_strcspn couldn't find first letter a.\n");
598 }
599 if (u_strcspn(testSurrogateString, dc) != 5) {
600 log_err("u_strcspn couldn't find d or c.\n");
601 }
602 if (u_strcspn(testSurrogateString, cd) != 5) {
603 log_err("u_strcspn couldn't find c or d.\n");
604 }
605 if (u_strcspn(testSurrogateString, cdh) != 5) {
606 log_err("u_strcspn couldn't find c, d or h.\n");
607 }
608 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
609 log_err("u_strcspn didn't return NULL for \"f\".\n");
610 }
611 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
612 log_err("u_strcspn didn't return NULL for \"fg\".\n");
613 }
614 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
615 log_err("u_strcspn didn't return NULL for \"gf\".\n");
616 }
617 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
618 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
619 }
620 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
621 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
622 }
623 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
624 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
625 }
626 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
627 log_err("u_strcspn should have returned strlen for empty string.\n");
628 }
629
630
631 log_verbose("Testing u_strspn()");
632
633 if (u_strspn(testString, a) != 1) {
634 log_err("u_strspn couldn't skip first letter a.\n");
635 }
636 if (u_strspn(testString, ab) != 2) {
637 log_err("u_strspn couldn't skip a or b.\n");
638 }
639 if (u_strspn(testString, ba) != 2) {
640 log_err("u_strspn couldn't skip a or b.\n");
641 }
642 if (u_strspn(testString, f) != 0) {
643 log_err("u_strspn didn't return 0 for \"f\".\n");
644 }
645 if (u_strspn(testString, dc) != 0) {
646 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
647 }
648 if (u_strspn(testString, abcd) != u_strlen(testString)) {
649 log_err("u_strspn couldn't skip over the whole string.\n");
650 }
651 if (u_strspn(testString, empty) != 0) {
652 log_err("u_strspn should have returned 0 for empty string.\n");
653 }
654
655 log_verbose("Testing u_strspn() with surrogates");
656 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
657 log_err("u_strspn couldn't skip 0xdbff or a.\n");
658 }
659 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
660 log_err("u_strspn couldn't skip 0xdbff or a.\n");
661 }
662 if (u_strspn(testSurrogateString, f) != 0) {
663 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
664 }
665 if (u_strspn(testSurrogateString, dc) != 0) {
666 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
667 }
668 if (u_strspn(testSurrogateString, cd) != 0) {
669 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
670 }
671 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
672 log_err("u_strspn couldn't skip whole string.\n");
673 }
674 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
675 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
676 }
677 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
678 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
679 }
680 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
681 log_err("u_strspn should have returned 0 for empty string.\n");
682 }
683}
684
685/*
686 * All binary Unicode string searches should behave the same for equivalent input.
687 * See Jitterbug 2145.
688 * There are some new functions, too - just test them all.
689 */
690static void
691TestSurrogateSearching() {
692 static const UChar s[]={
693 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
694 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
695 }, sub_a[]={
696 0x61, 0
697 }, sub_b[]={
698 0x62, 0
699 }, sub_lead[]={
700 0xd801, 0
701 }, sub_trail[]={
702 0xdc02, 0
703 }, sub_supp[]={
704 0xd801, 0xdc02, 0
705 }, sub_supp2[]={
706 0xd801, 0xdc03, 0
707 }, sub_a_lead[]={
708 0x61, 0xd801, 0
709 }, sub_trail_a[]={
710 0xdc02, 0x61, 0
711 }, sub_aba[]={
712 0x61, 0x62, 0x61, 0
713 };
714 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
715 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
716
717 const UChar *first, *last;
718
719 /* search for NUL code point: find end of string */
720 first=s+u_strlen(s);
721
722 if(
723 first!=u_strchr(s, nul) ||
724 first!=u_strchr32(s, nul) ||
b331163b
A
725 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
726 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
b75a7d8f
A
727 first!=u_strrchr(s, nul) ||
728 first!=u_strrchr32(s, nul) ||
b331163b
A
729 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
730 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
b75a7d8f
A
731 ) {
732 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
733 }
734
735 /* search for empty substring: find beginning of string */
736 if(
737 s!=u_strstr(s, &nul) ||
738 s!=u_strFindFirst(s, -1, &nul, -1) ||
739 s!=u_strFindFirst(s, -1, &nul, 0) ||
b331163b
A
740 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
741 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
b75a7d8f
A
742 s!=u_strrstr(s, &nul) ||
743 s!=u_strFindLast(s, -1, &nul, -1) ||
744 s!=u_strFindLast(s, -1, &nul, 0) ||
b331163b
A
745 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
746 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
b75a7d8f
A
747 ) {
748 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
749 }
750
751 /* find 'a' in s[1..10[ */
752 first=s+3;
753 last=s+7;
754 if(
755 first!=u_strchr(s+1, a) ||
756 first!=u_strchr32(s+1, a) ||
757 first!=u_memchr(s+1, a, 9) ||
758 first!=u_memchr32(s+1, a, 9) ||
759 first!=u_strstr(s+1, sub_a) ||
760 first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
761 first!=u_strFindFirst(s+1, -1, &a, 1) ||
762 first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
763 first!=u_strFindFirst(s+1, 9, &a, 1) ||
764 (s+10)!=u_strrchr(s+1, a) ||
765 (s+10)!=u_strrchr32(s+1, a) ||
766 last!=u_memrchr(s+1, a, 9) ||
767 last!=u_memrchr32(s+1, a, 9) ||
768 (s+10)!=u_strrstr(s+1, sub_a) ||
769 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
770 (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
771 last!=u_strFindLast(s+1, 9, sub_a, -1) ||
772 last!=u_strFindLast(s+1, 9, &a, 1)
773 ) {
774 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
775 }
776
777 /* do not find 'b' in s[1..10[ */
778 if(
779 NULL!=u_strchr(s+1, b) ||
780 NULL!=u_strchr32(s+1, b) ||
781 NULL!=u_memchr(s+1, b, 9) ||
782 NULL!=u_memchr32(s+1, b, 9) ||
783 NULL!=u_strstr(s+1, sub_b) ||
784 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
785 NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
786 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
787 NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
788 NULL!=u_strrchr(s+1, b) ||
789 NULL!=u_strrchr32(s+1, b) ||
790 NULL!=u_memrchr(s+1, b, 9) ||
791 NULL!=u_memrchr32(s+1, b, 9) ||
792 NULL!=u_strrstr(s+1, sub_b) ||
793 NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
794 NULL!=u_strFindLast(s+1, -1, &b, 1) ||
795 NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
796 NULL!=u_strFindLast(s+1, 9, &b, 1)
797 ) {
798 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
799 }
800
801 /* do not find a non-code point in s[1..10[ */
802 if(
803 NULL!=u_strchr32(s+1, ill) ||
804 NULL!=u_memchr32(s+1, ill, 9) ||
805 NULL!=u_strrchr32(s+1, ill) ||
806 NULL!=u_memrchr32(s+1, ill, 9)
807 ) {
808 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
809 }
810
811 /* find U+d801 in s[1..10[ */
812 first=s+6;
813 if(
814 first!=u_strchr(s+1, lead) ||
815 first!=u_strchr32(s+1, lead) ||
816 first!=u_memchr(s+1, lead, 9) ||
817 first!=u_memchr32(s+1, lead, 9) ||
818 first!=u_strstr(s+1, sub_lead) ||
819 first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
820 first!=u_strFindFirst(s+1, -1, &lead, 1) ||
821 first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
822 first!=u_strFindFirst(s+1, 9, &lead, 1) ||
823 first!=u_strrchr(s+1, lead) ||
824 first!=u_strrchr32(s+1, lead) ||
825 first!=u_memrchr(s+1, lead, 9) ||
826 first!=u_memrchr32(s+1, lead, 9) ||
827 first!=u_strrstr(s+1, sub_lead) ||
828 first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
829 first!=u_strFindLast(s+1, -1, &lead, 1) ||
830 first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
831 first!=u_strFindLast(s+1, 9, &lead, 1)
832 ) {
833 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
834 }
835
836 /* find U+dc02 in s[1..10[ */
837 first=s+4;
838 if(
839 first!=u_strchr(s+1, trail) ||
840 first!=u_strchr32(s+1, trail) ||
841 first!=u_memchr(s+1, trail, 9) ||
842 first!=u_memchr32(s+1, trail, 9) ||
843 first!=u_strstr(s+1, sub_trail) ||
844 first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
845 first!=u_strFindFirst(s+1, -1, &trail, 1) ||
846 first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
847 first!=u_strFindFirst(s+1, 9, &trail, 1) ||
848 first!=u_strrchr(s+1, trail) ||
849 first!=u_strrchr32(s+1, trail) ||
850 first!=u_memrchr(s+1, trail, 9) ||
851 first!=u_memrchr32(s+1, trail, 9) ||
852 first!=u_strrstr(s+1, sub_trail) ||
853 first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
854 first!=u_strFindLast(s+1, -1, &trail, 1) ||
855 first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
856 first!=u_strFindLast(s+1, 9, &trail, 1)
857 ) {
858 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
859 }
860
861 /* find U+10402 in s[1..10[ */
862 first=s+1;
863 last=s+8;
864 if(
865 first!=u_strchr32(s+1, supp) ||
866 first!=u_memchr32(s+1, supp, 9) ||
867 first!=u_strstr(s+1, sub_supp) ||
868 first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
869 first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
870 first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
871 first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
872 last!=u_strrchr32(s+1, supp) ||
873 last!=u_memrchr32(s+1, supp, 9) ||
874 last!=u_strrstr(s+1, sub_supp) ||
875 last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
876 last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
877 last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
878 last!=u_strFindLast(s+1, 9, sub_supp, 2)
879 ) {
880 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
881 }
882
883 /* do not find U+10402 in a single UChar */
884 if(
885 NULL!=u_memchr32(s+1, supp, 1) ||
886 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
887 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
888 NULL!=u_memrchr32(s+1, supp, 1) ||
889 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
890 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
891 NULL!=u_memrchr32(s+2, supp, 1) ||
892 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
893 NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
894 ) {
895 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
896 }
897
898 /* do not find U+10403 in s[1..10[ */
899 if(
900 NULL!=u_strchr32(s+1, supp2) ||
901 NULL!=u_memchr32(s+1, supp2, 9) ||
902 NULL!=u_strstr(s+1, sub_supp2) ||
903 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
904 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
905 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
906 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
907 NULL!=u_strrchr32(s+1, supp2) ||
908 NULL!=u_memrchr32(s+1, supp2, 9) ||
909 NULL!=u_strrstr(s+1, sub_supp2) ||
910 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
911 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
912 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
913 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
914 ) {
915 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
916 }
917
918 /* find <0061 d801> in s[1..10[ */
919 first=s+5;
920 if(
921 first!=u_strstr(s+1, sub_a_lead) ||
922 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
923 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
924 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
925 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
926 first!=u_strrstr(s+1, sub_a_lead) ||
927 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
928 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
929 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
930 first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
931 ) {
932 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
933 }
934
935 /* find <dc02 0061> in s[1..10[ */
936 first=s+4;
937 if(
938 first!=u_strstr(s+1, sub_trail_a) ||
939 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
940 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
941 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
942 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
943 first!=u_strrstr(s+1, sub_trail_a) ||
944 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
945 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
946 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
947 first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
948 ) {
949 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
950 }
951
952 /* do not find "aba" in s[1..10[ */
953 if(
954 NULL!=u_strstr(s+1, sub_aba) ||
955 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
956 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
957 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
958 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
959 NULL!=u_strrstr(s+1, sub_aba) ||
960 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
961 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
962 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
963 NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
964 ) {
965 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
966 }
967}
968
969static void TestStringCopy()
970{
971 UChar temp[40];
972 UChar *result=0;
973 UChar subString[5];
974 UChar uchars[]={0x61, 0x62, 0x63, 0x00};
975 char charOut[40];
976 char chars[]="abc"; /* needs default codepage */
977
978 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
979
980 u_uastrcpy(temp, "abc");
981 if(u_strcmp(temp, uchars) != 0) {
982 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
983 }
984
985 temp[0] = 0xFB; /* load garbage into it */
986 temp[1] = 0xFB;
987 temp[2] = 0xFB;
988 temp[3] = 0xFB;
989
990 u_uastrncpy(temp, "abcabcabc", 3);
991 if(u_strncmp(uchars, temp, 3) != 0){
992 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
993 }
994 if(temp[3] != 0xFB) {
995 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
996 }
997
998 charOut[0] = (char)0x7B; /* load garbage into it */
999 charOut[1] = (char)0x7B;
1000 charOut[2] = (char)0x7B;
1001 charOut[3] = (char)0x7B;
1002
1003 temp[0] = 0x0061;
1004 temp[1] = 0x0062;
1005 temp[2] = 0x0063;
1006 temp[3] = 0x0061;
1007 temp[4] = 0x0062;
1008 temp[5] = 0x0063;
1009 temp[6] = 0x0000;
1010
1011 u_austrncpy(charOut, temp, 3);
1012 if(strncmp(chars, charOut, 3) != 0){
1013 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1014 }
1015 if(charOut[3] != (char)0x7B) {
1016 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1017 }
1018
1019 /*Testing u_strchr()*/
1020 log_verbose("Testing u_strchr\n");
1021 temp[0]=0x42;
1022 temp[1]=0x62;
1023 temp[2]=0x62;
1024 temp[3]=0x63;
1025 temp[4]=0xd841;
1026 temp[5]=0xd841;
1027 temp[6]=0xdc02;
1028 temp[7]=0;
1029 result=u_strchr(temp, (UChar)0x62);
1030 if(result != temp+1){
1031 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1032 }
1033 /*Testing u_strstr()*/
1034 log_verbose("Testing u_strstr\n");
1035 subString[0]=0x62;
1036 subString[1]=0x63;
1037 subString[2]=0;
1038 result=u_strstr(temp, subString);
1039 if(result != temp+2){
1040 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1041 }
1042 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1043 if(result != temp){
1044 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1045 }
1046 result=u_strstr(subString, temp);
1047 if(result != NULL){
1048 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1049 }
1050
1051 /*Testing u_strchr32*/
1052 log_verbose("Testing u_strchr32\n");
1053 result=u_strchr32(temp, (UChar32)0x62);
1054 if(result != temp+1){
1055 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1056 }
1057 result=u_strchr32(temp, (UChar32)0xfb);
1058 if(result != NULL){
1059 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1060 }
1061 result=u_strchr32(temp, (UChar32)0x20402);
1062 if(result != temp+5){
1063 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1064 }
1065
1066 temp[7]=0xfc00;
1067 result=u_memchr32(temp, (UChar32)0x20402, 7);
1068 if(result != temp+5){
1069 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1070 }
1071 result=u_memchr32(temp, (UChar32)0x20402, 6);
1072 if(result != NULL){
1073 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1074 }
1075 result=u_memchr32(temp, (UChar32)0x20402, 1);
1076 if(result != NULL){
1077 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1078 }
1079 result=u_memchr32(temp, (UChar32)0xfc00, 8);
1080 if(result != temp+7){
1081 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1082 }
1083}
1084
1085/* test u_unescape() and u_unescapeAt() ------------------------------------- */
1086
1087static void
1088TestUnescape() {
1089 static UChar buffer[200];
1090
1091 static const char* input =
1092 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1093
1094 static const UChar expect[]={
1095 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1096 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1097 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1098 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1099 };
2ca993e8 1100 static const int32_t explength = UPRV_LENGTHOF(expect)-1;
b75a7d8f
A
1101 int32_t length;
1102
1103 /* test u_unescape() */
2ca993e8 1104 length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
b75a7d8f
A
1105 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1106 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1107 explength);
1108 }
1109
1110 /* try preflighting */
2ca993e8 1111 length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
b75a7d8f
A
1112 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1113 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1114 }
1115
1116 /* ### TODO: test u_unescapeAt() */
1117}
1118
1119/* test code point counting functions --------------------------------------- */
1120
1121/* reference implementation of u_strHasMoreChar32Than() */
1122static int32_t
1123_refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1124 int32_t count=u_countChar32(s, length);
1125 return count>number;
1126}
1127
1128/* compare the real function against the reference */
1129static void
1130_testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1131 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1132 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1133 i, length, number, u_strHasMoreChar32Than(s, length, number));
1134 }
1135}
1136
1137static void
1138TestCountChar32() {
1139 static const UChar string[]={
1140 0x61, 0x62, 0xd800, 0xdc00,
1141 0xd801, 0xdc01, 0x63, 0xd802,
1142 0x64, 0xdc03, 0x65, 0x66,
1143 0xd804, 0xdc04, 0xd805, 0xdc05,
1144 0x67
1145 };
1146 UChar buffer[100];
1147 int32_t i, length, number;
1148
1149 /* test u_strHasMoreChar32Than() with length>=0 */
b331163b 1150 length=UPRV_LENGTHOF(string);
b75a7d8f
A
1151 while(length>=0) {
1152 for(i=0; i<=length; ++i) {
1153 for(number=-1; number<=((length-i)+2); ++number) {
1154 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1155 }
1156 }
1157 --length;
1158 }
1159
1160 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
b331163b 1161 length=UPRV_LENGTHOF(string);
b75a7d8f
A
1162 u_memcpy(buffer, string, length);
1163 while(length>=0) {
1164 buffer[length]=0;
1165 for(i=0; i<=length; ++i) {
1166 for(number=-1; number<=((length-i)+2); ++number) {
2ca993e8 1167 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
b75a7d8f
A
1168 }
1169 }
1170 --length;
1171 }
1172
1173 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1174 for(length=-1; length<=1; ++length) {
1175 for(i=0; i<=length; ++i) {
1176 for(number=-2; number<=2; ++number) {
1177 _testStrHasMoreChar32Than(NULL, 0, length, number);
1178 }
1179 }
1180 }
1181}
1182
1183/* UCharIterator ------------------------------------------------------------ */
1184
1185/*
1186 * Compare results from two iterators, should be same.
1187 * Assume that the text is not empty and that
1188 * iteration start==0 and iteration limit==length.
1189 */
1190static void
1191compareIterators(UCharIterator *iter1, const char *n1,
1192 UCharIterator *iter2, const char *n2) {
1193 int32_t i, pos1, pos2, middle, length;
1194 UChar32 c1, c2;
1195
1196 /* compare lengths */
1197 length=iter1->getIndex(iter1, UITER_LENGTH);
1198 pos2=iter2->getIndex(iter2, UITER_LENGTH);
1199 if(length!=pos2) {
1200 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1201 return;
1202 }
1203
1204 /* set into the middle */
1205 middle=length/2;
1206
1207 pos1=iter1->move(iter1, middle, UITER_ZERO);
1208 if(pos1!=middle) {
1209 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1210 return;
1211 }
1212
1213 pos2=iter2->move(iter2, middle, UITER_ZERO);
1214 if(pos2!=middle) {
1215 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1216 return;
1217 }
1218
1219 /* test current() */
1220 c1=iter1->current(iter1);
1221 c2=iter2->current(iter2);
1222 if(c1!=c2) {
1223 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1224 return;
1225 }
1226
1227 /* move forward 3 UChars */
1228 for(i=0; i<3; ++i) {
1229 c1=iter1->next(iter1);
1230 c2=iter2->next(iter2);
1231 if(c1!=c2) {
1232 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1233 return;
1234 }
1235 }
1236
1237 /* move backward 5 UChars */
1238 for(i=0; i<5; ++i) {
1239 c1=iter1->previous(iter1);
1240 c2=iter2->previous(iter2);
1241 if(c1!=c2) {
1242 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1243 return;
1244 }
1245 }
1246
1247 /* iterate forward from the beginning */
1248 pos1=iter1->move(iter1, 0, UITER_START);
1249 if(pos1<0) {
1250 log_err("%s->move(start) failed\n", n1);
1251 return;
1252 }
1253 if(!iter1->hasNext(iter1)) {
1254 log_err("%s->hasNext() at the start returns FALSE\n", n1);
1255 return;
1256 }
1257
1258 pos2=iter2->move(iter2, 0, UITER_START);
1259 if(pos2<0) {
1260 log_err("%s->move(start) failed\n", n2);
1261 return;
1262 }
1263 if(!iter2->hasNext(iter2)) {
1264 log_err("%s->hasNext() at the start returns FALSE\n", n2);
1265 return;
1266 }
1267
1268 do {
1269 c1=iter1->next(iter1);
1270 c2=iter2->next(iter2);
1271 if(c1!=c2) {
1272 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1273 return;
1274 }
1275 } while(c1>=0);
1276
1277 if(iter1->hasNext(iter1)) {
1278 log_err("%s->hasNext() at the end returns TRUE\n", n1);
1279 return;
1280 }
1281 if(iter2->hasNext(iter2)) {
1282 log_err("%s->hasNext() at the end returns TRUE\n", n2);
1283 return;
1284 }
1285
1286 /* back to the middle */
1287 pos1=iter1->move(iter1, middle, UITER_ZERO);
1288 if(pos1!=middle) {
1289 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1290 return;
1291 }
1292
1293 pos2=iter2->move(iter2, middle, UITER_ZERO);
1294 if(pos2!=middle) {
1295 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1296 return;
1297 }
1298
1299 /* move to index 1 */
1300 pos1=iter1->move(iter1, 1, UITER_ZERO);
1301 if(pos1!=1) {
1302 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1303 return;
1304 }
1305
1306 pos2=iter2->move(iter2, 1, UITER_ZERO);
1307 if(pos2!=1) {
1308 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1309 return;
1310 }
1311
1312 /* iterate backward from the end */
1313 pos1=iter1->move(iter1, 0, UITER_LIMIT);
1314 if(pos1<0) {
1315 log_err("%s->move(limit) failed\n", n1);
1316 return;
1317 }
1318 if(!iter1->hasPrevious(iter1)) {
1319 log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1320 return;
1321 }
1322
1323 pos2=iter2->move(iter2, 0, UITER_LIMIT);
1324 if(pos2<0) {
1325 log_err("%s->move(limit) failed\n", n2);
1326 return;
1327 }
1328 if(!iter2->hasPrevious(iter2)) {
1329 log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1330 return;
1331 }
1332
1333 do {
1334 c1=iter1->previous(iter1);
1335 c2=iter2->previous(iter2);
1336 if(c1!=c2) {
1337 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1338 return;
1339 }
1340 } while(c1>=0);
1341
1342 if(iter1->hasPrevious(iter1)) {
1343 log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1344 return;
1345 }
1346 if(iter2->hasPrevious(iter2)) {
1347 log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1348 return;
1349 }
1350}
1351
1352/*
1353 * Test the iterator's getState() and setState() functions.
1354 * iter1 and iter2 must be set up for the same iterator type and the same string
1355 * but may be physically different structs (different addresses).
1356 *
1357 * Assume that the text is not empty and that
1358 * iteration start==0 and iteration limit==length.
1359 * It must be 2<=middle<=length-2.
1360 */
1361static void
1362testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1363 UChar32 u[4];
1364
1365 UErrorCode errorCode;
1366 UChar32 c;
1367 uint32_t state;
1368 int32_t i, j;
1369
1370 /* get four UChars from the middle of the string */
1371 iter1->move(iter1, middle-2, UITER_ZERO);
1372 for(i=0; i<4; ++i) {
1373 c=iter1->next(iter1);
1374 if(c<0) {
1375 /* the test violates the assumptions, see comment above */
1376 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1377 return;
1378 }
1379 u[i]=c;
1380 }
1381
1382 /* move to the middle and get the state */
1383 iter1->move(iter1, -2, UITER_CURRENT);
1384 state=uiter_getState(iter1);
1385
1386 /* set the state into the second iterator and compare the results */
1387 errorCode=U_ZERO_ERROR;
1388 uiter_setState(iter2, state, &errorCode);
1389 if(U_FAILURE(errorCode)) {
1390 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1391 return;
1392 }
1393
1394 c=iter2->current(iter2);
1395 if(c!=u[2]) {
1396 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1397 }
1398
1399 c=iter2->previous(iter2);
1400 if(c!=u[1]) {
1401 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1402 }
1403
1404 iter2->move(iter2, 2, UITER_CURRENT);
1405 c=iter2->next(iter2);
1406 if(c!=u[3]) {
1407 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1408 }
1409
1410 iter2->move(iter2, -3, UITER_CURRENT);
1411 c=iter2->previous(iter2);
1412 if(c!=u[0]) {
1413 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1414 }
1415
1416 /* move the second iterator back to the middle */
1417 iter2->move(iter2, 1, UITER_CURRENT);
1418 iter2->next(iter2);
1419
1420 /* check that both are in the middle */
1421 i=iter1->getIndex(iter1, UITER_CURRENT);
1422 j=iter2->getIndex(iter2, UITER_CURRENT);
1423 if(i!=middle) {
1424 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1425 }
1426 if(i!=j) {
1427 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1428 }
1429
1430 /* compare lengths */
1431 i=iter1->getIndex(iter1, UITER_LENGTH);
1432 j=iter2->getIndex(iter2, UITER_LENGTH);
1433 if(i!=j) {
1434 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1435 }
1436}
1437
1438static void
1439TestUCharIterator() {
1440 static const UChar text[]={
1441 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1442 };
1443 char bytes[40];
1444
1445 UCharIterator iter, iter1, iter2;
1446 UConverter *cnv;
1447 UErrorCode errorCode;
1448 int32_t length;
1449
1450 /* simple API/code coverage - test NOOP UCharIterator */
1451 uiter_setString(&iter, NULL, 0);
1452 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1453 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1454 iter.hasNext(&iter) || iter.hasPrevious(&iter)
1455 ) {
1456 log_err("NOOP UCharIterator behaves unexpectedly\n");
1457 }
1458
1459 /* test get/set state */
b331163b 1460 length=UPRV_LENGTHOF(text)-1;
b75a7d8f
A
1461 uiter_setString(&iter1, text, -1);
1462 uiter_setString(&iter2, text, length);
1463 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1464 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1465
1466 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1467 errorCode=U_ZERO_ERROR;
1468 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1469 if(U_FAILURE(errorCode)) {
1470 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1471 return;
1472 }
1473
1474 uiter_setString(&iter1, text, -1);
1475 uiter_setUTF8(&iter2, bytes, length);
1476 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1477
1478 /* try again with length=-1 */
1479 uiter_setUTF8(&iter2, bytes, -1);
1480 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1481
1482 /* test get/set state */
b331163b 1483 length=UPRV_LENGTHOF(text)-1;
b75a7d8f
A
1484 uiter_setUTF8(&iter1, bytes, -1);
1485 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1486 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1487
1488 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1489 errorCode=U_ZERO_ERROR;
1490 cnv=ucnv_open("UTF-16BE", &errorCode);
1491 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1492 ucnv_close(cnv);
1493 if(U_FAILURE(errorCode)) {
1494 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1495 return;
1496 }
1497
1498 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1499 bytes[length]=bytes[length+1]=0;
1500
1501 uiter_setString(&iter1, text, -1);
1502 uiter_setUTF16BE(&iter2, bytes, length);
1503 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1504
1505 /* try again with length=-1 */
1506 uiter_setUTF16BE(&iter2, bytes, -1);
1507 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1508
1509 /* try again after moving the bytes up one, and with length=-1 */
1510 memmove(bytes+1, bytes, length+2);
1511 uiter_setUTF16BE(&iter2, bytes+1, -1);
1512 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1513
1514 /* ### TODO test other iterators: CharacterIterator, Replaceable */
1515}
0f5d89e8
A
1516
/*
 * Input strings for TestIsWellFormed(): NUL-terminated UTF-16 arrays that the
 * wellFormedTests table lists with an expected result of TRUE.
 * The trailing comment on each row describes what that row contributes.
 */
1517static const UChar valid0[] = { 0 }; // test empty string
1518static const UChar valid1[] = { 0x0061,0x270C,0xFE0E, // victory hand with text variation selector
1519 0x0062,0x270C,0xFE0F, // victory hand with emoji variation selector
1520 0x0063,0x270C,0xD83C,0xDFFD, // victory hand with skin tone modifier
1521 0x0064,0x270C,0xFE0F,0xD83C,0xDFFD, // victory hand with emoji variation selector and skin tone modifier (obsolete sequence)
1522 0x0065,0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83C,0xDFEB, // woman teacher (ZWJ sequence) with skin tone
1523 0x0066,0xD83D,0xDC69,0x200D,0xD83D,0xDC69,0x200D,0xD83D,0xDC67, // family (woman, woman, girl - ZWJ sequence)
1524 0x0067,0x0030,0xFE0F,0x20E3, // keypad 0 (emoji combining seq)
1525 0x0068,0xD83C,0xDDEC,0xD83C,0xDDE7, // flag of UK (regional indicator pair)
1526 0x0069,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0xDB40,0xDC7F, // flag of England (tag seq)
1527 0x006A,0 };
/* a base letter followed by a long run of U+0300, then another letter */
1528static const UChar valid2[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
1529 0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 29 combining marks
1530 0x006C,0 };
/* one emoji (or emoji sequence) per row; the comment gives its code points */
1531static const UChar valid3[] = { // sample from Bill Siegrist, 100 UTF16 units
1532 0xD83D,0xDC2E, // U+1F42E 🐮
1533 0xD83D,0xDC3C, // U+1F43C 🐼
1534 0xD83D,0xDC39, // U+1F439 🐹
1535 0xD83D,0xDC31, // U+1F431 🐱
1536 0xD83D,0xDE4B,0x200D,0x2640,0xFE0F, // U+1F64B U+200D U+2640 U+FE0F 🙋‍♀️
1537 0xD83D,0xDE47,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F647 U+1F3FC U+200D U+2642 U+FE0F 🙇🏼‍♂️
1538 0xD83D,0xDE46,0x200D,0x2642,0xFE0F, // U+1F646 U+200D U+2642 U+FE0F 🙆‍♂️
1539 0xD83E,0xDDDA,0xD83C,0xDFFF,0x200D,0x2640,0xFE0F, // U+1F9DA U+1F3FF U+200D U+2640 U+FE0F 🧚🏿‍♀️
1540 0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D U+2642 U+FE0F 🧖🏾‍♂️
1541 0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D U+2642 U+FE0F 🧖🏾‍♂️
1542 0xD83E,0xDDDB,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F9DB U+1F3FC U+200D U+2642 U+FE0F 🧛🏼‍♂️
1543 0xD83E,0xDDD9,0x200D,0x2640,0xFE0F, // U+1F9D9 U+200D U+2640 U+FE0F 🧙‍♀️
1544 0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0x2696,0xFE0F, // U+1F468 U+1F3FE U+200D U+2696 U+FE0F 👨🏾‍⚖️
1545 0xD83D,0xDC69,0xD83C,0xDFFC,0x200D,0xD83D,0xDD27, // U+1F469 U+1F3FC U+200D U+1F527 👩🏼‍🔧
1546 0xD83D,0xDC69,0xD83C,0xDFFF,0x200D,0xD83C,0xDFEB, // U+1F469 U+1F3FF U+200D U+1F3EB 👩🏿‍🏫
1547 0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0xD83D,0xDCBB, // U+1F468 U+1F3FE U+200D U+1F4BB 👨🏾‍💻
1548 0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83D,0xDD2C, // U+1F469 U+1F3FD U+200D U+1F52C 👩🏽‍🔬
1549 0xD83D,0xDC68,0xD83C,0xDFFC,0x200D,0xD83D,0xDE92, // U+1F468 U+1F3FC U+200D U+1F692 👨🏼‍🚒
1550 0 };
/* letters interleaved with U+202A, with runs of U+202C and two U+000A; the row comments track the resulting level */
1551static const UChar valid4[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
1552 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
1553 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
1554 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
1555 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
1556 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
1557 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
1558 0x0061,0x2066,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x2069, // to level 122 in LRI then pop to 112
1559 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066, // to level 122 again
1560 0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 90
1561 0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 58
1562 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 74
1563 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 90
1564 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 106
1565 0x000A, // pop to 0
1566 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
1567 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
1568 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
1569 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
1570 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
1571 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
1572 0x000A,0 };
1573
/*
 * Input strings for TestIsWellFormed(): NUL-terminated UTF-16 arrays that the
 * wellFormedTests table lists with an expected result of FALSE.
 * The trailing comment on each definition names the reason it should be rejected.
 */
1574static const UChar malformed1[] = { 0x0061,0xFFFF,0 }; // non-character (BMP)
1575static const UChar malformed2[] = { 0x0062,0xD87F,0xDFFE,0 }; // non-character (supplemental)
1576static const UChar malformed3[] = { 0x0063,0xD7FC,0 }; // unassigned
1577static const UChar malformed4[] = { 0x0064,0xD800,0 }; // unpaired high surrogate
1578static const UChar malformed5[] = { 0x0065,0xDC00,0 }; // unpaired low surrogate
1579static const UChar malformed6[] = { 0x0066,0xFE0F,0 }; // emoji variation selector on non-emoji
1580static const UChar malformed7[] = { 0x0067,0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence on non-emoji
1581static const UChar malformed8[] = { 0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence with no base
1582static const UChar malformed9[] = { 0x0068,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0x0069,0 }; // tag sequence with no term
1583static const UChar malformedA[] = { 0x006A,0xD83C,0xDFF4,0xDB40,0xDC7F,0 }; // tag sequence with no tag_spec, just term
/* same shape as valid2 but with a longer run of U+0300 */
1584static const UChar malformedB[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
1585 0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 31 combining marks
1586 0x006C,0 };
/* same pattern as the start of valid4, but continued for one more row of U+202A before any U+202C */
1587static const UChar malformedC[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
1588 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
1589 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
1590 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
1591 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
1592 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
1593 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
1594 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 128 (error)
1595 0x0061,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0 }; // start PDFs, too late
1596
1597typedef struct {
1598 const char* descrip;
1599 const UChar* string;
1600 UBool result;
1601} StringAndResult;
1602
1603static const StringAndResult wellFormedTests[] = {
1604 { "valid0", valid0, TRUE },
1605 { "valid1", valid1, TRUE },
1606 { "valid2", valid2, TRUE },
1607 { "valid3", valid3, TRUE },
1608 { "valid4", valid4, TRUE },
1609 { "malformed1", malformed1, FALSE },
1610 { "malformed2", malformed2, FALSE },
1611 { "malformed3", malformed3, FALSE },
1612 { "malformed4", malformed4, FALSE },
1613 { "malformed5", malformed5, FALSE },
1614 { "malformed6", malformed6, FALSE },
1615 { "malformed7", malformed7, FALSE },
1616 { "malformed8", malformed8, FALSE },
1617 { "malformed9", malformed9, FALSE },
1618 { "malformedA", malformedA, FALSE },
1619 { "malformedB", malformedB, FALSE },
1620 { "malformedC", malformedC, FALSE },
1621 { NULL, NULL, 0 }
1622};
1623
1624static void
1625TestIsWellFormed() {
1626 const StringAndResult* testPtr;
1627 for (testPtr = wellFormedTests; testPtr->descrip != NULL; testPtr++) {
1628 UBool result = u_strIsWellFormed(testPtr->string, -1);
1629 if (result != testPtr->result) {
1630 log_err("test %s with length -1, expected %d, got %d\n", testPtr->descrip, testPtr->result, result);
1631 }
1632
1633 int32_t length = u_strlen(testPtr->string);
1634 result = u_strIsWellFormed(testPtr->string, length);
1635 if (result != testPtr->result) {
1636 log_err("test %s with length %d, expected %d, got %d\n", testPtr->descrip, length, testPtr->result, result);
1637 }
1638 }
1639}