]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ustring.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / ustring.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
4* Copyright (C) 1998-2003, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9* File ustring.c
10*
11* Modification History:
12*
13* Date Name Description
14* 12/07/98 bertrand Creation.
15******************************************************************************
16*/
17
18#include "unicode/utypes.h"
19#include "unicode/uchar.h"
20#include "unicode/uiter.h"
21#include "unicode/ustring.h"
22#include "unicode/putil.h"
23#include "unicode/ucnv.h"
24#include "cstring.h"
25#include "cwchar.h"
26#include "cmemory.h"
27#include "umutex.h"
28#include "ustr_imp.h"
29
30/* forward declaractions of definitions for the shared default converter */
31
32static UConverter *gDefaultConverter = NULL;
33
34/* ANSI string.h - style functions ------------------------------------------ */
35
36/* maximum string length for u_uastrcpy() and u_austrcpy() implementations */
37#define MAX_STRLEN 0x0FFFFFFF
38
39/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
40#define U_BMP_MAX 0xffff
41
42/* Forward binary string search functions ----------------------------------- */
43
44/*
45 * Test if a substring match inside a string is at code point boundaries.
46 * All pointers refer to the same buffer.
47 * The limit pointer may be NULL, all others must be real pointers.
48 */
49static U_INLINE UBool
50isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
51 if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
52 /* the leading edge of the match is in the middle of a surrogate pair */
53 return FALSE;
54 }
55 if(U16_IS_LEAD(*(matchLimit-1)) && match!=limit && U16_IS_TRAIL(*matchLimit)) {
56 /* the trailing edge of the match is in the middle of a surrogate pair */
57 return FALSE;
58 }
59 return TRUE;
60}
61
62U_CAPI UChar * U_EXPORT2
63u_strFindFirst(const UChar *s, int32_t length,
64 const UChar *sub, int32_t subLength) {
65 const UChar *start, *p, *q, *subLimit;
66 UChar c, cs, cq;
67
68 if(sub==NULL || subLength<-1) {
69 return (UChar *)s;
70 }
71 if(s==NULL || length<-1) {
72 return NULL;
73 }
74
75 start=s;
76
77 if(length<0 && subLength<0) {
78 /* both strings are NUL-terminated */
79 if((cs=*sub++)==0) {
80 return (UChar *)s;
81 }
82 if(*sub==0 && !U16_IS_SURROGATE(cs)) {
83 /* the substring consists of a single, non-surrogate BMP code point */
84 return u_strchr(s, cs);
85 }
86
87 while((c=*s++)!=0) {
88 if(c==cs) {
89 /* found first substring UChar, compare rest */
90 p=s;
91 q=sub;
92 for(;;) {
93 if((cq=*q)==0) {
94 if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
95 return (UChar *)(s-1); /* well-formed match */
96 } else {
97 break; /* no match because surrogate pair is split */
98 }
99 }
100 if((c=*p)==0) {
101 return NULL; /* no match, and none possible after s */
102 }
103 if(c!=cq) {
104 break; /* no match */
105 }
106 ++p;
107 ++q;
108 }
109 }
110 }
111
112 /* not found */
113 return NULL;
114 }
115
116 if(subLength<0) {
117 subLength=u_strlen(sub);
118 }
119 if(subLength==0) {
120 return (UChar *)s;
121 }
122
123 /* get sub[0] to search for it fast */
124 cs=*sub++;
125 --subLength;
126 subLimit=sub+subLength;
127
128 if(subLength==0 && !U16_IS_SURROGATE(cs)) {
129 /* the substring consists of a single, non-surrogate BMP code point */
130 return length<0 ? u_strchr(s, cs) : u_memchr(s, cs, length);
131 }
132
133 if(length<0) {
134 /* s is NUL-terminated */
135 while((c=*s++)!=0) {
136 if(c==cs) {
137 /* found first substring UChar, compare rest */
138 p=s;
139 q=sub;
140 for(;;) {
141 if(q==subLimit) {
142 if(isMatchAtCPBoundary(start, s-1, p, NULL)) {
143 return (UChar *)(s-1); /* well-formed match */
144 } else {
145 break; /* no match because surrogate pair is split */
146 }
147 }
148 if((c=*p)==0) {
149 return NULL; /* no match, and none possible after s */
150 }
151 if(c!=*q) {
152 break; /* no match */
153 }
154 ++p;
155 ++q;
156 }
157 }
158 }
159 } else {
160 const UChar *limit, *preLimit;
161
162 /* subLength was decremented above */
163 if(length<=subLength) {
164 return NULL; /* s is shorter than sub */
165 }
166
167 limit=s+length;
168
169 /* the substring must start before preLimit */
170 preLimit=limit-subLength;
171
172 while(s!=preLimit) {
173 c=*s++;
174 if(c==cs) {
175 /* found first substring UChar, compare rest */
176 p=s;
177 q=sub;
178 for(;;) {
179 if(q==subLimit) {
180 if(isMatchAtCPBoundary(start, s-1, p, limit)) {
181 return (UChar *)(s-1); /* well-formed match */
182 } else {
183 break; /* no match because surrogate pair is split */
184 }
185 }
186 if(*p!=*q) {
187 break; /* no match */
188 }
189 ++p;
190 ++q;
191 }
192 }
193 }
194 }
195
196 /* not found */
197 return NULL;
198}
199
200U_CAPI UChar * U_EXPORT2
201u_strstr(const UChar *s, const UChar *substring) {
202 return u_strFindFirst(s, -1, substring, -1);
203}
204
205U_CAPI UChar * U_EXPORT2
206u_strchr(const UChar *s, UChar c) {
207 if(U16_IS_SURROGATE(c)) {
208 /* make sure to not find half of a surrogate pair */
209 return u_strFindFirst(s, -1, &c, 1);
210 } else {
211 UChar cs;
212
213 /* trivial search for a BMP code point */
214 for(;;) {
215 if((cs=*s)==c) {
216 return (UChar *)s;
217 }
218 if(cs==0) {
219 return NULL;
220 }
221 ++s;
222 }
223 }
224}
225
226U_CAPI UChar * U_EXPORT2
227u_strchr32(const UChar *s, UChar32 c) {
228 if((uint32_t)c<=U_BMP_MAX) {
229 /* find BMP code point */
230 return u_strchr(s, (UChar)c);
231 } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
232 /* find supplementary code point as surrogate pair */
233 UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
234
235 while((cs=*s++)!=0) {
236 if(cs==lead && *s==trail) {
237 return (UChar *)(s-1);
238 }
239 }
240 return NULL;
241 } else {
242 /* not a Unicode code point, not findable */
243 return NULL;
244 }
245}
246
247U_CAPI UChar * U_EXPORT2
248u_memchr(const UChar *s, UChar c, int32_t count) {
249 if(count<=0) {
250 return NULL; /* no string */
251 } else if(U16_IS_SURROGATE(c)) {
252 /* make sure to not find half of a surrogate pair */
253 return u_strFindFirst(s, count, &c, 1);
254 } else {
255 /* trivial search for a BMP code point */
256 const UChar *limit=s+count;
257 do {
258 if(*s==c) {
259 return (UChar *)s;
260 }
261 } while(++s!=limit);
262 return NULL;
263 }
264}
265
266U_CAPI UChar * U_EXPORT2
267u_memchr32(const UChar *s, UChar32 c, int32_t count) {
268 if((uint32_t)c<=U_BMP_MAX) {
269 /* find BMP code point */
270 return u_memchr(s, (UChar)c, count);
271 } else if(count<2) {
272 /* too short for a surrogate pair */
273 return NULL;
274 } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
275 /* find supplementary code point as surrogate pair */
276 const UChar *limit=s+count-1; /* -1 so that we do not need a separate check for the trail unit */
277 UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
278
279 do {
280 if(*s==lead && *(s+1)==trail) {
281 return (UChar *)s;
282 }
283 } while(++s!=limit);
284 return NULL;
285 } else {
286 /* not a Unicode code point, not findable */
287 return NULL;
288 }
289}
290
291/* Backward binary string search functions ---------------------------------- */
292
293U_CAPI UChar * U_EXPORT2
294u_strFindLast(const UChar *s, int32_t length,
295 const UChar *sub, int32_t subLength) {
296 const UChar *start, *limit, *p, *q, *subLimit;
297 UChar c, cs;
298
299 if(sub==NULL || subLength<-1) {
300 return (UChar *)s;
301 }
302 if(s==NULL || length<-1) {
303 return NULL;
304 }
305
306 /*
307 * This implementation is more lazy than the one for u_strFindFirst():
308 * There is no special search code for NUL-terminated strings.
309 * It does not seem to be worth it for searching substrings to
310 * search forward and find all matches like in u_strrchr() and similar.
311 * Therefore, we simply get both string lengths and search backward.
312 *
313 * markus 2002oct23
314 */
315
316 if(subLength<0) {
317 subLength=u_strlen(sub);
318 }
319 if(subLength==0) {
320 return (UChar *)s;
321 }
322
323 /* get sub[subLength-1] to search for it fast */
324 subLimit=sub+subLength;
325 cs=*(--subLimit);
326 --subLength;
327
328 if(subLength==0 && !U16_IS_SURROGATE(cs)) {
329 /* the substring consists of a single, non-surrogate BMP code point */
330 return length<0 ? u_strrchr(s, cs) : u_memrchr(s, cs, length);
331 }
332
333 if(length<0) {
334 length=u_strlen(s);
335 }
336
337 /* subLength was decremented above */
338 if(length<=subLength) {
339 return NULL; /* s is shorter than sub */
340 }
341
342 start=s;
343 limit=s+length;
344
345 /* the substring must start no later than s+subLength */
346 s+=subLength;
347
348 while(s!=limit) {
349 c=*(--limit);
350 if(c==cs) {
351 /* found last substring UChar, compare rest */
352 p=limit;
353 q=subLimit;
354 for(;;) {
355 if(q==sub) {
356 if(isMatchAtCPBoundary(start, p, limit+1, start+length)) {
357 return (UChar *)p; /* well-formed match */
358 } else {
359 break; /* no match because surrogate pair is split */
360 }
361 }
362 if(*(--p)!=*(--q)) {
363 break; /* no match */
364 }
365 }
366 }
367 }
368
369 /* not found */
370 return NULL;
371}
372
373U_CAPI UChar * U_EXPORT2
374u_strrstr(const UChar *s, const UChar *substring) {
375 return u_strFindLast(s, -1, substring, -1);
376}
377
378U_CAPI UChar * U_EXPORT2
379u_strrchr(const UChar *s, UChar c) {
380 if(U16_IS_SURROGATE(c)) {
381 /* make sure to not find half of a surrogate pair */
382 return u_strFindLast(s, -1, &c, 1);
383 } else {
384 const UChar *result=NULL;
385 UChar cs;
386
387 /* trivial search for a BMP code point */
388 for(;;) {
389 if((cs=*s)==c) {
390 result=s;
391 }
392 if(cs==0) {
393 return (UChar *)result;
394 }
395 ++s;
396 }
397 }
398}
399
400U_CAPI UChar * U_EXPORT2
401u_strrchr32(const UChar *s, UChar32 c) {
402 if((uint32_t)c<=U_BMP_MAX) {
403 /* find BMP code point */
404 return u_strrchr(s, (UChar)c);
405 } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
406 /* find supplementary code point as surrogate pair */
407 const UChar *result=NULL;
408 UChar cs, lead=U16_LEAD(c), trail=U16_TRAIL(c);
409
410 while((cs=*s++)!=0) {
411 if(cs==lead && *s==trail) {
412 result=s-1;
413 }
414 }
415 return (UChar *)result;
416 } else {
417 /* not a Unicode code point, not findable */
418 return NULL;
419 }
420}
421
422U_CAPI UChar * U_EXPORT2
423u_memrchr(const UChar *s, UChar c, int32_t count) {
424 if(count<=0) {
425 return NULL; /* no string */
426 } else if(U16_IS_SURROGATE(c)) {
427 /* make sure to not find half of a surrogate pair */
428 return u_strFindLast(s, count, &c, 1);
429 } else {
430 /* trivial search for a BMP code point */
431 const UChar *limit=s+count;
432 do {
433 if(*(--limit)==c) {
434 return (UChar *)limit;
435 }
436 } while(s!=limit);
437 return NULL;
438 }
439}
440
441U_CAPI UChar * U_EXPORT2
442u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
443 if((uint32_t)c<=U_BMP_MAX) {
444 /* find BMP code point */
445 return u_memrchr(s, (UChar)c, count);
446 } else if(count<2) {
447 /* too short for a surrogate pair */
448 return NULL;
449 } else if((uint32_t)c<=UCHAR_MAX_VALUE) {
450 /* find supplementary code point as surrogate pair */
451 const UChar *limit=s+count-1;
452 UChar lead=U16_LEAD(c), trail=U16_TRAIL(c);
453
454 do {
455 if(*limit==trail && *(limit-1)==lead) {
456 return (UChar *)(limit-1);
457 }
458 } while(s!=--limit);
459 return NULL;
460 } else {
461 /* not a Unicode code point, not findable */
462 return NULL;
463 }
464}
465
466/* Tokenization functions --------------------------------------------------- */
467
468/*
469 * Match each code point in a string against each code point in the matchSet.
470 * Return the index of the first string code point that
471 * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
472 * Return -(string length)-1 if there is no such code point.
473 */
474static int32_t
475_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
476 int32_t matchLen, matchBMPLen, strItr, matchItr;
477 UChar32 stringCh, matchCh;
478 UChar c, c2;
479
480 /* first part of matchSet contains only BMP code points */
481 matchBMPLen = 0;
482 while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
483 ++matchBMPLen;
484 }
485
486 /* second part of matchSet contains BMP and supplementary code points */
487 matchLen = matchBMPLen;
488 while(matchSet[matchLen] != 0) {
489 ++matchLen;
490 }
491
492 for(strItr = 0; (c = string[strItr]) != 0;) {
493 ++strItr;
494 if(U16_IS_SINGLE(c)) {
495 if(polarity) {
496 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
497 if(c == matchSet[matchItr]) {
498 return strItr - 1; /* one matches */
499 }
500 }
501 } else {
502 for(matchItr = 0; matchItr < matchLen; ++matchItr) {
503 if(c == matchSet[matchItr]) {
504 goto endloop;
505 }
506 }
507 return strItr - 1; /* none matches */
508 }
509 } else {
510 /*
511 * No need to check for string length before U16_IS_TRAIL
512 * because c2 could at worst be the terminating NUL.
513 */
514 if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
515 ++strItr;
516 stringCh = U16_GET_SUPPLEMENTARY(c, c2);
517 } else {
518 stringCh = c; /* unpaired trail surrogate */
519 }
520
521 if(polarity) {
522 for(matchItr = matchBMPLen; matchItr < matchLen;) {
523 U16_NEXT(matchSet, matchItr, matchLen, matchCh);
524 if(stringCh == matchCh) {
525 return strItr - U16_LENGTH(stringCh); /* one matches */
526 }
527 }
528 } else {
529 for(matchItr = matchBMPLen; matchItr < matchLen;) {
530 U16_NEXT(matchSet, matchItr, matchLen, matchCh);
531 if(stringCh == matchCh) {
532 goto endloop;
533 }
534 }
535 return strItr - U16_LENGTH(stringCh); /* none matches */
536 }
537 }
538endloop:
539 /* wish C had continue with labels like Java... */;
540 }
541
542 /* Didn't find it. */
543 return -strItr-1;
544}
545
546/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
547U_CAPI UChar * U_EXPORT2
548u_strpbrk(const UChar *string, const UChar *matchSet)
549{
550 int32_t index = _matchFromSet(string, matchSet, TRUE);
551 if(index >= 0) {
552 return (UChar *)string + index;
553 } else {
554 return NULL;
555 }
556}
557
558/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
559U_CAPI int32_t U_EXPORT2
560u_strcspn(const UChar *string, const UChar *matchSet)
561{
562 int32_t index = _matchFromSet(string, matchSet, TRUE);
563 if(index >= 0) {
564 return index;
565 } else {
566 return -index - 1; /* == u_strlen(string) */
567 }
568}
569
570/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
571U_CAPI int32_t U_EXPORT2
572u_strspn(const UChar *string, const UChar *matchSet)
573{
574 int32_t index = _matchFromSet(string, matchSet, FALSE);
575 if(index >= 0) {
576 return index;
577 } else {
578 return -index - 1; /* == u_strlen(string) */
579 }
580}
581
582/* ----- Text manipulation functions --- */
583
584U_CAPI UChar* U_EXPORT2
585u_strtok_r(UChar *src,
586 const UChar *delim,
587 UChar **saveState)
588{
589 UChar *tokSource;
590 UChar *nextToken;
591 uint32_t nonDelimIdx;
592
593 /* If saveState is NULL, the user messed up. */
594 if (src != NULL) {
595 tokSource = src;
596 *saveState = src; /* Set to "src" in case there are no delimiters */
597 }
598 else if (*saveState) {
599 tokSource = *saveState;
600 }
601 else {
602 /* src == NULL && *saveState == NULL */
603 /* This shouldn't happen. We already finished tokenizing. */
604 return NULL;
605 }
606
607 /* Skip initial delimiters */
608 nonDelimIdx = u_strspn(tokSource, delim);
609 tokSource = &tokSource[nonDelimIdx];
610
611 if (*tokSource) {
612 nextToken = u_strpbrk(tokSource, delim);
613 if (nextToken != NULL) {
614 /* Create a token */
615 *(nextToken++) = 0;
616 *saveState = nextToken;
617 return tokSource;
618 }
619 else if (*saveState) {
620 /* Return the last token */
621 *saveState = NULL;
622 return tokSource;
623 }
624 }
625 else {
626 /* No tokens were found. Only delimiters were left. */
627 *saveState = NULL;
628 }
629 return NULL;
630}
631
632/* Miscellaneous functions -------------------------------------------------- */
633
634U_CAPI UChar* U_EXPORT2
635u_strcat(UChar *dst,
636 const UChar *src)
637{
638 UChar *anchor = dst; /* save a pointer to start of dst */
639
640 while(*dst != 0) { /* To end of first string */
641 ++dst;
642 }
643 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */
644 }
645
646 return anchor;
647}
648
649U_CAPI UChar* U_EXPORT2
650u_strncat(UChar *dst,
651 const UChar *src,
652 int32_t n )
653{
654 if(n > 0) {
655 UChar *anchor = dst; /* save a pointer to start of dst */
656
657 while(*dst != 0) { /* To end of first string */
658 ++dst;
659 }
660 while((*dst = *src) != 0) { /* copy string 2 over */
661 ++dst;
662 if(--n == 0) {
663 *dst = 0;
664 break;
665 }
666 ++src;
667 }
668
669 return anchor;
670 } else {
671 return dst;
672 }
673}
674
675/* ----- Text property functions --- */
676
677U_CAPI int32_t U_EXPORT2
678u_strcmp(const UChar *s1,
679 const UChar *s2)
680{
681 UChar c1, c2;
682
683 for(;;) {
684 c1=*s1++;
685 c2=*s2++;
686 if (c1 != c2 || c1 == 0) {
687 break;
688 }
689 }
690 return (int32_t)c1 - (int32_t)c2;
691}
692
693U_CAPI int32_t U_EXPORT2
694uprv_strCompare(const UChar *s1, int32_t length1,
695 const UChar *s2, int32_t length2,
696 UBool strncmpStyle, UBool codePointOrder) {
697 const UChar *start1, *start2, *limit1, *limit2;
698 UChar c1, c2;
699
700 /* setup for fix-up */
701 start1=s1;
702 start2=s2;
703
704 /* compare identical prefixes - they do not need to be fixed up */
705 if(length1<0 && length2<0) {
706 /* strcmp style, both NUL-terminated */
707 if(s1==s2) {
708 return 0;
709 }
710
711 for(;;) {
712 c1=*s1;
713 c2=*s2;
714 if(c1!=c2) {
715 break;
716 }
717 if(c1==0) {
718 return 0;
719 }
720 ++s1;
721 ++s2;
722 }
723
724 /* setup for fix-up */
725 limit1=limit2=NULL;
726 } else if(strncmpStyle) {
727 /* special handling for strncmp, assume length1==length2>=0 but also check for NUL */
728 if(s1==s2) {
729 return 0;
730 }
731
732 limit1=start1+length1;
733
734 for(;;) {
735 /* both lengths are same, check only one limit */
736 if(s1==limit1) {
737 return 0;
738 }
739
740 c1=*s1;
741 c2=*s2;
742 if(c1!=c2) {
743 break;
744 }
745 if(c1==0) {
746 return 0;
747 }
748 ++s1;
749 ++s2;
750 }
751
752 /* setup for fix-up */
753 limit2=start2+length1; /* use length1 here, too, to enforce assumption */
754 } else {
755 /* memcmp/UnicodeString style, both length-specified */
756 int32_t lengthResult;
757
758 if(length1<0) {
759 length1=u_strlen(s1);
760 }
761 if(length2<0) {
762 length2=u_strlen(s2);
763 }
764
765 /* limit1=start1+min(lenght1, length2) */
766 if(length1<length2) {
767 lengthResult=-1;
768 limit1=start1+length1;
769 } else if(length1==length2) {
770 lengthResult=0;
771 limit1=start1+length1;
772 } else /* length1>length2 */ {
773 lengthResult=1;
774 limit1=start1+length2;
775 }
776
777 if(s1==s2) {
778 return lengthResult;
779 }
780
781 for(;;) {
782 /* check pseudo-limit */
783 if(s1==limit1) {
784 return lengthResult;
785 }
786
787 c1=*s1;
788 c2=*s2;
789 if(c1!=c2) {
790 break;
791 }
792 ++s1;
793 ++s2;
794 }
795
796 /* setup for fix-up */
797 limit1=start1+length1;
798 limit2=start2+length2;
799 }
800
801 /* if both values are in or above the surrogate range, fix them up */
802 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
803 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
804 if(
805 (c1<=0xdbff && (s1+1)!=limit1 && UTF_IS_TRAIL(*(s1+1))) ||
806 (UTF_IS_TRAIL(c1) && start1!=s1 && UTF_IS_LEAD(*(s1-1)))
807 ) {
808 /* part of a surrogate pair, leave >=d800 */
809 } else {
810 /* BMP code point - may be surrogate code point - make <d800 */
811 c1-=0x2800;
812 }
813
814 if(
815 (c2<=0xdbff && (s2+1)!=limit2 && UTF_IS_TRAIL(*(s2+1))) ||
816 (UTF_IS_TRAIL(c2) && start2!=s2 && UTF_IS_LEAD(*(s2-1)))
817 ) {
818 /* part of a surrogate pair, leave >=d800 */
819 } else {
820 /* BMP code point - may be surrogate code point - make <d800 */
821 c2-=0x2800;
822 }
823 }
824
825 /* now c1 and c2 are in the requested (code unit or code point) order */
826 return (int32_t)c1-(int32_t)c2;
827}
828
829/*
830 * Compare two strings as presented by UCharIterators.
831 * Use code unit or code point order.
832 * When the function returns, it is undefined where the iterators
833 * have stopped.
834 */
835U_CAPI int32_t U_EXPORT2
836u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
837 UChar32 c1, c2;
838
839 /* argument checking */
840 if(iter1==NULL || iter2==NULL) {
841 return 0; /* bad arguments */
842 }
843 if(iter1==iter2) {
844 return 0; /* identical iterators */
845 }
846
847 /* reset iterators to start? */
848 iter1->move(iter1, 0, UITER_START);
849 iter2->move(iter2, 0, UITER_START);
850
851 /* compare identical prefixes - they do not need to be fixed up */
852 for(;;) {
853 c1=iter1->next(iter1);
854 c2=iter2->next(iter2);
855 if(c1!=c2) {
856 break;
857 }
858 if(c1==-1) {
859 return 0;
860 }
861 }
862
863 /* if both values are in or above the surrogate range, fix them up */
864 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
865 /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
866 if(
867 (c1<=0xdbff && UTF_IS_TRAIL(iter1->current(iter1))) ||
868 (UTF_IS_TRAIL(c1) && (iter1->previous(iter1), UTF_IS_LEAD(iter1->previous(iter1))))
869 ) {
870 /* part of a surrogate pair, leave >=d800 */
871 } else {
872 /* BMP code point - may be surrogate code point - make <d800 */
873 c1-=0x2800;
874 }
875
876 if(
877 (c2<=0xdbff && UTF_IS_TRAIL(iter2->current(iter2))) ||
878 (UTF_IS_TRAIL(c2) && (iter2->previous(iter2), UTF_IS_LEAD(iter2->previous(iter2))))
879 ) {
880 /* part of a surrogate pair, leave >=d800 */
881 } else {
882 /* BMP code point - may be surrogate code point - make <d800 */
883 c2-=0x2800;
884 }
885 }
886
887 /* now c1 and c2 are in the requested (code unit or code point) order */
888 return (int32_t)c1-(int32_t)c2;
889}
890
891#if 0
892/*
893 * u_strCompareIter() does not leave the iterators _on_ the different units.
894 * This is possible but would cost a few extra indirect function calls to back
895 * up if the last unit (c1 or c2 respectively) was >=0.
896 *
897 * Consistently leaving them _behind_ the different units is not an option
898 * because the current "unit" is the end of the string if that is reached,
899 * and in such a case the iterator does not move.
900 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
901 * of their strings. Calling previous() on each does not move them to where
902 * the comparison fails.
903 *
904 * So the simplest semantics is to not define where the iterators end up.
905 *
906 * The following fragment is part of what would need to be done for backing up.
907 */
908void fragment {
909 /* iff a surrogate is part of a surrogate pair, leave >=d800 */
910 if(c1<=0xdbff) {
911 if(!UTF_IS_TRAIL(iter1->current(iter1))) {
912 /* lead surrogate code point - make <d800 */
913 c1-=0x2800;
914 }
915 } else if(c1<=0xdfff) {
916 int32_t index=iter1->getIndex(iter1, UITER_CURRENT);
917 iter1->previous(iter1); /* ==c1 */
918 if(!UTF_IS_LEAD(iter1->previous(iter1))) {
919 /* trail surrogate code point - make <d800 */
920 c1-=0x2800;
921 }
922 /* go back to behind where the difference is */
923 iter1->move(iter1, index, UITER_ZERO);
924 } else /* 0xe000<=c1<=0xffff */ {
925 /* BMP code point - make <d800 */
926 c1-=0x2800;
927 }
928}
929#endif
930
931U_CAPI int32_t U_EXPORT2
932u_strCompare(const UChar *s1, int32_t length1,
933 const UChar *s2, int32_t length2,
934 UBool codePointOrder) {
935 /* argument checking */
936 if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
937 return 0;
938 }
939 return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
940}
941
942/* String compare in code point order - u_strcmp() compares in code unit order. */
943U_CAPI int32_t U_EXPORT2
944u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
945 return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
946}
947
948U_CAPI int32_t U_EXPORT2
949u_strncmp(const UChar *s1,
950 const UChar *s2,
951 int32_t n)
952{
953 if(n > 0) {
954 int32_t rc;
955 for(;;) {
956 rc = (int32_t)*s1 - (int32_t)*s2;
957 if(rc != 0 || *s1 == 0 || --n == 0) {
958 return rc;
959 }
960 ++s1;
961 ++s2;
962 }
963 } else {
964 return 0;
965 }
966}
967
968U_CAPI int32_t U_EXPORT2
969u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
970 return uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
971}
972
973U_CAPI UChar* U_EXPORT2
974u_strcpy(UChar *dst,
975 const UChar *src)
976{
977 UChar *anchor = dst; /* save a pointer to start of dst */
978
979 while((*(dst++) = *(src++)) != 0) { /* copy string 2 over */
980 }
981
982 return anchor;
983}
984
985U_CAPI UChar* U_EXPORT2
986u_strncpy(UChar *dst,
987 const UChar *src,
988 int32_t n)
989{
990 UChar *anchor = dst; /* save a pointer to start of dst */
991
992 /* copy string 2 over */
993 while(n > 0 && (*(dst++) = *(src++)) != 0) {
994 --n;
995 }
996
997 return anchor;
998}
999
1000U_CAPI int32_t U_EXPORT2
1001u_strlen(const UChar *s)
1002{
1003#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
1004 return uprv_wcslen(s);
1005#else
1006 const UChar *t = s;
1007 while(*t != 0) {
1008 ++t;
1009 }
1010 return t - s;
1011#endif
1012}
1013
1014U_CAPI int32_t U_EXPORT2
1015u_countChar32(const UChar *s, int32_t length) {
1016 int32_t count;
1017
1018 if(s==NULL || length<-1) {
1019 return 0;
1020 }
1021
1022 count=0;
1023 if(length>=0) {
1024 while(length>0) {
1025 ++count;
1026 if(UTF_IS_LEAD(*s) && length>=2 && UTF_IS_TRAIL(*(s+1))) {
1027 s+=2;
1028 length-=2;
1029 } else {
1030 ++s;
1031 --length;
1032 }
1033 }
1034 } else /* length==-1 */ {
1035 UChar c;
1036
1037 for(;;) {
1038 if((c=*s++)==0) {
1039 break;
1040 }
1041 ++count;
1042
1043 /*
1044 * sufficient to look ahead one because of UTF-16;
1045 * safe to look ahead one because at worst that would be the terminating NUL
1046 */
1047 if(UTF_IS_LEAD(c) && UTF_IS_TRAIL(*s)) {
1048 ++s;
1049 }
1050 }
1051 }
1052 return count;
1053}
1054
1055U_CAPI UBool U_EXPORT2
1056u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1057
1058 if(number<0) {
1059 return TRUE;
1060 }
1061 if(s==NULL || length<-1) {
1062 return FALSE;
1063 }
1064
1065 if(length==-1) {
1066 /* s is NUL-terminated */
1067 UChar c;
1068
1069 /* count code points until they exceed */
1070 for(;;) {
1071 if((c=*s++)==0) {
1072 return FALSE;
1073 }
1074 if(number==0) {
1075 return TRUE;
1076 }
1077 if(U16_IS_LEAD(c) && U16_IS_TRAIL(*s)) {
1078 ++s;
1079 }
1080 --number;
1081 }
1082 } else {
1083 /* length>=0 known */
1084 const UChar *limit;
1085 int32_t maxSupplementary;
1086
1087 /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
1088 if(((length+1)/2)>number) {
1089 return TRUE;
1090 }
1091
1092 /* check if s does not even contain enough UChars */
1093 maxSupplementary=length-number;
1094 if(maxSupplementary<=0) {
1095 return FALSE;
1096 }
1097 /* there are maxSupplementary=length-number more UChars than asked-for code points */
1098
1099 /*
1100 * count code points until they exceed and also check that there are
1101 * no more than maxSupplementary supplementary code points (UChar pairs)
1102 */
1103 limit=s+length;
1104 for(;;) {
1105 if(s==limit) {
1106 return FALSE;
1107 }
1108 if(number==0) {
1109 return TRUE;
1110 }
1111 if(U16_IS_LEAD(*s++) && s!=limit && U16_IS_TRAIL(*s)) {
1112 ++s;
1113 if(--maxSupplementary<=0) {
1114 /* too many pairs - too few code points */
1115 return FALSE;
1116 }
1117 }
1118 --number;
1119 }
1120 }
1121}
1122
1123U_CAPI UChar * U_EXPORT2
1124u_memcpy(UChar *dest, const UChar *src, int32_t count) {
1125 return (UChar *)uprv_memcpy(dest, src, count*U_SIZEOF_UCHAR);
1126}
1127
1128U_CAPI UChar * U_EXPORT2
1129u_memmove(UChar *dest, const UChar *src, int32_t count) {
1130 return (UChar *)uprv_memmove(dest, src, count*U_SIZEOF_UCHAR);
1131}
1132
1133U_CAPI UChar * U_EXPORT2
1134u_memset(UChar *dest, UChar c, int32_t count) {
1135 if(count > 0) {
1136 UChar *ptr = dest;
1137 UChar *limit = dest + count;
1138
1139 while (ptr < limit) {
1140 *(ptr++) = c;
1141 }
1142 }
1143 return dest;
1144}
1145
1146U_CAPI int32_t U_EXPORT2
1147u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
1148 if(count > 0) {
1149 const UChar *limit = buf1 + count;
1150 int32_t result;
1151
1152 while (buf1 < limit) {
1153 result = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
1154 if (result != 0) {
1155 return result;
1156 }
1157 buf1++;
1158 buf2++;
1159 }
1160 }
1161 return 0;
1162}
1163
1164U_CAPI int32_t U_EXPORT2
1165u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
1166 return uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
1167}
1168
1169/* conversions between char* and UChar* ------------------------------------- */
1170
/*
 returns the minimum of (the length of the null-terminated string) and n;
 returns 0 for a NULL string.
*/
static int32_t u_astrnlen(const char *s1, int32_t n)
{
    int32_t len = 0;

    if (s1 == NULL)
    {
        return 0;
    }

    /* n counts down to 0; a negative n never reaches 0 before the NUL does */
    for (; n != 0 && *s1 != 0; --n, ++s1)
    {
        ++len;
    }
    return len;
}
1187
1188U_CAPI UChar* U_EXPORT2
1189u_uastrncpy(UChar *ucs1,
1190 const char *s2,
1191 int32_t n)
1192{
1193 UChar *target = ucs1;
1194 UErrorCode err = U_ZERO_ERROR;
1195 UConverter *cnv = u_getDefaultConverter(&err);
1196 if(U_SUCCESS(err) && cnv != NULL) {
1197 ucnv_reset(cnv);
1198 ucnv_toUnicode(cnv,
1199 &target,
1200 ucs1+n,
1201 &s2,
1202 s2+u_astrnlen(s2, n),
1203 NULL,
1204 TRUE,
1205 &err);
1206 ucnv_reset(cnv); /* be good citizens */
1207 u_releaseDefaultConverter(cnv);
1208 if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
1209 *ucs1 = 0; /* failure */
1210 }
1211 if(target < (ucs1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
1212 *target = 0; /* terminate */
1213 }
1214 } else {
1215 *ucs1 = 0;
1216 }
1217 return ucs1;
1218}
1219
1220U_CAPI UChar* U_EXPORT2
1221u_uastrcpy(UChar *ucs1,
1222 const char *s2 )
1223{
1224 UErrorCode err = U_ZERO_ERROR;
1225 UConverter *cnv = u_getDefaultConverter(&err);
1226 if(U_SUCCESS(err) && cnv != NULL) {
1227 ucnv_toUChars(cnv,
1228 ucs1,
1229 MAX_STRLEN,
1230 s2,
1231 uprv_strlen(s2),
1232 &err);
1233 u_releaseDefaultConverter(cnv);
1234 if(U_FAILURE(err)) {
1235 *ucs1 = 0;
1236 }
1237 } else {
1238 *ucs1 = 0;
1239 }
1240 return ucs1;
1241}
1242
1243/*
1244 returns the minimum of (the length of the null-terminated string) and n.
1245*/
1246static int32_t u_ustrnlen(const UChar *ucs1, int32_t n)
1247{
1248 int32_t len = 0;
1249
1250 if (ucs1)
1251 {
1252 while (n-- && *(ucs1++))
1253 {
1254 len++;
1255 }
1256 }
1257 return len;
1258}
1259
1260U_CAPI char* U_EXPORT2
1261u_austrncpy(char *s1,
1262 const UChar *ucs2,
1263 int32_t n)
1264{
1265 char *target = s1;
1266 UErrorCode err = U_ZERO_ERROR;
1267 UConverter *cnv = u_getDefaultConverter(&err);
1268 if(U_SUCCESS(err) && cnv != NULL) {
1269 ucnv_reset(cnv);
1270 ucnv_fromUnicode(cnv,
1271 &target,
1272 s1+n,
1273 &ucs2,
1274 ucs2+u_ustrnlen(ucs2, n),
1275 NULL,
1276 TRUE,
1277 &err);
1278 ucnv_reset(cnv); /* be good citizens */
1279 u_releaseDefaultConverter(cnv);
1280 if(U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR) ) {
1281 *s1 = 0; /* failure */
1282 }
1283 if(target < (s1+n)) { /* U_BUFFER_OVERFLOW_ERROR isn't an err, just means no termination will happen. */
1284 *target = 0; /* terminate */
1285 }
1286 } else {
1287 *s1 = 0;
1288 }
1289 return s1;
1290}
1291
1292U_CAPI char* U_EXPORT2
1293u_austrcpy(char *s1,
1294 const UChar *ucs2 )
1295{
1296 UErrorCode err = U_ZERO_ERROR;
1297 UConverter *cnv = u_getDefaultConverter(&err);
1298 if(U_SUCCESS(err) && cnv != NULL) {
1299 int32_t len = ucnv_fromUChars(cnv,
1300 s1,
1301 MAX_STRLEN,
1302 ucs2,
1303 -1,
1304 &err);
1305 u_releaseDefaultConverter(cnv);
1306 s1[len] = 0;
1307 } else {
1308 *s1 = 0;
1309 }
1310 return s1;
1311}
1312
1313/* mutexed access to a shared default converter ----------------------------- */
1314
1315U_CAPI UConverter* U_EXPORT2
1316u_getDefaultConverter(UErrorCode *status)
1317{
1318 UConverter *converter = NULL;
1319
1320 if (gDefaultConverter != NULL) {
1321 umtx_lock(NULL);
1322
1323 /* need to check to make sure it wasn't taken out from under us */
1324 if (gDefaultConverter != NULL) {
1325 converter = gDefaultConverter;
1326 gDefaultConverter = NULL;
1327 }
1328 umtx_unlock(NULL);
1329 }
1330
1331 /* if the cache was empty, create a converter */
1332 if(converter == NULL) {
1333 converter = ucnv_open(NULL, status);
1334 if(U_FAILURE(*status)) {
1335 return NULL;
1336 }
1337 }
1338
1339 return converter;
1340}
1341
1342U_CAPI void U_EXPORT2
1343u_releaseDefaultConverter(UConverter *converter)
1344{
1345 if(gDefaultConverter == NULL) {
1346 if (converter != NULL) {
1347 ucnv_reset(converter);
1348 }
1349 umtx_lock(NULL);
1350
1351 if(gDefaultConverter == NULL) {
1352 gDefaultConverter = converter;
1353 converter = NULL;
1354 }
1355 umtx_unlock(NULL);
1356 }
1357
1358 if(converter != NULL) {
1359 ucnv_close(converter);
1360 }
1361}
1362
1363/* u_unescape & support fns ------------------------------------------------- */
1364
1365/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
1366static const UChar UNESCAPE_MAP[] = {
1367 /*" 0x22, 0x22 */
1368 /*' 0x27, 0x27 */
1369 /*? 0x3F, 0x3F */
1370 /*\ 0x5C, 0x5C */
1371 /*a*/ 0x61, 0x07,
1372 /*b*/ 0x62, 0x08,
1373 /*e*/ 0x65, 0x1b,
1374 /*f*/ 0x66, 0x0c,
1375 /*n*/ 0x6E, 0x0a,
1376 /*r*/ 0x72, 0x0d,
1377 /*t*/ 0x74, 0x09,
1378 /*v*/ 0x76, 0x0b
1379};
1380enum { UNESCAPE_MAP_LENGTH = sizeof(UNESCAPE_MAP) / sizeof(UNESCAPE_MAP[0]) };
1381
1382/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
1383static int8_t _digit8(UChar c) {
1384 if (c >= 0x0030 && c <= 0x0037) {
1385 return (int8_t)(c - 0x0030);
1386 }
1387 return -1;
1388}
1389
1390/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
1391static int8_t _digit16(UChar c) {
1392 if (c >= 0x0030 && c <= 0x0039) {
1393 return (int8_t)(c - 0x0030);
1394 }
1395 if (c >= 0x0041 && c <= 0x0046) {
1396 return (int8_t)(c - (0x0041 - 10));
1397 }
1398 if (c >= 0x0061 && c <= 0x0066) {
1399 return (int8_t)(c - (0x0061 - 10));
1400 }
1401 return -1;
1402}
1403
1404/* Parse a single escape sequence. Although this method deals in
1405 * UChars, it does not use C++ or UnicodeString. This allows it to
1406 * be used from C contexts. */
1407U_CAPI UChar32 U_EXPORT2
1408u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1409 int32_t *offset,
1410 int32_t length,
1411 void *context) {
1412
1413 int32_t start = *offset;
1414 UChar c;
1415 UChar32 result = 0;
1416 int8_t n = 0;
1417 int8_t minDig = 0;
1418 int8_t maxDig = 0;
1419 int8_t bitsPerDigit = 4;
1420 int8_t dig;
1421 int32_t i;
1422 UBool braces = FALSE;
1423
1424 /* Check that offset is in range */
1425 if (*offset < 0 || *offset >= length) {
1426 goto err;
1427 }
1428
1429 /* Fetch first UChar after '\\' */
1430 c = charAt((*offset)++, context);
1431
1432 /* Convert hexadecimal and octal escapes */
1433 switch (c) {
1434 case 0x0075 /*'u'*/:
1435 minDig = maxDig = 4;
1436 break;
1437 case 0x0055 /*'U'*/:
1438 minDig = maxDig = 8;
1439 break;
1440 case 0x0078 /*'x'*/:
1441 minDig = 1;
1442 if (*offset < length && charAt(*offset, context) == 0x7B /*{*/) {
1443 ++(*offset);
1444 braces = TRUE;
1445 maxDig = 8;
1446 } else {
1447 maxDig = 2;
1448 }
1449 break;
1450 default:
1451 dig = _digit8(c);
1452 if (dig >= 0) {
1453 minDig = 1;
1454 maxDig = 3;
1455 n = 1; /* Already have first octal digit */
1456 bitsPerDigit = 3;
1457 result = dig;
1458 }
1459 break;
1460 }
1461 if (minDig != 0) {
1462 while (*offset < length && n < maxDig) {
1463 c = charAt(*offset, context);
1464 dig = (int8_t)((bitsPerDigit == 3) ? _digit8(c) : _digit16(c));
1465 if (dig < 0) {
1466 break;
1467 }
1468 result = (result << bitsPerDigit) | dig;
1469 ++(*offset);
1470 ++n;
1471 }
1472 if (n < minDig) {
1473 goto err;
1474 }
1475 if (braces) {
1476 if (c != 0x7D /*}*/) {
1477 goto err;
1478 }
1479 ++(*offset);
1480 }
1481 return result;
1482 }
1483
1484 /* Convert C-style escapes in table */
1485 for (i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
1486 if (c == UNESCAPE_MAP[i]) {
1487 return UNESCAPE_MAP[i+1];
1488 } else if (c < UNESCAPE_MAP[i]) {
1489 break;
1490 }
1491 }
1492
1493 /* Map \cX to control-X: X & 0x1F */
1494 if (c == 0x0063 /*'c'*/ && *offset < length) {
1495 c = charAt((*offset)++, context);
1496 if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
1497 UChar c2 = charAt(*offset, context);
1498 if (UTF_IS_SECOND_SURROGATE(c2)) {
1499 ++(*offset);
1500 c = (UChar) UTF16_GET_PAIR_VALUE(c, c2); /* [sic] */
1501 }
1502 }
1503 return 0x1F & c;
1504 }
1505
1506 /* If no special forms are recognized, then consider
1507 * the backslash to generically escape the next character.
1508 * Deal with surrogate pairs. */
1509 if (UTF_IS_FIRST_SURROGATE(c) && *offset < length) {
1510 UChar c2 = charAt(*offset, context);
1511 if (UTF_IS_SECOND_SURROGATE(c2)) {
1512 ++(*offset);
1513 return UTF16_GET_PAIR_VALUE(c, c2);
1514 }
1515 }
1516 return c;
1517
1518 err:
1519 /* Invalid escape sequence */
1520 *offset = start; /* Reset to initial value */
1521 return (UChar32)0xFFFFFFFF;
1522}
1523
1524/* u_unescapeAt() callback to return a UChar from a char* */
1525static UChar U_CALLCONV
1526_charPtr_charAt(int32_t offset, void *context) {
1527 UChar c16;
1528 /* It would be more efficient to access the invariant tables
1529 * directly but there is no API for that. */
1530 u_charsToUChars(((char*) context) + offset, &c16, 1);
1531 return c16;
1532}
1533
1534/* Append an escape-free segment of the text; used by u_unescape() */
1535static void _appendUChars(UChar *dest, int32_t destCapacity,
1536 const char *src, int32_t srcLen) {
1537 if (destCapacity < 0) {
1538 destCapacity = 0;
1539 }
1540 if (srcLen > destCapacity) {
1541 srcLen = destCapacity;
1542 }
1543 u_charsToUChars(src, dest, srcLen);
1544}
1545
1546/* Do an invariant conversion of char* -> UChar*, with escape parsing */
1547U_CAPI int32_t U_EXPORT2
1548u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
1549 const char *segment = src;
1550 int32_t i = 0;
1551 char c;
1552
1553 while ((c=*src) != 0) {
1554 /* '\\' intentionally written as compiler-specific
1555 * character constant to correspond to compiler-specific
1556 * char* constants. */
1557 if (c == '\\') {
1558 int32_t lenParsed = 0;
1559 UChar32 c32;
1560 if (src != segment) {
1561 if (dest != NULL) {
1562 _appendUChars(dest + i, destCapacity - i,
1563 segment, src - segment);
1564 }
1565 i += src - segment;
1566 }
1567 ++src; /* advance past '\\' */
1568 c32 = u_unescapeAt(_charPtr_charAt, &lenParsed, uprv_strlen(src), (void*)src);
1569 if (lenParsed == 0) {
1570 goto err;
1571 }
1572 src += lenParsed; /* advance past escape seq. */
1573 if (dest != NULL && UTF_CHAR_LENGTH(c32) <= (destCapacity - i)) {
1574 UTF_APPEND_CHAR_UNSAFE(dest, i, c32);
1575 } else {
1576 i += UTF_CHAR_LENGTH(c32);
1577 }
1578 segment = src;
1579 } else {
1580 ++src;
1581 }
1582 }
1583 if (src != segment) {
1584 if (dest != NULL) {
1585 _appendUChars(dest + i, destCapacity - i,
1586 segment, src - segment);
1587 }
1588 i += src - segment;
1589 }
1590 if (dest != NULL && i < destCapacity) {
1591 dest[i] = 0;
1592 }
1593 return i;
1594
1595 err:
1596 if (dest != NULL && destCapacity > 0) {
1597 *dest = 0;
1598 }
1599 return 0;
1600}
1601
1602/* C UGrowBuffer implementation --------------------------------------------- */
1603
1604U_CAPI UBool /* U_CALLCONV U_EXPORT2 */
1605u_growBufferFromStatic(void *context,
1606 UChar **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
1607 int32_t length) {
1608 UChar *newBuffer=(UChar *)uprv_malloc(reqCapacity*U_SIZEOF_UCHAR);
1609 if(newBuffer!=NULL) {
1610 if(length>0) {
1611 uprv_memcpy(newBuffer, *pBuffer, length*U_SIZEOF_UCHAR);
1612 }
1613 *pCapacity=reqCapacity;
1614 } else {
1615 *pCapacity=0;
1616 }
1617
1618 /* release the old pBuffer if it was not statically allocated */
1619 if(*pBuffer!=(UChar *)context) {
1620 uprv_free(*pBuffer);
1621 }
1622
1623 *pBuffer=newBuffer;
1624 return (UBool)(newBuffer!=NULL);
1625}
1626
1627/* NUL-termination of strings ----------------------------------------------- */
1628
/**
 * NUL-terminate a string no matter what its type.
 * Set warning and error codes accordingly.
 *
 * Nothing happens if pErrorCode is NULL or already holds a failure code,
 * or if length is negative. Otherwise:
 *   length <  destCapacity: dest[length]=0, and a pending
 *                           U_STRING_NOT_TERMINATED_WARNING is cleared
 *   length == destCapacity: *pErrorCode=U_STRING_NOT_TERMINATED_WARNING
 *   length >  destCapacity: *pErrorCode=U_BUFFER_OVERFLOW_ERROR
 *
 * The expansion is a single statement (do/while(0)), so it is safe in an
 * unbraced if/else, and every argument is parenthesized so that
 * expressions can be passed. Arguments are evaluated more than once; do
 * not pass expressions with side effects.
 */
#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) \
    do { \
        if((pErrorCode)!=NULL && U_SUCCESS(*(pErrorCode))) { \
            /* not a public function, so no complete argument checking */ \
            \
            if((length)<0) { \
                /* assume that the caller handles this */ \
            } else if((length)<(destCapacity)) { \
                /* NUL-terminate the string, the NUL fits */ \
                (dest)[(length)]=0; \
                /* unset the not-terminated warning but leave all others */ \
                if(*(pErrorCode)==U_STRING_NOT_TERMINATED_WARNING) { \
                    *(pErrorCode)=U_ZERO_ERROR; \
                } \
            } else if((length)==(destCapacity)) { \
                /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
                *(pErrorCode)=U_STRING_NOT_TERMINATED_WARNING; \
            } else /* length>destCapacity */ { \
                /* even the string itself did not fit - set an error code */ \
                *(pErrorCode)=U_BUFFER_OVERFLOW_ERROR; \
            } \
        } \
    } while(0)
1654
1655U_CAPI int32_t U_EXPORT2
1656u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1657 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1658 return length;
1659}
1660
1661U_CAPI int32_t U_EXPORT2
1662u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1663 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1664 return length;
1665}
1666
1667U_CAPI int32_t U_EXPORT2
1668u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1669 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1670 return length;
1671}
1672
1673U_CAPI int32_t U_EXPORT2
1674u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
1675 __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
1676 return length;
1677}