1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2005-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: ucasemap.cpp
12 * tab size: 8 (not used)
15 * created on: 2005may06
16 * created by: Markus W. Scherer
18 * Case mapping service object and functions using it.
21 #include "unicode/utypes.h"
22 #include "unicode/brkiter.h"
23 #include "unicode/casemap.h"
24 #include "unicode/edits.h"
25 #include "unicode/ubrk.h"
26 #include "unicode/uloc.h"
27 #include "unicode/ustring.h"
28 #include "unicode/ucasemap.h"
29 #if !UCONFIG_NO_BREAK_ITERATION
30 #include "unicode/utext.h"
32 #include "unicode/utf.h"
33 #include "unicode/utf8.h"
34 #include "unicode/utf16.h"
39 #include "ucasemap_imp.h"
46 // TODO: share with UTF-16? inline in ucasemap_imp.h?
47 int32_t checkOverflowAndEditsError(int32_t destIndex
, int32_t destCapacity
,
48 Edits
*edits
, UErrorCode
&errorCode
) {
49 if (U_SUCCESS(errorCode
)) {
50 if (destIndex
> destCapacity
) {
51 errorCode
= U_BUFFER_OVERFLOW_ERROR
;
52 } else if (edits
!= NULL
) {
53 edits
->copyErrorTo(errorCode
);
65 /* UCaseMap service object -------------------------------------------------- */
67 UCaseMap::UCaseMap(const char *localeID
, uint32_t opts
, UErrorCode
*pErrorCode
) :
68 #if !UCONFIG_NO_BREAK_ITERATION
71 caseLocale(UCASE_LOC_UNKNOWN
), options(opts
) {
72 ucasemap_setLocale(this, localeID
, pErrorCode
);
75 UCaseMap::~UCaseMap() {
76 #if !UCONFIG_NO_BREAK_ITERATION
81 U_CAPI UCaseMap
* U_EXPORT2
82 ucasemap_open(const char *locale
, uint32_t options
, UErrorCode
*pErrorCode
) {
83 if(U_FAILURE(*pErrorCode
)) {
86 UCaseMap
*csm
= new UCaseMap(locale
, options
, pErrorCode
);
88 *pErrorCode
= U_MEMORY_ALLOCATION_ERROR
;
90 } else if (U_FAILURE(*pErrorCode
)) {
98 ucasemap_close(UCaseMap
*csm
) {
102 U_CAPI
const char * U_EXPORT2
103 ucasemap_getLocale(const UCaseMap
*csm
) {
107 U_CAPI
uint32_t U_EXPORT2
108 ucasemap_getOptions(const UCaseMap
*csm
) {
112 U_CAPI
void U_EXPORT2
113 ucasemap_setLocale(UCaseMap
*csm
, const char *locale
, UErrorCode
*pErrorCode
) {
114 if(U_FAILURE(*pErrorCode
)) {
117 if (locale
!= NULL
&& *locale
== 0) {
119 csm
->caseLocale
= UCASE_LOC_ROOT
;
123 int32_t length
=uloc_getName(locale
, csm
->locale
, (int32_t)sizeof(csm
->locale
), pErrorCode
);
124 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
|| length
==sizeof(csm
->locale
)) {
125 *pErrorCode
=U_ZERO_ERROR
;
126 /* we only really need the language code for case mappings */
127 length
=uloc_getLanguage(locale
, csm
->locale
, (int32_t)sizeof(csm
->locale
), pErrorCode
);
129 if(length
==sizeof(csm
->locale
)) {
130 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
132 if(U_SUCCESS(*pErrorCode
)) {
133 csm
->caseLocale
=UCASE_LOC_UNKNOWN
;
134 csm
->caseLocale
= ucase_getCaseLocale(csm
->locale
);
137 csm
->caseLocale
= UCASE_LOC_ROOT
;
141 U_CAPI
void U_EXPORT2
142 ucasemap_setOptions(UCaseMap
*csm
, uint32_t options
, UErrorCode
*pErrorCode
) {
143 if(U_FAILURE(*pErrorCode
)) {
146 csm
->options
=options
;
149 /* UTF-8 string case mappings ----------------------------------------------- */
151 /* TODO(markus): Move to a new, separate utf8case.cpp file. */
153 /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
154 static inline int32_t
155 appendResult(uint8_t *dest
, int32_t destIndex
, int32_t destCapacity
,
156 int32_t result
, const UChar
*s
,
157 int32_t cpLength
, uint32_t options
, icu::Edits
*edits
) {
160 UErrorCode errorCode
;
162 /* decode the result */
164 /* (not) original code point */
166 edits
->addUnchanged(cpLength
);
167 if(options
& UCASEMAP_OMIT_UNCHANGED_TEXT
) {
172 if(destIndex
<destCapacity
&& c
<=0x7f) { // ASCII slightly-fastpath
173 dest
[destIndex
++]=(uint8_t)c
;
178 if(result
<=UCASE_MAX_STRING_LENGTH
) {
179 // string: "result" is the UTF-16 length
180 errorCode
=U_ZERO_ERROR
;
181 if(destIndex
<destCapacity
) {
182 u_strToUTF8((char *)(dest
+destIndex
), destCapacity
-destIndex
, &length
,
183 s
, result
, &errorCode
);
185 u_strToUTF8(NULL
, 0, &length
, s
, result
, &errorCode
);
187 if(U_FAILURE(errorCode
) && errorCode
!= U_BUFFER_OVERFLOW_ERROR
) {
190 if(length
>(INT32_MAX
-destIndex
)) {
191 return -1; // integer overflow
194 edits
->addReplace(cpLength
, length
);
196 // We might have an overflow, but we know the actual length.
197 return destIndex
+length
;
198 } else if(destIndex
<destCapacity
&& result
<=0x7f) { // ASCII slightly-fastpath
199 dest
[destIndex
++]=(uint8_t)result
;
201 edits
->addReplace(cpLength
, 1);
208 edits
->addReplace(cpLength
, length
);
212 // c>=0 single code point
213 if(length
>(INT32_MAX
-destIndex
)) {
214 return -1; // integer overflow
217 if(destIndex
<destCapacity
) {
218 /* append the result */
220 U8_APPEND(dest
, destIndex
, destCapacity
, c
, isError
);
222 /* overflow, nothing written */
232 static inline int32_t
233 appendASCII(uint8_t *dest
, int32_t destIndex
, int32_t destCapacity
, uint8_t c
) {
234 if(destIndex
<destCapacity
) {
236 } else if(destIndex
==INT32_MAX
) {
237 return -1; // integer overflow
242 // See unicode/utf8.h U8_APPEND_UNSAFE().
243 static inline uint8_t getTwoByteLead(UChar32 c
) { return (uint8_t)((c
>> 6) | 0xc0); }
244 static inline uint8_t getTwoByteTrail(UChar32 c
) { return (uint8_t)((c
& 0x3f) | 0x80); }
246 static inline int32_t
247 appendTwoBytes(uint8_t *dest
, int32_t destIndex
, int32_t destCapacity
, UChar32 c
) {
248 U_ASSERT(0x370 <= c
&& c
<= 0x3ff); // 2-byte UTF-8, main Greek block
249 if(2>(INT32_MAX
-destIndex
)) {
250 return -1; // integer overflow
252 int32_t limit
=destIndex
+2;
253 if(limit
<=destCapacity
) {
255 dest
[0]=getTwoByteLead(c
);
256 dest
[1]=getTwoByteTrail(c
);
261 static inline int32_t
262 appendTwoBytes(uint8_t *dest
, int32_t destIndex
, int32_t destCapacity
, const char *s
) {
263 if(2>(INT32_MAX
-destIndex
)) {
264 return -1; // integer overflow
266 int32_t limit
=destIndex
+2;
267 if(limit
<=destCapacity
) {
269 dest
[0]=(uint8_t)s
[0];
270 dest
[1]=(uint8_t)s
[1];
275 static inline int32_t
276 appendUnchanged(uint8_t *dest
, int32_t destIndex
, int32_t destCapacity
,
277 const uint8_t *s
, int32_t length
, uint32_t options
, icu::Edits
*edits
) {
280 edits
->addUnchanged(length
);
281 if(options
& UCASEMAP_OMIT_UNCHANGED_TEXT
) {
285 if(length
>(INT32_MAX
-destIndex
)) {
286 return -1; // integer overflow
288 if((destIndex
+length
)<=destCapacity
) {
289 uprv_memcpy(dest
+destIndex
, s
, length
);
296 static UChar32 U_CALLCONV
297 utf8_caseContextIterator(void *context
, int8_t dir
) {
298 UCaseContext
*csc
=(UCaseContext
*)context
;
302 /* reset for backward iteration */
303 csc
->index
=csc
->cpStart
;
306 /* reset for forward iteration */
307 csc
->index
=csc
->cpLimit
;
310 /* continue current iteration direction */
315 if(csc
->start
<csc
->index
) {
316 U8_PREV((const uint8_t *)csc
->p
, csc
->start
, csc
->index
, c
);
320 if(csc
->index
<csc
->limit
) {
321 U8_NEXT((const uint8_t *)csc
->p
, csc
->index
, csc
->limit
, c
);
329 * Case-maps [srcStart..srcLimit[ but takes
330 * context [0..srcLength[ into account.
333 _caseMap(int32_t caseLocale
, uint32_t options
, UCaseMapFull
*map
,
334 uint8_t *dest
, int32_t destCapacity
,
335 const uint8_t *src
, UCaseContext
*csc
,
336 int32_t srcStart
, int32_t srcLimit
,
338 UErrorCode
&errorCode
) {
339 /* case mapping loop */
340 int32_t srcIndex
=srcStart
;
342 while(srcIndex
<srcLimit
) {
344 csc
->cpStart
=cpStart
=srcIndex
;
346 U8_NEXT(src
, srcIndex
, srcLimit
, c
);
347 csc
->cpLimit
=srcIndex
;
350 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
351 src
+cpStart
, srcIndex
-cpStart
, options
, edits
);
353 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
359 c
=map(c
, utf8_caseContextIterator
, csc
, &s
, caseLocale
);
360 destIndex
= appendResult(dest
, destIndex
, destCapacity
, c
, s
,
361 srcIndex
- cpStart
, options
, edits
);
363 errorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
371 #if !UCONFIG_NO_BREAK_ITERATION
373 U_CFUNC
int32_t U_CALLCONV
374 ucasemap_internalUTF8ToTitle(
375 int32_t caseLocale
, uint32_t options
, BreakIterator
*iter
,
376 uint8_t *dest
, int32_t destCapacity
,
377 const uint8_t *src
, int32_t srcLength
,
379 UErrorCode
&errorCode
) {
380 if(U_FAILURE(errorCode
)) {
384 /* set up local variables */
385 UCaseContext csc
=UCASECONTEXT_INITIALIZER
;
390 UBool isFirstIndex
=TRUE
;
392 /* titlecasing loop */
393 while(prev
<srcLength
) {
394 /* find next index where to titlecase */
402 if(index
==UBRK_DONE
|| index
>srcLength
) {
407 * Unicode 4 & 5 section 3.13 Default Case Operations:
409 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
410 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
411 * cased character F. If F exists, map F to default_title(F); then map each
412 * subsequent character C to default_lower(C).
414 * In this implementation, segment [prev..index[ into 3 parts:
415 * a) uncased characters (copy as-is) [prev..titleStart[
416 * b) first case letter (titlecase) [titleStart..titleLimit[
417 * c) subsequent characters (lowercase) [titleLimit..index[
420 /* find and copy uncased characters [prev..titleStart[ */
421 int32_t titleStart
=prev
;
422 int32_t titleLimit
=prev
;
424 U8_NEXT(src
, titleLimit
, index
, c
);
425 if((options
&U_TITLECASE_NO_BREAK_ADJUSTMENT
)==0 && UCASE_NONE
==ucase_getType(c
)) {
426 /* Adjust the titlecasing index (titleStart) to the next cased character. */
428 titleStart
=titleLimit
;
429 if(titleLimit
==index
) {
431 * only uncased characters in [prev..index[
432 * stop with titleStart==titleLimit==index
436 U8_NEXT(src
, titleLimit
, index
, c
);
437 if(UCASE_NONE
!=ucase_getType(c
)) {
438 break; /* cased letter at [titleStart..titleLimit[ */
441 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
442 src
+prev
, titleStart
-prev
, options
, edits
);
444 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
449 if(titleStart
<titleLimit
) {
450 /* titlecase c which is from [titleStart..titleLimit[ */
452 csc
.cpStart
=titleStart
;
453 csc
.cpLimit
=titleLimit
;
455 c
=ucase_toFullTitle(c
, utf8_caseContextIterator
, &csc
, &s
, caseLocale
);
456 destIndex
=appendResult(dest
, destIndex
, destCapacity
, c
, s
,
457 titleLimit
-titleStart
, options
, edits
);
460 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
461 src
+titleStart
, titleLimit
-titleStart
, options
, edits
);
464 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
468 /* Special case Dutch IJ titlecasing */
469 if (titleStart
+1 < index
&&
470 caseLocale
== UCASE_LOC_DUTCH
&&
471 (src
[titleStart
] == 0x0049 || src
[titleStart
] == 0x0069)) {
472 if (src
[titleStart
+1] == 0x006A) {
473 destIndex
=appendASCII(dest
, destIndex
, destCapacity
, 0x004A);
475 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
479 edits
->addReplace(1, 1);
482 } else if (src
[titleStart
+1] == 0x004A) {
483 // Keep the capital J from getting lowercased.
484 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
485 src
+titleStart
+1, 1, options
, edits
);
487 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
494 /* lowercase [titleLimit..index[ */
495 if(titleLimit
<index
) {
496 if((options
&U_TITLECASE_NO_LOWERCASE
)==0) {
497 /* Normal operation: Lowercase the rest of the word. */
500 caseLocale
, options
, ucase_toFullLower
,
501 dest
+destIndex
, destCapacity
-destIndex
,
505 if(errorCode
==U_BUFFER_OVERFLOW_ERROR
) {
506 errorCode
=U_ZERO_ERROR
;
508 if(U_FAILURE(errorCode
)) {
512 /* Optionally just copy the rest of the word unchanged. */
513 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
514 src
+titleLimit
, index
-titleLimit
, options
, edits
);
516 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
527 return checkOverflowAndEditsError(destIndex
, destCapacity
, edits
, errorCode
);
533 namespace GreekUpper
{
535 UBool
isFollowedByCasedLetter(const uint8_t *s
, int32_t i
, int32_t length
) {
538 U8_NEXT(s
, i
, length
, c
);
539 int32_t type
= ucase_getTypeOrIgnorable(c
);
540 if ((type
& UCASE_IGNORABLE
) != 0) {
541 // Case-ignorable, continue with the loop.
542 } else if (type
!= UCASE_NONE
) {
543 return TRUE
; // Followed by cased letter.
545 return FALSE
; // Uncased and not case-ignorable.
548 return FALSE
; // Not followed by cased letter.
551 // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
552 int32_t toUpper(uint32_t options
,
553 uint8_t *dest
, int32_t destCapacity
,
554 const uint8_t *src
, int32_t srcLength
,
556 UErrorCode
&errorCode
) {
559 for (int32_t i
= 0; i
< srcLength
;) {
560 int32_t nextIndex
= i
;
562 U8_NEXT(src
, nextIndex
, srcLength
, c
);
563 uint32_t nextState
= 0;
564 int32_t type
= ucase_getTypeOrIgnorable(c
);
565 if ((type
& UCASE_IGNORABLE
) != 0) {
566 // c is case-ignorable
567 nextState
|= (state
& AFTER_CASED
);
568 } else if (type
!= UCASE_NONE
) {
570 nextState
|= AFTER_CASED
;
572 uint32_t data
= getLetterData(c
);
574 uint32_t upper
= data
& UPPER_MASK
;
575 // Add a dialytika to this iota or ypsilon vowel
576 // if we removed a tonos from the previous vowel,
577 // and that previous vowel did not also have (or gain) a dialytika.
578 // Adding one only to the final vowel in a longer sequence
579 // (which does not occur in normal writing) would require lookahead.
580 // Set the same flag as for preserving an existing dialytika.
581 if ((data
& HAS_VOWEL
) != 0 && (state
& AFTER_VOWEL_WITH_ACCENT
) != 0 &&
582 (upper
== 0x399 || upper
== 0x3A5)) {
583 data
|= HAS_DIALYTIKA
;
585 int32_t numYpogegrammeni
= 0; // Map each one to a trailing, spacing, capital iota.
586 if ((data
& HAS_YPOGEGRAMMENI
) != 0) {
587 numYpogegrammeni
= 1;
589 // Skip combining diacritics after this Greek letter.
590 int32_t nextNextIndex
= nextIndex
;
591 while (nextIndex
< srcLength
) {
593 U8_NEXT(src
, nextNextIndex
, srcLength
, c2
);
594 uint32_t diacriticData
= getDiacriticData(c2
);
595 if (diacriticData
!= 0) {
596 data
|= diacriticData
;
597 if ((diacriticData
& HAS_YPOGEGRAMMENI
) != 0) {
600 nextIndex
= nextNextIndex
;
602 break; // not a Greek diacritic
605 if ((data
& HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA
) == HAS_VOWEL_AND_ACCENT
) {
606 nextState
|= AFTER_VOWEL_WITH_ACCENT
;
608 // Map according to Greek rules.
609 UBool addTonos
= FALSE
;
610 if (upper
== 0x397 &&
611 (data
& HAS_ACCENT
) != 0 &&
612 numYpogegrammeni
== 0 &&
613 (state
& AFTER_CASED
) == 0 &&
614 !isFollowedByCasedLetter(src
, nextIndex
, srcLength
)) {
615 // Keep disjunctive "or" with (only) a tonos.
616 // We use the same "word boundary" conditions as for the Final_Sigma test.
617 if (i
== nextIndex
) {
618 upper
= 0x389; // Preserve the precomposed form.
622 } else if ((data
& HAS_DIALYTIKA
) != 0) {
623 // Preserve a vowel with dialytika in precomposed form if it exists.
624 if (upper
== 0x399) {
626 data
&= ~HAS_EITHER_DIALYTIKA
;
627 } else if (upper
== 0x3A5) {
629 data
&= ~HAS_EITHER_DIALYTIKA
;
635 // Find out first whether we are changing the text.
636 U_ASSERT(0x370 <= upper
&& upper
<= 0x3ff); // 2-byte UTF-8, main Greek block
637 change
= (i
+ 2) > nextIndex
||
638 src
[i
] != getTwoByteLead(upper
) || src
[i
+ 1] != getTwoByteTrail(upper
) ||
639 numYpogegrammeni
> 0;
641 if ((data
& HAS_EITHER_DIALYTIKA
) != 0) {
642 change
|= (i2
+ 2) > nextIndex
||
643 src
[i2
] != (uint8_t)u8
"\u0308"[0] ||
644 src
[i2
+ 1] != (uint8_t)u8
"\u0308"[1];
648 change
|= (i2
+ 2) > nextIndex
||
649 src
[i2
] != (uint8_t)u8
"\u0301"[0] ||
650 src
[i2
+ 1] != (uint8_t)u8
"\u0301"[1];
653 int32_t oldLength
= nextIndex
- i
;
654 int32_t newLength
= (i2
- i
) + numYpogegrammeni
* 2; // 2 bytes per U+0399
655 change
|= oldLength
!= newLength
;
658 edits
->addReplace(oldLength
, newLength
);
662 edits
->addUnchanged(oldLength
);
664 // Write unchanged text?
665 change
= (options
& UCASEMAP_OMIT_UNCHANGED_TEXT
) == 0;
670 destIndex
=appendTwoBytes(dest
, destIndex
, destCapacity
, upper
);
671 if (destIndex
>= 0 && (data
& HAS_EITHER_DIALYTIKA
) != 0) {
672 destIndex
=appendTwoBytes(dest
, destIndex
, destCapacity
, u8
"\u0308"); // restore or add a dialytika
674 if (destIndex
>= 0 && addTonos
) {
675 destIndex
=appendTwoBytes(dest
, destIndex
, destCapacity
, u8
"\u0301");
677 while (destIndex
>= 0 && numYpogegrammeni
> 0) {
678 destIndex
=appendTwoBytes(dest
, destIndex
, destCapacity
, u8
"\u0399");
682 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
688 c
=ucase_toFullUpper(c
, NULL
, NULL
, &s
, UCASE_LOC_GREEK
);
689 destIndex
= appendResult(dest
, destIndex
, destCapacity
, c
, s
,
690 nextIndex
- i
, options
, edits
);
692 errorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
697 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
698 src
+i
, nextIndex
-i
, options
, edits
);
700 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
711 } // namespace GreekUpper
714 static int32_t U_CALLCONV
715 ucasemap_internalUTF8ToLower(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_UNUSED
716 uint8_t *dest
, int32_t destCapacity
,
717 const uint8_t *src
, int32_t srcLength
,
719 UErrorCode
&errorCode
) {
720 UCaseContext csc
=UCASECONTEXT_INITIALIZER
;
723 int32_t destIndex
= _caseMap(
724 caseLocale
, options
, ucase_toFullLower
,
726 src
, &csc
, 0, srcLength
,
728 return checkOverflowAndEditsError(destIndex
, destCapacity
, edits
, errorCode
);
731 static int32_t U_CALLCONV
732 ucasemap_internalUTF8ToUpper(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_UNUSED
733 uint8_t *dest
, int32_t destCapacity
,
734 const uint8_t *src
, int32_t srcLength
,
736 UErrorCode
&errorCode
) {
738 if (caseLocale
== UCASE_LOC_GREEK
) {
739 destIndex
= GreekUpper::toUpper(options
, dest
, destCapacity
,
740 src
, srcLength
, edits
, errorCode
);
742 UCaseContext csc
=UCASECONTEXT_INITIALIZER
;
745 destIndex
= _caseMap(
746 caseLocale
, options
, ucase_toFullUpper
,
748 src
, &csc
, 0, srcLength
,
751 return checkOverflowAndEditsError(destIndex
, destCapacity
, edits
, errorCode
);
754 static int32_t U_CALLCONV
755 ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options
, UCASEMAP_BREAK_ITERATOR_UNUSED
756 uint8_t *dest
, int32_t destCapacity
,
757 const uint8_t *src
, int32_t srcLength
,
759 UErrorCode
&errorCode
) {
760 /* case mapping loop */
761 int32_t srcIndex
= 0;
762 int32_t destIndex
= 0;
763 while (srcIndex
< srcLength
) {
764 int32_t cpStart
= srcIndex
;
766 U8_NEXT(src
, srcIndex
, srcLength
, c
);
769 destIndex
=appendUnchanged(dest
, destIndex
, destCapacity
,
770 src
+cpStart
, srcIndex
-cpStart
, options
, edits
);
772 errorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
778 c
= ucase_toFullFolding(c
, &s
, options
);
779 destIndex
= appendResult(dest
, destIndex
, destCapacity
, c
, s
,
780 srcIndex
- cpStart
, options
, edits
);
782 errorCode
= U_INDEX_OUTOFBOUNDS_ERROR
;
787 return checkOverflowAndEditsError(destIndex
, destCapacity
, edits
, errorCode
);
791 ucasemap_mapUTF8(int32_t caseLocale
, uint32_t options
, UCASEMAP_BREAK_ITERATOR_PARAM
792 uint8_t *dest
, int32_t destCapacity
,
793 const uint8_t *src
, int32_t srcLength
,
794 UTF8CaseMapper
*stringCaseMapper
,
796 UErrorCode
&errorCode
) {
799 /* check argument values */
800 if(U_FAILURE(errorCode
)) {
803 if( destCapacity
<0 ||
804 (dest
==NULL
&& destCapacity
>0) ||
808 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
812 /* get the string length */
814 srcLength
=(int32_t)uprv_strlen((const char *)src
);
817 /* check for overlapping source and destination */
819 ((src
>=dest
&& src
<(dest
+destCapacity
)) ||
820 (dest
>=src
&& dest
<(src
+srcLength
)))
822 errorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
829 destLength
=stringCaseMapper(caseLocale
, options
, UCASEMAP_BREAK_ITERATOR
830 dest
, destCapacity
, src
, srcLength
, edits
, errorCode
);
831 return u_terminateChars((char *)dest
, destCapacity
, destLength
, &errorCode
);
834 /* public API functions */
836 U_CAPI
int32_t U_EXPORT2
837 ucasemap_utf8ToLower(const UCaseMap
*csm
,
838 char *dest
, int32_t destCapacity
,
839 const char *src
, int32_t srcLength
,
840 UErrorCode
*pErrorCode
) {
841 return ucasemap_mapUTF8(
842 csm
->caseLocale
, csm
->options
, UCASEMAP_BREAK_ITERATOR_NULL
843 (uint8_t *)dest
, destCapacity
,
844 (const uint8_t *)src
, srcLength
,
845 ucasemap_internalUTF8ToLower
, NULL
, *pErrorCode
);
848 U_CAPI
int32_t U_EXPORT2
849 ucasemap_utf8ToUpper(const UCaseMap
*csm
,
850 char *dest
, int32_t destCapacity
,
851 const char *src
, int32_t srcLength
,
852 UErrorCode
*pErrorCode
) {
853 return ucasemap_mapUTF8(
854 csm
->caseLocale
, csm
->options
, UCASEMAP_BREAK_ITERATOR_NULL
855 (uint8_t *)dest
, destCapacity
,
856 (const uint8_t *)src
, srcLength
,
857 ucasemap_internalUTF8ToUpper
, NULL
, *pErrorCode
);
860 U_CAPI
int32_t U_EXPORT2
861 ucasemap_utf8FoldCase(const UCaseMap
*csm
,
862 char *dest
, int32_t destCapacity
,
863 const char *src
, int32_t srcLength
,
864 UErrorCode
*pErrorCode
) {
865 return ucasemap_mapUTF8(
866 UCASE_LOC_ROOT
, csm
->options
, UCASEMAP_BREAK_ITERATOR_NULL
867 (uint8_t *)dest
, destCapacity
,
868 (const uint8_t *)src
, srcLength
,
869 ucasemap_internalUTF8Fold
, NULL
, *pErrorCode
);
874 int32_t CaseMap::utf8ToLower(
875 const char *locale
, uint32_t options
,
876 const char *src
, int32_t srcLength
,
877 char *dest
, int32_t destCapacity
, Edits
*edits
,
878 UErrorCode
&errorCode
) {
879 return ucasemap_mapUTF8(
880 ustrcase_getCaseLocale(locale
), options
, UCASEMAP_BREAK_ITERATOR_NULL
881 (uint8_t *)dest
, destCapacity
,
882 (const uint8_t *)src
, srcLength
,
883 ucasemap_internalUTF8ToLower
, edits
, errorCode
);
886 int32_t CaseMap::utf8ToUpper(
887 const char *locale
, uint32_t options
,
888 const char *src
, int32_t srcLength
,
889 char *dest
, int32_t destCapacity
, Edits
*edits
,
890 UErrorCode
&errorCode
) {
891 return ucasemap_mapUTF8(
892 ustrcase_getCaseLocale(locale
), options
, UCASEMAP_BREAK_ITERATOR_NULL
893 (uint8_t *)dest
, destCapacity
,
894 (const uint8_t *)src
, srcLength
,
895 ucasemap_internalUTF8ToUpper
, edits
, errorCode
);
898 int32_t CaseMap::utf8Fold(
900 const char *src
, int32_t srcLength
,
901 char *dest
, int32_t destCapacity
, Edits
*edits
,
902 UErrorCode
&errorCode
) {
903 return ucasemap_mapUTF8(
904 UCASE_LOC_ROOT
, options
, UCASEMAP_BREAK_ITERATOR_NULL
905 (uint8_t *)dest
, destCapacity
,
906 (const uint8_t *)src
, srcLength
,
907 ucasemap_internalUTF8Fold
, edits
, errorCode
);