1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
12 * Implements APIs for the ICU's codeset conversion library;
13 * mostly calls through internal functions;
14 * created by Bertrand A. Damiba
16 * Modification History:
18 * Date Name Description
19 * 04/04/99 helena Fixed internal header inclusion.
20 * 05/09/00 helena Added implementation to handle fallback mappings.
21 * 06/20/2000 helena OS/400 port changes; mostly typecast.
24 #include "unicode/utypes.h"
26 #if !UCONFIG_NO_CONVERSION
30 #include "unicode/ustring.h"
31 #include "unicode/ucnv.h"
32 #include "unicode/ucnv_err.h"
33 #include "unicode/uset.h"
34 #include "unicode/utf.h"
35 #include "unicode/utf16.h"
46 /* size of intermediate and preflighting buffers in ucnv_convert() */
47 #define CHUNK_SIZE 1024
49 typedef struct UAmbiguousConverter
{
51 const UChar variant5c
;
52 } UAmbiguousConverter
;
54 static const UAmbiguousConverter ambiguousConverters
[]={
55 { "ibm-897_P100-1995", 0xa5 },
56 { "ibm-942_P120-1999", 0xa5 },
57 { "ibm-943_P130-1999", 0xa5 },
58 { "ibm-946_P100-1995", 0xa5 },
59 { "ibm-33722_P120-1999", 0xa5 },
60 { "ibm-1041_P100-1995", 0xa5 },
61 /*{ "ibm-54191_P100-2006", 0xa5 },*/
62 /*{ "ibm-62383_P100-2007", 0xa5 },*/
63 /*{ "ibm-891_P100-1995", 0x20a9 },*/
64 { "ibm-944_P100-1995", 0x20a9 },
65 { "ibm-949_P110-1999", 0x20a9 },
66 { "ibm-1363_P110-1997", 0x20a9 },
67 { "ISO_2022,locale=ko,version=0", 0x20a9 },
68 { "ibm-1088_P100-1995", 0x20a9 }
71 /*Calls through createConverter */
72 U_CAPI UConverter
* U_EXPORT2
73 ucnv_open (const char *name
,
78 if (err
== NULL
|| U_FAILURE (*err
)) {
82 r
= ucnv_createConverter(NULL
, name
, err
);
86 U_CAPI UConverter
* U_EXPORT2
87 ucnv_openPackage (const char *packageName
, const char *converterName
, UErrorCode
* err
)
89 return ucnv_createConverterFromPackage(packageName
, converterName
, err
);
92 /*Extracts the UChar* to a char* and calls through createConverter */
93 U_CAPI UConverter
* U_EXPORT2
94 ucnv_openU (const UChar
* name
,
97 char asciiName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
99 if (err
== NULL
|| U_FAILURE(*err
))
102 return ucnv_open (NULL
, err
);
103 if (u_strlen(name
) >= UCNV_MAX_CONVERTER_NAME_LENGTH
)
105 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
108 return ucnv_open(u_austrcpy(asciiName
, name
), err
);
111 /* Copy the string that is represented by the UConverterPlatform enum
112 * @param platformString An output buffer
113 * @param platform An enum representing a platform
114 * @return the length of the copied string.
117 ucnv_copyPlatformString(char *platformString
, UConverterPlatform pltfrm
)
122 uprv_strcpy(platformString
, "ibm-");
128 /* default to empty string */
133 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
134 *through createConverter*/
135 U_CAPI UConverter
* U_EXPORT2
136 ucnv_openCCSID (int32_t codepage
,
137 UConverterPlatform platform
,
140 char myName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
143 if (err
== NULL
|| U_FAILURE (*err
))
146 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
147 myNameLen
= ucnv_copyPlatformString(myName
, platform
);
148 T_CString_integerToString(myName
+ myNameLen
, codepage
, 10);
150 return ucnv_createConverter(NULL
, myName
, err
);
153 /* Creating a temporary stack-based object that can be used in one thread,
154 and created from a converter that is shared across threads.
157 U_CAPI UConverter
* U_EXPORT2
158 ucnv_safeClone(const UConverter
* cnv
, void *stackBuffer
, int32_t *pBufferSize
, UErrorCode
*status
)
160 UConverter
*localConverter
, *allocatedConverter
;
161 int32_t stackBufferSize
;
162 int32_t bufferSizeNeeded
;
164 UConverterToUnicodeArgs toUArgs
= {
165 sizeof(UConverterToUnicodeArgs
),
174 UConverterFromUnicodeArgs fromUArgs
= {
175 sizeof(UConverterFromUnicodeArgs
),
185 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE
);
187 if (status
== NULL
|| U_FAILURE(*status
)){
188 UTRACE_EXIT_STATUS(status
? *status
: U_ILLEGAL_ARGUMENT_ERROR
);
193 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
194 UTRACE_EXIT_STATUS(*status
);
198 UTRACE_DATA3(UTRACE_OPEN_CLOSE
, "clone converter %s at %p into stackBuffer %p",
199 ucnv_getName(cnv
, status
), cnv
, stackBuffer
);
201 if (cnv
->sharedData
->impl
->safeClone
!= NULL
) {
202 /* call the custom safeClone function for sizing */
203 bufferSizeNeeded
= 0;
204 cnv
->sharedData
->impl
->safeClone(cnv
, NULL
, &bufferSizeNeeded
, status
);
205 if (U_FAILURE(*status
)) {
206 UTRACE_EXIT_STATUS(*status
);
212 /* inherent sizing */
213 bufferSizeNeeded
= sizeof(UConverter
);
216 if (pBufferSize
== NULL
) {
218 pBufferSize
= &stackBufferSize
;
220 stackBufferSize
= *pBufferSize
;
221 if (stackBufferSize
<= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
222 *pBufferSize
= bufferSizeNeeded
;
223 UTRACE_EXIT_VALUE(bufferSizeNeeded
);
228 /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter.
229 * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed.
232 uintptr_t p
= reinterpret_cast<uintptr_t>(stackBuffer
);
233 uintptr_t aligned_p
= (p
+ alignof(UConverter
) - 1) & ~(alignof(UConverter
) - 1);
234 ptrdiff_t pointerAdjustment
= aligned_p
- p
;
235 if (bufferSizeNeeded
+ pointerAdjustment
<= stackBufferSize
) {
236 stackBuffer
= reinterpret_cast<void *>(aligned_p
);
237 stackBufferSize
-= static_cast<int32_t>(pointerAdjustment
);
239 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
244 /* Now, see if we must allocate any memory */
245 if (stackBufferSize
< bufferSizeNeeded
|| stackBuffer
== NULL
)
247 /* allocate one here...*/
248 localConverter
= allocatedConverter
= (UConverter
*) uprv_malloc (bufferSizeNeeded
);
250 if(localConverter
== NULL
) {
251 *status
= U_MEMORY_ALLOCATION_ERROR
;
252 UTRACE_EXIT_STATUS(*status
);
255 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
257 /* record the fact that memory was allocated */
258 *pBufferSize
= bufferSizeNeeded
;
260 /* just use the stack buffer */
261 localConverter
= (UConverter
*) stackBuffer
;
262 allocatedConverter
= NULL
;
265 uprv_memset(localConverter
, 0, bufferSizeNeeded
);
267 /* Copy initial state */
268 uprv_memcpy(localConverter
, cnv
, sizeof(UConverter
));
269 localConverter
->isCopyLocal
= localConverter
->isExtraLocal
= FALSE
;
271 /* copy the substitution string */
272 if (cnv
->subChars
== (uint8_t *)cnv
->subUChars
) {
273 localConverter
->subChars
= (uint8_t *)localConverter
->subUChars
;
275 localConverter
->subChars
= (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
276 if (localConverter
->subChars
== NULL
) {
277 uprv_free(allocatedConverter
);
278 UTRACE_EXIT_STATUS(*status
);
281 uprv_memcpy(localConverter
->subChars
, cnv
->subChars
, UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
284 /* now either call the safeclone fcn or not */
285 if (cnv
->sharedData
->impl
->safeClone
!= NULL
) {
286 /* call the custom safeClone function */
287 localConverter
= cnv
->sharedData
->impl
->safeClone(cnv
, localConverter
, pBufferSize
, status
);
290 if(localConverter
==NULL
|| U_FAILURE(*status
)) {
291 if (allocatedConverter
!= NULL
&& allocatedConverter
->subChars
!= (uint8_t *)allocatedConverter
->subUChars
) {
292 uprv_free(allocatedConverter
->subChars
);
294 uprv_free(allocatedConverter
);
295 UTRACE_EXIT_STATUS(*status
);
299 /* increment refcount of shared data if needed */
300 if (cnv
->sharedData
->isReferenceCounted
) {
301 ucnv_incrementRefCount(cnv
->sharedData
);
304 if(localConverter
== (UConverter
*)stackBuffer
) {
305 /* we're using user provided data - set to not destroy */
306 localConverter
->isCopyLocal
= TRUE
;
309 /* allow callback functions to handle any memory allocation */
310 toUArgs
.converter
= fromUArgs
.converter
= localConverter
;
311 cbErr
= U_ZERO_ERROR
;
312 cnv
->fromCharErrorBehaviour(cnv
->toUContext
, &toUArgs
, NULL
, 0, UCNV_CLONE
, &cbErr
);
313 cbErr
= U_ZERO_ERROR
;
314 cnv
->fromUCharErrorBehaviour(cnv
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_CLONE
, &cbErr
);
316 UTRACE_EXIT_PTR_STATUS(localConverter
, *status
);
317 return localConverter
;
322 /*Decreases the reference counter in the shared immutable section of the object
323 *and frees the mutable part*/
325 U_CAPI
void U_EXPORT2
326 ucnv_close (UConverter
* converter
)
328 UErrorCode errorCode
= U_ZERO_ERROR
;
330 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE
);
332 if (converter
== NULL
)
338 UTRACE_DATA3(UTRACE_OPEN_CLOSE
, "close converter %s at %p, isCopyLocal=%b",
339 ucnv_getName(converter
, &errorCode
), converter
, converter
->isCopyLocal
);
341 /* In order to speed up the close, only call the callbacks when they have been changed.
342 This performance check will only work when the callbacks are set within a shared library
343 or from user code that statically links this code. */
344 /* first, notify the callback functions that the converter is closed */
345 if (converter
->fromCharErrorBehaviour
!= UCNV_TO_U_DEFAULT_CALLBACK
) {
346 UConverterToUnicodeArgs toUArgs
= {
347 sizeof(UConverterToUnicodeArgs
),
357 toUArgs
.converter
= converter
;
358 errorCode
= U_ZERO_ERROR
;
359 converter
->fromCharErrorBehaviour(converter
->toUContext
, &toUArgs
, NULL
, 0, UCNV_CLOSE
, &errorCode
);
361 if (converter
->fromUCharErrorBehaviour
!= UCNV_FROM_U_DEFAULT_CALLBACK
) {
362 UConverterFromUnicodeArgs fromUArgs
= {
363 sizeof(UConverterFromUnicodeArgs
),
372 fromUArgs
.converter
= converter
;
373 errorCode
= U_ZERO_ERROR
;
374 converter
->fromUCharErrorBehaviour(converter
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_CLOSE
, &errorCode
);
377 if (converter
->sharedData
->impl
->close
!= NULL
) {
378 converter
->sharedData
->impl
->close(converter
);
381 if (converter
->subChars
!= (uint8_t *)converter
->subUChars
) {
382 uprv_free(converter
->subChars
);
385 if (converter
->sharedData
->isReferenceCounted
) {
386 ucnv_unloadSharedDataIfReady(converter
->sharedData
);
389 if(!converter
->isCopyLocal
){
390 uprv_free(converter
);
396 /*returns a single Name from the list, will return NULL if out of bounds
398 U_CAPI
const char* U_EXPORT2
399 ucnv_getAvailableName (int32_t n
)
401 if (0 <= n
&& n
<= 0xffff) {
402 UErrorCode err
= U_ZERO_ERROR
;
403 const char *name
= ucnv_bld_getAvailableConverter((uint16_t)n
, &err
);
404 if (U_SUCCESS(err
)) {
411 U_CAPI
int32_t U_EXPORT2
412 ucnv_countAvailable ()
414 UErrorCode err
= U_ZERO_ERROR
;
415 return ucnv_bld_countAvailableConverters(&err
);
418 U_CAPI
void U_EXPORT2
419 ucnv_getSubstChars (const UConverter
* converter
,
424 if (U_FAILURE (*err
))
427 if (converter
->subCharLen
<= 0) {
428 /* Unicode string or empty string from ucnv_setSubstString(). */
433 if (*len
< converter
->subCharLen
) /*not enough space in subChars */
435 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
439 uprv_memcpy (mySubChar
, converter
->subChars
, converter
->subCharLen
); /*fills in the subchars */
440 *len
= converter
->subCharLen
; /*store # of bytes copied to buffer */
443 U_CAPI
void U_EXPORT2
444 ucnv_setSubstChars (UConverter
* converter
,
445 const char *mySubChar
,
449 if (U_FAILURE (*err
))
452 /*Makes sure that the subChar is within the codepages char length boundaries */
453 if ((len
> converter
->sharedData
->staticData
->maxBytesPerChar
)
454 || (len
< converter
->sharedData
->staticData
->minBytesPerChar
))
456 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
460 uprv_memcpy (converter
->subChars
, mySubChar
, len
); /*copies the subchars */
461 converter
->subCharLen
= len
; /*sets the new len */
464 * There is currently (2001Feb) no separate API to set/get subChar1.
465 * In order to always have subChar written after it is explicitly set,
466 * we set subChar1 to 0.
468 converter
->subChar1
= 0;
473 U_CAPI
void U_EXPORT2
474 ucnv_setSubstString(UConverter
*cnv
,
478 alignas(UConverter
) char cloneBuffer
[U_CNV_SAFECLONE_BUFFERSIZE
];
479 char chars
[UCNV_ERROR_BUFFER_LENGTH
];
483 int32_t cloneSize
, length8
;
485 /* Let the following functions check all arguments. */
486 cloneSize
= sizeof(cloneBuffer
);
487 clone
= ucnv_safeClone(cnv
, cloneBuffer
, &cloneSize
, err
);
488 ucnv_setFromUCallBack(clone
, UCNV_FROM_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, err
);
489 length8
= ucnv_fromUChars(clone
, chars
, (int32_t)sizeof(chars
), s
, length
, err
);
491 if (U_FAILURE(*err
)) {
495 if (cnv
->sharedData
->impl
->writeSub
== NULL
496 #if !UCONFIG_NO_LEGACY_CONVERSION
497 || (cnv
->sharedData
->staticData
->conversionType
== UCNV_MBCS
&&
498 ucnv_MBCSGetType(cnv
) != UCNV_EBCDIC_STATEFUL
)
501 /* The converter is not stateful. Store the charset bytes as a fixed string. */
502 subChars
= (uint8_t *)chars
;
505 * The converter has a non-default writeSub() function, indicating
506 * that it is stateful.
507 * Store the Unicode string for on-the-fly conversion for correct
510 if (length
> UCNV_ERROR_BUFFER_LENGTH
) {
512 * Should not occur. The converter should output at least one byte
513 * per UChar, which means that ucnv_fromUChars() should catch all
516 *err
= U_BUFFER_OVERFLOW_ERROR
;
519 subChars
= (uint8_t *)s
;
521 length
= u_strlen(s
);
523 length8
= length
* U_SIZEOF_UCHAR
;
527 * For storing the substitution string, select either the small buffer inside
528 * UConverter or allocate a subChars buffer.
530 if (length8
> UCNV_MAX_SUBCHAR_LEN
) {
531 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
532 if (cnv
->subChars
== (uint8_t *)cnv
->subUChars
) {
533 /* Allocate a new buffer for the string. */
534 cnv
->subChars
= (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
535 if (cnv
->subChars
== NULL
) {
536 cnv
->subChars
= (uint8_t *)cnv
->subUChars
;
537 *err
= U_MEMORY_ALLOCATION_ERROR
;
540 uprv_memset(cnv
->subChars
, 0, UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
544 /* Copy the substitution string into the UConverter or its subChars buffer. */
548 uprv_memcpy(cnv
->subChars
, subChars
, length8
);
549 if (subChars
== (uint8_t *)chars
) {
550 cnv
->subCharLen
= (int8_t)length8
;
551 } else /* subChars == s */ {
552 cnv
->subCharLen
= (int8_t)-length
;
556 /* See comment in ucnv_setSubstChars(). */
560 /*resets the internal states of a converter
561 *goal : have the same behaviour than a freshly created converter
563 static void _reset(UConverter
*converter
, UConverterResetChoice choice
,
564 UBool callCallback
) {
565 if(converter
== NULL
) {
570 /* first, notify the callback functions that the converter is reset */
571 UErrorCode errorCode
;
573 if(choice
<=UCNV_RESET_TO_UNICODE
&& converter
->fromCharErrorBehaviour
!= UCNV_TO_U_DEFAULT_CALLBACK
) {
574 UConverterToUnicodeArgs toUArgs
= {
575 sizeof(UConverterToUnicodeArgs
),
584 toUArgs
.converter
= converter
;
585 errorCode
= U_ZERO_ERROR
;
586 converter
->fromCharErrorBehaviour(converter
->toUContext
, &toUArgs
, NULL
, 0, UCNV_RESET
, &errorCode
);
588 if(choice
!=UCNV_RESET_TO_UNICODE
&& converter
->fromUCharErrorBehaviour
!= UCNV_FROM_U_DEFAULT_CALLBACK
) {
589 UConverterFromUnicodeArgs fromUArgs
= {
590 sizeof(UConverterFromUnicodeArgs
),
599 fromUArgs
.converter
= converter
;
600 errorCode
= U_ZERO_ERROR
;
601 converter
->fromUCharErrorBehaviour(converter
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_RESET
, &errorCode
);
605 /* now reset the converter itself */
606 if(choice
<=UCNV_RESET_TO_UNICODE
) {
607 converter
->toUnicodeStatus
= converter
->sharedData
->toUnicodeStatus
;
609 converter
->toULength
= 0;
610 converter
->invalidCharLength
= converter
->UCharErrorBufferLength
= 0;
611 converter
->preToULength
= 0;
613 if(choice
!=UCNV_RESET_TO_UNICODE
) {
614 converter
->fromUnicodeStatus
= 0;
615 converter
->fromUChar32
= 0;
616 converter
->invalidUCharLength
= converter
->charErrorBufferLength
= 0;
617 converter
->preFromUFirstCP
= U_SENTINEL
;
618 converter
->preFromULength
= 0;
621 if (converter
->sharedData
->impl
->reset
!= NULL
) {
622 /* call the custom reset function */
623 converter
->sharedData
->impl
->reset(converter
, choice
);
627 U_CAPI
void U_EXPORT2
628 ucnv_reset(UConverter
*converter
)
630 _reset(converter
, UCNV_RESET_BOTH
, TRUE
);
633 U_CAPI
void U_EXPORT2
634 ucnv_resetToUnicode(UConverter
*converter
)
636 _reset(converter
, UCNV_RESET_TO_UNICODE
, TRUE
);
639 U_CAPI
void U_EXPORT2
640 ucnv_resetFromUnicode(UConverter
*converter
)
642 _reset(converter
, UCNV_RESET_FROM_UNICODE
, TRUE
);
645 U_CAPI
int8_t U_EXPORT2
646 ucnv_getMaxCharSize (const UConverter
* converter
)
648 return converter
->maxBytesPerUChar
;
652 U_CAPI
int8_t U_EXPORT2
653 ucnv_getMinCharSize (const UConverter
* converter
)
655 return converter
->sharedData
->staticData
->minBytesPerChar
;
658 U_CAPI
const char* U_EXPORT2
659 ucnv_getName (const UConverter
* converter
, UErrorCode
* err
)
662 if (U_FAILURE (*err
))
664 if(converter
->sharedData
->impl
->getName
){
665 const char* temp
= converter
->sharedData
->impl
->getName(converter
);
669 return converter
->sharedData
->staticData
->name
;
672 U_CAPI
int32_t U_EXPORT2
673 ucnv_getCCSID(const UConverter
* converter
,
677 if (U_FAILURE (*err
))
680 ccsid
= converter
->sharedData
->staticData
->codepage
;
682 /* Rare case. This is for cases like gb18030,
683 which doesn't have an IBM canonical name, but does have an IBM alias. */
684 const char *standardName
= ucnv_getStandardName(ucnv_getName(converter
, err
), "IBM", err
);
685 if (U_SUCCESS(*err
) && standardName
) {
686 const char *ccsidStr
= uprv_strchr(standardName
, '-');
688 ccsid
= (int32_t)atol(ccsidStr
+1); /* +1 to skip '-' */
696 U_CAPI UConverterPlatform U_EXPORT2
697 ucnv_getPlatform (const UConverter
* converter
,
700 if (U_FAILURE (*err
))
703 return (UConverterPlatform
)converter
->sharedData
->staticData
->platform
;
706 U_CAPI
void U_EXPORT2
707 ucnv_getToUCallBack (const UConverter
* converter
,
708 UConverterToUCallback
*action
,
709 const void **context
)
711 *action
= converter
->fromCharErrorBehaviour
;
712 *context
= converter
->toUContext
;
715 U_CAPI
void U_EXPORT2
716 ucnv_getFromUCallBack (const UConverter
* converter
,
717 UConverterFromUCallback
*action
,
718 const void **context
)
720 *action
= converter
->fromUCharErrorBehaviour
;
721 *context
= converter
->fromUContext
;
724 U_CAPI
void U_EXPORT2
725 ucnv_setToUCallBack (UConverter
* converter
,
726 UConverterToUCallback newAction
,
727 const void* newContext
,
728 UConverterToUCallback
*oldAction
,
729 const void** oldContext
,
732 if (U_FAILURE (*err
))
734 if (oldAction
) *oldAction
= converter
->fromCharErrorBehaviour
;
735 converter
->fromCharErrorBehaviour
= newAction
;
736 if (oldContext
) *oldContext
= converter
->toUContext
;
737 converter
->toUContext
= newContext
;
740 U_CAPI
void U_EXPORT2
741 ucnv_setFromUCallBack (UConverter
* converter
,
742 UConverterFromUCallback newAction
,
743 const void* newContext
,
744 UConverterFromUCallback
*oldAction
,
745 const void** oldContext
,
748 if (U_FAILURE (*err
))
750 if (oldAction
) *oldAction
= converter
->fromUCharErrorBehaviour
;
751 converter
->fromUCharErrorBehaviour
= newAction
;
752 if (oldContext
) *oldContext
= converter
->fromUContext
;
753 converter
->fromUContext
= newContext
;
/*
 * Adjusts a run of freshly written offsets in place.
 *
 * @param offsets          first offset to adjust
 * @param length           number of offsets to adjust
 * @param sourceIndex      source index to add to each offset; a negative value
 *                         means the conversion function does not handle offsets
 * @param errorInputLength length of the input sequence that caused an error,
 *                         subtracted from sourceIndex
 *
 * With shift = sourceIndex - errorInputLength (or -1 if sourceIndex < 0):
 *   shift == 0: nothing changes (the most common case)
 *   shift >  0: shift is added to every offset that is >= 0
 *   shift <  0: every offset becomes -1, because offsets are not handled or
 *               the error input sequence started in a previous buffer
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t *const end = offsets + length;
    const int32_t shift = (sourceIndex >= 0) ? sourceIndex - errorInputLength : -1;
    int32_t *p;

    if (shift == 0) {
        return;
    }

    if (shift < 0) {
        for (p = offsets; p < end; ++p) {
            *p = -1;
        }
        return;
    }

    /* shift > 0: leave negative ("unknown") offsets alone */
    for (p = offsets; p < end; ++p) {
        if (*p >= 0) {
            *p += shift;
        }
    }
}
801 /* ucnv_fromUnicode --------------------------------------------------------- */
804 * Implementation note for m:n conversions
806 * While collecting source units to find the longest match for m:n conversion,
807 * some source units may need to be stored for a partial match.
808 * When a second buffer does not yield a match on all of the previously stored
809 * source units, then they must be "replayed", i.e., fed back into the converter.
811 * The code relies on the fact that replaying will not nest -
812 * converting a replay buffer will not result in a replay.
813 * This is because a replay is necessary only after the _continuation_ of a
814 * partial match failed, but a replay buffer is converted as a whole.
815 * It may result in some of its units being stored again for a partial match,
816 * but there will not be a continuation _during_ the replay which could fail.
818 * It is conceivable that a callback function could call the converter
819 * recursively in a way that causes another replay to be stored, but that
820 * would be an error in the callback function.
821 * Such violations will cause assertion failures in a debug build,
822 * and wrong output, but they will not cause a crash.
826 _fromUnicodeWithCallback(UConverterFromUnicodeArgs
*pArgs
, UErrorCode
*err
) {
827 UConverterFromUnicode fromUnicode
;
833 int32_t errorInputLength
;
834 UBool converterSawEndOfInput
, calledCallback
;
836 /* variables for m:n conversion */
837 UChar replay
[UCNV_EXT_MAX_UCHARS
];
838 const UChar
*realSource
, *realSourceLimit
;
839 int32_t realSourceIndex
;
842 cnv
=pArgs
->converter
;
845 offsets
=pArgs
->offsets
;
847 /* get the converter implementation function */
850 fromUnicode
=cnv
->sharedData
->impl
->fromUnicode
;
852 fromUnicode
=cnv
->sharedData
->impl
->fromUnicodeWithOffsets
;
853 if(fromUnicode
==NULL
) {
854 /* there is no WithOffsets implementation */
855 fromUnicode
=cnv
->sharedData
->impl
->fromUnicode
;
856 /* we will write -1 for each offset */
861 if(cnv
->preFromULength
>=0) {
865 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
866 realSourceLimit
=NULL
;
871 * Previous m:n conversion stored source units from a partial match
872 * and failed to consume all of them.
873 * We need to "replay" them from a temporary buffer and convert them first.
875 realSource
=pArgs
->source
;
876 realSourceLimit
=pArgs
->sourceLimit
;
877 realFlush
=pArgs
->flush
;
878 realSourceIndex
=sourceIndex
;
880 uprv_memcpy(replay
, cnv
->preFromU
, -cnv
->preFromULength
*U_SIZEOF_UCHAR
);
881 pArgs
->source
=replay
;
882 pArgs
->sourceLimit
=replay
-cnv
->preFromULength
;
886 cnv
->preFromULength
=0;
890 * loop for conversion and error handling
896 * handle end of input
897 * handle errors/call callback
902 if(U_SUCCESS(*err
)) {
904 fromUnicode(pArgs
, err
);
907 * set a flag for whether the converter
908 * successfully processed the end of the input
910 * need not check cnv->preFromULength==0 because a replay (<0) will cause
911 * s<sourceLimit before converterSawEndOfInput is checked
913 converterSawEndOfInput
=
914 (UBool
)(U_SUCCESS(*err
) &&
915 pArgs
->flush
&& pArgs
->source
==pArgs
->sourceLimit
&&
916 cnv
->fromUChar32
==0);
918 /* handle error from ucnv_convertEx() */
919 converterSawEndOfInput
=FALSE
;
922 /* no callback called yet for this iteration */
923 calledCallback
=FALSE
;
925 /* no sourceIndex adjustment for conversion, only for callback output */
929 * loop for offsets and error handling
931 * iterates at most 3 times:
932 * 1. to clean up after the conversion function
933 * 2. after the callback
934 * 3. after the callback again if there was truncated input
937 /* update offsets if we write any */
939 int32_t length
=(int32_t)(pArgs
->target
-t
);
941 _updateOffsets(offsets
, length
, sourceIndex
, errorInputLength
);
944 * if a converter handles offsets and updates the offsets
945 * pointer at the end, then pArgs->offset should not change
947 * however, some converters do not handle offsets at all
948 * (sourceIndex<0) or may not update the offsets pointer
950 pArgs
->offsets
=offsets
+=length
;
954 sourceIndex
+=(int32_t)(pArgs
->source
-s
);
958 if(cnv
->preFromULength
<0) {
960 * switch the source to new replay units (cannot occur while replaying)
961 * after offset handling and before end-of-input and callback handling
963 if(realSource
==NULL
) {
964 realSource
=pArgs
->source
;
965 realSourceLimit
=pArgs
->sourceLimit
;
966 realFlush
=pArgs
->flush
;
967 realSourceIndex
=sourceIndex
;
969 uprv_memcpy(replay
, cnv
->preFromU
, -cnv
->preFromULength
*U_SIZEOF_UCHAR
);
970 pArgs
->source
=replay
;
971 pArgs
->sourceLimit
=replay
-cnv
->preFromULength
;
973 if((sourceIndex
+=cnv
->preFromULength
)<0) {
977 cnv
->preFromULength
=0;
979 /* see implementation note before _fromUnicodeWithCallback() */
980 U_ASSERT(realSource
==NULL
);
981 *err
=U_INTERNAL_PROGRAM_ERROR
;
985 /* update pointers */
989 if(U_SUCCESS(*err
)) {
990 if(s
<pArgs
->sourceLimit
) {
992 * continue with the conversion loop while there is still input left
993 * (continue converting by breaking out of only the inner loop)
996 } else if(realSource
!=NULL
) {
997 /* switch back from replaying to the real source and continue */
998 pArgs
->source
=realSource
;
999 pArgs
->sourceLimit
=realSourceLimit
;
1000 pArgs
->flush
=realFlush
;
1001 sourceIndex
=realSourceIndex
;
1005 } else if(pArgs
->flush
&& cnv
->fromUChar32
!=0) {
1007 * the entire input stream is consumed
1008 * and there is a partial, truncated input sequence left
1011 /* inject an error and continue with callback handling */
1012 *err
=U_TRUNCATED_CHAR_FOUND
;
1013 calledCallback
=FALSE
; /* new error condition */
1015 /* input consumed */
1018 * return to the conversion loop once more if the flush
1019 * flag is set and the conversion function has not
1020 * successfully processed the end of the input yet
1022 * (continue converting by breaking out of only the inner loop)
1024 if(!converterSawEndOfInput
) {
1028 /* reset the converter without calling the callback function */
1029 _reset(cnv
, UCNV_RESET_FROM_UNICODE
, FALSE
);
1032 /* done successfully */
1037 /* U_FAILURE(*err) */
1041 if( calledCallback
||
1042 (e
=*err
)==U_BUFFER_OVERFLOW_ERROR
||
1043 (e
!=U_INVALID_CHAR_FOUND
&&
1044 e
!=U_ILLEGAL_CHAR_FOUND
&&
1045 e
!=U_TRUNCATED_CHAR_FOUND
)
1048 * the callback did not or cannot resolve the error:
1049 * set output pointers and return
1051 * the check for buffer overflow is redundant but it is
1052 * a high-runner case and hopefully documents the intent
1055 * if we were replaying, then the replay buffer must be
1056 * copied back into the UConverter
1057 * and the real arguments must be restored
1059 if(realSource
!=NULL
) {
1062 U_ASSERT(cnv
->preFromULength
==0);
1064 length
=(int32_t)(pArgs
->sourceLimit
-pArgs
->source
);
1066 u_memcpy(cnv
->preFromU
, pArgs
->source
, length
);
1067 cnv
->preFromULength
=(int8_t)-length
;
1070 pArgs
->source
=realSource
;
1071 pArgs
->sourceLimit
=realSourceLimit
;
1072 pArgs
->flush
=realFlush
;
1079 /* callback handling */
1083 /* get and write the code point */
1084 codePoint
=cnv
->fromUChar32
;
1086 U16_APPEND_UNSAFE(cnv
->invalidUCharBuffer
, errorInputLength
, codePoint
);
1087 cnv
->invalidUCharLength
=(int8_t)errorInputLength
;
1089 /* set the converter state to deal with the next character */
1092 /* call the callback function */
1093 cnv
->fromUCharErrorBehaviour(cnv
->fromUContext
, pArgs
,
1094 cnv
->invalidUCharBuffer
, errorInputLength
, codePoint
,
1095 *err
==U_INVALID_CHAR_FOUND
? UCNV_UNASSIGNED
: UCNV_ILLEGAL
,
1100 * loop back to the offset handling
1102 * this flag will indicate after offset handling
1103 * that a callback was called;
1104 * if the callback did not resolve the error, then we return
1106 calledCallback
=TRUE
;
1112 * Output the fromUnicode overflow buffer.
1113 * Call this function if(cnv->charErrorBufferLength>0).
1114 * @return TRUE if overflow
1117 ucnv_outputOverflowFromUnicode(UConverter
*cnv
,
1118 char **target
, const char *targetLimit
,
1126 if(pOffsets
!=NULL
) {
1132 overflow
=(char *)cnv
->charErrorBuffer
;
1133 length
=cnv
->charErrorBufferLength
;
1136 if(t
==targetLimit
) {
1137 /* the overflow buffer contains too much, keep the rest */
1141 overflow
[j
++]=overflow
[i
++];
1144 cnv
->charErrorBufferLength
=(int8_t)j
;
1149 *err
=U_BUFFER_OVERFLOW_ERROR
;
1153 /* copy the overflow contents to the target */
1156 *offsets
++=-1; /* no source index available for old output */
1160 /* the overflow buffer is completely copied to the target */
1161 cnv
->charErrorBufferLength
=0;
/*
 * ucnv_fromUnicode(): public streaming entry point for UTF-16 -> codepage.
 * Visible steps: validate the arguments, first drain any bytes left in the
 * converter's charErrorBuffer, then fill a UConverterFromUnicodeArgs and hand
 * it to _fromUnicodeWithCallback(); finally the advanced source and target
 * positions are written back through *source and *target.
 * Argument problems are reported as U_ILLEGAL_ARGUMENT_ERROR in *err.
 * NOTE(review): part of the parameter list, the local declarations and the
 * return statements are not visible in this view of the file.
 */
1169 U_CAPI
void U_EXPORT2
1170 ucnv_fromUnicode(UConverter
*cnv
,
1171 char **target
, const char *targetLimit
,
1172 const UChar
**source
, const UChar
*sourceLimit
,
1176 UConverterFromUnicodeArgs args
;
1180 /* check parameters */
/* a NULL err or an already-failed *err is tested before anything else */
1181 if(err
==NULL
|| U_FAILURE(*err
)) {
1185 if(cnv
==NULL
|| target
==NULL
|| source
==NULL
) {
1186 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
/* special case: sourceLimit equal to U_MAX_PTR(sourceLimit) is pulled back by one byte */
1193 if ((const void *)U_MAX_PTR(sourceLimit
) == (const void *)sourceLimit
) {
1195 Prevent code from going into an infinite loop in case we do hit this
1196 limit. The limit pointer is expected to be on a UChar * boundary.
1197 This also prevents the next argument check from failing.
1199 sourceLimit
= (const UChar
*)(((const char *)sourceLimit
) - 1);
1203 * All these conditions should never happen.
1205 * 1) Make sure that the limits are >= to the address source or target
1207 * 2) Make sure that the buffer sizes do not exceed the number range for
1208 * int32_t because some functions use the size (in units or bytes)
1209 * rather than comparing pointers, and because offsets are int32_t values.
1211 * size_t is guaranteed to be unsigned and large enough for the job.
1213 * Return with an error instead of adjusting the limits because we would
1214 * not be able to maintain the semantics that either the source must be
1215 * consumed or the target filled (unless an error occurs).
1216 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1218 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1219 * to a char * pointer and provide an incomplete UChar code unit.
/*
 * The source span is limited to 0x3fffffff UChars and the target span to
 * 0x7fffffff bytes; the last test rejects an odd byte distance between
 * s and sourceLimit (an incomplete UChar).
 */
1221 if (sourceLimit
<s
|| targetLimit
<t
||
1222 ((size_t)(sourceLimit
-s
)>(size_t)0x3fffffff && sourceLimit
>s
) ||
1223 ((size_t)(targetLimit
-t
)>(size_t)0x7fffffff && targetLimit
>t
) ||
1224 (((const char *)sourceLimit
-(const char *)s
) & 1) != 0)
1226 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1230 /* output the target overflow buffer */
/* bytes left over from an earlier call go out before any new conversion */
1231 if( cnv
->charErrorBufferLength
>0 &&
1232 ucnv_outputOverflowFromUnicode(cnv
, target
, targetLimit
, &offsets
, err
)
1234 /* U_BUFFER_OVERFLOW_ERROR */
1237 /* *target may have moved, therefore stop using t */
/* preFromULength>=0 is part of the "nothing left to do" test; negative values mark pending replay input */
1239 if(!flush
&& s
==sourceLimit
&& cnv
->preFromULength
>=0) {
1240 /* the overflow buffer is emptied and there is no new input: we are done */
1245 * Do not simply return with a buffer overflow error if
1246 * !flush && t==targetLimit
1247 * because it is possible that the source will not generate any output.
1248 * For example, the skip callback may be called;
1249 * it does not output anything.
1252 /* prepare the converter arguments */
1255 args
.offsets
=offsets
;
1257 args
.sourceLimit
=sourceLimit
;
/* re-read *target here because the overflow output above may have advanced it */
1258 args
.target
=*target
;
1259 args
.targetLimit
=targetLimit
;
1260 args
.size
=sizeof(args
);
1262 _fromUnicodeWithCallback(&args
, err
);
/* report how far the conversion got back to the caller */
1264 *source
=args
.source
;
1265 *target
=args
.target
;
1268 /* ucnv_toUnicode() --------------------------------------------------------- */
/*
 * _toUnicodeWithCallback(): drives one codepage -> UTF-16 conversion call.
 * Visible responsibilities:
 *  - select the implementation function (toUnicode, or toUnicodeWithOffsets
 *    with a fallback to toUnicode when that pointer is NULL);
 *  - replay bytes saved in cnv->preToU (signalled by a negative preToULength)
 *    from the local replay[] buffer before touching the real source;
 *  - run the converter, update offsets, detect truncated input at the end of
 *    a flushed stream, and invoke cnv->fromCharErrorBehaviour for errors;
 *  - on an unresolved error while replaying, copy the unconsumed replay bytes
 *    back into cnv->preToU and restore the caller's source/limit/flush.
 * NOTE(review): loop headers, several assignments and closing braces are not
 * visible in this view of the file - confirm control flow against the full source.
 */
1271 _toUnicodeWithCallback(UConverterToUnicodeArgs
*pArgs
, UErrorCode
*err
) {
1272 UConverterToUnicode toUnicode
;
1277 int32_t sourceIndex
;
1278 int32_t errorInputLength
;
1279 UBool converterSawEndOfInput
, calledCallback
;
1281 /* variables for m:n conversion */
1282 char replay
[UCNV_EXT_MAX_BYTES
];
1283 const char *realSource
, *realSourceLimit
;
1284 int32_t realSourceIndex
;
1287 cnv
=pArgs
->converter
;
1290 offsets
=pArgs
->offsets
;
1292 /* get the converter implementation function */
1295 toUnicode
=cnv
->sharedData
->impl
->toUnicode
;
/* alternative: the offsets-aware implementation, when one is provided */
1297 toUnicode
=cnv
->sharedData
->impl
->toUnicodeWithOffsets
;
1298 if(toUnicode
==NULL
) {
1299 /* there is no WithOffsets implementation */
1300 toUnicode
=cnv
->sharedData
->impl
->toUnicode
;
1301 /* we will write -1 for each offset */
/* preToULength>=0: no saved bytes waiting to be replayed */
1306 if(cnv
->preToULength
>=0) {
1310 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1311 realSourceLimit
=NULL
;
1316 * Previous m:n conversion stored source units from a partial match
1317 * and failed to consume all of them.
1318 * We need to "replay" them from a temporary buffer and convert them first.
/* remember the caller's real input so it can be restored after the replay */
1320 realSource
=pArgs
->source
;
1321 realSourceLimit
=pArgs
->sourceLimit
;
1322 realFlush
=pArgs
->flush
;
1323 realSourceIndex
=sourceIndex
;
/* preToULength is negative here, so -preToULength is the number of saved bytes */
1325 uprv_memcpy(replay
, cnv
->preToU
, -cnv
->preToULength
);
1326 pArgs
->source
=replay
;
1327 pArgs
->sourceLimit
=replay
-cnv
->preToULength
;
1331 cnv
->preToULength
=0;
1335 * loop for conversion and error handling
1341 * handle end of input
1342 * handle errors/call callback
/* the converter is only called while *err is still a success code */
1347 if(U_SUCCESS(*err
)) {
1349 toUnicode(pArgs
, err
);
1352 * set a flag for whether the converter
1353 * successfully processed the end of the input
1355 * need not check cnv->preToULength==0 because a replay (<0) will cause
1356 * s<sourceLimit before converterSawEndOfInput is checked
1358 converterSawEndOfInput
=
1359 (UBool
)(U_SUCCESS(*err
) &&
1360 pArgs
->flush
&& pArgs
->source
==pArgs
->sourceLimit
&&
1363 /* handle error from getNextUChar() or ucnv_convertEx() */
1364 converterSawEndOfInput
=FALSE
;
1367 /* no callback called yet for this iteration */
1368 calledCallback
=FALSE
;
1370 /* no sourceIndex adjustment for conversion, only for callback output */
1374 * loop for offsets and error handling
1376 * iterates at most 3 times:
1377 * 1. to clean up after the conversion function
1378 * 2. after the callback
1379 * 3. after the callback again if there was truncated input
1382 /* update offsets if we write any */
/* length = number of UChars written since t was last recorded */
1384 int32_t length
=(int32_t)(pArgs
->target
-t
);
1386 _updateOffsets(offsets
, length
, sourceIndex
, errorInputLength
);
1389 * if a converter handles offsets and updates the offsets
1390 * pointer at the end, then pArgs->offset should not change
1392 * however, some converters do not handle offsets at all
1393 * (sourceIndex<0) or may not update the offsets pointer
1395 pArgs
->offsets
=offsets
+=length
;
/* a negative sourceIndex is left untouched; otherwise advance it by the bytes consumed */
1398 if(sourceIndex
>=0) {
1399 sourceIndex
+=(int32_t)(pArgs
->source
-s
);
/* the converter just stored new bytes to replay */
1403 if(cnv
->preToULength
<0) {
1405 * switch the source to new replay units (cannot occur while replaying)
1406 * after offset handling and before end-of-input and callback handling
1408 if(realSource
==NULL
) {
1409 realSource
=pArgs
->source
;
1410 realSourceLimit
=pArgs
->sourceLimit
;
1411 realFlush
=pArgs
->flush
;
1412 realSourceIndex
=sourceIndex
;
1414 uprv_memcpy(replay
, cnv
->preToU
, -cnv
->preToULength
);
1415 pArgs
->source
=replay
;
1416 pArgs
->sourceLimit
=replay
-cnv
->preToULength
;
/* adding the negative preToULength moves sourceIndex back by the replay length */
1418 if((sourceIndex
+=cnv
->preToULength
)<0) {
1422 cnv
->preToULength
=0;
1424 /* see implementation note before _fromUnicodeWithCallback() */
1425 U_ASSERT(realSource
==NULL
);
1426 *err
=U_INTERNAL_PROGRAM_ERROR
;
1430 /* update pointers */
1434 if(U_SUCCESS(*err
)) {
1435 if(s
<pArgs
->sourceLimit
) {
1437 * continue with the conversion loop while there is still input left
1438 * (continue converting by breaking out of only the inner loop)
1441 } else if(realSource
!=NULL
) {
1442 /* switch back from replaying to the real source and continue */
1443 pArgs
->source
=realSource
;
1444 pArgs
->sourceLimit
=realSourceLimit
;
1445 pArgs
->flush
=realFlush
;
1446 sourceIndex
=realSourceIndex
;
/* flushing with bytes of an unfinished character still held in the converter */
1450 } else if(pArgs
->flush
&& cnv
->toULength
>0) {
1452 * the entire input stream is consumed
1453 * and there is a partial, truncated input sequence left
1456 /* inject an error and continue with callback handling */
1457 *err
=U_TRUNCATED_CHAR_FOUND
;
1458 calledCallback
=FALSE
; /* new error condition */
1460 /* input consumed */
1463 * return to the conversion loop once more if the flush
1464 * flag is set and the conversion function has not
1465 * successfully processed the end of the input yet
1467 * (continue converting by breaking out of only the inner loop)
1469 if(!converterSawEndOfInput
) {
1473 /* reset the converter without calling the callback function */
1474 _reset(cnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
1477 /* done successfully */
1482 /* U_FAILURE(*err) */
/*
 * Only the five conversion errors listed below are candidates for the
 * callback; buffer overflow, any other error code, or an error that remains
 * after a callback already ran takes the path described in the text that follows.
 */
1486 if( calledCallback
||
1487 (e
=*err
)==U_BUFFER_OVERFLOW_ERROR
||
1488 (e
!=U_INVALID_CHAR_FOUND
&&
1489 e
!=U_ILLEGAL_CHAR_FOUND
&&
1490 e
!=U_TRUNCATED_CHAR_FOUND
&&
1491 e
!=U_ILLEGAL_ESCAPE_SEQUENCE
&&
1492 e
!=U_UNSUPPORTED_ESCAPE_SEQUENCE
)
1495 * the callback did not or cannot resolve the error:
1496 * set output pointers and return
1498 * the check for buffer overflow is redundant but it is
1499 * a high-runner case and hopefully documents the intent
1502 * if we were replaying, then the replay buffer must be
1503 * copied back into the UConverter
1504 * and the real arguments must be restored
1506 if(realSource
!=NULL
) {
1509 U_ASSERT(cnv
->preToULength
==0);
/* length = replay bytes that were not consumed; stored negated to mark them as pending */
1511 length
=(int32_t)(pArgs
->sourceLimit
-pArgs
->source
);
1513 uprv_memcpy(cnv
->preToU
, pArgs
->source
, length
);
1514 cnv
->preToULength
=(int8_t)-length
;
1517 pArgs
->source
=realSource
;
1518 pArgs
->sourceLimit
=realSourceLimit
;
1519 pArgs
->flush
=realFlush
;
1526 /* copy toUBytes[] to invalidCharBuffer[] */
1527 errorInputLength
=cnv
->invalidCharLength
=cnv
->toULength
;
1528 if(errorInputLength
>0) {
1529 uprv_memcpy(cnv
->invalidCharBuffer
, cnv
->toUBytes
, errorInputLength
);
1532 /* set the converter state to deal with the next character */
1535 /* call the callback function */
/* U_INVALID_CHAR_FOUND is reported to the callback as UCNV_UNASSIGNED rather than the default UCNV_ILLEGAL */
1536 if(cnv
->toUCallbackReason
==UCNV_ILLEGAL
&& *err
==U_INVALID_CHAR_FOUND
) {
1537 cnv
->toUCallbackReason
= UCNV_UNASSIGNED
;
1539 cnv
->fromCharErrorBehaviour(cnv
->toUContext
, pArgs
,
1540 cnv
->invalidCharBuffer
, errorInputLength
,
1541 cnv
->toUCallbackReason
,
1543 cnv
->toUCallbackReason
= UCNV_ILLEGAL
; /* reset to default value */
1546 * loop back to the offset handling
1548 * this flag will indicate after offset handling
1549 * that a callback was called;
1550 * if the callback did not resolve the error, then we return
1552 calledCallback
=TRUE
;
/*
 * ucnv_outputOverflowToUnicode(): UChar counterpart of the fromUnicode
 * overflow helper. It moves UChars held in cnv->UCharErrorBuffer
 * (cnv->UCharErrorBufferLength of them) to the target. When t reaches
 * targetLimit first, the unwritten UChars are compacted to the front of the
 * buffer, their count is stored back, and *err becomes U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): declarations, loop headers and return statements are not
 * visible in this view of the file - confirm details against the full source.
 */
1558 * Output the toUnicode overflow buffer.
1559 * Call this function if(cnv->UCharErrorBufferLength>0).
1560 * @return TRUE if overflow
1563 ucnv_outputOverflowToUnicode(UConverter
*cnv
,
1564 UChar
**target
, const UChar
*targetLimit
,
1568 UChar
*overflow
, *t
;
/* pOffsets is optional; offset bookkeeping depends on it being non-NULL */
1572 if(pOffsets
!=NULL
) {
1578 overflow
=cnv
->UCharErrorBuffer
;
1579 length
=cnv
->UCharErrorBufferLength
;
/* target full: keep what could not be written for the next call */
1582 if(t
==targetLimit
) {
1583 /* the overflow buffer contains too much, keep the rest */
/* slide the unwritten UChars (from index i) down to index 0 of the same buffer */
1587 overflow
[j
++]=overflow
[i
++];
/* j is now the number of UChars that remain buffered */
1590 cnv
->UCharErrorBufferLength
=(int8_t)j
;
1595 *err
=U_BUFFER_OVERFLOW_ERROR
;
1599 /* copy the overflow contents to the target */
1602 *offsets
++=-1; /* no source index available for old output */
1606 /* the overflow buffer is completely copied to the target */
1607 cnv
->UCharErrorBufferLength
=0;
/*
 * ucnv_toUnicode(): public streaming entry point for codepage -> UTF-16.
 * Visible steps: validate the arguments, first drain any UChars left in the
 * converter's UCharErrorBuffer, then fill a UConverterToUnicodeArgs and hand
 * it to _toUnicodeWithCallback(); finally the advanced source and target
 * positions are written back through *source and *target.
 * Argument problems are reported as U_ILLEGAL_ARGUMENT_ERROR in *err.
 * NOTE(review): part of the parameter list, the local declarations and the
 * return statements are not visible in this view of the file.
 */
1615 U_CAPI
void U_EXPORT2
1616 ucnv_toUnicode(UConverter
*cnv
,
1617 UChar
**target
, const UChar
*targetLimit
,
1618 const char **source
, const char *sourceLimit
,
1622 UConverterToUnicodeArgs args
;
1626 /* check parameters */
/* a NULL err or an already-failed *err is tested before anything else */
1627 if(err
==NULL
|| U_FAILURE(*err
)) {
1631 if(cnv
==NULL
|| target
==NULL
|| source
==NULL
) {
1632 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
/* special case: targetLimit equal to U_MAX_PTR(targetLimit) is pulled back by one byte */
1639 if ((const void *)U_MAX_PTR(targetLimit
) == (const void *)targetLimit
) {
1641 Prevent code from going into an infinite loop in case we do hit this
1642 limit. The limit pointer is expected to be on a UChar * boundary.
1643 This also prevents the next argument check from failing.
1645 targetLimit
= (const UChar
*)(((const char *)targetLimit
) - 1);
/*
 * NOTE(review): the example "sourceLimit=t+0x7fffffff" in the text below mixes
 * the source limit with the target pointer t; presumably
 * sourceLimit=s+0x7fffffff was meant - confirm.
 */
1649 * All these conditions should never happen.
1651 * 1) Make sure that the limits are >= to the address source or target
1653 * 2) Make sure that the buffer sizes do not exceed the number range for
1654 * int32_t because some functions use the size (in units or bytes)
1655 * rather than comparing pointers, and because offsets are int32_t values.
1657 * size_t is guaranteed to be unsigned and large enough for the job.
1659 * Return with an error instead of adjusting the limits because we would
1660 * not be able to maintain the semantics that either the source must be
1661 * consumed or the target filled (unless an error occurs).
1662 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1664 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1665 * to a char * pointer and provide an incomplete UChar code unit.
/*
 * The source span is limited to 0x7fffffff bytes and the target span to
 * 0x3fffffff UChars; the last test rejects an odd byte distance between
 * t and targetLimit (an incomplete UChar).
 */
1667 if (sourceLimit
<s
|| targetLimit
<t
||
1668 ((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
) ||
1669 ((size_t)(targetLimit
-t
)>(size_t)0x3fffffff && targetLimit
>t
) ||
1670 (((const char *)targetLimit
-(const char *)t
) & 1) != 0
1672 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1676 /* output the target overflow buffer */
/* UChars left over from an earlier call go out before any new conversion */
1677 if( cnv
->UCharErrorBufferLength
>0 &&
1678 ucnv_outputOverflowToUnicode(cnv
, target
, targetLimit
, &offsets
, err
)
1680 /* U_BUFFER_OVERFLOW_ERROR */
1683 /* *target may have moved, therefore stop using t */
/* preToULength>=0 is part of the "nothing left to do" test; negative values mark pending replay input */
1685 if(!flush
&& s
==sourceLimit
&& cnv
->preToULength
>=0) {
1686 /* the overflow buffer is emptied and there is no new input: we are done */
1691 * Do not simply return with a buffer overflow error if
1692 * !flush && t==targetLimit
1693 * because it is possible that the source will not generate any output.
1694 * For example, the skip callback may be called;
1695 * it does not output anything.
1698 /* prepare the converter arguments */
1701 args
.offsets
=offsets
;
1703 args
.sourceLimit
=sourceLimit
;
/* re-read *target here because the overflow output above may have advanced it */
1704 args
.target
=*target
;
1705 args
.targetLimit
=targetLimit
;
1706 args
.size
=sizeof(args
);
1708 _toUnicodeWithCallback(&args
, err
);
/* report how far the conversion got back to the caller */
1710 *source
=args
.source
;
1711 *target
=args
.target
;
1714 /* ucnv_to/fromUChars() ----------------------------------------------------- */
/*
 * ucnv_fromUChars(): one-shot UTF-16 -> codepage conversion of a whole string.
 * Visible steps: validate the arguments, reset the converter's fromUnicode
 * state, convert into dest with flush set to TRUE, and - if the destination
 * overflows - keep converting into a scratch buffer only to add up the total
 * length. The result of u_terminateChars() on the original destination is
 * returned.
 * Invalid arguments set *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR.
 * NOTE(review): some declarations, branch headers and early returns are not
 * visible in this view of the file.
 */
1716 U_CAPI
int32_t U_EXPORT2
1717 ucnv_fromUChars(UConverter
*cnv
,
1718 char *dest
, int32_t destCapacity
,
1719 const UChar
*src
, int32_t srcLength
,
1720 UErrorCode
*pErrorCode
) {
1721 const UChar
*srcLimit
;
1722 char *originalDest
, *destLimit
;
1725 /* check arguments */
1726 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
/* dest may be NULL only with a zero capacity; src may be NULL only with a zero length */
1731 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
) ||
1732 srcLength
<-1 || (srcLength
!=0 && src
==NULL
)
1734 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* start from a clean state: state left by earlier fromUnicode calls is discarded */
1739 ucnv_resetFromUnicode(cnv
);
/* NOTE(review): presumably taken when srcLength==-1 (NUL-terminated input); the guarding line is not visible */
1742 srcLength
=u_strlen(src
);
1745 srcLimit
=src
+srcLength
;
1746 destCapacity
=pinCapacity(dest
, destCapacity
);
1747 destLimit
=dest
+destCapacity
;
1749 /* perform the conversion */
1750 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1751 destLength
=(int32_t)(dest
-originalDest
);
1753 /* if an overflow occurs, then get the preflighting length */
1754 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
/* from here on output goes to the scratch buffer; only its length is kept */
1757 destLimit
=buffer
+sizeof(buffer
);
1760 *pErrorCode
=U_ZERO_ERROR
;
1761 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1762 destLength
+=(int32_t)(dest
-buffer
);
1763 } while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
/* u_terminateChars() receives the original destination, its capacity and the total length */
1769 return u_terminateChars(originalDest
, destCapacity
, destLength
, pErrorCode
);
/*
 * ucnv_toUChars(): one-shot codepage -> UTF-16 conversion of a whole string.
 * Visible steps: validate the arguments, reset the converter's toUnicode
 * state, convert into dest with flush set to TRUE, and - if the destination
 * overflows - keep converting into a scratch buffer only to add up the total
 * length. The result of u_terminateUChars() on the original destination is
 * returned.
 * Invalid arguments set *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR.
 * NOTE(review): some declarations, branch headers and early returns are not
 * visible in this view of the file.
 */
1772 U_CAPI
int32_t U_EXPORT2
1773 ucnv_toUChars(UConverter
*cnv
,
1774 UChar
*dest
, int32_t destCapacity
,
1775 const char *src
, int32_t srcLength
,
1776 UErrorCode
*pErrorCode
) {
1777 const char *srcLimit
;
1778 UChar
*originalDest
, *destLimit
;
1781 /* check arguments */
1782 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
/* dest may be NULL only with a zero capacity; src may be NULL only with a zero length */
1787 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
) ||
1788 srcLength
<-1 || (srcLength
!=0 && src
==NULL
))
1790 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* start from a clean state: state left by earlier toUnicode calls is discarded */
1795 ucnv_resetToUnicode(cnv
);
/* NOTE(review): presumably taken when srcLength==-1 (NUL-terminated input); the guarding line is not visible */
1798 srcLength
=(int32_t)uprv_strlen(src
);
1801 srcLimit
=src
+srcLength
;
1802 destCapacity
=pinCapacity(dest
, destCapacity
);
1803 destLimit
=dest
+destCapacity
;
1805 /* perform the conversion */
1806 ucnv_toUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1807 destLength
=(int32_t)(dest
-originalDest
);
1809 /* if an overflow occurs, then get the preflighting length */
1810 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
)
/* UPRV_LENGTHOF (not sizeof) is used for the limit of the scratch buffer */
1814 destLimit
=buffer
+UPRV_LENGTHOF(buffer
);
1817 *pErrorCode
=U_ZERO_ERROR
;
1818 ucnv_toUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1819 destLength
+=(int32_t)(dest
-buffer
);
1821 while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
/* u_terminateUChars() receives the original destination, its capacity and the total length */
1827 return u_terminateUChars(originalDest
, destCapacity
, destLength
, pErrorCode
);
1830 /* ucnv_getNextUChar() ------------------------------------------------------ */
/*
 * ucnv_getNextUChar(): converts input until one code point is available.
 * Visible steps:
 *  - validate arguments (U_ILLEGAL_ARGUMENT_ERROR on failure);
 *  - serve a code point from cnv->UCharErrorBuffer first when it holds output;
 *  - use the converter's native getNextUChar() when at a character boundary
 *    and the implementation provides one;
 *  - otherwise convert into a small local buffer via _toUnicodeWithCallback(),
 *    combining a lead surrogate with a following trail surrogate;
 *  - store any leftover UChars back at the front of cnv->UCharErrorBuffer.
 * 0xffff is used as the "no output" value, and U_INDEX_OUTOFBOUNDS_ERROR is
 * set when no input (or only state changes) was found.
 * NOTE(review): several declarations, returns and branch headers are not
 * visible in this view of the file - confirm control flow against the full source.
 */
1832 U_CAPI UChar32 U_EXPORT2
1833 ucnv_getNextUChar(UConverter
*cnv
,
1834 const char **source
, const char *sourceLimit
,
1836 UConverterToUnicodeArgs args
;
/* room for the UTF-16 code units of one code point */
1837 UChar buffer
[U16_MAX_LENGTH
];
1842 /* check parameters */
1843 if(err
==NULL
|| U_FAILURE(*err
)) {
1847 if(cnv
==NULL
|| source
==NULL
) {
1848 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1854 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
/*
 * NOTE(review): the example "sourceLimit=t+0x7fffffff" in the text below names
 * a pointer t, while the check that follows uses s; presumably
 * sourceLimit=s+0x7fffffff was meant - confirm.
 */
1859 * Make sure that the buffer sizes do not exceed the number range for
1860 * int32_t because some functions use the size (in units or bytes)
1861 * rather than comparing pointers, and because offsets are int32_t values.
1863 * size_t is guaranteed to be unsigned and large enough for the job.
1865 * Return with an error instead of adjusting the limits because we would
1866 * not be able to maintain the semantics that either the source must be
1867 * consumed or the target filled (unless an error occurs).
1868 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1870 if(((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
)) {
1871 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1877 /* flush the target overflow buffer */
1878 if(cnv
->UCharErrorBufferLength
>0) {
1881 overflow
=cnv
->UCharErrorBuffer
;
1883 length
=cnv
->UCharErrorBufferLength
;
/* read one code point c from the buffered UChars; i ends up past the units consumed */
1884 U16_NEXT(overflow
, i
, length
, c
);
1886 /* move the remaining overflow contents up to the beginning */
1887 if((cnv
->UCharErrorBufferLength
=(int8_t)(length
-i
))>0) {
1888 uprv_memmove(cnv
->UCharErrorBuffer
, cnv
->UCharErrorBuffer
+i
,
1889 cnv
->UCharErrorBufferLength
*U_SIZEOF_UCHAR
);
/* true unless the buffer held nothing but a single lead surrogate */
1892 if(!U16_IS_LEAD(c
) || i
<length
) {
1896 * Continue if the overflow buffer contained only a lead surrogate,
1897 * in case the converter outputs single surrogates from complete
1903 * flush==TRUE is implied for ucnv_getNextUChar()
1905 * do not simply return even if s==sourceLimit because the converter may
1906 * not have seen flush==TRUE before
1909 /* prepare the converter arguments */
1914 args
.sourceLimit
=sourceLimit
;
/* the first conversion may write at most one UChar */
1916 args
.targetLimit
=buffer
+1;
1917 args
.size
=sizeof(args
);
1921 * call the native getNextUChar() implementation if we are
1922 * at a character boundary (toULength==0)
1924 * unlike with _toUnicode(), getNextUChar() implementations must set
1925 * U_TRUNCATED_CHAR_FOUND for truncated input,
1926 * in addition to setting toULength/toUBytes[]
1928 if(cnv
->toULength
==0 && cnv
->sharedData
->impl
->getNextUChar
!=NULL
) {
1929 c
=cnv
->sharedData
->impl
->getNextUChar(&args
, err
);
1930 *source
=s
=args
.source
;
1931 if(*err
==U_INDEX_OUTOFBOUNDS_ERROR
) {
1932 /* reset the converter without calling the callback function */
1933 _reset(cnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
1934 return 0xffff; /* no output */
/* success with a non-negative c: the native function produced a code point */
1935 } else if(U_SUCCESS(*err
) && c
>=0) {
1938 * else fall through to use _toUnicode() because
1939 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1940 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1945 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1946 _toUnicodeWithCallback(&args
, err
);
/* NOTE(review): presumably the overflow from the one-UChar target is expected and cleared here; the body line is not visible */
1948 if(*err
==U_BUFFER_OVERFLOW_ERROR
) {
1953 length
=(int32_t)(args
.target
-buffer
);
1955 /* write the lead surrogate from the overflow buffer */
1957 args
.target
=buffer
+1;
1962 /* buffer contents starts at i and ends before length */
1964 if(U_FAILURE(*err
)) {
1965 c
=0xffff; /* no output */
1966 } else if(length
==0) {
1967 /* no input or only state changes */
1968 *err
=U_INDEX_OUTOFBOUNDS_ERROR
;
1969 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1970 c
=0xffff; /* no output */
1974 if(!U16_IS_LEAD(c
)) {
1975 /* consume c=buffer[0], done */
1977 /* got a lead surrogate, see if a trail surrogate follows */
1980 if(cnv
->UCharErrorBufferLength
>0) {
1981 /* got overflow output from the conversion */
1982 if(U16_IS_TRAIL(c2
=cnv
->UCharErrorBuffer
[0])) {
1983 /* got a trail surrogate, too */
1984 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1986 /* move the remaining overflow contents up to the beginning */
1987 if((--cnv
->UCharErrorBufferLength
)>0) {
1988 uprv_memmove(cnv
->UCharErrorBuffer
, cnv
->UCharErrorBuffer
+1,
1989 cnv
->UCharErrorBufferLength
*U_SIZEOF_UCHAR
);
1992 /* c is an unpaired lead surrogate, just return it */
1994 } else if(args
.source
<sourceLimit
) {
1995 /* convert once more, to buffer[1] */
1996 args
.targetLimit
=buffer
+2;
1997 _toUnicodeWithCallback(&args
, err
);
1998 if(*err
==U_BUFFER_OVERFLOW_ERROR
) {
2002 length
=(int32_t)(args
.target
-buffer
);
/* pair up only when exactly two UChars were produced and the second is a trail surrogate */
2003 if(U_SUCCESS(*err
) && length
==2 && U16_IS_TRAIL(c2
=buffer
[1])) {
2004 /* got a trail surrogate, too */
2005 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
2013 * move leftover output from buffer[i..length[
2014 * into the beginning of the overflow buffer
2017 /* move further overflow back */
/* delta = number of leftover UChars in the local buffer */
2018 int32_t delta
=length
-i
;
2019 if((length
=cnv
->UCharErrorBufferLength
)>0) {
/* shift existing overflow content up by delta to make room at the front */
2020 uprv_memmove(cnv
->UCharErrorBuffer
+delta
, cnv
->UCharErrorBuffer
,
2021 length
*U_SIZEOF_UCHAR
);
2023 cnv
->UCharErrorBufferLength
=(int8_t)(length
+delta
);
2025 cnv
->UCharErrorBuffer
[0]=buffer
[i
++];
2027 cnv
->UCharErrorBuffer
[1]=buffer
[i
];
/* report how far the source was consumed */
2031 *source
=args
.source
;
2035 /* ucnv_convert() and siblings ---------------------------------------------- */
/*
 * ucnv_convertEx(): converts from sourceCnv's charset to targetCnv's charset
 * through a UTF-16 pivot buffer.
 * Visible behaviour:
 *  - argument problems set *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR;
 *  - when pivotStart is NULL the local pivotBuffer (CHUNK_SIZE UChars) is
 *    used; the error text notes that streaming conversion requires an
 *    explicit pivot buffer;
 *  - a NULL sourceLimit means the source is a NUL-terminated byte string;
 *  - both converters are reset and the pivot pointers rewound in one branch
 *    (NOTE(review): presumably the "reset" case; the guarding line is not
 *    visible), otherwise pending targetCnv overflow bytes are written first;
 *  - when one side is UTF-8 and the other converter provides fromUTF8/toUTF8,
 *    that direct function is used and the pivot is limited to 32 UChars;
 *  - on exit *source and *target receive the final positions, and with flush
 *    set and a success code the termination status is recorded.
 * NOTE(review): the loop header, many branch bodies and closing braces are
 * not visible in this view of the file - confirm control flow against the
 * full source.
 */
2037 U_CAPI
void U_EXPORT2
2038 ucnv_convertEx(UConverter
*targetCnv
, UConverter
*sourceCnv
,
2039 char **target
, const char *targetLimit
,
2040 const char **source
, const char *sourceLimit
,
2041 UChar
*pivotStart
, UChar
**pivotSource
,
2042 UChar
**pivotTarget
, const UChar
*pivotLimit
,
2043 UBool reset
, UBool flush
,
2044 UErrorCode
*pErrorCode
) {
2045 UChar pivotBuffer
[CHUNK_SIZE
];
2046 const UChar
*myPivotSource
;
2047 UChar
*myPivotTarget
;
2051 UConverterToUnicodeArgs toUArgs
;
2052 UConverterFromUnicodeArgs fromUArgs
;
2053 UConverterConvert convert
;
2055 /* error checking */
2056 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
/* sourceLimit is absent from this NULL check; it is allowed to be NULL (handled further below) */
2060 if( targetCnv
==NULL
|| sourceCnv
==NULL
||
2061 source
==NULL
|| *source
==NULL
||
2062 target
==NULL
|| *target
==NULL
|| targetLimit
==NULL
2064 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2070 if((sourceLimit
!=NULL
&& sourceLimit
<s
) || targetLimit
<t
) {
2071 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2076 * Make sure that the buffer sizes do not exceed the number range for
2077 * int32_t. See ucnv_toUnicode() for a more detailed comment.
2080 (sourceLimit
!=NULL
&& ((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
)) ||
2081 ((size_t)(targetLimit
-t
)>(size_t)0x7fffffff && targetLimit
>t
)
2083 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
/* no caller-supplied pivot buffer */
2087 if(pivotStart
==NULL
) {
2089 /* streaming conversion requires an explicit pivot buffer */
2090 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2094 /* use the stack pivot buffer */
2095 myPivotSource
=myPivotTarget
=pivotStart
=pivotBuffer
;
2096 pivotSource
=(UChar
**)&myPivotSource
;
2097 pivotTarget
=&myPivotTarget
;
2098 pivotLimit
=pivotBuffer
+CHUNK_SIZE
;
/* caller-supplied pivot: it must be non-empty and its cursor pointers must be set */
2099 } else if( pivotStart
>=pivotLimit
||
2100 pivotSource
==NULL
|| *pivotSource
==NULL
||
2101 pivotTarget
==NULL
|| *pivotTarget
==NULL
||
2104 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2108 if(sourceLimit
==NULL
) {
2109 /* get limit of single-byte-NUL-terminated source string */
2110 sourceLimit
=uprv_strchr(*source
, 0);
/* both converter directions are reset and the pivot is emptied */
2114 ucnv_resetToUnicode(sourceCnv
);
2115 ucnv_resetFromUnicode(targetCnv
);
2116 *pivotSource
=*pivotTarget
=pivotStart
;
2117 } else if(targetCnv
->charErrorBufferLength
>0) {
2118 /* output the targetCnv overflow buffer */
/* NULL is passed for the offsets argument */
2119 if(ucnv_outputOverflowFromUnicode(targetCnv
, target
, targetLimit
, NULL
, pErrorCode
)) {
2120 /* U_BUFFER_OVERFLOW_ERROR */
2123 /* *target has moved, therefore stop using t */
/* part of the test for: no replay pending, empty pivot, no buffered UChars, and the source consumed */
2126 targetCnv
->preFromULength
>=0 && *pivotSource
==*pivotTarget
&&
2127 sourceCnv
->UCharErrorBufferLength
==0 && sourceCnv
->preToULength
>=0 && s
==sourceLimit
2129 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2134 /* Is direct-UTF-8 conversion available? */
2135 if( sourceCnv
->sharedData
->staticData
->conversionType
==UCNV_UTF8
&&
2136 targetCnv
->sharedData
->impl
->fromUTF8
!=NULL
2138 convert
=targetCnv
->sharedData
->impl
->fromUTF8
;
2139 } else if( targetCnv
->sharedData
->staticData
->conversionType
==UCNV_UTF8
&&
2140 sourceCnv
->sharedData
->impl
->toUTF8
!=NULL
2142 convert
=sourceCnv
->sharedData
->impl
->toUTF8
;
2148 * If direct-UTF-8 conversion is available, then we use a smaller
2149 * pivot buffer for error handling and partial matches
2150 * so that we quickly return to direct conversion.
2152 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2154 * We could reduce the pivot buffer size further, at the cost of
2155 * buffer overflows from callbacks.
2156 * The pivot buffer should not be smaller than the maximum number of
2157 * fromUnicode extension table input UChars
2158 * (for m:n conversion, see
2159 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2160 * or 2 for surrogate pairs.
2162 * Too small a buffer can cause thrashing between pivoting and direct
2163 * conversion, with function call overhead outweighing the benefits
2164 * of direct conversion.
2166 if(convert
!=NULL
&& (pivotLimit
-pivotStart
)>32) {
2167 pivotLimit
=pivotStart
+32;
2170 /* prepare the converter arguments */
/* fromUnicode side: flush starts as FALSE and is raised later once toUnicode has seen the end of input */
2171 fromUArgs
.converter
=targetCnv
;
2172 fromUArgs
.flush
=FALSE
;
2173 fromUArgs
.offsets
=NULL
;
2174 fromUArgs
.target
=*target
;
2175 fromUArgs
.targetLimit
=targetLimit
;
2176 fromUArgs
.size
=sizeof(fromUArgs
);
/* toUnicode side: takes the caller's flush flag and writes into the pivot buffer */
2178 toUArgs
.converter
=sourceCnv
;
2179 toUArgs
.flush
=flush
;
2180 toUArgs
.offsets
=NULL
;
2182 toUArgs
.sourceLimit
=sourceLimit
;
2183 toUArgs
.targetLimit
=pivotLimit
;
2184 toUArgs
.size
=sizeof(toUArgs
);
2187 * TODO: Consider separating this function into two functions,
2188 * extracting exactly the conversion loop,
2189 * for readability and to reduce the set of visible variables.
2191 * Otherwise stop using s and t from here on.
2198 * The sequence of steps in the loop may appear backward,
2199 * but the principle is simple:
2201 * source - sourceCnv overflow - pivot - targetCnv overflow - target
2202 * empty out later buffers before refilling them from earlier ones.
2204 * The targetCnv overflow buffer is flushed out only once before the loop.
2208 * if(pivot not empty or error or replay or flush fromUnicode) {
2209 * fromUnicode(pivot -> target);
2212 * For pivoting conversion; and for direct conversion for
2213 * error callback handling and flushing the replay buffer.
2215 if( *pivotSource
<*pivotTarget
||
2216 U_FAILURE(*pErrorCode
) ||
2217 targetCnv
->preFromULength
<0 ||
/* the pending pivot contents become the fromUnicode input */
2220 fromUArgs
.source
=*pivotSource
;
2221 fromUArgs
.sourceLimit
=*pivotTarget
;
2222 _fromUnicodeWithCallback(&fromUArgs
, pErrorCode
);
2223 if(U_FAILURE(*pErrorCode
)) {
2224 /* target overflow, or conversion error */
/* remember how much of the pivot was consumed */
2225 *pivotSource
=(UChar
*)fromUArgs
.source
;
2230 * _fromUnicodeWithCallback() must have consumed the pivot contents
2231 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2235 /* The pivot buffer is empty; reset it so we start at pivotStart. */
2236 *pivotSource
=*pivotTarget
=pivotStart
;
2239 * if(sourceCnv overflow buffer not empty) {
2240 * move(sourceCnv overflow buffer -> pivot);
2244 /* output the sourceCnv overflow buffer */
2245 if(sourceCnv
->UCharErrorBufferLength
>0) {
2246 if(ucnv_outputOverflowToUnicode(sourceCnv
, pivotTarget
, pivotLimit
, NULL
, pErrorCode
)) {
2247 /* U_BUFFER_OVERFLOW_ERROR */
/* an overflow of the pivot is not reported to the caller; the error code is cleared */
2248 *pErrorCode
=U_ZERO_ERROR
;
2254 * check for end of input and break if done
2256 * Checking both flush and fromUArgs.flush ensures that the converters
2257 * have been called with the flush flag set if the ucnv_convertEx()
2260 if( toUArgs
.source
==sourceLimit
&&
2261 sourceCnv
->preToULength
>=0 && sourceCnv
->toULength
==0 &&
2262 (!flush
|| fromUArgs
.flush
)
2264 /* done successfully */
2269 * use direct conversion if available
2270 * but not if continuing a partial match
2271 * or flushing the toUnicode replay buffer
2273 if(convert
!=NULL
&& targetCnv
->preFromUFirstCP
<0 && sourceCnv
->preToULength
==0) {
2274 if(*pErrorCode
==U_USING_DEFAULT_WARNING
) {
2275 /* remove a warning that may be set by this function */
2276 *pErrorCode
=U_ZERO_ERROR
;
2278 convert(&fromUArgs
, &toUArgs
, pErrorCode
);
2279 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2281 } else if(U_FAILURE(*pErrorCode
)) {
/* bytes of a character are held in sourceCnv: handled as a toUnicode-side error */
2282 if(sourceCnv
->toULength
>0) {
2284 * Fall through to calling _toUnicodeWithCallback()
2285 * for callback handling.
2287 * The pivot buffer will be reset with
2288 * *pivotSource=*pivotTarget=pivotStart;
2289 * which indicates a toUnicode error to the caller
2290 * (*pivotSource==pivotStart shows no pivot UChars consumed).
2294 * Indicate a fromUnicode error to the caller
2295 * (*pivotSource>pivotStart shows some pivot UChars consumed).
2297 *pivotSource
=*pivotTarget
=pivotStart
+1;
2299 * Loop around to calling _fromUnicodeWithCallbacks()
2300 * for callback handling.
2304 } else if(*pErrorCode
==U_USING_DEFAULT_WARNING
) {
2306 * No error, but the implementation requested to temporarily
2307 * fall back to pivoting.
2309 *pErrorCode
=U_ZERO_ERROR
;
2311 * The following else branches are almost identical to the end-of-input
2312 * handling in _toUnicodeWithCallback().
2313 * Avoid calling it just for the end of input.
2315 } else if(flush
&& sourceCnv
->toULength
>0) { /* flush==toUArgs.flush */
2317 * the entire input stream is consumed
2318 * and there is a partial, truncated input sequence left
2321 /* inject an error and continue with callback handling */
2322 *pErrorCode
=U_TRUNCATED_CHAR_FOUND
;
2324 /* input consumed */
2326 /* reset the converters without calling the callback functions */
2327 _reset(sourceCnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
2328 _reset(targetCnv
, UCNV_RESET_FROM_UNICODE
, FALSE
);
2331 /* done successfully */
2337 * toUnicode(source -> pivot);
2339 * For pivoting conversion; and for direct conversion for
2340 * error callback handling, continuing partial matches
2341 * and flushing the replay buffer.
2343 * The pivot buffer is empty and reset.
2345 toUArgs
.target
=pivotStart
; /* ==*pivotTarget */
2346 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2347 _toUnicodeWithCallback(&toUArgs
, pErrorCode
);
/* publish how far the pivot buffer was filled */
2348 *pivotTarget
=toUArgs
.target
;
2349 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2350 /* pivot overflow: continue with the conversion loop */
2351 *pErrorCode
=U_ZERO_ERROR
;
2352 } else if(U_FAILURE(*pErrorCode
) || (!flush
&& *pivotTarget
==pivotStart
)) {
2353 /* conversion error, or there was nothing left to convert */
2358 * _toUnicodeWithCallback() wrote into the pivot buffer,
2359 * continue with fromUnicode conversion.
2361 * Set the fromUnicode flush flag if we flush and if toUnicode has
2362 * processed the end of the input.
2364 if( flush
&& toUArgs
.source
==sourceLimit
&&
2365 sourceCnv
->preToULength
>=0 &&
2366 sourceCnv
->UCharErrorBufferLength
==0
2368 fromUArgs
.flush
=TRUE
;
2373 * The conversion loop is exited when one of the following is true:
2374 * - the entire source text has been converted successfully to the target buffer
2375 * - a target buffer overflow occurred
2376 * - a conversion error occurred
/* report the final source and target positions to the caller */
2379 *source
=toUArgs
.source
;
2380 *target
=fromUArgs
.target
;
2382 /* terminate the target buffer if possible */
2383 if(flush
&& U_SUCCESS(*pErrorCode
)) {
/* room is left in the target: a stale "not terminated" warning is cleared on this path */
2384 if(*target
!=targetLimit
) {
2386 if(*pErrorCode
==U_STRING_NOT_TERMINATED_WARNING
) {
2387 *pErrorCode
=U_ZERO_ERROR
;
/* NOTE(review): presumably the else branch (target exactly full); the branch header is not visible */
2390 *pErrorCode
=U_STRING_NOT_TERMINATED_WARNING
;
/*
 * ucnv_internalConvert(): shared worker that converts a complete buffer with
 * ucnv_convertEx() and supports preflighting.
 * Visible steps: derive sourceLimit (a negative sourceLength means the source
 * is NUL-terminated), return early via u_terminateChars() for empty input,
 * convert into the caller's target when targetCapacity>0, and on overflow or
 * zero capacity keep converting into a CHUNK_SIZE scratch buffer to add up the
 * required length.
 * NOTE(review): some declarations, the trailing ucnv_convertEx() arguments and
 * branch/loop headers are not visible in this view of the file.
 */
2395 /* internal implementation of ucnv_convert() etc. with preflighting */
2397 ucnv_internalConvert(UConverter
*outConverter
, UConverter
*inConverter
,
2398 char *target
, int32_t targetCapacity
,
2399 const char *source
, int32_t sourceLength
,
2400 UErrorCode
*pErrorCode
) {
2401 UChar pivotBuffer
[CHUNK_SIZE
];
2402 UChar
*pivot
, *pivot2
;
2405 const char *sourceLimit
;
2406 const char *targetLimit
;
2407 int32_t targetLength
=0;
/* negative length: locate the terminating NUL byte */
2410 if(sourceLength
<0) {
2411 sourceLimit
=uprv_strchr(source
, 0);
2413 sourceLimit
=source
+sourceLength
;
2416 /* if there is no input data, we're done */
2417 if(source
==sourceLimit
) {
2418 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
/* both pivot cursors start at the beginning of the local pivot buffer */
2421 pivot
=pivot2
=pivotBuffer
;
2425 if(targetCapacity
>0) {
2426 /* perform real conversion */
2427 targetLimit
=target
+targetCapacity
;
2428 ucnv_convertEx(outConverter
, inConverter
,
2429 &myTarget
, targetLimit
,
2430 &source
, sourceLimit
,
2431 pivotBuffer
, &pivot
, &pivot2
, pivotBuffer
+CHUNK_SIZE
,
/* number of bytes actually written to the caller's buffer */
2435 targetLength
=(int32_t)(myTarget
-target
);
2439 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2440 * to it but continue the conversion in order to store in targetCapacity
2441 * the number of bytes that was required.
2443 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
|| targetCapacity
==0)
2445 char targetBuffer
[CHUNK_SIZE
];
2447 targetLimit
=targetBuffer
+CHUNK_SIZE
;
/* each pass clears the error and rewinds to the start of the scratch buffer; only the length is accumulated */
2449 *pErrorCode
=U_ZERO_ERROR
;
2450 myTarget
=targetBuffer
;
2451 ucnv_convertEx(outConverter
, inConverter
,
2452 &myTarget
, targetLimit
,
2453 &source
, sourceLimit
,
2454 pivotBuffer
, &pivot
, &pivot2
, pivotBuffer
+CHUNK_SIZE
,
2458 targetLength
+=(int32_t)(myTarget
-targetBuffer
);
2459 } while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
2461 /* done with preflighting, set warnings and errors as appropriate */
2462 return u_terminateChars(target
, targetCapacity
, targetLength
, pErrorCode
);
2465 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2466 return targetLength
;
2469 U_CAPI
int32_t U_EXPORT2
2470 ucnv_convert(const char *toConverterName
, const char *fromConverterName
,
2471 char *target
, int32_t targetCapacity
,
2472 const char *source
, int32_t sourceLength
,
2473 UErrorCode
*pErrorCode
) {
2474 UConverter in
, out
; /* stack-allocated */
2475 UConverter
*inConverter
, *outConverter
;
2476 int32_t targetLength
;
2478 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2482 if( source
==NULL
|| sourceLength
<-1 ||
2483 targetCapacity
<0 || (targetCapacity
>0 && target
==NULL
)
2485 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2489 /* if there is no input data, we're done */
2490 if(sourceLength
==0 || (sourceLength
<0 && *source
==0)) {
2491 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2494 /* create the converters */
2495 inConverter
=ucnv_createConverter(&in
, fromConverterName
, pErrorCode
);
2496 if(U_FAILURE(*pErrorCode
)) {
2500 outConverter
=ucnv_createConverter(&out
, toConverterName
, pErrorCode
);
2501 if(U_FAILURE(*pErrorCode
)) {
2502 ucnv_close(inConverter
);
2506 targetLength
=ucnv_internalConvert(outConverter
, inConverter
,
2507 target
, targetCapacity
,
2508 source
, sourceLength
,
2511 ucnv_close(inConverter
);
2512 ucnv_close(outConverter
);
2514 return targetLength
;
2519 ucnv_convertAlgorithmic(UBool convertToAlgorithmic
,
2520 UConverterType algorithmicType
,
2522 char *target
, int32_t targetCapacity
,
2523 const char *source
, int32_t sourceLength
,
2524 UErrorCode
*pErrorCode
) {
2525 UConverter algoConverterStatic
; /* stack-allocated */
2526 UConverter
*algoConverter
, *to
, *from
;
2527 int32_t targetLength
;
2529 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2533 if( cnv
==NULL
|| source
==NULL
|| sourceLength
<-1 ||
2534 targetCapacity
<0 || (targetCapacity
>0 && target
==NULL
)
2536 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2540 /* if there is no input data, we're done */
2541 if(sourceLength
==0 || (sourceLength
<0 && *source
==0)) {
2542 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2545 /* create the algorithmic converter */
2546 algoConverter
=ucnv_createAlgorithmicConverter(&algoConverterStatic
, algorithmicType
,
2548 if(U_FAILURE(*pErrorCode
)) {
2552 /* reset the other converter */
2553 if(convertToAlgorithmic
) {
2554 /* cnv->Unicode->algo */
2555 ucnv_resetToUnicode(cnv
);
2559 /* algo->Unicode->cnv */
2560 ucnv_resetFromUnicode(cnv
);
2565 targetLength
=ucnv_internalConvert(to
, from
,
2566 target
, targetCapacity
,
2567 source
, sourceLength
,
2570 ucnv_close(algoConverter
);
2572 return targetLength
;
2575 U_CAPI
int32_t U_EXPORT2
2576 ucnv_toAlgorithmic(UConverterType algorithmicType
,
2578 char *target
, int32_t targetCapacity
,
2579 const char *source
, int32_t sourceLength
,
2580 UErrorCode
*pErrorCode
) {
2581 return ucnv_convertAlgorithmic(TRUE
, algorithmicType
, cnv
,
2582 target
, targetCapacity
,
2583 source
, sourceLength
,
2587 U_CAPI
int32_t U_EXPORT2
2588 ucnv_fromAlgorithmic(UConverter
*cnv
,
2589 UConverterType algorithmicType
,
2590 char *target
, int32_t targetCapacity
,
2591 const char *source
, int32_t sourceLength
,
2592 UErrorCode
*pErrorCode
) {
2593 return ucnv_convertAlgorithmic(FALSE
, algorithmicType
, cnv
,
2594 target
, targetCapacity
,
2595 source
, sourceLength
,
2599 U_CAPI UConverterType U_EXPORT2
2600 ucnv_getType(const UConverter
* converter
)
2602 int8_t type
= converter
->sharedData
->staticData
->conversionType
;
2603 #if !UCONFIG_NO_LEGACY_CONVERSION
2604 if(type
== UCNV_MBCS
) {
2605 return ucnv_MBCSGetType(converter
);
2608 return (UConverterType
)type
;
2611 U_CAPI
void U_EXPORT2
2612 ucnv_getStarters(const UConverter
* converter
,
2613 UBool starters
[256],
2616 if (err
== NULL
|| U_FAILURE(*err
)) {
2620 if(converter
->sharedData
->impl
->getStarters
!= NULL
) {
2621 converter
->sharedData
->impl
->getStarters(converter
, starters
, err
);
2623 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2627 static const UAmbiguousConverter
*ucnv_getAmbiguous(const UConverter
*cnv
)
2629 UErrorCode errorCode
;
2637 errorCode
=U_ZERO_ERROR
;
2638 name
=ucnv_getName(cnv
, &errorCode
);
2639 if(U_FAILURE(errorCode
)) {
2643 for(i
=0; i
<UPRV_LENGTHOF(ambiguousConverters
); ++i
)
2645 if(0==uprv_strcmp(name
, ambiguousConverters
[i
].name
))
2647 return ambiguousConverters
+i
;
2654 U_CAPI
void U_EXPORT2
2655 ucnv_fixFileSeparator(const UConverter
*cnv
,
2657 int32_t sourceLength
) {
2658 const UAmbiguousConverter
*a
;
2662 if(cnv
==NULL
|| source
==NULL
|| sourceLength
<=0 || (a
=ucnv_getAmbiguous(cnv
))==NULL
)
2667 variant5c
=a
->variant5c
;
2668 for(i
=0; i
<sourceLength
; ++i
) {
2669 if(source
[i
]==variant5c
) {
2675 U_CAPI UBool U_EXPORT2
2676 ucnv_isAmbiguous(const UConverter
*cnv
) {
2677 return (UBool
)(ucnv_getAmbiguous(cnv
)!=NULL
);
2680 U_CAPI
void U_EXPORT2
2681 ucnv_setFallback(UConverter
*cnv
, UBool usesFallback
)
2683 cnv
->useFallback
= usesFallback
;
2686 U_CAPI UBool U_EXPORT2
2687 ucnv_usesFallback(const UConverter
*cnv
)
2689 return cnv
->useFallback
;
2692 U_CAPI
void U_EXPORT2
2693 ucnv_getInvalidChars (const UConverter
* converter
,
2698 if (err
== NULL
|| U_FAILURE(*err
))
2702 if (len
== NULL
|| errBytes
== NULL
|| converter
== NULL
)
2704 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2707 if (*len
< converter
->invalidCharLength
)
2709 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
2712 if ((*len
= converter
->invalidCharLength
) > 0)
2714 uprv_memcpy (errBytes
, converter
->invalidCharBuffer
, *len
);
2718 U_CAPI
void U_EXPORT2
2719 ucnv_getInvalidUChars (const UConverter
* converter
,
2724 if (err
== NULL
|| U_FAILURE(*err
))
2728 if (len
== NULL
|| errChars
== NULL
|| converter
== NULL
)
2730 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2733 if (*len
< converter
->invalidUCharLength
)
2735 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
2738 if ((*len
= converter
->invalidUCharLength
) > 0)
2740 u_memcpy (errChars
, converter
->invalidUCharBuffer
, *len
);
2744 #define SIG_MAX_LEN 5
2746 U_CAPI
const char* U_EXPORT2
2747 ucnv_detectUnicodeSignature( const char* source
,
2748 int32_t sourceLength
,
2749 int32_t* signatureLength
,
2750 UErrorCode
* pErrorCode
) {
2753 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2754 * bytes we don't misdetect something
2756 char start
[SIG_MAX_LEN
]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2759 if((pErrorCode
==NULL
) || U_FAILURE(*pErrorCode
)){
2763 if(source
== NULL
|| sourceLength
< -1){
2764 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
2768 if(signatureLength
== NULL
) {
2769 signatureLength
= &dummy
;
2772 if(sourceLength
==-1){
2773 sourceLength
=(int32_t)uprv_strlen(source
);
2777 while(i
<sourceLength
&& i
<SIG_MAX_LEN
){
2782 if(start
[0] == '\xFE' && start
[1] == '\xFF') {
2785 } else if(start
[0] == '\xFF' && start
[1] == '\xFE') {
2786 if(start
[2] == '\x00' && start
[3] =='\x00') {
2793 } else if(start
[0] == '\xEF' && start
[1] == '\xBB' && start
[2] == '\xBF') {
2796 } else if(start
[0] == '\x00' && start
[1] == '\x00' &&
2797 start
[2] == '\xFE' && start
[3]=='\xFF') {
2800 } else if(start
[0] == '\x0E' && start
[1] == '\xFE' && start
[2] == '\xFF') {
2803 } else if(start
[0] == '\xFB' && start
[1] == '\xEE' && start
[2] == '\x28') {
2806 } else if(start
[0] == '\x2B' && start
[1] == '\x2F' && start
[2] == '\x76') {
2808 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2809 * depending on the second UTF-16 code unit.
2810 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2813 * So far we have +/v
2815 if(start
[3] == '\x38' && start
[4] == '\x2D') {
2819 } else if(start
[3] == '\x38' || start
[3] == '\x39' || start
[3] == '\x2B' || start
[3] == '\x2F') {
2820 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2824 }else if(start
[0]=='\xDD' && start
[1]== '\x73'&& start
[2]=='\x66' && start
[3]=='\x73'){
2826 return "UTF-EBCDIC";
2830 /* no known Unicode signature byte sequence recognized */
2835 U_CAPI
int32_t U_EXPORT2
2836 ucnv_fromUCountPending(const UConverter
* cnv
, UErrorCode
* status
)
2838 if(status
== NULL
|| U_FAILURE(*status
)){
2842 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2846 if(cnv
->preFromUFirstCP
>= 0){
2847 return U16_LENGTH(cnv
->preFromUFirstCP
)+cnv
->preFromULength
;
2848 }else if(cnv
->preFromULength
< 0){
2849 return -cnv
->preFromULength
;
2850 }else if(cnv
->fromUChar32
> 0){
2857 U_CAPI
int32_t U_EXPORT2
2858 ucnv_toUCountPending(const UConverter
* cnv
, UErrorCode
* status
){
2860 if(status
== NULL
|| U_FAILURE(*status
)){
2864 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2868 if(cnv
->preToULength
> 0){
2869 return cnv
->preToULength
;
2870 }else if(cnv
->preToULength
< 0){
2871 return -cnv
->preToULength
;
2872 }else if(cnv
->toULength
> 0){
2873 return cnv
->toULength
;
2878 U_CAPI UBool U_EXPORT2
2879 ucnv_isFixedWidth(UConverter
*cnv
, UErrorCode
*status
){
2880 if (U_FAILURE(*status
)) {
2885 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2889 switch (ucnv_getType(cnv
)) {
2892 case UCNV_UTF32_BigEndian
:
2893 case UCNV_UTF32_LittleEndian
:
2904 * Hey, Emacs, please set the following:
2907 * indent-tabs-mode: nil