2 ******************************************************************************
4 * Copyright (C) 1998-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * Implements APIs for the ICU's codeset conversion library;
11 * mostly calls through internal functions;
12 * created by Bertrand A. Damiba
14 * Modification History:
16 * Date Name Description
17 * 04/04/99 helena Fixed internal header inclusion.
18 * 05/09/00 helena Added implementation to handle fallback mappings.
19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_CONVERSION
26 #include "unicode/ustring.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ucnv_err.h"
29 #include "unicode/uset.h"
40 /* size of intermediate and preflighting buffers in ucnv_convert() */
41 #define CHUNK_SIZE 1024
/*
 * Entry type for the ambiguousConverters table.
 * NOTE(review): only the variant5c member (a const UChar) is visible here; the
 * table initializers supply two values per entry, so a name member is
 * presumably declared on a line not shown - confirm against the full file.
 */
43 typedef struct UAmbiguousConverter
{
45 const UChar variant5c
;
46 } UAmbiguousConverter
;
/*
 * Static table of converter names, each paired with a UChar value
 * (0xa5 or 0x20a9).
 * NOTE(review): the second value is presumably the variant5c member - confirm.
 * Three entries are deliberately commented out.
 */
48 static const UAmbiguousConverter ambiguousConverters
[]={
49 { "ibm-897_P100-1995", 0xa5 },
50 { "ibm-942_P120-1999", 0xa5 },
51 { "ibm-943_P130-1999", 0xa5 },
52 { "ibm-946_P100-1995", 0xa5 },
53 { "ibm-33722_P120-1999", 0xa5 },
54 /*{ "ibm-54191_P100-2006", 0xa5 },*/
55 /*{ "ibm-62383_P100-2007", 0xa5 },*/
56 /*{ "ibm-891_P100-1995", 0x20a9 },*/
57 { "ibm-944_P100-1995", 0x20a9 },
58 { "ibm-949_P110-1999", 0x20a9 },
59 { "ibm-1363_P110-1997", 0x20a9 },
60 { "ISO_2022,locale=ko,version=0", 0x20a9 }
63 /*Calls through createConverter */
/*
 * Opens a converter by name.
 * A guard is taken when err is NULL or *err already holds a failure code;
 * otherwise the result of ucnv_createConverter(NULL, name, err) is stored in r.
 * NOTE(review): the guard body and the final return are not visible in this
 * excerpt.
 */
64 U_CAPI UConverter
* U_EXPORT2
65 ucnv_open (const char *name
,
70 if (err
== NULL
|| U_FAILURE (*err
)) {
74 r
= ucnv_createConverter(NULL
, name
, err
);
/*
 * Opens a converter from a named data package.
 * Returns whatever ucnv_createConverterFromPackage(packageName, converterName,
 * err) returns; no argument checking is done in this wrapper.
 */
78 U_CAPI UConverter
* U_EXPORT2
79 ucnv_openPackage (const char *packageName
, const char *converterName
, UErrorCode
* err
)
81 return ucnv_createConverterFromPackage(packageName
, converterName
, err
);
84 /*Extracts the UChar* to a char* and calls through createConverter */
/*
 * Opens a converter whose name is given as a UChar string.
 * asciiName is a stack buffer of UCNV_MAX_CONVERTER_NAME_LENGTH chars.
 * Visible paths:
 *  - a guard on err == NULL or a failure code already in *err;
 *  - one path returns ucnv_open(NULL, err) (NOTE(review): presumably when name
 *    is NULL; the condition is on a line not shown);
 *  - if u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH, *err is set to
 *    U_ILLEGAL_ARGUMENT_ERROR;
 *  - otherwise name is copied into asciiName with u_austrcpy and the result is
 *    passed to ucnv_open.
 */
85 U_CAPI UConverter
* U_EXPORT2
86 ucnv_openU (const UChar
* name
,
89 char asciiName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
91 if (err
== NULL
|| U_FAILURE(*err
))
94 return ucnv_open (NULL
, err
);
95 if (u_strlen(name
) >= UCNV_MAX_CONVERTER_NAME_LENGTH
)
97 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
100 return ucnv_open(u_austrcpy(asciiName
, name
), err
);
103 /* Copy the string that is represented by the UConverterPlatform enum
104 * @param platformString An output buffer
105 * @param platform An enum representing a platform
106 * @return the length of the copied string.
/*
 * Writes a platform prefix into platformString.
 * The visible case copies the ibm- prefix with uprv_strcpy.
 * NOTE(review): the selection on pltfrm, the default (empty string) handling
 * and the returned length are on lines not shown in this excerpt.
 */
109 ucnv_copyPlatformString(char *platformString
, UConverterPlatform pltfrm
)
114 uprv_strcpy(platformString
, "ibm-");
120 /* default to empty string */
125 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
126 *through createConverter*/
/*
 * Opens a converter from a numeric codepage and a platform.
 * The name is assembled in the stack buffer myName:
 *  - ucnv_copyPlatformString writes the platform prefix and its return value
 *    is stored in myNameLen;
 *  - T_CString_integerToString appends codepage in radix 10 at
 *    myName + myNameLen;
 *  - the name is passed to ucnv_createConverter(NULL, myName, err).
 * A guard on err == NULL or a failure code in *err precedes this work.
 */
127 U_CAPI UConverter
* U_EXPORT2
128 ucnv_openCCSID (int32_t codepage
,
129 UConverterPlatform platform
,
132 char myName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
135 if (err
== NULL
|| U_FAILURE (*err
))
138 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
139 myNameLen
= ucnv_copyPlatformString(myName
, platform
);
140 T_CString_integerToString(myName
+ myNameLen
, codepage
, 10);
142 return ucnv_createConverter(NULL
, myName
, err
);
145 /* Creating a temporary stack-based object that can be used in one thread,
146 and created from a converter that is shared across threads.
/*
 * Clones cnv either into the caller supplied stackBuffer or into heap memory.
 *
 * Parameters (as used by the visible code):
 *   cnv          converter to clone; NULL yields U_ILLEGAL_ARGUMENT_ERROR.
 *   stackBuffer  optional caller buffer; may be NULL, which forces allocation.
 *   pBufferSize  in: size of stackBuffer; out: needed size. NULL yields
 *                U_ILLEGAL_ARGUMENT_ERROR. A value <= 0 is a preflight request:
 *                the needed size is stored and traced.
 *   status       in/out error code; NULL or an existing failure exits early.
 *
 * Visible steps:
 *  1. Size: impl->safeClone is called with a NULL target to obtain
 *     bufferSizeNeeded when the implementation has one, else sizeof(UConverter).
 *  2. Alignment: a misaligned stackBuffer is advanced by the alignment offset
 *     and *pBufferSize is reduced accordingly when it is large enough.
 *  3. Storage: if the buffer is too small or NULL, memory is obtained with
 *     uprv_malloc, *status becomes U_SAFECLONE_ALLOCATED_WARNING (when status
 *     was a success) and *pBufferSize is set to the needed size; otherwise the
 *     stack buffer is used and allocatedConverter stays NULL.
 *  4. Copy: the clone is zeroed, the UConverter struct is copied, and
 *     isCopyLocal / isExtraLocal are cleared.
 *  5. Substitution string: if cnv->subChars points at its own subUChars, the
 *     clone is pointed at its own subUChars; otherwise a separate buffer of
 *     UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes is allocated and copied.
 *  6. impl->safeClone, when present, finishes the clone; on NULL result or
 *     failure the allocated subChars and converter are freed.
 *  7. The shared data reference count is incremented unless referenceCounter
 *     equals ~0.
 *  8. isCopyLocal is set to TRUE when the clone lives in stackBuffer.
 *  9. Both error callbacks of cnv are invoked once with UCNV_CLONE, each with
 *     cbErr reset to U_ZERO_ERROR beforehand.
 *
 * NOTE(review): on failure of the subChars allocation in step 5 the visible
 * code frees allocatedConverter and traces *status, but no visible line sets
 * *status to U_MEMORY_ALLOCATION_ERROR first - confirm against the full file.
 */
149 U_CAPI UConverter
* U_EXPORT2
150 ucnv_safeClone(const UConverter
* cnv
, void *stackBuffer
, int32_t *pBufferSize
, UErrorCode
*status
)
152 UConverter
*localConverter
, *allocatedConverter
;
153 int32_t bufferSizeNeeded
;
154 char *stackBufferChars
= (char *)stackBuffer
;
156 UConverterToUnicodeArgs toUArgs
= {
157 sizeof(UConverterToUnicodeArgs
),
166 UConverterFromUnicodeArgs fromUArgs
= {
167 sizeof(UConverterFromUnicodeArgs
),
177 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE
);
179 if (status
== NULL
|| U_FAILURE(*status
)){
180 UTRACE_EXIT_STATUS(status
? *status
: U_ILLEGAL_ARGUMENT_ERROR
);
184 if (!pBufferSize
|| !cnv
){
185 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
186 UTRACE_EXIT_STATUS(*status
);
190 UTRACE_DATA3(UTRACE_OPEN_CLOSE
, "clone converter %s at %p into stackBuffer %p",
191 ucnv_getName(cnv
, status
), cnv
, stackBuffer
);
193 if (cnv
->sharedData
->impl
->safeClone
!= NULL
) {
194 /* call the custom safeClone function for sizing */
195 bufferSizeNeeded
= 0;
196 cnv
->sharedData
->impl
->safeClone(cnv
, NULL
, &bufferSizeNeeded
, status
);
200 /* inherent sizing */
201 bufferSizeNeeded
= sizeof(UConverter
);
204 if (*pBufferSize
<= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
205 *pBufferSize
= bufferSizeNeeded
;
206 UTRACE_EXIT_VALUE(bufferSizeNeeded
);
211 /* Pointers on 64-bit platforms need to be aligned
212 * on a 64-bit boundary in memory.
214 if (U_ALIGNMENT_OFFSET(stackBuffer
) != 0) {
215 int32_t offsetUp
= (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars
);
216 if(*pBufferSize
> offsetUp
) {
217 *pBufferSize
-= offsetUp
;
218 stackBufferChars
+= offsetUp
;
220 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
225 stackBuffer
= (void *)stackBufferChars
;
227 /* Now, see if we must allocate any memory */
228 if (*pBufferSize
< bufferSizeNeeded
|| stackBuffer
== NULL
)
230 /* allocate one here...*/
231 localConverter
= allocatedConverter
= (UConverter
*) uprv_malloc (bufferSizeNeeded
);
233 if(localConverter
== NULL
) {
234 *status
= U_MEMORY_ALLOCATION_ERROR
;
235 UTRACE_EXIT_STATUS(*status
);
239 if (U_SUCCESS(*status
)) {
240 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
243 /* record the fact that memory was allocated */
244 *pBufferSize
= bufferSizeNeeded
;
246 /* just use the stack buffer */
247 localConverter
= (UConverter
*) stackBuffer
;
248 allocatedConverter
= NULL
;
251 uprv_memset(localConverter
, 0, bufferSizeNeeded
);
253 /* Copy initial state */
254 uprv_memcpy(localConverter
, cnv
, sizeof(UConverter
));
255 localConverter
->isCopyLocal
= localConverter
->isExtraLocal
= FALSE
;
257 /* copy the substitution string */
258 if (cnv
->subChars
== (uint8_t *)cnv
->subUChars
) {
259 localConverter
->subChars
= (uint8_t *)localConverter
->subUChars
;
261 localConverter
->subChars
= (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
262 if (localConverter
->subChars
== NULL
) {
263 uprv_free(allocatedConverter
);
264 UTRACE_EXIT_STATUS(*status
);
267 uprv_memcpy(localConverter
->subChars
, cnv
->subChars
, UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
270 /* now either call the safeclone fcn or not */
271 if (cnv
->sharedData
->impl
->safeClone
!= NULL
) {
272 /* call the custom safeClone function */
273 localConverter
= cnv
->sharedData
->impl
->safeClone(cnv
, localConverter
, pBufferSize
, status
);
276 if(localConverter
==NULL
|| U_FAILURE(*status
)) {
277 if (allocatedConverter
!= NULL
&& allocatedConverter
->subChars
!= (uint8_t *)allocatedConverter
->subUChars
) {
278 uprv_free(allocatedConverter
->subChars
);
280 uprv_free(allocatedConverter
);
281 UTRACE_EXIT_STATUS(*status
);
285 /* increment refcount of shared data if needed */
287 Checking whether it's an algorithic converter is okay
288 in multithreaded applications because the value never changes.
289 Don't check referenceCounter for any other value.
291 if (cnv
->sharedData
->referenceCounter
!= ~0) {
292 ucnv_incrementRefCount(cnv
->sharedData
);
295 if(localConverter
== (UConverter
*)stackBuffer
) {
296 /* we're using user provided data - set to not destroy */
297 localConverter
->isCopyLocal
= TRUE
;
300 /* allow callback functions to handle any memory allocation */
301 toUArgs
.converter
= fromUArgs
.converter
= localConverter
;
302 cbErr
= U_ZERO_ERROR
;
303 cnv
->fromCharErrorBehaviour(cnv
->toUContext
, &toUArgs
, NULL
, 0, UCNV_CLONE
, &cbErr
);
304 cbErr
= U_ZERO_ERROR
;
305 cnv
->fromUCharErrorBehaviour(cnv
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_CLONE
, &cbErr
);
307 UTRACE_EXIT_PTR_STATUS(localConverter
, *status
);
308 return localConverter
;
313 /*Decreases the reference counter in the shared immutable section of the object
314 *and frees the mutable part*/
/*
 * Closes a converter and releases what it owns.
 * Visible steps, in order:
 *  - trace entry and a NULL check on converter;
 *  - if the to-Unicode callback differs from UCNV_TO_U_DEFAULT_CALLBACK it is
 *    invoked once with UCNV_CLOSE; likewise for the from-Unicode callback and
 *    UCNV_FROM_U_DEFAULT_CALLBACK. errorCode is reset to U_ZERO_ERROR before
 *    each call and is not inspected afterwards in the visible code;
 *  - impl->close is called when the implementation provides one;
 *  - subChars is freed when it does not point at the inline subUChars storage;
 *  - ucnv_unloadSharedDataIfReady is called unless referenceCounter equals ~0;
 *  - the converter struct itself is freed unless isCopyLocal is set.
 */
316 U_CAPI
void U_EXPORT2
317 ucnv_close (UConverter
* converter
)
319 UErrorCode errorCode
= U_ZERO_ERROR
;
321 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE
);
323 if (converter
== NULL
)
329 UTRACE_DATA3(UTRACE_OPEN_CLOSE
, "close converter %s at %p, isCopyLocal=%b",
330 ucnv_getName(converter
, &errorCode
), converter
, converter
->isCopyLocal
);
332 /* In order to speed up the close, only call the callbacks when they have been changed.
333 This performance check will only work when the callbacks are set within a shared library
334 or from user code that statically links this code. */
335 /* first, notify the callback functions that the converter is closed */
336 if (converter
->fromCharErrorBehaviour
!= UCNV_TO_U_DEFAULT_CALLBACK
) {
337 UConverterToUnicodeArgs toUArgs
= {
338 sizeof(UConverterToUnicodeArgs
),
348 toUArgs
.converter
= converter
;
349 errorCode
= U_ZERO_ERROR
;
350 converter
->fromCharErrorBehaviour(converter
->toUContext
, &toUArgs
, NULL
, 0, UCNV_CLOSE
, &errorCode
);
352 if (converter
->fromUCharErrorBehaviour
!= UCNV_FROM_U_DEFAULT_CALLBACK
) {
353 UConverterFromUnicodeArgs fromUArgs
= {
354 sizeof(UConverterFromUnicodeArgs
),
363 fromUArgs
.converter
= converter
;
364 errorCode
= U_ZERO_ERROR
;
365 converter
->fromUCharErrorBehaviour(converter
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_CLOSE
, &errorCode
);
368 if (converter
->sharedData
->impl
->close
!= NULL
) {
369 converter
->sharedData
->impl
->close(converter
);
372 if (converter
->subChars
!= (uint8_t *)converter
->subUChars
) {
373 uprv_free(converter
->subChars
);
377 Checking whether it's an algorithic converter is okay
378 in multithreaded applications because the value never changes.
379 Don't check referenceCounter for any other value.
381 if (converter
->sharedData
->referenceCounter
!= ~0) {
382 ucnv_unloadSharedDataIfReady(converter
->sharedData
);
385 if(!converter
->isCopyLocal
){
386 uprv_free(converter
);
392 /*returns a single Name from the list, will return NULL if out of bounds
/*
 * Looks up the n-th available converter name.
 * Only n in the range 0..0xffff is forwarded (as uint16_t) to
 * ucnv_bld_getAvailableConverter, using a local error code; the result is
 * acted on only when that code is a success.
 * NOTE(review): the return statements are on lines not shown in this excerpt.
 */
394 U_CAPI
const char* U_EXPORT2
395 ucnv_getAvailableName (int32_t n
)
397 if (0 <= n
&& n
<= 0xffff) {
398 UErrorCode err
= U_ZERO_ERROR
;
399 const char *name
= ucnv_bld_getAvailableConverter((uint16_t)n
, &err
);
400 if (U_SUCCESS(err
)) {
/*
 * Returns the value of ucnv_bld_countAvailableConverters.
 * A local error code is passed in and then discarded, so callers cannot see
 * a failure reported by that call.
 */
407 U_CAPI
int32_t U_EXPORT2
408 ucnv_countAvailable ()
410 UErrorCode err
= U_ZERO_ERROR
;
411 return ucnv_bld_countAvailableConverters(&err
);
/*
 * Copies the substitution bytes of converter into mySubChar.
 * Visible behavior:
 *  - a guard on a failure code already in *err (err is dereferenced without a
 *    visible NULL check);
 *  - subCharLen <= 0 (Unicode string or empty string set through
 *    ucnv_setSubstString) is handled on a separate path whose body is not
 *    shown;
 *  - if *len is smaller than subCharLen, *err becomes
 *    U_INDEX_OUTOFBOUNDS_ERROR;
 *  - otherwise subCharLen bytes are copied and *len receives subCharLen.
 */
414 U_CAPI
void U_EXPORT2
415 ucnv_getSubstChars (const UConverter
* converter
,
420 if (U_FAILURE (*err
))
423 if (converter
->subCharLen
<= 0) {
424 /* Unicode string or empty string from ucnv_setSubstString(). */
429 if (*len
< converter
->subCharLen
) /*not enough space in subChars */
431 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
435 uprv_memcpy (mySubChar
, converter
->subChars
, converter
->subCharLen
); /*fills in the subchars */
436 *len
= converter
->subCharLen
; /*store # of bytes copied to buffer */
/*
 * Sets the substitution bytes of converter from mySubChar.
 * Visible behavior:
 *  - a guard on a failure code already in *err;
 *  - len must lie between staticData->minBytesPerChar and
 *    staticData->maxBytesPerChar inclusive, else *err becomes
 *    U_ILLEGAL_ARGUMENT_ERROR;
 *  - len bytes are copied into converter->subChars and subCharLen is set to
 *    len;
 *  - subChar1 is set to 0 so that the explicitly set subChar is the one
 *    written.
 */
439 U_CAPI
void U_EXPORT2
440 ucnv_setSubstChars (UConverter
* converter
,
441 const char *mySubChar
,
445 if (U_FAILURE (*err
))
448 /*Makes sure that the subChar is within the codepages char length boundaries */
449 if ((len
> converter
->sharedData
->staticData
->maxBytesPerChar
)
450 || (len
< converter
->sharedData
->staticData
->minBytesPerChar
))
452 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
456 uprv_memcpy (converter
->subChars
, mySubChar
, len
); /*copies the subchars */
457 converter
->subCharLen
= len
; /*sets the new len */
460 * There is currently (2001Feb) no separate API to set/get subChar1.
461 * In order to always have subChar written after it is explicitly set,
462 * we set subChar1 to 0.
464 converter
->subChar1
= 0;
/*
 * Sets the substitution string of cnv from the UChar string s.
 * Visible steps:
 *  - cnv is cloned into the aligned stack buffer cloneBuffer, the clone gets
 *    UCNV_FROM_U_CALLBACK_STOP as its from-Unicode callback, and s is converted
 *    into chars with ucnv_fromUChars; argument checking is left to those calls;
 *  - a failure in *err after the conversion takes an exit path;
 *  - if the implementation has no writeSub function (or, with legacy
 *    conversion enabled, is an MBCS converter that is not EBCDIC stateful), the
 *    converted bytes in chars are stored;
 *  - otherwise the Unicode string itself is stored for on-the-fly conversion;
 *    a length above UCNV_ERROR_BUFFER_LENGTH yields U_BUFFER_OVERFLOW_ERROR,
 *    length is recomputed with u_strlen on one path (condition not shown), and
 *    length8 becomes length * U_SIZEOF_UCHAR;
 *  - when length8 exceeds UCNV_MAX_SUBCHAR_LEN and subChars still points at the
 *    inline subUChars, a buffer of UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR
 *    bytes is allocated and zeroed; on allocation failure subChars is pointed
 *    back at subUChars and *err becomes U_MEMORY_ALLOCATION_ERROR;
 *  - length8 bytes are copied into cnv->subChars; subCharLen is stored as
 *    length8 for charset bytes and as -length for a Unicode string.
 */
469 U_CAPI
void U_EXPORT2
470 ucnv_setSubstString(UConverter
*cnv
,
474 UAlignedMemory cloneBuffer
[U_CNV_SAFECLONE_BUFFERSIZE
/ sizeof(UAlignedMemory
) + 1];
475 char chars
[UCNV_ERROR_BUFFER_LENGTH
];
479 int32_t cloneSize
, length8
;
481 /* Let the following functions check all arguments. */
482 cloneSize
= sizeof(cloneBuffer
);
483 clone
= ucnv_safeClone(cnv
, cloneBuffer
, &cloneSize
, err
);
484 ucnv_setFromUCallBack(clone
, UCNV_FROM_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, err
);
485 length8
= ucnv_fromUChars(clone
, chars
, (int32_t)sizeof(chars
), s
, length
, err
);
487 if (U_FAILURE(*err
)) {
491 if (cnv
->sharedData
->impl
->writeSub
== NULL
492 #if !UCONFIG_NO_LEGACY_CONVERSION
493 || (cnv
->sharedData
->staticData
->conversionType
== UCNV_MBCS
&&
494 ucnv_MBCSGetType(cnv
) != UCNV_EBCDIC_STATEFUL
)
497 /* The converter is not stateful. Store the charset bytes as a fixed string. */
498 subChars
= (uint8_t *)chars
;
501 * The converter has a non-default writeSub() function, indicating
502 * that it is stateful.
503 * Store the Unicode string for on-the-fly conversion for correct
506 if (length
> UCNV_ERROR_BUFFER_LENGTH
) {
508 * Should not occur. The converter should output at least one byte
509 * per UChar, which means that ucnv_fromUChars() should catch all
512 *err
= U_BUFFER_OVERFLOW_ERROR
;
515 subChars
= (uint8_t *)s
;
517 length
= u_strlen(s
);
519 length8
= length
* U_SIZEOF_UCHAR
;
523 * For storing the substitution string, select either the small buffer inside
524 * UConverter or allocate a subChars buffer.
526 if (length8
> UCNV_MAX_SUBCHAR_LEN
) {
527 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
528 if (cnv
->subChars
== (uint8_t *)cnv
->subUChars
) {
529 /* Allocate a new buffer for the string. */
530 cnv
->subChars
= (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
531 if (cnv
->subChars
== NULL
) {
532 cnv
->subChars
= (uint8_t *)cnv
->subUChars
;
533 *err
= U_MEMORY_ALLOCATION_ERROR
;
536 uprv_memset(cnv
->subChars
, 0, UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
540 /* Copy the substitution string into the UConverter or its subChars buffer. */
544 uprv_memcpy(cnv
->subChars
, subChars
, length8
);
545 if (subChars
== (uint8_t *)chars
) {
546 cnv
->subCharLen
= (int8_t)length8
;
547 } else /* subChars == s */ {
548 cnv
->subCharLen
= (int8_t)-length
;
552 /* See comment in ucnv_setSubstChars(). */
556 /*resets the internal states of a converter
557 *goal : have the same behaviour as a freshly created converter
/*
 * Shared worker for the three public reset functions.
 *   converter     converter to reset; NULL is checked first.
 *   choice        which direction(s) to reset.
 *   callCallback  whether to notify the callbacks. NOTE(review): the line that
 *                 tests this flag is not visible in this excerpt.
 * Visible steps:
 *  - for choice <= UCNV_RESET_TO_UNICODE, a non-default to-Unicode callback is
 *    invoked with UCNV_RESET; for choice != UCNV_RESET_TO_UNICODE, a
 *    non-default from-Unicode callback is invoked with UCNV_RESET. errorCode
 *    is set to U_ZERO_ERROR before each call;
 *  - for choice <= UCNV_RESET_TO_UNICODE the to-Unicode state is restored:
 *    toUnicodeStatus from the shared data, and toULength, invalidCharLength,
 *    UCharErrorBufferLength and preToULength set to 0;
 *  - for choice != UCNV_RESET_TO_UNICODE the from-Unicode state is cleared:
 *    fromUnicodeStatus, fromUChar32, invalidUCharLength, charErrorBufferLength
 *    and preFromULength set to 0, preFromUFirstCP set to U_SENTINEL;
 *  - impl->reset, when present, is called with the same choice.
 */
559 static void _reset(UConverter
*converter
, UConverterResetChoice choice
,
560 UBool callCallback
) {
561 if(converter
== NULL
) {
566 /* first, notify the callback functions that the converter is reset */
567 UErrorCode errorCode
;
569 if(choice
<=UCNV_RESET_TO_UNICODE
&& converter
->fromCharErrorBehaviour
!= UCNV_TO_U_DEFAULT_CALLBACK
) {
570 UConverterToUnicodeArgs toUArgs
= {
571 sizeof(UConverterToUnicodeArgs
),
580 toUArgs
.converter
= converter
;
581 errorCode
= U_ZERO_ERROR
;
582 converter
->fromCharErrorBehaviour(converter
->toUContext
, &toUArgs
, NULL
, 0, UCNV_RESET
, &errorCode
);
584 if(choice
!=UCNV_RESET_TO_UNICODE
&& converter
->fromUCharErrorBehaviour
!= UCNV_FROM_U_DEFAULT_CALLBACK
) {
585 UConverterFromUnicodeArgs fromUArgs
= {
586 sizeof(UConverterFromUnicodeArgs
),
595 fromUArgs
.converter
= converter
;
596 errorCode
= U_ZERO_ERROR
;
597 converter
->fromUCharErrorBehaviour(converter
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_RESET
, &errorCode
);
601 /* now reset the converter itself */
602 if(choice
<=UCNV_RESET_TO_UNICODE
) {
603 converter
->toUnicodeStatus
= converter
->sharedData
->toUnicodeStatus
;
605 converter
->toULength
= 0;
606 converter
->invalidCharLength
= converter
->UCharErrorBufferLength
= 0;
607 converter
->preToULength
= 0;
609 if(choice
!=UCNV_RESET_TO_UNICODE
) {
610 converter
->fromUnicodeStatus
= 0;
611 converter
->fromUChar32
= 0;
612 converter
->invalidUCharLength
= converter
->charErrorBufferLength
= 0;
613 converter
->preFromUFirstCP
= U_SENTINEL
;
614 converter
->preFromULength
= 0;
617 if (converter
->sharedData
->impl
->reset
!= NULL
) {
618 /* call the custom reset function */
619 converter
->sharedData
->impl
->reset(converter
, choice
);
/*
 * Resets both conversion directions of converter by calling
 * _reset(converter, UCNV_RESET_BOTH, TRUE).
 */
623 U_CAPI
void U_EXPORT2
624 ucnv_reset(UConverter
*converter
)
626 _reset(converter
, UCNV_RESET_BOTH
, TRUE
);
/*
 * Resets only the to-Unicode direction of converter by calling
 * _reset(converter, UCNV_RESET_TO_UNICODE, TRUE).
 */
629 U_CAPI
void U_EXPORT2
630 ucnv_resetToUnicode(UConverter
*converter
)
632 _reset(converter
, UCNV_RESET_TO_UNICODE
, TRUE
);
/*
 * Resets only the from-Unicode direction of converter by calling
 * _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE).
 */
635 U_CAPI
void U_EXPORT2
636 ucnv_resetFromUnicode(UConverter
*converter
)
638 _reset(converter
, UCNV_RESET_FROM_UNICODE
, TRUE
);
/*
 * Returns converter->maxBytesPerUChar.
 * converter is dereferenced without a NULL check.
 */
641 U_CAPI
int8_t U_EXPORT2
642 ucnv_getMaxCharSize (const UConverter
* converter
)
644 return converter
->maxBytesPerUChar
;
/*
 * Returns minBytesPerChar from the static data of the shared converter data.
 * converter is dereferenced without a NULL check.
 */
648 U_CAPI
int8_t U_EXPORT2
649 ucnv_getMinCharSize (const UConverter
* converter
)
651 return converter
->sharedData
->staticData
->minBytesPerChar
;
/*
 * Returns the name of converter.
 * After a guard on a failure code in *err, an implementation specific getName
 * function is consulted when present and its result is stored in temp; the
 * final visible return is staticData->name of the shared data.
 * NOTE(review): how temp is used is on lines not shown in this excerpt.
 */
654 U_CAPI
const char* U_EXPORT2
655 ucnv_getName (const UConverter
* converter
, UErrorCode
* err
)
658 if (U_FAILURE (*err
))
660 if(converter
->sharedData
->impl
->getName
){
661 const char* temp
= converter
->sharedData
->impl
->getName(converter
);
665 return converter
->sharedData
->staticData
->name
;
/*
 * Returns the CCSID of converter.
 * After a guard on a failure code in *err, ccsid is read from
 * staticData->codepage. On a fallback path (condition not shown) the IBM
 * standard name of the converter is looked up with ucnv_getStandardName and
 * the digits after its first hyphen are parsed with atol.
 * NOTE(review): whether ccsidStr is checked for NULL before ccsidStr+1 is read
 * is on a line not shown in this excerpt.
 */
668 U_CAPI
int32_t U_EXPORT2
669 ucnv_getCCSID(const UConverter
* converter
,
673 if (U_FAILURE (*err
))
676 ccsid
= converter
->sharedData
->staticData
->codepage
;
678 /* Rare case. This is for cases like gb18030,
679 which doesn't have an IBM canonical name, but does have an IBM alias. */
680 const char *standardName
= ucnv_getStandardName(ucnv_getName(converter
, err
), "IBM", err
);
681 if (U_SUCCESS(*err
) && standardName
) {
682 const char *ccsidStr
= uprv_strchr(standardName
, '-');
684 ccsid
= (int32_t)atol(ccsidStr
+1); /* +1 to skip '-' */
/*
 * Returns staticData->platform of the shared data, cast to
 * UConverterPlatform, after a guard on a failure code in *err.
 */
692 U_CAPI UConverterPlatform U_EXPORT2
693 ucnv_getPlatform (const UConverter
* converter
,
696 if (U_FAILURE (*err
))
699 return (UConverterPlatform
)converter
->sharedData
->staticData
->platform
;
/*
 * Reports the current to-Unicode callback and its context.
 * The callback is read from the field named fromCharErrorBehaviour and the
 * context from toUContext. Neither converter nor the two output pointers is
 * checked for NULL.
 */
702 U_CAPI
void U_EXPORT2
703 ucnv_getToUCallBack (const UConverter
* converter
,
704 UConverterToUCallback
*action
,
705 const void **context
)
707 *action
= converter
->fromCharErrorBehaviour
;
708 *context
= converter
->toUContext
;
/*
 * Reports the current from-Unicode callback and its context.
 * The callback is read from fromUCharErrorBehaviour and the context from
 * fromUContext. Neither converter nor the two output pointers is checked for
 * NULL.
 */
711 U_CAPI
void U_EXPORT2
712 ucnv_getFromUCallBack (const UConverter
* converter
,
713 UConverterFromUCallback
*action
,
714 const void **context
)
716 *action
= converter
->fromUCharErrorBehaviour
;
717 *context
= converter
->fromUContext
;
/*
 * Installs a new to-Unicode callback and context.
 * After a guard on a failure code in *err, the previous callback and context
 * are written to oldAction and oldContext when those pointers are non-NULL,
 * then newAction and newContext are stored in fromCharErrorBehaviour and
 * toUContext.
 */
720 U_CAPI
void U_EXPORT2
721 ucnv_setToUCallBack (UConverter
* converter
,
722 UConverterToUCallback newAction
,
723 const void* newContext
,
724 UConverterToUCallback
*oldAction
,
725 const void** oldContext
,
728 if (U_FAILURE (*err
))
730 if (oldAction
) *oldAction
= converter
->fromCharErrorBehaviour
;
731 converter
->fromCharErrorBehaviour
= newAction
;
732 if (oldContext
) *oldContext
= converter
->toUContext
;
733 converter
->toUContext
= newContext
;
/*
 * Installs a new from-Unicode callback and context.
 * After a guard on a failure code in *err, the previous callback and context
 * are written to oldAction and oldContext when those pointers are non-NULL,
 * then newAction and newContext are stored in fromUCharErrorBehaviour and
 * fromUContext.
 */
736 U_CAPI
void U_EXPORT2
737 ucnv_setFromUCallBack (UConverter
* converter
,
738 UConverterFromUCallback newAction
,
739 const void* newContext
,
740 UConverterFromUCallback
*oldAction
,
741 const void** oldContext
,
744 if (U_FAILURE (*err
))
746 if (oldAction
) *oldAction
= converter
->fromUCharErrorBehaviour
;
747 converter
->fromUCharErrorBehaviour
= newAction
;
748 if (oldContext
) *oldContext
= converter
->fromUContext
;
749 converter
->fromUContext
= newContext
;
/*
 * Adjusts the length entries starting at offsets after a conversion step.
 * delta is sourceIndex - errorInputLength and limit is offsets + length.
 * Per the visible comments and code: a zero delta needs no work, a positive
 * delta is added to each offset that is not negative, and a negative delta
 * (no offset support, or an error sequence that began in an earlier buffer)
 * sets each offset to -1.
 * NOTE(review): the branch conditions and loop bodies are only partly visible
 * in this excerpt.
 */
753 _updateOffsets(int32_t *offsets
, int32_t length
,
754 int32_t sourceIndex
, int32_t errorInputLength
) {
756 int32_t delta
, offset
;
760 * adjust each offset by adding the previous sourceIndex
761 * minus the length of the input sequence that caused an
764 delta
=sourceIndex
-errorInputLength
;
767 * set each offset to -1 because this conversion function
768 * does not handle offsets
773 limit
=offsets
+length
;
775 /* most common case, nothing to do */
777 /* add the delta to each offset (but not if the offset is <0) */
778 while(offsets
<limit
) {
781 *offsets
=offset
+delta
;
785 } else /* delta<0 */ {
787 * set each offset to -1 because this conversion function
788 * does not handle offsets
789 * or the error input sequence started in a previous buffer
791 while(offsets
<limit
) {
797 /* ucnv_fromUnicode --------------------------------------------------------- */
800 * Implementation note for m:n conversions
802 * While collecting source units to find the longest match for m:n conversion,
803 * some source units may need to be stored for a partial match.
804 * When a second buffer does not yield a match on all of the previously stored
805 * source units, then they must be "replayed", i.e., fed back into the converter.
807 * The code relies on the fact that replaying will not nest -
808 * converting a replay buffer will not result in a replay.
809 * This is because a replay is necessary only after the _continuation_ of a
810 * partial match failed, but a replay buffer is converted as a whole.
811 * It may result in some of its units being stored again for a partial match,
812 * but there will not be a continuation _during_ the replay which could fail.
814 * It is conceivable that a callback function could call the converter
815 * recursively in a way that causes another replay to be stored, but that
816 * would be an error in the callback function.
817 * Such violations will cause assertion failures in a debug build,
818 * and wrong output, but they will not cause a crash.
/*
 * Core from-Unicode driver: runs the implementation conversion function in a
 * loop and handles offsets, m:n replay, end of input and error callbacks.
 *
 *   pArgs  conversion arguments; source, sourceLimit, flush, target and
 *          offsets are read and updated through it.
 *   err    in/out error code.
 *
 * Visible structure:
 *  - the conversion function is impl->fromUnicode, or
 *    impl->fromUnicodeWithOffsets with a fallback to impl->fromUnicode when
 *    the latter is NULL (offsets are then written as -1);
 *  - if units from an earlier partial m:n match are pending, the real source,
 *    limit, flush flag and source index are saved, the pending units are
 *    copied from cnv->preFromU into the local replay buffer, pArgs is pointed
 *    at that buffer and preFromULength is cleared;
 *  - outer loop: convert while *err is a success and record in
 *    converterSawEndOfInput whether the end of input was fully processed;
 *  - inner loop: update offsets through _updateOffsets, advance sourceIndex,
 *    switch to new replay units when preFromULength < 0 (a nested replay
 *    raises U_INTERNAL_PROGRAM_ERROR), then either continue converting, switch
 *    back from replay to the real source, inject U_TRUNCATED_CHAR_FOUND for a
 *    truncated sequence at flush, or finish by resetting the from-Unicode
 *    state without calling callbacks;
 *  - on failure: if a callback already ran, or the error is
 *    U_BUFFER_OVERFLOW_ERROR or anything other than the invalid / illegal /
 *    truncated codes, unconsumed replay units are copied back into
 *    cnv->preFromU, the real arguments are restored and the function returns;
 *    otherwise the offending code point is written to invalidUCharBuffer and
 *    the from-Unicode callback is called with UCNV_UNASSIGNED or UCNV_ILLEGAL.
 *
 * NOTE(review): many statements of this function are on lines not shown in
 * this excerpt; the summary covers only what is visible.
 */
822 _fromUnicodeWithCallback(UConverterFromUnicodeArgs
*pArgs
, UErrorCode
*err
) {
823 UConverterFromUnicode fromUnicode
;
829 int32_t errorInputLength
;
830 UBool converterSawEndOfInput
, calledCallback
;
832 /* variables for m:n conversion */
833 UChar replay
[UCNV_EXT_MAX_UCHARS
];
834 const UChar
*realSource
, *realSourceLimit
;
835 int32_t realSourceIndex
;
838 cnv
=pArgs
->converter
;
841 offsets
=pArgs
->offsets
;
843 /* get the converter implementation function */
846 fromUnicode
=cnv
->sharedData
->impl
->fromUnicode
;
848 fromUnicode
=cnv
->sharedData
->impl
->fromUnicodeWithOffsets
;
849 if(fromUnicode
==NULL
) {
850 /* there is no WithOffsets implementation */
851 fromUnicode
=cnv
->sharedData
->impl
->fromUnicode
;
852 /* we will write -1 for each offset */
857 if(cnv
->preFromULength
>=0) {
861 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
862 realSourceLimit
=NULL
;
867 * Previous m:n conversion stored source units from a partial match
868 * and failed to consume all of them.
869 * We need to "replay" them from a temporary buffer and convert them first.
871 realSource
=pArgs
->source
;
872 realSourceLimit
=pArgs
->sourceLimit
;
873 realFlush
=pArgs
->flush
;
874 realSourceIndex
=sourceIndex
;
876 uprv_memcpy(replay
, cnv
->preFromU
, -cnv
->preFromULength
*U_SIZEOF_UCHAR
);
877 pArgs
->source
=replay
;
878 pArgs
->sourceLimit
=replay
-cnv
->preFromULength
;
882 cnv
->preFromULength
=0;
886 * loop for conversion and error handling
892 * handle end of input
893 * handle errors/call callback
898 if(U_SUCCESS(*err
)) {
900 fromUnicode(pArgs
, err
);
903 * set a flag for whether the converter
904 * successfully processed the end of the input
906 * need not check cnv->preFromULength==0 because a replay (<0) will cause
907 * s<sourceLimit before converterSawEndOfInput is checked
909 converterSawEndOfInput
=
910 (UBool
)(U_SUCCESS(*err
) &&
911 pArgs
->flush
&& pArgs
->source
==pArgs
->sourceLimit
&&
912 cnv
->fromUChar32
==0);
914 /* handle error from ucnv_convertEx() */
915 converterSawEndOfInput
=FALSE
;
918 /* no callback called yet for this iteration */
919 calledCallback
=FALSE
;
921 /* no sourceIndex adjustment for conversion, only for callback output */
925 * loop for offsets and error handling
927 * iterates at most 3 times:
928 * 1. to clean up after the conversion function
929 * 2. after the callback
930 * 3. after the callback again if there was truncated input
933 /* update offsets if we write any */
935 int32_t length
=(int32_t)(pArgs
->target
-t
);
937 _updateOffsets(offsets
, length
, sourceIndex
, errorInputLength
);
940 * if a converter handles offsets and updates the offsets
941 * pointer at the end, then pArgs->offset should not change
943 * however, some converters do not handle offsets at all
944 * (sourceIndex<0) or may not update the offsets pointer
946 pArgs
->offsets
=offsets
+=length
;
950 sourceIndex
+=(int32_t)(pArgs
->source
-s
);
954 if(cnv
->preFromULength
<0) {
956 * switch the source to new replay units (cannot occur while replaying)
957 * after offset handling and before end-of-input and callback handling
959 if(realSource
==NULL
) {
960 realSource
=pArgs
->source
;
961 realSourceLimit
=pArgs
->sourceLimit
;
962 realFlush
=pArgs
->flush
;
963 realSourceIndex
=sourceIndex
;
965 uprv_memcpy(replay
, cnv
->preFromU
, -cnv
->preFromULength
*U_SIZEOF_UCHAR
);
966 pArgs
->source
=replay
;
967 pArgs
->sourceLimit
=replay
-cnv
->preFromULength
;
969 if((sourceIndex
+=cnv
->preFromULength
)<0) {
973 cnv
->preFromULength
=0;
975 /* see implementation note before _fromUnicodeWithCallback() */
976 U_ASSERT(realSource
==NULL
);
977 *err
=U_INTERNAL_PROGRAM_ERROR
;
981 /* update pointers */
985 if(U_SUCCESS(*err
)) {
986 if(s
<pArgs
->sourceLimit
) {
988 * continue with the conversion loop while there is still input left
989 * (continue converting by breaking out of only the inner loop)
992 } else if(realSource
!=NULL
) {
993 /* switch back from replaying to the real source and continue */
994 pArgs
->source
=realSource
;
995 pArgs
->sourceLimit
=realSourceLimit
;
996 pArgs
->flush
=realFlush
;
997 sourceIndex
=realSourceIndex
;
1001 } else if(pArgs
->flush
&& cnv
->fromUChar32
!=0) {
1003 * the entire input stream is consumed
1004 * and there is a partial, truncated input sequence left
1007 /* inject an error and continue with callback handling */
1008 *err
=U_TRUNCATED_CHAR_FOUND
;
1009 calledCallback
=FALSE
; /* new error condition */
1011 /* input consumed */
1014 * return to the conversion loop once more if the flush
1015 * flag is set and the conversion function has not
1016 * successfully processed the end of the input yet
1018 * (continue converting by breaking out of only the inner loop)
1020 if(!converterSawEndOfInput
) {
1024 /* reset the converter without calling the callback function */
1025 _reset(cnv
, UCNV_RESET_FROM_UNICODE
, FALSE
);
1028 /* done successfully */
1033 /* U_FAILURE(*err) */
1037 if( calledCallback
||
1038 (e
=*err
)==U_BUFFER_OVERFLOW_ERROR
||
1039 (e
!=U_INVALID_CHAR_FOUND
&&
1040 e
!=U_ILLEGAL_CHAR_FOUND
&&
1041 e
!=U_TRUNCATED_CHAR_FOUND
)
1044 * the callback did not or cannot resolve the error:
1045 * set output pointers and return
1047 * the check for buffer overflow is redundant but it is
1048 * a high-runner case and hopefully documents the intent
1051 * if we were replaying, then the replay buffer must be
1052 * copied back into the UConverter
1053 * and the real arguments must be restored
1055 if(realSource
!=NULL
) {
1058 U_ASSERT(cnv
->preFromULength
==0);
1060 length
=(int32_t)(pArgs
->sourceLimit
-pArgs
->source
);
1062 uprv_memcpy(cnv
->preFromU
, pArgs
->source
, length
*U_SIZEOF_UCHAR
);
1063 cnv
->preFromULength
=(int8_t)-length
;
1066 pArgs
->source
=realSource
;
1067 pArgs
->sourceLimit
=realSourceLimit
;
1068 pArgs
->flush
=realFlush
;
1075 /* callback handling */
1079 /* get and write the code point */
1080 codePoint
=cnv
->fromUChar32
;
1082 U16_APPEND_UNSAFE(cnv
->invalidUCharBuffer
, errorInputLength
, codePoint
);
1083 cnv
->invalidUCharLength
=(int8_t)errorInputLength
;
1085 /* set the converter state to deal with the next character */
1088 /* call the callback function */
1089 cnv
->fromUCharErrorBehaviour(cnv
->fromUContext
, pArgs
,
1090 cnv
->invalidUCharBuffer
, errorInputLength
, codePoint
,
1091 *err
==U_INVALID_CHAR_FOUND
? UCNV_UNASSIGNED
: UCNV_ILLEGAL
,
1096 * loop back to the offset handling
1098 * this flag will indicate after offset handling
1099 * that a callback was called;
1100 * if the callback did not resolve the error, then we return
1102 calledCallback
=TRUE
;
1108 * Output the fromUnicode overflow buffer.
1109 * Call this function if(cnv->charErrorBufferLength>0).
1110 * @return TRUE if overflow
/*
 * Flushes bytes held in cnv->charErrorBuffer to the target.
 * overflow points at charErrorBuffer and length is charErrorBufferLength.
 * Per the visible code: when the target fills up first (t == targetLimit) the
 * remaining bytes are moved to the front of the overflow buffer,
 * charErrorBufferLength is set to the number kept and *err becomes
 * U_BUFFER_OVERFLOW_ERROR; otherwise each byte is copied out, each written
 * offset is -1 (no source index exists for old output), and
 * charErrorBufferLength is set to 0 once everything is copied.
 * NOTE(review): the loop headers, the remaining parameters and the return
 * statements are on lines not shown in this excerpt.
 */
1113 ucnv_outputOverflowFromUnicode(UConverter
*cnv
,
1114 char **target
, const char *targetLimit
,
1122 if(pOffsets
!=NULL
) {
1128 overflow
=(char *)cnv
->charErrorBuffer
;
1129 length
=cnv
->charErrorBufferLength
;
1132 if(t
==targetLimit
) {
1133 /* the overflow buffer contains too much, keep the rest */
1137 overflow
[j
++]=overflow
[i
++];
1140 cnv
->charErrorBufferLength
=(int8_t)j
;
1145 *err
=U_BUFFER_OVERFLOW_ERROR
;
1149 /* copy the overflow contents to the target */
1152 *offsets
++=-1; /* no source index available for old output */
1156 /* the overflow buffer is completely copied to the target */
1157 cnv
->charErrorBufferLength
=0;
/*
 * Public fromUnicode entry point: validates the arguments, drains pending
 * output from the converter's overflow buffer, then runs
 * _fromUnicodeWithCallback() and writes the advanced positions back to
 * *source and *target.
 *
 * Argument handling visible in this function:
 * - err==NULL or an already failed *err is tested first.
 * - cnv, target or source being NULL sets U_ILLEGAL_ARGUMENT_ERROR.
 * - A sourceLimit equal to U_MAX_PTR(sourceLimit) is moved back by one byte
 *   before the range checks.
 * - U_ILLEGAL_ARGUMENT_ERROR is also set when a limit lies before its start
 *   pointer, when the source spans more than 0x3fffffff UChars, when the
 *   target spans more than 0x7fffffff bytes, or when the byte distance
 *   between s and sourceLimit is odd.
 * - If cnv->charErrorBufferLength>0, ucnv_outputOverflowFromUnicode() is
 *   called first to empty that buffer into the target.
 * - With flush not set, s==sourceLimit and cnv->preFromULength>=0 there is
 *   no new input and the function is done after emptying the overflow buffer.
 *
 * NOTE(review): s and t are presumably the initial *source and *target; their
 * declarations and assignments, the remaining parameters (offsets, flush and
 * err are used below), the return statements and the closing braces are not
 * visible in this listing. Confirm against the full file.
 */
1165 U_CAPI
void U_EXPORT2
1166 ucnv_fromUnicode(UConverter
*cnv
,
1167 char **target
, const char *targetLimit
,
1168 const UChar
**source
, const UChar
*sourceLimit
,
1172 UConverterFromUnicodeArgs args
;
1176 /* check parameters */
1177 if(err
==NULL
|| U_FAILURE(*err
)) {
1181 if(cnv
==NULL
|| target
==NULL
|| source
==NULL
) {
1182 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1189 if ((const void *)U_MAX_PTR(sourceLimit
) == (const void *)sourceLimit
) {
1191 Prevent code from going into an infinite loop in case we do hit this
1192 limit. The limit pointer is expected to be on a UChar * boundary.
1193 This also prevents the next argument check from failing.
1195 sourceLimit
= (const UChar
*)(((const char *)sourceLimit
) - 1);
1199 * All these conditions should never happen.
1201 * 1) Make sure that the limits are >= to the address source or target
1203 * 2) Make sure that the buffer sizes do not exceed the number range for
1204 * int32_t because some functions use the size (in units or bytes)
1205 * rather than comparing pointers, and because offsets are int32_t values.
1207 * size_t is guaranteed to be unsigned and large enough for the job.
1209 * Return with an error instead of adjusting the limits because we would
1210 * not be able to maintain the semantics that either the source must be
1211 * consumed or the target filled (unless an error occurs).
1212 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1214 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1215 * to a char * pointer and provide an incomplete UChar code unit.
1217 if (sourceLimit
<s
|| targetLimit
<t
||
1218 ((size_t)(sourceLimit
-s
)>(size_t)0x3fffffff && sourceLimit
>s
) ||
1219 ((size_t)(targetLimit
-t
)>(size_t)0x7fffffff && targetLimit
>t
) ||
1220 (((const char *)sourceLimit
-(const char *)s
) & 1) != 0)
1222 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1226 /* output the target overflow buffer */
1227 if( cnv
->charErrorBufferLength
>0 &&
1228 ucnv_outputOverflowFromUnicode(cnv
, target
, targetLimit
, &offsets
, err
)
1230 /* U_BUFFER_OVERFLOW_ERROR */
1233 /* *target may have moved, therefore stop using t */
1235 if(!flush
&& s
==sourceLimit
&& cnv
->preFromULength
>=0) {
1236 /* the overflow buffer is emptied and there is no new input: we are done */
1241 * Do not simply return with a buffer overflow error if
1242 * !flush && t==targetLimit
1243 * because it is possible that the source will not generate any output.
1244 * For example, the skip callback may be called;
1245 * it does not output anything.
1248 /* prepare the converter arguments */
1251 args
.offsets
=offsets
;
1253 args
.sourceLimit
=sourceLimit
;
1254 args
.target
=*target
;
1255 args
.targetLimit
=targetLimit
;
1256 args
.size
=sizeof(args
);
1258 _fromUnicodeWithCallback(&args
, err
);
1260 *source
=args
.source
;
1261 *target
=args
.target
;
1264 /* ucnv_toUnicode() --------------------------------------------------------- */
/*
 * Core toUnicode driver: calls the converter implementation function in a
 * loop and handles offsets, replay of stored input, end of input and error
 * callbacks around it.
 *
 * What the visible code does:
 * - Selects cnv->sharedData->impl->toUnicode or ->toUnicodeWithOffsets; when
 *   there is no WithOffsets implementation it falls back to toUnicode and -1
 *   is written for each offset.
 * - When cnv->preToULength<0, the -preToULength bytes stored in cnv->preToU
 *   are copied into the local replay[] buffer and used as the source; the
 *   real source, limit, flush flag and source index are saved in the real*
 *   variables and restored once the replay input has been consumed.
 *   cnv->preToULength is set back to 0 while replaying.
 * - After a successful call, if the input is consumed, flush is set and
 *   cnv->toULength>0, U_TRUNCATED_CHAR_FOUND is injected so that callback
 *   handling sees the partial input sequence.
 * - When all input is consumed and the converter saw the end of the input,
 *   the converter is reset with _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE),
 *   i.e. without calling the callback function.
 * - On failure it gives up (sets output pointers and returns) if a callback
 *   was already called for this error, for U_BUFFER_OVERFLOW_ERROR, and for
 *   any error other than U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND,
 *   U_TRUNCATED_CHAR_FOUND, U_ILLEGAL_ESCAPE_SEQUENCE and
 *   U_UNSUPPORTED_ESCAPE_SEQUENCE. If it was replaying, the unconsumed
 *   replay bytes are copied back into cnv->preToU (preToULength=-length)
 *   and the real arguments are restored.
 * - Otherwise it copies cnv->toUBytes into cnv->invalidCharBuffer, switches
 *   toUCallbackReason from UCNV_ILLEGAL to UCNV_UNASSIGNED for
 *   U_INVALID_CHAR_FOUND, calls cnv->fromCharErrorBehaviour, resets the
 *   reason to UCNV_ILLEGAL, sets calledCallback and loops back to the
 *   offset handling.
 *
 * NOTE(review): the storage class/return type, several declarations, the
 * loop statements, break/return statements and closing braces are not
 * visible in this listing; confirm control flow against the full file.
 */
1267 _toUnicodeWithCallback(UConverterToUnicodeArgs
*pArgs
, UErrorCode
*err
) {
1268 UConverterToUnicode toUnicode
;
1273 int32_t sourceIndex
;
1274 int32_t errorInputLength
;
1275 UBool converterSawEndOfInput
, calledCallback
;
1277 /* variables for m:n conversion */
1278 char replay
[UCNV_EXT_MAX_BYTES
];
1279 const char *realSource
, *realSourceLimit
;
1280 int32_t realSourceIndex
;
1283 cnv
=pArgs
->converter
;
1286 offsets
=pArgs
->offsets
;
1288 /* get the converter implementation function */
1291 toUnicode
=cnv
->sharedData
->impl
->toUnicode
;
1293 toUnicode
=cnv
->sharedData
->impl
->toUnicodeWithOffsets
;
1294 if(toUnicode
==NULL
) {
1295 /* there is no WithOffsets implementation */
1296 toUnicode
=cnv
->sharedData
->impl
->toUnicode
;
1297 /* we will write -1 for each offset */
1302 if(cnv
->preToULength
>=0) {
1306 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1307 realSourceLimit
=NULL
;
1312 * Previous m:n conversion stored source units from a partial match
1313 * and failed to consume all of them.
1314 * We need to "replay" them from a temporary buffer and convert them first.
1316 realSource
=pArgs
->source
;
1317 realSourceLimit
=pArgs
->sourceLimit
;
1318 realFlush
=pArgs
->flush
;
1319 realSourceIndex
=sourceIndex
;
1321 uprv_memcpy(replay
, cnv
->preToU
, -cnv
->preToULength
);
1322 pArgs
->source
=replay
;
1323 pArgs
->sourceLimit
=replay
-cnv
->preToULength
;
1327 cnv
->preToULength
=0;
1331 * loop for conversion and error handling
1337 * handle end of input
1338 * handle errors/call callback
1343 if(U_SUCCESS(*err
)) {
1345 toUnicode(pArgs
, err
);
1348 * set a flag for whether the converter
1349 * successfully processed the end of the input
1351 * need not check cnv->preToULength==0 because a replay (<0) will cause
1352 * s<sourceLimit before converterSawEndOfInput is checked
1354 converterSawEndOfInput
=
1355 (UBool
)(U_SUCCESS(*err
) &&
1356 pArgs
->flush
&& pArgs
->source
==pArgs
->sourceLimit
&&
1359 /* handle error from getNextUChar() or ucnv_convertEx() */
1360 converterSawEndOfInput
=FALSE
;
1363 /* no callback called yet for this iteration */
1364 calledCallback
=FALSE
;
1366 /* no sourceIndex adjustment for conversion, only for callback output */
1370 * loop for offsets and error handling
1372 * iterates at most 3 times:
1373 * 1. to clean up after the conversion function
1374 * 2. after the callback
1375 * 3. after the callback again if there was truncated input
1378 /* update offsets if we write any */
1380 int32_t length
=(int32_t)(pArgs
->target
-t
);
1382 _updateOffsets(offsets
, length
, sourceIndex
, errorInputLength
);
1385 * if a converter handles offsets and updates the offsets
1386 * pointer at the end, then pArgs->offset should not change
1388 * however, some converters do not handle offsets at all
1389 * (sourceIndex<0) or may not update the offsets pointer
1391 pArgs
->offsets
=offsets
+=length
;
1394 if(sourceIndex
>=0) {
1395 sourceIndex
+=(int32_t)(pArgs
->source
-s
);
1399 if(cnv
->preToULength
<0) {
1401 * switch the source to new replay units (cannot occur while replaying)
1402 * after offset handling and before end-of-input and callback handling
1404 if(realSource
==NULL
) {
1405 realSource
=pArgs
->source
;
1406 realSourceLimit
=pArgs
->sourceLimit
;
1407 realFlush
=pArgs
->flush
;
1408 realSourceIndex
=sourceIndex
;
1410 uprv_memcpy(replay
, cnv
->preToU
, -cnv
->preToULength
);
1411 pArgs
->source
=replay
;
1412 pArgs
->sourceLimit
=replay
-cnv
->preToULength
;
1414 if((sourceIndex
+=cnv
->preToULength
)<0) {
1418 cnv
->preToULength
=0;
1420 /* see implementation note before _fromUnicodeWithCallback() */
1421 U_ASSERT(realSource
==NULL
);
1422 *err
=U_INTERNAL_PROGRAM_ERROR
;
1426 /* update pointers */
1430 if(U_SUCCESS(*err
)) {
1431 if(s
<pArgs
->sourceLimit
) {
1433 * continue with the conversion loop while there is still input left
1434 * (continue converting by breaking out of only the inner loop)
1437 } else if(realSource
!=NULL
) {
1438 /* switch back from replaying to the real source and continue */
1439 pArgs
->source
=realSource
;
1440 pArgs
->sourceLimit
=realSourceLimit
;
1441 pArgs
->flush
=realFlush
;
1442 sourceIndex
=realSourceIndex
;
1446 } else if(pArgs
->flush
&& cnv
->toULength
>0) {
1448 * the entire input stream is consumed
1449 * and there is a partial, truncated input sequence left
1452 /* inject an error and continue with callback handling */
1453 *err
=U_TRUNCATED_CHAR_FOUND
;
1454 calledCallback
=FALSE
; /* new error condition */
1456 /* input consumed */
1459 * return to the conversion loop once more if the flush
1460 * flag is set and the conversion function has not
1461 * successfully processed the end of the input yet
1463 * (continue converting by breaking out of only the inner loop)
1465 if(!converterSawEndOfInput
) {
1469 /* reset the converter without calling the callback function */
1470 _reset(cnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
1473 /* done successfully */
1478 /* U_FAILURE(*err) */
1482 if( calledCallback
||
1483 (e
=*err
)==U_BUFFER_OVERFLOW_ERROR
||
1484 (e
!=U_INVALID_CHAR_FOUND
&&
1485 e
!=U_ILLEGAL_CHAR_FOUND
&&
1486 e
!=U_TRUNCATED_CHAR_FOUND
&&
1487 e
!=U_ILLEGAL_ESCAPE_SEQUENCE
&&
1488 e
!=U_UNSUPPORTED_ESCAPE_SEQUENCE
)
1491 * the callback did not or cannot resolve the error:
1492 * set output pointers and return
1494 * the check for buffer overflow is redundant but it is
1495 * a high-runner case and hopefully documents the intent
1498 * if we were replaying, then the replay buffer must be
1499 * copied back into the UConverter
1500 * and the real arguments must be restored
1502 if(realSource
!=NULL
) {
1505 U_ASSERT(cnv
->preToULength
==0);
1507 length
=(int32_t)(pArgs
->sourceLimit
-pArgs
->source
);
1509 uprv_memcpy(cnv
->preToU
, pArgs
->source
, length
);
1510 cnv
->preToULength
=(int8_t)-length
;
1513 pArgs
->source
=realSource
;
1514 pArgs
->sourceLimit
=realSourceLimit
;
1515 pArgs
->flush
=realFlush
;
1522 /* copy toUBytes[] to invalidCharBuffer[] */
1523 errorInputLength
=cnv
->invalidCharLength
=cnv
->toULength
;
1524 if(errorInputLength
>0) {
1525 uprv_memcpy(cnv
->invalidCharBuffer
, cnv
->toUBytes
, errorInputLength
);
1528 /* set the converter state to deal with the next character */
1531 /* call the callback function */
1532 if(cnv
->toUCallbackReason
==UCNV_ILLEGAL
&& *err
==U_INVALID_CHAR_FOUND
) {
1533 cnv
->toUCallbackReason
= UCNV_UNASSIGNED
;
1535 cnv
->fromCharErrorBehaviour(cnv
->toUContext
, pArgs
,
1536 cnv
->invalidCharBuffer
, errorInputLength
,
1537 cnv
->toUCallbackReason
,
1539 cnv
->toUCallbackReason
= UCNV_ILLEGAL
; /* reset to default value */
1542 * loop back to the offset handling
1544 * this flag will indicate after offset handling
1545 * that a callback was called;
1546 * if the callback did not resolve the error, then we return
1548 calledCallback
=TRUE
;
1554 * Output the toUnicode overflow buffer.
1555 * Call this function if(cnv->UCharErrorBufferLength>0).
1556 * @return TRUE if overflow
/*
 * Drains the toUnicode overflow buffer (cnv->UCharErrorBuffer, holding
 * cnv->UCharErrorBufferLength UChars) into the caller's target.
 *
 * Visible behaviour:
 * - If the target is full (t==targetLimit) before the overflow buffer is
 *   used up, the remaining UChars are kept by copying them down inside the
 *   overflow buffer (overflow[j++]=overflow[i++]), UCharErrorBufferLength is
 *   set to the number of units kept (j), and *err is set to
 *   U_BUFFER_OVERFLOW_ERROR.
 * - Otherwise the units are copied to the target and UCharErrorBufferLength
 *   is set to 0.
 * - An offset of -1 is written for this old output, because no source index
 *   is available for it.
 *
 * NOTE(review): the return type, the remaining parameters (pOffsets and err
 * are used below), most local declarations, the loops and the return
 * statements are not visible in this listing; confirm against the full file.
 */
1559 ucnv_outputOverflowToUnicode(UConverter
*cnv
,
1560 UChar
**target
, const UChar
*targetLimit
,
1564 UChar
*overflow
, *t
;
1568 if(pOffsets
!=NULL
) {
1574 overflow
=cnv
->UCharErrorBuffer
;
1575 length
=cnv
->UCharErrorBufferLength
;
1578 if(t
==targetLimit
) {
1579 /* the overflow buffer contains too much, keep the rest */
1583 overflow
[j
++]=overflow
[i
++];
1586 cnv
->UCharErrorBufferLength
=(int8_t)j
;
1591 *err
=U_BUFFER_OVERFLOW_ERROR
;
1595 /* copy the overflow contents to the target */
1598 *offsets
++=-1; /* no source index available for old output */
1602 /* the overflow buffer is completely copied to the target */
1603 cnv
->UCharErrorBufferLength
=0;
/*
 * Public toUnicode entry point: validates the arguments, drains pending
 * output from the converter's UChar overflow buffer, then runs
 * _toUnicodeWithCallback() and writes the advanced positions back to
 * *source and *target.
 *
 * Argument handling visible in this function:
 * - err==NULL or an already failed *err is tested first.
 * - cnv, target or source being NULL sets U_ILLEGAL_ARGUMENT_ERROR.
 * - A targetLimit equal to U_MAX_PTR(targetLimit) is moved back by one byte
 *   before the range checks.
 * - U_ILLEGAL_ARGUMENT_ERROR is also set when a limit lies before its start
 *   pointer, when the source spans more than 0x7fffffff bytes, when the
 *   target spans more than 0x3fffffff UChars, or when the byte distance
 *   between t and targetLimit is odd.
 * - If cnv->UCharErrorBufferLength>0, ucnv_outputOverflowToUnicode() is
 *   called first to empty that buffer into the target.
 * - With flush not set, s==sourceLimit and cnv->preToULength>=0 there is no
 *   new input and the function is done after emptying the overflow buffer.
 *
 * NOTE(review): the argument-check comment below gives the example
 * adjustment as sourceLimit=t+0x7fffffff, while the check itself pairs
 * sourceLimit with s and targetLimit with t; presumably s was meant.
 * NOTE(review): s and t are presumably the initial *source and *target; their
 * declarations and assignments, the remaining parameters (offsets, flush and
 * err are used below), the return statements and the closing braces are not
 * visible in this listing. Confirm against the full file.
 */
1611 U_CAPI
void U_EXPORT2
1612 ucnv_toUnicode(UConverter
*cnv
,
1613 UChar
**target
, const UChar
*targetLimit
,
1614 const char **source
, const char *sourceLimit
,
1618 UConverterToUnicodeArgs args
;
1622 /* check parameters */
1623 if(err
==NULL
|| U_FAILURE(*err
)) {
1627 if(cnv
==NULL
|| target
==NULL
|| source
==NULL
) {
1628 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1635 if ((const void *)U_MAX_PTR(targetLimit
) == (const void *)targetLimit
) {
1637 Prevent code from going into an infinite loop in case we do hit this
1638 limit. The limit pointer is expected to be on a UChar * boundary.
1639 This also prevents the next argument check from failing.
1641 targetLimit
= (const UChar
*)(((const char *)targetLimit
) - 1);
1645 * All these conditions should never happen.
1647 * 1) Make sure that the limits are >= to the address source or target
1649 * 2) Make sure that the buffer sizes do not exceed the number range for
1650 * int32_t because some functions use the size (in units or bytes)
1651 * rather than comparing pointers, and because offsets are int32_t values.
1653 * size_t is guaranteed to be unsigned and large enough for the job.
1655 * Return with an error instead of adjusting the limits because we would
1656 * not be able to maintain the semantics that either the source must be
1657 * consumed or the target filled (unless an error occurs).
1658 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1660 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1661 * to a char * pointer and provide an incomplete UChar code unit.
1663 if (sourceLimit
<s
|| targetLimit
<t
||
1664 ((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
) ||
1665 ((size_t)(targetLimit
-t
)>(size_t)0x3fffffff && targetLimit
>t
) ||
1666 (((const char *)targetLimit
-(const char *)t
) & 1) != 0
1668 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1672 /* output the target overflow buffer */
1673 if( cnv
->UCharErrorBufferLength
>0 &&
1674 ucnv_outputOverflowToUnicode(cnv
, target
, targetLimit
, &offsets
, err
)
1676 /* U_BUFFER_OVERFLOW_ERROR */
1679 /* *target may have moved, therefore stop using t */
1681 if(!flush
&& s
==sourceLimit
&& cnv
->preToULength
>=0) {
1682 /* the overflow buffer is emptied and there is no new input: we are done */
1687 * Do not simply return with a buffer overflow error if
1688 * !flush && t==targetLimit
1689 * because it is possible that the source will not generate any output.
1690 * For example, the skip callback may be called;
1691 * it does not output anything.
1694 /* prepare the converter arguments */
1697 args
.offsets
=offsets
;
1699 args
.sourceLimit
=sourceLimit
;
1700 args
.target
=*target
;
1701 args
.targetLimit
=targetLimit
;
1702 args
.size
=sizeof(args
);
1704 _toUnicodeWithCallback(&args
, err
);
1706 *source
=args
.source
;
1707 *target
=args
.target
;
1710 /* ucnv_to/fromUChars() ----------------------------------------------------- */
/*
 * Convenience wrapper around ucnv_fromUnicode(): resets the converter with
 * ucnv_resetFromUnicode(), converts the UChar string src into dest, and
 * returns the result of u_terminateChars() for the total output length.
 *
 * Visible behaviour:
 * - pErrorCode==NULL or an already failed *pErrorCode is tested first.
 * - U_ILLEGAL_ARGUMENT_ERROR is set for destCapacity<0, for a NULL dest with
 *   destCapacity>0, for srcLength<-1, and for a NULL src with srcLength!=0
 *   (a further condition on the line before these is not visible here).
 * - srcLength can be computed with u_strlen(src); the test that selects this
 *   is not visible here (presumably srcLength==-1 - TODO confirm).
 * - If dest+destCapacity compares below dest (or is NULL for a non-NULL
 *   dest), destLimit is pinned to U_MAX_PTR(dest).
 * - On U_BUFFER_OVERFLOW_ERROR the conversion is repeated into a local
 *   buffer while overflow keeps being reported, and only the produced
 *   lengths are added to destLength (preflighting).
 * - ucnv_fromUnicode() is called with 0 and TRUE for its sixth and seventh
 *   arguments (presumably offsets and flush - TODO confirm).
 *
 * NOTE(review): several declarations (buffer, destLength), assignments,
 * return statements and closing braces are not visible in this listing.
 */
1712 U_CAPI
int32_t U_EXPORT2
1713 ucnv_fromUChars(UConverter
*cnv
,
1714 char *dest
, int32_t destCapacity
,
1715 const UChar
*src
, int32_t srcLength
,
1716 UErrorCode
*pErrorCode
) {
1717 const UChar
*srcLimit
;
1718 char *originalDest
, *destLimit
;
1721 /* check arguments */
1722 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1727 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
) ||
1728 srcLength
<-1 || (srcLength
!=0 && src
==NULL
)
1730 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1735 ucnv_resetFromUnicode(cnv
);
1738 srcLength
=u_strlen(src
);
1741 srcLimit
=src
+srcLength
;
1742 destLimit
=dest
+destCapacity
;
1744 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1745 if(destLimit
<dest
|| (destLimit
==NULL
&& dest
!=NULL
)) {
1746 destLimit
=(char *)U_MAX_PTR(dest
);
1749 /* perform the conversion */
1750 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1751 destLength
=(int32_t)(dest
-originalDest
);
1753 /* if an overflow occurs, then get the preflighting length */
1754 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1757 destLimit
=buffer
+sizeof(buffer
);
1760 *pErrorCode
=U_ZERO_ERROR
;
1761 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1762 destLength
+=(int32_t)(dest
-buffer
);
1763 } while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
1769 return u_terminateChars(originalDest
, destCapacity
, destLength
, pErrorCode
);
/*
 * Convenience wrapper around ucnv_toUnicode(): resets the converter with
 * ucnv_resetToUnicode(), converts the byte string src into the UChar buffer
 * dest, and returns the result of u_terminateUChars() for the total output
 * length.
 *
 * Visible behaviour:
 * - pErrorCode==NULL or an already failed *pErrorCode is tested first.
 * - U_ILLEGAL_ARGUMENT_ERROR is set for destCapacity<0, for a NULL dest with
 *   destCapacity>0, for srcLength<-1, and for a NULL src with srcLength!=0
 *   (a further condition on the line before these is not visible here).
 * - srcLength can be computed with uprv_strlen(src); the test that selects
 *   this is not visible here (presumably srcLength==-1 - TODO confirm).
 * - If dest+destCapacity compares below dest (or is NULL for a non-NULL
 *   dest), destLimit is pinned to U_MAX_PTR(dest).
 * - On U_BUFFER_OVERFLOW_ERROR the conversion is repeated into a local
 *   buffer (its limit is buffer+sizeof(buffer)/U_SIZEOF_UCHAR, i.e. counted
 *   in UChars) while overflow keeps being reported, and only the produced
 *   lengths are added to destLength (preflighting).
 * - ucnv_toUnicode() is called with 0 and TRUE for its sixth and seventh
 *   arguments (presumably offsets and flush - TODO confirm).
 *
 * NOTE(review): several declarations (buffer, destLength), assignments,
 * return statements and closing braces are not visible in this listing.
 */
1772 U_CAPI
int32_t U_EXPORT2
1773 ucnv_toUChars(UConverter
*cnv
,
1774 UChar
*dest
, int32_t destCapacity
,
1775 const char *src
, int32_t srcLength
,
1776 UErrorCode
*pErrorCode
) {
1777 const char *srcLimit
;
1778 UChar
*originalDest
, *destLimit
;
1781 /* check arguments */
1782 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1787 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
) ||
1788 srcLength
<-1 || (srcLength
!=0 && src
==NULL
))
1790 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1795 ucnv_resetToUnicode(cnv
);
1798 srcLength
=(int32_t)uprv_strlen(src
);
1801 srcLimit
=src
+srcLength
;
1802 destLimit
=dest
+destCapacity
;
1804 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1805 if(destLimit
<dest
|| (destLimit
==NULL
&& dest
!=NULL
)) {
1806 destLimit
=(UChar
*)U_MAX_PTR(dest
);
1809 /* perform the conversion */
1810 ucnv_toUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1811 destLength
=(int32_t)(dest
-originalDest
);
1813 /* if an overflow occurs, then get the preflighting length */
1814 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
)
1818 destLimit
=buffer
+sizeof(buffer
)/U_SIZEOF_UCHAR
;
1821 *pErrorCode
=U_ZERO_ERROR
;
1822 ucnv_toUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1823 destLength
+=(int32_t)(dest
-buffer
);
1825 while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
1831 return u_terminateUChars(originalDest
, destCapacity
, destLength
, pErrorCode
);
1834 /* ucnv_getNextUChar() ------------------------------------------------------ */
/*
 * Converts and returns the next code point from the source bytes, advancing
 * *source. flush==TRUE is implied for this function.
 *
 * Visible steps:
 * - Parameter checks: err==NULL or failed *err is tested first; cnv or
 *   source being NULL, and a source range larger than 0x7fffffff bytes, set
 *   U_ILLEGAL_ARGUMENT_ERROR (one more check sets the same error but its
 *   condition is not visible in this listing).
 * - If cnv->UCharErrorBuffer holds output, one code point is read from it
 *   with U16_NEXT and the remainder is moved to the front of that buffer.
 *   Processing continues past this step only if the buffer contained just a
 *   lead surrogate.
 * - If cnv->toULength==0 and the implementation provides getNextUChar, that
 *   native function is called. U_INDEX_OUTOFBOUNDS_ERROR resets the
 *   converter (without calling the callback) and returns 0xffff; otherwise
 *   success with c>=0 ends the native path, and other results fall through
 *   to _toUnicodeWithCallback().
 * - _toUnicodeWithCallback() converts into a local buffer limited to one
 *   UChar (targetLimit=buffer+1). A failure yields c=0xffff; no output
 *   (length==0) sets U_INDEX_OUTOFBOUNDS_ERROR and yields c=0xffff.
 * - For a lead surrogate, a trail surrogate is looked for first in the
 *   converter's overflow buffer, otherwise by converting once more into
 *   buffer[1] (targetLimit=buffer+2); a pair is combined with
 *   U16_GET_SUPPLEMENTARY, and an unpaired lead surrogate is returned as is.
 * - Leftover output in buffer[i..length[ is moved into the beginning of
 *   cnv->UCharErrorBuffer, after shifting any existing overflow back by
 *   delta units.
 *
 * NOTE(review): the range-check comment below gives the example adjustment
 * as sourceLimit=t+0x7fffffff, but no variable t appears in the visible
 * code of this function; presumably s was meant.
 * NOTE(review): the remaining parameter (err is used below), several
 * declarations, return statements and closing braces are not visible in
 * this listing; confirm control flow against the full file.
 */
1836 U_CAPI UChar32 U_EXPORT2
1837 ucnv_getNextUChar(UConverter
*cnv
,
1838 const char **source
, const char *sourceLimit
,
1840 UConverterToUnicodeArgs args
;
1841 UChar buffer
[U16_MAX_LENGTH
];
1846 /* check parameters */
1847 if(err
==NULL
|| U_FAILURE(*err
)) {
1851 if(cnv
==NULL
|| source
==NULL
) {
1852 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1858 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1863 * Make sure that the buffer sizes do not exceed the number range for
1864 * int32_t because some functions use the size (in units or bytes)
1865 * rather than comparing pointers, and because offsets are int32_t values.
1867 * size_t is guaranteed to be unsigned and large enough for the job.
1869 * Return with an error instead of adjusting the limits because we would
1870 * not be able to maintain the semantics that either the source must be
1871 * consumed or the target filled (unless an error occurs).
1872 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1874 if(((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
)) {
1875 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1881 /* flush the target overflow buffer */
1882 if(cnv
->UCharErrorBufferLength
>0) {
1885 overflow
=cnv
->UCharErrorBuffer
;
1887 length
=cnv
->UCharErrorBufferLength
;
1888 U16_NEXT(overflow
, i
, length
, c
);
1890 /* move the remaining overflow contents up to the beginning */
1891 if((cnv
->UCharErrorBufferLength
=(int8_t)(length
-i
))>0) {
1892 uprv_memmove(cnv
->UCharErrorBuffer
, cnv
->UCharErrorBuffer
+i
,
1893 cnv
->UCharErrorBufferLength
*U_SIZEOF_UCHAR
);
1896 if(!U16_IS_LEAD(c
) || i
<length
) {
1900 * Continue if the overflow buffer contained only a lead surrogate,
1901 * in case the converter outputs single surrogates from complete
1907 * flush==TRUE is implied for ucnv_getNextUChar()
1909 * do not simply return even if s==sourceLimit because the converter may
1910 * not have seen flush==TRUE before
1913 /* prepare the converter arguments */
1918 args
.sourceLimit
=sourceLimit
;
1920 args
.targetLimit
=buffer
+1;
1921 args
.size
=sizeof(args
);
1925 * call the native getNextUChar() implementation if we are
1926 * at a character boundary (toULength==0)
1928 * unlike with _toUnicode(), getNextUChar() implementations must set
1929 * U_TRUNCATED_CHAR_FOUND for truncated input,
1930 * in addition to setting toULength/toUBytes[]
1932 if(cnv
->toULength
==0 && cnv
->sharedData
->impl
->getNextUChar
!=NULL
) {
1933 c
=cnv
->sharedData
->impl
->getNextUChar(&args
, err
);
1934 *source
=s
=args
.source
;
1935 if(*err
==U_INDEX_OUTOFBOUNDS_ERROR
) {
1936 /* reset the converter without calling the callback function */
1937 _reset(cnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
1938 return 0xffff; /* no output */
1939 } else if(U_SUCCESS(*err
) && c
>=0) {
1942 * else fall through to use _toUnicode() because
1943 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1944 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1949 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1950 _toUnicodeWithCallback(&args
, err
);
1952 if(*err
==U_BUFFER_OVERFLOW_ERROR
) {
1957 length
=(int32_t)(args
.target
-buffer
);
1959 /* write the lead surrogate from the overflow buffer */
1961 args
.target
=buffer
+1;
1966 /* buffer contents starts at i and ends before length */
1968 if(U_FAILURE(*err
)) {
1969 c
=0xffff; /* no output */
1970 } else if(length
==0) {
1971 /* no input or only state changes */
1972 *err
=U_INDEX_OUTOFBOUNDS_ERROR
;
1973 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1974 c
=0xffff; /* no output */
1978 if(!U16_IS_LEAD(c
)) {
1979 /* consume c=buffer[0], done */
1981 /* got a lead surrogate, see if a trail surrogate follows */
1984 if(cnv
->UCharErrorBufferLength
>0) {
1985 /* got overflow output from the conversion */
1986 if(U16_IS_TRAIL(c2
=cnv
->UCharErrorBuffer
[0])) {
1987 /* got a trail surrogate, too */
1988 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1990 /* move the remaining overflow contents up to the beginning */
1991 if((--cnv
->UCharErrorBufferLength
)>0) {
1992 uprv_memmove(cnv
->UCharErrorBuffer
, cnv
->UCharErrorBuffer
+1,
1993 cnv
->UCharErrorBufferLength
*U_SIZEOF_UCHAR
);
1996 /* c is an unpaired lead surrogate, just return it */
1998 } else if(args
.source
<sourceLimit
) {
1999 /* convert once more, to buffer[1] */
2000 args
.targetLimit
=buffer
+2;
2001 _toUnicodeWithCallback(&args
, err
);
2002 if(*err
==U_BUFFER_OVERFLOW_ERROR
) {
2006 length
=(int32_t)(args
.target
-buffer
);
2007 if(U_SUCCESS(*err
) && length
==2 && U16_IS_TRAIL(c2
=buffer
[1])) {
2008 /* got a trail surrogate, too */
2009 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
2017 * move leftover output from buffer[i..length[
2018 * into the beginning of the overflow buffer
2021 /* move further overflow back */
2022 int32_t delta
=length
-i
;
2023 if((length
=cnv
->UCharErrorBufferLength
)>0) {
2024 uprv_memmove(cnv
->UCharErrorBuffer
+delta
, cnv
->UCharErrorBuffer
,
2025 length
*U_SIZEOF_UCHAR
);
2027 cnv
->UCharErrorBufferLength
=(int8_t)(length
+delta
);
2029 cnv
->UCharErrorBuffer
[0]=buffer
[i
++];
2031 cnv
->UCharErrorBuffer
[1]=buffer
[i
];
2035 *source
=args
.source
;
2039 /* ucnv_convert() and siblings ---------------------------------------------- */
/*
 * Converts bytes from the sourceCnv charset to the targetCnv charset through
 * a UChar pivot buffer, using direct conversion (impl->fromUTF8 or
 * impl->toUTF8) when one side is UCNV_UTF8 and the other side provides the
 * corresponding function.
 *
 * Visible argument handling:
 * - pErrorCode==NULL or an already failed *pErrorCode is tested first.
 * - NULL converters, NULL source/*source, NULL target/*target or a NULL
 *   targetLimit set U_ILLEGAL_ARGUMENT_ERROR; so do limits that lie before
 *   their start pointers and ranges larger than 0x7fffffff bytes.
 * - With pivotStart==NULL, streaming conversion is rejected with
 *   U_ILLEGAL_ARGUMENT_ERROR (the exact condition is not visible here);
 *   otherwise the local pivotBuffer of CHUNK_SIZE UChars is used.
 *   With a caller-supplied pivot buffer, pivotStart>=pivotLimit and NULL
 *   pivotSource/pivotTarget (or NULL pointees) are rejected.
 * - A NULL sourceLimit means the source is a single-byte-NUL-terminated
 *   string; its limit is found with uprv_strchr(*source, 0).
 * - The converters are reset and the pivot pointers set to pivotStart in
 *   one branch (presumably when reset is TRUE - its condition is not
 *   visible); otherwise a non-empty targetCnv overflow buffer is output
 *   first.
 *
 * Visible conversion loop, in this order:
 * - fromUnicode(pivot -> target) when the pivot is not empty, on error, or
 *   when targetCnv has replay input (preFromULength<0); on failure
 *   *pivotSource is updated from fromUArgs.source, otherwise the pivot
 *   buffer is reset to pivotStart.
 * - The sourceCnv overflow buffer is moved into the pivot buffer.
 * - The end-of-input condition is checked.
 * - Direct conversion is used if available, but not while continuing a
 *   partial match or flushing the toUnicode replay buffer. When direct
 *   conversion is used the pivot buffer is capped at 32 UChars.
 * - toUnicode(source -> pivot); a pivot overflow is cleared to
 *   U_ZERO_ERROR so the loop continues, and fromUArgs.flush is set once
 *   flush is requested and toUnicode has processed the end of the input.
 *
 * After the loop *source and *target are updated. If flush is set and the
 * result is a success, the target is terminated when *target!=targetLimit
 * (the store itself is not visible here) and
 * U_STRING_NOT_TERMINATED_WARNING is set or cleared accordingly.
 *
 * NOTE(review): several declarations, the loop statement, break/return
 * statements and closing braces are not visible in this listing; confirm
 * control flow against the full file.
 */
2041 U_CAPI
void U_EXPORT2
2042 ucnv_convertEx(UConverter
*targetCnv
, UConverter
*sourceCnv
,
2043 char **target
, const char *targetLimit
,
2044 const char **source
, const char *sourceLimit
,
2045 UChar
*pivotStart
, UChar
**pivotSource
,
2046 UChar
**pivotTarget
, const UChar
*pivotLimit
,
2047 UBool reset
, UBool flush
,
2048 UErrorCode
*pErrorCode
) {
2049 UChar pivotBuffer
[CHUNK_SIZE
];
2050 const UChar
*myPivotSource
;
2051 UChar
*myPivotTarget
;
2055 UConverterToUnicodeArgs toUArgs
;
2056 UConverterFromUnicodeArgs fromUArgs
;
2057 UConverterConvert convert
;
2059 /* error checking */
2060 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2064 if( targetCnv
==NULL
|| sourceCnv
==NULL
||
2065 source
==NULL
|| *source
==NULL
||
2066 target
==NULL
|| *target
==NULL
|| targetLimit
==NULL
2068 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2074 if((sourceLimit
!=NULL
&& sourceLimit
<s
) || targetLimit
<t
) {
2075 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2080 * Make sure that the buffer sizes do not exceed the number range for
2081 * int32_t. See ucnv_toUnicode() for a more detailed comment.
2084 (sourceLimit
!=NULL
&& ((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
)) ||
2085 ((size_t)(targetLimit
-t
)>(size_t)0x7fffffff && targetLimit
>t
)
2087 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2091 if(pivotStart
==NULL
) {
2093 /* streaming conversion requires an explicit pivot buffer */
2094 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2098 /* use the stack pivot buffer */
2099 myPivotSource
=myPivotTarget
=pivotStart
=pivotBuffer
;
2100 pivotSource
=(UChar
**)&myPivotSource
;
2101 pivotTarget
=&myPivotTarget
;
2102 pivotLimit
=pivotBuffer
+CHUNK_SIZE
;
2103 } else if( pivotStart
>=pivotLimit
||
2104 pivotSource
==NULL
|| *pivotSource
==NULL
||
2105 pivotTarget
==NULL
|| *pivotTarget
==NULL
||
2108 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2112 if(sourceLimit
==NULL
) {
2113 /* get limit of single-byte-NUL-terminated source string */
2114 sourceLimit
=uprv_strchr(*source
, 0);
2118 ucnv_resetToUnicode(sourceCnv
);
2119 ucnv_resetFromUnicode(targetCnv
);
2120 *pivotSource
=*pivotTarget
=pivotStart
;
2121 } else if(targetCnv
->charErrorBufferLength
>0) {
2122 /* output the targetCnv overflow buffer */
2123 if(ucnv_outputOverflowFromUnicode(targetCnv
, target
, targetLimit
, NULL
, pErrorCode
)) {
2124 /* U_BUFFER_OVERFLOW_ERROR */
2127 /* *target has moved, therefore stop using t */
2130 targetCnv
->preFromULength
>=0 && *pivotSource
==*pivotTarget
&&
2131 sourceCnv
->UCharErrorBufferLength
==0 && sourceCnv
->preToULength
>=0 && s
==sourceLimit
2133 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2138 /* Is direct-UTF-8 conversion available? */
2139 if( sourceCnv
->sharedData
->staticData
->conversionType
==UCNV_UTF8
&&
2140 targetCnv
->sharedData
->impl
->fromUTF8
!=NULL
2142 convert
=targetCnv
->sharedData
->impl
->fromUTF8
;
2143 } else if( targetCnv
->sharedData
->staticData
->conversionType
==UCNV_UTF8
&&
2144 sourceCnv
->sharedData
->impl
->toUTF8
!=NULL
2146 convert
=sourceCnv
->sharedData
->impl
->toUTF8
;
2152 * If direct-UTF-8 conversion is available, then we use a smaller
2153 * pivot buffer for error handling and partial matches
2154 * so that we quickly return to direct conversion.
2156 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2158 * We could reduce the pivot buffer size further, at the cost of
2159 * buffer overflows from callbacks.
2160 * The pivot buffer should not be smaller than the maximum number of
2161 * fromUnicode extension table input UChars
2162 * (for m:n conversion, see
2163 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2164 * or 2 for surrogate pairs.
2166 * Too small a buffer can cause thrashing between pivoting and direct
2167 * conversion, with function call overhead outweighing the benefits
2168 * of direct conversion.
2170 if(convert
!=NULL
&& (pivotLimit
-pivotStart
)>32) {
2171 pivotLimit
=pivotStart
+32;
2174 /* prepare the converter arguments */
2175 fromUArgs
.converter
=targetCnv
;
2176 fromUArgs
.flush
=FALSE
;
2177 fromUArgs
.offsets
=NULL
;
2178 fromUArgs
.target
=*target
;
2179 fromUArgs
.targetLimit
=targetLimit
;
2180 fromUArgs
.size
=sizeof(fromUArgs
);
2182 toUArgs
.converter
=sourceCnv
;
2183 toUArgs
.flush
=flush
;
2184 toUArgs
.offsets
=NULL
;
2186 toUArgs
.sourceLimit
=sourceLimit
;
2187 toUArgs
.targetLimit
=pivotLimit
;
2188 toUArgs
.size
=sizeof(toUArgs
);
2191 * TODO: Consider separating this function into two functions,
2192 * extracting exactly the conversion loop,
2193 * for readability and to reduce the set of visible variables.
2195 * Otherwise stop using s and t from here on.
2202 * The sequence of steps in the loop may appear backward,
2203 * but the principle is simple:
2205 * source - sourceCnv overflow - pivot - targetCnv overflow - target
2206 * empty out later buffers before refilling them from earlier ones.
2208 * The targetCnv overflow buffer is flushed out only once before the loop.
2212 * if(pivot not empty or error or replay or flush fromUnicode) {
2213 * fromUnicode(pivot -> target);
2216 * For pivoting conversion; and for direct conversion for
2217 * error callback handling and flushing the replay buffer.
2219 if( *pivotSource
<*pivotTarget
||
2220 U_FAILURE(*pErrorCode
) ||
2221 targetCnv
->preFromULength
<0 ||
2224 fromUArgs
.source
=*pivotSource
;
2225 fromUArgs
.sourceLimit
=*pivotTarget
;
2226 _fromUnicodeWithCallback(&fromUArgs
, pErrorCode
);
2227 if(U_FAILURE(*pErrorCode
)) {
2228 /* target overflow, or conversion error */
2229 *pivotSource
=(UChar
*)fromUArgs
.source
;
2234 * _fromUnicodeWithCallback() must have consumed the pivot contents
2235 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2239 /* The pivot buffer is empty; reset it so we start at pivotStart. */
2240 *pivotSource
=*pivotTarget
=pivotStart
;
2243 * if(sourceCnv overflow buffer not empty) {
2244 * move(sourceCnv overflow buffer -> pivot);
2248 /* output the sourceCnv overflow buffer */
2249 if(sourceCnv
->UCharErrorBufferLength
>0) {
2250 if(ucnv_outputOverflowToUnicode(sourceCnv
, pivotTarget
, pivotLimit
, NULL
, pErrorCode
)) {
2251 /* U_BUFFER_OVERFLOW_ERROR */
2252 *pErrorCode
=U_ZERO_ERROR
;
2258 * check for end of input and break if done
2260 * Checking both flush and fromUArgs.flush ensures that the converters
2261 * have been called with the flush flag set if the ucnv_convertEx()
2264 if( toUArgs
.source
==sourceLimit
&&
2265 sourceCnv
->preToULength
>=0 && sourceCnv
->toULength
==0 &&
2266 (!flush
|| fromUArgs
.flush
)
2268 /* done successfully */
2273 * use direct conversion if available
2274 * but not if continuing a partial match
2275 * or flushing the toUnicode replay buffer
2277 if(convert
!=NULL
&& targetCnv
->preFromUFirstCP
<0 && sourceCnv
->preToULength
==0) {
2278 if(*pErrorCode
==U_USING_DEFAULT_WARNING
) {
2279 /* remove a warning that may be set by this function */
2280 *pErrorCode
=U_ZERO_ERROR
;
2282 convert(&fromUArgs
, &toUArgs
, pErrorCode
);
2283 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2285 } else if(U_FAILURE(*pErrorCode
)) {
2286 if(sourceCnv
->toULength
>0) {
2288 * Fall through to calling _toUnicodeWithCallback()
2289 * for callback handling.
2291 * The pivot buffer will be reset with
2292 * *pivotSource=*pivotTarget=pivotStart;
2293 * which indicates a toUnicode error to the caller
2294 * (*pivotSource==pivotStart shows no pivot UChars consumed).
2298 * Indicate a fromUnicode error to the caller
2299 * (*pivotSource>pivotStart shows some pivot UChars consumed).
2301 *pivotSource
=*pivotTarget
=pivotStart
+1;
2303 * Loop around to calling _fromUnicodeWithCallbacks()
2304 * for callback handling.
2308 } else if(*pErrorCode
==U_USING_DEFAULT_WARNING
) {
2310 * No error, but the implementation requested to temporarily
2311 * fall back to pivoting.
2313 *pErrorCode
=U_ZERO_ERROR
;
2315 * The following else branches are almost identical to the end-of-input
2316 * handling in _toUnicodeWithCallback().
2317 * Avoid calling it just for the end of input.
2319 } else if(flush
&& sourceCnv
->toULength
>0) { /* flush==toUArgs.flush */
2321 * the entire input stream is consumed
2322 * and there is a partial, truncated input sequence left
2325 /* inject an error and continue with callback handling */
2326 *pErrorCode
=U_TRUNCATED_CHAR_FOUND
;
2328 /* input consumed */
2330 /* reset the converters without calling the callback functions */
2331 _reset(sourceCnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
2332 _reset(targetCnv
, UCNV_RESET_FROM_UNICODE
, FALSE
);
2335 /* done successfully */
2341 * toUnicode(source -> pivot);
2343 * For pivoting conversion; and for direct conversion for
2344 * error callback handling, continuing partial matches
2345 * and flushing the replay buffer.
2347 * The pivot buffer is empty and reset.
2349 toUArgs
.target
=pivotStart
; /* ==*pivotTarget */
2350 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2351 _toUnicodeWithCallback(&toUArgs
, pErrorCode
);
2352 *pivotTarget
=toUArgs
.target
;
2353 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2354 /* pivot overflow: continue with the conversion loop */
2355 *pErrorCode
=U_ZERO_ERROR
;
2356 } else if(U_FAILURE(*pErrorCode
) || (!flush
&& *pivotTarget
==pivotStart
)) {
2357 /* conversion error, or there was nothing left to convert */
2362 * _toUnicodeWithCallback() wrote into the pivot buffer,
2363 * continue with fromUnicode conversion.
2365 * Set the fromUnicode flush flag if we flush and if toUnicode has
2366 * processed the end of the input.
2368 if( flush
&& toUArgs
.source
==sourceLimit
&&
2369 sourceCnv
->preToULength
>=0 &&
2370 sourceCnv
->UCharErrorBufferLength
==0
2372 fromUArgs
.flush
=TRUE
;
2377 * The conversion loop is exited when one of the following is true:
2378 * - the entire source text has been converted successfully to the target buffer
2379 * - a target buffer overflow occurred
2380 * - a conversion error occurred
2383 *source
=toUArgs
.source
;
2384 *target
=fromUArgs
.target
;
2386 /* terminate the target buffer if possible */
2387 if(flush
&& U_SUCCESS(*pErrorCode
)) {
2388 if(*target
!=targetLimit
) {
2390 if(*pErrorCode
==U_STRING_NOT_TERMINATED_WARNING
) {
2391 *pErrorCode
=U_ZERO_ERROR
;
2394 *pErrorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2399 /* internal implementation of ucnv_convert() etc. with preflighting */
2401 ucnv_internalConvert(UConverter
*outConverter
, UConverter
*inConverter
,
2402 char *target
, int32_t targetCapacity
,
2403 const char *source
, int32_t sourceLength
,
2404 UErrorCode
*pErrorCode
) {
2405 UChar pivotBuffer
[CHUNK_SIZE
];
2406 UChar
*pivot
, *pivot2
;
2409 const char *sourceLimit
;
2410 const char *targetLimit
;
2411 int32_t targetLength
=0;
2414 if(sourceLength
<0) {
2415 sourceLimit
=uprv_strchr(source
, 0);
2417 sourceLimit
=source
+sourceLength
;
2420 /* if there is no input data, we're done */
2421 if(source
==sourceLimit
) {
2422 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2425 pivot
=pivot2
=pivotBuffer
;
2429 if(targetCapacity
>0) {
2430 /* perform real conversion */
2431 targetLimit
=target
+targetCapacity
;
2432 ucnv_convertEx(outConverter
, inConverter
,
2433 &myTarget
, targetLimit
,
2434 &source
, sourceLimit
,
2435 pivotBuffer
, &pivot
, &pivot2
, pivotBuffer
+CHUNK_SIZE
,
2439 targetLength
=(int32_t)(myTarget
-target
);
2443 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2444 * to it but continue the conversion in order to store in targetCapacity
2445 * the number of bytes that was required.
2447 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
|| targetCapacity
==0)
2449 char targetBuffer
[CHUNK_SIZE
];
2451 targetLimit
=targetBuffer
+CHUNK_SIZE
;
2453 *pErrorCode
=U_ZERO_ERROR
;
2454 myTarget
=targetBuffer
;
2455 ucnv_convertEx(outConverter
, inConverter
,
2456 &myTarget
, targetLimit
,
2457 &source
, sourceLimit
,
2458 pivotBuffer
, &pivot
, &pivot2
, pivotBuffer
+CHUNK_SIZE
,
2462 targetLength
+=(int32_t)(myTarget
-targetBuffer
);
2463 } while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
2465 /* done with preflighting, set warnings and errors as appropriate */
2466 return u_terminateChars(target
, targetCapacity
, targetLength
, pErrorCode
);
2469 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2470 return targetLength
;
2473 U_CAPI
int32_t U_EXPORT2
2474 ucnv_convert(const char *toConverterName
, const char *fromConverterName
,
2475 char *target
, int32_t targetCapacity
,
2476 const char *source
, int32_t sourceLength
,
2477 UErrorCode
*pErrorCode
) {
2478 UConverter in
, out
; /* stack-allocated */
2479 UConverter
*inConverter
, *outConverter
;
2480 int32_t targetLength
;
2482 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2486 if( source
==NULL
|| sourceLength
<-1 ||
2487 targetCapacity
<0 || (targetCapacity
>0 && target
==NULL
)
2489 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2493 /* if there is no input data, we're done */
2494 if(sourceLength
==0 || (sourceLength
<0 && *source
==0)) {
2495 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2498 /* create the converters */
2499 inConverter
=ucnv_createConverter(&in
, fromConverterName
, pErrorCode
);
2500 if(U_FAILURE(*pErrorCode
)) {
2504 outConverter
=ucnv_createConverter(&out
, toConverterName
, pErrorCode
);
2505 if(U_FAILURE(*pErrorCode
)) {
2506 ucnv_close(inConverter
);
2510 targetLength
=ucnv_internalConvert(outConverter
, inConverter
,
2511 target
, targetCapacity
,
2512 source
, sourceLength
,
2515 ucnv_close(inConverter
);
2516 ucnv_close(outConverter
);
2518 return targetLength
;
2523 ucnv_convertAlgorithmic(UBool convertToAlgorithmic
,
2524 UConverterType algorithmicType
,
2526 char *target
, int32_t targetCapacity
,
2527 const char *source
, int32_t sourceLength
,
2528 UErrorCode
*pErrorCode
) {
2529 UConverter algoConverterStatic
; /* stack-allocated */
2530 UConverter
*algoConverter
, *to
, *from
;
2531 int32_t targetLength
;
2533 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2537 if( cnv
==NULL
|| source
==NULL
|| sourceLength
<-1 ||
2538 targetCapacity
<0 || (targetCapacity
>0 && target
==NULL
)
2540 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2544 /* if there is no input data, we're done */
2545 if(sourceLength
==0 || (sourceLength
<0 && *source
==0)) {
2546 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2549 /* create the algorithmic converter */
2550 algoConverter
=ucnv_createAlgorithmicConverter(&algoConverterStatic
, algorithmicType
,
2552 if(U_FAILURE(*pErrorCode
)) {
2556 /* reset the other converter */
2557 if(convertToAlgorithmic
) {
2558 /* cnv->Unicode->algo */
2559 ucnv_resetToUnicode(cnv
);
2563 /* algo->Unicode->cnv */
2564 ucnv_resetFromUnicode(cnv
);
2569 targetLength
=ucnv_internalConvert(to
, from
,
2570 target
, targetCapacity
,
2571 source
, sourceLength
,
2574 ucnv_close(algoConverter
);
2576 return targetLength
;
2579 U_CAPI
int32_t U_EXPORT2
2580 ucnv_toAlgorithmic(UConverterType algorithmicType
,
2582 char *target
, int32_t targetCapacity
,
2583 const char *source
, int32_t sourceLength
,
2584 UErrorCode
*pErrorCode
) {
2585 return ucnv_convertAlgorithmic(TRUE
, algorithmicType
, cnv
,
2586 target
, targetCapacity
,
2587 source
, sourceLength
,
2591 U_CAPI
int32_t U_EXPORT2
2592 ucnv_fromAlgorithmic(UConverter
*cnv
,
2593 UConverterType algorithmicType
,
2594 char *target
, int32_t targetCapacity
,
2595 const char *source
, int32_t sourceLength
,
2596 UErrorCode
*pErrorCode
) {
2597 return ucnv_convertAlgorithmic(FALSE
, algorithmicType
, cnv
,
2598 target
, targetCapacity
,
2599 source
, sourceLength
,
2603 U_CAPI UConverterType U_EXPORT2
2604 ucnv_getType(const UConverter
* converter
)
2606 int8_t type
= converter
->sharedData
->staticData
->conversionType
;
2607 #if !UCONFIG_NO_LEGACY_CONVERSION
2608 if(type
== UCNV_MBCS
) {
2609 return ucnv_MBCSGetType(converter
);
2612 return (UConverterType
)type
;
2615 U_CAPI
void U_EXPORT2
2616 ucnv_getStarters(const UConverter
* converter
,
2617 UBool starters
[256],
2620 if (err
== NULL
|| U_FAILURE(*err
)) {
2624 if(converter
->sharedData
->impl
->getStarters
!= NULL
) {
2625 converter
->sharedData
->impl
->getStarters(converter
, starters
, err
);
2627 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2631 static const UAmbiguousConverter
*ucnv_getAmbiguous(const UConverter
*cnv
)
2633 UErrorCode errorCode
;
2641 errorCode
=U_ZERO_ERROR
;
2642 name
=ucnv_getName(cnv
, &errorCode
);
2643 if(U_FAILURE(errorCode
)) {
2647 for(i
=0; i
<(int32_t)(sizeof(ambiguousConverters
)/sizeof(UAmbiguousConverter
)); ++i
)
2649 if(0==uprv_strcmp(name
, ambiguousConverters
[i
].name
))
2651 return ambiguousConverters
+i
;
2658 U_CAPI
void U_EXPORT2
2659 ucnv_fixFileSeparator(const UConverter
*cnv
,
2661 int32_t sourceLength
) {
2662 const UAmbiguousConverter
*a
;
2666 if(cnv
==NULL
|| source
==NULL
|| sourceLength
<=0 || (a
=ucnv_getAmbiguous(cnv
))==NULL
)
2671 variant5c
=a
->variant5c
;
2672 for(i
=0; i
<sourceLength
; ++i
) {
2673 if(source
[i
]==variant5c
) {
2679 U_CAPI UBool U_EXPORT2
2680 ucnv_isAmbiguous(const UConverter
*cnv
) {
2681 return (UBool
)(ucnv_getAmbiguous(cnv
)!=NULL
);
2684 U_CAPI
void U_EXPORT2
2685 ucnv_setFallback(UConverter
*cnv
, UBool usesFallback
)
2687 cnv
->useFallback
= usesFallback
;
2690 U_CAPI UBool U_EXPORT2
2691 ucnv_usesFallback(const UConverter
*cnv
)
2693 return cnv
->useFallback
;
2696 U_CAPI
void U_EXPORT2
2697 ucnv_getInvalidChars (const UConverter
* converter
,
2702 if (err
== NULL
|| U_FAILURE(*err
))
2706 if (len
== NULL
|| errBytes
== NULL
|| converter
== NULL
)
2708 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2711 if (*len
< converter
->invalidCharLength
)
2713 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
2716 if ((*len
= converter
->invalidCharLength
) > 0)
2718 uprv_memcpy (errBytes
, converter
->invalidCharBuffer
, *len
);
2722 U_CAPI
void U_EXPORT2
2723 ucnv_getInvalidUChars (const UConverter
* converter
,
2728 if (err
== NULL
|| U_FAILURE(*err
))
2732 if (len
== NULL
|| errChars
== NULL
|| converter
== NULL
)
2734 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2737 if (*len
< converter
->invalidUCharLength
)
2739 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
2742 if ((*len
= converter
->invalidUCharLength
) > 0)
2744 uprv_memcpy (errChars
, converter
->invalidUCharBuffer
, sizeof(UChar
) * (*len
));
2748 #define SIG_MAX_LEN 5
2750 U_CAPI
const char* U_EXPORT2
2751 ucnv_detectUnicodeSignature( const char* source
,
2752 int32_t sourceLength
,
2753 int32_t* signatureLength
,
2754 UErrorCode
* pErrorCode
) {
2757 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2758 * bytes we don't misdetect something
2760 char start
[SIG_MAX_LEN
]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2763 if((pErrorCode
==NULL
) || U_FAILURE(*pErrorCode
)){
2767 if(source
== NULL
|| sourceLength
< -1){
2768 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
2772 if(signatureLength
== NULL
) {
2773 signatureLength
= &dummy
;
2776 if(sourceLength
==-1){
2777 sourceLength
=(int32_t)uprv_strlen(source
);
2781 while(i
<sourceLength
&& i
<SIG_MAX_LEN
){
2786 if(start
[0] == '\xFE' && start
[1] == '\xFF') {
2789 } else if(start
[0] == '\xFF' && start
[1] == '\xFE') {
2790 if(start
[2] == '\x00' && start
[3] =='\x00') {
2797 } else if(start
[0] == '\xEF' && start
[1] == '\xBB' && start
[2] == '\xBF') {
2800 } else if(start
[0] == '\x00' && start
[1] == '\x00' &&
2801 start
[2] == '\xFE' && start
[3]=='\xFF') {
2804 } else if(start
[0] == '\x0E' && start
[1] == '\xFE' && start
[2] == '\xFF') {
2807 } else if(start
[0] == '\xFB' && start
[1] == '\xEE' && start
[2] == '\x28') {
2810 } else if(start
[0] == '\x2B' && start
[1] == '\x2F' && start
[2] == '\x76') {
2812 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2813 * depending on the second UTF-16 code unit.
2814 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2817 * So far we have +/v
2819 if(start
[3] == '\x38' && start
[4] == '\x2D') {
2823 } else if(start
[3] == '\x38' || start
[3] == '\x39' || start
[3] == '\x2B' || start
[3] == '\x2F') {
2824 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2828 }else if(start
[0]=='\xDD' && start
[1]== '\x73'&& start
[2]=='\x66' && start
[3]=='\x73'){
2830 return "UTF-EBCDIC";
2834 /* no known Unicode signature byte sequence recognized */
2839 U_CAPI
int32_t U_EXPORT2
2840 ucnv_fromUCountPending(const UConverter
* cnv
, UErrorCode
* status
)
2842 if(status
== NULL
|| U_FAILURE(*status
)){
2846 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2850 if(cnv
->preFromULength
> 0){
2851 return U16_LENGTH(cnv
->preFromUFirstCP
)+cnv
->preFromULength
;
2852 }else if(cnv
->preFromULength
< 0){
2853 return -cnv
->preFromULength
;
2854 }else if(cnv
->fromUChar32
> 0){
2856 }else if(cnv
->preFromUFirstCP
>0){
2857 return U16_LENGTH(cnv
->preFromUFirstCP
);
2863 U_CAPI
int32_t U_EXPORT2
2864 ucnv_toUCountPending(const UConverter
* cnv
, UErrorCode
* status
){
2866 if(status
== NULL
|| U_FAILURE(*status
)){
2870 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2874 if(cnv
->preToULength
> 0){
2875 return cnv
->preToULength
;
2876 }else if(cnv
->preToULength
< 0){
2877 return -cnv
->preToULength
;
2878 }else if(cnv
->toULength
> 0){
2879 return cnv
->toULength
;
2886 * Hey, Emacs, please set the following:
2889 * indent-tabs-mode: nil