2 ******************************************************************************
4 * Copyright (C) 1998-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
10 * Implements APIs for the ICU's codeset conversion library;
11 * mostly calls through internal functions;
12 * created by Bertrand A. Damiba
14 * Modification History:
16 * Date Name Description
17 * 04/04/99 helena Fixed internal header inclusion.
18 * 05/09/00 helena Added implementation to handle fallback mappings.
19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
22 #include "unicode/utypes.h"
24 #if !UCONFIG_NO_CONVERSION
26 #include "unicode/ustring.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ucnv_err.h"
29 #include "unicode/uset.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
42 /* size of intermediate and preflighting buffers in ucnv_convert() */
43 #define CHUNK_SIZE 1024
/*
 * Entry type of the ambiguousConverters table.
 * Only the variant5c member is visible in this listing; the table
 * initializers pair a converter name string with a UChar value, so a
 * name member presumably precedes variant5c (TODO confirm).
 */
45 typedef struct UAmbiguousConverter
{
47 const UChar variant5c
;
48 } UAmbiguousConverter
;
/*
 * Table of converter names, each paired with a UChar value:
 * 0xa5 (U+00A5 YEN SIGN) or 0x20a9 (U+20A9 WON SIGN).
 * Judging by the member name variant5c, this is presumably the character
 * that byte 0x5c stands for in that codepage (TODO confirm).
 * Entries inside comments are disabled.
 */
50 static const UAmbiguousConverter ambiguousConverters
[]={
51 { "ibm-897_P100-1995", 0xa5 },
52 { "ibm-942_P120-1999", 0xa5 },
53 { "ibm-943_P130-1999", 0xa5 },
54 { "ibm-946_P100-1995", 0xa5 },
55 { "ibm-33722_P120-1999", 0xa5 },
56 { "ibm-1041_P100-1995", 0xa5 },
57 /*{ "ibm-54191_P100-2006", 0xa5 },*/
58 /*{ "ibm-62383_P100-2007", 0xa5 },*/
59 /*{ "ibm-891_P100-1995", 0x20a9 },*/
60 { "ibm-944_P100-1995", 0x20a9 },
61 { "ibm-949_P110-1999", 0x20a9 },
62 { "ibm-1363_P110-1997", 0x20a9 },
63 { "ISO_2022,locale=ko,version=0", 0x20a9 },
64 { "ibm-1088_P100-1995", 0x20a9 }
67 /*Calls through createConverter */
/*
 * Opens a converter by name.
 * The error code is tested first (a NULL err pointer or an already failed
 * status takes the early branch); the converter itself is produced by
 * ucnv_createConverter(NULL, name, err) and stored in r.
 */
68 U_CAPI UConverter
* U_EXPORT2
69 ucnv_open (const char *name
,
74 if (err
== NULL
|| U_FAILURE (*err
)) {
78 r
= ucnv_createConverter(NULL
, name
, err
);
/*
 * Opens a converter from a named data package.
 * packageName, converterName and err are forwarded unchanged to
 * ucnv_createConverterFromPackage(), whose result is returned.
 */
82 U_CAPI UConverter
* U_EXPORT2
83 ucnv_openPackage (const char *packageName
, const char *converterName
, UErrorCode
* err
)
85 return ucnv_createConverterFromPackage(packageName
, converterName
, err
);
88 /*Converts the UChar* name to a char* name and calls through ucnv_open */
/*
 * Opens a converter from a UChar string name.
 * Visible steps:
 *  - a NULL err pointer or an already failed status takes the early branch;
 *  - one path calls ucnv_open(NULL, err) (NOTE(review): its guarding
 *    condition is not visible here, presumably name == NULL);
 *  - a name of UCNV_MAX_CONVERTER_NAME_LENGTH or more UChars sets
 *    U_ILLEGAL_ARGUMENT_ERROR;
 *  - otherwise the name is copied into the local asciiName buffer with
 *    u_austrcpy() and passed to ucnv_open().
 */
89 U_CAPI UConverter
* U_EXPORT2
90 ucnv_openU (const UChar
* name
,
93 char asciiName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
95 if (err
== NULL
|| U_FAILURE(*err
))
98 return ucnv_open (NULL
, err
);
99 if (u_strlen(name
) >= UCNV_MAX_CONVERTER_NAME_LENGTH
)
101 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
104 return ucnv_open(u_austrcpy(asciiName
, name
), err
);
107 /* Copy the string that is represented by the UConverterPlatform enum
108 * @param platformString An output buffer
109 * @param platform An enum representing a platform
110 * @return the length of the copied string.
/*
 * Visible behaviour: the prefix "ibm-" is copied into the caller's
 * platformString buffer with uprv_strcpy(); another path defaults to an
 * empty string. NOTE(review): the selection on pltfrm and the returned
 * length are not visible in this listing.
 */
113 ucnv_copyPlatformString(char *platformString
, UConverterPlatform pltfrm
)
118 uprv_strcpy(platformString
, "ibm-");
124 /* default to empty string */
129 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
130 *through createConverter*/
/*
 * Opens a converter from a codepage number and a platform.
 * The converter name is assembled in the local myName buffer: first the
 * platform prefix written by ucnv_copyPlatformString() (which returns its
 * length), then the codepage in base 10 appended by
 * T_CString_integerToString(). The finished name is handed to
 * ucnv_createConverter(NULL, myName, err).
 * A NULL err pointer or an already failed status takes the early branch.
 */
131 U_CAPI UConverter
* U_EXPORT2
132 ucnv_openCCSID (int32_t codepage
,
133 UConverterPlatform platform
,
136 char myName
[UCNV_MAX_CONVERTER_NAME_LENGTH
];
139 if (err
== NULL
|| U_FAILURE (*err
))
142 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
143 myNameLen
= ucnv_copyPlatformString(myName
, platform
);
144 T_CString_integerToString(myName
+ myNameLen
, codepage
, 10);
146 return ucnv_createConverter(NULL
, myName
, err
);
149 /* Creating a temporary stack-based object that can be used in one thread,
150 and created from a converter that is shared across threads.
/*
 * Clones cnv into the caller's stackBuffer or, if that is unusable, into
 * heap memory.
 *
 * cnv          converter to copy
 * stackBuffer  caller-provided memory for the clone (may be NULL)
 * pBufferSize  in: size of stackBuffer; a value <= 0 is a preflighting
 *              request that only stores the needed size;
 *              out: the needed size when memory had to be allocated
 * status       error code; U_SAFECLONE_ALLOCATED_WARNING is set when the
 *              clone was allocated with uprv_malloc(),
 *              U_MEMORY_ALLOCATION_ERROR when an allocation failed
 *
 * Visible steps: the needed size comes from the implementation's safeClone
 * hook (called with a NULL destination) or is sizeof(UConverter); the stack
 * buffer is advanced to an aligned address; the clone is zeroed and the
 * state of cnv copied over it; a separately allocated subChars buffer is
 * duplicated; the implementation's safeClone hook finishes the copy;
 * reference-counted shared data gets one more reference; isCopyLocal is set
 * when the clone lives in the caller's buffer; finally both callbacks are
 * notified with UCNV_CLONE.
 *
 * Fix: a failed subChars allocation now sets U_MEMORY_ALLOCATION_ERROR.
 * Before, that path left status at a non-failure value, so a caller that
 * only tests U_FAILURE(*status) could not tell that no clone was created.
 */
153 U_CAPI UConverter
* U_EXPORT2
154 ucnv_safeClone(const UConverter
* cnv
, void *stackBuffer
, int32_t *pBufferSize
, UErrorCode
*status
)
156 UConverter
*localConverter
, *allocatedConverter
;
157 int32_t stackBufferSize
;
158 int32_t bufferSizeNeeded
;
159 char *stackBufferChars
= (char *)stackBuffer
;
161 UConverterToUnicodeArgs toUArgs
= {
162 sizeof(UConverterToUnicodeArgs
),
171 UConverterFromUnicodeArgs fromUArgs
= {
172 sizeof(UConverterFromUnicodeArgs
),
182 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE
);
184 if (status
== NULL
|| U_FAILURE(*status
)){
185 UTRACE_EXIT_STATUS(status
? *status
: U_ILLEGAL_ARGUMENT_ERROR
);
190 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
191 UTRACE_EXIT_STATUS(*status
);
195 UTRACE_DATA3(UTRACE_OPEN_CLOSE
, "clone converter %s at %p into stackBuffer %p",
196 ucnv_getName(cnv
, status
), cnv
, stackBuffer
);
198 if (cnv
->sharedData
->impl
->safeClone
!= NULL
) {
199 /* call the custom safeClone function for sizing */
200 bufferSizeNeeded
= 0;
201 cnv
->sharedData
->impl
->safeClone(cnv
, NULL
, &bufferSizeNeeded
, status
);
202 if (U_FAILURE(*status
)) {
203 UTRACE_EXIT_STATUS(*status
);
209 /* inherent sizing */
210 bufferSizeNeeded
= sizeof(UConverter
);
213 if (pBufferSize
== NULL
) {
215 pBufferSize
= &stackBufferSize
;
217 stackBufferSize
= *pBufferSize
;
218 if (stackBufferSize
<= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
219 *pBufferSize
= bufferSizeNeeded
;
220 UTRACE_EXIT_VALUE(bufferSizeNeeded
);
226 /* Pointers on 64-bit platforms need to be aligned
227 * on a 64-bit boundary in memory.
229 if (U_ALIGNMENT_OFFSET(stackBuffer
) != 0) {
230 int32_t offsetUp
= (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars
);
231 if(stackBufferSize
> offsetUp
) {
232 stackBufferSize
-= offsetUp
;
233 stackBufferChars
+= offsetUp
;
235 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
240 stackBuffer
= (void *)stackBufferChars
;
242 /* Now, see if we must allocate any memory */
243 if (stackBufferSize
< bufferSizeNeeded
|| stackBuffer
== NULL
)
245 /* allocate one here...*/
246 localConverter
= allocatedConverter
= (UConverter
*) uprv_malloc (bufferSizeNeeded
);
248 if(localConverter
== NULL
) {
249 *status
= U_MEMORY_ALLOCATION_ERROR
;
250 UTRACE_EXIT_STATUS(*status
);
253 *status
= U_SAFECLONE_ALLOCATED_WARNING
;
255 /* record the fact that memory was allocated */
256 *pBufferSize
= bufferSizeNeeded
;
258 /* just use the stack buffer */
259 localConverter
= (UConverter
*) stackBuffer
;
260 allocatedConverter
= NULL
;
263 uprv_memset(localConverter
, 0, bufferSizeNeeded
);
265 /* Copy initial state */
266 uprv_memcpy(localConverter
, cnv
, sizeof(UConverter
));
267 localConverter
->isCopyLocal
= localConverter
->isExtraLocal
= FALSE
;
269 /* copy the substitution string */
270 if (cnv
->subChars
== (uint8_t *)cnv
->subUChars
) {
271 localConverter
->subChars
= (uint8_t *)localConverter
->subUChars
;
273 localConverter
->subChars
= (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
274 if (localConverter
->subChars
== NULL
) {
/* report the failed allocation; set before the trace exit so the trace shows it too */
*status = U_MEMORY_ALLOCATION_ERROR;
275 uprv_free(allocatedConverter
);
276 UTRACE_EXIT_STATUS(*status
);
279 uprv_memcpy(localConverter
->subChars
, cnv
->subChars
, UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
282 /* now either call the safeclone fcn or not */
283 if (cnv
->sharedData
->impl
->safeClone
!= NULL
) {
284 /* call the custom safeClone function */
285 localConverter
= cnv
->sharedData
->impl
->safeClone(cnv
, localConverter
, pBufferSize
, status
);
288 if(localConverter
==NULL
|| U_FAILURE(*status
)) {
289 if (allocatedConverter
!= NULL
&& allocatedConverter
->subChars
!= (uint8_t *)allocatedConverter
->subUChars
) {
290 uprv_free(allocatedConverter
->subChars
);
292 uprv_free(allocatedConverter
);
293 UTRACE_EXIT_STATUS(*status
);
297 /* increment refcount of shared data if needed */
298 if (cnv
->sharedData
->isReferenceCounted
) {
299 ucnv_incrementRefCount(cnv
->sharedData
);
302 if(localConverter
== (UConverter
*)stackBuffer
) {
303 /* we're using user provided data - set to not destroy */
304 localConverter
->isCopyLocal
= TRUE
;
307 /* allow callback functions to handle any memory allocation */
308 toUArgs
.converter
= fromUArgs
.converter
= localConverter
;
309 cbErr
= U_ZERO_ERROR
;
310 cnv
->fromCharErrorBehaviour(cnv
->toUContext
, &toUArgs
, NULL
, 0, UCNV_CLONE
, &cbErr
);
311 cbErr
= U_ZERO_ERROR
;
312 cnv
->fromUCharErrorBehaviour(cnv
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_CLONE
, &cbErr
);
314 UTRACE_EXIT_PTR_STATUS(localConverter
, *status
);
315 return localConverter
;
320 /*Decreases the reference counter in the shared immutable section of the object
321 *and frees the mutable part*/
/*
 * Closes a converter.
 * Visible steps, in order:
 *  - a NULL converter takes an early branch;
 *  - each callback that differs from the default callback
 *    (UCNV_TO_U_DEFAULT_CALLBACK / UCNV_FROM_U_DEFAULT_CALLBACK) is
 *    notified with UCNV_CLOSE, using a local error code;
 *  - the implementation's close hook is called if it exists;
 *  - subChars is freed when it does not point at the inline subUChars;
 *  - reference-counted shared data is released through
 *    ucnv_unloadSharedDataIfReady();
 *  - the converter memory is freed unless isCopyLocal is set.
 */
323 U_CAPI
void U_EXPORT2
324 ucnv_close (UConverter
* converter
)
326 UErrorCode errorCode
= U_ZERO_ERROR
;
328 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE
);
330 if (converter
== NULL
)
336 UTRACE_DATA3(UTRACE_OPEN_CLOSE
, "close converter %s at %p, isCopyLocal=%b",
337 ucnv_getName(converter
, &errorCode
), converter
, converter
->isCopyLocal
);
339 /* In order to speed up the close, only call the callbacks when they have been changed.
340 This performance check will only work when the callbacks are set within a shared library
341 or from user code that statically links this code. */
342 /* first, notify the callback functions that the converter is closed */
343 if (converter
->fromCharErrorBehaviour
!= UCNV_TO_U_DEFAULT_CALLBACK
) {
344 UConverterToUnicodeArgs toUArgs
= {
345 sizeof(UConverterToUnicodeArgs
),
355 toUArgs
.converter
= converter
;
356 errorCode
= U_ZERO_ERROR
;
357 converter
->fromCharErrorBehaviour(converter
->toUContext
, &toUArgs
, NULL
, 0, UCNV_CLOSE
, &errorCode
);
359 if (converter
->fromUCharErrorBehaviour
!= UCNV_FROM_U_DEFAULT_CALLBACK
) {
360 UConverterFromUnicodeArgs fromUArgs
= {
361 sizeof(UConverterFromUnicodeArgs
),
370 fromUArgs
.converter
= converter
;
371 errorCode
= U_ZERO_ERROR
;
372 converter
->fromUCharErrorBehaviour(converter
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_CLOSE
, &errorCode
);
375 if (converter
->sharedData
->impl
->close
!= NULL
) {
376 converter
->sharedData
->impl
->close(converter
);
379 if (converter
->subChars
!= (uint8_t *)converter
->subUChars
) {
380 uprv_free(converter
->subChars
);
383 if (converter
->sharedData
->isReferenceCounted
) {
384 ucnv_unloadSharedDataIfReady(converter
->sharedData
);
387 if(!converter
->isCopyLocal
){
388 uprv_free(converter
);
394 /*returns a single Name from the list, will return NULL if out of bounds
/*
 * Returns the n-th available converter name.
 * Only n in the range 0..0xffff is passed on, cast to uint16_t, to
 * ucnv_bld_getAvailableConverter(); the name is used only when that call
 * reports success through the local error code.
 */
396 U_CAPI
const char* U_EXPORT2
397 ucnv_getAvailableName (int32_t n
)
399 if (0 <= n
&& n
<= 0xffff) {
400 UErrorCode err
= U_ZERO_ERROR
;
401 const char *name
= ucnv_bld_getAvailableConverter((uint16_t)n
, &err
);
402 if (U_SUCCESS(err
)) {
/*
 * Returns the result of ucnv_bld_countAvailableConverters().
 * The error code is a local variable, so a failure in that call is not
 * reported to the caller of this function.
 */
409 U_CAPI
int32_t U_EXPORT2
410 ucnv_countAvailable ()
412 UErrorCode err
= U_ZERO_ERROR
;
413 return ucnv_bld_countAvailableConverters(&err
);
/*
 * Copies the converter's substitution bytes into mySubChar.
 * A subCharLen <= 0 marks a Unicode string or an empty string that was
 * set through ucnv_setSubstString() and is handled separately.
 * If the caller's capacity (the value behind len) is smaller than
 * subCharLen, U_INDEX_OUTOFBOUNDS_ERROR is set. Otherwise subCharLen bytes
 * are copied from converter->subChars and the value behind len is set to
 * the number of bytes copied.
 * Nothing is done when err already holds a failure.
 */
416 U_CAPI
void U_EXPORT2
417 ucnv_getSubstChars (const UConverter
* converter
,
422 if (U_FAILURE (*err
))
425 if (converter
->subCharLen
<= 0) {
426 /* Unicode string or empty string from ucnv_setSubstString(). */
431 if (*len
< converter
->subCharLen
) /*not enough space in subChars */
433 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
437 uprv_memcpy (mySubChar
, converter
->subChars
, converter
->subCharLen
); /*fills in the subchars */
438 *len
= converter
->subCharLen
; /*store # of bytes copied to buffer */
/*
 * Sets the converter's substitution bytes.
 * len must lie between staticData->minBytesPerChar and
 * staticData->maxBytesPerChar (inclusive); otherwise
 * U_ILLEGAL_ARGUMENT_ERROR is set. On success len bytes are copied from
 * mySubChar into converter->subChars, subCharLen is set to len, and
 * subChar1 is cleared to 0.
 * Nothing is done when err already holds a failure.
 */
441 U_CAPI
void U_EXPORT2
442 ucnv_setSubstChars (UConverter
* converter
,
443 const char *mySubChar
,
447 if (U_FAILURE (*err
))
450 /*Makes sure that the subChar is within the codepages char length boundaries */
451 if ((len
> converter
->sharedData
->staticData
->maxBytesPerChar
)
452 || (len
< converter
->sharedData
->staticData
->minBytesPerChar
))
454 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
458 uprv_memcpy (converter
->subChars
, mySubChar
, len
); /*copies the subchars */
459 converter
->subCharLen
= len
; /*sets the new len */
462 * There is currently (2001Feb) no separate API to set/get subChar1.
463 * In order to always have subChar written after it is explicitly set,
464 * we set subChar1 to 0.
466 converter
->subChar1
= 0;
/*
 * Sets the substitution string of cnv from the Unicode string s.
 * Visible steps:
 *  - cnv is cloned into the local cloneBuffer with ucnv_safeClone(), the
 *    clone gets UCNV_FROM_U_CALLBACK_STOP as its from-Unicode callback, and
 *    s is converted with ucnv_fromUChars() into the local chars buffer;
 *    argument checking is left to those functions;
 *  - if the implementation has no writeSub hook (or, with legacy
 *    conversion enabled, it is an MBCS converter whose type is not
 *    UCNV_EBCDIC_STATEFUL), the converted bytes are stored;
 *  - otherwise the Unicode string itself is stored for on-the-fly
 *    conversion; a length above UCNV_ERROR_BUFFER_LENGTH sets
 *    U_BUFFER_OVERFLOW_ERROR;
 *  - when the byte length exceeds UCNV_MAX_SUBCHAR_LEN and subChars still
 *    points at the inline subUChars, a buffer of
 *    UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR bytes is allocated; on
 *    allocation failure subChars is pointed back at subUChars and
 *    U_MEMORY_ALLOCATION_ERROR is set;
 *  - subCharLen is set to the byte length for charset bytes and to the
 *    negated UChar length for a stored Unicode string.
 */
471 U_CAPI
void U_EXPORT2
472 ucnv_setSubstString(UConverter
*cnv
,
476 UAlignedMemory cloneBuffer
[U_CNV_SAFECLONE_BUFFERSIZE
/ sizeof(UAlignedMemory
) + 1];
477 char chars
[UCNV_ERROR_BUFFER_LENGTH
];
481 int32_t cloneSize
, length8
;
483 /* Let the following functions check all arguments. */
484 cloneSize
= sizeof(cloneBuffer
);
485 clone
= ucnv_safeClone(cnv
, cloneBuffer
, &cloneSize
, err
);
486 ucnv_setFromUCallBack(clone
, UCNV_FROM_U_CALLBACK_STOP
, NULL
, NULL
, NULL
, err
);
487 length8
= ucnv_fromUChars(clone
, chars
, (int32_t)sizeof(chars
), s
, length
, err
);
489 if (U_FAILURE(*err
)) {
493 if (cnv
->sharedData
->impl
->writeSub
== NULL
494 #if !UCONFIG_NO_LEGACY_CONVERSION
495 || (cnv
->sharedData
->staticData
->conversionType
== UCNV_MBCS
&&
496 ucnv_MBCSGetType(cnv
) != UCNV_EBCDIC_STATEFUL
)
499 /* The converter is not stateful. Store the charset bytes as a fixed string. */
500 subChars
= (uint8_t *)chars
;
503 * The converter has a non-default writeSub() function, indicating
504 * that it is stateful.
505 * Store the Unicode string for on-the-fly conversion for correct
508 if (length
> UCNV_ERROR_BUFFER_LENGTH
) {
510 * Should not occur. The converter should output at least one byte
511 * per UChar, which means that ucnv_fromUChars() should catch all
514 *err
= U_BUFFER_OVERFLOW_ERROR
;
517 subChars
= (uint8_t *)s
;
519 length
= u_strlen(s
);
521 length8
= length
* U_SIZEOF_UCHAR
;
525 * For storing the substitution string, select either the small buffer inside
526 * UConverter or allocate a subChars buffer.
528 if (length8
> UCNV_MAX_SUBCHAR_LEN
) {
529 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
530 if (cnv
->subChars
== (uint8_t *)cnv
->subUChars
) {
531 /* Allocate a new buffer for the string. */
532 cnv
->subChars
= (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
533 if (cnv
->subChars
== NULL
) {
534 cnv
->subChars
= (uint8_t *)cnv
->subUChars
;
535 *err
= U_MEMORY_ALLOCATION_ERROR
;
538 uprv_memset(cnv
->subChars
, 0, UCNV_ERROR_BUFFER_LENGTH
* U_SIZEOF_UCHAR
);
542 /* Copy the substitution string into the UConverter or its subChars buffer. */
546 uprv_memcpy(cnv
->subChars
, subChars
, length8
);
547 if (subChars
== (uint8_t *)chars
) {
548 cnv
->subCharLen
= (int8_t)length8
;
549 } else /* subChars == s */ {
550 cnv
->subCharLen
= (int8_t)-length
;
554 /* See comment in ucnv_setSubstChars(). */
558 /*resets the internal states of a converter
559 *goal : have the same behaviour as a freshly created converter
/*
 * Resets the state of a converter in one or both directions.
 * choice selects the direction: the to-Unicode side is handled when
 * choice <= UCNV_RESET_TO_UNICODE, the from-Unicode side when
 * choice != UCNV_RESET_TO_UNICODE.
 * Visible steps:
 *  - a NULL converter takes an early branch;
 *  - callbacks that differ from the default callbacks are notified with
 *    UCNV_RESET (NOTE(review): the callCallback test that presumably
 *    guards this notification is not visible in this listing);
 *  - to-Unicode state: toUnicodeStatus is reloaded from the shared data,
 *    toULength, invalidCharLength, UCharErrorBufferLength and
 *    preToULength are cleared;
 *  - from-Unicode state: fromUnicodeStatus, fromUChar32,
 *    invalidUCharLength, charErrorBufferLength and preFromULength are
 *    cleared and preFromUFirstCP is set to U_SENTINEL;
 *  - the implementation's reset hook is called if it exists.
 */
561 static void _reset(UConverter
*converter
, UConverterResetChoice choice
,
562 UBool callCallback
) {
563 if(converter
== NULL
) {
568 /* first, notify the callback functions that the converter is reset */
569 UErrorCode errorCode
;
571 if(choice
<=UCNV_RESET_TO_UNICODE
&& converter
->fromCharErrorBehaviour
!= UCNV_TO_U_DEFAULT_CALLBACK
) {
572 UConverterToUnicodeArgs toUArgs
= {
573 sizeof(UConverterToUnicodeArgs
),
582 toUArgs
.converter
= converter
;
583 errorCode
= U_ZERO_ERROR
;
584 converter
->fromCharErrorBehaviour(converter
->toUContext
, &toUArgs
, NULL
, 0, UCNV_RESET
, &errorCode
);
586 if(choice
!=UCNV_RESET_TO_UNICODE
&& converter
->fromUCharErrorBehaviour
!= UCNV_FROM_U_DEFAULT_CALLBACK
) {
587 UConverterFromUnicodeArgs fromUArgs
= {
588 sizeof(UConverterFromUnicodeArgs
),
597 fromUArgs
.converter
= converter
;
598 errorCode
= U_ZERO_ERROR
;
599 converter
->fromUCharErrorBehaviour(converter
->fromUContext
, &fromUArgs
, NULL
, 0, 0, UCNV_RESET
, &errorCode
);
603 /* now reset the converter itself */
604 if(choice
<=UCNV_RESET_TO_UNICODE
) {
605 converter
->toUnicodeStatus
= converter
->sharedData
->toUnicodeStatus
;
607 converter
->toULength
= 0;
608 converter
->invalidCharLength
= converter
->UCharErrorBufferLength
= 0;
609 converter
->preToULength
= 0;
611 if(choice
!=UCNV_RESET_TO_UNICODE
) {
612 converter
->fromUnicodeStatus
= 0;
613 converter
->fromUChar32
= 0;
614 converter
->invalidUCharLength
= converter
->charErrorBufferLength
= 0;
615 converter
->preFromUFirstCP
= U_SENTINEL
;
616 converter
->preFromULength
= 0;
619 if (converter
->sharedData
->impl
->reset
!= NULL
) {
620 /* call the custom reset function */
621 converter
->sharedData
->impl
->reset(converter
, choice
);
/* Resets both conversion directions: _reset() with UCNV_RESET_BOTH and callCallback TRUE. */
625 U_CAPI
void U_EXPORT2
626 ucnv_reset(UConverter
*converter
)
628 _reset(converter
, UCNV_RESET_BOTH
, TRUE
);
/* Resets only the to-Unicode direction: _reset() with UCNV_RESET_TO_UNICODE and callCallback TRUE. */
631 U_CAPI
void U_EXPORT2
632 ucnv_resetToUnicode(UConverter
*converter
)
634 _reset(converter
, UCNV_RESET_TO_UNICODE
, TRUE
);
/* Resets only the from-Unicode direction: _reset() with UCNV_RESET_FROM_UNICODE and callCallback TRUE. */
637 U_CAPI
void U_EXPORT2
638 ucnv_resetFromUnicode(UConverter
*converter
)
640 _reset(converter
, UCNV_RESET_FROM_UNICODE
, TRUE
);
/*
 * Returns the converter's own maxBytesPerUChar field.
 * The converter pointer is dereferenced without a NULL check.
 */
643 U_CAPI
int8_t U_EXPORT2
644 ucnv_getMaxCharSize (const UConverter
* converter
)
646 return converter
->maxBytesPerUChar
;
/*
 * Returns minBytesPerChar from the shared static data of the converter.
 * The converter pointer is dereferenced without a NULL check.
 */
650 U_CAPI
int8_t U_EXPORT2
651 ucnv_getMinCharSize (const UConverter
* converter
)
653 return converter
->sharedData
->staticData
->minBytesPerChar
;
/*
 * Returns the name of the converter.
 * When err already holds a failure the early branch is taken.
 * If the implementation provides a getName hook, it is asked first
 * (NOTE(review): how its result temp is used is not visible in this
 * listing); the visible fallback returns staticData->name.
 */
656 U_CAPI
const char* U_EXPORT2
657 ucnv_getName (const UConverter
* converter
, UErrorCode
* err
)
660 if (U_FAILURE (*err
))
662 if(converter
->sharedData
->impl
->getName
){
663 const char* temp
= converter
->sharedData
->impl
->getName(converter
);
667 return converter
->sharedData
->staticData
->name
;
/*
 * Returns the CCSID of the converter, read from staticData->codepage.
 * In a rare case (NOTE(review): the condition is not visible in this
 * listing, presumably a codepage of 0) the IBM standard name of the
 * converter is looked up with ucnv_getStandardName() and the number that
 * follows its '-' is parsed with atol().
 * When err already holds a failure the early branch is taken.
 */
670 U_CAPI
int32_t U_EXPORT2
671 ucnv_getCCSID(const UConverter
* converter
,
675 if (U_FAILURE (*err
))
678 ccsid
= converter
->sharedData
->staticData
->codepage
;
680 /* Rare case. This is for cases like gb18030,
681 which doesn't have an IBM canonical name, but does have an IBM alias. */
682 const char *standardName
= ucnv_getStandardName(ucnv_getName(converter
, err
), "IBM", err
);
683 if (U_SUCCESS(*err
) && standardName
) {
684 const char *ccsidStr
= uprv_strchr(standardName
, '-');
686 ccsid
= (int32_t)atol(ccsidStr
+1); /* +1 to skip '-' */
/*
 * Returns staticData->platform of the converter, cast to
 * UConverterPlatform. When err already holds a failure the early branch
 * is taken (its return value is not visible in this listing).
 */
694 U_CAPI UConverterPlatform U_EXPORT2
695 ucnv_getPlatform (const UConverter
* converter
,
698 if (U_FAILURE (*err
))
701 return (UConverterPlatform
)converter
->sharedData
->staticData
->platform
;
/*
 * Reports the current to-Unicode callback of the converter.
 * The callback pointer (fromCharErrorBehaviour) is stored through action
 * and its context pointer (toUContext) through context; both output
 * pointers are written without a NULL check.
 */
704 U_CAPI
void U_EXPORT2
705 ucnv_getToUCallBack (const UConverter
* converter
,
706 UConverterToUCallback
*action
,
707 const void **context
)
709 *action
= converter
->fromCharErrorBehaviour
;
710 *context
= converter
->toUContext
;
/*
 * Reports the current from-Unicode callback of the converter.
 * The callback pointer (fromUCharErrorBehaviour) is stored through action
 * and its context pointer (fromUContext) through context; both output
 * pointers are written without a NULL check.
 */
713 U_CAPI
void U_EXPORT2
714 ucnv_getFromUCallBack (const UConverter
* converter
,
715 UConverterFromUCallback
*action
,
716 const void **context
)
718 *action
= converter
->fromUCharErrorBehaviour
;
719 *context
= converter
->fromUContext
;
/*
 * Installs a new to-Unicode callback and context on the converter.
 * The previous callback and context are handed back through oldAction
 * and oldContext when those pointers are non-NULL.
 * When err already holds a failure the early branch is taken.
 * The converter pointer is dereferenced without a NULL check.
 */
722 U_CAPI
void U_EXPORT2
723 ucnv_setToUCallBack (UConverter
* converter
,
724 UConverterToUCallback newAction
,
725 const void* newContext
,
726 UConverterToUCallback
*oldAction
,
727 const void** oldContext
,
730 if (U_FAILURE (*err
))
732 if (oldAction
) *oldAction
= converter
->fromCharErrorBehaviour
;
733 converter
->fromCharErrorBehaviour
= newAction
;
734 if (oldContext
) *oldContext
= converter
->toUContext
;
735 converter
->toUContext
= newContext
;
/*
 * Installs a new from-Unicode callback and context on the converter.
 * The previous callback and context are handed back through oldAction
 * and oldContext when those pointers are non-NULL.
 * When err already holds a failure the early branch is taken.
 * The converter pointer is dereferenced without a NULL check.
 */
738 U_CAPI
void U_EXPORT2
739 ucnv_setFromUCallBack (UConverter
* converter
,
740 UConverterFromUCallback newAction
,
741 const void* newContext
,
742 UConverterFromUCallback
*oldAction
,
743 const void** oldContext
,
746 if (U_FAILURE (*err
))
748 if (oldAction
) *oldAction
= converter
->fromUCharErrorBehaviour
;
749 converter
->fromUCharErrorBehaviour
= newAction
;
750 if (oldContext
) *oldContext
= converter
->fromUContext
;
751 converter
->fromUContext
= newContext
;
/*
 * Adjusts the `length` offsets that start at `offsets` after a
 * conversion or callback step.
 * delta is sourceIndex minus errorInputLength; limit marks the end of
 * the offsets to touch. Visible cases:
 *  - the most common case needs no work (presumably delta == 0);
 *  - otherwise delta is added to each offset, except offsets below 0;
 *  - for delta < 0 every offset is set to -1, because the conversion
 *    function does not handle offsets or the error input sequence
 *    started in a previous buffer.
 */
755 _updateOffsets(int32_t *offsets
, int32_t length
,
756 int32_t sourceIndex
, int32_t errorInputLength
) {
758 int32_t delta
, offset
;
762 * adjust each offset by adding the previous sourceIndex
763 * minus the length of the input sequence that caused an
766 delta
=sourceIndex
-errorInputLength
;
769 * set each offset to -1 because this conversion function
770 * does not handle offsets
775 limit
=offsets
+length
;
777 /* most common case, nothing to do */
779 /* add the delta to each offset (but not if the offset is <0) */
780 while(offsets
<limit
) {
783 *offsets
=offset
+delta
;
787 } else /* delta<0 */ {
789 * set each offset to -1 because this conversion function
790 * does not handle offsets
791 * or the error input sequence started in a previous buffer
793 while(offsets
<limit
) {
799 /* ucnv_fromUnicode --------------------------------------------------------- */
802 * Implementation note for m:n conversions
804 * While collecting source units to find the longest match for m:n conversion,
805 * some source units may need to be stored for a partial match.
806 * When a second buffer does not yield a match on all of the previously stored
807 * source units, then they must be "replayed", i.e., fed back into the converter.
809 * The code relies on the fact that replaying will not nest -
810 * converting a replay buffer will not result in a replay.
811 * This is because a replay is necessary only after the _continuation_ of a
812 * partial match failed, but a replay buffer is converted as a whole.
813 * It may result in some of its units being stored again for a partial match,
814 * but there will not be a continuation _during_ the replay which could fail.
816 * It is conceivable that a callback function could call the converter
817 * recursively in a way that causes another replay to be stored, but that
818 * would be an error in the callback function.
819 * Such violations will cause assertion failures in a debug build,
820 * and wrong output, but they will not cause a crash.
/*
 * Drives one from-Unicode conversion call including error callbacks.
 * Visible structure:
 *  - the implementation function is impl->fromUnicode, or
 *    impl->fromUnicodeWithOffsets with a fallback to impl->fromUnicode
 *    when no WithOffsets variant exists (then -1 is written for each
 *    offset);
 *  - a negative cnv->preFromULength means stored source units must be
 *    replayed: they are copied into the local replay buffer, the real
 *    source, limit, flush flag and source index are saved, and pArgs is
 *    pointed at the replay buffer;
 *  - the outer loop calls the implementation function; the inner loop
 *    updates offsets through _updateOffsets(), switches to or back from
 *    replay input, and detects truncated input at the end of a flushed
 *    stream (U_TRUNCATED_CHAR_FOUND);
 *  - on success with all input consumed the from-Unicode state is reset
 *    through _reset() without calling the callback;
 *  - an error the callback did not or cannot resolve (callback already
 *    called, U_BUFFER_OVERFLOW_ERROR, or anything other than
 *    invalid/illegal/truncated character) ends the function; unconsumed
 *    replay units are copied back into cnv->preFromU first and the real
 *    arguments restored;
 *  - otherwise the offending code point is written to
 *    cnv->invalidUCharBuffer and the from-Unicode callback is invoked
 *    with UCNV_UNASSIGNED for U_INVALID_CHAR_FOUND and UCNV_ILLEGAL for
 *    the other errors, after which the inner loop runs again.
 */
824 _fromUnicodeWithCallback(UConverterFromUnicodeArgs
*pArgs
, UErrorCode
*err
) {
825 UConverterFromUnicode fromUnicode
;
831 int32_t errorInputLength
;
832 UBool converterSawEndOfInput
, calledCallback
;
834 /* variables for m:n conversion */
835 UChar replay
[UCNV_EXT_MAX_UCHARS
];
836 const UChar
*realSource
, *realSourceLimit
;
837 int32_t realSourceIndex
;
840 cnv
=pArgs
->converter
;
843 offsets
=pArgs
->offsets
;
845 /* get the converter implementation function */
848 fromUnicode
=cnv
->sharedData
->impl
->fromUnicode
;
850 fromUnicode
=cnv
->sharedData
->impl
->fromUnicodeWithOffsets
;
851 if(fromUnicode
==NULL
) {
852 /* there is no WithOffsets implementation */
853 fromUnicode
=cnv
->sharedData
->impl
->fromUnicode
;
854 /* we will write -1 for each offset */
859 if(cnv
->preFromULength
>=0) {
863 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
864 realSourceLimit
=NULL
;
869 * Previous m:n conversion stored source units from a partial match
870 * and failed to consume all of them.
871 * We need to "replay" them from a temporary buffer and convert them first.
873 realSource
=pArgs
->source
;
874 realSourceLimit
=pArgs
->sourceLimit
;
875 realFlush
=pArgs
->flush
;
876 realSourceIndex
=sourceIndex
;
878 uprv_memcpy(replay
, cnv
->preFromU
, -cnv
->preFromULength
*U_SIZEOF_UCHAR
);
879 pArgs
->source
=replay
;
880 pArgs
->sourceLimit
=replay
-cnv
->preFromULength
;
884 cnv
->preFromULength
=0;
888 * loop for conversion and error handling
894 * handle end of input
895 * handle errors/call callback
900 if(U_SUCCESS(*err
)) {
902 fromUnicode(pArgs
, err
);
905 * set a flag for whether the converter
906 * successfully processed the end of the input
908 * need not check cnv->preFromULength==0 because a replay (<0) will cause
909 * s<sourceLimit before converterSawEndOfInput is checked
911 converterSawEndOfInput
=
912 (UBool
)(U_SUCCESS(*err
) &&
913 pArgs
->flush
&& pArgs
->source
==pArgs
->sourceLimit
&&
914 cnv
->fromUChar32
==0);
916 /* handle error from ucnv_convertEx() */
917 converterSawEndOfInput
=FALSE
;
920 /* no callback called yet for this iteration */
921 calledCallback
=FALSE
;
923 /* no sourceIndex adjustment for conversion, only for callback output */
927 * loop for offsets and error handling
929 * iterates at most 3 times:
930 * 1. to clean up after the conversion function
931 * 2. after the callback
932 * 3. after the callback again if there was truncated input
935 /* update offsets if we write any */
937 int32_t length
=(int32_t)(pArgs
->target
-t
);
939 _updateOffsets(offsets
, length
, sourceIndex
, errorInputLength
);
942 * if a converter handles offsets and updates the offsets
943 * pointer at the end, then pArgs->offset should not change
945 * however, some converters do not handle offsets at all
946 * (sourceIndex<0) or may not update the offsets pointer
948 pArgs
->offsets
=offsets
+=length
;
952 sourceIndex
+=(int32_t)(pArgs
->source
-s
);
956 if(cnv
->preFromULength
<0) {
958 * switch the source to new replay units (cannot occur while replaying)
959 * after offset handling and before end-of-input and callback handling
961 if(realSource
==NULL
) {
962 realSource
=pArgs
->source
;
963 realSourceLimit
=pArgs
->sourceLimit
;
964 realFlush
=pArgs
->flush
;
965 realSourceIndex
=sourceIndex
;
967 uprv_memcpy(replay
, cnv
->preFromU
, -cnv
->preFromULength
*U_SIZEOF_UCHAR
);
968 pArgs
->source
=replay
;
969 pArgs
->sourceLimit
=replay
-cnv
->preFromULength
;
971 if((sourceIndex
+=cnv
->preFromULength
)<0) {
975 cnv
->preFromULength
=0;
977 /* see implementation note before _fromUnicodeWithCallback() */
978 U_ASSERT(realSource
==NULL
);
979 *err
=U_INTERNAL_PROGRAM_ERROR
;
983 /* update pointers */
987 if(U_SUCCESS(*err
)) {
988 if(s
<pArgs
->sourceLimit
) {
990 * continue with the conversion loop while there is still input left
991 * (continue converting by breaking out of only the inner loop)
994 } else if(realSource
!=NULL
) {
995 /* switch back from replaying to the real source and continue */
996 pArgs
->source
=realSource
;
997 pArgs
->sourceLimit
=realSourceLimit
;
998 pArgs
->flush
=realFlush
;
999 sourceIndex
=realSourceIndex
;
1003 } else if(pArgs
->flush
&& cnv
->fromUChar32
!=0) {
1005 * the entire input stream is consumed
1006 * and there is a partial, truncated input sequence left
1009 /* inject an error and continue with callback handling */
1010 *err
=U_TRUNCATED_CHAR_FOUND
;
1011 calledCallback
=FALSE
; /* new error condition */
1013 /* input consumed */
1016 * return to the conversion loop once more if the flush
1017 * flag is set and the conversion function has not
1018 * successfully processed the end of the input yet
1020 * (continue converting by breaking out of only the inner loop)
1022 if(!converterSawEndOfInput
) {
1026 /* reset the converter without calling the callback function */
1027 _reset(cnv
, UCNV_RESET_FROM_UNICODE
, FALSE
);
1030 /* done successfully */
1035 /* U_FAILURE(*err) */
1039 if( calledCallback
||
1040 (e
=*err
)==U_BUFFER_OVERFLOW_ERROR
||
1041 (e
!=U_INVALID_CHAR_FOUND
&&
1042 e
!=U_ILLEGAL_CHAR_FOUND
&&
1043 e
!=U_TRUNCATED_CHAR_FOUND
)
1046 * the callback did not or cannot resolve the error:
1047 * set output pointers and return
1049 * the check for buffer overflow is redundant but it is
1050 * a high-runner case and hopefully documents the intent
1053 * if we were replaying, then the replay buffer must be
1054 * copied back into the UConverter
1055 * and the real arguments must be restored
1057 if(realSource
!=NULL
) {
1060 U_ASSERT(cnv
->preFromULength
==0);
1062 length
=(int32_t)(pArgs
->sourceLimit
-pArgs
->source
);
1064 uprv_memcpy(cnv
->preFromU
, pArgs
->source
, length
*U_SIZEOF_UCHAR
);
1065 cnv
->preFromULength
=(int8_t)-length
;
1068 pArgs
->source
=realSource
;
1069 pArgs
->sourceLimit
=realSourceLimit
;
1070 pArgs
->flush
=realFlush
;
1077 /* callback handling */
1081 /* get and write the code point */
1082 codePoint
=cnv
->fromUChar32
;
1084 U16_APPEND_UNSAFE(cnv
->invalidUCharBuffer
, errorInputLength
, codePoint
);
1085 cnv
->invalidUCharLength
=(int8_t)errorInputLength
;
1087 /* set the converter state to deal with the next character */
1090 /* call the callback function */
1091 cnv
->fromUCharErrorBehaviour(cnv
->fromUContext
, pArgs
,
1092 cnv
->invalidUCharBuffer
, errorInputLength
, codePoint
,
1093 *err
==U_INVALID_CHAR_FOUND
? UCNV_UNASSIGNED
: UCNV_ILLEGAL
,
1098 * loop back to the offset handling
1100 * this flag will indicate after offset handling
1101 * that a callback was called;
1102 * if the callback did not resolve the error, then we return
1104 calledCallback
=TRUE
;
1110 * Output the fromUnicode overflow buffer.
1111 * Call this function if(cnv->charErrorBufferLength>0).
1112 * @return TRUE if overflow
/*
 * Moves bytes that are pending in cnv->charErrorBuffer to the target.
 * Visible behaviour:
 *  - offsets are written only when pOffsets is non-NULL; each byte taken
 *    from the overflow buffer gets the offset -1 because no source index
 *    is available for old output;
 *  - if the target fills up (t == targetLimit) before the buffer is
 *    empty, the remaining bytes are moved to the front of the overflow
 *    buffer, charErrorBufferLength is set to their count and
 *    U_BUFFER_OVERFLOW_ERROR is set;
 *  - when the whole buffer has been copied, charErrorBufferLength is
 *    set to 0.
 */
1115 ucnv_outputOverflowFromUnicode(UConverter
*cnv
,
1116 char **target
, const char *targetLimit
,
1124 if(pOffsets
!=NULL
) {
1130 overflow
=(char *)cnv
->charErrorBuffer
;
1131 length
=cnv
->charErrorBufferLength
;
1134 if(t
==targetLimit
) {
1135 /* the overflow buffer contains too much, keep the rest */
1139 overflow
[j
++]=overflow
[i
++];
1142 cnv
->charErrorBufferLength
=(int8_t)j
;
1147 *err
=U_BUFFER_OVERFLOW_ERROR
;
1151 /* copy the overflow contents to the target */
1154 *offsets
++=-1; /* no source index available for old output */
1158 /* the overflow buffer is completely copied to the target */
1159 cnv
->charErrorBufferLength
=0;
1167 U_CAPI
void U_EXPORT2
1168 ucnv_fromUnicode(UConverter
*cnv
,
1169 char **target
, const char *targetLimit
,
1170 const UChar
**source
, const UChar
*sourceLimit
,
1174 UConverterFromUnicodeArgs args
;
1178 /* check parameters */
1179 if(err
==NULL
|| U_FAILURE(*err
)) {
1183 if(cnv
==NULL
|| target
==NULL
|| source
==NULL
) {
1184 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1191 if ((const void *)U_MAX_PTR(sourceLimit
) == (const void *)sourceLimit
) {
1193 Prevent code from going into an infinite loop in case we do hit this
1194 limit. The limit pointer is expected to be on a UChar * boundary.
1195 This also prevents the next argument check from failing.
1197 sourceLimit
= (const UChar
*)(((const char *)sourceLimit
) - 1);
1201 * All these conditions should never happen.
1203 * 1) Make sure that the limits are >= to the address source or target
1205 * 2) Make sure that the buffer sizes do not exceed the number range for
1206 * int32_t because some functions use the size (in units or bytes)
1207 * rather than comparing pointers, and because offsets are int32_t values.
1209 * size_t is guaranteed to be unsigned and large enough for the job.
1211 * Return with an error instead of adjusting the limits because we would
1212 * not be able to maintain the semantics that either the source must be
1213 * consumed or the target filled (unless an error occurs).
1214 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1216 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1217 * to a char * pointer and provide an incomplete UChar code unit.
1219 if (sourceLimit
<s
|| targetLimit
<t
||
1220 ((size_t)(sourceLimit
-s
)>(size_t)0x3fffffff && sourceLimit
>s
) ||
1221 ((size_t)(targetLimit
-t
)>(size_t)0x7fffffff && targetLimit
>t
) ||
1222 (((const char *)sourceLimit
-(const char *)s
) & 1) != 0)
1224 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1228 /* output the target overflow buffer */
1229 if( cnv
->charErrorBufferLength
>0 &&
1230 ucnv_outputOverflowFromUnicode(cnv
, target
, targetLimit
, &offsets
, err
)
1232 /* U_BUFFER_OVERFLOW_ERROR */
1235 /* *target may have moved, therefore stop using t */
1237 if(!flush
&& s
==sourceLimit
&& cnv
->preFromULength
>=0) {
1238 /* the overflow buffer is emptied and there is no new input: we are done */
1243 * Do not simply return with a buffer overflow error if
1244 * !flush && t==targetLimit
1245 * because it is possible that the source will not generate any output.
1246 * For example, the skip callback may be called;
1247 * it does not output anything.
1250 /* prepare the converter arguments */
1253 args
.offsets
=offsets
;
1255 args
.sourceLimit
=sourceLimit
;
1256 args
.target
=*target
;
1257 args
.targetLimit
=targetLimit
;
1258 args
.size
=sizeof(args
);
1260 _fromUnicodeWithCallback(&args
, err
);
1262 *source
=args
.source
;
1263 *target
=args
.target
;
1266 /* ucnv_toUnicode() --------------------------------------------------------- */
1269 _toUnicodeWithCallback(UConverterToUnicodeArgs
*pArgs
, UErrorCode
*err
) {
1270 UConverterToUnicode toUnicode
;
1275 int32_t sourceIndex
;
1276 int32_t errorInputLength
;
1277 UBool converterSawEndOfInput
, calledCallback
;
1279 /* variables for m:n conversion */
1280 char replay
[UCNV_EXT_MAX_BYTES
];
1281 const char *realSource
, *realSourceLimit
;
1282 int32_t realSourceIndex
;
1285 cnv
=pArgs
->converter
;
1288 offsets
=pArgs
->offsets
;
1290 /* get the converter implementation function */
1293 toUnicode
=cnv
->sharedData
->impl
->toUnicode
;
1295 toUnicode
=cnv
->sharedData
->impl
->toUnicodeWithOffsets
;
1296 if(toUnicode
==NULL
) {
1297 /* there is no WithOffsets implementation */
1298 toUnicode
=cnv
->sharedData
->impl
->toUnicode
;
1299 /* we will write -1 for each offset */
1304 if(cnv
->preToULength
>=0) {
1308 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1309 realSourceLimit
=NULL
;
1314 * Previous m:n conversion stored source units from a partial match
1315 * and failed to consume all of them.
1316 * We need to "replay" them from a temporary buffer and convert them first.
1318 realSource
=pArgs
->source
;
1319 realSourceLimit
=pArgs
->sourceLimit
;
1320 realFlush
=pArgs
->flush
;
1321 realSourceIndex
=sourceIndex
;
1323 uprv_memcpy(replay
, cnv
->preToU
, -cnv
->preToULength
);
1324 pArgs
->source
=replay
;
1325 pArgs
->sourceLimit
=replay
-cnv
->preToULength
;
1329 cnv
->preToULength
=0;
1333 * loop for conversion and error handling
1339 * handle end of input
1340 * handle errors/call callback
1345 if(U_SUCCESS(*err
)) {
1347 toUnicode(pArgs
, err
);
1350 * set a flag for whether the converter
1351 * successfully processed the end of the input
1353 * need not check cnv->preToULength==0 because a replay (<0) will cause
1354 * s<sourceLimit before converterSawEndOfInput is checked
1356 converterSawEndOfInput
=
1357 (UBool
)(U_SUCCESS(*err
) &&
1358 pArgs
->flush
&& pArgs
->source
==pArgs
->sourceLimit
&&
1361 /* handle error from getNextUChar() or ucnv_convertEx() */
1362 converterSawEndOfInput
=FALSE
;
1365 /* no callback called yet for this iteration */
1366 calledCallback
=FALSE
;
1368 /* no sourceIndex adjustment for conversion, only for callback output */
1372 * loop for offsets and error handling
1374 * iterates at most 3 times:
1375 * 1. to clean up after the conversion function
1376 * 2. after the callback
1377 * 3. after the callback again if there was truncated input
1380 /* update offsets if we write any */
1382 int32_t length
=(int32_t)(pArgs
->target
-t
);
1384 _updateOffsets(offsets
, length
, sourceIndex
, errorInputLength
);
1387 * if a converter handles offsets and updates the offsets
1388 * pointer at the end, then pArgs->offset should not change
1390 * however, some converters do not handle offsets at all
1391 * (sourceIndex<0) or may not update the offsets pointer
1393 pArgs
->offsets
=offsets
+=length
;
1396 if(sourceIndex
>=0) {
1397 sourceIndex
+=(int32_t)(pArgs
->source
-s
);
1401 if(cnv
->preToULength
<0) {
1403 * switch the source to new replay units (cannot occur while replaying)
1404 * after offset handling and before end-of-input and callback handling
1406 if(realSource
==NULL
) {
1407 realSource
=pArgs
->source
;
1408 realSourceLimit
=pArgs
->sourceLimit
;
1409 realFlush
=pArgs
->flush
;
1410 realSourceIndex
=sourceIndex
;
1412 uprv_memcpy(replay
, cnv
->preToU
, -cnv
->preToULength
);
1413 pArgs
->source
=replay
;
1414 pArgs
->sourceLimit
=replay
-cnv
->preToULength
;
1416 if((sourceIndex
+=cnv
->preToULength
)<0) {
1420 cnv
->preToULength
=0;
1422 /* see implementation note before _fromUnicodeWithCallback() */
1423 U_ASSERT(realSource
==NULL
);
1424 *err
=U_INTERNAL_PROGRAM_ERROR
;
1428 /* update pointers */
1432 if(U_SUCCESS(*err
)) {
1433 if(s
<pArgs
->sourceLimit
) {
1435 * continue with the conversion loop while there is still input left
1436 * (continue converting by breaking out of only the inner loop)
1439 } else if(realSource
!=NULL
) {
1440 /* switch back from replaying to the real source and continue */
1441 pArgs
->source
=realSource
;
1442 pArgs
->sourceLimit
=realSourceLimit
;
1443 pArgs
->flush
=realFlush
;
1444 sourceIndex
=realSourceIndex
;
1448 } else if(pArgs
->flush
&& cnv
->toULength
>0) {
1450 * the entire input stream is consumed
1451 * and there is a partial, truncated input sequence left
1454 /* inject an error and continue with callback handling */
1455 *err
=U_TRUNCATED_CHAR_FOUND
;
1456 calledCallback
=FALSE
; /* new error condition */
1458 /* input consumed */
1461 * return to the conversion loop once more if the flush
1462 * flag is set and the conversion function has not
1463 * successfully processed the end of the input yet
1465 * (continue converting by breaking out of only the inner loop)
1467 if(!converterSawEndOfInput
) {
1471 /* reset the converter without calling the callback function */
1472 _reset(cnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
1475 /* done successfully */
1480 /* U_FAILURE(*err) */
1484 if( calledCallback
||
1485 (e
=*err
)==U_BUFFER_OVERFLOW_ERROR
||
1486 (e
!=U_INVALID_CHAR_FOUND
&&
1487 e
!=U_ILLEGAL_CHAR_FOUND
&&
1488 e
!=U_TRUNCATED_CHAR_FOUND
&&
1489 e
!=U_ILLEGAL_ESCAPE_SEQUENCE
&&
1490 e
!=U_UNSUPPORTED_ESCAPE_SEQUENCE
)
1493 * the callback did not or cannot resolve the error:
1494 * set output pointers and return
1496 * the check for buffer overflow is redundant but it is
1497 * a high-runner case and hopefully documents the intent
1500 * if we were replaying, then the replay buffer must be
1501 * copied back into the UConverter
1502 * and the real arguments must be restored
1504 if(realSource
!=NULL
) {
1507 U_ASSERT(cnv
->preToULength
==0);
1509 length
=(int32_t)(pArgs
->sourceLimit
-pArgs
->source
);
1511 uprv_memcpy(cnv
->preToU
, pArgs
->source
, length
);
1512 cnv
->preToULength
=(int8_t)-length
;
1515 pArgs
->source
=realSource
;
1516 pArgs
->sourceLimit
=realSourceLimit
;
1517 pArgs
->flush
=realFlush
;
1524 /* copy toUBytes[] to invalidCharBuffer[] */
1525 errorInputLength
=cnv
->invalidCharLength
=cnv
->toULength
;
1526 if(errorInputLength
>0) {
1527 uprv_memcpy(cnv
->invalidCharBuffer
, cnv
->toUBytes
, errorInputLength
);
1530 /* set the converter state to deal with the next character */
1533 /* call the callback function */
1534 if(cnv
->toUCallbackReason
==UCNV_ILLEGAL
&& *err
==U_INVALID_CHAR_FOUND
) {
1535 cnv
->toUCallbackReason
= UCNV_UNASSIGNED
;
1537 cnv
->fromCharErrorBehaviour(cnv
->toUContext
, pArgs
,
1538 cnv
->invalidCharBuffer
, errorInputLength
,
1539 cnv
->toUCallbackReason
,
1541 cnv
->toUCallbackReason
= UCNV_ILLEGAL
; /* reset to default value */
1544 * loop back to the offset handling
1546 * this flag will indicate after offset handling
1547 * that a callback was called;
1548 * if the callback did not resolve the error, then we return
1550 calledCallback
=TRUE
;
1556 * Output the toUnicode overflow buffer.
1557 * Call this function if(cnv->UCharErrorBufferLength>0).
1558 * @return TRUE if overflow
1561 ucnv_outputOverflowToUnicode(UConverter
*cnv
,
1562 UChar
**target
, const UChar
*targetLimit
,
1566 UChar
*overflow
, *t
;
1570 if(pOffsets
!=NULL
) {
1576 overflow
=cnv
->UCharErrorBuffer
;
1577 length
=cnv
->UCharErrorBufferLength
;
1580 if(t
==targetLimit
) {
1581 /* the overflow buffer contains too much, keep the rest */
1585 overflow
[j
++]=overflow
[i
++];
1588 cnv
->UCharErrorBufferLength
=(int8_t)j
;
1593 *err
=U_BUFFER_OVERFLOW_ERROR
;
1597 /* copy the overflow contents to the target */
1600 *offsets
++=-1; /* no source index available for old output */
1604 /* the overflow buffer is completely copied to the target */
1605 cnv
->UCharErrorBufferLength
=0;
1613 U_CAPI
void U_EXPORT2
1614 ucnv_toUnicode(UConverter
*cnv
,
1615 UChar
**target
, const UChar
*targetLimit
,
1616 const char **source
, const char *sourceLimit
,
1620 UConverterToUnicodeArgs args
;
1624 /* check parameters */
1625 if(err
==NULL
|| U_FAILURE(*err
)) {
1629 if(cnv
==NULL
|| target
==NULL
|| source
==NULL
) {
1630 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1637 if ((const void *)U_MAX_PTR(targetLimit
) == (const void *)targetLimit
) {
1639 Prevent code from going into an infinite loop in case we do hit this
1640 limit. The limit pointer is expected to be on a UChar * boundary.
1641 This also prevents the next argument check from failing.
1643 targetLimit
= (const UChar
*)(((const char *)targetLimit
) - 1);
1647 * All these conditions should never happen.
1649 * 1) Make sure that the limits are >= to the address source or target
1651 * 2) Make sure that the buffer sizes do not exceed the number range for
1652 * int32_t because some functions use the size (in units or bytes)
1653 * rather than comparing pointers, and because offsets are int32_t values.
1655 * size_t is guaranteed to be unsigned and large enough for the job.
1657 * Return with an error instead of adjusting the limits because we would
1658 * not be able to maintain the semantics that either the source must be
1659 * consumed or the target filled (unless an error occurs).
1660 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1662 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1663 * to a char * pointer and provide an incomplete UChar code unit.
1665 if (sourceLimit
<s
|| targetLimit
<t
||
1666 ((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
) ||
1667 ((size_t)(targetLimit
-t
)>(size_t)0x3fffffff && targetLimit
>t
) ||
1668 (((const char *)targetLimit
-(const char *)t
) & 1) != 0
1670 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1674 /* output the target overflow buffer */
1675 if( cnv
->UCharErrorBufferLength
>0 &&
1676 ucnv_outputOverflowToUnicode(cnv
, target
, targetLimit
, &offsets
, err
)
1678 /* U_BUFFER_OVERFLOW_ERROR */
1681 /* *target may have moved, therefore stop using t */
1683 if(!flush
&& s
==sourceLimit
&& cnv
->preToULength
>=0) {
1684 /* the overflow buffer is emptied and there is no new input: we are done */
1689 * Do not simply return with a buffer overflow error if
1690 * !flush && t==targetLimit
1691 * because it is possible that the source will not generate any output.
1692 * For example, the skip callback may be called;
1693 * it does not output anything.
1696 /* prepare the converter arguments */
1699 args
.offsets
=offsets
;
1701 args
.sourceLimit
=sourceLimit
;
1702 args
.target
=*target
;
1703 args
.targetLimit
=targetLimit
;
1704 args
.size
=sizeof(args
);
1706 _toUnicodeWithCallback(&args
, err
);
1708 *source
=args
.source
;
1709 *target
=args
.target
;
1712 /* ucnv_to/fromUChars() ----------------------------------------------------- */
1714 U_CAPI
int32_t U_EXPORT2
1715 ucnv_fromUChars(UConverter
*cnv
,
1716 char *dest
, int32_t destCapacity
,
1717 const UChar
*src
, int32_t srcLength
,
1718 UErrorCode
*pErrorCode
) {
1719 const UChar
*srcLimit
;
1720 char *originalDest
, *destLimit
;
1723 /* check arguments */
1724 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1729 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
) ||
1730 srcLength
<-1 || (srcLength
!=0 && src
==NULL
)
1732 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1737 ucnv_resetFromUnicode(cnv
);
1740 srcLength
=u_strlen(src
);
1743 srcLimit
=src
+srcLength
;
1744 destLimit
=dest
+destCapacity
;
1746 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1747 if(destLimit
<dest
|| (destLimit
==NULL
&& dest
!=NULL
)) {
1748 destLimit
=(char *)U_MAX_PTR(dest
);
1751 /* perform the conversion */
1752 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1753 destLength
=(int32_t)(dest
-originalDest
);
1755 /* if an overflow occurs, then get the preflighting length */
1756 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
1759 destLimit
=buffer
+sizeof(buffer
);
1762 *pErrorCode
=U_ZERO_ERROR
;
1763 ucnv_fromUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1764 destLength
+=(int32_t)(dest
-buffer
);
1765 } while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
1771 return u_terminateChars(originalDest
, destCapacity
, destLength
, pErrorCode
);
1774 U_CAPI
int32_t U_EXPORT2
1775 ucnv_toUChars(UConverter
*cnv
,
1776 UChar
*dest
, int32_t destCapacity
,
1777 const char *src
, int32_t srcLength
,
1778 UErrorCode
*pErrorCode
) {
1779 const char *srcLimit
;
1780 UChar
*originalDest
, *destLimit
;
1783 /* check arguments */
1784 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
1789 destCapacity
<0 || (destCapacity
>0 && dest
==NULL
) ||
1790 srcLength
<-1 || (srcLength
!=0 && src
==NULL
))
1792 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
1797 ucnv_resetToUnicode(cnv
);
1800 srcLength
=(int32_t)uprv_strlen(src
);
1803 srcLimit
=src
+srcLength
;
1804 destLimit
=dest
+destCapacity
;
1806 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1807 if(destLimit
<dest
|| (destLimit
==NULL
&& dest
!=NULL
)) {
1808 destLimit
=(UChar
*)U_MAX_PTR(dest
);
1811 /* perform the conversion */
1812 ucnv_toUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1813 destLength
=(int32_t)(dest
-originalDest
);
1815 /* if an overflow occurs, then get the preflighting length */
1816 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
)
1820 destLimit
=buffer
+UPRV_LENGTHOF(buffer
);
1823 *pErrorCode
=U_ZERO_ERROR
;
1824 ucnv_toUnicode(cnv
, &dest
, destLimit
, &src
, srcLimit
, 0, TRUE
, pErrorCode
);
1825 destLength
+=(int32_t)(dest
-buffer
);
1827 while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
1833 return u_terminateUChars(originalDest
, destCapacity
, destLength
, pErrorCode
);
1836 /* ucnv_getNextUChar() ------------------------------------------------------ */
1838 U_CAPI UChar32 U_EXPORT2
1839 ucnv_getNextUChar(UConverter
*cnv
,
1840 const char **source
, const char *sourceLimit
,
1842 UConverterToUnicodeArgs args
;
1843 UChar buffer
[U16_MAX_LENGTH
];
1848 /* check parameters */
1849 if(err
==NULL
|| U_FAILURE(*err
)) {
1853 if(cnv
==NULL
|| source
==NULL
) {
1854 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1860 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1865 * Make sure that the buffer sizes do not exceed the number range for
1866 * int32_t because some functions use the size (in units or bytes)
1867 * rather than comparing pointers, and because offsets are int32_t values.
1869 * size_t is guaranteed to be unsigned and large enough for the job.
1871 * Return with an error instead of adjusting the limits because we would
1872 * not be able to maintain the semantics that either the source must be
1873 * consumed or the target filled (unless an error occurs).
1874 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1876 if(((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
)) {
1877 *err
=U_ILLEGAL_ARGUMENT_ERROR
;
1883 /* flush the target overflow buffer */
1884 if(cnv
->UCharErrorBufferLength
>0) {
1887 overflow
=cnv
->UCharErrorBuffer
;
1889 length
=cnv
->UCharErrorBufferLength
;
1890 U16_NEXT(overflow
, i
, length
, c
);
1892 /* move the remaining overflow contents up to the beginning */
1893 if((cnv
->UCharErrorBufferLength
=(int8_t)(length
-i
))>0) {
1894 uprv_memmove(cnv
->UCharErrorBuffer
, cnv
->UCharErrorBuffer
+i
,
1895 cnv
->UCharErrorBufferLength
*U_SIZEOF_UCHAR
);
1898 if(!U16_IS_LEAD(c
) || i
<length
) {
1902 * Continue if the overflow buffer contained only a lead surrogate,
1903 * in case the converter outputs single surrogates from complete
1909 * flush==TRUE is implied for ucnv_getNextUChar()
1911 * do not simply return even if s==sourceLimit because the converter may
1912 * not have seen flush==TRUE before
1915 /* prepare the converter arguments */
1920 args
.sourceLimit
=sourceLimit
;
1922 args
.targetLimit
=buffer
+1;
1923 args
.size
=sizeof(args
);
1927 * call the native getNextUChar() implementation if we are
1928 * at a character boundary (toULength==0)
1930 * unlike with _toUnicode(), getNextUChar() implementations must set
1931 * U_TRUNCATED_CHAR_FOUND for truncated input,
1932 * in addition to setting toULength/toUBytes[]
1934 if(cnv
->toULength
==0 && cnv
->sharedData
->impl
->getNextUChar
!=NULL
) {
1935 c
=cnv
->sharedData
->impl
->getNextUChar(&args
, err
);
1936 *source
=s
=args
.source
;
1937 if(*err
==U_INDEX_OUTOFBOUNDS_ERROR
) {
1938 /* reset the converter without calling the callback function */
1939 _reset(cnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
1940 return 0xffff; /* no output */
1941 } else if(U_SUCCESS(*err
) && c
>=0) {
1944 * else fall through to use _toUnicode() because
1945 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1946 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1951 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1952 _toUnicodeWithCallback(&args
, err
);
1954 if(*err
==U_BUFFER_OVERFLOW_ERROR
) {
1959 length
=(int32_t)(args
.target
-buffer
);
1961 /* write the lead surrogate from the overflow buffer */
1963 args
.target
=buffer
+1;
1968 /* buffer contents starts at i and ends before length */
1970 if(U_FAILURE(*err
)) {
1971 c
=0xffff; /* no output */
1972 } else if(length
==0) {
1973 /* no input or only state changes */
1974 *err
=U_INDEX_OUTOFBOUNDS_ERROR
;
1975 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1976 c
=0xffff; /* no output */
1980 if(!U16_IS_LEAD(c
)) {
1981 /* consume c=buffer[0], done */
1983 /* got a lead surrogate, see if a trail surrogate follows */
1986 if(cnv
->UCharErrorBufferLength
>0) {
1987 /* got overflow output from the conversion */
1988 if(U16_IS_TRAIL(c2
=cnv
->UCharErrorBuffer
[0])) {
1989 /* got a trail surrogate, too */
1990 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
1992 /* move the remaining overflow contents up to the beginning */
1993 if((--cnv
->UCharErrorBufferLength
)>0) {
1994 uprv_memmove(cnv
->UCharErrorBuffer
, cnv
->UCharErrorBuffer
+1,
1995 cnv
->UCharErrorBufferLength
*U_SIZEOF_UCHAR
);
1998 /* c is an unpaired lead surrogate, just return it */
2000 } else if(args
.source
<sourceLimit
) {
2001 /* convert once more, to buffer[1] */
2002 args
.targetLimit
=buffer
+2;
2003 _toUnicodeWithCallback(&args
, err
);
2004 if(*err
==U_BUFFER_OVERFLOW_ERROR
) {
2008 length
=(int32_t)(args
.target
-buffer
);
2009 if(U_SUCCESS(*err
) && length
==2 && U16_IS_TRAIL(c2
=buffer
[1])) {
2010 /* got a trail surrogate, too */
2011 c
=U16_GET_SUPPLEMENTARY(c
, c2
);
2019 * move leftover output from buffer[i..length[
2020 * into the beginning of the overflow buffer
2023 /* move further overflow back */
2024 int32_t delta
=length
-i
;
2025 if((length
=cnv
->UCharErrorBufferLength
)>0) {
2026 uprv_memmove(cnv
->UCharErrorBuffer
+delta
, cnv
->UCharErrorBuffer
,
2027 length
*U_SIZEOF_UCHAR
);
2029 cnv
->UCharErrorBufferLength
=(int8_t)(length
+delta
);
2031 cnv
->UCharErrorBuffer
[0]=buffer
[i
++];
2033 cnv
->UCharErrorBuffer
[1]=buffer
[i
];
2037 *source
=args
.source
;
2041 /* ucnv_convert() and siblings ---------------------------------------------- */
2043 U_CAPI
void U_EXPORT2
2044 ucnv_convertEx(UConverter
*targetCnv
, UConverter
*sourceCnv
,
2045 char **target
, const char *targetLimit
,
2046 const char **source
, const char *sourceLimit
,
2047 UChar
*pivotStart
, UChar
**pivotSource
,
2048 UChar
**pivotTarget
, const UChar
*pivotLimit
,
2049 UBool reset
, UBool flush
,
2050 UErrorCode
*pErrorCode
) {
2051 UChar pivotBuffer
[CHUNK_SIZE
];
2052 const UChar
*myPivotSource
;
2053 UChar
*myPivotTarget
;
2057 UConverterToUnicodeArgs toUArgs
;
2058 UConverterFromUnicodeArgs fromUArgs
;
2059 UConverterConvert convert
;
2061 /* error checking */
2062 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2066 if( targetCnv
==NULL
|| sourceCnv
==NULL
||
2067 source
==NULL
|| *source
==NULL
||
2068 target
==NULL
|| *target
==NULL
|| targetLimit
==NULL
2070 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2076 if((sourceLimit
!=NULL
&& sourceLimit
<s
) || targetLimit
<t
) {
2077 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2082 * Make sure that the buffer sizes do not exceed the number range for
2083 * int32_t. See ucnv_toUnicode() for a more detailed comment.
2086 (sourceLimit
!=NULL
&& ((size_t)(sourceLimit
-s
)>(size_t)0x7fffffff && sourceLimit
>s
)) ||
2087 ((size_t)(targetLimit
-t
)>(size_t)0x7fffffff && targetLimit
>t
)
2089 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2093 if(pivotStart
==NULL
) {
2095 /* streaming conversion requires an explicit pivot buffer */
2096 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2100 /* use the stack pivot buffer */
2101 myPivotSource
=myPivotTarget
=pivotStart
=pivotBuffer
;
2102 pivotSource
=(UChar
**)&myPivotSource
;
2103 pivotTarget
=&myPivotTarget
;
2104 pivotLimit
=pivotBuffer
+CHUNK_SIZE
;
2105 } else if( pivotStart
>=pivotLimit
||
2106 pivotSource
==NULL
|| *pivotSource
==NULL
||
2107 pivotTarget
==NULL
|| *pivotTarget
==NULL
||
2110 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2114 if(sourceLimit
==NULL
) {
2115 /* get limit of single-byte-NUL-terminated source string */
2116 sourceLimit
=uprv_strchr(*source
, 0);
2120 ucnv_resetToUnicode(sourceCnv
);
2121 ucnv_resetFromUnicode(targetCnv
);
2122 *pivotSource
=*pivotTarget
=pivotStart
;
2123 } else if(targetCnv
->charErrorBufferLength
>0) {
2124 /* output the targetCnv overflow buffer */
2125 if(ucnv_outputOverflowFromUnicode(targetCnv
, target
, targetLimit
, NULL
, pErrorCode
)) {
2126 /* U_BUFFER_OVERFLOW_ERROR */
2129 /* *target has moved, therefore stop using t */
2132 targetCnv
->preFromULength
>=0 && *pivotSource
==*pivotTarget
&&
2133 sourceCnv
->UCharErrorBufferLength
==0 && sourceCnv
->preToULength
>=0 && s
==sourceLimit
2135 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2140 /* Is direct-UTF-8 conversion available? */
2141 if( sourceCnv
->sharedData
->staticData
->conversionType
==UCNV_UTF8
&&
2142 targetCnv
->sharedData
->impl
->fromUTF8
!=NULL
2144 convert
=targetCnv
->sharedData
->impl
->fromUTF8
;
2145 } else if( targetCnv
->sharedData
->staticData
->conversionType
==UCNV_UTF8
&&
2146 sourceCnv
->sharedData
->impl
->toUTF8
!=NULL
2148 convert
=sourceCnv
->sharedData
->impl
->toUTF8
;
2154 * If direct-UTF-8 conversion is available, then we use a smaller
2155 * pivot buffer for error handling and partial matches
2156 * so that we quickly return to direct conversion.
2158 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2160 * We could reduce the pivot buffer size further, at the cost of
2161 * buffer overflows from callbacks.
2162 * The pivot buffer should not be smaller than the maximum number of
2163 * fromUnicode extension table input UChars
2164 * (for m:n conversion, see
2165 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2166 * or 2 for surrogate pairs.
2168 * Too small a buffer can cause thrashing between pivoting and direct
2169 * conversion, with function call overhead outweighing the benefits
2170 * of direct conversion.
2172 if(convert
!=NULL
&& (pivotLimit
-pivotStart
)>32) {
2173 pivotLimit
=pivotStart
+32;
2176 /* prepare the converter arguments */
2177 fromUArgs
.converter
=targetCnv
;
2178 fromUArgs
.flush
=FALSE
;
2179 fromUArgs
.offsets
=NULL
;
2180 fromUArgs
.target
=*target
;
2181 fromUArgs
.targetLimit
=targetLimit
;
2182 fromUArgs
.size
=sizeof(fromUArgs
);
2184 toUArgs
.converter
=sourceCnv
;
2185 toUArgs
.flush
=flush
;
2186 toUArgs
.offsets
=NULL
;
2188 toUArgs
.sourceLimit
=sourceLimit
;
2189 toUArgs
.targetLimit
=pivotLimit
;
2190 toUArgs
.size
=sizeof(toUArgs
);
2193 * TODO: Consider separating this function into two functions,
2194 * extracting exactly the conversion loop,
2195 * for readability and to reduce the set of visible variables.
2197 * Otherwise stop using s and t from here on.
2204 * The sequence of steps in the loop may appear backward,
2205 * but the principle is simple:
2207 * source - sourceCnv overflow - pivot - targetCnv overflow - target
2208 * empty out later buffers before refilling them from earlier ones.
2210 * The targetCnv overflow buffer is flushed out only once before the loop.
2214 * if(pivot not empty or error or replay or flush fromUnicode) {
2215 * fromUnicode(pivot -> target);
2218 * For pivoting conversion; and for direct conversion for
2219 * error callback handling and flushing the replay buffer.
2221 if( *pivotSource
<*pivotTarget
||
2222 U_FAILURE(*pErrorCode
) ||
2223 targetCnv
->preFromULength
<0 ||
2226 fromUArgs
.source
=*pivotSource
;
2227 fromUArgs
.sourceLimit
=*pivotTarget
;
2228 _fromUnicodeWithCallback(&fromUArgs
, pErrorCode
);
2229 if(U_FAILURE(*pErrorCode
)) {
2230 /* target overflow, or conversion error */
2231 *pivotSource
=(UChar
*)fromUArgs
.source
;
2236 * _fromUnicodeWithCallback() must have consumed the pivot contents
2237 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2241 /* The pivot buffer is empty; reset it so we start at pivotStart. */
2242 *pivotSource
=*pivotTarget
=pivotStart
;
2245 * if(sourceCnv overflow buffer not empty) {
2246 * move(sourceCnv overflow buffer -> pivot);
2250 /* output the sourceCnv overflow buffer */
2251 if(sourceCnv
->UCharErrorBufferLength
>0) {
2252 if(ucnv_outputOverflowToUnicode(sourceCnv
, pivotTarget
, pivotLimit
, NULL
, pErrorCode
)) {
2253 /* U_BUFFER_OVERFLOW_ERROR */
2254 *pErrorCode
=U_ZERO_ERROR
;
2260 * check for end of input and break if done
2262 * Checking both flush and fromUArgs.flush ensures that the converters
2263 * have been called with the flush flag set if the ucnv_convertEx()
2266 if( toUArgs
.source
==sourceLimit
&&
2267 sourceCnv
->preToULength
>=0 && sourceCnv
->toULength
==0 &&
2268 (!flush
|| fromUArgs
.flush
)
2270 /* done successfully */
2275 * use direct conversion if available
2276 * but not if continuing a partial match
2277 * or flushing the toUnicode replay buffer
2279 if(convert
!=NULL
&& targetCnv
->preFromUFirstCP
<0 && sourceCnv
->preToULength
==0) {
2280 if(*pErrorCode
==U_USING_DEFAULT_WARNING
) {
2281 /* remove a warning that may be set by this function */
2282 *pErrorCode
=U_ZERO_ERROR
;
2284 convert(&fromUArgs
, &toUArgs
, pErrorCode
);
2285 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2287 } else if(U_FAILURE(*pErrorCode
)) {
2288 if(sourceCnv
->toULength
>0) {
2290 * Fall through to calling _toUnicodeWithCallback()
2291 * for callback handling.
2293 * The pivot buffer will be reset with
2294 * *pivotSource=*pivotTarget=pivotStart;
2295 * which indicates a toUnicode error to the caller
2296 * (*pivotSource==pivotStart shows no pivot UChars consumed).
2300 * Indicate a fromUnicode error to the caller
2301 * (*pivotSource>pivotStart shows some pivot UChars consumed).
2303 *pivotSource
=*pivotTarget
=pivotStart
+1;
2305 * Loop around to calling _fromUnicodeWithCallbacks()
2306 * for callback handling.
2310 } else if(*pErrorCode
==U_USING_DEFAULT_WARNING
) {
2312 * No error, but the implementation requested to temporarily
2313 * fall back to pivoting.
2315 *pErrorCode
=U_ZERO_ERROR
;
2317 * The following else branches are almost identical to the end-of-input
2318 * handling in _toUnicodeWithCallback().
2319 * Avoid calling it just for the end of input.
2321 } else if(flush
&& sourceCnv
->toULength
>0) { /* flush==toUArgs.flush */
2323 * the entire input stream is consumed
2324 * and there is a partial, truncated input sequence left
2327 /* inject an error and continue with callback handling */
2328 *pErrorCode
=U_TRUNCATED_CHAR_FOUND
;
2330 /* input consumed */
2332 /* reset the converters without calling the callback functions */
2333 _reset(sourceCnv
, UCNV_RESET_TO_UNICODE
, FALSE
);
2334 _reset(targetCnv
, UCNV_RESET_FROM_UNICODE
, FALSE
);
2337 /* done successfully */
2343 * toUnicode(source -> pivot);
2345 * For pivoting conversion; and for direct conversion for
2346 * error callback handling, continuing partial matches
2347 * and flushing the replay buffer.
2349 * The pivot buffer is empty and reset.
2351 toUArgs
.target
=pivotStart
; /* ==*pivotTarget */
2352 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2353 _toUnicodeWithCallback(&toUArgs
, pErrorCode
);
2354 *pivotTarget
=toUArgs
.target
;
2355 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
2356 /* pivot overflow: continue with the conversion loop */
2357 *pErrorCode
=U_ZERO_ERROR
;
2358 } else if(U_FAILURE(*pErrorCode
) || (!flush
&& *pivotTarget
==pivotStart
)) {
2359 /* conversion error, or there was nothing left to convert */
2364 * _toUnicodeWithCallback() wrote into the pivot buffer,
2365 * continue with fromUnicode conversion.
2367 * Set the fromUnicode flush flag if we flush and if toUnicode has
2368 * processed the end of the input.
2370 if( flush
&& toUArgs
.source
==sourceLimit
&&
2371 sourceCnv
->preToULength
>=0 &&
2372 sourceCnv
->UCharErrorBufferLength
==0
2374 fromUArgs
.flush
=TRUE
;
2379 * The conversion loop is exited when one of the following is true:
2380 * - the entire source text has been converted successfully to the target buffer
2381 * - a target buffer overflow occurred
2382 * - a conversion error occurred
2385 *source
=toUArgs
.source
;
2386 *target
=fromUArgs
.target
;
2388 /* terminate the target buffer if possible */
2389 if(flush
&& U_SUCCESS(*pErrorCode
)) {
2390 if(*target
!=targetLimit
) {
2392 if(*pErrorCode
==U_STRING_NOT_TERMINATED_WARNING
) {
2393 *pErrorCode
=U_ZERO_ERROR
;
2396 *pErrorCode
=U_STRING_NOT_TERMINATED_WARNING
;
2401 /* internal implementation of ucnv_convert() etc. with preflighting */
2403 ucnv_internalConvert(UConverter
*outConverter
, UConverter
*inConverter
,
2404 char *target
, int32_t targetCapacity
,
2405 const char *source
, int32_t sourceLength
,
2406 UErrorCode
*pErrorCode
) {
2407 UChar pivotBuffer
[CHUNK_SIZE
];
2408 UChar
*pivot
, *pivot2
;
2411 const char *sourceLimit
;
2412 const char *targetLimit
;
2413 int32_t targetLength
=0;
2416 if(sourceLength
<0) {
2417 sourceLimit
=uprv_strchr(source
, 0);
2419 sourceLimit
=source
+sourceLength
;
2422 /* if there is no input data, we're done */
2423 if(source
==sourceLimit
) {
2424 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2427 pivot
=pivot2
=pivotBuffer
;
2431 if(targetCapacity
>0) {
2432 /* perform real conversion */
2433 targetLimit
=target
+targetCapacity
;
2434 ucnv_convertEx(outConverter
, inConverter
,
2435 &myTarget
, targetLimit
,
2436 &source
, sourceLimit
,
2437 pivotBuffer
, &pivot
, &pivot2
, pivotBuffer
+CHUNK_SIZE
,
2441 targetLength
=(int32_t)(myTarget
-target
);
2445 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2446 * to it but continue the conversion in order to store in targetCapacity
2447 * the number of bytes that was required.
2449 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
|| targetCapacity
==0)
2451 char targetBuffer
[CHUNK_SIZE
];
2453 targetLimit
=targetBuffer
+CHUNK_SIZE
;
2455 *pErrorCode
=U_ZERO_ERROR
;
2456 myTarget
=targetBuffer
;
2457 ucnv_convertEx(outConverter
, inConverter
,
2458 &myTarget
, targetLimit
,
2459 &source
, sourceLimit
,
2460 pivotBuffer
, &pivot
, &pivot2
, pivotBuffer
+CHUNK_SIZE
,
2464 targetLength
+=(int32_t)(myTarget
-targetBuffer
);
2465 } while(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
);
2467 /* done with preflighting, set warnings and errors as appropriate */
2468 return u_terminateChars(target
, targetCapacity
, targetLength
, pErrorCode
);
2471 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2472 return targetLength
;
2475 U_CAPI
int32_t U_EXPORT2
2476 ucnv_convert(const char *toConverterName
, const char *fromConverterName
,
2477 char *target
, int32_t targetCapacity
,
2478 const char *source
, int32_t sourceLength
,
2479 UErrorCode
*pErrorCode
) {
2480 UConverter in
, out
; /* stack-allocated */
2481 UConverter
*inConverter
, *outConverter
;
2482 int32_t targetLength
;
2484 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2488 if( source
==NULL
|| sourceLength
<-1 ||
2489 targetCapacity
<0 || (targetCapacity
>0 && target
==NULL
)
2491 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2495 /* if there is no input data, we're done */
2496 if(sourceLength
==0 || (sourceLength
<0 && *source
==0)) {
2497 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2500 /* create the converters */
2501 inConverter
=ucnv_createConverter(&in
, fromConverterName
, pErrorCode
);
2502 if(U_FAILURE(*pErrorCode
)) {
2506 outConverter
=ucnv_createConverter(&out
, toConverterName
, pErrorCode
);
2507 if(U_FAILURE(*pErrorCode
)) {
2508 ucnv_close(inConverter
);
2512 targetLength
=ucnv_internalConvert(outConverter
, inConverter
,
2513 target
, targetCapacity
,
2514 source
, sourceLength
,
2517 ucnv_close(inConverter
);
2518 ucnv_close(outConverter
);
2520 return targetLength
;
2525 ucnv_convertAlgorithmic(UBool convertToAlgorithmic
,
2526 UConverterType algorithmicType
,
2528 char *target
, int32_t targetCapacity
,
2529 const char *source
, int32_t sourceLength
,
2530 UErrorCode
*pErrorCode
) {
2531 UConverter algoConverterStatic
; /* stack-allocated */
2532 UConverter
*algoConverter
, *to
, *from
;
2533 int32_t targetLength
;
2535 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
2539 if( cnv
==NULL
|| source
==NULL
|| sourceLength
<-1 ||
2540 targetCapacity
<0 || (targetCapacity
>0 && target
==NULL
)
2542 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
2546 /* if there is no input data, we're done */
2547 if(sourceLength
==0 || (sourceLength
<0 && *source
==0)) {
2548 return u_terminateChars(target
, targetCapacity
, 0, pErrorCode
);
2551 /* create the algorithmic converter */
2552 algoConverter
=ucnv_createAlgorithmicConverter(&algoConverterStatic
, algorithmicType
,
2554 if(U_FAILURE(*pErrorCode
)) {
2558 /* reset the other converter */
2559 if(convertToAlgorithmic
) {
2560 /* cnv->Unicode->algo */
2561 ucnv_resetToUnicode(cnv
);
2565 /* algo->Unicode->cnv */
2566 ucnv_resetFromUnicode(cnv
);
2571 targetLength
=ucnv_internalConvert(to
, from
,
2572 target
, targetCapacity
,
2573 source
, sourceLength
,
2576 ucnv_close(algoConverter
);
2578 return targetLength
;
2581 U_CAPI
int32_t U_EXPORT2
2582 ucnv_toAlgorithmic(UConverterType algorithmicType
,
2584 char *target
, int32_t targetCapacity
,
2585 const char *source
, int32_t sourceLength
,
2586 UErrorCode
*pErrorCode
) {
2587 return ucnv_convertAlgorithmic(TRUE
, algorithmicType
, cnv
,
2588 target
, targetCapacity
,
2589 source
, sourceLength
,
2593 U_CAPI
int32_t U_EXPORT2
2594 ucnv_fromAlgorithmic(UConverter
*cnv
,
2595 UConverterType algorithmicType
,
2596 char *target
, int32_t targetCapacity
,
2597 const char *source
, int32_t sourceLength
,
2598 UErrorCode
*pErrorCode
) {
2599 return ucnv_convertAlgorithmic(FALSE
, algorithmicType
, cnv
,
2600 target
, targetCapacity
,
2601 source
, sourceLength
,
2605 U_CAPI UConverterType U_EXPORT2
2606 ucnv_getType(const UConverter
* converter
)
2608 int8_t type
= converter
->sharedData
->staticData
->conversionType
;
2609 #if !UCONFIG_NO_LEGACY_CONVERSION
2610 if(type
== UCNV_MBCS
) {
2611 return ucnv_MBCSGetType(converter
);
2614 return (UConverterType
)type
;
2617 U_CAPI
void U_EXPORT2
2618 ucnv_getStarters(const UConverter
* converter
,
2619 UBool starters
[256],
2622 if (err
== NULL
|| U_FAILURE(*err
)) {
2626 if(converter
->sharedData
->impl
->getStarters
!= NULL
) {
2627 converter
->sharedData
->impl
->getStarters(converter
, starters
, err
);
2629 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2633 static const UAmbiguousConverter
*ucnv_getAmbiguous(const UConverter
*cnv
)
2635 UErrorCode errorCode
;
2643 errorCode
=U_ZERO_ERROR
;
2644 name
=ucnv_getName(cnv
, &errorCode
);
2645 if(U_FAILURE(errorCode
)) {
2649 for(i
=0; i
<UPRV_LENGTHOF(ambiguousConverters
); ++i
)
2651 if(0==uprv_strcmp(name
, ambiguousConverters
[i
].name
))
2653 return ambiguousConverters
+i
;
2660 U_CAPI
void U_EXPORT2
2661 ucnv_fixFileSeparator(const UConverter
*cnv
,
2663 int32_t sourceLength
) {
2664 const UAmbiguousConverter
*a
;
2668 if(cnv
==NULL
|| source
==NULL
|| sourceLength
<=0 || (a
=ucnv_getAmbiguous(cnv
))==NULL
)
2673 variant5c
=a
->variant5c
;
2674 for(i
=0; i
<sourceLength
; ++i
) {
2675 if(source
[i
]==variant5c
) {
2681 U_CAPI UBool U_EXPORT2
2682 ucnv_isAmbiguous(const UConverter
*cnv
) {
2683 return (UBool
)(ucnv_getAmbiguous(cnv
)!=NULL
);
2686 U_CAPI
void U_EXPORT2
2687 ucnv_setFallback(UConverter
*cnv
, UBool usesFallback
)
2689 cnv
->useFallback
= usesFallback
;
2692 U_CAPI UBool U_EXPORT2
2693 ucnv_usesFallback(const UConverter
*cnv
)
2695 return cnv
->useFallback
;
2698 U_CAPI
void U_EXPORT2
2699 ucnv_getInvalidChars (const UConverter
* converter
,
2704 if (err
== NULL
|| U_FAILURE(*err
))
2708 if (len
== NULL
|| errBytes
== NULL
|| converter
== NULL
)
2710 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2713 if (*len
< converter
->invalidCharLength
)
2715 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
2718 if ((*len
= converter
->invalidCharLength
) > 0)
2720 uprv_memcpy (errBytes
, converter
->invalidCharBuffer
, *len
);
2724 U_CAPI
void U_EXPORT2
2725 ucnv_getInvalidUChars (const UConverter
* converter
,
2730 if (err
== NULL
|| U_FAILURE(*err
))
2734 if (len
== NULL
|| errChars
== NULL
|| converter
== NULL
)
2736 *err
= U_ILLEGAL_ARGUMENT_ERROR
;
2739 if (*len
< converter
->invalidUCharLength
)
2741 *err
= U_INDEX_OUTOFBOUNDS_ERROR
;
2744 if ((*len
= converter
->invalidUCharLength
) > 0)
2746 uprv_memcpy (errChars
, converter
->invalidUCharBuffer
, sizeof(UChar
) * (*len
));
2750 #define SIG_MAX_LEN 5
2752 U_CAPI
const char* U_EXPORT2
2753 ucnv_detectUnicodeSignature( const char* source
,
2754 int32_t sourceLength
,
2755 int32_t* signatureLength
,
2756 UErrorCode
* pErrorCode
) {
2759 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2760 * bytes we don't misdetect something
2762 char start
[SIG_MAX_LEN
]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2765 if((pErrorCode
==NULL
) || U_FAILURE(*pErrorCode
)){
2769 if(source
== NULL
|| sourceLength
< -1){
2770 *pErrorCode
= U_ILLEGAL_ARGUMENT_ERROR
;
2774 if(signatureLength
== NULL
) {
2775 signatureLength
= &dummy
;
2778 if(sourceLength
==-1){
2779 sourceLength
=(int32_t)uprv_strlen(source
);
2783 while(i
<sourceLength
&& i
<SIG_MAX_LEN
){
2788 if(start
[0] == '\xFE' && start
[1] == '\xFF') {
2791 } else if(start
[0] == '\xFF' && start
[1] == '\xFE') {
2792 if(start
[2] == '\x00' && start
[3] =='\x00') {
2799 } else if(start
[0] == '\xEF' && start
[1] == '\xBB' && start
[2] == '\xBF') {
2802 } else if(start
[0] == '\x00' && start
[1] == '\x00' &&
2803 start
[2] == '\xFE' && start
[3]=='\xFF') {
2806 } else if(start
[0] == '\x0E' && start
[1] == '\xFE' && start
[2] == '\xFF') {
2809 } else if(start
[0] == '\xFB' && start
[1] == '\xEE' && start
[2] == '\x28') {
2812 } else if(start
[0] == '\x2B' && start
[1] == '\x2F' && start
[2] == '\x76') {
2814 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2815 * depending on the second UTF-16 code unit.
2816 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2819 * So far we have +/v
2821 if(start
[3] == '\x38' && start
[4] == '\x2D') {
2825 } else if(start
[3] == '\x38' || start
[3] == '\x39' || start
[3] == '\x2B' || start
[3] == '\x2F') {
2826 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2830 }else if(start
[0]=='\xDD' && start
[1]== '\x73'&& start
[2]=='\x66' && start
[3]=='\x73'){
2832 return "UTF-EBCDIC";
2836 /* no known Unicode signature byte sequence recognized */
2841 U_CAPI
int32_t U_EXPORT2
2842 ucnv_fromUCountPending(const UConverter
* cnv
, UErrorCode
* status
)
2844 if(status
== NULL
|| U_FAILURE(*status
)){
2848 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2852 if(cnv
->preFromUFirstCP
>= 0){
2853 return U16_LENGTH(cnv
->preFromUFirstCP
)+cnv
->preFromULength
;
2854 }else if(cnv
->preFromULength
< 0){
2855 return -cnv
->preFromULength
;
2856 }else if(cnv
->fromUChar32
> 0){
2863 U_CAPI
int32_t U_EXPORT2
2864 ucnv_toUCountPending(const UConverter
* cnv
, UErrorCode
* status
){
2866 if(status
== NULL
|| U_FAILURE(*status
)){
2870 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2874 if(cnv
->preToULength
> 0){
2875 return cnv
->preToULength
;
2876 }else if(cnv
->preToULength
< 0){
2877 return -cnv
->preToULength
;
2878 }else if(cnv
->toULength
> 0){
2879 return cnv
->toULength
;
2884 U_CAPI UBool U_EXPORT2
2885 ucnv_isFixedWidth(UConverter
*cnv
, UErrorCode
*status
){
2886 if (U_FAILURE(*status
)) {
2891 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
2895 switch (ucnv_getType(cnv
)) {
2898 case UCNV_UTF32_BigEndian
:
2899 case UCNV_UTF32_LittleEndian
:
2910 * Hey, Emacs, please set the following:
2913 * indent-tabs-mode: nil