]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv.c
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / ucnv.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
d5d484b0 4* Copyright (C) 1998-2006,2008 International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9* ucnv.c:
10* Implements APIs for the ICU's codeset conversion library;
11* mostly calls through internal functions;
12* created by Bertrand A. Damiba
13*
14* Modification History:
15*
16* Date Name Description
17* 04/04/99 helena Fixed internal header inclusion.
18* 05/09/00 helena Added implementation to handle fallback mappings.
19* 06/20/2000 helena OS/400 port changes; mostly typecast.
20*/
21
22#include "unicode/utypes.h"
374ca955
A
23
24#if !UCONFIG_NO_CONVERSION
25
b75a7d8f 26#include "unicode/ustring.h"
b75a7d8f
A
27#include "unicode/ucnv.h"
28#include "unicode/ucnv_err.h"
29#include "unicode/uset.h"
374ca955 30#include "putilimp.h"
b75a7d8f
A
31#include "cmemory.h"
32#include "cstring.h"
374ca955
A
33#include "uassert.h"
34#include "utracimp.h"
b75a7d8f
A
35#include "ustr_imp.h"
36#include "ucnv_imp.h"
b75a7d8f
A
37#include "ucnv_cnv.h"
38#include "ucnv_bld.h"
39
b75a7d8f
A
40/* size of intermediate and preflighting buffers in ucnv_convert() */
41#define CHUNK_SIZE 1024
42
43typedef struct UAmbiguousConverter {
44 const char *name;
45 const UChar variant5c;
46} UAmbiguousConverter;
47
48static const UAmbiguousConverter ambiguousConverters[]={
49 { "ibm-942_P120-1999", 0xa5 },
50 { "ibm-943_P130-1999", 0xa5 },
374ca955 51 { "ibm-897_P100-1995", 0xa5 },
b75a7d8f
A
52 { "ibm-33722_P120-1999", 0xa5 },
53 { "ibm-949_P110-1999", 0x20a9 },
54 { "ibm-1363_P110-1997", 0x20a9 },
55 { "ISO_2022,locale=ko,version=0", 0x20a9 }
56};
57
b75a7d8f
A
58/*Calls through createConverter */
59U_CAPI UConverter* U_EXPORT2
60ucnv_open (const char *name,
61 UErrorCode * err)
62{
63 UConverter *r;
64
65 if (err == NULL || U_FAILURE (*err)) {
b75a7d8f
A
66 return NULL;
67 }
68
69 r = ucnv_createConverter(NULL, name, err);
b75a7d8f
A
70 return r;
71}
72
73U_CAPI UConverter* U_EXPORT2
74ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
75{
76 return ucnv_createConverterFromPackage(packageName, converterName, err);
77}
78
79/*Extracts the UChar* to a char* and calls through createConverter */
80U_CAPI UConverter* U_EXPORT2
81ucnv_openU (const UChar * name,
82 UErrorCode * err)
83{
84 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
85
86 if (err == NULL || U_FAILURE(*err))
87 return NULL;
88 if (name == NULL)
89 return ucnv_open (NULL, err);
90 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
91 {
92 *err = U_ILLEGAL_ARGUMENT_ERROR;
93 return NULL;
94 }
95 return ucnv_open(u_austrcpy(asciiName, name), err);
96}
97
73c04bcf
A
98/* Copy the string that is represented by the UConverterPlatform enum
99 * @param platformString An output buffer
100 * @param platform An enum representing a platform
101 * @return the length of the copied string.
102 */
103static int32_t
104ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
105{
106 switch (pltfrm)
107 {
108 case UCNV_IBM:
109 uprv_strcpy(platformString, "ibm-");
110 return 4;
111 case UCNV_UNKNOWN:
112 break;
113 }
114
115 /* default to empty string */
116 *platformString = 0;
117 return 0;
118}
119
b75a7d8f
A
120/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
121 *through createConverter*/
122U_CAPI UConverter* U_EXPORT2
123ucnv_openCCSID (int32_t codepage,
124 UConverterPlatform platform,
125 UErrorCode * err)
126{
127 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
128 int32_t myNameLen;
129
130 if (err == NULL || U_FAILURE (*err))
131 return NULL;
132
133 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
134 myNameLen = ucnv_copyPlatformString(myName, platform);
135 T_CString_integerToString(myName + myNameLen, codepage, 10);
136
137 return ucnv_createConverter(NULL, myName, err);
138}
139
140/* Creating a temporary stack-based object that can be used in one thread,
141and created from a converter that is shared across threads.
142*/
143
144U_CAPI UConverter* U_EXPORT2
145ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
146{
147 UConverter *localConverter, *allocatedConverter;
148 int32_t bufferSizeNeeded;
149 char *stackBufferChars = (char *)stackBuffer;
150 UErrorCode cbErr;
151 UConverterToUnicodeArgs toUArgs = {
152 sizeof(UConverterToUnicodeArgs),
153 TRUE,
154 NULL,
155 NULL,
156 NULL,
157 NULL,
158 NULL,
159 NULL
160 };
161 UConverterFromUnicodeArgs fromUArgs = {
162 sizeof(UConverterFromUnicodeArgs),
163 TRUE,
164 NULL,
165 NULL,
166 NULL,
167 NULL,
168 NULL,
169 NULL
170 };
171
374ca955
A
172 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
173
b75a7d8f 174 if (status == NULL || U_FAILURE(*status)){
374ca955 175 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
b75a7d8f
A
176 return 0;
177 }
178
179 if (!pBufferSize || !cnv){
374ca955
A
180 *status = U_ILLEGAL_ARGUMENT_ERROR;
181 UTRACE_EXIT_STATUS(*status);
b75a7d8f
A
182 return 0;
183 }
184
374ca955
A
185 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
186 ucnv_getName(cnv, status), cnv, stackBuffer);
b75a7d8f 187
b75a7d8f
A
188 if (cnv->sharedData->impl->safeClone != NULL) {
189 /* call the custom safeClone function for sizing */
190 bufferSizeNeeded = 0;
374ca955 191 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
b75a7d8f
A
192 }
193 else
194 {
195 /* inherent sizing */
196 bufferSizeNeeded = sizeof(UConverter);
197 }
198
199 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
200 *pBufferSize = bufferSizeNeeded;
374ca955 201 UTRACE_EXIT_VALUE(bufferSizeNeeded);
b75a7d8f
A
202 return 0;
203 }
204
205
374ca955
A
206 /* Pointers on 64-bit platforms need to be aligned
207 * on a 64-bit boundary in memory.
208 */
209 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
210 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
211 if(*pBufferSize > offsetUp) {
212 *pBufferSize -= offsetUp;
213 stackBufferChars += offsetUp;
214 } else {
215 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
216 *pBufferSize = 1;
217 }
218 }
219
220 stackBuffer = (void *)stackBufferChars;
221
b75a7d8f
A
222 /* Now, see if we must allocate any memory */
223 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
224 {
225 /* allocate one here...*/
226 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
227
228 if(localConverter == NULL) {
229 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955 230 UTRACE_EXIT_STATUS(*status);
b75a7d8f
A
231 return NULL;
232 }
233
234 if (U_SUCCESS(*status)) {
235 *status = U_SAFECLONE_ALLOCATED_WARNING;
236 }
237
238 /* record the fact that memory was allocated */
239 *pBufferSize = bufferSizeNeeded;
240 } else {
241 /* just use the stack buffer */
242 localConverter = (UConverter*) stackBuffer;
243 allocatedConverter = NULL;
244 }
245
374ca955
A
246 uprv_memset(localConverter, 0, bufferSizeNeeded);
247
b75a7d8f
A
248 /* Copy initial state */
249 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
250 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
251
73c04bcf
A
252 /* copy the substitution string */
253 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
254 localConverter->subChars = (uint8_t *)localConverter->subUChars;
255 } else {
256 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
257 if (localConverter->subChars == NULL) {
258 uprv_free(allocatedConverter);
259 UTRACE_EXIT_STATUS(*status);
260 return NULL;
261 }
262 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
263 }
264
b75a7d8f
A
265 /* now either call the safeclone fcn or not */
266 if (cnv->sharedData->impl->safeClone != NULL) {
267 /* call the custom safeClone function */
268 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
269 }
270
271 if(localConverter==NULL || U_FAILURE(*status)) {
73c04bcf
A
272 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
273 uprv_free(allocatedConverter->subChars);
274 }
b75a7d8f 275 uprv_free(allocatedConverter);
374ca955 276 UTRACE_EXIT_STATUS(*status);
b75a7d8f
A
277 return NULL;
278 }
279
280 /* increment refcount of shared data if needed */
281 /*
282 Checking whether it's an algorithic converter is okay
283 in multithreaded applications because the value never changes.
284 Don't check referenceCounter for any other value.
285 */
286 if (cnv->sharedData->referenceCounter != ~0) {
287 ucnv_incrementRefCount(cnv->sharedData);
288 }
289
290 if(localConverter == (UConverter*)stackBuffer) {
291 /* we're using user provided data - set to not destroy */
292 localConverter->isCopyLocal = TRUE;
b75a7d8f
A
293 }
294
b75a7d8f
A
295 /* allow callback functions to handle any memory allocation */
296 toUArgs.converter = fromUArgs.converter = localConverter;
297 cbErr = U_ZERO_ERROR;
298 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
299 cbErr = U_ZERO_ERROR;
300 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
301
374ca955 302 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
b75a7d8f
A
303 return localConverter;
304}
305
306
307
308/*Decreases the reference counter in the shared immutable section of the object
309 *and frees the mutable part*/
310
311U_CAPI void U_EXPORT2
312ucnv_close (UConverter * converter)
313{
b75a7d8f
A
314 UErrorCode errorCode = U_ZERO_ERROR;
315
374ca955
A
316 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
317
b75a7d8f
A
318 if (converter == NULL)
319 {
374ca955 320 UTRACE_EXIT();
b75a7d8f
A
321 return;
322 }
323
374ca955
A
324 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
325 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
b75a7d8f 326
73c04bcf
A
327 /* In order to speed up the close, only call the callbacks when they have been changed.
328 This performance check will only work when the callbacks are set within a shared library
329 or from user code that statically links this code. */
330 /* first, notify the callback functions that the converter is closed */
331 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
332 UConverterToUnicodeArgs toUArgs = {
333 sizeof(UConverterToUnicodeArgs),
334 TRUE,
335 NULL,
336 NULL,
337 NULL,
338 NULL,
339 NULL,
340 NULL
341 };
b75a7d8f 342
73c04bcf
A
343 toUArgs.converter = converter;
344 errorCode = U_ZERO_ERROR;
345 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
346 }
347 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
348 UConverterFromUnicodeArgs fromUArgs = {
349 sizeof(UConverterFromUnicodeArgs),
350 TRUE,
351 NULL,
352 NULL,
353 NULL,
354 NULL,
355 NULL,
356 NULL
357 };
358 fromUArgs.converter = converter;
359 errorCode = U_ZERO_ERROR;
360 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
361 }
b75a7d8f 362
b75a7d8f
A
363 if (converter->sharedData->impl->close != NULL) {
364 converter->sharedData->impl->close(converter);
365 }
366
73c04bcf
A
367 if (converter->subChars != (uint8_t *)converter->subUChars) {
368 uprv_free(converter->subChars);
369 }
370
b75a7d8f
A
371 /*
372 Checking whether it's an algorithic converter is okay
373 in multithreaded applications because the value never changes.
374 Don't check referenceCounter for any other value.
375 */
376 if (converter->sharedData->referenceCounter != ~0) {
377 ucnv_unloadSharedDataIfReady(converter->sharedData);
378 }
379
380 if(!converter->isCopyLocal){
73c04bcf 381 uprv_free(converter);
b75a7d8f 382 }
374ca955
A
383
384 UTRACE_EXIT();
b75a7d8f
A
385}
386
387/*returns a single Name from the list, will return NULL if out of bounds
388 */
389U_CAPI const char* U_EXPORT2
390ucnv_getAvailableName (int32_t n)
391{
73c04bcf
A
392 if (0 <= n && n <= 0xffff) {
393 UErrorCode err = U_ZERO_ERROR;
394 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
395 if (U_SUCCESS(err)) {
396 return name;
397 }
b75a7d8f 398 }
73c04bcf 399 return NULL;
b75a7d8f
A
400}
401
402U_CAPI int32_t U_EXPORT2
403ucnv_countAvailable ()
404{
405 UErrorCode err = U_ZERO_ERROR;
73c04bcf 406 return ucnv_bld_countAvailableConverters(&err);
b75a7d8f
A
407}
408
409U_CAPI void U_EXPORT2
410ucnv_getSubstChars (const UConverter * converter,
411 char *mySubChar,
412 int8_t * len,
413 UErrorCode * err)
414{
415 if (U_FAILURE (*err))
416 return;
417
73c04bcf
A
418 if (converter->subCharLen <= 0) {
419 /* Unicode string or empty string from ucnv_setSubstString(). */
420 *len = 0;
421 return;
422 }
423
b75a7d8f
A
424 if (*len < converter->subCharLen) /*not enough space in subChars */
425 {
426 *err = U_INDEX_OUTOFBOUNDS_ERROR;
427 return;
428 }
429
73c04bcf 430 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
b75a7d8f
A
431 *len = converter->subCharLen; /*store # of bytes copied to buffer */
432}
433
434U_CAPI void U_EXPORT2
435ucnv_setSubstChars (UConverter * converter,
436 const char *mySubChar,
437 int8_t len,
438 UErrorCode * err)
439{
440 if (U_FAILURE (*err))
441 return;
442
443 /*Makes sure that the subChar is within the codepages char length boundaries */
444 if ((len > converter->sharedData->staticData->maxBytesPerChar)
445 || (len < converter->sharedData->staticData->minBytesPerChar))
446 {
447 *err = U_ILLEGAL_ARGUMENT_ERROR;
448 return;
449 }
450
73c04bcf 451 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
b75a7d8f
A
452 converter->subCharLen = len; /*sets the new len */
453
454 /*
455 * There is currently (2001Feb) no separate API to set/get subChar1.
456 * In order to always have subChar written after it is explicitly set,
457 * we set subChar1 to 0.
458 */
459 converter->subChar1 = 0;
460
461 return;
462}
463
73c04bcf
A
464U_DRAFT void U_EXPORT2
465ucnv_setSubstString(UConverter *cnv,
466 const UChar *s,
467 int32_t length,
468 UErrorCode *err) {
469 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
470 char chars[UCNV_ERROR_BUFFER_LENGTH];
471
472 UConverter *clone;
473 uint8_t *subChars;
474 int32_t cloneSize, length8;
475
476 /* Let the following functions check all arguments. */
477 cloneSize = sizeof(cloneBuffer);
478 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
479 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
480 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
481 ucnv_close(clone);
482 if (U_FAILURE(*err)) {
483 return;
484 }
485
486 if (cnv->sharedData->impl->writeSub == NULL
487#if !UCONFIG_NO_LEGACY_CONVERSION
488 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
489 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
490#endif
491 ) {
492 /* The converter is not stateful. Store the charset bytes as a fixed string. */
493 subChars = (uint8_t *)chars;
494 } else {
495 /*
496 * The converter has a non-default writeSub() function, indicating
497 * that it is stateful.
498 * Store the Unicode string for on-the-fly conversion for correct
499 * state handling.
500 */
501 if (length > UCNV_ERROR_BUFFER_LENGTH) {
502 /*
503 * Should not occur. The converter should output at least one byte
504 * per UChar, which means that ucnv_fromUChars() should catch all
505 * overflows.
506 */
507 *err = U_BUFFER_OVERFLOW_ERROR;
508 return;
509 }
510 subChars = (uint8_t *)s;
511 if (length < 0) {
512 length = u_strlen(s);
513 }
514 length8 = length * U_SIZEOF_UCHAR;
515 }
516
517 /*
518 * For storing the substitution string, select either the small buffer inside
519 * UConverter or allocate a subChars buffer.
520 */
521 if (length8 > UCNV_MAX_SUBCHAR_LEN) {
522 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
523 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
524 /* Allocate a new buffer for the string. */
525 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
526 if (cnv->subChars == NULL) {
527 cnv->subChars = (uint8_t *)cnv->subUChars;
528 *err = U_MEMORY_ALLOCATION_ERROR;
529 return;
530 }
531 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
532 }
533 }
534
535 /* Copy the substitution string into the UConverter or its subChars buffer. */
536 if (length8 == 0) {
537 cnv->subCharLen = 0;
538 } else {
539 uprv_memcpy(cnv->subChars, subChars, length8);
540 if (subChars == (uint8_t *)chars) {
541 cnv->subCharLen = (int8_t)length8;
542 } else /* subChars == s */ {
543 cnv->subCharLen = (int8_t)-length;
544 }
545 }
546
547 /* See comment in ucnv_setSubstChars(). */
548 cnv->subChar1 = 0;
549}
550
b75a7d8f
A
551/*resets the internal states of a converter
552 *goal : have the same behaviour than a freshly created converter
553 */
374ca955
A
554static void _reset(UConverter *converter, UConverterResetChoice choice,
555 UBool callCallback) {
b75a7d8f
A
556 if(converter == NULL) {
557 return;
558 }
559
374ca955
A
560 if(callCallback) {
561 /* first, notify the callback functions that the converter is reset */
562 UConverterToUnicodeArgs toUArgs = {
563 sizeof(UConverterToUnicodeArgs),
564 TRUE,
565 NULL,
566 NULL,
567 NULL,
568 NULL,
569 NULL,
570 NULL
571 };
572 UConverterFromUnicodeArgs fromUArgs = {
573 sizeof(UConverterFromUnicodeArgs),
574 TRUE,
575 NULL,
576 NULL,
577 NULL,
578 NULL,
579 NULL,
580 NULL
581 };
582 UErrorCode errorCode;
583
584 toUArgs.converter = fromUArgs.converter = converter;
585 if(choice<=UCNV_RESET_TO_UNICODE) {
586 errorCode = U_ZERO_ERROR;
587 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
588 }
589 if(choice!=UCNV_RESET_TO_UNICODE) {
590 errorCode = U_ZERO_ERROR;
591 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
592 }
b75a7d8f
A
593 }
594
595 /* now reset the converter itself */
596 if(choice<=UCNV_RESET_TO_UNICODE) {
597 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
374ca955 598 converter->mode = 0;
b75a7d8f
A
599 converter->toULength = 0;
600 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
374ca955 601 converter->preToULength = 0;
b75a7d8f
A
602 }
603 if(choice!=UCNV_RESET_TO_UNICODE) {
604 converter->fromUnicodeStatus = 0;
374ca955 605 converter->fromUChar32 = 0;
b75a7d8f 606 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
374ca955
A
607 converter->preFromUFirstCP = U_SENTINEL;
608 converter->preFromULength = 0;
b75a7d8f
A
609 }
610
611 if (converter->sharedData->impl->reset != NULL) {
612 /* call the custom reset function */
613 converter->sharedData->impl->reset(converter, choice);
b75a7d8f
A
614 }
615}
616
617U_CAPI void U_EXPORT2
618ucnv_reset(UConverter *converter)
619{
374ca955 620 _reset(converter, UCNV_RESET_BOTH, TRUE);
b75a7d8f
A
621}
622
623U_CAPI void U_EXPORT2
624ucnv_resetToUnicode(UConverter *converter)
625{
374ca955 626 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
b75a7d8f
A
627}
628
629U_CAPI void U_EXPORT2
630ucnv_resetFromUnicode(UConverter *converter)
631{
374ca955 632 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
b75a7d8f
A
633}
634
635U_CAPI int8_t U_EXPORT2
636ucnv_getMaxCharSize (const UConverter * converter)
637{
374ca955 638 return converter->maxBytesPerUChar;
b75a7d8f
A
639}
640
641
642U_CAPI int8_t U_EXPORT2
643ucnv_getMinCharSize (const UConverter * converter)
644{
645 return converter->sharedData->staticData->minBytesPerChar;
646}
647
648U_CAPI const char* U_EXPORT2
649ucnv_getName (const UConverter * converter, UErrorCode * err)
650
651{
652 if (U_FAILURE (*err))
653 return NULL;
654 if(converter->sharedData->impl->getName){
655 const char* temp= converter->sharedData->impl->getName(converter);
656 if(temp)
657 return temp;
658 }
659 return converter->sharedData->staticData->name;
660}
661
374ca955
A
662U_CAPI int32_t U_EXPORT2
663ucnv_getCCSID(const UConverter * converter,
664 UErrorCode * err)
b75a7d8f 665{
374ca955 666 int32_t ccsid;
b75a7d8f
A
667 if (U_FAILURE (*err))
668 return -1;
669
374ca955
A
670 ccsid = converter->sharedData->staticData->codepage;
671 if (ccsid == 0) {
672 /* Rare case. This is for cases like gb18030,
673 which doesn't have an IBM cannonical name, but does have an IBM alias. */
674 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
675 if (U_SUCCESS(*err) && standardName) {
676 const char *ccsidStr = uprv_strchr(standardName, '-');
677 if (ccsidStr) {
678 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
679 }
680 }
681 }
682 return ccsid;
b75a7d8f
A
683}
684
685
686U_CAPI UConverterPlatform U_EXPORT2
687ucnv_getPlatform (const UConverter * converter,
688 UErrorCode * err)
689{
690 if (U_FAILURE (*err))
691 return UCNV_UNKNOWN;
692
693 return (UConverterPlatform)converter->sharedData->staticData->platform;
694}
695
b75a7d8f
A
696U_CAPI void U_EXPORT2
697 ucnv_getToUCallBack (const UConverter * converter,
698 UConverterToUCallback *action,
699 const void **context)
700{
701 *action = converter->fromCharErrorBehaviour;
702 *context = converter->toUContext;
703}
704
705U_CAPI void U_EXPORT2
706 ucnv_getFromUCallBack (const UConverter * converter,
707 UConverterFromUCallback *action,
708 const void **context)
709{
710 *action = converter->fromUCharErrorBehaviour;
711 *context = converter->fromUContext;
712}
713
714U_CAPI void U_EXPORT2
715ucnv_setToUCallBack (UConverter * converter,
716 UConverterToUCallback newAction,
717 const void* newContext,
718 UConverterToUCallback *oldAction,
719 const void** oldContext,
720 UErrorCode * err)
721{
722 if (U_FAILURE (*err))
723 return;
724 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
725 converter->fromCharErrorBehaviour = newAction;
726 if (oldContext) *oldContext = converter->toUContext;
727 converter->toUContext = newContext;
728}
729
730U_CAPI void U_EXPORT2
731ucnv_setFromUCallBack (UConverter * converter,
732 UConverterFromUCallback newAction,
733 const void* newContext,
734 UConverterFromUCallback *oldAction,
735 const void** oldContext,
736 UErrorCode * err)
737{
738 if (U_FAILURE (*err))
739 return;
740 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
741 converter->fromUCharErrorBehaviour = newAction;
742 if (oldContext) *oldContext = converter->fromUContext;
743 converter->fromUContext = newContext;
744}
745
374ca955
A
/*
 * Adjusts the first `length` entries of `offsets` after a conversion step.
 *
 * If sourceIndex >= 0, the adjustment is sourceIndex - errorInputLength:
 *   - zero:     the offsets are left as they are
 *   - positive: it is added to every non-negative offset
 *   - negative: every offset is set to -1 (the error input sequence
 *               started in a previous buffer)
 * If sourceIndex < 0, every offset is set to -1 because the conversion
 * function does not handle offsets.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t *const end = offsets + length;
    int32_t *p;
    int32_t shift;

    shift = (sourceIndex >= 0) ? (sourceIndex - errorInputLength) : -1;

    if(shift == 0) {
        /* most common case, nothing to do */
        return;
    }

    if(shift < 0) {
        /* offsets are unknown: mark every one of them */
        for(p = offsets; p < end; ++p) {
            *p = -1;
        }
        return;
    }

    /* shift > 0: move each known offset, leave unknown (<0) ones alone */
    for(p = offsets; p < end; ++p) {
        if(*p >= 0) {
            *p += shift;
        }
    }
}
790
791/* ucnv_fromUnicode --------------------------------------------------------- */
792
793/*
794 * Implementation note for m:n conversions
795 *
796 * While collecting source units to find the longest match for m:n conversion,
797 * some source units may need to be stored for a partial match.
798 * When a second buffer does not yield a match on all of the previously stored
799 * source units, then they must be "replayed", i.e., fed back into the converter.
800 *
801 * The code relies on the fact that replaying will not nest -
802 * converting a replay buffer will not result in a replay.
803 * This is because a replay is necessary only after the _continuation_ of a
804 * partial match failed, but a replay buffer is converted as a whole.
805 * It may result in some of its units being stored again for a partial match,
806 * but there will not be a continuation _during_ the replay which could fail.
807 *
808 * It is conceivable that a callback function could call the converter
809 * recursively in a way that causes another replay to be stored, but that
810 * would be an error in the callback function.
811 * Such violations will cause assertion failures in a debug build,
812 * and wrong output, but they will not cause a crash.
813 */
814
815static void
816_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
817 UConverterFromUnicode fromUnicode;
818 UConverter *cnv;
819 const UChar *s;
820 char *t;
821 int32_t *offsets;
822 int32_t sourceIndex;
823 int32_t errorInputLength;
824 UBool converterSawEndOfInput, calledCallback;
825
826 /* variables for m:n conversion */
827 UChar replay[UCNV_EXT_MAX_UCHARS];
828 const UChar *realSource, *realSourceLimit;
829 int32_t realSourceIndex;
830 UBool realFlush;
831
832 cnv=pArgs->converter;
833 s=pArgs->source;
834 t=pArgs->target;
835 offsets=pArgs->offsets;
836
837 /* get the converter implementation function */
838 sourceIndex=0;
839 if(offsets==NULL) {
840 fromUnicode=cnv->sharedData->impl->fromUnicode;
841 } else {
842 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
843 if(fromUnicode==NULL) {
844 /* there is no WithOffsets implementation */
845 fromUnicode=cnv->sharedData->impl->fromUnicode;
846 /* we will write -1 for each offset */
847 sourceIndex=-1;
848 }
849 }
850
851 if(cnv->preFromULength>=0) {
852 /* normal mode */
853 realSource=NULL;
854
855 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
856 realSourceLimit=NULL;
857 realFlush=FALSE;
858 realSourceIndex=0;
859 } else {
860 /*
861 * Previous m:n conversion stored source units from a partial match
862 * and failed to consume all of them.
863 * We need to "replay" them from a temporary buffer and convert them first.
864 */
865 realSource=pArgs->source;
866 realSourceLimit=pArgs->sourceLimit;
867 realFlush=pArgs->flush;
868 realSourceIndex=sourceIndex;
869
870 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
871 pArgs->source=replay;
872 pArgs->sourceLimit=replay-cnv->preFromULength;
873 pArgs->flush=FALSE;
874 sourceIndex=-1;
875
876 cnv->preFromULength=0;
877 }
b75a7d8f
A
878
879 /*
374ca955
A
880 * loop for conversion and error handling
881 *
882 * loop {
883 * convert
884 * loop {
885 * update offsets
886 * handle end of input
887 * handle errors/call callback
888 * }
889 * }
890 */
891 for(;;) {
892 /* convert */
893 fromUnicode(pArgs, err);
894
895 /*
896 * set a flag for whether the converter
897 * successfully processed the end of the input
898 *
899 * need not check cnv->preFromULength==0 because a replay (<0) will cause
900 * s<sourceLimit before converterSawEndOfInput is checked
901 */
902 converterSawEndOfInput=
903 (UBool)(U_SUCCESS(*err) &&
904 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
905 cnv->fromUChar32==0);
906
907 /* no callback called yet for this iteration */
908 calledCallback=FALSE;
909
910 /* no sourceIndex adjustment for conversion, only for callback output */
911 errorInputLength=0;
912
913 /*
914 * loop for offsets and error handling
915 *
916 * iterates at most 3 times:
917 * 1. to clean up after the conversion function
918 * 2. after the callback
919 * 3. after the callback again if there was truncated input
920 */
921 for(;;) {
922 /* update offsets if we write any */
923 if(offsets!=NULL) {
924 int32_t length=(int32_t)(pArgs->target-t);
925 if(length>0) {
926 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
927
928 /*
929 * if a converter handles offsets and updates the offsets
930 * pointer at the end, then pArgs->offset should not change
931 * here;
932 * however, some converters do not handle offsets at all
933 * (sourceIndex<0) or may not update the offsets pointer
934 */
935 pArgs->offsets=offsets+=length;
936 }
937
938 if(sourceIndex>=0) {
939 sourceIndex+=(int32_t)(pArgs->source-s);
940 }
941 }
942
943 if(cnv->preFromULength<0) {
944 /*
945 * switch the source to new replay units (cannot occur while replaying)
946 * after offset handling and before end-of-input and callback handling
947 */
948 if(realSource==NULL) {
949 realSource=pArgs->source;
950 realSourceLimit=pArgs->sourceLimit;
951 realFlush=pArgs->flush;
952 realSourceIndex=sourceIndex;
953
954 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
955 pArgs->source=replay;
956 pArgs->sourceLimit=replay-cnv->preFromULength;
957 pArgs->flush=FALSE;
958 if((sourceIndex+=cnv->preFromULength)<0) {
959 sourceIndex=-1;
960 }
961
962 cnv->preFromULength=0;
963 } else {
964 /* see implementation note before _fromUnicodeWithCallback() */
965 U_ASSERT(realSource==NULL);
966 *err=U_INTERNAL_PROGRAM_ERROR;
967 }
968 }
969
970 /* update pointers */
971 s=pArgs->source;
972 t=pArgs->target;
973
974 if(U_SUCCESS(*err)) {
975 if(s<pArgs->sourceLimit) {
976 /*
977 * continue with the conversion loop while there is still input left
978 * (continue converting by breaking out of only the inner loop)
979 */
980 break;
981 } else if(realSource!=NULL) {
982 /* switch back from replaying to the real source and continue */
983 pArgs->source=realSource;
984 pArgs->sourceLimit=realSourceLimit;
985 pArgs->flush=realFlush;
986 sourceIndex=realSourceIndex;
987
988 realSource=NULL;
989 break;
990 } else if(pArgs->flush && cnv->fromUChar32!=0) {
991 /*
992 * the entire input stream is consumed
993 * and there is a partial, truncated input sequence left
994 */
995
996 /* inject an error and continue with callback handling */
997 *err=U_TRUNCATED_CHAR_FOUND;
998 calledCallback=FALSE; /* new error condition */
999 } else {
1000 /* input consumed */
1001 if(pArgs->flush) {
1002 /*
1003 * return to the conversion loop once more if the flush
1004 * flag is set and the conversion function has not
1005 * successfully processed the end of the input yet
1006 *
1007 * (continue converting by breaking out of only the inner loop)
1008 */
1009 if(!converterSawEndOfInput) {
1010 break;
1011 }
1012
1013 /* reset the converter without calling the callback function */
1014 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1015 }
1016
1017 /* done successfully */
1018 return;
1019 }
1020 }
1021
1022 /* U_FAILURE(*err) */
1023 {
1024 UErrorCode e;
1025
1026 if( calledCallback ||
1027 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1028 (e!=U_INVALID_CHAR_FOUND &&
1029 e!=U_ILLEGAL_CHAR_FOUND &&
1030 e!=U_TRUNCATED_CHAR_FOUND)
1031 ) {
1032 /*
1033 * the callback did not or cannot resolve the error:
1034 * set output pointers and return
1035 *
1036 * the check for buffer overflow is redundant but it is
1037 * a high-runner case and hopefully documents the intent
1038 * well
1039 *
1040 * if we were replaying, then the replay buffer must be
1041 * copied back into the UConverter
1042 * and the real arguments must be restored
1043 */
1044 if(realSource!=NULL) {
1045 int32_t length;
1046
1047 U_ASSERT(cnv->preFromULength==0);
1048
1049 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1050 if(length>0) {
1051 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
1052 cnv->preFromULength=(int8_t)-length;
1053 }
1054
1055 pArgs->source=realSource;
1056 pArgs->sourceLimit=realSourceLimit;
1057 pArgs->flush=realFlush;
1058 }
1059
1060 return;
1061 }
1062 }
1063
1064 /* callback handling */
1065 {
1066 UChar32 codePoint;
1067
1068 /* get and write the code point */
1069 codePoint=cnv->fromUChar32;
1070 errorInputLength=0;
1071 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1072 cnv->invalidUCharLength=(int8_t)errorInputLength;
1073
1074 /* set the converter state to deal with the next character */
1075 cnv->fromUChar32=0;
1076
1077 /* call the callback function */
1078 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1079 cnv->invalidUCharBuffer, errorInputLength, codePoint,
1080 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1081 err);
1082 }
1083
1084 /*
1085 * loop back to the offset handling
1086 *
1087 * this flag will indicate after offset handling
1088 * that a callback was called;
1089 * if the callback did not resolve the error, then we return
1090 */
1091 calledCallback=TRUE;
1092 }
1093 }
1094}
1095
1096U_CAPI void U_EXPORT2
1097ucnv_fromUnicode(UConverter *cnv,
1098 char **target, const char *targetLimit,
1099 const UChar **source, const UChar *sourceLimit,
1100 int32_t *offsets,
1101 UBool flush,
1102 UErrorCode *err) {
1103 UConverterFromUnicodeArgs args;
1104 const UChar *s;
1105 char *t;
1106
1107 /* check parameters */
1108 if(err==NULL || U_FAILURE(*err)) {
b75a7d8f
A
1109 return;
1110 }
1111
374ca955
A
1112 if(cnv==NULL || target==NULL || source==NULL) {
1113 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1114 return;
1115 }
1116
374ca955
A
1117 s=*source;
1118 t=*target;
1119 if(sourceLimit<s || targetLimit<t) {
1120 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1121 return;
1122 }
1123
1124 /*
374ca955
A
1125 * Make sure that the buffer sizes do not exceed the number range for
1126 * int32_t because some functions use the size (in units or bytes)
1127 * rather than comparing pointers, and because offsets are int32_t values.
1128 *
1129 * size_t is guaranteed to be unsigned and large enough for the job.
1130 *
1131 * Return with an error instead of adjusting the limits because we would
1132 * not be able to maintain the semantics that either the source must be
1133 * consumed or the target filled (unless an error occurs).
1134 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1135 */
1136 if(
1137 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1138 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
1139 ) {
1140 *err=U_ILLEGAL_ARGUMENT_ERROR;
1141 return;
b75a7d8f
A
1142 }
1143
374ca955
A
1144 /* flush the target overflow buffer */
1145 if(cnv->charErrorBufferLength>0) {
1146 char *overflow;
1147 int32_t i, length;
1148
1149 overflow=(char *)cnv->charErrorBuffer;
1150 length=cnv->charErrorBufferLength;
1151 i=0;
1152 do {
1153 if(t==targetLimit) {
1154 /* the overflow buffer contains too much, keep the rest */
1155 int32_t j=0;
1156
1157 do {
1158 overflow[j++]=overflow[i++];
1159 } while(i<length);
1160
1161 cnv->charErrorBufferLength=(int8_t)j;
1162 *target=t;
1163 *err=U_BUFFER_OVERFLOW_ERROR;
1164 return;
1165 }
1166
1167 /* copy the overflow contents to the target */
1168 *t++=overflow[i++];
1169 if(offsets!=NULL) {
1170 *offsets++=-1; /* no source index available for old output */
1171 }
1172 } while(i<length);
1173
1174 /* the overflow buffer is completely copied to the target */
1175 cnv->charErrorBufferLength=0;
b75a7d8f
A
1176 }
1177
374ca955 1178 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
b75a7d8f 1179 /* the overflow buffer is emptied and there is no new input: we are done */
374ca955 1180 *target=t;
b75a7d8f
A
1181 return;
1182 }
1183
374ca955
A
1184 /*
1185 * Do not simply return with a buffer overflow error if
1186 * !flush && t==targetLimit
1187 * because it is possible that the source will not generate any output.
1188 * For example, the skip callback may be called;
1189 * it does not output anything.
1190 */
1191
1192 /* prepare the converter arguments */
1193 args.converter=cnv;
1194 args.flush=flush;
1195 args.offsets=offsets;
1196 args.source=s;
1197 args.sourceLimit=sourceLimit;
1198 args.target=t;
1199 args.targetLimit=targetLimit;
1200 args.size=sizeof(args);
1201
1202 _fromUnicodeWithCallback(&args, err);
1203
1204 *source=args.source;
1205 *target=args.target;
1206}
1207
1208/* ucnv_toUnicode() --------------------------------------------------------- */
1209
/*
 * Conversion-to-Unicode driver loop.
 *
 * Calls the converter implementation's toUnicode function on pArgs
 * (toUnicodeWithOffsets if pArgs->offsets is not NULL and the implementation
 * provides one) and, after each call,
 *  - writes/advances the offsets array if there is one,
 *  - switches the source to a local replay buffer when cnv->preToULength<0,
 *    and back to the real source once the replay buffer is consumed,
 *  - injects U_TRUNCATED_CHAR_FOUND when flush is set, the input is consumed
 *    and cnv->toULength>0,
 *  - copies cnv->toUBytes[] to cnv->invalidCharBuffer[] and calls
 *    cnv->fromCharErrorBehaviour for the error codes listed in the
 *    "U_FAILURE(*err)" block below.
 *
 * pArgs  in/out conversion arguments; source, target and offsets are advanced
 * err    in/out error code; on return it holds either success (input
 *        consumed) or the error that the callback did not or could not resolve
 */
1210static void
1211_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1212 UConverterToUnicode toUnicode;
1213 UConverter *cnv;
1214 const char *s;
1215 UChar *t;
1216 int32_t *offsets;
1217 int32_t sourceIndex;
1218 int32_t errorInputLength;
1219 UBool converterSawEndOfInput, calledCallback;
1220
1221 /* variables for m:n conversion */
1222 char replay[UCNV_EXT_MAX_BYTES];
1223 const char *realSource, *realSourceLimit;
1224 int32_t realSourceIndex;
1225 UBool realFlush;
1226
1227 cnv=pArgs->converter;
1228 s=pArgs->source;
1229 t=pArgs->target;
1230 offsets=pArgs->offsets;
1231
1232 /* get the converter implementation function */
1233 sourceIndex=0;
1234 if(offsets==NULL) {
1235 toUnicode=cnv->sharedData->impl->toUnicode;
1236 } else {
1237 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1238 if(toUnicode==NULL) {
1239 /* there is no WithOffsets implementation */
1240 toUnicode=cnv->sharedData->impl->toUnicode;
1241 /* we will write -1 for each offset */
1242 sourceIndex=-1;
1243 }
1244 }
1245
1246 if(cnv->preToULength>=0) {
1247 /* normal mode */
1248 realSource=NULL;
1249
1250 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1251 realSourceLimit=NULL;
1252 realFlush=FALSE;
1253 realSourceIndex=0;
1254 } else {
1255 /*
1256 * Previous m:n conversion stored source units from a partial match
1257 * and failed to consume all of them.
1258 * We need to "replay" them from a temporary buffer and convert them first.
1259 */
1260 realSource=pArgs->source;
1261 realSourceLimit=pArgs->sourceLimit;
1262 realFlush=pArgs->flush;
1263 realSourceIndex=sourceIndex;
1264
1265 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1266 pArgs->source=replay;
1267 pArgs->sourceLimit=replay-cnv->preToULength;
1268 pArgs->flush=FALSE;
1269 sourceIndex=-1;
1270
1271 cnv->preToULength=0;
1272 }
1273
1274 /*
1275 * loop for conversion and error handling
1276 *
1277 * loop {
1278 * convert
1279 * loop {
1280 * update offsets
1281 * handle end of input
1282 * handle errors/call callback
1283 * }
1284 * }
1285 */
1286 for(;;) {
1287 if(U_SUCCESS(*err)) {
1288 /* convert */
1289 toUnicode(pArgs, err);
1290
1291 /*
1292 * set a flag for whether the converter
1293 * successfully processed the end of the input
1294 *
1295 * need not check cnv->preToULength==0 because a replay (<0) will cause
1296 * s<sourceLimit before converterSawEndOfInput is checked
1297 */
1298 converterSawEndOfInput=
1299 (UBool)(U_SUCCESS(*err) &&
1300 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1301 cnv->toULength==0);
1302 } else {
1303 /* handle error from getNextUChar() */
1304 converterSawEndOfInput=FALSE;
b75a7d8f 1305 }
374ca955
A
1306
1307 /* no callback called yet for this iteration */
1308 calledCallback=FALSE;
1309
1310 /* no sourceIndex adjustment for conversion, only for callback output */
1311 errorInputLength=0;
1312
1313 /*
1314 * loop for offsets and error handling
1315 *
1316 * iterates at most 3 times:
1317 * 1. to clean up after the conversion function
1318 * 2. after the callback
1319 * 3. after the callback again if there was truncated input
1320 */
1321 for(;;) {
1322 /* update offsets if we write any */
1323 if(offsets!=NULL) {
1324 int32_t length=(int32_t)(pArgs->target-t);
1325 if(length>0) {
1326 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1327
1328 /*
1329 * if a converter handles offsets and updates the offsets
1330 * pointer at the end, then pArgs->offset should not change
1331 * here;
1332 * however, some converters do not handle offsets at all
1333 * (sourceIndex<0) or may not update the offsets pointer
1334 */
1335 pArgs->offsets=offsets+=length;
1336 }
1337
1338 if(sourceIndex>=0) {
1339 sourceIndex+=(int32_t)(pArgs->source-s);
1340 }
1341 }
1342
1343 if(cnv->preToULength<0) {
1344 /*
1345 * switch the source to new replay units (cannot occur while replaying)
1346 * after offset handling and before end-of-input and callback handling
1347 */
1348 if(realSource==NULL) {
1349 realSource=pArgs->source;
1350 realSourceLimit=pArgs->sourceLimit;
1351 realFlush=pArgs->flush;
1352 realSourceIndex=sourceIndex;
1353
1354 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1355 pArgs->source=replay;
1356 pArgs->sourceLimit=replay-cnv->preToULength;
1357 pArgs->flush=FALSE;
/*
 * preToULength is negative here, so this moves sourceIndex back by the
 * number of replay bytes; a negative result is pinned to -1, the value
 * that is also used above when no source index is available
 */
1358 if((sourceIndex+=cnv->preToULength)<0) {
1359 sourceIndex=-1;
1360 }
1361
1362 cnv->preToULength=0;
1363 } else {
1364 /* see implementation note before _fromUnicodeWithCallback() */
1365 U_ASSERT(realSource==NULL);
1366 *err=U_INTERNAL_PROGRAM_ERROR;
1367 }
1368 }
1369
1370 /* update pointers */
1371 s=pArgs->source;
1372 t=pArgs->target;
1373
1374 if(U_SUCCESS(*err)) {
1375 if(s<pArgs->sourceLimit) {
1376 /*
1377 * continue with the conversion loop while there is still input left
1378 * (continue converting by breaking out of only the inner loop)
1379 */
1380 break;
1381 } else if(realSource!=NULL) {
1382 /* switch back from replaying to the real source and continue */
1383 pArgs->source=realSource;
1384 pArgs->sourceLimit=realSourceLimit;
1385 pArgs->flush=realFlush;
1386 sourceIndex=realSourceIndex;
1387
1388 realSource=NULL;
1389 break;
1390 } else if(pArgs->flush && cnv->toULength>0) {
1391 /*
1392 * the entire input stream is consumed
1393 * and there is a partial, truncated input sequence left
1394 */
1395
1396 /* inject an error and continue with callback handling */
1397 *err=U_TRUNCATED_CHAR_FOUND;
1398 calledCallback=FALSE; /* new error condition */
1399 } else {
1400 /* input consumed */
1401 if(pArgs->flush) {
1402 /*
1403 * return to the conversion loop once more if the flush
1404 * flag is set and the conversion function has not
1405 * successfully processed the end of the input yet
1406 *
1407 * (continue converting by breaking out of only the inner loop)
1408 */
1409 if(!converterSawEndOfInput) {
1410 break;
1411 }
1412
1413 /* reset the converter without calling the callback function */
1414 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1415 }
1416
1417 /* done successfully */
1418 return;
1419 }
b75a7d8f 1420 }
374ca955
A
1421
1422 /* U_FAILURE(*err) */
1423 {
1424 UErrorCode e;
1425
/*
 * return unless this is a new error (no callback called for it yet)
 * with one of the codes that are passed to the callback below
 */
1426 if( calledCallback ||
1427 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1428 (e!=U_INVALID_CHAR_FOUND &&
1429 e!=U_ILLEGAL_CHAR_FOUND &&
1430 e!=U_TRUNCATED_CHAR_FOUND &&
1431 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
d5d484b0
A
1432 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE &&
1433 e!=U_PARSE_ERROR) /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE below */
374ca955
A
1434 ) {
1435 /*
1436 * the callback did not or cannot resolve the error:
1437 * set output pointers and return
1438 *
1439 * the check for buffer overflow is redundant but it is
1440 * a high-runner case and hopefully documents the intent
1441 * well
1442 *
1443 * if we were replaying, then the replay buffer must be
1444 * copied back into the UConverter
1445 * and the real arguments must be restored
1446 */
1447 if(realSource!=NULL) {
1448 int32_t length;
1449
1450 U_ASSERT(cnv->preToULength==0);
1451
1452 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1453 if(length>0) {
1454 uprv_memcpy(cnv->preToU, pArgs->source, length);
1455 cnv->preToULength=(int8_t)-length;
1456 }
1457
1458 pArgs->source=realSource;
1459 pArgs->sourceLimit=realSourceLimit;
1460 pArgs->flush=realFlush;
1461 }
1462
1463 return;
1464 }
1465 }
1466
1467 /* copy toUBytes[] to invalidCharBuffer[] */
1468 errorInputLength=cnv->invalidCharLength=cnv->toULength;
1469 if(errorInputLength>0) {
1470 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1471 }
1472
1473 /* set the converter state to deal with the next character */
1474 cnv->toULength=0;
1475
1476 /* call the callback function */
d5d484b0
A
1477 {
1478 UConverterCallbackReason reason;
1479 if (*err == U_PARSE_ERROR) { /* Here U_PARSE_ERROR indicates empty segment */
1480 *err = U_ILLEGAL_ESCAPE_SEQUENCE;
1481 reason = UCNV_IRREGULAR;
1482 } else {
1483 reason = (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
1484 UCNV_UNASSIGNED : UCNV_ILLEGAL;
1485 }
1486 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1487 cnv->invalidCharBuffer, errorInputLength, reason, err);
1488 }
374ca955
A
1489
1490 /*
1491 * loop back to the offset handling
1492 *
1493 * this flag will indicate after offset handling
1494 * that a callback was called;
1495 * if the callback did not resolve the error, then we return
1496 */
1497 calledCallback=TRUE;
b75a7d8f
A
1498 }
1499 }
b75a7d8f
A
1500}
1501
374ca955
A
1502U_CAPI void U_EXPORT2
1503ucnv_toUnicode(UConverter *cnv,
1504 UChar **target, const UChar *targetLimit,
1505 const char **source, const char *sourceLimit,
1506 int32_t *offsets,
1507 UBool flush,
1508 UErrorCode *err) {
b75a7d8f 1509 UConverterToUnicodeArgs args;
374ca955
A
1510 const char *s;
1511 UChar *t;
b75a7d8f 1512
374ca955
A
1513 /* check parameters */
1514 if(err==NULL || U_FAILURE(*err)) {
b75a7d8f
A
1515 return;
1516 }
1517
374ca955
A
1518 if(cnv==NULL || target==NULL || source==NULL) {
1519 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1520 return;
1521 }
1522
374ca955
A
1523 s=*source;
1524 t=*target;
1525 if(sourceLimit<s || targetLimit<t) {
1526 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1527 return;
1528 }
1529
1530 /*
374ca955
A
1531 * Make sure that the buffer sizes do not exceed the number range for
1532 * int32_t because some functions use the size (in units or bytes)
1533 * rather than comparing pointers, and because offsets are int32_t values.
1534 *
1535 * size_t is guaranteed to be unsigned and large enough for the job.
1536 *
1537 * Return with an error instead of adjusting the limits because we would
1538 * not be able to maintain the semantics that either the source must be
1539 * consumed or the target filled (unless an error occurs).
1540 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1541 */
1542 if(
1543 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1544 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)
1545 ) {
1546 *err=U_ILLEGAL_ARGUMENT_ERROR;
1547 return;
b75a7d8f 1548 }
374ca955
A
1549
1550 /* flush the target overflow buffer */
1551 if(cnv->UCharErrorBufferLength>0) {
1552 UChar *overflow;
1553 int32_t i, length;
1554
1555 overflow=cnv->UCharErrorBuffer;
1556 length=cnv->UCharErrorBufferLength;
1557 i=0;
1558 do {
1559 if(t==targetLimit) {
1560 /* the overflow buffer contains too much, keep the rest */
1561 int32_t j=0;
1562
1563 do {
1564 overflow[j++]=overflow[i++];
1565 } while(i<length);
1566
1567 cnv->UCharErrorBufferLength=(int8_t)j;
1568 *target=t;
1569 *err=U_BUFFER_OVERFLOW_ERROR;
1570 return;
1571 }
b75a7d8f 1572
374ca955
A
1573 /* copy the overflow contents to the target */
1574 *t++=overflow[i++];
1575 if(offsets!=NULL) {
1576 *offsets++=-1; /* no source index available for old output */
1577 }
1578 } while(i<length);
b75a7d8f 1579
374ca955
A
1580 /* the overflow buffer is completely copied to the target */
1581 cnv->UCharErrorBufferLength=0;
b75a7d8f
A
1582 }
1583
374ca955 1584 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
b75a7d8f 1585 /* the overflow buffer is emptied and there is no new input: we are done */
374ca955 1586 *target=t;
b75a7d8f
A
1587 return;
1588 }
1589
374ca955
A
1590 /*
1591 * Do not simply return with a buffer overflow error if
1592 * !flush && t==targetLimit
1593 * because it is possible that the source will not generate any output.
1594 * For example, the skip callback may be called;
1595 * it does not output anything.
1596 */
b75a7d8f 1597
374ca955
A
1598 /* prepare the converter arguments */
1599 args.converter=cnv;
1600 args.flush=flush;
1601 args.offsets=offsets;
1602 args.source=s;
1603 args.sourceLimit=sourceLimit;
1604 args.target=t;
1605 args.targetLimit=targetLimit;
1606 args.size=sizeof(args);
b75a7d8f 1607
374ca955
A
1608 _toUnicodeWithCallback(&args, err);
1609
1610 *source=args.source;
1611 *target=args.target;
b75a7d8f
A
1612}
1613
374ca955
A
1614/* ucnv_to/fromUChars() ----------------------------------------------------- */
1615
b75a7d8f
A
1616U_CAPI int32_t U_EXPORT2
1617ucnv_fromUChars(UConverter *cnv,
1618 char *dest, int32_t destCapacity,
1619 const UChar *src, int32_t srcLength,
1620 UErrorCode *pErrorCode) {
1621 const UChar *srcLimit;
1622 char *originalDest, *destLimit;
1623 int32_t destLength;
1624
1625 /* check arguments */
1626 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1627 return 0;
1628 }
1629
1630 if( cnv==NULL ||
1631 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1632 srcLength<-1 || (srcLength!=0 && src==NULL)
1633 ) {
1634 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1635 return 0;
1636 }
1637
1638 /* initialize */
1639 ucnv_resetFromUnicode(cnv);
1640 originalDest=dest;
1641 if(srcLength==-1) {
1642 srcLength=u_strlen(src);
1643 }
1644 if(srcLength>0) {
1645 srcLimit=src+srcLength;
1646 destLimit=dest+destCapacity;
1647
1648 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1649 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1650 destLimit=(char *)U_MAX_PTR(dest);
1651 }
1652
1653 /* perform the conversion */
1654 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1655 destLength=(int32_t)(dest-originalDest);
1656
1657 /* if an overflow occurs, then get the preflighting length */
1658 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1659 char buffer[1024];
1660
1661 destLimit=buffer+sizeof(buffer);
1662 do {
1663 dest=buffer;
1664 *pErrorCode=U_ZERO_ERROR;
1665 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1666 destLength+=(int32_t)(dest-buffer);
1667 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1668 }
1669 } else {
1670 destLength=0;
1671 }
1672
1673 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1674}
1675
1676U_CAPI int32_t U_EXPORT2
1677ucnv_toUChars(UConverter *cnv,
1678 UChar *dest, int32_t destCapacity,
1679 const char *src, int32_t srcLength,
1680 UErrorCode *pErrorCode) {
1681 const char *srcLimit;
1682 UChar *originalDest, *destLimit;
1683 int32_t destLength;
1684
1685 /* check arguments */
1686 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1687 return 0;
1688 }
1689
1690 if( cnv==NULL ||
1691 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1692 srcLength<-1 || (srcLength!=0 && src==NULL))
1693 {
1694 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1695 return 0;
1696 }
1697
1698 /* initialize */
1699 ucnv_resetToUnicode(cnv);
1700 originalDest=dest;
1701 if(srcLength==-1) {
73c04bcf 1702 srcLength=(int32_t)uprv_strlen(src);
b75a7d8f
A
1703 }
1704 if(srcLength>0) {
1705 srcLimit=src+srcLength;
1706 destLimit=dest+destCapacity;
1707
1708 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1709 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1710 destLimit=(UChar *)U_MAX_PTR(dest);
1711 }
1712
1713 /* perform the conversion */
1714 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1715 destLength=(int32_t)(dest-originalDest);
1716
1717 /* if an overflow occurs, then get the preflighting length */
1718 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1719 {
1720 UChar buffer[1024];
1721
1722 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1723 do {
1724 dest=buffer;
1725 *pErrorCode=U_ZERO_ERROR;
1726 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1727 destLength+=(int32_t)(dest-buffer);
1728 }
1729 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1730 }
1731 } else {
1732 destLength=0;
1733 }
1734
1735 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1736}
1737
374ca955
A
1738/* ucnv_getNextUChar() ------------------------------------------------------ */
1739
/*
 * Converts input bytes and returns the next code point.
 *
 * cnv          the converter; must not be NULL
 * source       in/out pointer to the next input byte; must not be NULL
 * sourceLimit  end of the input; must not precede *source
 * err          in/out error code; nothing is done if it is NULL or already
 *              indicates a failure
 *
 * Returns the code point. Returns 0xffff if the arguments are rejected,
 * if the conversion fails, or if no output was produced; in the last case
 * *err is set to U_INDEX_OUTOFBOUNDS_ERROR.
 *
 * Output that is left in cnv->UCharErrorBuffer from earlier is returned
 * first; a lead surrogate is combined with a following trail surrogate
 * into one supplementary code point; output beyond the returned code point
 * is stored back at the beginning of cnv->UCharErrorBuffer.
 */
1740U_CAPI UChar32 U_EXPORT2
1741ucnv_getNextUChar(UConverter *cnv,
1742 const char **source, const char *sourceLimit,
1743 UErrorCode *err) {
b75a7d8f 1744 UConverterToUnicodeArgs args;
374ca955
A
1745 UChar buffer[U16_MAX_LENGTH];
1746 const char *s;
1747 UChar32 c;
1748 int32_t i, length;
b75a7d8f 1749
374ca955
A
1750 /* check parameters */
1751 if(err==NULL || U_FAILURE(*err)) {
b75a7d8f
A
1752 return 0xffff;
1753 }
1754
374ca955
A
1755 if(cnv==NULL || source==NULL) {
1756 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1757 return 0xffff;
1758 }
1759
374ca955
A
1760 s=*source;
1761 if(sourceLimit<s) {
1762 *err=U_ILLEGAL_ARGUMENT_ERROR;
1763 return 0xffff;
1764 }
1765
1766 /*
1767 * Make sure that the buffer sizes do not exceed the number range for
1768 * int32_t because some functions use the size (in units or bytes)
1769 * rather than comparing pointers, and because offsets are int32_t values.
1770 *
1771 * size_t is guaranteed to be unsigned and large enough for the job.
1772 *
1773 * Return with an error instead of adjusting the limits because we would
1774 * not be able to maintain the semantics that either the source must be
1775 * consumed or the target filled (unless an error occurs).
1776 * An adjustment would be sourceLimit=s+0x7fffffff; for example.
1777 */
1778 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1779 *err=U_ILLEGAL_ARGUMENT_ERROR;
1780 return 0xffff;
1781 }
1782
/* c<0 below means: no code point was taken from the overflow buffer */
1783 c=U_SENTINEL;
1784
1785 /* flush the target overflow buffer */
1786 if(cnv->UCharErrorBufferLength>0) {
1787 UChar *overflow;
1788
1789 overflow=cnv->UCharErrorBuffer;
1790 i=0;
1791 length=cnv->UCharErrorBufferLength;
1792 U16_NEXT(overflow, i, length, c);
1793
1794 /* move the remaining overflow contents up to the beginning */
1795 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1796 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1797 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1798 }
1799
1800 if(!U16_IS_LEAD(c) || i<length) {
1801 return c;
1802 }
1803 /*
1804 * Continue if the overflow buffer contained only a lead surrogate,
1805 * in case the converter outputs single surrogates from complete
1806 * input sequences.
1807 */
1808 }
1809
1810 /*
1811 * flush==TRUE is implied for ucnv_getNextUChar()
1812 *
1813 * do not simply return even if s==sourceLimit because the converter may
1814 * not have seen flush==TRUE before
1815 */
1816
1817 /* prepare the converter arguments */
1818 args.converter=cnv;
1819 args.flush=TRUE;
1820 args.offsets=NULL;
1821 args.source=s;
1822 args.sourceLimit=sourceLimit;
1823 args.target=buffer;
/* room for exactly one UChar in the first conversion call */
1824 args.targetLimit=buffer+1;
1825 args.size=sizeof(args);
1826
1827 if(c<0) {
1828 /*
1829 * call the native getNextUChar() implementation if we are
1830 * at a character boundary (toULength==0)
1831 *
1832 * unlike with _toUnicode(), getNextUChar() implementations must set
1833 * U_TRUNCATED_CHAR_FOUND for truncated input,
1834 * in addition to setting toULength/toUBytes[]
1835 */
1836 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1837 c=cnv->sharedData->impl->getNextUChar(&args, err);
1838 *source=s=args.source;
1839 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1840 /* reset the converter without calling the callback function */
1841 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1842 return 0xffff; /* no output */
1843 } else if(U_SUCCESS(*err) && c>=0) {
1844 return c;
1845 /*
1846 * else fall through to use _toUnicode() because
1847 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1848 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1849 */
1850 }
1851 }
1852
1853 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1854 _toUnicodeWithCallback(&args, err);
1855
/* a full one-UChar buffer is expected here, not an error */
1856 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1857 *err=U_ZERO_ERROR;
1858 }
1859
1860 i=0;
1861 length=(int32_t)(args.target-buffer);
1862 } else {
1863 /* write the lead surrogate from the overflow buffer */
1864 buffer[0]=(UChar)c;
1865 args.target=buffer+1;
1866 i=0;
1867 length=1;
1868 }
1869
1870 /* buffer contents starts at i and ends before length */
1871
1872 if(U_FAILURE(*err)) {
1873 c=0xffff; /* no output */
1874 } else if(length==0) {
1875 /* no input or only state changes */
1876 *err=U_INDEX_OUTOFBOUNDS_ERROR;
1877 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1878 c=0xffff; /* no output */
b75a7d8f 1879 } else {
374ca955
A
1880 c=buffer[0];
1881 i=1;
1882 if(!U16_IS_LEAD(c)) {
1883 /* consume c=buffer[0], done */
1884 } else {
1885 /* got a lead surrogate, see if a trail surrogate follows */
1886 UChar c2;
1887
1888 if(cnv->UCharErrorBufferLength>0) {
1889 /* got overflow output from the conversion */
1890 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1891 /* got a trail surrogate, too */
1892 c=U16_GET_SUPPLEMENTARY(c, c2);
1893
1894 /* move the remaining overflow contents up to the beginning */
1895 if((--cnv->UCharErrorBufferLength)>0) {
1896 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1897 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1898 }
1899 } else {
1900 /* c is an unpaired lead surrogate, just return it */
1901 }
1902 } else if(args.source<sourceLimit) {
1903 /* convert once more, to buffer[1] */
1904 args.targetLimit=buffer+2;
1905 _toUnicodeWithCallback(&args, err);
1906 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1907 *err=U_ZERO_ERROR;
1908 }
1909
1910 length=(int32_t)(args.target-buffer);
1911 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
1912 /* got a trail surrogate, too */
1913 c=U16_GET_SUPPLEMENTARY(c, c2);
1914 i=2;
1915 }
1916 }
1917 }
1918 }
1919
1920 /*
1921 * move leftover output from buffer[i..length[
1922 * into the beginning of the overflow buffer
1923 */
1924 if(i<length) {
1925 /* move further overflow back */
1926 int32_t delta=length-i;
1927 if((length=cnv->UCharErrorBufferLength)>0) {
1928 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
1929 length*U_SIZEOF_UCHAR);
1930 }
1931 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
1932
/* buffer[] holds at most two UChars, so delta is 1 or 2 */
1933 cnv->UCharErrorBuffer[0]=buffer[i++];
1934 if(delta>1) {
1935 cnv->UCharErrorBuffer[1]=buffer[i];
1936 }
b75a7d8f 1937 }
374ca955
A
1938
1939 *source=args.source;
1940 return c;
b75a7d8f
A
1941}
1942
374ca955
A
1943/* ucnv_convert() and siblings ---------------------------------------------- */
1944
b75a7d8f
A
/*
 * Converts bytes from the charset of sourceCnv to the charset of targetCnv
 * by way of a UTF-16 pivot buffer: ucnv_toUnicode() fills the pivot from
 * the source, ucnv_fromUnicode() drains the pivot into the target.
 *
 * targetCnv, sourceCnv  the converters; must not be NULL
 * target       in/out pointer to the next free output byte; neither it nor
 *              *target nor targetLimit may be NULL
 * source       in/out pointer to the next input byte; neither it nor
 *              *source may be NULL
 * sourceLimit  end of the input; if NULL, the input is taken to end at its
 *              first NUL byte
 * pivotStart, pivotSource, pivotTarget, pivotLimit
 *              caller-provided pivot buffer and its in/out read and write
 *              pointers; if pivotStart is NULL, a local buffer of
 *              CHUNK_SIZE UChars is used instead, which is accepted only
 *              when flush is TRUE
 * reset        if TRUE, both converters and the pivot pointers are reset
 *              before converting
 * flush        passed to ucnv_toUnicode(); passed to ucnv_fromUnicode() only
 *              once *source has reached sourceLimit
 * pErrorCode   in/out error code; nothing is done if it is NULL or already
 *              indicates a failure
 *
 * With flush set and no failure, a NUL byte is written at *target if there
 * is room; otherwise *pErrorCode becomes U_STRING_NOT_TERMINATED_WARNING.
 */
1945U_CAPI void U_EXPORT2
1946ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
1947 char **target, const char *targetLimit,
1948 const char **source, const char *sourceLimit,
1949 UChar *pivotStart, UChar **pivotSource,
1950 UChar **pivotTarget, const UChar *pivotLimit,
1951 UBool reset, UBool flush,
1952 UErrorCode *pErrorCode) {
1953 UChar pivotBuffer[CHUNK_SIZE];
1954 UChar *myPivotSource, *myPivotTarget;
1955
1956 /* error checking */
1957 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1958 return;
1959 }
1960
1961 if( targetCnv==NULL || sourceCnv==NULL ||
1962 source==NULL || *source==NULL ||
1963 target==NULL || *target==NULL || targetLimit==NULL
1964 ) {
1965 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1966 return;
1967 }
1968
1969 if(pivotStart==NULL) {
73c04bcf
A
1970 if(!flush) {
1971 /* streaming conversion requires an explicit pivot buffer */
1972 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1973 return;
1974 }
1975
b75a7d8f
A
1976 /* use the stack pivot buffer */
1977 pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
1978 pivotSource=&myPivotSource;
1979 pivotTarget=&myPivotTarget;
1980 pivotLimit=pivotBuffer+CHUNK_SIZE;
1981 } else if( pivotStart>=pivotLimit ||
1982 pivotSource==NULL || *pivotSource==NULL ||
1983 pivotTarget==NULL || *pivotTarget==NULL ||
1984 pivotLimit==NULL
1985 ) {
1986 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1987 return;
1988 }
1989
1990 if(sourceLimit==NULL) {
1991 /* get limit of single-byte-NUL-terminated source string */
1992 sourceLimit=uprv_strchr(*source, 0);
1993 }
1994
1995 if(reset) {
1996 ucnv_resetToUnicode(sourceCnv);
1997 ucnv_resetFromUnicode(targetCnv);
1998 *pivotTarget=*pivotSource=pivotStart;
1999 }
2000
2001 /* conversion loop */
2002 for(;;) {
2003 if(reset) {
2004 /*
2005 * if we did a reset in this function, we know that there is nothing
2006 * to convert to the target yet, so we save a function call
2007 */
2008 reset=FALSE;
2009 } else {
2010 /*
2011 * convert to the target first in case the pivot is filled at entry
2012 * or the targetCnv has some output bytes in its state
2013 */
2014 ucnv_fromUnicode(targetCnv,
2015 target, targetLimit,
2016 (const UChar **)pivotSource, *pivotTarget,
2017 NULL,
2018 (UBool)(flush && *source==sourceLimit),
2019 pErrorCode);
2020 if(U_FAILURE(*pErrorCode)) {
2021 break;
2022 }
2023
2024 /* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
2025 *pivotSource=*pivotTarget=pivotStart;
2026 }
2027
2028 /* convert from the source to the pivot */
2029 ucnv_toUnicode(sourceCnv,
2030 pivotTarget, pivotLimit,
2031 source, sourceLimit,
2032 NULL,
2033 flush,
2034 pErrorCode);
2035 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2036 /* pivot overflow: continue with the conversion loop */
2037 *pErrorCode=U_ZERO_ERROR;
2038 } else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
2039 /* conversion error, or there was nothing left to convert */
2040 break;
2041 }
2042 /* else ucnv_toUnicode() wrote into the pivot buffer: continue */
2043 }
2044
2045 /*
2046 * The conversion loop is exited when one of the following is true:
2047 * - the entire source text has been converted successfully to the target buffer
2048 * - a target buffer overflow occurred
2049 * - a conversion error occurred
2050 */
2051
2052 /* terminate the target buffer if possible */
2053 if(flush && U_SUCCESS(*pErrorCode)) {
2054 if(*target!=targetLimit) {
2055 **target=0;
/* the string is terminated now, so drop a not-terminated warning */
2056 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
2057 *pErrorCode=U_ZERO_ERROR;
2058 }
2059 } else {
2060 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
2061 }
2062 }
2063}
2064
2065/* internal implementation of ucnv_convert() etc. with preflighting */
2066static int32_t
2067ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2068 char *target, int32_t targetCapacity,
2069 const char *source, int32_t sourceLength,
2070 UErrorCode *pErrorCode) {
2071 UChar pivotBuffer[CHUNK_SIZE];
2072 UChar *pivot, *pivot2;
2073
2074 char *myTarget;
2075 const char *sourceLimit;
2076 const char *targetLimit;
2077 int32_t targetLength=0;
2078
2079 /* set up */
2080 if(sourceLength<0) {
2081 sourceLimit=uprv_strchr(source, 0);
2082 } else {
2083 sourceLimit=source+sourceLength;
2084 }
2085
2086 /* if there is no input data, we're done */
2087 if(source==sourceLimit) {
2088 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2089 }
2090
2091 pivot=pivot2=pivotBuffer;
2092 myTarget=target;
2093 targetLength=0;
2094
2095 if(targetCapacity>0) {
2096 /* perform real conversion */
2097 targetLimit=target+targetCapacity;
2098 ucnv_convertEx(outConverter, inConverter,
2099 &myTarget, targetLimit,
2100 &source, sourceLimit,
2101 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2102 FALSE,
2103 TRUE,
2104 pErrorCode);
73c04bcf 2105 targetLength=(int32_t)(myTarget-target);
b75a7d8f
A
2106 }
2107
2108 /*
2109 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2110 * to it but continue the conversion in order to store in targetCapacity
2111 * the number of bytes that was required.
2112 */
2113 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2114 {
2115 char targetBuffer[CHUNK_SIZE];
2116
2117 targetLimit=targetBuffer+CHUNK_SIZE;
2118 do {
2119 *pErrorCode=U_ZERO_ERROR;
2120 myTarget=targetBuffer;
2121 ucnv_convertEx(outConverter, inConverter,
2122 &myTarget, targetLimit,
2123 &source, sourceLimit,
2124 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2125 FALSE,
2126 TRUE,
2127 pErrorCode);
73c04bcf 2128 targetLength+=(int32_t)(myTarget-targetBuffer);
b75a7d8f
A
2129 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2130
2131 /* done with preflighting, set warnings and errors as appropriate */
2132 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2133 }
2134
2135 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2136 return targetLength;
2137}
2138
2139U_CAPI int32_t U_EXPORT2
2140ucnv_convert(const char *toConverterName, const char *fromConverterName,
2141 char *target, int32_t targetCapacity,
2142 const char *source, int32_t sourceLength,
2143 UErrorCode *pErrorCode) {
2144 UConverter in, out; /* stack-allocated */
2145 UConverter *inConverter, *outConverter;
2146 int32_t targetLength;
2147
2148 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2149 return 0;
2150 }
2151
2152 if( source==NULL || sourceLength<-1 ||
2153 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2154 ) {
2155 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2156 return 0;
2157 }
2158
2159 /* if there is no input data, we're done */
2160 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2161 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2162 }
2163
2164 /* create the converters */
2165 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2166 if(U_FAILURE(*pErrorCode)) {
2167 return 0;
2168 }
2169
2170 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2171 if(U_FAILURE(*pErrorCode)) {
2172 ucnv_close(inConverter);
2173 return 0;
2174 }
2175
2176 targetLength=ucnv_internalConvert(outConverter, inConverter,
2177 target, targetCapacity,
2178 source, sourceLength,
2179 pErrorCode);
2180
2181 ucnv_close(inConverter);
2182 ucnv_close(outConverter);
2183
2184 return targetLength;
2185}
2186
2187/* @internal */
2188static int32_t
2189ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2190 UConverterType algorithmicType,
2191 UConverter *cnv,
2192 char *target, int32_t targetCapacity,
2193 const char *source, int32_t sourceLength,
2194 UErrorCode *pErrorCode) {
2195 UConverter algoConverterStatic; /* stack-allocated */
2196 UConverter *algoConverter, *to, *from;
2197 int32_t targetLength;
2198
2199 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2200 return 0;
2201 }
2202
2203 if( cnv==NULL || source==NULL || sourceLength<-1 ||
2204 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2205 ) {
2206 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2207 return 0;
2208 }
2209
2210 /* if there is no input data, we're done */
2211 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2212 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2213 }
2214
2215 /* create the algorithmic converter */
2216 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2217 "", 0, pErrorCode);
2218 if(U_FAILURE(*pErrorCode)) {
2219 return 0;
2220 }
2221
2222 /* reset the other converter */
2223 if(convertToAlgorithmic) {
2224 /* cnv->Unicode->algo */
2225 ucnv_resetToUnicode(cnv);
2226 to=algoConverter;
2227 from=cnv;
2228 } else {
2229 /* algo->Unicode->cnv */
2230 ucnv_resetFromUnicode(cnv);
2231 from=algoConverter;
2232 to=cnv;
2233 }
2234
2235 targetLength=ucnv_internalConvert(to, from,
2236 target, targetCapacity,
2237 source, sourceLength,
2238 pErrorCode);
2239
2240 ucnv_close(algoConverter);
2241
2242 return targetLength;
2243}
2244
2245U_CAPI int32_t U_EXPORT2
2246ucnv_toAlgorithmic(UConverterType algorithmicType,
2247 UConverter *cnv,
2248 char *target, int32_t targetCapacity,
2249 const char *source, int32_t sourceLength,
2250 UErrorCode *pErrorCode) {
2251 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2252 target, targetCapacity,
2253 source, sourceLength,
2254 pErrorCode);
2255}
2256
2257U_CAPI int32_t U_EXPORT2
2258ucnv_fromAlgorithmic(UConverter *cnv,
2259 UConverterType algorithmicType,
2260 char *target, int32_t targetCapacity,
2261 const char *source, int32_t sourceLength,
2262 UErrorCode *pErrorCode) {
2263 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2264 target, targetCapacity,
2265 source, sourceLength,
2266 pErrorCode);
2267}
2268
2269U_CAPI UConverterType U_EXPORT2
2270ucnv_getType(const UConverter* converter)
2271{
2272 int8_t type = converter->sharedData->staticData->conversionType;
2273#if !UCONFIG_NO_LEGACY_CONVERSION
2274 if(type == UCNV_MBCS) {
374ca955 2275 return ucnv_MBCSGetType(converter);
b75a7d8f
A
2276 }
2277#endif
2278 return (UConverterType)type;
2279}
2280
2281U_CAPI void U_EXPORT2
2282ucnv_getStarters(const UConverter* converter,
2283 UBool starters[256],
2284 UErrorCode* err)
2285{
2286 if (err == NULL || U_FAILURE(*err)) {
2287 return;
2288 }
2289
2290 if(converter->sharedData->impl->getStarters != NULL) {
2291 converter->sharedData->impl->getStarters(converter, starters, err);
2292 } else {
2293 *err = U_ILLEGAL_ARGUMENT_ERROR;
2294 }
2295}
2296
2297static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2298{
2299 UErrorCode errorCode;
2300 const char *name;
2301 int32_t i;
2302
2303 if(cnv==NULL) {
2304 return NULL;
2305 }
2306
2307 errorCode=U_ZERO_ERROR;
2308 name=ucnv_getName(cnv, &errorCode);
2309 if(U_FAILURE(errorCode)) {
2310 return NULL;
2311 }
2312
2313 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2314 {
2315 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2316 {
2317 return ambiguousConverters+i;
2318 }
2319 }
2320
2321 return NULL;
2322}
2323
2324U_CAPI void U_EXPORT2
2325ucnv_fixFileSeparator(const UConverter *cnv,
2326 UChar* source,
2327 int32_t sourceLength) {
2328 const UAmbiguousConverter *a;
2329 int32_t i;
2330 UChar variant5c;
2331
2332 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2333 {
2334 return;
2335 }
2336
2337 variant5c=a->variant5c;
2338 for(i=0; i<sourceLength; ++i) {
2339 if(source[i]==variant5c) {
2340 source[i]=0x5c;
2341 }
2342 }
2343}
2344
2345U_CAPI UBool U_EXPORT2
2346ucnv_isAmbiguous(const UConverter *cnv) {
2347 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2348}
2349
2350U_CAPI void U_EXPORT2
2351ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2352{
2353 cnv->useFallback = usesFallback;
2354}
2355
2356U_CAPI UBool U_EXPORT2
2357ucnv_usesFallback(const UConverter *cnv)
2358{
2359 return cnv->useFallback;
2360}
2361
2362U_CAPI void U_EXPORT2
2363ucnv_getInvalidChars (const UConverter * converter,
2364 char *errBytes,
2365 int8_t * len,
2366 UErrorCode * err)
2367{
2368 if (err == NULL || U_FAILURE(*err))
2369 {
2370 return;
2371 }
2372 if (len == NULL || errBytes == NULL || converter == NULL)
2373 {
2374 *err = U_ILLEGAL_ARGUMENT_ERROR;
2375 return;
2376 }
2377 if (*len < converter->invalidCharLength)
2378 {
2379 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2380 return;
2381 }
2382 if ((*len = converter->invalidCharLength) > 0)
2383 {
2384 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2385 }
2386}
2387
2388U_CAPI void U_EXPORT2
2389ucnv_getInvalidUChars (const UConverter * converter,
2390 UChar *errChars,
2391 int8_t * len,
2392 UErrorCode * err)
2393{
2394 if (err == NULL || U_FAILURE(*err))
2395 {
2396 return;
2397 }
2398 if (len == NULL || errChars == NULL || converter == NULL)
2399 {
2400 *err = U_ILLEGAL_ARGUMENT_ERROR;
2401 return;
2402 }
2403 if (*len < converter->invalidUCharLength)
2404 {
2405 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2406 return;
2407 }
2408 if ((*len = converter->invalidUCharLength) > 0)
2409 {
2410 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2411 }
2412}
2413
2414#define SIG_MAX_LEN 5
2415
2416U_CAPI const char* U_EXPORT2
2417ucnv_detectUnicodeSignature( const char* source,
2418 int32_t sourceLength,
2419 int32_t* signatureLength,
2420 UErrorCode* pErrorCode) {
2421 int32_t dummy;
2422
2423 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2424 * bytes we don't misdetect something
2425 */
2426 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2427 int i = 0;
2428
2429 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2430 return NULL;
2431 }
2432
2433 if(source == NULL || sourceLength < -1){
2434 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2435 return NULL;
2436 }
2437
2438 if(signatureLength == NULL) {
2439 signatureLength = &dummy;
2440 }
2441
2442 if(sourceLength==-1){
73c04bcf 2443 sourceLength=(int32_t)uprv_strlen(source);
b75a7d8f
A
2444 }
2445
2446
2447 while(i<sourceLength&& i<SIG_MAX_LEN){
2448 start[i]=source[i];
2449 i++;
2450 }
2451
2452 if(start[0] == '\xFE' && start[1] == '\xFF') {
2453 *signatureLength=2;
2454 return "UTF-16BE";
2455 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2456 if(start[2] == '\x00' && start[3] =='\x00') {
2457 *signatureLength=4;
2458 return "UTF-32LE";
2459 } else {
2460 *signatureLength=2;
2461 return "UTF-16LE";
2462 }
2463 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2464 *signatureLength=3;
2465 return "UTF-8";
2466 } else if(start[0] == '\x00' && start[1] == '\x00' &&
2467 start[2] == '\xFE' && start[3]=='\xFF') {
2468 *signatureLength=4;
2469 return "UTF-32BE";
2470 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2471 *signatureLength=3;
2472 return "SCSU";
2473 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2474 *signatureLength=3;
2475 return "BOCU-1";
2476 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2477 /*
2478 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2479 * depending on the second UTF-16 code unit.
2480 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2481 * if it occurs.
2482 *
2483 * So far we have +/v
2484 */
2485 if(start[3] == '\x38' && start[4] == '\x2D') {
2486 /* 5 bytes +/v8- */
2487 *signatureLength=5;
2488 return "UTF-7";
2489 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2490 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2491 *signatureLength=4;
2492 return "UTF-7";
2493 }
374ca955
A
2494 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2495 *signatureLength=4;
2496 return "UTF-EBCDIC";
b75a7d8f
A
2497 }
2498
374ca955 2499
b75a7d8f
A
2500 /* no known Unicode signature byte sequence recognized */
2501 *signatureLength=0;
2502 return NULL;
2503}
2504
73c04bcf
A
2505 U_DRAFT int32_t U_EXPORT2
2506 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status){
2507
2508 if(status == NULL || U_FAILURE(*status)){
2509 return -1;
2510 }
2511 if(cnv == NULL){
2512 *status = U_ILLEGAL_ARGUMENT_ERROR;
2513 return -1;
2514 }
2515
2516 if(cnv->preFromULength > 0){
2517 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2518 }else if(cnv->preFromULength < 0){
2519 return -cnv->preFromULength ;
2520 }else if(cnv->fromUChar32 > 0){
2521 return 1;
2522 }else if(cnv->preFromUFirstCP >0){
2523 return U16_LENGTH(cnv->preFromUFirstCP);
2524 }
2525 return 0;
2526
2527 }
2528
2529U_DRAFT int32_t U_EXPORT2
2530ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2531
2532 if(status == NULL || U_FAILURE(*status)){
2533 return -1;
2534 }
2535 if(cnv == NULL){
2536 *status = U_ILLEGAL_ARGUMENT_ERROR;
2537 return -1;
2538 }
2539
2540 if(cnv->preToULength > 0){
2541 return cnv->preToULength ;
2542 }else if(cnv->preToULength < 0){
2543 return -cnv->preToULength;
2544 }else if(cnv->toULength > 0){
2545 return cnv->toULength;
2546 }
2547 return 0;
2548}
374ca955
A
2549#endif
2550
b75a7d8f
A
2551/*
2552 * Hey, Emacs, please set the following:
2553 *
2554 * Local Variables:
2555 * indent-tabs-mode: nil
2556 * End:
2557 *
2558 */