]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv.c
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / common / ucnv.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
51004dcb 4* Copyright (C) 1998-2012, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9* ucnv.c:
10* Implements APIs for the ICU's codeset conversion library;
11* mostly calls through internal functions;
12* created by Bertrand A. Damiba
13*
14* Modification History:
15*
16* Date Name Description
17* 04/04/99 helena Fixed internal header inclusion.
18* 05/09/00 helena Added implementation to handle fallback mappings.
19* 06/20/2000 helena OS/400 port changes; mostly typecast.
20*/
21
22#include "unicode/utypes.h"
374ca955
A
23
24#if !UCONFIG_NO_CONVERSION
25
b75a7d8f 26#include "unicode/ustring.h"
b75a7d8f
A
27#include "unicode/ucnv.h"
28#include "unicode/ucnv_err.h"
29#include "unicode/uset.h"
4388f060
A
30#include "unicode/utf.h"
31#include "unicode/utf16.h"
374ca955 32#include "putilimp.h"
b75a7d8f
A
33#include "cmemory.h"
34#include "cstring.h"
374ca955
A
35#include "uassert.h"
36#include "utracimp.h"
b75a7d8f
A
37#include "ustr_imp.h"
38#include "ucnv_imp.h"
b75a7d8f
A
39#include "ucnv_cnv.h"
40#include "ucnv_bld.h"
41
b75a7d8f
A
42/* size of intermediate and preflighting buffers in ucnv_convert() */
43#define CHUNK_SIZE 1024
44
45typedef struct UAmbiguousConverter {
46 const char *name;
47 const UChar variant5c;
48} UAmbiguousConverter;
49
50static const UAmbiguousConverter ambiguousConverters[]={
46f4442e 51 { "ibm-897_P100-1995", 0xa5 },
b75a7d8f
A
52 { "ibm-942_P120-1999", 0xa5 },
53 { "ibm-943_P130-1999", 0xa5 },
46f4442e 54 { "ibm-946_P100-1995", 0xa5 },
b75a7d8f 55 { "ibm-33722_P120-1999", 0xa5 },
729e4ab9 56 { "ibm-1041_P100-1995", 0xa5 },
46f4442e
A
57 /*{ "ibm-54191_P100-2006", 0xa5 },*/
58 /*{ "ibm-62383_P100-2007", 0xa5 },*/
59 /*{ "ibm-891_P100-1995", 0x20a9 },*/
60 { "ibm-944_P100-1995", 0x20a9 },
b75a7d8f
A
61 { "ibm-949_P110-1999", 0x20a9 },
62 { "ibm-1363_P110-1997", 0x20a9 },
729e4ab9
A
63 { "ISO_2022,locale=ko,version=0", 0x20a9 },
64 { "ibm-1088_P100-1995", 0x20a9 }
b75a7d8f
A
65};
66
b75a7d8f
A
67/*Calls through createConverter */
68U_CAPI UConverter* U_EXPORT2
69ucnv_open (const char *name,
70 UErrorCode * err)
71{
72 UConverter *r;
73
74 if (err == NULL || U_FAILURE (*err)) {
b75a7d8f
A
75 return NULL;
76 }
77
78 r = ucnv_createConverter(NULL, name, err);
b75a7d8f
A
79 return r;
80}
81
82U_CAPI UConverter* U_EXPORT2
83ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
84{
85 return ucnv_createConverterFromPackage(packageName, converterName, err);
86}
87
88/*Extracts the UChar* to a char* and calls through createConverter */
89U_CAPI UConverter* U_EXPORT2
90ucnv_openU (const UChar * name,
91 UErrorCode * err)
92{
93 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
94
95 if (err == NULL || U_FAILURE(*err))
96 return NULL;
97 if (name == NULL)
98 return ucnv_open (NULL, err);
99 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
100 {
101 *err = U_ILLEGAL_ARGUMENT_ERROR;
102 return NULL;
103 }
104 return ucnv_open(u_austrcpy(asciiName, name), err);
105}
106
73c04bcf
A
107/* Copy the string that is represented by the UConverterPlatform enum
108 * @param platformString An output buffer
109 * @param platform An enum representing a platform
110 * @return the length of the copied string.
111 */
112static int32_t
113ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
114{
115 switch (pltfrm)
116 {
117 case UCNV_IBM:
118 uprv_strcpy(platformString, "ibm-");
119 return 4;
120 case UCNV_UNKNOWN:
121 break;
122 }
123
124 /* default to empty string */
125 *platformString = 0;
126 return 0;
127}
128
b75a7d8f
A
129/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
130 *through createConverter*/
131U_CAPI UConverter* U_EXPORT2
132ucnv_openCCSID (int32_t codepage,
133 UConverterPlatform platform,
134 UErrorCode * err)
135{
136 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
137 int32_t myNameLen;
138
139 if (err == NULL || U_FAILURE (*err))
140 return NULL;
141
142 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
143 myNameLen = ucnv_copyPlatformString(myName, platform);
144 T_CString_integerToString(myName + myNameLen, codepage, 10);
145
146 return ucnv_createConverter(NULL, myName, err);
147}
148
149/* Creating a temporary stack-based object that can be used in one thread,
150and created from a converter that is shared across threads.
151*/
152
153U_CAPI UConverter* U_EXPORT2
154ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
155{
156 UConverter *localConverter, *allocatedConverter;
157 int32_t bufferSizeNeeded;
158 char *stackBufferChars = (char *)stackBuffer;
159 UErrorCode cbErr;
160 UConverterToUnicodeArgs toUArgs = {
161 sizeof(UConverterToUnicodeArgs),
162 TRUE,
163 NULL,
164 NULL,
165 NULL,
166 NULL,
167 NULL,
168 NULL
169 };
170 UConverterFromUnicodeArgs fromUArgs = {
171 sizeof(UConverterFromUnicodeArgs),
172 TRUE,
173 NULL,
174 NULL,
175 NULL,
176 NULL,
177 NULL,
178 NULL
179 };
180
374ca955
A
181 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
182
b75a7d8f 183 if (status == NULL || U_FAILURE(*status)){
374ca955 184 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
b75a7d8f
A
185 return 0;
186 }
187
188 if (!pBufferSize || !cnv){
374ca955
A
189 *status = U_ILLEGAL_ARGUMENT_ERROR;
190 UTRACE_EXIT_STATUS(*status);
b75a7d8f
A
191 return 0;
192 }
193
374ca955
A
194 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
195 ucnv_getName(cnv, status), cnv, stackBuffer);
b75a7d8f 196
b75a7d8f
A
197 if (cnv->sharedData->impl->safeClone != NULL) {
198 /* call the custom safeClone function for sizing */
199 bufferSizeNeeded = 0;
374ca955 200 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
b75a7d8f
A
201 }
202 else
203 {
204 /* inherent sizing */
205 bufferSizeNeeded = sizeof(UConverter);
206 }
207
208 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
209 *pBufferSize = bufferSizeNeeded;
374ca955 210 UTRACE_EXIT_VALUE(bufferSizeNeeded);
b75a7d8f
A
211 return 0;
212 }
213
214
374ca955
A
215 /* Pointers on 64-bit platforms need to be aligned
216 * on a 64-bit boundary in memory.
217 */
218 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
219 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
220 if(*pBufferSize > offsetUp) {
221 *pBufferSize -= offsetUp;
222 stackBufferChars += offsetUp;
223 } else {
224 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
225 *pBufferSize = 1;
226 }
227 }
228
229 stackBuffer = (void *)stackBufferChars;
230
b75a7d8f
A
231 /* Now, see if we must allocate any memory */
232 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
233 {
234 /* allocate one here...*/
235 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
236
237 if(localConverter == NULL) {
238 *status = U_MEMORY_ALLOCATION_ERROR;
374ca955 239 UTRACE_EXIT_STATUS(*status);
b75a7d8f
A
240 return NULL;
241 }
242
243 if (U_SUCCESS(*status)) {
244 *status = U_SAFECLONE_ALLOCATED_WARNING;
245 }
246
247 /* record the fact that memory was allocated */
248 *pBufferSize = bufferSizeNeeded;
249 } else {
250 /* just use the stack buffer */
251 localConverter = (UConverter*) stackBuffer;
252 allocatedConverter = NULL;
253 }
254
374ca955
A
255 uprv_memset(localConverter, 0, bufferSizeNeeded);
256
b75a7d8f
A
257 /* Copy initial state */
258 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
259 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
260
73c04bcf
A
261 /* copy the substitution string */
262 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
263 localConverter->subChars = (uint8_t *)localConverter->subUChars;
264 } else {
265 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
266 if (localConverter->subChars == NULL) {
267 uprv_free(allocatedConverter);
268 UTRACE_EXIT_STATUS(*status);
269 return NULL;
270 }
271 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
272 }
273
b75a7d8f
A
274 /* now either call the safeclone fcn or not */
275 if (cnv->sharedData->impl->safeClone != NULL) {
276 /* call the custom safeClone function */
277 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
278 }
279
280 if(localConverter==NULL || U_FAILURE(*status)) {
73c04bcf
A
281 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
282 uprv_free(allocatedConverter->subChars);
283 }
b75a7d8f 284 uprv_free(allocatedConverter);
374ca955 285 UTRACE_EXIT_STATUS(*status);
b75a7d8f
A
286 return NULL;
287 }
288
289 /* increment refcount of shared data if needed */
290 /*
291 Checking whether it's an algorithic converter is okay
292 in multithreaded applications because the value never changes.
293 Don't check referenceCounter for any other value.
294 */
295 if (cnv->sharedData->referenceCounter != ~0) {
296 ucnv_incrementRefCount(cnv->sharedData);
297 }
298
299 if(localConverter == (UConverter*)stackBuffer) {
300 /* we're using user provided data - set to not destroy */
301 localConverter->isCopyLocal = TRUE;
b75a7d8f
A
302 }
303
b75a7d8f
A
304 /* allow callback functions to handle any memory allocation */
305 toUArgs.converter = fromUArgs.converter = localConverter;
306 cbErr = U_ZERO_ERROR;
307 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
308 cbErr = U_ZERO_ERROR;
309 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
310
374ca955 311 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
b75a7d8f
A
312 return localConverter;
313}
314
315
316
317/*Decreases the reference counter in the shared immutable section of the object
318 *and frees the mutable part*/
319
320U_CAPI void U_EXPORT2
321ucnv_close (UConverter * converter)
322{
b75a7d8f
A
323 UErrorCode errorCode = U_ZERO_ERROR;
324
374ca955
A
325 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
326
b75a7d8f
A
327 if (converter == NULL)
328 {
374ca955 329 UTRACE_EXIT();
b75a7d8f
A
330 return;
331 }
332
374ca955
A
333 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
334 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
b75a7d8f 335
73c04bcf
A
336 /* In order to speed up the close, only call the callbacks when they have been changed.
337 This performance check will only work when the callbacks are set within a shared library
338 or from user code that statically links this code. */
339 /* first, notify the callback functions that the converter is closed */
340 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
341 UConverterToUnicodeArgs toUArgs = {
342 sizeof(UConverterToUnicodeArgs),
343 TRUE,
344 NULL,
345 NULL,
346 NULL,
347 NULL,
348 NULL,
349 NULL
350 };
b75a7d8f 351
73c04bcf
A
352 toUArgs.converter = converter;
353 errorCode = U_ZERO_ERROR;
354 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
355 }
356 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
357 UConverterFromUnicodeArgs fromUArgs = {
358 sizeof(UConverterFromUnicodeArgs),
359 TRUE,
360 NULL,
361 NULL,
362 NULL,
363 NULL,
364 NULL,
365 NULL
366 };
367 fromUArgs.converter = converter;
368 errorCode = U_ZERO_ERROR;
369 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
370 }
b75a7d8f 371
b75a7d8f
A
372 if (converter->sharedData->impl->close != NULL) {
373 converter->sharedData->impl->close(converter);
374 }
375
73c04bcf
A
376 if (converter->subChars != (uint8_t *)converter->subUChars) {
377 uprv_free(converter->subChars);
378 }
379
b75a7d8f
A
380 /*
381 Checking whether it's an algorithic converter is okay
382 in multithreaded applications because the value never changes.
383 Don't check referenceCounter for any other value.
384 */
385 if (converter->sharedData->referenceCounter != ~0) {
386 ucnv_unloadSharedDataIfReady(converter->sharedData);
387 }
388
389 if(!converter->isCopyLocal){
73c04bcf 390 uprv_free(converter);
b75a7d8f 391 }
374ca955
A
392
393 UTRACE_EXIT();
b75a7d8f
A
394}
395
396/*returns a single Name from the list, will return NULL if out of bounds
397 */
398U_CAPI const char* U_EXPORT2
399ucnv_getAvailableName (int32_t n)
400{
73c04bcf
A
401 if (0 <= n && n <= 0xffff) {
402 UErrorCode err = U_ZERO_ERROR;
403 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
404 if (U_SUCCESS(err)) {
405 return name;
406 }
b75a7d8f 407 }
73c04bcf 408 return NULL;
b75a7d8f
A
409}
410
411U_CAPI int32_t U_EXPORT2
412ucnv_countAvailable ()
413{
414 UErrorCode err = U_ZERO_ERROR;
73c04bcf 415 return ucnv_bld_countAvailableConverters(&err);
b75a7d8f
A
416}
417
418U_CAPI void U_EXPORT2
419ucnv_getSubstChars (const UConverter * converter,
420 char *mySubChar,
421 int8_t * len,
422 UErrorCode * err)
423{
424 if (U_FAILURE (*err))
425 return;
426
73c04bcf
A
427 if (converter->subCharLen <= 0) {
428 /* Unicode string or empty string from ucnv_setSubstString(). */
429 *len = 0;
430 return;
431 }
432
b75a7d8f
A
433 if (*len < converter->subCharLen) /*not enough space in subChars */
434 {
435 *err = U_INDEX_OUTOFBOUNDS_ERROR;
436 return;
437 }
438
73c04bcf 439 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
b75a7d8f
A
440 *len = converter->subCharLen; /*store # of bytes copied to buffer */
441}
442
443U_CAPI void U_EXPORT2
444ucnv_setSubstChars (UConverter * converter,
445 const char *mySubChar,
446 int8_t len,
447 UErrorCode * err)
448{
449 if (U_FAILURE (*err))
450 return;
451
452 /*Makes sure that the subChar is within the codepages char length boundaries */
453 if ((len > converter->sharedData->staticData->maxBytesPerChar)
454 || (len < converter->sharedData->staticData->minBytesPerChar))
455 {
456 *err = U_ILLEGAL_ARGUMENT_ERROR;
457 return;
458 }
459
73c04bcf 460 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
b75a7d8f
A
461 converter->subCharLen = len; /*sets the new len */
462
463 /*
464 * There is currently (2001Feb) no separate API to set/get subChar1.
465 * In order to always have subChar written after it is explicitly set,
466 * we set subChar1 to 0.
467 */
468 converter->subChar1 = 0;
469
470 return;
471}
472
46f4442e 473U_CAPI void U_EXPORT2
73c04bcf
A
474ucnv_setSubstString(UConverter *cnv,
475 const UChar *s,
476 int32_t length,
477 UErrorCode *err) {
478 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
479 char chars[UCNV_ERROR_BUFFER_LENGTH];
480
481 UConverter *clone;
482 uint8_t *subChars;
483 int32_t cloneSize, length8;
484
485 /* Let the following functions check all arguments. */
486 cloneSize = sizeof(cloneBuffer);
487 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
488 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
489 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
490 ucnv_close(clone);
491 if (U_FAILURE(*err)) {
492 return;
493 }
494
495 if (cnv->sharedData->impl->writeSub == NULL
496#if !UCONFIG_NO_LEGACY_CONVERSION
497 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
498 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
499#endif
500 ) {
501 /* The converter is not stateful. Store the charset bytes as a fixed string. */
502 subChars = (uint8_t *)chars;
503 } else {
504 /*
505 * The converter has a non-default writeSub() function, indicating
506 * that it is stateful.
507 * Store the Unicode string for on-the-fly conversion for correct
508 * state handling.
509 */
510 if (length > UCNV_ERROR_BUFFER_LENGTH) {
511 /*
512 * Should not occur. The converter should output at least one byte
513 * per UChar, which means that ucnv_fromUChars() should catch all
514 * overflows.
515 */
516 *err = U_BUFFER_OVERFLOW_ERROR;
517 return;
518 }
519 subChars = (uint8_t *)s;
520 if (length < 0) {
521 length = u_strlen(s);
522 }
523 length8 = length * U_SIZEOF_UCHAR;
524 }
525
526 /*
527 * For storing the substitution string, select either the small buffer inside
528 * UConverter or allocate a subChars buffer.
529 */
530 if (length8 > UCNV_MAX_SUBCHAR_LEN) {
531 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
532 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
533 /* Allocate a new buffer for the string. */
534 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
535 if (cnv->subChars == NULL) {
536 cnv->subChars = (uint8_t *)cnv->subUChars;
537 *err = U_MEMORY_ALLOCATION_ERROR;
538 return;
539 }
540 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
541 }
542 }
543
544 /* Copy the substitution string into the UConverter or its subChars buffer. */
545 if (length8 == 0) {
546 cnv->subCharLen = 0;
547 } else {
548 uprv_memcpy(cnv->subChars, subChars, length8);
549 if (subChars == (uint8_t *)chars) {
550 cnv->subCharLen = (int8_t)length8;
551 } else /* subChars == s */ {
552 cnv->subCharLen = (int8_t)-length;
553 }
554 }
555
556 /* See comment in ucnv_setSubstChars(). */
557 cnv->subChar1 = 0;
558}
559
b75a7d8f
A
560/*resets the internal states of a converter
561 *goal : have the same behaviour than a freshly created converter
562 */
374ca955
A
563static void _reset(UConverter *converter, UConverterResetChoice choice,
564 UBool callCallback) {
b75a7d8f
A
565 if(converter == NULL) {
566 return;
567 }
568
374ca955
A
569 if(callCallback) {
570 /* first, notify the callback functions that the converter is reset */
46f4442e
A
571 UErrorCode errorCode;
572
573 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
574 UConverterToUnicodeArgs toUArgs = {
575 sizeof(UConverterToUnicodeArgs),
374ca955
A
576 TRUE,
577 NULL,
578 NULL,
579 NULL,
580 NULL,
581 NULL,
582 NULL
46f4442e
A
583 };
584 toUArgs.converter = converter;
585 errorCode = U_ZERO_ERROR;
586 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
587 }
588 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
589 UConverterFromUnicodeArgs fromUArgs = {
590 sizeof(UConverterFromUnicodeArgs),
374ca955
A
591 TRUE,
592 NULL,
593 NULL,
594 NULL,
595 NULL,
596 NULL,
597 NULL
46f4442e
A
598 };
599 fromUArgs.converter = converter;
374ca955
A
600 errorCode = U_ZERO_ERROR;
601 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
602 }
b75a7d8f
A
603 }
604
605 /* now reset the converter itself */
606 if(choice<=UCNV_RESET_TO_UNICODE) {
607 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
374ca955 608 converter->mode = 0;
b75a7d8f
A
609 converter->toULength = 0;
610 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
374ca955 611 converter->preToULength = 0;
b75a7d8f
A
612 }
613 if(choice!=UCNV_RESET_TO_UNICODE) {
614 converter->fromUnicodeStatus = 0;
374ca955 615 converter->fromUChar32 = 0;
b75a7d8f 616 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
374ca955
A
617 converter->preFromUFirstCP = U_SENTINEL;
618 converter->preFromULength = 0;
b75a7d8f
A
619 }
620
621 if (converter->sharedData->impl->reset != NULL) {
622 /* call the custom reset function */
623 converter->sharedData->impl->reset(converter, choice);
b75a7d8f
A
624 }
625}
626
627U_CAPI void U_EXPORT2
628ucnv_reset(UConverter *converter)
629{
374ca955 630 _reset(converter, UCNV_RESET_BOTH, TRUE);
b75a7d8f
A
631}
632
633U_CAPI void U_EXPORT2
634ucnv_resetToUnicode(UConverter *converter)
635{
374ca955 636 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
b75a7d8f
A
637}
638
639U_CAPI void U_EXPORT2
640ucnv_resetFromUnicode(UConverter *converter)
641{
374ca955 642 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
b75a7d8f
A
643}
644
645U_CAPI int8_t U_EXPORT2
646ucnv_getMaxCharSize (const UConverter * converter)
647{
374ca955 648 return converter->maxBytesPerUChar;
b75a7d8f
A
649}
650
651
652U_CAPI int8_t U_EXPORT2
653ucnv_getMinCharSize (const UConverter * converter)
654{
655 return converter->sharedData->staticData->minBytesPerChar;
656}
657
658U_CAPI const char* U_EXPORT2
659ucnv_getName (const UConverter * converter, UErrorCode * err)
660
661{
662 if (U_FAILURE (*err))
663 return NULL;
664 if(converter->sharedData->impl->getName){
665 const char* temp= converter->sharedData->impl->getName(converter);
666 if(temp)
667 return temp;
668 }
669 return converter->sharedData->staticData->name;
670}
671
374ca955
A
672U_CAPI int32_t U_EXPORT2
673ucnv_getCCSID(const UConverter * converter,
674 UErrorCode * err)
b75a7d8f 675{
374ca955 676 int32_t ccsid;
b75a7d8f
A
677 if (U_FAILURE (*err))
678 return -1;
679
374ca955
A
680 ccsid = converter->sharedData->staticData->codepage;
681 if (ccsid == 0) {
682 /* Rare case. This is for cases like gb18030,
51004dcb 683 which doesn't have an IBM canonical name, but does have an IBM alias. */
374ca955
A
684 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
685 if (U_SUCCESS(*err) && standardName) {
686 const char *ccsidStr = uprv_strchr(standardName, '-');
687 if (ccsidStr) {
688 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
689 }
690 }
691 }
692 return ccsid;
b75a7d8f
A
693}
694
695
696U_CAPI UConverterPlatform U_EXPORT2
697ucnv_getPlatform (const UConverter * converter,
698 UErrorCode * err)
699{
700 if (U_FAILURE (*err))
701 return UCNV_UNKNOWN;
702
703 return (UConverterPlatform)converter->sharedData->staticData->platform;
704}
705
b75a7d8f
A
706U_CAPI void U_EXPORT2
707 ucnv_getToUCallBack (const UConverter * converter,
708 UConverterToUCallback *action,
709 const void **context)
710{
711 *action = converter->fromCharErrorBehaviour;
712 *context = converter->toUContext;
713}
714
715U_CAPI void U_EXPORT2
716 ucnv_getFromUCallBack (const UConverter * converter,
717 UConverterFromUCallback *action,
718 const void **context)
719{
720 *action = converter->fromUCharErrorBehaviour;
721 *context = converter->fromUContext;
722}
723
724U_CAPI void U_EXPORT2
725ucnv_setToUCallBack (UConverter * converter,
726 UConverterToUCallback newAction,
727 const void* newContext,
728 UConverterToUCallback *oldAction,
729 const void** oldContext,
730 UErrorCode * err)
731{
732 if (U_FAILURE (*err))
733 return;
734 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
735 converter->fromCharErrorBehaviour = newAction;
736 if (oldContext) *oldContext = converter->toUContext;
737 converter->toUContext = newContext;
738}
739
740U_CAPI void U_EXPORT2
741ucnv_setFromUCallBack (UConverter * converter,
742 UConverterFromUCallback newAction,
743 const void* newContext,
744 UConverterFromUCallback *oldAction,
745 const void** oldContext,
746 UErrorCode * err)
747{
748 if (U_FAILURE (*err))
749 return;
750 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
751 converter->fromUCharErrorBehaviour = newAction;
752 if (oldContext) *oldContext = converter->fromUContext;
753 converter->fromUContext = newContext;
754}
755
374ca955
A
/*
 * Adjusts the offsets that were written during one conversion call.
 *
 * @param offsets          the offsets to adjust
 * @param length           number of entries in offsets
 * @param sourceIndex      source index before the call, or negative if the
 *                         conversion function does not handle offsets
 * @param errorInputLength length of the input sequence that caused an
 *                         error, if any
 *
 * With delta = sourceIndex-errorInputLength (or -1 for a negative
 * sourceIndex): a positive delta is added to every non-negative offset,
 * a zero delta changes nothing, and a negative delta sets every offset to -1.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t i;
    int32_t delta = (sourceIndex>=0) ? sourceIndex-errorInputLength : -1;

    if(delta==0) {
        /* most common case, nothing to do */
        return;
    }

    if(delta>0) {
        /* shift every valid offset; leave negative ones alone */
        for(i=0; i<length; ++i) {
            if(offsets[i]>=0) {
                offsets[i]+=delta;
            }
        }
    } else {
        /*
         * either offsets are not handled at all, or the error input
         * sequence started in a previous buffer
         */
        for(i=0; i<length; ++i) {
            offsets[i]=-1;
        }
    }
}
800
801/* ucnv_fromUnicode --------------------------------------------------------- */
802
803/*
804 * Implementation note for m:n conversions
805 *
806 * While collecting source units to find the longest match for m:n conversion,
807 * some source units may need to be stored for a partial match.
808 * When a second buffer does not yield a match on all of the previously stored
809 * source units, then they must be "replayed", i.e., fed back into the converter.
810 *
811 * The code relies on the fact that replaying will not nest -
812 * converting a replay buffer will not result in a replay.
813 * This is because a replay is necessary only after the _continuation_ of a
814 * partial match failed, but a replay buffer is converted as a whole.
815 * It may result in some of its units being stored again for a partial match,
816 * but there will not be a continuation _during_ the replay which could fail.
817 *
818 * It is conceivable that a callback function could call the converter
819 * recursively in a way that causes another replay to be stored, but that
820 * would be an error in the callback function.
821 * Such violations will cause assertion failures in a debug build,
822 * and wrong output, but they will not cause a crash.
823 */
824
825static void
826_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
827 UConverterFromUnicode fromUnicode;
828 UConverter *cnv;
829 const UChar *s;
830 char *t;
831 int32_t *offsets;
832 int32_t sourceIndex;
833 int32_t errorInputLength;
834 UBool converterSawEndOfInput, calledCallback;
835
836 /* variables for m:n conversion */
837 UChar replay[UCNV_EXT_MAX_UCHARS];
838 const UChar *realSource, *realSourceLimit;
839 int32_t realSourceIndex;
840 UBool realFlush;
841
842 cnv=pArgs->converter;
843 s=pArgs->source;
844 t=pArgs->target;
845 offsets=pArgs->offsets;
846
847 /* get the converter implementation function */
848 sourceIndex=0;
849 if(offsets==NULL) {
850 fromUnicode=cnv->sharedData->impl->fromUnicode;
851 } else {
852 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
853 if(fromUnicode==NULL) {
854 /* there is no WithOffsets implementation */
855 fromUnicode=cnv->sharedData->impl->fromUnicode;
856 /* we will write -1 for each offset */
857 sourceIndex=-1;
858 }
859 }
860
861 if(cnv->preFromULength>=0) {
862 /* normal mode */
863 realSource=NULL;
864
865 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
866 realSourceLimit=NULL;
867 realFlush=FALSE;
868 realSourceIndex=0;
869 } else {
870 /*
871 * Previous m:n conversion stored source units from a partial match
872 * and failed to consume all of them.
873 * We need to "replay" them from a temporary buffer and convert them first.
874 */
875 realSource=pArgs->source;
876 realSourceLimit=pArgs->sourceLimit;
877 realFlush=pArgs->flush;
878 realSourceIndex=sourceIndex;
879
880 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
881 pArgs->source=replay;
882 pArgs->sourceLimit=replay-cnv->preFromULength;
883 pArgs->flush=FALSE;
884 sourceIndex=-1;
885
886 cnv->preFromULength=0;
887 }
b75a7d8f
A
888
889 /*
374ca955
A
890 * loop for conversion and error handling
891 *
892 * loop {
893 * convert
894 * loop {
895 * update offsets
896 * handle end of input
897 * handle errors/call callback
898 * }
899 * }
900 */
901 for(;;) {
46f4442e
A
902 if(U_SUCCESS(*err)) {
903 /* convert */
904 fromUnicode(pArgs, err);
374ca955 905
46f4442e
A
906 /*
907 * set a flag for whether the converter
908 * successfully processed the end of the input
909 *
910 * need not check cnv->preFromULength==0 because a replay (<0) will cause
911 * s<sourceLimit before converterSawEndOfInput is checked
912 */
913 converterSawEndOfInput=
914 (UBool)(U_SUCCESS(*err) &&
915 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
916 cnv->fromUChar32==0);
917 } else {
918 /* handle error from ucnv_convertEx() */
919 converterSawEndOfInput=FALSE;
920 }
374ca955
A
921
922 /* no callback called yet for this iteration */
923 calledCallback=FALSE;
924
925 /* no sourceIndex adjustment for conversion, only for callback output */
926 errorInputLength=0;
927
928 /*
929 * loop for offsets and error handling
930 *
931 * iterates at most 3 times:
932 * 1. to clean up after the conversion function
933 * 2. after the callback
934 * 3. after the callback again if there was truncated input
935 */
936 for(;;) {
937 /* update offsets if we write any */
938 if(offsets!=NULL) {
939 int32_t length=(int32_t)(pArgs->target-t);
940 if(length>0) {
941 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
942
943 /*
944 * if a converter handles offsets and updates the offsets
945 * pointer at the end, then pArgs->offset should not change
946 * here;
947 * however, some converters do not handle offsets at all
948 * (sourceIndex<0) or may not update the offsets pointer
949 */
950 pArgs->offsets=offsets+=length;
951 }
952
953 if(sourceIndex>=0) {
954 sourceIndex+=(int32_t)(pArgs->source-s);
955 }
956 }
957
958 if(cnv->preFromULength<0) {
959 /*
960 * switch the source to new replay units (cannot occur while replaying)
961 * after offset handling and before end-of-input and callback handling
962 */
963 if(realSource==NULL) {
964 realSource=pArgs->source;
965 realSourceLimit=pArgs->sourceLimit;
966 realFlush=pArgs->flush;
967 realSourceIndex=sourceIndex;
968
969 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
970 pArgs->source=replay;
971 pArgs->sourceLimit=replay-cnv->preFromULength;
972 pArgs->flush=FALSE;
973 if((sourceIndex+=cnv->preFromULength)<0) {
974 sourceIndex=-1;
975 }
976
977 cnv->preFromULength=0;
978 } else {
979 /* see implementation note before _fromUnicodeWithCallback() */
980 U_ASSERT(realSource==NULL);
981 *err=U_INTERNAL_PROGRAM_ERROR;
982 }
983 }
984
985 /* update pointers */
986 s=pArgs->source;
987 t=pArgs->target;
988
989 if(U_SUCCESS(*err)) {
990 if(s<pArgs->sourceLimit) {
991 /*
992 * continue with the conversion loop while there is still input left
993 * (continue converting by breaking out of only the inner loop)
994 */
995 break;
996 } else if(realSource!=NULL) {
997 /* switch back from replaying to the real source and continue */
998 pArgs->source=realSource;
999 pArgs->sourceLimit=realSourceLimit;
1000 pArgs->flush=realFlush;
1001 sourceIndex=realSourceIndex;
1002
1003 realSource=NULL;
1004 break;
1005 } else if(pArgs->flush && cnv->fromUChar32!=0) {
1006 /*
1007 * the entire input stream is consumed
1008 * and there is a partial, truncated input sequence left
1009 */
1010
1011 /* inject an error and continue with callback handling */
1012 *err=U_TRUNCATED_CHAR_FOUND;
1013 calledCallback=FALSE; /* new error condition */
1014 } else {
1015 /* input consumed */
1016 if(pArgs->flush) {
1017 /*
1018 * return to the conversion loop once more if the flush
1019 * flag is set and the conversion function has not
1020 * successfully processed the end of the input yet
1021 *
1022 * (continue converting by breaking out of only the inner loop)
1023 */
1024 if(!converterSawEndOfInput) {
1025 break;
1026 }
1027
1028 /* reset the converter without calling the callback function */
1029 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1030 }
1031
1032 /* done successfully */
1033 return;
1034 }
1035 }
1036
1037 /* U_FAILURE(*err) */
1038 {
1039 UErrorCode e;
1040
1041 if( calledCallback ||
1042 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1043 (e!=U_INVALID_CHAR_FOUND &&
1044 e!=U_ILLEGAL_CHAR_FOUND &&
1045 e!=U_TRUNCATED_CHAR_FOUND)
1046 ) {
1047 /*
1048 * the callback did not or cannot resolve the error:
1049 * set output pointers and return
1050 *
1051 * the check for buffer overflow is redundant but it is
1052 * a high-runner case and hopefully documents the intent
1053 * well
1054 *
1055 * if we were replaying, then the replay buffer must be
1056 * copied back into the UConverter
1057 * and the real arguments must be restored
1058 */
1059 if(realSource!=NULL) {
1060 int32_t length;
1061
1062 U_ASSERT(cnv->preFromULength==0);
1063
1064 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1065 if(length>0) {
1066 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
1067 cnv->preFromULength=(int8_t)-length;
1068 }
1069
1070 pArgs->source=realSource;
1071 pArgs->sourceLimit=realSourceLimit;
1072 pArgs->flush=realFlush;
1073 }
1074
1075 return;
1076 }
1077 }
1078
1079 /* callback handling */
1080 {
1081 UChar32 codePoint;
1082
1083 /* get and write the code point */
1084 codePoint=cnv->fromUChar32;
1085 errorInputLength=0;
1086 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1087 cnv->invalidUCharLength=(int8_t)errorInputLength;
1088
1089 /* set the converter state to deal with the next character */
1090 cnv->fromUChar32=0;
1091
1092 /* call the callback function */
1093 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1094 cnv->invalidUCharBuffer, errorInputLength, codePoint,
1095 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1096 err);
1097 }
1098
1099 /*
1100 * loop back to the offset handling
1101 *
1102 * this flag will indicate after offset handling
1103 * that a callback was called;
1104 * if the callback did not resolve the error, then we return
1105 */
1106 calledCallback=TRUE;
1107 }
1108 }
1109}
1110
46f4442e
A
1111/*
1112 * Output the fromUnicode overflow buffer.
1113 * Call this function if(cnv->charErrorBufferLength>0).
1114 * @return TRUE if overflow
1115 */
1116static UBool
1117ucnv_outputOverflowFromUnicode(UConverter *cnv,
1118 char **target, const char *targetLimit,
1119 int32_t **pOffsets,
1120 UErrorCode *err) {
1121 int32_t *offsets;
1122 char *overflow, *t;
1123 int32_t i, length;
1124
1125 t=*target;
1126 if(pOffsets!=NULL) {
1127 offsets=*pOffsets;
1128 } else {
1129 offsets=NULL;
1130 }
1131
1132 overflow=(char *)cnv->charErrorBuffer;
1133 length=cnv->charErrorBufferLength;
1134 i=0;
1135 while(i<length) {
1136 if(t==targetLimit) {
1137 /* the overflow buffer contains too much, keep the rest */
1138 int32_t j=0;
1139
1140 do {
1141 overflow[j++]=overflow[i++];
1142 } while(i<length);
1143
1144 cnv->charErrorBufferLength=(int8_t)j;
1145 *target=t;
1146 if(offsets!=NULL) {
1147 *pOffsets=offsets;
1148 }
1149 *err=U_BUFFER_OVERFLOW_ERROR;
1150 return TRUE;
1151 }
1152
1153 /* copy the overflow contents to the target */
1154 *t++=overflow[i++];
1155 if(offsets!=NULL) {
1156 *offsets++=-1; /* no source index available for old output */
1157 }
1158 }
1159
1160 /* the overflow buffer is completely copied to the target */
1161 cnv->charErrorBufferLength=0;
1162 *target=t;
1163 if(offsets!=NULL) {
1164 *pOffsets=offsets;
1165 }
1166 return FALSE;
1167}
1168
374ca955
A
1169U_CAPI void U_EXPORT2
1170ucnv_fromUnicode(UConverter *cnv,
1171 char **target, const char *targetLimit,
1172 const UChar **source, const UChar *sourceLimit,
1173 int32_t *offsets,
1174 UBool flush,
1175 UErrorCode *err) {
1176 UConverterFromUnicodeArgs args;
1177 const UChar *s;
1178 char *t;
1179
1180 /* check parameters */
1181 if(err==NULL || U_FAILURE(*err)) {
b75a7d8f
A
1182 return;
1183 }
1184
374ca955
A
1185 if(cnv==NULL || target==NULL || source==NULL) {
1186 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1187 return;
1188 }
1189
374ca955
A
1190 s=*source;
1191 t=*target;
46f4442e
A
1192
1193 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
1194 /*
1195 Prevent code from going into an infinite loop in case we do hit this
1196 limit. The limit pointer is expected to be on a UChar * boundary.
1197 This also prevents the next argument check from failing.
1198 */
1199 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
b75a7d8f
A
1200 }
1201
1202 /*
46f4442e
A
1203 * All these conditions should never happen.
1204 *
1205 * 1) Make sure that the limits are >= to the address source or target
1206 *
1207 * 2) Make sure that the buffer sizes do not exceed the number range for
374ca955
A
1208 * int32_t because some functions use the size (in units or bytes)
1209 * rather than comparing pointers, and because offsets are int32_t values.
1210 *
1211 * size_t is guaranteed to be unsigned and large enough for the job.
1212 *
1213 * Return with an error instead of adjusting the limits because we would
1214 * not be able to maintain the semantics that either the source must be
1215 * consumed or the target filled (unless an error occurs).
1216 * An adjustment would be targetLimit=t+0x7fffffff; for example.
46f4442e
A
1217 *
1218 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1219 * to a char * pointer and provide an incomplete UChar code unit.
374ca955 1220 */
46f4442e 1221 if (sourceLimit<s || targetLimit<t ||
374ca955 1222 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
46f4442e
A
1223 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
1224 (((const char *)sourceLimit-(const char *)s) & 1) != 0)
1225 {
374ca955
A
1226 *err=U_ILLEGAL_ARGUMENT_ERROR;
1227 return;
b75a7d8f
A
1228 }
1229
46f4442e
A
1230 /* output the target overflow buffer */
1231 if( cnv->charErrorBufferLength>0 &&
1232 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
1233 ) {
1234 /* U_BUFFER_OVERFLOW_ERROR */
1235 return;
b75a7d8f 1236 }
46f4442e 1237 /* *target may have moved, therefore stop using t */
b75a7d8f 1238
374ca955 1239 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
b75a7d8f
A
1240 /* the overflow buffer is emptied and there is no new input: we are done */
1241 return;
1242 }
1243
374ca955
A
1244 /*
1245 * Do not simply return with a buffer overflow error if
1246 * !flush && t==targetLimit
1247 * because it is possible that the source will not generate any output.
1248 * For example, the skip callback may be called;
1249 * it does not output anything.
1250 */
1251
1252 /* prepare the converter arguments */
1253 args.converter=cnv;
1254 args.flush=flush;
1255 args.offsets=offsets;
1256 args.source=s;
1257 args.sourceLimit=sourceLimit;
46f4442e 1258 args.target=*target;
374ca955
A
1259 args.targetLimit=targetLimit;
1260 args.size=sizeof(args);
1261
1262 _fromUnicodeWithCallback(&args, err);
1263
1264 *source=args.source;
1265 *target=args.target;
1266}
1267
1268/* ucnv_toUnicode() --------------------------------------------------------- */
1269
1270static void
1271_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1272 UConverterToUnicode toUnicode;
1273 UConverter *cnv;
1274 const char *s;
1275 UChar *t;
1276 int32_t *offsets;
1277 int32_t sourceIndex;
1278 int32_t errorInputLength;
1279 UBool converterSawEndOfInput, calledCallback;
1280
1281 /* variables for m:n conversion */
1282 char replay[UCNV_EXT_MAX_BYTES];
1283 const char *realSource, *realSourceLimit;
1284 int32_t realSourceIndex;
1285 UBool realFlush;
1286
1287 cnv=pArgs->converter;
1288 s=pArgs->source;
1289 t=pArgs->target;
1290 offsets=pArgs->offsets;
1291
1292 /* get the converter implementation function */
1293 sourceIndex=0;
1294 if(offsets==NULL) {
1295 toUnicode=cnv->sharedData->impl->toUnicode;
1296 } else {
1297 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1298 if(toUnicode==NULL) {
1299 /* there is no WithOffsets implementation */
1300 toUnicode=cnv->sharedData->impl->toUnicode;
1301 /* we will write -1 for each offset */
1302 sourceIndex=-1;
1303 }
1304 }
1305
1306 if(cnv->preToULength>=0) {
1307 /* normal mode */
1308 realSource=NULL;
1309
1310 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1311 realSourceLimit=NULL;
1312 realFlush=FALSE;
1313 realSourceIndex=0;
1314 } else {
1315 /*
1316 * Previous m:n conversion stored source units from a partial match
1317 * and failed to consume all of them.
1318 * We need to "replay" them from a temporary buffer and convert them first.
1319 */
1320 realSource=pArgs->source;
1321 realSourceLimit=pArgs->sourceLimit;
1322 realFlush=pArgs->flush;
1323 realSourceIndex=sourceIndex;
1324
1325 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1326 pArgs->source=replay;
1327 pArgs->sourceLimit=replay-cnv->preToULength;
1328 pArgs->flush=FALSE;
1329 sourceIndex=-1;
1330
1331 cnv->preToULength=0;
1332 }
1333
1334 /*
1335 * loop for conversion and error handling
1336 *
1337 * loop {
1338 * convert
1339 * loop {
1340 * update offsets
1341 * handle end of input
1342 * handle errors/call callback
1343 * }
1344 * }
1345 */
1346 for(;;) {
1347 if(U_SUCCESS(*err)) {
1348 /* convert */
1349 toUnicode(pArgs, err);
1350
1351 /*
1352 * set a flag for whether the converter
1353 * successfully processed the end of the input
1354 *
1355 * need not check cnv->preToULength==0 because a replay (<0) will cause
1356 * s<sourceLimit before converterSawEndOfInput is checked
1357 */
1358 converterSawEndOfInput=
1359 (UBool)(U_SUCCESS(*err) &&
1360 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1361 cnv->toULength==0);
1362 } else {
46f4442e 1363 /* handle error from getNextUChar() or ucnv_convertEx() */
374ca955 1364 converterSawEndOfInput=FALSE;
b75a7d8f 1365 }
374ca955
A
1366
1367 /* no callback called yet for this iteration */
1368 calledCallback=FALSE;
1369
1370 /* no sourceIndex adjustment for conversion, only for callback output */
1371 errorInputLength=0;
1372
1373 /*
1374 * loop for offsets and error handling
1375 *
1376 * iterates at most 3 times:
1377 * 1. to clean up after the conversion function
1378 * 2. after the callback
1379 * 3. after the callback again if there was truncated input
1380 */
1381 for(;;) {
1382 /* update offsets if we write any */
1383 if(offsets!=NULL) {
1384 int32_t length=(int32_t)(pArgs->target-t);
1385 if(length>0) {
1386 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1387
1388 /*
1389 * if a converter handles offsets and updates the offsets
1390 * pointer at the end, then pArgs->offset should not change
1391 * here;
1392 * however, some converters do not handle offsets at all
1393 * (sourceIndex<0) or may not update the offsets pointer
1394 */
1395 pArgs->offsets=offsets+=length;
1396 }
1397
1398 if(sourceIndex>=0) {
1399 sourceIndex+=(int32_t)(pArgs->source-s);
1400 }
1401 }
1402
1403 if(cnv->preToULength<0) {
1404 /*
1405 * switch the source to new replay units (cannot occur while replaying)
1406 * after offset handling and before end-of-input and callback handling
1407 */
1408 if(realSource==NULL) {
1409 realSource=pArgs->source;
1410 realSourceLimit=pArgs->sourceLimit;
1411 realFlush=pArgs->flush;
1412 realSourceIndex=sourceIndex;
1413
1414 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1415 pArgs->source=replay;
1416 pArgs->sourceLimit=replay-cnv->preToULength;
1417 pArgs->flush=FALSE;
1418 if((sourceIndex+=cnv->preToULength)<0) {
1419 sourceIndex=-1;
1420 }
1421
1422 cnv->preToULength=0;
1423 } else {
1424 /* see implementation note before _fromUnicodeWithCallback() */
1425 U_ASSERT(realSource==NULL);
1426 *err=U_INTERNAL_PROGRAM_ERROR;
1427 }
1428 }
1429
1430 /* update pointers */
1431 s=pArgs->source;
1432 t=pArgs->target;
1433
1434 if(U_SUCCESS(*err)) {
1435 if(s<pArgs->sourceLimit) {
1436 /*
1437 * continue with the conversion loop while there is still input left
1438 * (continue converting by breaking out of only the inner loop)
1439 */
1440 break;
1441 } else if(realSource!=NULL) {
1442 /* switch back from replaying to the real source and continue */
1443 pArgs->source=realSource;
1444 pArgs->sourceLimit=realSourceLimit;
1445 pArgs->flush=realFlush;
1446 sourceIndex=realSourceIndex;
1447
1448 realSource=NULL;
1449 break;
1450 } else if(pArgs->flush && cnv->toULength>0) {
1451 /*
1452 * the entire input stream is consumed
1453 * and there is a partial, truncated input sequence left
1454 */
1455
1456 /* inject an error and continue with callback handling */
1457 *err=U_TRUNCATED_CHAR_FOUND;
1458 calledCallback=FALSE; /* new error condition */
1459 } else {
1460 /* input consumed */
1461 if(pArgs->flush) {
1462 /*
1463 * return to the conversion loop once more if the flush
1464 * flag is set and the conversion function has not
1465 * successfully processed the end of the input yet
1466 *
1467 * (continue converting by breaking out of only the inner loop)
1468 */
1469 if(!converterSawEndOfInput) {
1470 break;
1471 }
1472
1473 /* reset the converter without calling the callback function */
1474 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1475 }
1476
1477 /* done successfully */
1478 return;
1479 }
b75a7d8f 1480 }
374ca955
A
1481
1482 /* U_FAILURE(*err) */
1483 {
1484 UErrorCode e;
1485
1486 if( calledCallback ||
1487 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1488 (e!=U_INVALID_CHAR_FOUND &&
1489 e!=U_ILLEGAL_CHAR_FOUND &&
1490 e!=U_TRUNCATED_CHAR_FOUND &&
1491 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
46f4442e 1492 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
374ca955
A
1493 ) {
1494 /*
1495 * the callback did not or cannot resolve the error:
1496 * set output pointers and return
1497 *
1498 * the check for buffer overflow is redundant but it is
1499 * a high-runner case and hopefully documents the intent
1500 * well
1501 *
1502 * if we were replaying, then the replay buffer must be
1503 * copied back into the UConverter
1504 * and the real arguments must be restored
1505 */
1506 if(realSource!=NULL) {
1507 int32_t length;
1508
1509 U_ASSERT(cnv->preToULength==0);
1510
1511 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1512 if(length>0) {
1513 uprv_memcpy(cnv->preToU, pArgs->source, length);
1514 cnv->preToULength=(int8_t)-length;
1515 }
1516
1517 pArgs->source=realSource;
1518 pArgs->sourceLimit=realSourceLimit;
1519 pArgs->flush=realFlush;
1520 }
1521
1522 return;
1523 }
1524 }
1525
1526 /* copy toUBytes[] to invalidCharBuffer[] */
1527 errorInputLength=cnv->invalidCharLength=cnv->toULength;
1528 if(errorInputLength>0) {
1529 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1530 }
1531
1532 /* set the converter state to deal with the next character */
1533 cnv->toULength=0;
1534
1535 /* call the callback function */
46f4442e
A
1536 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
1537 cnv->toUCallbackReason = UCNV_UNASSIGNED;
d5d484b0 1538 }
46f4442e
A
1539 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1540 cnv->invalidCharBuffer, errorInputLength,
1541 cnv->toUCallbackReason,
1542 err);
1543 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
374ca955
A
1544
1545 /*
1546 * loop back to the offset handling
1547 *
1548 * this flag will indicate after offset handling
1549 * that a callback was called;
1550 * if the callback did not resolve the error, then we return
1551 */
1552 calledCallback=TRUE;
b75a7d8f
A
1553 }
1554 }
b75a7d8f
A
1555}
1556
46f4442e
A
1557/*
1558 * Output the toUnicode overflow buffer.
1559 * Call this function if(cnv->UCharErrorBufferLength>0).
1560 * @return TRUE if overflow
1561 */
1562static UBool
1563ucnv_outputOverflowToUnicode(UConverter *cnv,
1564 UChar **target, const UChar *targetLimit,
1565 int32_t **pOffsets,
1566 UErrorCode *err) {
1567 int32_t *offsets;
1568 UChar *overflow, *t;
1569 int32_t i, length;
1570
1571 t=*target;
1572 if(pOffsets!=NULL) {
1573 offsets=*pOffsets;
1574 } else {
1575 offsets=NULL;
1576 }
1577
1578 overflow=cnv->UCharErrorBuffer;
1579 length=cnv->UCharErrorBufferLength;
1580 i=0;
1581 while(i<length) {
1582 if(t==targetLimit) {
1583 /* the overflow buffer contains too much, keep the rest */
1584 int32_t j=0;
1585
1586 do {
1587 overflow[j++]=overflow[i++];
1588 } while(i<length);
1589
1590 cnv->UCharErrorBufferLength=(int8_t)j;
1591 *target=t;
1592 if(offsets!=NULL) {
1593 *pOffsets=offsets;
1594 }
1595 *err=U_BUFFER_OVERFLOW_ERROR;
1596 return TRUE;
1597 }
1598
1599 /* copy the overflow contents to the target */
1600 *t++=overflow[i++];
1601 if(offsets!=NULL) {
1602 *offsets++=-1; /* no source index available for old output */
1603 }
1604 }
1605
1606 /* the overflow buffer is completely copied to the target */
1607 cnv->UCharErrorBufferLength=0;
1608 *target=t;
1609 if(offsets!=NULL) {
1610 *pOffsets=offsets;
1611 }
1612 return FALSE;
1613}
1614
374ca955
A
1615U_CAPI void U_EXPORT2
1616ucnv_toUnicode(UConverter *cnv,
1617 UChar **target, const UChar *targetLimit,
1618 const char **source, const char *sourceLimit,
1619 int32_t *offsets,
1620 UBool flush,
1621 UErrorCode *err) {
b75a7d8f 1622 UConverterToUnicodeArgs args;
374ca955
A
1623 const char *s;
1624 UChar *t;
b75a7d8f 1625
374ca955
A
1626 /* check parameters */
1627 if(err==NULL || U_FAILURE(*err)) {
b75a7d8f
A
1628 return;
1629 }
1630
374ca955
A
1631 if(cnv==NULL || target==NULL || source==NULL) {
1632 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1633 return;
1634 }
1635
374ca955
A
1636 s=*source;
1637 t=*target;
46f4442e
A
1638
1639 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
1640 /*
1641 Prevent code from going into an infinite loop in case we do hit this
1642 limit. The limit pointer is expected to be on a UChar * boundary.
1643 This also prevents the next argument check from failing.
1644 */
1645 targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
b75a7d8f
A
1646 }
1647
1648 /*
46f4442e
A
1649 * All these conditions should never happen.
1650 *
1651 * 1) Make sure that the limits are >= to the address source or target
1652 *
1653 * 2) Make sure that the buffer sizes do not exceed the number range for
374ca955
A
1654 * int32_t because some functions use the size (in units or bytes)
1655 * rather than comparing pointers, and because offsets are int32_t values.
1656 *
1657 * size_t is guaranteed to be unsigned and large enough for the job.
1658 *
1659 * Return with an error instead of adjusting the limits because we would
1660 * not be able to maintain the semantics that either the source must be
1661 * consumed or the target filled (unless an error occurs).
1662 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
46f4442e
A
1663 *
1664 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1665 * to a char * pointer and provide an incomplete UChar code unit.
374ca955 1666 */
46f4442e 1667 if (sourceLimit<s || targetLimit<t ||
374ca955 1668 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
46f4442e
A
1669 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
1670 (((const char *)targetLimit-(const char *)t) & 1) != 0
374ca955
A
1671 ) {
1672 *err=U_ILLEGAL_ARGUMENT_ERROR;
1673 return;
b75a7d8f 1674 }
374ca955 1675
46f4442e
A
1676 /* output the target overflow buffer */
1677 if( cnv->UCharErrorBufferLength>0 &&
1678 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
1679 ) {
1680 /* U_BUFFER_OVERFLOW_ERROR */
1681 return;
b75a7d8f 1682 }
46f4442e 1683 /* *target may have moved, therefore stop using t */
b75a7d8f 1684
374ca955 1685 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
b75a7d8f
A
1686 /* the overflow buffer is emptied and there is no new input: we are done */
1687 return;
1688 }
1689
374ca955
A
1690 /*
1691 * Do not simply return with a buffer overflow error if
1692 * !flush && t==targetLimit
1693 * because it is possible that the source will not generate any output.
1694 * For example, the skip callback may be called;
1695 * it does not output anything.
1696 */
b75a7d8f 1697
374ca955
A
1698 /* prepare the converter arguments */
1699 args.converter=cnv;
1700 args.flush=flush;
1701 args.offsets=offsets;
1702 args.source=s;
1703 args.sourceLimit=sourceLimit;
46f4442e 1704 args.target=*target;
374ca955
A
1705 args.targetLimit=targetLimit;
1706 args.size=sizeof(args);
b75a7d8f 1707
374ca955
A
1708 _toUnicodeWithCallback(&args, err);
1709
1710 *source=args.source;
1711 *target=args.target;
b75a7d8f
A
1712}
1713
374ca955
A
1714/* ucnv_to/fromUChars() ----------------------------------------------------- */
1715
b75a7d8f
A
1716U_CAPI int32_t U_EXPORT2
1717ucnv_fromUChars(UConverter *cnv,
1718 char *dest, int32_t destCapacity,
1719 const UChar *src, int32_t srcLength,
1720 UErrorCode *pErrorCode) {
1721 const UChar *srcLimit;
1722 char *originalDest, *destLimit;
1723 int32_t destLength;
1724
1725 /* check arguments */
1726 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1727 return 0;
1728 }
1729
1730 if( cnv==NULL ||
1731 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1732 srcLength<-1 || (srcLength!=0 && src==NULL)
1733 ) {
1734 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1735 return 0;
1736 }
1737
1738 /* initialize */
1739 ucnv_resetFromUnicode(cnv);
1740 originalDest=dest;
1741 if(srcLength==-1) {
1742 srcLength=u_strlen(src);
1743 }
1744 if(srcLength>0) {
1745 srcLimit=src+srcLength;
1746 destLimit=dest+destCapacity;
1747
1748 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1749 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1750 destLimit=(char *)U_MAX_PTR(dest);
1751 }
1752
1753 /* perform the conversion */
1754 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1755 destLength=(int32_t)(dest-originalDest);
1756
1757 /* if an overflow occurs, then get the preflighting length */
1758 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1759 char buffer[1024];
1760
1761 destLimit=buffer+sizeof(buffer);
1762 do {
1763 dest=buffer;
1764 *pErrorCode=U_ZERO_ERROR;
1765 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1766 destLength+=(int32_t)(dest-buffer);
1767 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1768 }
1769 } else {
1770 destLength=0;
1771 }
1772
1773 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1774}
1775
1776U_CAPI int32_t U_EXPORT2
1777ucnv_toUChars(UConverter *cnv,
1778 UChar *dest, int32_t destCapacity,
1779 const char *src, int32_t srcLength,
1780 UErrorCode *pErrorCode) {
1781 const char *srcLimit;
1782 UChar *originalDest, *destLimit;
1783 int32_t destLength;
1784
1785 /* check arguments */
1786 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1787 return 0;
1788 }
1789
1790 if( cnv==NULL ||
1791 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1792 srcLength<-1 || (srcLength!=0 && src==NULL))
1793 {
1794 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1795 return 0;
1796 }
1797
1798 /* initialize */
1799 ucnv_resetToUnicode(cnv);
1800 originalDest=dest;
1801 if(srcLength==-1) {
73c04bcf 1802 srcLength=(int32_t)uprv_strlen(src);
b75a7d8f
A
1803 }
1804 if(srcLength>0) {
1805 srcLimit=src+srcLength;
1806 destLimit=dest+destCapacity;
1807
1808 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1809 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1810 destLimit=(UChar *)U_MAX_PTR(dest);
1811 }
1812
1813 /* perform the conversion */
1814 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1815 destLength=(int32_t)(dest-originalDest);
1816
1817 /* if an overflow occurs, then get the preflighting length */
1818 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1819 {
1820 UChar buffer[1024];
1821
1822 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1823 do {
1824 dest=buffer;
1825 *pErrorCode=U_ZERO_ERROR;
1826 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1827 destLength+=(int32_t)(dest-buffer);
1828 }
1829 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1830 }
1831 } else {
1832 destLength=0;
1833 }
1834
1835 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1836}
1837
374ca955
A
1838/* ucnv_getNextUChar() ------------------------------------------------------ */
1839
1840U_CAPI UChar32 U_EXPORT2
1841ucnv_getNextUChar(UConverter *cnv,
1842 const char **source, const char *sourceLimit,
1843 UErrorCode *err) {
b75a7d8f 1844 UConverterToUnicodeArgs args;
374ca955
A
1845 UChar buffer[U16_MAX_LENGTH];
1846 const char *s;
1847 UChar32 c;
1848 int32_t i, length;
b75a7d8f 1849
374ca955
A
1850 /* check parameters */
1851 if(err==NULL || U_FAILURE(*err)) {
b75a7d8f
A
1852 return 0xffff;
1853 }
1854
374ca955
A
1855 if(cnv==NULL || source==NULL) {
1856 *err=U_ILLEGAL_ARGUMENT_ERROR;
b75a7d8f
A
1857 return 0xffff;
1858 }
1859
374ca955
A
1860 s=*source;
1861 if(sourceLimit<s) {
1862 *err=U_ILLEGAL_ARGUMENT_ERROR;
1863 return 0xffff;
1864 }
1865
1866 /*
1867 * Make sure that the buffer sizes do not exceed the number range for
1868 * int32_t because some functions use the size (in units or bytes)
1869 * rather than comparing pointers, and because offsets are int32_t values.
1870 *
1871 * size_t is guaranteed to be unsigned and large enough for the job.
1872 *
1873 * Return with an error instead of adjusting the limits because we would
1874 * not be able to maintain the semantics that either the source must be
1875 * consumed or the target filled (unless an error occurs).
1876 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1877 */
1878 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1879 *err=U_ILLEGAL_ARGUMENT_ERROR;
1880 return 0xffff;
1881 }
1882
1883 c=U_SENTINEL;
1884
1885 /* flush the target overflow buffer */
1886 if(cnv->UCharErrorBufferLength>0) {
1887 UChar *overflow;
1888
1889 overflow=cnv->UCharErrorBuffer;
1890 i=0;
1891 length=cnv->UCharErrorBufferLength;
1892 U16_NEXT(overflow, i, length, c);
1893
1894 /* move the remaining overflow contents up to the beginning */
1895 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1896 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1897 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1898 }
1899
1900 if(!U16_IS_LEAD(c) || i<length) {
1901 return c;
1902 }
1903 /*
1904 * Continue if the overflow buffer contained only a lead surrogate,
1905 * in case the converter outputs single surrogates from complete
1906 * input sequences.
1907 */
1908 }
1909
1910 /*
1911 * flush==TRUE is implied for ucnv_getNextUChar()
1912 *
1913 * do not simply return even if s==sourceLimit because the converter may
1914 * not have seen flush==TRUE before
1915 */
1916
1917 /* prepare the converter arguments */
1918 args.converter=cnv;
1919 args.flush=TRUE;
1920 args.offsets=NULL;
1921 args.source=s;
1922 args.sourceLimit=sourceLimit;
1923 args.target=buffer;
1924 args.targetLimit=buffer+1;
1925 args.size=sizeof(args);
1926
1927 if(c<0) {
1928 /*
1929 * call the native getNextUChar() implementation if we are
1930 * at a character boundary (toULength==0)
1931 *
1932 * unlike with _toUnicode(), getNextUChar() implementations must set
1933 * U_TRUNCATED_CHAR_FOUND for truncated input,
1934 * in addition to setting toULength/toUBytes[]
1935 */
1936 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1937 c=cnv->sharedData->impl->getNextUChar(&args, err);
1938 *source=s=args.source;
1939 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1940 /* reset the converter without calling the callback function */
1941 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1942 return 0xffff; /* no output */
1943 } else if(U_SUCCESS(*err) && c>=0) {
1944 return c;
1945 /*
1946 * else fall through to use _toUnicode() because
1947 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1948 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1949 */
1950 }
1951 }
1952
1953 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1954 _toUnicodeWithCallback(&args, err);
1955
1956 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1957 *err=U_ZERO_ERROR;
1958 }
1959
1960 i=0;
1961 length=(int32_t)(args.target-buffer);
1962 } else {
1963 /* write the lead surrogate from the overflow buffer */
1964 buffer[0]=(UChar)c;
1965 args.target=buffer+1;
1966 i=0;
1967 length=1;
1968 }
1969
1970 /* buffer contents starts at i and ends before length */
1971
1972 if(U_FAILURE(*err)) {
1973 c=0xffff; /* no output */
1974 } else if(length==0) {
1975 /* no input or only state changes */
1976 *err=U_INDEX_OUTOFBOUNDS_ERROR;
1977 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1978 c=0xffff; /* no output */
b75a7d8f 1979 } else {
374ca955
A
1980 c=buffer[0];
1981 i=1;
1982 if(!U16_IS_LEAD(c)) {
1983 /* consume c=buffer[0], done */
1984 } else {
1985 /* got a lead surrogate, see if a trail surrogate follows */
1986 UChar c2;
1987
1988 if(cnv->UCharErrorBufferLength>0) {
1989 /* got overflow output from the conversion */
1990 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1991 /* got a trail surrogate, too */
1992 c=U16_GET_SUPPLEMENTARY(c, c2);
1993
1994 /* move the remaining overflow contents up to the beginning */
1995 if((--cnv->UCharErrorBufferLength)>0) {
1996 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1997 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1998 }
1999 } else {
2000 /* c is an unpaired lead surrogate, just return it */
2001 }
2002 } else if(args.source<sourceLimit) {
2003 /* convert once more, to buffer[1] */
2004 args.targetLimit=buffer+2;
2005 _toUnicodeWithCallback(&args, err);
2006 if(*err==U_BUFFER_OVERFLOW_ERROR) {
2007 *err=U_ZERO_ERROR;
2008 }
2009
2010 length=(int32_t)(args.target-buffer);
2011 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
2012 /* got a trail surrogate, too */
2013 c=U16_GET_SUPPLEMENTARY(c, c2);
2014 i=2;
2015 }
2016 }
2017 }
2018 }
2019
2020 /*
2021 * move leftover output from buffer[i..length[
2022 * into the beginning of the overflow buffer
2023 */
2024 if(i<length) {
2025 /* move further overflow back */
2026 int32_t delta=length-i;
2027 if((length=cnv->UCharErrorBufferLength)>0) {
2028 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
2029 length*U_SIZEOF_UCHAR);
2030 }
2031 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
2032
2033 cnv->UCharErrorBuffer[0]=buffer[i++];
2034 if(delta>1) {
2035 cnv->UCharErrorBuffer[1]=buffer[i];
2036 }
b75a7d8f 2037 }
374ca955
A
2038
2039 *source=args.source;
2040 return c;
b75a7d8f
A
2041}
2042
374ca955
A
2043/* ucnv_convert() and siblings ---------------------------------------------- */
2044
b75a7d8f
A
2045U_CAPI void U_EXPORT2
2046ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
2047 char **target, const char *targetLimit,
2048 const char **source, const char *sourceLimit,
2049 UChar *pivotStart, UChar **pivotSource,
2050 UChar **pivotTarget, const UChar *pivotLimit,
2051 UBool reset, UBool flush,
2052 UErrorCode *pErrorCode) {
2053 UChar pivotBuffer[CHUNK_SIZE];
46f4442e
A
2054 const UChar *myPivotSource;
2055 UChar *myPivotTarget;
2056 const char *s;
2057 char *t;
2058
2059 UConverterToUnicodeArgs toUArgs;
2060 UConverterFromUnicodeArgs fromUArgs;
2061 UConverterConvert convert;
b75a7d8f
A
2062
2063 /* error checking */
2064 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2065 return;
2066 }
2067
2068 if( targetCnv==NULL || sourceCnv==NULL ||
2069 source==NULL || *source==NULL ||
2070 target==NULL || *target==NULL || targetLimit==NULL
2071 ) {
2072 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2073 return;
2074 }
2075
46f4442e
A
2076 s=*source;
2077 t=*target;
2078 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
2079 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2080 return;
2081 }
2082
2083 /*
2084 * Make sure that the buffer sizes do not exceed the number range for
2085 * int32_t. See ucnv_toUnicode() for a more detailed comment.
2086 */
2087 if(
2088 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
2089 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
2090 ) {
2091 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2092 return;
2093 }
2094
b75a7d8f 2095 if(pivotStart==NULL) {
73c04bcf
A
2096 if(!flush) {
2097 /* streaming conversion requires an explicit pivot buffer */
2098 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2099 return;
2100 }
2101
b75a7d8f 2102 /* use the stack pivot buffer */
46f4442e
A
2103 myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
2104 pivotSource=(UChar **)&myPivotSource;
b75a7d8f
A
2105 pivotTarget=&myPivotTarget;
2106 pivotLimit=pivotBuffer+CHUNK_SIZE;
2107 } else if( pivotStart>=pivotLimit ||
2108 pivotSource==NULL || *pivotSource==NULL ||
2109 pivotTarget==NULL || *pivotTarget==NULL ||
2110 pivotLimit==NULL
2111 ) {
2112 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2113 return;
2114 }
2115
2116 if(sourceLimit==NULL) {
2117 /* get limit of single-byte-NUL-terminated source string */
2118 sourceLimit=uprv_strchr(*source, 0);
2119 }
2120
2121 if(reset) {
2122 ucnv_resetToUnicode(sourceCnv);
2123 ucnv_resetFromUnicode(targetCnv);
46f4442e
A
2124 *pivotSource=*pivotTarget=pivotStart;
2125 } else if(targetCnv->charErrorBufferLength>0) {
2126 /* output the targetCnv overflow buffer */
2127 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
2128 /* U_BUFFER_OVERFLOW_ERROR */
2129 return;
2130 }
2131 /* *target has moved, therefore stop using t */
2132
2133 if( !flush &&
2134 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
2135 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
2136 ) {
2137 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2138 return;
2139 }
2140 }
2141
2142 /* Is direct-UTF-8 conversion available? */
2143 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2144 targetCnv->sharedData->impl->fromUTF8!=NULL
2145 ) {
2146 convert=targetCnv->sharedData->impl->fromUTF8;
2147 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2148 sourceCnv->sharedData->impl->toUTF8!=NULL
2149 ) {
2150 convert=sourceCnv->sharedData->impl->toUTF8;
2151 } else {
2152 convert=NULL;
b75a7d8f
A
2153 }
2154
46f4442e
A
2155 /*
2156 * If direct-UTF-8 conversion is available, then we use a smaller
2157 * pivot buffer for error handling and partial matches
2158 * so that we quickly return to direct conversion.
2159 *
2160 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2161 *
2162 * We could reduce the pivot buffer size further, at the cost of
2163 * buffer overflows from callbacks.
2164 * The pivot buffer should not be smaller than the maximum number of
2165 * fromUnicode extension table input UChars
2166 * (for m:n conversion, see
2167 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2168 * or 2 for surrogate pairs.
2169 *
2170 * Too small a buffer can cause thrashing between pivoting and direct
2171 * conversion, with function call overhead outweighing the benefits
2172 * of direct conversion.
2173 */
2174 if(convert!=NULL && (pivotLimit-pivotStart)>32) {
2175 pivotLimit=pivotStart+32;
2176 }
2177
2178 /* prepare the converter arguments */
2179 fromUArgs.converter=targetCnv;
2180 fromUArgs.flush=FALSE;
2181 fromUArgs.offsets=NULL;
2182 fromUArgs.target=*target;
2183 fromUArgs.targetLimit=targetLimit;
2184 fromUArgs.size=sizeof(fromUArgs);
2185
2186 toUArgs.converter=sourceCnv;
2187 toUArgs.flush=flush;
2188 toUArgs.offsets=NULL;
2189 toUArgs.source=s;
2190 toUArgs.sourceLimit=sourceLimit;
2191 toUArgs.targetLimit=pivotLimit;
2192 toUArgs.size=sizeof(toUArgs);
2193
2194 /*
2195 * TODO: Consider separating this function into two functions,
2196 * extracting exactly the conversion loop,
2197 * for readability and to reduce the set of visible variables.
2198 *
2199 * Otherwise stop using s and t from here on.
2200 */
2201 s=t=NULL;
2202
2203 /*
2204 * conversion loop
2205 *
2206 * The sequence of steps in the loop may appear backward,
2207 * but the principle is simple:
2208 * In the chain of
2209 * source - sourceCnv overflow - pivot - targetCnv overflow - target
2210 * empty out later buffers before refilling them from earlier ones.
2211 *
2212 * The targetCnv overflow buffer is flushed out only once before the loop.
2213 */
b75a7d8f 2214 for(;;) {
46f4442e
A
2215 /*
2216 * if(pivot not empty or error or replay or flush fromUnicode) {
2217 * fromUnicode(pivot -> target);
2218 * }
2219 *
2220 * For pivoting conversion; and for direct conversion for
2221 * error callback handling and flushing the replay buffer.
2222 */
2223 if( *pivotSource<*pivotTarget ||
2224 U_FAILURE(*pErrorCode) ||
2225 targetCnv->preFromULength<0 ||
2226 fromUArgs.flush
2227 ) {
2228 fromUArgs.source=*pivotSource;
2229 fromUArgs.sourceLimit=*pivotTarget;
2230 _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
2231 if(U_FAILURE(*pErrorCode)) {
2232 /* target overflow, or conversion error */
2233 *pivotSource=(UChar *)fromUArgs.source;
2234 break;
2235 }
2236
b75a7d8f 2237 /*
46f4442e
A
2238 * _fromUnicodeWithCallback() must have consumed the pivot contents
2239 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
b75a7d8f 2240 */
46f4442e
A
2241 }
2242
2243 /* The pivot buffer is empty; reset it so we start at pivotStart. */
2244 *pivotSource=*pivotTarget=pivotStart;
2245
2246 /*
2247 * if(sourceCnv overflow buffer not empty) {
2248 * move(sourceCnv overflow buffer -> pivot);
2249 * continue;
2250 * }
2251 */
2252 /* output the sourceCnv overflow buffer */
2253 if(sourceCnv->UCharErrorBufferLength>0) {
2254 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
2255 /* U_BUFFER_OVERFLOW_ERROR */
2256 *pErrorCode=U_ZERO_ERROR;
2257 }
2258 continue;
2259 }
2260
2261 /*
2262 * check for end of input and break if done
2263 *
2264 * Checking both flush and fromUArgs.flush ensures that the converters
2265 * have been called with the flush flag set if the ucnv_convertEx()
2266 * caller set it.
2267 */
2268 if( toUArgs.source==sourceLimit &&
2269 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
2270 (!flush || fromUArgs.flush)
2271 ) {
2272 /* done successfully */
2273 break;
2274 }
2275
2276 /*
2277 * use direct conversion if available
2278 * but not if continuing a partial match
2279 * or flushing the toUnicode replay buffer
2280 */
2281 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
2282 if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2283 /* remove a warning that may be set by this function */
2284 *pErrorCode=U_ZERO_ERROR;
2285 }
2286 convert(&fromUArgs, &toUArgs, pErrorCode);
2287 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2288 break;
2289 } else if(U_FAILURE(*pErrorCode)) {
2290 if(sourceCnv->toULength>0) {
2291 /*
2292 * Fall through to calling _toUnicodeWithCallback()
2293 * for callback handling.
2294 *
2295 * The pivot buffer will be reset with
2296 * *pivotSource=*pivotTarget=pivotStart;
2297 * which indicates a toUnicode error to the caller
2298 * (*pivotSource==pivotStart shows no pivot UChars consumed).
2299 */
2300 } else {
2301 /*
2302 * Indicate a fromUnicode error to the caller
2303 * (*pivotSource>pivotStart shows some pivot UChars consumed).
2304 */
2305 *pivotSource=*pivotTarget=pivotStart+1;
2306 /*
2307 * Loop around to calling _fromUnicodeWithCallbacks()
2308 * for callback handling.
2309 */
2310 continue;
2311 }
2312 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2313 /*
2314 * No error, but the implementation requested to temporarily
2315 * fall back to pivoting.
2316 */
2317 *pErrorCode=U_ZERO_ERROR;
b75a7d8f 2318 /*
46f4442e
A
2319 * The following else branches are almost identical to the end-of-input
2320 * handling in _toUnicodeWithCallback().
2321 * Avoid calling it just for the end of input.
b75a7d8f 2322 */
46f4442e
A
2323 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
2324 /*
2325 * the entire input stream is consumed
2326 * and there is a partial, truncated input sequence left
2327 */
2328
2329 /* inject an error and continue with callback handling */
2330 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
2331 } else {
2332 /* input consumed */
2333 if(flush) {
2334 /* reset the converters without calling the callback functions */
2335 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
2336 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
2337 }
2338
2339 /* done successfully */
b75a7d8f
A
2340 break;
2341 }
b75a7d8f 2342 }
46f4442e
A
2343
2344 /*
2345 * toUnicode(source -> pivot);
2346 *
2347 * For pivoting conversion; and for direct conversion for
2348 * error callback handling, continuing partial matches
2349 * and flushing the replay buffer.
2350 *
2351 * The pivot buffer is empty and reset.
2352 */
2353 toUArgs.target=pivotStart; /* ==*pivotTarget */
2354 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2355 _toUnicodeWithCallback(&toUArgs, pErrorCode);
2356 *pivotTarget=toUArgs.target;
b75a7d8f
A
2357 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2358 /* pivot overflow: continue with the conversion loop */
2359 *pErrorCode=U_ZERO_ERROR;
46f4442e 2360 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
b75a7d8f
A
2361 /* conversion error, or there was nothing left to convert */
2362 break;
2363 }
46f4442e
A
2364 /*
2365 * else:
2366 * _toUnicodeWithCallback() wrote into the pivot buffer,
2367 * continue with fromUnicode conversion.
2368 *
2369 * Set the fromUnicode flush flag if we flush and if toUnicode has
2370 * processed the end of the input.
2371 */
2372 if( flush && toUArgs.source==sourceLimit &&
2373 sourceCnv->preToULength>=0 &&
2374 sourceCnv->UCharErrorBufferLength==0
2375 ) {
2376 fromUArgs.flush=TRUE;
2377 }
b75a7d8f
A
2378 }
2379
2380 /*
2381 * The conversion loop is exited when one of the following is true:
2382 * - the entire source text has been converted successfully to the target buffer
2383 * - a target buffer overflow occurred
2384 * - a conversion error occurred
2385 */
2386
46f4442e
A
2387 *source=toUArgs.source;
2388 *target=fromUArgs.target;
2389
b75a7d8f
A
2390 /* terminate the target buffer if possible */
2391 if(flush && U_SUCCESS(*pErrorCode)) {
2392 if(*target!=targetLimit) {
2393 **target=0;
2394 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
2395 *pErrorCode=U_ZERO_ERROR;
2396 }
2397 } else {
2398 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
2399 }
2400 }
2401}
2402
2403/* internal implementation of ucnv_convert() etc. with preflighting */
2404static int32_t
2405ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2406 char *target, int32_t targetCapacity,
2407 const char *source, int32_t sourceLength,
2408 UErrorCode *pErrorCode) {
2409 UChar pivotBuffer[CHUNK_SIZE];
2410 UChar *pivot, *pivot2;
2411
2412 char *myTarget;
2413 const char *sourceLimit;
2414 const char *targetLimit;
2415 int32_t targetLength=0;
2416
2417 /* set up */
2418 if(sourceLength<0) {
2419 sourceLimit=uprv_strchr(source, 0);
2420 } else {
2421 sourceLimit=source+sourceLength;
2422 }
2423
2424 /* if there is no input data, we're done */
2425 if(source==sourceLimit) {
2426 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2427 }
2428
2429 pivot=pivot2=pivotBuffer;
2430 myTarget=target;
2431 targetLength=0;
2432
2433 if(targetCapacity>0) {
2434 /* perform real conversion */
2435 targetLimit=target+targetCapacity;
2436 ucnv_convertEx(outConverter, inConverter,
2437 &myTarget, targetLimit,
2438 &source, sourceLimit,
2439 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2440 FALSE,
2441 TRUE,
2442 pErrorCode);
73c04bcf 2443 targetLength=(int32_t)(myTarget-target);
b75a7d8f
A
2444 }
2445
2446 /*
2447 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2448 * to it but continue the conversion in order to store in targetCapacity
2449 * the number of bytes that was required.
2450 */
2451 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2452 {
2453 char targetBuffer[CHUNK_SIZE];
2454
2455 targetLimit=targetBuffer+CHUNK_SIZE;
2456 do {
2457 *pErrorCode=U_ZERO_ERROR;
2458 myTarget=targetBuffer;
2459 ucnv_convertEx(outConverter, inConverter,
2460 &myTarget, targetLimit,
2461 &source, sourceLimit,
2462 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2463 FALSE,
2464 TRUE,
2465 pErrorCode);
73c04bcf 2466 targetLength+=(int32_t)(myTarget-targetBuffer);
b75a7d8f
A
2467 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2468
2469 /* done with preflighting, set warnings and errors as appropriate */
2470 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2471 }
2472
2473 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2474 return targetLength;
2475}
2476
2477U_CAPI int32_t U_EXPORT2
2478ucnv_convert(const char *toConverterName, const char *fromConverterName,
2479 char *target, int32_t targetCapacity,
2480 const char *source, int32_t sourceLength,
2481 UErrorCode *pErrorCode) {
2482 UConverter in, out; /* stack-allocated */
2483 UConverter *inConverter, *outConverter;
2484 int32_t targetLength;
2485
2486 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2487 return 0;
2488 }
2489
2490 if( source==NULL || sourceLength<-1 ||
2491 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2492 ) {
2493 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2494 return 0;
2495 }
2496
2497 /* if there is no input data, we're done */
2498 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2499 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2500 }
2501
2502 /* create the converters */
2503 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2504 if(U_FAILURE(*pErrorCode)) {
2505 return 0;
2506 }
2507
2508 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2509 if(U_FAILURE(*pErrorCode)) {
2510 ucnv_close(inConverter);
2511 return 0;
2512 }
2513
2514 targetLength=ucnv_internalConvert(outConverter, inConverter,
2515 target, targetCapacity,
2516 source, sourceLength,
2517 pErrorCode);
2518
2519 ucnv_close(inConverter);
2520 ucnv_close(outConverter);
2521
2522 return targetLength;
2523}
2524
2525/* @internal */
2526static int32_t
2527ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2528 UConverterType algorithmicType,
2529 UConverter *cnv,
2530 char *target, int32_t targetCapacity,
2531 const char *source, int32_t sourceLength,
2532 UErrorCode *pErrorCode) {
2533 UConverter algoConverterStatic; /* stack-allocated */
2534 UConverter *algoConverter, *to, *from;
2535 int32_t targetLength;
2536
2537 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2538 return 0;
2539 }
2540
2541 if( cnv==NULL || source==NULL || sourceLength<-1 ||
2542 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2543 ) {
2544 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2545 return 0;
2546 }
2547
2548 /* if there is no input data, we're done */
2549 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2550 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2551 }
2552
2553 /* create the algorithmic converter */
2554 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2555 "", 0, pErrorCode);
2556 if(U_FAILURE(*pErrorCode)) {
2557 return 0;
2558 }
2559
2560 /* reset the other converter */
2561 if(convertToAlgorithmic) {
2562 /* cnv->Unicode->algo */
2563 ucnv_resetToUnicode(cnv);
2564 to=algoConverter;
2565 from=cnv;
2566 } else {
2567 /* algo->Unicode->cnv */
2568 ucnv_resetFromUnicode(cnv);
2569 from=algoConverter;
2570 to=cnv;
2571 }
2572
2573 targetLength=ucnv_internalConvert(to, from,
2574 target, targetCapacity,
2575 source, sourceLength,
2576 pErrorCode);
2577
2578 ucnv_close(algoConverter);
2579
2580 return targetLength;
2581}
2582
2583U_CAPI int32_t U_EXPORT2
2584ucnv_toAlgorithmic(UConverterType algorithmicType,
2585 UConverter *cnv,
2586 char *target, int32_t targetCapacity,
2587 const char *source, int32_t sourceLength,
2588 UErrorCode *pErrorCode) {
2589 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2590 target, targetCapacity,
2591 source, sourceLength,
2592 pErrorCode);
2593}
2594
2595U_CAPI int32_t U_EXPORT2
2596ucnv_fromAlgorithmic(UConverter *cnv,
2597 UConverterType algorithmicType,
2598 char *target, int32_t targetCapacity,
2599 const char *source, int32_t sourceLength,
2600 UErrorCode *pErrorCode) {
2601 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2602 target, targetCapacity,
2603 source, sourceLength,
2604 pErrorCode);
2605}
2606
2607U_CAPI UConverterType U_EXPORT2
2608ucnv_getType(const UConverter* converter)
2609{
2610 int8_t type = converter->sharedData->staticData->conversionType;
2611#if !UCONFIG_NO_LEGACY_CONVERSION
2612 if(type == UCNV_MBCS) {
374ca955 2613 return ucnv_MBCSGetType(converter);
b75a7d8f
A
2614 }
2615#endif
2616 return (UConverterType)type;
2617}
2618
2619U_CAPI void U_EXPORT2
2620ucnv_getStarters(const UConverter* converter,
2621 UBool starters[256],
2622 UErrorCode* err)
2623{
2624 if (err == NULL || U_FAILURE(*err)) {
2625 return;
2626 }
2627
2628 if(converter->sharedData->impl->getStarters != NULL) {
2629 converter->sharedData->impl->getStarters(converter, starters, err);
2630 } else {
2631 *err = U_ILLEGAL_ARGUMENT_ERROR;
2632 }
2633}
2634
2635static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2636{
2637 UErrorCode errorCode;
2638 const char *name;
2639 int32_t i;
2640
2641 if(cnv==NULL) {
2642 return NULL;
2643 }
2644
2645 errorCode=U_ZERO_ERROR;
2646 name=ucnv_getName(cnv, &errorCode);
2647 if(U_FAILURE(errorCode)) {
2648 return NULL;
2649 }
2650
2651 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2652 {
2653 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2654 {
2655 return ambiguousConverters+i;
2656 }
2657 }
2658
2659 return NULL;
2660}
2661
2662U_CAPI void U_EXPORT2
2663ucnv_fixFileSeparator(const UConverter *cnv,
2664 UChar* source,
2665 int32_t sourceLength) {
2666 const UAmbiguousConverter *a;
2667 int32_t i;
2668 UChar variant5c;
2669
2670 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2671 {
2672 return;
2673 }
2674
2675 variant5c=a->variant5c;
2676 for(i=0; i<sourceLength; ++i) {
2677 if(source[i]==variant5c) {
2678 source[i]=0x5c;
2679 }
2680 }
2681}
2682
2683U_CAPI UBool U_EXPORT2
2684ucnv_isAmbiguous(const UConverter *cnv) {
2685 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2686}
2687
2688U_CAPI void U_EXPORT2
2689ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2690{
2691 cnv->useFallback = usesFallback;
2692}
2693
2694U_CAPI UBool U_EXPORT2
2695ucnv_usesFallback(const UConverter *cnv)
2696{
2697 return cnv->useFallback;
2698}
2699
2700U_CAPI void U_EXPORT2
2701ucnv_getInvalidChars (const UConverter * converter,
2702 char *errBytes,
2703 int8_t * len,
2704 UErrorCode * err)
2705{
2706 if (err == NULL || U_FAILURE(*err))
2707 {
2708 return;
2709 }
2710 if (len == NULL || errBytes == NULL || converter == NULL)
2711 {
2712 *err = U_ILLEGAL_ARGUMENT_ERROR;
2713 return;
2714 }
2715 if (*len < converter->invalidCharLength)
2716 {
2717 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2718 return;
2719 }
2720 if ((*len = converter->invalidCharLength) > 0)
2721 {
2722 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2723 }
2724}
2725
2726U_CAPI void U_EXPORT2
2727ucnv_getInvalidUChars (const UConverter * converter,
2728 UChar *errChars,
2729 int8_t * len,
2730 UErrorCode * err)
2731{
2732 if (err == NULL || U_FAILURE(*err))
2733 {
2734 return;
2735 }
2736 if (len == NULL || errChars == NULL || converter == NULL)
2737 {
2738 *err = U_ILLEGAL_ARGUMENT_ERROR;
2739 return;
2740 }
2741 if (*len < converter->invalidUCharLength)
2742 {
2743 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2744 return;
2745 }
2746 if ((*len = converter->invalidUCharLength) > 0)
2747 {
2748 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2749 }
2750}
2751
2752#define SIG_MAX_LEN 5
2753
2754U_CAPI const char* U_EXPORT2
2755ucnv_detectUnicodeSignature( const char* source,
2756 int32_t sourceLength,
2757 int32_t* signatureLength,
2758 UErrorCode* pErrorCode) {
2759 int32_t dummy;
2760
2761 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2762 * bytes we don't misdetect something
2763 */
2764 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2765 int i = 0;
2766
2767 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2768 return NULL;
2769 }
2770
2771 if(source == NULL || sourceLength < -1){
2772 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2773 return NULL;
2774 }
2775
2776 if(signatureLength == NULL) {
2777 signatureLength = &dummy;
2778 }
2779
2780 if(sourceLength==-1){
73c04bcf 2781 sourceLength=(int32_t)uprv_strlen(source);
b75a7d8f
A
2782 }
2783
2784
2785 while(i<sourceLength&& i<SIG_MAX_LEN){
2786 start[i]=source[i];
2787 i++;
2788 }
2789
2790 if(start[0] == '\xFE' && start[1] == '\xFF') {
2791 *signatureLength=2;
2792 return "UTF-16BE";
2793 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2794 if(start[2] == '\x00' && start[3] =='\x00') {
2795 *signatureLength=4;
2796 return "UTF-32LE";
2797 } else {
2798 *signatureLength=2;
2799 return "UTF-16LE";
2800 }
2801 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2802 *signatureLength=3;
2803 return "UTF-8";
2804 } else if(start[0] == '\x00' && start[1] == '\x00' &&
2805 start[2] == '\xFE' && start[3]=='\xFF') {
2806 *signatureLength=4;
2807 return "UTF-32BE";
2808 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2809 *signatureLength=3;
2810 return "SCSU";
2811 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2812 *signatureLength=3;
2813 return "BOCU-1";
2814 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2815 /*
2816 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2817 * depending on the second UTF-16 code unit.
2818 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2819 * if it occurs.
2820 *
2821 * So far we have +/v
2822 */
2823 if(start[3] == '\x38' && start[4] == '\x2D') {
2824 /* 5 bytes +/v8- */
2825 *signatureLength=5;
2826 return "UTF-7";
2827 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2828 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2829 *signatureLength=4;
2830 return "UTF-7";
2831 }
374ca955
A
2832 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2833 *signatureLength=4;
2834 return "UTF-EBCDIC";
b75a7d8f
A
2835 }
2836
374ca955 2837
b75a7d8f
A
2838 /* no known Unicode signature byte sequence recognized */
2839 *signatureLength=0;
2840 return NULL;
2841}
2842
46f4442e
A
2843U_CAPI int32_t U_EXPORT2
2844ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
2845{
73c04bcf
A
2846 if(status == NULL || U_FAILURE(*status)){
2847 return -1;
2848 }
2849 if(cnv == NULL){
2850 *status = U_ILLEGAL_ARGUMENT_ERROR;
2851 return -1;
2852 }
2853
51004dcb 2854 if(cnv->preFromUFirstCP >= 0){
73c04bcf
A
2855 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2856 }else if(cnv->preFromULength < 0){
2857 return -cnv->preFromULength ;
2858 }else if(cnv->fromUChar32 > 0){
2859 return 1;
73c04bcf
A
2860 }
2861 return 0;
2862
46f4442e 2863}
73c04bcf 2864
46f4442e 2865U_CAPI int32_t U_EXPORT2
73c04bcf
A
2866ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2867
2868 if(status == NULL || U_FAILURE(*status)){
2869 return -1;
2870 }
2871 if(cnv == NULL){
2872 *status = U_ILLEGAL_ARGUMENT_ERROR;
2873 return -1;
2874 }
2875
2876 if(cnv->preToULength > 0){
2877 return cnv->preToULength ;
2878 }else if(cnv->preToULength < 0){
2879 return -cnv->preToULength;
2880 }else if(cnv->toULength > 0){
2881 return cnv->toULength;
2882 }
2883 return 0;
2884}
4388f060 2885
51004dcb 2886U_CAPI UBool U_EXPORT2
4388f060
A
2887ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){
2888 if (U_FAILURE(*status)) {
2889 return FALSE;
2890 }
2891
2892 if (cnv == NULL) {
2893 *status = U_ILLEGAL_ARGUMENT_ERROR;
2894 return FALSE;
2895 }
2896
2897 switch (ucnv_getType(cnv)) {
2898 case UCNV_SBCS:
2899 case UCNV_DBCS:
2900 case UCNV_UTF32_BigEndian:
2901 case UCNV_UTF32_LittleEndian:
2902 case UCNV_UTF32:
2903 case UCNV_US_ASCII:
2904 return TRUE;
2905 default:
2906 return FALSE;
2907 }
2908}
374ca955
A
2909#endif
2910
b75a7d8f
A
2911/*
2912 * Hey, Emacs, please set the following:
2913 *
2914 * Local Variables:
2915 * indent-tabs-mode: nil
2916 * End:
2917 *
2918 */