]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucnv.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / common / ucnv.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * ucnv.c:
10 * Implements APIs for the ICU's codeset conversion library;
11 * mostly calls through internal functions;
12 * created by Bertrand A. Damiba
13 *
14 * Modification History:
15 *
16 * Date Name Description
17 * 04/04/99 helena Fixed internal header inclusion.
18 * 05/09/00 helena Added implementation to handle fallback mappings.
19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #include "unicode/ustring.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ucnv_err.h"
29 #include "unicode/uset.h"
30 #include "putilimp.h"
31 #include "cmemory.h"
32 #include "cstring.h"
33 #include "uassert.h"
34 #include "utracimp.h"
35 #include "ustr_imp.h"
36 #include "ucnv_imp.h"
37 #include "ucnv_cnv.h"
38 #include "ucnv_bld.h"
39
/* Size of the intermediate and preflighting buffers used in ucnv_convert(). */
#define CHUNK_SIZE 1024
42
43 typedef struct UAmbiguousConverter {
44 const char *name;
45 const UChar variant5c;
46 } UAmbiguousConverter;
47
48 static const UAmbiguousConverter ambiguousConverters[]={
49 { "ibm-897_P100-1995", 0xa5 },
50 { "ibm-942_P120-1999", 0xa5 },
51 { "ibm-943_P130-1999", 0xa5 },
52 { "ibm-946_P100-1995", 0xa5 },
53 { "ibm-33722_P120-1999", 0xa5 },
54 /*{ "ibm-54191_P100-2006", 0xa5 },*/
55 /*{ "ibm-62383_P100-2007", 0xa5 },*/
56 /*{ "ibm-891_P100-1995", 0x20a9 },*/
57 { "ibm-944_P100-1995", 0x20a9 },
58 { "ibm-949_P110-1999", 0x20a9 },
59 { "ibm-1363_P110-1997", 0x20a9 },
60 { "ISO_2022,locale=ko,version=0", 0x20a9 }
61 };
62
63 /*Calls through createConverter */
64 U_CAPI UConverter* U_EXPORT2
65 ucnv_open (const char *name,
66 UErrorCode * err)
67 {
68 UConverter *r;
69
70 if (err == NULL || U_FAILURE (*err)) {
71 return NULL;
72 }
73
74 r = ucnv_createConverter(NULL, name, err);
75 return r;
76 }
77
78 U_CAPI UConverter* U_EXPORT2
79 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
80 {
81 return ucnv_createConverterFromPackage(packageName, converterName, err);
82 }
83
84 /*Extracts the UChar* to a char* and calls through createConverter */
85 U_CAPI UConverter* U_EXPORT2
86 ucnv_openU (const UChar * name,
87 UErrorCode * err)
88 {
89 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
90
91 if (err == NULL || U_FAILURE(*err))
92 return NULL;
93 if (name == NULL)
94 return ucnv_open (NULL, err);
95 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
96 {
97 *err = U_ILLEGAL_ARGUMENT_ERROR;
98 return NULL;
99 }
100 return ucnv_open(u_austrcpy(asciiName, name), err);
101 }
102
103 /* Copy the string that is represented by the UConverterPlatform enum
104 * @param platformString An output buffer
105 * @param platform An enum representing a platform
106 * @return the length of the copied string.
107 */
108 static int32_t
109 ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm)
110 {
111 switch (pltfrm)
112 {
113 case UCNV_IBM:
114 uprv_strcpy(platformString, "ibm-");
115 return 4;
116 case UCNV_UNKNOWN:
117 break;
118 }
119
120 /* default to empty string */
121 *platformString = 0;
122 return 0;
123 }
124
125 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
126 *through createConverter*/
127 U_CAPI UConverter* U_EXPORT2
128 ucnv_openCCSID (int32_t codepage,
129 UConverterPlatform platform,
130 UErrorCode * err)
131 {
132 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
133 int32_t myNameLen;
134
135 if (err == NULL || U_FAILURE (*err))
136 return NULL;
137
138 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
139 myNameLen = ucnv_copyPlatformString(myName, platform);
140 T_CString_integerToString(myName + myNameLen, codepage, 10);
141
142 return ucnv_createConverter(NULL, myName, err);
143 }
144
145 /* Creating a temporary stack-based object that can be used in one thread,
146 and created from a converter that is shared across threads.
147 */
148
149 U_CAPI UConverter* U_EXPORT2
150 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
151 {
152 UConverter *localConverter, *allocatedConverter;
153 int32_t bufferSizeNeeded;
154 char *stackBufferChars = (char *)stackBuffer;
155 UErrorCode cbErr;
156 UConverterToUnicodeArgs toUArgs = {
157 sizeof(UConverterToUnicodeArgs),
158 TRUE,
159 NULL,
160 NULL,
161 NULL,
162 NULL,
163 NULL,
164 NULL
165 };
166 UConverterFromUnicodeArgs fromUArgs = {
167 sizeof(UConverterFromUnicodeArgs),
168 TRUE,
169 NULL,
170 NULL,
171 NULL,
172 NULL,
173 NULL,
174 NULL
175 };
176
177 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
178
179 if (status == NULL || U_FAILURE(*status)){
180 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
181 return 0;
182 }
183
184 if (!pBufferSize || !cnv){
185 *status = U_ILLEGAL_ARGUMENT_ERROR;
186 UTRACE_EXIT_STATUS(*status);
187 return 0;
188 }
189
190 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
191 ucnv_getName(cnv, status), cnv, stackBuffer);
192
193 if (cnv->sharedData->impl->safeClone != NULL) {
194 /* call the custom safeClone function for sizing */
195 bufferSizeNeeded = 0;
196 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
197 }
198 else
199 {
200 /* inherent sizing */
201 bufferSizeNeeded = sizeof(UConverter);
202 }
203
204 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
205 *pBufferSize = bufferSizeNeeded;
206 UTRACE_EXIT_VALUE(bufferSizeNeeded);
207 return 0;
208 }
209
210
211 /* Pointers on 64-bit platforms need to be aligned
212 * on a 64-bit boundary in memory.
213 */
214 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
215 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
216 if(*pBufferSize > offsetUp) {
217 *pBufferSize -= offsetUp;
218 stackBufferChars += offsetUp;
219 } else {
220 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
221 *pBufferSize = 1;
222 }
223 }
224
225 stackBuffer = (void *)stackBufferChars;
226
227 /* Now, see if we must allocate any memory */
228 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
229 {
230 /* allocate one here...*/
231 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
232
233 if(localConverter == NULL) {
234 *status = U_MEMORY_ALLOCATION_ERROR;
235 UTRACE_EXIT_STATUS(*status);
236 return NULL;
237 }
238
239 if (U_SUCCESS(*status)) {
240 *status = U_SAFECLONE_ALLOCATED_WARNING;
241 }
242
243 /* record the fact that memory was allocated */
244 *pBufferSize = bufferSizeNeeded;
245 } else {
246 /* just use the stack buffer */
247 localConverter = (UConverter*) stackBuffer;
248 allocatedConverter = NULL;
249 }
250
251 uprv_memset(localConverter, 0, bufferSizeNeeded);
252
253 /* Copy initial state */
254 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
255 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
256
257 /* copy the substitution string */
258 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
259 localConverter->subChars = (uint8_t *)localConverter->subUChars;
260 } else {
261 localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
262 if (localConverter->subChars == NULL) {
263 uprv_free(allocatedConverter);
264 UTRACE_EXIT_STATUS(*status);
265 return NULL;
266 }
267 uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
268 }
269
270 /* now either call the safeclone fcn or not */
271 if (cnv->sharedData->impl->safeClone != NULL) {
272 /* call the custom safeClone function */
273 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
274 }
275
276 if(localConverter==NULL || U_FAILURE(*status)) {
277 if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) {
278 uprv_free(allocatedConverter->subChars);
279 }
280 uprv_free(allocatedConverter);
281 UTRACE_EXIT_STATUS(*status);
282 return NULL;
283 }
284
285 /* increment refcount of shared data if needed */
286 /*
287 Checking whether it's an algorithic converter is okay
288 in multithreaded applications because the value never changes.
289 Don't check referenceCounter for any other value.
290 */
291 if (cnv->sharedData->referenceCounter != ~0) {
292 ucnv_incrementRefCount(cnv->sharedData);
293 }
294
295 if(localConverter == (UConverter*)stackBuffer) {
296 /* we're using user provided data - set to not destroy */
297 localConverter->isCopyLocal = TRUE;
298 }
299
300 /* allow callback functions to handle any memory allocation */
301 toUArgs.converter = fromUArgs.converter = localConverter;
302 cbErr = U_ZERO_ERROR;
303 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
304 cbErr = U_ZERO_ERROR;
305 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
306
307 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
308 return localConverter;
309 }
310
311
312
313 /*Decreases the reference counter in the shared immutable section of the object
314 *and frees the mutable part*/
315
316 U_CAPI void U_EXPORT2
317 ucnv_close (UConverter * converter)
318 {
319 UErrorCode errorCode = U_ZERO_ERROR;
320
321 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
322
323 if (converter == NULL)
324 {
325 UTRACE_EXIT();
326 return;
327 }
328
329 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
330 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
331
332 /* In order to speed up the close, only call the callbacks when they have been changed.
333 This performance check will only work when the callbacks are set within a shared library
334 or from user code that statically links this code. */
335 /* first, notify the callback functions that the converter is closed */
336 if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
337 UConverterToUnicodeArgs toUArgs = {
338 sizeof(UConverterToUnicodeArgs),
339 TRUE,
340 NULL,
341 NULL,
342 NULL,
343 NULL,
344 NULL,
345 NULL
346 };
347
348 toUArgs.converter = converter;
349 errorCode = U_ZERO_ERROR;
350 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
351 }
352 if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
353 UConverterFromUnicodeArgs fromUArgs = {
354 sizeof(UConverterFromUnicodeArgs),
355 TRUE,
356 NULL,
357 NULL,
358 NULL,
359 NULL,
360 NULL,
361 NULL
362 };
363 fromUArgs.converter = converter;
364 errorCode = U_ZERO_ERROR;
365 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
366 }
367
368 if (converter->sharedData->impl->close != NULL) {
369 converter->sharedData->impl->close(converter);
370 }
371
372 if (converter->subChars != (uint8_t *)converter->subUChars) {
373 uprv_free(converter->subChars);
374 }
375
376 /*
377 Checking whether it's an algorithic converter is okay
378 in multithreaded applications because the value never changes.
379 Don't check referenceCounter for any other value.
380 */
381 if (converter->sharedData->referenceCounter != ~0) {
382 ucnv_unloadSharedDataIfReady(converter->sharedData);
383 }
384
385 if(!converter->isCopyLocal){
386 uprv_free(converter);
387 }
388
389 UTRACE_EXIT();
390 }
391
392 /*returns a single Name from the list, will return NULL if out of bounds
393 */
394 U_CAPI const char* U_EXPORT2
395 ucnv_getAvailableName (int32_t n)
396 {
397 if (0 <= n && n <= 0xffff) {
398 UErrorCode err = U_ZERO_ERROR;
399 const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err);
400 if (U_SUCCESS(err)) {
401 return name;
402 }
403 }
404 return NULL;
405 }
406
407 U_CAPI int32_t U_EXPORT2
408 ucnv_countAvailable ()
409 {
410 UErrorCode err = U_ZERO_ERROR;
411 return ucnv_bld_countAvailableConverters(&err);
412 }
413
414 U_CAPI void U_EXPORT2
415 ucnv_getSubstChars (const UConverter * converter,
416 char *mySubChar,
417 int8_t * len,
418 UErrorCode * err)
419 {
420 if (U_FAILURE (*err))
421 return;
422
423 if (converter->subCharLen <= 0) {
424 /* Unicode string or empty string from ucnv_setSubstString(). */
425 *len = 0;
426 return;
427 }
428
429 if (*len < converter->subCharLen) /*not enough space in subChars */
430 {
431 *err = U_INDEX_OUTOFBOUNDS_ERROR;
432 return;
433 }
434
435 uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */
436 *len = converter->subCharLen; /*store # of bytes copied to buffer */
437 }
438
439 U_CAPI void U_EXPORT2
440 ucnv_setSubstChars (UConverter * converter,
441 const char *mySubChar,
442 int8_t len,
443 UErrorCode * err)
444 {
445 if (U_FAILURE (*err))
446 return;
447
448 /*Makes sure that the subChar is within the codepages char length boundaries */
449 if ((len > converter->sharedData->staticData->maxBytesPerChar)
450 || (len < converter->sharedData->staticData->minBytesPerChar))
451 {
452 *err = U_ILLEGAL_ARGUMENT_ERROR;
453 return;
454 }
455
456 uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */
457 converter->subCharLen = len; /*sets the new len */
458
459 /*
460 * There is currently (2001Feb) no separate API to set/get subChar1.
461 * In order to always have subChar written after it is explicitly set,
462 * we set subChar1 to 0.
463 */
464 converter->subChar1 = 0;
465
466 return;
467 }
468
469 U_CAPI void U_EXPORT2
470 ucnv_setSubstString(UConverter *cnv,
471 const UChar *s,
472 int32_t length,
473 UErrorCode *err) {
474 UAlignedMemory cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE / sizeof(UAlignedMemory) + 1];
475 char chars[UCNV_ERROR_BUFFER_LENGTH];
476
477 UConverter *clone;
478 uint8_t *subChars;
479 int32_t cloneSize, length8;
480
481 /* Let the following functions check all arguments. */
482 cloneSize = sizeof(cloneBuffer);
483 clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err);
484 ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err);
485 length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err);
486 ucnv_close(clone);
487 if (U_FAILURE(*err)) {
488 return;
489 }
490
491 if (cnv->sharedData->impl->writeSub == NULL
492 #if !UCONFIG_NO_LEGACY_CONVERSION
493 || (cnv->sharedData->staticData->conversionType == UCNV_MBCS &&
494 ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL)
495 #endif
496 ) {
497 /* The converter is not stateful. Store the charset bytes as a fixed string. */
498 subChars = (uint8_t *)chars;
499 } else {
500 /*
501 * The converter has a non-default writeSub() function, indicating
502 * that it is stateful.
503 * Store the Unicode string for on-the-fly conversion for correct
504 * state handling.
505 */
506 if (length > UCNV_ERROR_BUFFER_LENGTH) {
507 /*
508 * Should not occur. The converter should output at least one byte
509 * per UChar, which means that ucnv_fromUChars() should catch all
510 * overflows.
511 */
512 *err = U_BUFFER_OVERFLOW_ERROR;
513 return;
514 }
515 subChars = (uint8_t *)s;
516 if (length < 0) {
517 length = u_strlen(s);
518 }
519 length8 = length * U_SIZEOF_UCHAR;
520 }
521
522 /*
523 * For storing the substitution string, select either the small buffer inside
524 * UConverter or allocate a subChars buffer.
525 */
526 if (length8 > UCNV_MAX_SUBCHAR_LEN) {
527 /* Use a separate buffer for the string. Outside UConverter to not make it too large. */
528 if (cnv->subChars == (uint8_t *)cnv->subUChars) {
529 /* Allocate a new buffer for the string. */
530 cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
531 if (cnv->subChars == NULL) {
532 cnv->subChars = (uint8_t *)cnv->subUChars;
533 *err = U_MEMORY_ALLOCATION_ERROR;
534 return;
535 }
536 uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR);
537 }
538 }
539
540 /* Copy the substitution string into the UConverter or its subChars buffer. */
541 if (length8 == 0) {
542 cnv->subCharLen = 0;
543 } else {
544 uprv_memcpy(cnv->subChars, subChars, length8);
545 if (subChars == (uint8_t *)chars) {
546 cnv->subCharLen = (int8_t)length8;
547 } else /* subChars == s */ {
548 cnv->subCharLen = (int8_t)-length;
549 }
550 }
551
552 /* See comment in ucnv_setSubstChars(). */
553 cnv->subChar1 = 0;
554 }
555
556 /*resets the internal states of a converter
557 *goal : have the same behaviour than a freshly created converter
558 */
559 static void _reset(UConverter *converter, UConverterResetChoice choice,
560 UBool callCallback) {
561 if(converter == NULL) {
562 return;
563 }
564
565 if(callCallback) {
566 /* first, notify the callback functions that the converter is reset */
567 UErrorCode errorCode;
568
569 if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) {
570 UConverterToUnicodeArgs toUArgs = {
571 sizeof(UConverterToUnicodeArgs),
572 TRUE,
573 NULL,
574 NULL,
575 NULL,
576 NULL,
577 NULL,
578 NULL
579 };
580 toUArgs.converter = converter;
581 errorCode = U_ZERO_ERROR;
582 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
583 }
584 if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) {
585 UConverterFromUnicodeArgs fromUArgs = {
586 sizeof(UConverterFromUnicodeArgs),
587 TRUE,
588 NULL,
589 NULL,
590 NULL,
591 NULL,
592 NULL,
593 NULL
594 };
595 fromUArgs.converter = converter;
596 errorCode = U_ZERO_ERROR;
597 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
598 }
599 }
600
601 /* now reset the converter itself */
602 if(choice<=UCNV_RESET_TO_UNICODE) {
603 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
604 converter->mode = 0;
605 converter->toULength = 0;
606 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
607 converter->preToULength = 0;
608 }
609 if(choice!=UCNV_RESET_TO_UNICODE) {
610 converter->fromUnicodeStatus = 0;
611 converter->fromUChar32 = 0;
612 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
613 converter->preFromUFirstCP = U_SENTINEL;
614 converter->preFromULength = 0;
615 }
616
617 if (converter->sharedData->impl->reset != NULL) {
618 /* call the custom reset function */
619 converter->sharedData->impl->reset(converter, choice);
620 }
621 }
622
623 U_CAPI void U_EXPORT2
624 ucnv_reset(UConverter *converter)
625 {
626 _reset(converter, UCNV_RESET_BOTH, TRUE);
627 }
628
629 U_CAPI void U_EXPORT2
630 ucnv_resetToUnicode(UConverter *converter)
631 {
632 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
633 }
634
635 U_CAPI void U_EXPORT2
636 ucnv_resetFromUnicode(UConverter *converter)
637 {
638 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
639 }
640
641 U_CAPI int8_t U_EXPORT2
642 ucnv_getMaxCharSize (const UConverter * converter)
643 {
644 return converter->maxBytesPerUChar;
645 }
646
647
648 U_CAPI int8_t U_EXPORT2
649 ucnv_getMinCharSize (const UConverter * converter)
650 {
651 return converter->sharedData->staticData->minBytesPerChar;
652 }
653
654 U_CAPI const char* U_EXPORT2
655 ucnv_getName (const UConverter * converter, UErrorCode * err)
656
657 {
658 if (U_FAILURE (*err))
659 return NULL;
660 if(converter->sharedData->impl->getName){
661 const char* temp= converter->sharedData->impl->getName(converter);
662 if(temp)
663 return temp;
664 }
665 return converter->sharedData->staticData->name;
666 }
667
668 U_CAPI int32_t U_EXPORT2
669 ucnv_getCCSID(const UConverter * converter,
670 UErrorCode * err)
671 {
672 int32_t ccsid;
673 if (U_FAILURE (*err))
674 return -1;
675
676 ccsid = converter->sharedData->staticData->codepage;
677 if (ccsid == 0) {
678 /* Rare case. This is for cases like gb18030,
679 which doesn't have an IBM cannonical name, but does have an IBM alias. */
680 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
681 if (U_SUCCESS(*err) && standardName) {
682 const char *ccsidStr = uprv_strchr(standardName, '-');
683 if (ccsidStr) {
684 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
685 }
686 }
687 }
688 return ccsid;
689 }
690
691
692 U_CAPI UConverterPlatform U_EXPORT2
693 ucnv_getPlatform (const UConverter * converter,
694 UErrorCode * err)
695 {
696 if (U_FAILURE (*err))
697 return UCNV_UNKNOWN;
698
699 return (UConverterPlatform)converter->sharedData->staticData->platform;
700 }
701
702 U_CAPI void U_EXPORT2
703 ucnv_getToUCallBack (const UConverter * converter,
704 UConverterToUCallback *action,
705 const void **context)
706 {
707 *action = converter->fromCharErrorBehaviour;
708 *context = converter->toUContext;
709 }
710
711 U_CAPI void U_EXPORT2
712 ucnv_getFromUCallBack (const UConverter * converter,
713 UConverterFromUCallback *action,
714 const void **context)
715 {
716 *action = converter->fromUCharErrorBehaviour;
717 *context = converter->fromUContext;
718 }
719
720 U_CAPI void U_EXPORT2
721 ucnv_setToUCallBack (UConverter * converter,
722 UConverterToUCallback newAction,
723 const void* newContext,
724 UConverterToUCallback *oldAction,
725 const void** oldContext,
726 UErrorCode * err)
727 {
728 if (U_FAILURE (*err))
729 return;
730 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
731 converter->fromCharErrorBehaviour = newAction;
732 if (oldContext) *oldContext = converter->toUContext;
733 converter->toUContext = newContext;
734 }
735
736 U_CAPI void U_EXPORT2
737 ucnv_setFromUCallBack (UConverter * converter,
738 UConverterFromUCallback newAction,
739 const void* newContext,
740 UConverterFromUCallback *oldAction,
741 const void** oldContext,
742 UErrorCode * err)
743 {
744 if (U_FAILURE (*err))
745 return;
746 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
747 converter->fromUCharErrorBehaviour = newAction;
748 if (oldContext) *oldContext = converter->fromUContext;
749 converter->fromUContext = newContext;
750 }
751
/*
 * Adjusts the first `length` entries of `offsets` after a conversion step.
 *
 * With sourceIndex>=0 the shift is sourceIndex-errorInputLength:
 *   - shift==0: nothing changes (the most common case)
 *   - shift>0:  the shift is added to every non-negative offset
 *   - shift<0:  every offset becomes -1 (the error input sequence started
 *               in a previous buffer)
 * With sourceIndex<0 every offset becomes -1 because the conversion
 * function does not handle offsets.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift = (sourceIndex >= 0) ? sourceIndex - errorInputLength : -1;
    int32_t i;

    if(shift == 0) {
        return;
    }

    if(shift > 0) {
        /* move valid offsets forward, leave negative ones alone */
        for(i = 0; i < length; ++i) {
            if(offsets[i] >= 0) {
                offsets[i] += shift;
            }
        }
    } else {
        /* no usable source index: invalidate all offsets */
        for(i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
    }
}
796
797 /* ucnv_fromUnicode --------------------------------------------------------- */
798
799 /*
800 * Implementation note for m:n conversions
801 *
802 * While collecting source units to find the longest match for m:n conversion,
803 * some source units may need to be stored for a partial match.
804 * When a second buffer does not yield a match on all of the previously stored
805 * source units, then they must be "replayed", i.e., fed back into the converter.
806 *
807 * The code relies on the fact that replaying will not nest -
808 * converting a replay buffer will not result in a replay.
809 * This is because a replay is necessary only after the _continuation_ of a
810 * partial match failed, but a replay buffer is converted as a whole.
811 * It may result in some of its units being stored again for a partial match,
812 * but there will not be a continuation _during_ the replay which could fail.
813 *
814 * It is conceivable that a callback function could call the converter
815 * recursively in a way that causes another replay to be stored, but that
816 * would be an error in the callback function.
817 * Such violations will cause assertion failures in a debug build,
818 * and wrong output, but they will not cause a crash.
819 */
820
/*
 * Drives the converter implementation's fromUnicode function over pArgs:
 * repeatedly converts, fixes up offsets, replays stored source units from a
 * partial m:n match, and calls the from-Unicode error callback for
 * U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND.
 *
 * @param pArgs conversion arguments (converter, source/target pointers,
 *              flush flag, offsets); updated in place
 * @param err   in/out error code; if it already holds a failure on entry,
 *              the conversion call is skipped and the function goes straight
 *              to offset and error handling
 *
 * Returns when the input is consumed successfully, or when an error remains
 * that the callback did not or cannot resolve (this includes
 * U_BUFFER_OVERFLOW_ERROR).
 */
821 static void
822 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
823 UConverterFromUnicode fromUnicode;
824 UConverter *cnv;
825 const UChar *s;
826 char *t;
827 int32_t *offsets;
828 int32_t sourceIndex;
829 int32_t errorInputLength;
830 UBool converterSawEndOfInput, calledCallback;
831
832 /* variables for m:n conversion */
833 UChar replay[UCNV_EXT_MAX_UCHARS];
834 const UChar *realSource, *realSourceLimit;
835 int32_t realSourceIndex;
836 UBool realFlush;
837
838 cnv=pArgs->converter;
839 s=pArgs->source;
840 t=pArgs->target;
841 offsets=pArgs->offsets;
842
843 /* get the converter implementation function */
844 sourceIndex=0;
845 if(offsets==NULL) {
846 fromUnicode=cnv->sharedData->impl->fromUnicode;
847 } else {
848 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
849 if(fromUnicode==NULL) {
850 /* there is no WithOffsets implementation */
851 fromUnicode=cnv->sharedData->impl->fromUnicode;
852 /* we will write -1 for each offset */
853 sourceIndex=-1;
854 }
855 }
856
857 if(cnv->preFromULength>=0) {
858 /* normal mode */
859 realSource=NULL;
860
861 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
862 realSourceLimit=NULL;
863 realFlush=FALSE;
864 realSourceIndex=0;
865 } else {
866 /*
867 * Previous m:n conversion stored source units from a partial match
868 * and failed to consume all of them.
869 * We need to "replay" them from a temporary buffer and convert them first.
870 */
871 realSource=pArgs->source;
872 realSourceLimit=pArgs->sourceLimit;
873 realFlush=pArgs->flush;
874 realSourceIndex=sourceIndex;
875
876 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
877 pArgs->source=replay;
878 pArgs->sourceLimit=replay-cnv->preFromULength;
879 pArgs->flush=FALSE;
/* the replayed units were stored by an earlier call, so output produced from them gets offset -1 (see _updateOffsets) */
880 sourceIndex=-1;
881
882 cnv->preFromULength=0;
883 }
884
885 /*
886 * loop for conversion and error handling
887 *
888 * loop {
889 * convert
890 * loop {
891 * update offsets
892 * handle end of input
893 * handle errors/call callback
894 * }
895 * }
896 */
897 for(;;) {
898 if(U_SUCCESS(*err)) {
899 /* convert */
900 fromUnicode(pArgs, err);
901
902 /*
903 * set a flag for whether the converter
904 * successfully processed the end of the input
905 *
906 * need not check cnv->preFromULength==0 because a replay (<0) will cause
907 * s<sourceLimit before converterSawEndOfInput is checked
908 */
909 converterSawEndOfInput=
910 (UBool)(U_SUCCESS(*err) &&
911 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
912 cnv->fromUChar32==0);
913 } else {
914 /* handle error from ucnv_convertEx() */
915 converterSawEndOfInput=FALSE;
916 }
917
918 /* no callback called yet for this iteration */
919 calledCallback=FALSE;
920
921 /* no sourceIndex adjustment for conversion, only for callback output */
922 errorInputLength=0;
923
924 /*
925 * loop for offsets and error handling
926 *
927 * iterates at most 3 times:
928 * 1. to clean up after the conversion function
929 * 2. after the callback
930 * 3. after the callback again if there was truncated input
931 */
932 for(;;) {
933 /* update offsets if we write any */
934 if(offsets!=NULL) {
935 int32_t length=(int32_t)(pArgs->target-t);
936 if(length>0) {
937 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
938
939 /*
940 * if a converter handles offsets and updates the offsets
941 * pointer at the end, then pArgs->offset should not change
942 * here;
943 * however, some converters do not handle offsets at all
944 * (sourceIndex<0) or may not update the offsets pointer
945 */
946 pArgs->offsets=offsets+=length;
947 }
948
949 if(sourceIndex>=0) {
950 sourceIndex+=(int32_t)(pArgs->source-s);
951 }
952 }
953
954 if(cnv->preFromULength<0) {
955 /*
956 * switch the source to new replay units (cannot occur while replaying)
957 * after offset handling and before end-of-input and callback handling
958 */
959 if(realSource==NULL) {
960 realSource=pArgs->source;
961 realSourceLimit=pArgs->sourceLimit;
962 realFlush=pArgs->flush;
963 realSourceIndex=sourceIndex;
964
965 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
966 pArgs->source=replay;
967 pArgs->sourceLimit=replay-cnv->preFromULength;
968 pArgs->flush=FALSE;
/* preFromULength is negative here, so this moves sourceIndex back by the number of replayed units; any negative result is normalized to -1 */
969 if((sourceIndex+=cnv->preFromULength)<0) {
970 sourceIndex=-1;
971 }
972
973 cnv->preFromULength=0;
974 } else {
975 /* see implementation note before _fromUnicodeWithCallback() */
976 U_ASSERT(realSource==NULL);
977 *err=U_INTERNAL_PROGRAM_ERROR;
978 }
979 }
980
981 /* update pointers */
982 s=pArgs->source;
983 t=pArgs->target;
984
985 if(U_SUCCESS(*err)) {
986 if(s<pArgs->sourceLimit) {
987 /*
988 * continue with the conversion loop while there is still input left
989 * (continue converting by breaking out of only the inner loop)
990 */
991 break;
992 } else if(realSource!=NULL) {
993 /* switch back from replaying to the real source and continue */
994 pArgs->source=realSource;
995 pArgs->sourceLimit=realSourceLimit;
996 pArgs->flush=realFlush;
997 sourceIndex=realSourceIndex;
998
999 realSource=NULL;
1000 break;
1001 } else if(pArgs->flush && cnv->fromUChar32!=0) {
1002 /*
1003 * the entire input stream is consumed
1004 * and there is a partial, truncated input sequence left
1005 */
1006
1007 /* inject an error and continue with callback handling */
1008 *err=U_TRUNCATED_CHAR_FOUND;
1009 calledCallback=FALSE; /* new error condition */
1010 } else {
1011 /* input consumed */
1012 if(pArgs->flush) {
1013 /*
1014 * return to the conversion loop once more if the flush
1015 * flag is set and the conversion function has not
1016 * successfully processed the end of the input yet
1017 *
1018 * (continue converting by breaking out of only the inner loop)
1019 */
1020 if(!converterSawEndOfInput) {
1021 break;
1022 }
1023
1024 /* reset the converter without calling the callback function */
1025 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
1026 }
1027
1028 /* done successfully */
1029 return;
1030 }
1031 }
1032
1033 /* U_FAILURE(*err) */
1034 {
1035 UErrorCode e;
1036
1037 if( calledCallback ||
1038 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1039 (e!=U_INVALID_CHAR_FOUND &&
1040 e!=U_ILLEGAL_CHAR_FOUND &&
1041 e!=U_TRUNCATED_CHAR_FOUND)
1042 ) {
1043 /*
1044 * the callback did not or cannot resolve the error:
1045 * set output pointers and return
1046 *
1047 * the check for buffer overflow is redundant but it is
1048 * a high-runner case and hopefully documents the intent
1049 * well
1050 *
1051 * if we were replaying, then the replay buffer must be
1052 * copied back into the UConverter
1053 * and the real arguments must be restored
1054 */
1055 if(realSource!=NULL) {
1056 int32_t length;
1057
1058 U_ASSERT(cnv->preFromULength==0);
1059
1060 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1061 if(length>0) {
1062 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
/* store the count negated: a negative preFromULength makes the next call replay these units first */
1063 cnv->preFromULength=(int8_t)-length;
1064 }
1065
1066 pArgs->source=realSource;
1067 pArgs->sourceLimit=realSourceLimit;
1068 pArgs->flush=realFlush;
1069 }
1070
1071 return;
1072 }
1073 }
1074
1075 /* callback handling */
1076 {
1077 UChar32 codePoint;
1078
1079 /* get and write the code point */
1080 codePoint=cnv->fromUChar32;
1081 errorInputLength=0;
1082 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
1083 cnv->invalidUCharLength=(int8_t)errorInputLength;
1084
1085 /* set the converter state to deal with the next character */
1086 cnv->fromUChar32=0;
1087
1088 /* call the callback function */
/* U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED; the other errors that reach this point are reported as UCNV_ILLEGAL */
1089 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
1090 cnv->invalidUCharBuffer, errorInputLength, codePoint,
1091 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
1092 err);
1093 }
1094
1095 /*
1096 * loop back to the offset handling
1097 *
1098 * this flag will indicate after offset handling
1099 * that a callback was called;
1100 * if the callback did not resolve the error, then we return
1101 */
1102 calledCallback=TRUE;
1103 }
1104 }
1105 }
1106
1107 /*
1108 * Output the fromUnicode overflow buffer.
1109 * Call this function if(cnv->charErrorBufferLength>0).
1110 * @return TRUE if overflow
1111 */
1112 static UBool
1113 ucnv_outputOverflowFromUnicode(UConverter *cnv,
1114 char **target, const char *targetLimit,
1115 int32_t **pOffsets,
1116 UErrorCode *err) {
1117 int32_t *offsets;
1118 char *overflow, *t;
1119 int32_t i, length;
1120
1121 t=*target;
1122 if(pOffsets!=NULL) {
1123 offsets=*pOffsets;
1124 } else {
1125 offsets=NULL;
1126 }
1127
1128 overflow=(char *)cnv->charErrorBuffer;
1129 length=cnv->charErrorBufferLength;
1130 i=0;
1131 while(i<length) {
1132 if(t==targetLimit) {
1133 /* the overflow buffer contains too much, keep the rest */
1134 int32_t j=0;
1135
1136 do {
1137 overflow[j++]=overflow[i++];
1138 } while(i<length);
1139
1140 cnv->charErrorBufferLength=(int8_t)j;
1141 *target=t;
1142 if(offsets!=NULL) {
1143 *pOffsets=offsets;
1144 }
1145 *err=U_BUFFER_OVERFLOW_ERROR;
1146 return TRUE;
1147 }
1148
1149 /* copy the overflow contents to the target */
1150 *t++=overflow[i++];
1151 if(offsets!=NULL) {
1152 *offsets++=-1; /* no source index available for old output */
1153 }
1154 }
1155
1156 /* the overflow buffer is completely copied to the target */
1157 cnv->charErrorBufferLength=0;
1158 *target=t;
1159 if(offsets!=NULL) {
1160 *pOffsets=offsets;
1161 }
1162 return FALSE;
1163 }
1164
1165 U_CAPI void U_EXPORT2
1166 ucnv_fromUnicode(UConverter *cnv,
1167 char **target, const char *targetLimit,
1168 const UChar **source, const UChar *sourceLimit,
1169 int32_t *offsets,
1170 UBool flush,
1171 UErrorCode *err) {
1172 UConverterFromUnicodeArgs args;
1173 const UChar *s;
1174 char *t;
1175
1176 /* check parameters */
1177 if(err==NULL || U_FAILURE(*err)) {
1178 return;
1179 }
1180
1181 if(cnv==NULL || target==NULL || source==NULL) {
1182 *err=U_ILLEGAL_ARGUMENT_ERROR;
1183 return;
1184 }
1185
1186 s=*source;
1187 t=*target;
1188
1189 if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) {
1190 /*
1191 Prevent code from going into an infinite loop in case we do hit this
1192 limit. The limit pointer is expected to be on a UChar * boundary.
1193 This also prevents the next argument check from failing.
1194 */
1195 sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1);
1196 }
1197
1198 /*
1199 * All these conditions should never happen.
1200 *
1201 * 1) Make sure that the limits are >= to the address source or target
1202 *
1203 * 2) Make sure that the buffer sizes do not exceed the number range for
1204 * int32_t because some functions use the size (in units or bytes)
1205 * rather than comparing pointers, and because offsets are int32_t values.
1206 *
1207 * size_t is guaranteed to be unsigned and large enough for the job.
1208 *
1209 * Return with an error instead of adjusting the limits because we would
1210 * not be able to maintain the semantics that either the source must be
1211 * consumed or the target filled (unless an error occurs).
1212 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1213 *
1214 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1215 * to a char * pointer and provide an incomplete UChar code unit.
1216 */
1217 if (sourceLimit<s || targetLimit<t ||
1218 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1219 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) ||
1220 (((const char *)sourceLimit-(const char *)s) & 1) != 0)
1221 {
1222 *err=U_ILLEGAL_ARGUMENT_ERROR;
1223 return;
1224 }
1225
1226 /* output the target overflow buffer */
1227 if( cnv->charErrorBufferLength>0 &&
1228 ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err)
1229 ) {
1230 /* U_BUFFER_OVERFLOW_ERROR */
1231 return;
1232 }
1233 /* *target may have moved, therefore stop using t */
1234
1235 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
1236 /* the overflow buffer is emptied and there is no new input: we are done */
1237 return;
1238 }
1239
1240 /*
1241 * Do not simply return with a buffer overflow error if
1242 * !flush && t==targetLimit
1243 * because it is possible that the source will not generate any output.
1244 * For example, the skip callback may be called;
1245 * it does not output anything.
1246 */
1247
1248 /* prepare the converter arguments */
1249 args.converter=cnv;
1250 args.flush=flush;
1251 args.offsets=offsets;
1252 args.source=s;
1253 args.sourceLimit=sourceLimit;
1254 args.target=*target;
1255 args.targetLimit=targetLimit;
1256 args.size=sizeof(args);
1257
1258 _fromUnicodeWithCallback(&args, err);
1259
1260 *source=args.source;
1261 *target=args.target;
1262 }
1263
1264 /* ucnv_toUnicode() --------------------------------------------------------- */
1265
/*
 * Runs the to-Unicode conversion described by pArgs and dispatches
 * conversion errors to the converter's to-Unicode error callback.
 *
 * - Uses impl->toUnicodeWithOffsets when pArgs->offsets is set and such an
 *   implementation exists; otherwise impl->toUnicode is used and, if offsets
 *   were requested, -1 is written for each offset.
 * - If cnv->preToULength<0 (on entry, or found negative again inside the
 *   loop), the bytes stored in cnv->preToU are converted first from a local
 *   replay buffer with flush forced to FALSE; the caller's source,
 *   sourceLimit and flush are restored once the replay is consumed or
 *   the function returns with an error.
 * - For U_INVALID_CHAR_FOUND, U_ILLEGAL_CHAR_FOUND, U_TRUNCATED_CHAR_FOUND,
 *   U_ILLEGAL_ESCAPE_SEQUENCE and U_UNSUPPORTED_ESCAPE_SEQUENCE the bytes in
 *   cnv->toUBytes are copied to cnv->invalidCharBuffer and
 *   cnv->fromCharErrorBehaviour is called; if *err is still a failure after
 *   the callback returns, or for any other error code, the function returns.
 * - With flush set, input that ends while cnv->toULength>0 is turned into
 *   U_TRUNCATED_CHAR_FOUND; once the end of the input was processed
 *   successfully, the to-Unicode state is reset via _reset().
 *
 * The function may be entered with U_FAILURE(*err); the conversion function
 * is then skipped and processing starts at the offset/error handling.
 *
 * @param pArgs in/out conversion arguments (source, target and offsets are
 *              advanced; sourceLimit and flush are swapped temporarily
 *              while replaying)
 * @param err   in/out ICU error code
 */
1266 static void
1267 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1268 UConverterToUnicode toUnicode;
1269 UConverter *cnv;
1270 const char *s;
1271 UChar *t;
1272 int32_t *offsets;
1273 int32_t sourceIndex;
1274 int32_t errorInputLength;
1275 UBool converterSawEndOfInput, calledCallback;
1276
1277 /* variables for m:n conversion */
1278 char replay[UCNV_EXT_MAX_BYTES];
1279 const char *realSource, *realSourceLimit;
1280 int32_t realSourceIndex;
1281 UBool realFlush;
1282
1283 cnv=pArgs->converter;
1284 s=pArgs->source;
1285 t=pArgs->target;
1286 offsets=pArgs->offsets;
1287
1288 /* get the converter implementation function */
1289 sourceIndex=0;
1290 if(offsets==NULL) {
1291 toUnicode=cnv->sharedData->impl->toUnicode;
1292 } else {
1293 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1294 if(toUnicode==NULL) {
1295 /* there is no WithOffsets implementation */
1296 toUnicode=cnv->sharedData->impl->toUnicode;
1297 /* we will write -1 for each offset */
1298 sourceIndex=-1;
1299 }
1300 }
1301
1302 if(cnv->preToULength>=0) {
1303 /* normal mode */
/* realSource==NULL is the marker for "not replaying" throughout this function */
1304 realSource=NULL;
1305
1306 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1307 realSourceLimit=NULL;
1308 realFlush=FALSE;
1309 realSourceIndex=0;
1310 } else {
1311 /*
1312 * Previous m:n conversion stored source units from a partial match
1313 * and failed to consume all of them.
1314 * We need to "replay" them from a temporary buffer and convert them first.
1315 */
1316 realSource=pArgs->source;
1317 realSourceLimit=pArgs->sourceLimit;
1318 realFlush=pArgs->flush;
1319 realSourceIndex=sourceIndex;
1320
/* preToULength is negative here, so -preToULength is the number of stored bytes */
1321 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1322 pArgs->source=replay;
1323 pArgs->sourceLimit=replay-cnv->preToULength;
1324 pArgs->flush=FALSE;
1325 sourceIndex=-1;
1326
1327 cnv->preToULength=0;
1328 }
1329
1330 /*
1331 * loop for conversion and error handling
1332 *
1333 * loop {
1334 * convert
1335 * loop {
1336 * update offsets
1337 * handle end of input
1338 * handle errors/call callback
1339 * }
1340 * }
1341 */
1342 for(;;) {
1343 if(U_SUCCESS(*err)) {
1344 /* convert */
1345 toUnicode(pArgs, err);
1346
1347 /*
1348 * set a flag for whether the converter
1349 * successfully processed the end of the input
1350 *
1351 * need not check cnv->preToULength==0 because a replay (<0) will cause
1352 * s<sourceLimit before converterSawEndOfInput is checked
1353 */
1354 converterSawEndOfInput=
1355 (UBool)(U_SUCCESS(*err) &&
1356 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1357 cnv->toULength==0);
1358 } else {
1359 /* handle error from getNextUChar() or ucnv_convertEx() */
1360 converterSawEndOfInput=FALSE;
1361 }
1362
1363 /* no callback called yet for this iteration */
1364 calledCallback=FALSE;
1365
1366 /* no sourceIndex adjustment for conversion, only for callback output */
1367 errorInputLength=0;
1368
1369 /*
1370 * loop for offsets and error handling
1371 *
1372 * iterates at most 3 times:
1373 * 1. to clean up after the conversion function
1374 * 2. after the callback
1375 * 3. after the callback again if there was truncated input
1376 */
1377 for(;;) {
1378 /* update offsets if we write any */
1379 if(offsets!=NULL) {
/* t still points at the target position before the last conversion/callback step */
1380 int32_t length=(int32_t)(pArgs->target-t);
1381 if(length>0) {
1382 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1383
1384 /*
1385 * if a converter handles offsets and updates the offsets
1386 * pointer at the end, then pArgs->offset should not change
1387 * here;
1388 * however, some converters do not handle offsets at all
1389 * (sourceIndex<0) or may not update the offsets pointer
1390 */
1391 pArgs->offsets=offsets+=length;
1392 }
1393
1394 if(sourceIndex>=0) {
1395 sourceIndex+=(int32_t)(pArgs->source-s);
1396 }
1397 }
1398
1399 if(cnv->preToULength<0) {
1400 /*
1401 * switch the source to new replay units (cannot occur while replaying)
1402 * after offset handling and before end-of-input and callback handling
1403 */
1404 if(realSource==NULL) {
1405 realSource=pArgs->source;
1406 realSourceLimit=pArgs->sourceLimit;
1407 realFlush=pArgs->flush;
1408 realSourceIndex=sourceIndex;
1409
1410 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1411 pArgs->source=replay;
1412 pArgs->sourceLimit=replay-cnv->preToULength;
1413 pArgs->flush=FALSE;
/* preToULength is negative: move sourceIndex back by the replay length, or use -1 if that goes below 0 */
1414 if((sourceIndex+=cnv->preToULength)<0) {
1415 sourceIndex=-1;
1416 }
1417
1418 cnv->preToULength=0;
1419 } else {
1420 /* see implementation note before _fromUnicodeWithCallback() */
1421 U_ASSERT(realSource==NULL);
1422 *err=U_INTERNAL_PROGRAM_ERROR;
1423 }
1424 }
1425
1426 /* update pointers */
1427 s=pArgs->source;
1428 t=pArgs->target;
1429
1430 if(U_SUCCESS(*err)) {
1431 if(s<pArgs->sourceLimit) {
1432 /*
1433 * continue with the conversion loop while there is still input left
1434 * (continue converting by breaking out of only the inner loop)
1435 */
1436 break;
1437 } else if(realSource!=NULL) {
1438 /* switch back from replaying to the real source and continue */
1439 pArgs->source=realSource;
1440 pArgs->sourceLimit=realSourceLimit;
1441 pArgs->flush=realFlush;
1442 sourceIndex=realSourceIndex;
1443
1444 realSource=NULL;
1445 break;
1446 } else if(pArgs->flush && cnv->toULength>0) {
1447 /*
1448 * the entire input stream is consumed
1449 * and there is a partial, truncated input sequence left
1450 */
1451
1452 /* inject an error and continue with callback handling */
1453 *err=U_TRUNCATED_CHAR_FOUND;
1454 calledCallback=FALSE; /* new error condition */
1455 } else {
1456 /* input consumed */
1457 if(pArgs->flush) {
1458 /*
1459 * return to the conversion loop once more if the flush
1460 * flag is set and the conversion function has not
1461 * successfully processed the end of the input yet
1462 *
1463 * (continue converting by breaking out of only the inner loop)
1464 */
1465 if(!converterSawEndOfInput) {
1466 break;
1467 }
1468
1469 /* reset the converter without calling the callback function */
1470 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1471 }
1472
1473 /* done successfully */
1474 return;
1475 }
1476 }
1477
1478 /* U_FAILURE(*err) */
1479 {
1480 UErrorCode e;
1481
1482 if( calledCallback ||
1483 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1484 (e!=U_INVALID_CHAR_FOUND &&
1485 e!=U_ILLEGAL_CHAR_FOUND &&
1486 e!=U_TRUNCATED_CHAR_FOUND &&
1487 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1488 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1489 ) {
1490 /*
1491 * the callback did not or cannot resolve the error:
1492 * set output pointers and return
1493 *
1494 * the check for buffer overflow is redundant but it is
1495 * a high-runner case and hopefully documents the intent
1496 * well
1497 *
1498 * if we were replaying, then the replay buffer must be
1499 * copied back into the UConverter
1500 * and the real arguments must be restored
1501 */
1502 if(realSource!=NULL) {
1503 int32_t length;
1504
1505 U_ASSERT(cnv->preToULength==0);
1506
1507 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1508 if(length>0) {
1509 uprv_memcpy(cnv->preToU, pArgs->source, length);
/* a negative length marks the stored bytes as pending replay for the next call */
1510 cnv->preToULength=(int8_t)-length;
1511 }
1512
1513 pArgs->source=realSource;
1514 pArgs->sourceLimit=realSourceLimit;
1515 pArgs->flush=realFlush;
1516 }
1517
1518 return;
1519 }
1520 }
1521
1522 /* copy toUBytes[] to invalidCharBuffer[] */
1523 errorInputLength=cnv->invalidCharLength=cnv->toULength;
1524 if(errorInputLength>0) {
1525 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1526 }
1527
1528 /* set the converter state to deal with the next character */
1529 cnv->toULength=0;
1530
1531 /* call the callback function */
/* if the reason is still the default UCNV_ILLEGAL but the error is U_INVALID_CHAR_FOUND, report UCNV_UNASSIGNED */
1532 if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
1533 cnv->toUCallbackReason = UCNV_UNASSIGNED;
1534 }
1535 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1536 cnv->invalidCharBuffer, errorInputLength,
1537 cnv->toUCallbackReason,
1538 err);
1539 cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */
1540
1541 /*
1542 * loop back to the offset handling
1543 *
1544 * this flag will indicate after offset handling
1545 * that a callback was called;
1546 * if the callback did not resolve the error, then we return
1547 */
1548 calledCallback=TRUE;
1549 }
1550 }
1551 }
1552
1553 /*
1554 * Output the toUnicode overflow buffer.
1555 * Call this function if(cnv->UCharErrorBufferLength>0).
1556 * @return TRUE if overflow
1557 */
1558 static UBool
1559 ucnv_outputOverflowToUnicode(UConverter *cnv,
1560 UChar **target, const UChar *targetLimit,
1561 int32_t **pOffsets,
1562 UErrorCode *err) {
1563 int32_t *offsets;
1564 UChar *overflow, *t;
1565 int32_t i, length;
1566
1567 t=*target;
1568 if(pOffsets!=NULL) {
1569 offsets=*pOffsets;
1570 } else {
1571 offsets=NULL;
1572 }
1573
1574 overflow=cnv->UCharErrorBuffer;
1575 length=cnv->UCharErrorBufferLength;
1576 i=0;
1577 while(i<length) {
1578 if(t==targetLimit) {
1579 /* the overflow buffer contains too much, keep the rest */
1580 int32_t j=0;
1581
1582 do {
1583 overflow[j++]=overflow[i++];
1584 } while(i<length);
1585
1586 cnv->UCharErrorBufferLength=(int8_t)j;
1587 *target=t;
1588 if(offsets!=NULL) {
1589 *pOffsets=offsets;
1590 }
1591 *err=U_BUFFER_OVERFLOW_ERROR;
1592 return TRUE;
1593 }
1594
1595 /* copy the overflow contents to the target */
1596 *t++=overflow[i++];
1597 if(offsets!=NULL) {
1598 *offsets++=-1; /* no source index available for old output */
1599 }
1600 }
1601
1602 /* the overflow buffer is completely copied to the target */
1603 cnv->UCharErrorBufferLength=0;
1604 *target=t;
1605 if(offsets!=NULL) {
1606 *pOffsets=offsets;
1607 }
1608 return FALSE;
1609 }
1610
1611 U_CAPI void U_EXPORT2
1612 ucnv_toUnicode(UConverter *cnv,
1613 UChar **target, const UChar *targetLimit,
1614 const char **source, const char *sourceLimit,
1615 int32_t *offsets,
1616 UBool flush,
1617 UErrorCode *err) {
1618 UConverterToUnicodeArgs args;
1619 const char *s;
1620 UChar *t;
1621
1622 /* check parameters */
1623 if(err==NULL || U_FAILURE(*err)) {
1624 return;
1625 }
1626
1627 if(cnv==NULL || target==NULL || source==NULL) {
1628 *err=U_ILLEGAL_ARGUMENT_ERROR;
1629 return;
1630 }
1631
1632 s=*source;
1633 t=*target;
1634
1635 if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) {
1636 /*
1637 Prevent code from going into an infinite loop in case we do hit this
1638 limit. The limit pointer is expected to be on a UChar * boundary.
1639 This also prevents the next argument check from failing.
1640 */
1641 targetLimit = (const UChar *)(((const char *)targetLimit) - 1);
1642 }
1643
1644 /*
1645 * All these conditions should never happen.
1646 *
1647 * 1) Make sure that the limits are >= to the address source or target
1648 *
1649 * 2) Make sure that the buffer sizes do not exceed the number range for
1650 * int32_t because some functions use the size (in units or bytes)
1651 * rather than comparing pointers, and because offsets are int32_t values.
1652 *
1653 * size_t is guaranteed to be unsigned and large enough for the job.
1654 *
1655 * Return with an error instead of adjusting the limits because we would
1656 * not be able to maintain the semantics that either the source must be
1657 * consumed or the target filled (unless an error occurs).
1658 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1659 *
1660 * 3) Make sure that the user didn't incorrectly cast a UChar * pointer
1661 * to a char * pointer and provide an incomplete UChar code unit.
1662 */
1663 if (sourceLimit<s || targetLimit<t ||
1664 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1665 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) ||
1666 (((const char *)targetLimit-(const char *)t) & 1) != 0
1667 ) {
1668 *err=U_ILLEGAL_ARGUMENT_ERROR;
1669 return;
1670 }
1671
1672 /* output the target overflow buffer */
1673 if( cnv->UCharErrorBufferLength>0 &&
1674 ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err)
1675 ) {
1676 /* U_BUFFER_OVERFLOW_ERROR */
1677 return;
1678 }
1679 /* *target may have moved, therefore stop using t */
1680
1681 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
1682 /* the overflow buffer is emptied and there is no new input: we are done */
1683 return;
1684 }
1685
1686 /*
1687 * Do not simply return with a buffer overflow error if
1688 * !flush && t==targetLimit
1689 * because it is possible that the source will not generate any output.
1690 * For example, the skip callback may be called;
1691 * it does not output anything.
1692 */
1693
1694 /* prepare the converter arguments */
1695 args.converter=cnv;
1696 args.flush=flush;
1697 args.offsets=offsets;
1698 args.source=s;
1699 args.sourceLimit=sourceLimit;
1700 args.target=*target;
1701 args.targetLimit=targetLimit;
1702 args.size=sizeof(args);
1703
1704 _toUnicodeWithCallback(&args, err);
1705
1706 *source=args.source;
1707 *target=args.target;
1708 }
1709
1710 /* ucnv_to/fromUChars() ----------------------------------------------------- */
1711
1712 U_CAPI int32_t U_EXPORT2
1713 ucnv_fromUChars(UConverter *cnv,
1714 char *dest, int32_t destCapacity,
1715 const UChar *src, int32_t srcLength,
1716 UErrorCode *pErrorCode) {
1717 const UChar *srcLimit;
1718 char *originalDest, *destLimit;
1719 int32_t destLength;
1720
1721 /* check arguments */
1722 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1723 return 0;
1724 }
1725
1726 if( cnv==NULL ||
1727 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1728 srcLength<-1 || (srcLength!=0 && src==NULL)
1729 ) {
1730 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1731 return 0;
1732 }
1733
1734 /* initialize */
1735 ucnv_resetFromUnicode(cnv);
1736 originalDest=dest;
1737 if(srcLength==-1) {
1738 srcLength=u_strlen(src);
1739 }
1740 if(srcLength>0) {
1741 srcLimit=src+srcLength;
1742 destLimit=dest+destCapacity;
1743
1744 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1745 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1746 destLimit=(char *)U_MAX_PTR(dest);
1747 }
1748
1749 /* perform the conversion */
1750 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1751 destLength=(int32_t)(dest-originalDest);
1752
1753 /* if an overflow occurs, then get the preflighting length */
1754 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1755 char buffer[1024];
1756
1757 destLimit=buffer+sizeof(buffer);
1758 do {
1759 dest=buffer;
1760 *pErrorCode=U_ZERO_ERROR;
1761 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1762 destLength+=(int32_t)(dest-buffer);
1763 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1764 }
1765 } else {
1766 destLength=0;
1767 }
1768
1769 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1770 }
1771
1772 U_CAPI int32_t U_EXPORT2
1773 ucnv_toUChars(UConverter *cnv,
1774 UChar *dest, int32_t destCapacity,
1775 const char *src, int32_t srcLength,
1776 UErrorCode *pErrorCode) {
1777 const char *srcLimit;
1778 UChar *originalDest, *destLimit;
1779 int32_t destLength;
1780
1781 /* check arguments */
1782 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1783 return 0;
1784 }
1785
1786 if( cnv==NULL ||
1787 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1788 srcLength<-1 || (srcLength!=0 && src==NULL))
1789 {
1790 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1791 return 0;
1792 }
1793
1794 /* initialize */
1795 ucnv_resetToUnicode(cnv);
1796 originalDest=dest;
1797 if(srcLength==-1) {
1798 srcLength=(int32_t)uprv_strlen(src);
1799 }
1800 if(srcLength>0) {
1801 srcLimit=src+srcLength;
1802 destLimit=dest+destCapacity;
1803
1804 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1805 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1806 destLimit=(UChar *)U_MAX_PTR(dest);
1807 }
1808
1809 /* perform the conversion */
1810 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1811 destLength=(int32_t)(dest-originalDest);
1812
1813 /* if an overflow occurs, then get the preflighting length */
1814 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1815 {
1816 UChar buffer[1024];
1817
1818 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1819 do {
1820 dest=buffer;
1821 *pErrorCode=U_ZERO_ERROR;
1822 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1823 destLength+=(int32_t)(dest-buffer);
1824 }
1825 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1826 }
1827 } else {
1828 destLength=0;
1829 }
1830
1831 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1832 }
1833
1834 /* ucnv_getNextUChar() ------------------------------------------------------ */
1835
/*
 * Converts the next character from the byte input and returns it as a
 * single code point.
 *
 * Processing order:
 * 1. Output pending in cnv->UCharErrorBuffer is consumed first; it is
 *    returned directly unless it consists of just one lead surrogate.
 * 2. Otherwise, if cnv->toULength==0 and the converter implementation has a
 *    native getNextUChar(), that function is tried; a non-negative result
 *    with a success code is returned directly.
 * 3. Otherwise one UChar is converted into a local buffer via
 *    _toUnicodeWithCallback(), which also performs callback handling for
 *    errors reported by the native function.
 * 4. A lead surrogate is combined with a trail surrogate taken from the
 *    overflow buffer or obtained by converting once more.
 * 5. UChars left in the local buffer are moved to the front of
 *    cnv->UCharErrorBuffer for the next call.
 *
 * The conversion is always performed with flush==TRUE.
 *
 * @param cnv         the converter
 * @param source      in/out: read position in the byte input; updated after
 *                    a conversion step was run
 * @param sourceLimit end of the byte input
 * @param err         in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                    if cnv or source is NULL, sourceLimit<*source, or the
 *                    input is longer than 0x7fffffff bytes; set to
 *                    U_INDEX_OUTOFBOUNDS_ERROR if no output was produced
 * @return the code point; 0xffff if the arguments are rejected, if the first
 *         conversion step fails, or if there is no output
 */
1836 U_CAPI UChar32 U_EXPORT2
1837 ucnv_getNextUChar(UConverter *cnv,
1838 const char **source, const char *sourceLimit,
1839 UErrorCode *err) {
1840 UConverterToUnicodeArgs args;
1841 UChar buffer[U16_MAX_LENGTH];
1842 const char *s;
1843 UChar32 c;
1844 int32_t i, length;
1845
1846 /* check parameters */
1847 if(err==NULL || U_FAILURE(*err)) {
1848 return 0xffff;
1849 }
1850
1851 if(cnv==NULL || source==NULL) {
1852 *err=U_ILLEGAL_ARGUMENT_ERROR;
1853 return 0xffff;
1854 }
1855
1856 s=*source;
1857 if(sourceLimit<s) {
1858 *err=U_ILLEGAL_ARGUMENT_ERROR;
1859 return 0xffff;
1860 }
1861
1862 /*
1863 * Make sure that the buffer sizes do not exceed the number range for
1864 * int32_t because some functions use the size (in units or bytes)
1865 * rather than comparing pointers, and because offsets are int32_t values.
1866 *
1867 * size_t is guaranteed to be unsigned and large enough for the job.
1868 *
1869 * Return with an error instead of adjusting the limits because we would
1870 * not be able to maintain the semantics that either the source must be
1871 * consumed or the target filled (unless an error occurs).
1872 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1873 */
1874 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1875 *err=U_ILLEGAL_ARGUMENT_ERROR;
1876 return 0xffff;
1877 }
1878
/* c<0 below means: nothing was taken from the overflow buffer */
1879 c=U_SENTINEL;
1880
1881 /* flush the target overflow buffer */
1882 if(cnv->UCharErrorBufferLength>0) {
1883 UChar *overflow;
1884
1885 overflow=cnv->UCharErrorBuffer;
1886 i=0;
1887 length=cnv->UCharErrorBufferLength;
1888 U16_NEXT(overflow, i, length, c);
1889
1890 /* move the remaining overflow contents up to the beginning */
1891 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1892 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1893 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1894 }
1895
1896 if(!U16_IS_LEAD(c) || i<length) {
1897 return c;
1898 }
1899 /*
1900 * Continue if the overflow buffer contained only a lead surrogate,
1901 * in case the converter outputs single surrogates from complete
1902 * input sequences.
1903 */
1904 }
1905
1906 /*
1907 * flush==TRUE is implied for ucnv_getNextUChar()
1908 *
1909 * do not simply return even if s==sourceLimit because the converter may
1910 * not have seen flush==TRUE before
1911 */
1912
1913 /* prepare the converter arguments */
1914 args.converter=cnv;
1915 args.flush=TRUE;
1916 args.offsets=NULL;
1917 args.source=s;
1918 args.sourceLimit=sourceLimit;
1919 args.target=buffer;
/* room for exactly one UChar at first; widened to two below when a trail surrogate is needed */
1920 args.targetLimit=buffer+1;
1921 args.size=sizeof(args);
1922
1923 if(c<0) {
1924 /*
1925 * call the native getNextUChar() implementation if we are
1926 * at a character boundary (toULength==0)
1927 *
1928 * unlike with _toUnicode(), getNextUChar() implementations must set
1929 * U_TRUNCATED_CHAR_FOUND for truncated input,
1930 * in addition to setting toULength/toUBytes[]
1931 */
1932 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1933 c=cnv->sharedData->impl->getNextUChar(&args, err);
1934 *source=s=args.source;
1935 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1936 /* reset the converter without calling the callback function */
1937 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1938 return 0xffff; /* no output */
1939 } else if(U_SUCCESS(*err) && c>=0) {
1940 return c;
1941 /*
1942 * else fall through to use _toUnicode() because
1943 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1944 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1945 */
1946 }
1947 }
1948
1949 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1950 _toUnicodeWithCallback(&args, err);
1951
/* an overflow only means that more than one UChar was produced; the surplus is in the overflow buffer */
1952 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1953 *err=U_ZERO_ERROR;
1954 }
1955
1956 i=0;
1957 length=(int32_t)(args.target-buffer);
1958 } else {
1959 /* write the lead surrogate from the overflow buffer */
1960 buffer[0]=(UChar)c;
1961 args.target=buffer+1;
1962 i=0;
1963 length=1;
1964 }
1965
1966 /* buffer contents starts at i and ends before length */
1967
1968 if(U_FAILURE(*err)) {
1969 c=0xffff; /* no output */
1970 } else if(length==0) {
1971 /* no input or only state changes */
1972 *err=U_INDEX_OUTOFBOUNDS_ERROR;
1973 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1974 c=0xffff; /* no output */
1975 } else {
1976 c=buffer[0];
1977 i=1;
1978 if(!U16_IS_LEAD(c)) {
1979 /* consume c=buffer[0], done */
1980 } else {
1981 /* got a lead surrogate, see if a trail surrogate follows */
1982 UChar c2;
1983
1984 if(cnv->UCharErrorBufferLength>0) {
1985 /* got overflow output from the conversion */
1986 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1987 /* got a trail surrogate, too */
1988 c=U16_GET_SUPPLEMENTARY(c, c2);
1989
1990 /* move the remaining overflow contents up to the beginning */
1991 if((--cnv->UCharErrorBufferLength)>0) {
1992 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1993 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1994 }
1995 } else {
1996 /* c is an unpaired lead surrogate, just return it */
1997 }
1998 } else if(args.source<sourceLimit) {
1999 /* convert once more, to buffer[1] */
2000 args.targetLimit=buffer+2;
2001 _toUnicodeWithCallback(&args, err);
2002 if(*err==U_BUFFER_OVERFLOW_ERROR) {
2003 *err=U_ZERO_ERROR;
2004 }
2005
2006 length=(int32_t)(args.target-buffer);
2007 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
2008 /* got a trail surrogate, too */
2009 c=U16_GET_SUPPLEMENTARY(c, c2);
2010 i=2;
2011 }
/* otherwise i stays 1, so a UChar written to buffer[1] is saved to the overflow buffer below */
2012 }
2013 }
2014 }
2015
2016 /*
2017 * move leftover output from buffer[i..length[
2018 * into the beginning of the overflow buffer
2019 */
2020 if(i<length) {
2021 /* move further overflow back */
2022 int32_t delta=length-i;
2023 if((length=cnv->UCharErrorBufferLength)>0) {
2024 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
2025 length*U_SIZEOF_UCHAR);
2026 }
2027 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
2028
2029 cnv->UCharErrorBuffer[0]=buffer[i++];
2030 if(delta>1) {
2031 cnv->UCharErrorBuffer[1]=buffer[i];
2032 }
2033 }
2034
2035 *source=args.source;
2036 return c;
2037 }
2038
2039 /* ucnv_convert() and siblings ---------------------------------------------- */
2040
2041 U_CAPI void U_EXPORT2
2042 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
2043 char **target, const char *targetLimit,
2044 const char **source, const char *sourceLimit,
2045 UChar *pivotStart, UChar **pivotSource,
2046 UChar **pivotTarget, const UChar *pivotLimit,
2047 UBool reset, UBool flush,
2048 UErrorCode *pErrorCode) {
2049 UChar pivotBuffer[CHUNK_SIZE];
2050 const UChar *myPivotSource;
2051 UChar *myPivotTarget;
2052 const char *s;
2053 char *t;
2054
2055 UConverterToUnicodeArgs toUArgs;
2056 UConverterFromUnicodeArgs fromUArgs;
2057 UConverterConvert convert;
2058
2059 /* error checking */
2060 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2061 return;
2062 }
2063
2064 if( targetCnv==NULL || sourceCnv==NULL ||
2065 source==NULL || *source==NULL ||
2066 target==NULL || *target==NULL || targetLimit==NULL
2067 ) {
2068 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2069 return;
2070 }
2071
2072 s=*source;
2073 t=*target;
2074 if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) {
2075 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2076 return;
2077 }
2078
2079 /*
2080 * Make sure that the buffer sizes do not exceed the number range for
2081 * int32_t. See ucnv_toUnicode() for a more detailed comment.
2082 */
2083 if(
2084 (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) ||
2085 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
2086 ) {
2087 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2088 return;
2089 }
2090
2091 if(pivotStart==NULL) {
2092 if(!flush) {
2093 /* streaming conversion requires an explicit pivot buffer */
2094 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2095 return;
2096 }
2097
2098 /* use the stack pivot buffer */
2099 myPivotSource=myPivotTarget=pivotStart=pivotBuffer;
2100 pivotSource=(UChar **)&myPivotSource;
2101 pivotTarget=&myPivotTarget;
2102 pivotLimit=pivotBuffer+CHUNK_SIZE;
2103 } else if( pivotStart>=pivotLimit ||
2104 pivotSource==NULL || *pivotSource==NULL ||
2105 pivotTarget==NULL || *pivotTarget==NULL ||
2106 pivotLimit==NULL
2107 ) {
2108 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2109 return;
2110 }
2111
2112 if(sourceLimit==NULL) {
2113 /* get limit of single-byte-NUL-terminated source string */
2114 sourceLimit=uprv_strchr(*source, 0);
2115 }
2116
2117 if(reset) {
2118 ucnv_resetToUnicode(sourceCnv);
2119 ucnv_resetFromUnicode(targetCnv);
2120 *pivotSource=*pivotTarget=pivotStart;
2121 } else if(targetCnv->charErrorBufferLength>0) {
2122 /* output the targetCnv overflow buffer */
2123 if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) {
2124 /* U_BUFFER_OVERFLOW_ERROR */
2125 return;
2126 }
2127 /* *target has moved, therefore stop using t */
2128
2129 if( !flush &&
2130 targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget &&
2131 sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit
2132 ) {
2133 /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */
2134 return;
2135 }
2136 }
2137
2138 /* Is direct-UTF-8 conversion available? */
2139 if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2140 targetCnv->sharedData->impl->fromUTF8!=NULL
2141 ) {
2142 convert=targetCnv->sharedData->impl->fromUTF8;
2143 } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 &&
2144 sourceCnv->sharedData->impl->toUTF8!=NULL
2145 ) {
2146 convert=sourceCnv->sharedData->impl->toUTF8;
2147 } else {
2148 convert=NULL;
2149 }
2150
2151 /*
2152 * If direct-UTF-8 conversion is available, then we use a smaller
2153 * pivot buffer for error handling and partial matches
2154 * so that we quickly return to direct conversion.
2155 *
2156 * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH.
2157 *
2158 * We could reduce the pivot buffer size further, at the cost of
2159 * buffer overflows from callbacks.
2160 * The pivot buffer should not be smaller than the maximum number of
2161 * fromUnicode extension table input UChars
2162 * (for m:n conversion, see
2163 * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS])
2164 * or 2 for surrogate pairs.
2165 *
2166 * Too small a buffer can cause thrashing between pivoting and direct
2167 * conversion, with function call overhead outweighing the benefits
2168 * of direct conversion.
2169 */
2170 if(convert!=NULL && (pivotLimit-pivotStart)>32) {
2171 pivotLimit=pivotStart+32;
2172 }
2173
2174 /* prepare the converter arguments */
2175 fromUArgs.converter=targetCnv;
2176 fromUArgs.flush=FALSE;
2177 fromUArgs.offsets=NULL;
2178 fromUArgs.target=*target;
2179 fromUArgs.targetLimit=targetLimit;
2180 fromUArgs.size=sizeof(fromUArgs);
2181
2182 toUArgs.converter=sourceCnv;
2183 toUArgs.flush=flush;
2184 toUArgs.offsets=NULL;
2185 toUArgs.source=s;
2186 toUArgs.sourceLimit=sourceLimit;
2187 toUArgs.targetLimit=pivotLimit;
2188 toUArgs.size=sizeof(toUArgs);
2189
2190 /*
2191 * TODO: Consider separating this function into two functions,
2192 * extracting exactly the conversion loop,
2193 * for readability and to reduce the set of visible variables.
2194 *
2195 * Otherwise stop using s and t from here on.
2196 */
2197 s=t=NULL;
2198
2199 /*
2200 * conversion loop
2201 *
2202 * The sequence of steps in the loop may appear backward,
2203 * but the principle is simple:
2204 * In the chain of
2205 * source - sourceCnv overflow - pivot - targetCnv overflow - target
2206 * empty out later buffers before refilling them from earlier ones.
2207 *
2208 * The targetCnv overflow buffer is flushed out only once before the loop.
2209 */
2210 for(;;) {
2211 /*
2212 * if(pivot not empty or error or replay or flush fromUnicode) {
2213 * fromUnicode(pivot -> target);
2214 * }
2215 *
2216 * For pivoting conversion; and for direct conversion for
2217 * error callback handling and flushing the replay buffer.
2218 */
2219 if( *pivotSource<*pivotTarget ||
2220 U_FAILURE(*pErrorCode) ||
2221 targetCnv->preFromULength<0 ||
2222 fromUArgs.flush
2223 ) {
2224 fromUArgs.source=*pivotSource;
2225 fromUArgs.sourceLimit=*pivotTarget;
2226 _fromUnicodeWithCallback(&fromUArgs, pErrorCode);
2227 if(U_FAILURE(*pErrorCode)) {
2228 /* target overflow, or conversion error */
2229 *pivotSource=(UChar *)fromUArgs.source;
2230 break;
2231 }
2232
2233 /*
2234 * _fromUnicodeWithCallback() must have consumed the pivot contents
2235 * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS()
2236 */
2237 }
2238
2239 /* The pivot buffer is empty; reset it so we start at pivotStart. */
2240 *pivotSource=*pivotTarget=pivotStart;
2241
2242 /*
2243 * if(sourceCnv overflow buffer not empty) {
2244 * move(sourceCnv overflow buffer -> pivot);
2245 * continue;
2246 * }
2247 */
2248 /* output the sourceCnv overflow buffer */
2249 if(sourceCnv->UCharErrorBufferLength>0) {
2250 if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) {
2251 /* U_BUFFER_OVERFLOW_ERROR */
2252 *pErrorCode=U_ZERO_ERROR;
2253 }
2254 continue;
2255 }
2256
2257 /*
2258 * check for end of input and break if done
2259 *
2260 * Checking both flush and fromUArgs.flush ensures that the converters
2261 * have been called with the flush flag set if the ucnv_convertEx()
2262 * caller set it.
2263 */
2264 if( toUArgs.source==sourceLimit &&
2265 sourceCnv->preToULength>=0 && sourceCnv->toULength==0 &&
2266 (!flush || fromUArgs.flush)
2267 ) {
2268 /* done successfully */
2269 break;
2270 }
2271
2272 /*
2273 * use direct conversion if available
2274 * but not if continuing a partial match
2275 * or flushing the toUnicode replay buffer
2276 */
2277 if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) {
2278 if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2279 /* remove a warning that may be set by this function */
2280 *pErrorCode=U_ZERO_ERROR;
2281 }
2282 convert(&fromUArgs, &toUArgs, pErrorCode);
2283 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2284 break;
2285 } else if(U_FAILURE(*pErrorCode)) {
2286 if(sourceCnv->toULength>0) {
2287 /*
2288 * Fall through to calling _toUnicodeWithCallback()
2289 * for callback handling.
2290 *
2291 * The pivot buffer will be reset with
2292 * *pivotSource=*pivotTarget=pivotStart;
2293 * which indicates a toUnicode error to the caller
2294 * (*pivotSource==pivotStart shows no pivot UChars consumed).
2295 */
2296 } else {
2297 /*
2298 * Indicate a fromUnicode error to the caller
2299 * (*pivotSource>pivotStart shows some pivot UChars consumed).
2300 */
2301 *pivotSource=*pivotTarget=pivotStart+1;
2302 /*
2303 * Loop around to calling _fromUnicodeWithCallback()
2304 * for callback handling.
2305 */
2306 continue;
2307 }
2308 } else if(*pErrorCode==U_USING_DEFAULT_WARNING) {
2309 /*
2310 * No error, but the implementation requested to temporarily
2311 * fall back to pivoting.
2312 */
2313 *pErrorCode=U_ZERO_ERROR;
2314 /*
2315 * The following else branches are almost identical to the end-of-input
2316 * handling in _toUnicodeWithCallback().
2317 * Avoid calling it just for the end of input.
2318 */
2319 } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */
2320 /*
2321 * the entire input stream is consumed
2322 * and there is a partial, truncated input sequence left
2323 */
2324
2325 /* inject an error and continue with callback handling */
2326 *pErrorCode=U_TRUNCATED_CHAR_FOUND;
2327 } else {
2328 /* input consumed */
2329 if(flush) {
2330 /* reset the converters without calling the callback functions */
2331 _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE);
2332 _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE);
2333 }
2334
2335 /* done successfully */
2336 break;
2337 }
2338 }
2339
2340 /*
2341 * toUnicode(source -> pivot);
2342 *
2343 * For pivoting conversion; and for direct conversion for
2344 * error callback handling, continuing partial matches
2345 * and flushing the replay buffer.
2346 *
2347 * The pivot buffer is empty and reset.
2348 */
2349 toUArgs.target=pivotStart; /* ==*pivotTarget */
2350 /* toUArgs.targetLimit=pivotLimit; already set before the loop */
2351 _toUnicodeWithCallback(&toUArgs, pErrorCode);
2352 *pivotTarget=toUArgs.target;
2353 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
2354 /* pivot overflow: continue with the conversion loop */
2355 *pErrorCode=U_ZERO_ERROR;
2356 } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) {
2357 /* conversion error, or there was nothing left to convert */
2358 break;
2359 }
2360 /*
2361 * else:
2362 * _toUnicodeWithCallback() wrote into the pivot buffer,
2363 * continue with fromUnicode conversion.
2364 *
2365 * Set the fromUnicode flush flag if we flush and if toUnicode has
2366 * processed the end of the input.
2367 */
2368 if( flush && toUArgs.source==sourceLimit &&
2369 sourceCnv->preToULength>=0 &&
2370 sourceCnv->UCharErrorBufferLength==0
2371 ) {
2372 fromUArgs.flush=TRUE;
2373 }
2374 }
2375
2376 /*
2377 * The conversion loop is exited when one of the following is true:
2378 * - the entire source text has been converted successfully to the target buffer
2379 * - a target buffer overflow occurred
2380 * - a conversion error occurred
2381 */
2382
2383 *source=toUArgs.source;
2384 *target=fromUArgs.target;
2385
2386 /* terminate the target buffer if possible */
2387 if(flush && U_SUCCESS(*pErrorCode)) {
2388 if(*target!=targetLimit) {
2389 **target=0;
2390 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
2391 *pErrorCode=U_ZERO_ERROR;
2392 }
2393 } else {
2394 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
2395 }
2396 }
2397 }
2398
2399 /* internal implementation of ucnv_convert() etc. with preflighting */
2400 static int32_t
2401 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
2402 char *target, int32_t targetCapacity,
2403 const char *source, int32_t sourceLength,
2404 UErrorCode *pErrorCode) {
2405 UChar pivotBuffer[CHUNK_SIZE];
2406 UChar *pivot, *pivot2;
2407
2408 char *myTarget;
2409 const char *sourceLimit;
2410 const char *targetLimit;
2411 int32_t targetLength=0;
2412
2413 /* set up */
2414 if(sourceLength<0) {
2415 sourceLimit=uprv_strchr(source, 0);
2416 } else {
2417 sourceLimit=source+sourceLength;
2418 }
2419
2420 /* if there is no input data, we're done */
2421 if(source==sourceLimit) {
2422 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2423 }
2424
2425 pivot=pivot2=pivotBuffer;
2426 myTarget=target;
2427 targetLength=0;
2428
2429 if(targetCapacity>0) {
2430 /* perform real conversion */
2431 targetLimit=target+targetCapacity;
2432 ucnv_convertEx(outConverter, inConverter,
2433 &myTarget, targetLimit,
2434 &source, sourceLimit,
2435 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2436 FALSE,
2437 TRUE,
2438 pErrorCode);
2439 targetLength=(int32_t)(myTarget-target);
2440 }
2441
2442 /*
2443 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
2444 * to it but continue the conversion in order to store in targetCapacity
2445 * the number of bytes that was required.
2446 */
2447 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
2448 {
2449 char targetBuffer[CHUNK_SIZE];
2450
2451 targetLimit=targetBuffer+CHUNK_SIZE;
2452 do {
2453 *pErrorCode=U_ZERO_ERROR;
2454 myTarget=targetBuffer;
2455 ucnv_convertEx(outConverter, inConverter,
2456 &myTarget, targetLimit,
2457 &source, sourceLimit,
2458 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2459 FALSE,
2460 TRUE,
2461 pErrorCode);
2462 targetLength+=(int32_t)(myTarget-targetBuffer);
2463 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2464
2465 /* done with preflighting, set warnings and errors as appropriate */
2466 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2467 }
2468
2469 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2470 return targetLength;
2471 }
2472
2473 U_CAPI int32_t U_EXPORT2
2474 ucnv_convert(const char *toConverterName, const char *fromConverterName,
2475 char *target, int32_t targetCapacity,
2476 const char *source, int32_t sourceLength,
2477 UErrorCode *pErrorCode) {
2478 UConverter in, out; /* stack-allocated */
2479 UConverter *inConverter, *outConverter;
2480 int32_t targetLength;
2481
2482 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2483 return 0;
2484 }
2485
2486 if( source==NULL || sourceLength<-1 ||
2487 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2488 ) {
2489 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2490 return 0;
2491 }
2492
2493 /* if there is no input data, we're done */
2494 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2495 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2496 }
2497
2498 /* create the converters */
2499 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2500 if(U_FAILURE(*pErrorCode)) {
2501 return 0;
2502 }
2503
2504 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2505 if(U_FAILURE(*pErrorCode)) {
2506 ucnv_close(inConverter);
2507 return 0;
2508 }
2509
2510 targetLength=ucnv_internalConvert(outConverter, inConverter,
2511 target, targetCapacity,
2512 source, sourceLength,
2513 pErrorCode);
2514
2515 ucnv_close(inConverter);
2516 ucnv_close(outConverter);
2517
2518 return targetLength;
2519 }
2520
2521 /* @internal */
2522 static int32_t
2523 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2524 UConverterType algorithmicType,
2525 UConverter *cnv,
2526 char *target, int32_t targetCapacity,
2527 const char *source, int32_t sourceLength,
2528 UErrorCode *pErrorCode) {
2529 UConverter algoConverterStatic; /* stack-allocated */
2530 UConverter *algoConverter, *to, *from;
2531 int32_t targetLength;
2532
2533 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2534 return 0;
2535 }
2536
2537 if( cnv==NULL || source==NULL || sourceLength<-1 ||
2538 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2539 ) {
2540 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2541 return 0;
2542 }
2543
2544 /* if there is no input data, we're done */
2545 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2546 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2547 }
2548
2549 /* create the algorithmic converter */
2550 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2551 "", 0, pErrorCode);
2552 if(U_FAILURE(*pErrorCode)) {
2553 return 0;
2554 }
2555
2556 /* reset the other converter */
2557 if(convertToAlgorithmic) {
2558 /* cnv->Unicode->algo */
2559 ucnv_resetToUnicode(cnv);
2560 to=algoConverter;
2561 from=cnv;
2562 } else {
2563 /* algo->Unicode->cnv */
2564 ucnv_resetFromUnicode(cnv);
2565 from=algoConverter;
2566 to=cnv;
2567 }
2568
2569 targetLength=ucnv_internalConvert(to, from,
2570 target, targetCapacity,
2571 source, sourceLength,
2572 pErrorCode);
2573
2574 ucnv_close(algoConverter);
2575
2576 return targetLength;
2577 }
2578
2579 U_CAPI int32_t U_EXPORT2
2580 ucnv_toAlgorithmic(UConverterType algorithmicType,
2581 UConverter *cnv,
2582 char *target, int32_t targetCapacity,
2583 const char *source, int32_t sourceLength,
2584 UErrorCode *pErrorCode) {
2585 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2586 target, targetCapacity,
2587 source, sourceLength,
2588 pErrorCode);
2589 }
2590
2591 U_CAPI int32_t U_EXPORT2
2592 ucnv_fromAlgorithmic(UConverter *cnv,
2593 UConverterType algorithmicType,
2594 char *target, int32_t targetCapacity,
2595 const char *source, int32_t sourceLength,
2596 UErrorCode *pErrorCode) {
2597 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2598 target, targetCapacity,
2599 source, sourceLength,
2600 pErrorCode);
2601 }
2602
2603 U_CAPI UConverterType U_EXPORT2
2604 ucnv_getType(const UConverter* converter)
2605 {
2606 int8_t type = converter->sharedData->staticData->conversionType;
2607 #if !UCONFIG_NO_LEGACY_CONVERSION
2608 if(type == UCNV_MBCS) {
2609 return ucnv_MBCSGetType(converter);
2610 }
2611 #endif
2612 return (UConverterType)type;
2613 }
2614
2615 U_CAPI void U_EXPORT2
2616 ucnv_getStarters(const UConverter* converter,
2617 UBool starters[256],
2618 UErrorCode* err)
2619 {
2620 if (err == NULL || U_FAILURE(*err)) {
2621 return;
2622 }
2623
2624 if(converter->sharedData->impl->getStarters != NULL) {
2625 converter->sharedData->impl->getStarters(converter, starters, err);
2626 } else {
2627 *err = U_ILLEGAL_ARGUMENT_ERROR;
2628 }
2629 }
2630
2631 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2632 {
2633 UErrorCode errorCode;
2634 const char *name;
2635 int32_t i;
2636
2637 if(cnv==NULL) {
2638 return NULL;
2639 }
2640
2641 errorCode=U_ZERO_ERROR;
2642 name=ucnv_getName(cnv, &errorCode);
2643 if(U_FAILURE(errorCode)) {
2644 return NULL;
2645 }
2646
2647 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2648 {
2649 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2650 {
2651 return ambiguousConverters+i;
2652 }
2653 }
2654
2655 return NULL;
2656 }
2657
2658 U_CAPI void U_EXPORT2
2659 ucnv_fixFileSeparator(const UConverter *cnv,
2660 UChar* source,
2661 int32_t sourceLength) {
2662 const UAmbiguousConverter *a;
2663 int32_t i;
2664 UChar variant5c;
2665
2666 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2667 {
2668 return;
2669 }
2670
2671 variant5c=a->variant5c;
2672 for(i=0; i<sourceLength; ++i) {
2673 if(source[i]==variant5c) {
2674 source[i]=0x5c;
2675 }
2676 }
2677 }
2678
2679 U_CAPI UBool U_EXPORT2
2680 ucnv_isAmbiguous(const UConverter *cnv) {
2681 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2682 }
2683
2684 U_CAPI void U_EXPORT2
2685 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2686 {
2687 cnv->useFallback = usesFallback;
2688 }
2689
2690 U_CAPI UBool U_EXPORT2
2691 ucnv_usesFallback(const UConverter *cnv)
2692 {
2693 return cnv->useFallback;
2694 }
2695
2696 U_CAPI void U_EXPORT2
2697 ucnv_getInvalidChars (const UConverter * converter,
2698 char *errBytes,
2699 int8_t * len,
2700 UErrorCode * err)
2701 {
2702 if (err == NULL || U_FAILURE(*err))
2703 {
2704 return;
2705 }
2706 if (len == NULL || errBytes == NULL || converter == NULL)
2707 {
2708 *err = U_ILLEGAL_ARGUMENT_ERROR;
2709 return;
2710 }
2711 if (*len < converter->invalidCharLength)
2712 {
2713 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2714 return;
2715 }
2716 if ((*len = converter->invalidCharLength) > 0)
2717 {
2718 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2719 }
2720 }
2721
2722 U_CAPI void U_EXPORT2
2723 ucnv_getInvalidUChars (const UConverter * converter,
2724 UChar *errChars,
2725 int8_t * len,
2726 UErrorCode * err)
2727 {
2728 if (err == NULL || U_FAILURE(*err))
2729 {
2730 return;
2731 }
2732 if (len == NULL || errChars == NULL || converter == NULL)
2733 {
2734 *err = U_ILLEGAL_ARGUMENT_ERROR;
2735 return;
2736 }
2737 if (*len < converter->invalidUCharLength)
2738 {
2739 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2740 return;
2741 }
2742 if ((*len = converter->invalidUCharLength) > 0)
2743 {
2744 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2745 }
2746 }
2747
2748 #define SIG_MAX_LEN 5
2749
2750 U_CAPI const char* U_EXPORT2
2751 ucnv_detectUnicodeSignature( const char* source,
2752 int32_t sourceLength,
2753 int32_t* signatureLength,
2754 UErrorCode* pErrorCode) {
2755 int32_t dummy;
2756
2757 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2758 * bytes we don't misdetect something
2759 */
2760 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2761 int i = 0;
2762
2763 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2764 return NULL;
2765 }
2766
2767 if(source == NULL || sourceLength < -1){
2768 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2769 return NULL;
2770 }
2771
2772 if(signatureLength == NULL) {
2773 signatureLength = &dummy;
2774 }
2775
2776 if(sourceLength==-1){
2777 sourceLength=(int32_t)uprv_strlen(source);
2778 }
2779
2780
2781 while(i<sourceLength&& i<SIG_MAX_LEN){
2782 start[i]=source[i];
2783 i++;
2784 }
2785
2786 if(start[0] == '\xFE' && start[1] == '\xFF') {
2787 *signatureLength=2;
2788 return "UTF-16BE";
2789 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2790 if(start[2] == '\x00' && start[3] =='\x00') {
2791 *signatureLength=4;
2792 return "UTF-32LE";
2793 } else {
2794 *signatureLength=2;
2795 return "UTF-16LE";
2796 }
2797 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2798 *signatureLength=3;
2799 return "UTF-8";
2800 } else if(start[0] == '\x00' && start[1] == '\x00' &&
2801 start[2] == '\xFE' && start[3]=='\xFF') {
2802 *signatureLength=4;
2803 return "UTF-32BE";
2804 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2805 *signatureLength=3;
2806 return "SCSU";
2807 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2808 *signatureLength=3;
2809 return "BOCU-1";
2810 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2811 /*
2812 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2813 * depending on the second UTF-16 code unit.
2814 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2815 * if it occurs.
2816 *
2817 * So far we have +/v
2818 */
2819 if(start[3] == '\x38' && start[4] == '\x2D') {
2820 /* 5 bytes +/v8- */
2821 *signatureLength=5;
2822 return "UTF-7";
2823 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2824 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2825 *signatureLength=4;
2826 return "UTF-7";
2827 }
2828 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2829 *signatureLength=4;
2830 return "UTF-EBCDIC";
2831 }
2832
2833
2834 /* no known Unicode signature byte sequence recognized */
2835 *signatureLength=0;
2836 return NULL;
2837 }
2838
2839 U_CAPI int32_t U_EXPORT2
2840 ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status)
2841 {
2842 if(status == NULL || U_FAILURE(*status)){
2843 return -1;
2844 }
2845 if(cnv == NULL){
2846 *status = U_ILLEGAL_ARGUMENT_ERROR;
2847 return -1;
2848 }
2849
2850 if(cnv->preFromULength > 0){
2851 return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ;
2852 }else if(cnv->preFromULength < 0){
2853 return -cnv->preFromULength ;
2854 }else if(cnv->fromUChar32 > 0){
2855 return 1;
2856 }else if(cnv->preFromUFirstCP >0){
2857 return U16_LENGTH(cnv->preFromUFirstCP);
2858 }
2859 return 0;
2860
2861 }
2862
2863 U_CAPI int32_t U_EXPORT2
2864 ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){
2865
2866 if(status == NULL || U_FAILURE(*status)){
2867 return -1;
2868 }
2869 if(cnv == NULL){
2870 *status = U_ILLEGAL_ARGUMENT_ERROR;
2871 return -1;
2872 }
2873
2874 if(cnv->preToULength > 0){
2875 return cnv->preToULength ;
2876 }else if(cnv->preToULength < 0){
2877 return -cnv->preToULength;
2878 }else if(cnv->toULength > 0){
2879 return cnv->toULength;
2880 }
2881 return 0;
2882 }
2883 #endif
2884
2885 /*
2886 * Hey, Emacs, please set the following:
2887 *
2888 * Local Variables:
2889 * indent-tabs-mode: nil
2890 * End:
2891 *
2892 */