]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucnv.c
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / common / ucnv.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * ucnv.c:
10 * Implements APIs for the ICU's codeset conversion library;
11 * mostly calls through internal functions;
12 * created by Bertrand A. Damiba
13 *
14 * Modification History:
15 *
16 * Date Name Description
17 * 04/04/99 helena Fixed internal header inclusion.
18 * 05/09/00 helena Added implementation to handle fallback mappings.
19 * 06/20/2000 helena OS/400 port changes; mostly typecast.
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #include "unicode/ustring.h"
27 #include "unicode/ucnv.h"
28 #include "unicode/ucnv_err.h"
29 #include "unicode/uset.h"
30 #include "putilimp.h"
31 #include "cmemory.h"
32 #include "cstring.h"
33 #include "uassert.h"
34 #include "utracimp.h"
35 #include "ustr_imp.h"
36 #include "ucnv_imp.h"
37 #include "ucnv_io.h"
38 #include "ucnv_cnv.h"
39 #include "ucnv_bld.h"
40
41 /* size of intermediate and preflighting buffers in ucnv_convert() */
42 #define CHUNK_SIZE 1024
43
44 typedef struct UAmbiguousConverter {
45 const char *name;
46 const UChar variant5c;
47 } UAmbiguousConverter;
48
49 static const UAmbiguousConverter ambiguousConverters[]={
50 { "ibm-942_P120-1999", 0xa5 },
51 { "ibm-943_P130-1999", 0xa5 },
52 { "ibm-897_P100-1995", 0xa5 },
53 { "ibm-33722_P120-1999", 0xa5 },
54 { "ibm-949_P110-1999", 0x20a9 },
55 { "ibm-1363_P110-1997", 0x20a9 },
56 { "ISO_2022,locale=ko,version=0", 0x20a9 }
57 };
58
59 U_CAPI const char* U_EXPORT2
60 ucnv_getDefaultName ()
61 {
62 return ucnv_io_getDefaultConverterName();
63 }
64
65 U_CAPI void U_EXPORT2
66 ucnv_setDefaultName (const char *converterName)
67 {
68 ucnv_io_setDefaultConverterName(converterName);
69 }
70 /*Calls through createConverter */
71 U_CAPI UConverter* U_EXPORT2
72 ucnv_open (const char *name,
73 UErrorCode * err)
74 {
75 UConverter *r;
76
77 if (err == NULL || U_FAILURE (*err)) {
78 return NULL;
79 }
80
81 r = ucnv_createConverter(NULL, name, err);
82 return r;
83 }
84
85 U_CAPI UConverter* U_EXPORT2
86 ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err)
87 {
88 return ucnv_createConverterFromPackage(packageName, converterName, err);
89 }
90
91 /*Extracts the UChar* to a char* and calls through createConverter */
92 U_CAPI UConverter* U_EXPORT2
93 ucnv_openU (const UChar * name,
94 UErrorCode * err)
95 {
96 char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
97
98 if (err == NULL || U_FAILURE(*err))
99 return NULL;
100 if (name == NULL)
101 return ucnv_open (NULL, err);
102 if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
103 {
104 *err = U_ILLEGAL_ARGUMENT_ERROR;
105 return NULL;
106 }
107 return ucnv_open(u_austrcpy(asciiName, name), err);
108 }
109
110 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
111 *through createConverter*/
112 U_CAPI UConverter* U_EXPORT2
113 ucnv_openCCSID (int32_t codepage,
114 UConverterPlatform platform,
115 UErrorCode * err)
116 {
117 char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
118 int32_t myNameLen;
119
120 if (err == NULL || U_FAILURE (*err))
121 return NULL;
122
123 /* ucnv_copyPlatformString could return "ibm-" or "cp" */
124 myNameLen = ucnv_copyPlatformString(myName, platform);
125 T_CString_integerToString(myName + myNameLen, codepage, 10);
126
127 return ucnv_createConverter(NULL, myName, err);
128 }
129
130 /* Creating a temporary stack-based object that can be used in one thread,
131 and created from a converter that is shared across threads.
132 */
133
134 U_CAPI UConverter* U_EXPORT2
135 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
136 {
137 UConverter *localConverter, *allocatedConverter;
138 int32_t bufferSizeNeeded;
139 char *stackBufferChars = (char *)stackBuffer;
140 UErrorCode cbErr;
141 UConverterToUnicodeArgs toUArgs = {
142 sizeof(UConverterToUnicodeArgs),
143 TRUE,
144 NULL,
145 NULL,
146 NULL,
147 NULL,
148 NULL,
149 NULL
150 };
151 UConverterFromUnicodeArgs fromUArgs = {
152 sizeof(UConverterFromUnicodeArgs),
153 TRUE,
154 NULL,
155 NULL,
156 NULL,
157 NULL,
158 NULL,
159 NULL
160 };
161
162 UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
163
164 if (status == NULL || U_FAILURE(*status)){
165 UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
166 return 0;
167 }
168
169 if (!pBufferSize || !cnv){
170 *status = U_ILLEGAL_ARGUMENT_ERROR;
171 UTRACE_EXIT_STATUS(*status);
172 return 0;
173 }
174
175 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
176 ucnv_getName(cnv, status), cnv, stackBuffer);
177
178 if (cnv->sharedData->impl->safeClone != NULL) {
179 /* call the custom safeClone function for sizing */
180 bufferSizeNeeded = 0;
181 cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
182 }
183 else
184 {
185 /* inherent sizing */
186 bufferSizeNeeded = sizeof(UConverter);
187 }
188
189 if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
190 *pBufferSize = bufferSizeNeeded;
191 UTRACE_EXIT_VALUE(bufferSizeNeeded);
192 return 0;
193 }
194
195
196 /* Pointers on 64-bit platforms need to be aligned
197 * on a 64-bit boundary in memory.
198 */
199 if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
200 int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
201 if(*pBufferSize > offsetUp) {
202 *pBufferSize -= offsetUp;
203 stackBufferChars += offsetUp;
204 } else {
205 /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
206 *pBufferSize = 1;
207 }
208 }
209
210 stackBuffer = (void *)stackBufferChars;
211
212 /* Now, see if we must allocate any memory */
213 if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
214 {
215 /* allocate one here...*/
216 localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
217
218 if(localConverter == NULL) {
219 *status = U_MEMORY_ALLOCATION_ERROR;
220 UTRACE_EXIT_STATUS(*status);
221 return NULL;
222 }
223
224 if (U_SUCCESS(*status)) {
225 *status = U_SAFECLONE_ALLOCATED_WARNING;
226 }
227
228 /* record the fact that memory was allocated */
229 *pBufferSize = bufferSizeNeeded;
230 } else {
231 /* just use the stack buffer */
232 localConverter = (UConverter*) stackBuffer;
233 allocatedConverter = NULL;
234 }
235
236 uprv_memset(localConverter, 0, bufferSizeNeeded);
237
238 /* Copy initial state */
239 uprv_memcpy(localConverter, cnv, sizeof(UConverter));
240 localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
241
242 /* now either call the safeclone fcn or not */
243 if (cnv->sharedData->impl->safeClone != NULL) {
244 /* call the custom safeClone function */
245 localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
246 }
247
248 if(localConverter==NULL || U_FAILURE(*status)) {
249 uprv_free(allocatedConverter);
250 UTRACE_EXIT_STATUS(*status);
251 return NULL;
252 }
253
254 /* increment refcount of shared data if needed */
255 /*
256 Checking whether it's an algorithic converter is okay
257 in multithreaded applications because the value never changes.
258 Don't check referenceCounter for any other value.
259 */
260 if (cnv->sharedData->referenceCounter != ~0) {
261 ucnv_incrementRefCount(cnv->sharedData);
262 }
263
264 if(localConverter == (UConverter*)stackBuffer) {
265 /* we're using user provided data - set to not destroy */
266 localConverter->isCopyLocal = TRUE;
267 }
268
269 /* allow callback functions to handle any memory allocation */
270 toUArgs.converter = fromUArgs.converter = localConverter;
271 cbErr = U_ZERO_ERROR;
272 cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
273 cbErr = U_ZERO_ERROR;
274 cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
275
276 UTRACE_EXIT_PTR_STATUS(localConverter, *status);
277 return localConverter;
278 }
279
280
281
282 /*Decreases the reference counter in the shared immutable section of the object
283 *and frees the mutable part*/
284
285 U_CAPI void U_EXPORT2
286 ucnv_close (UConverter * converter)
287 {
288 /* first, notify the callback functions that the converter is closed */
289 UConverterToUnicodeArgs toUArgs = {
290 sizeof(UConverterToUnicodeArgs),
291 TRUE,
292 NULL,
293 NULL,
294 NULL,
295 NULL,
296 NULL,
297 NULL
298 };
299 UConverterFromUnicodeArgs fromUArgs = {
300 sizeof(UConverterFromUnicodeArgs),
301 TRUE,
302 NULL,
303 NULL,
304 NULL,
305 NULL,
306 NULL,
307 NULL
308 };
309 UErrorCode errorCode = U_ZERO_ERROR;
310
311 UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
312
313 if (converter == NULL)
314 {
315 UTRACE_EXIT();
316 return;
317 }
318
319 UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
320 ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
321
322 toUArgs.converter = fromUArgs.converter = converter;
323
324 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
325 errorCode = U_ZERO_ERROR;
326 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
327
328 if (converter->sharedData->impl->close != NULL) {
329 converter->sharedData->impl->close(converter);
330 }
331
332 /*
333 Checking whether it's an algorithic converter is okay
334 in multithreaded applications because the value never changes.
335 Don't check referenceCounter for any other value.
336 */
337 if (converter->sharedData->referenceCounter != ~0) {
338 ucnv_unloadSharedDataIfReady(converter->sharedData);
339 }
340
341 if(!converter->isCopyLocal){
342 uprv_free (converter);
343 }
344
345 UTRACE_EXIT();
346 }
347
348 /*returns a single Name from the list, will return NULL if out of bounds
349 */
350 U_CAPI const char* U_EXPORT2
351 ucnv_getAvailableName (int32_t n)
352 {
353 if (0 <= n && n <= 0xffff) {
354 UErrorCode err = U_ZERO_ERROR;
355 const char *name = ucnv_io_getAvailableConverter((uint16_t)n, &err);
356 if (U_SUCCESS(err)) {
357 return name;
358 }
359 }
360 return NULL;
361 }
362
363 U_CAPI int32_t U_EXPORT2
364 ucnv_countAvailable ()
365 {
366 UErrorCode err = U_ZERO_ERROR;
367 return ucnv_io_countAvailableConverters(&err);
368 }
369
370 U_CAPI uint16_t U_EXPORT2
371 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
372 {
373 return ucnv_io_countAliases(alias, pErrorCode);
374 }
375
376
377 U_CAPI const char* U_EXPORT2
378 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
379 {
380 return ucnv_io_getAlias(alias, n, pErrorCode);
381 }
382
383 U_CAPI void U_EXPORT2
384 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
385 {
386 ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
387 }
388
389 U_CAPI uint16_t U_EXPORT2
390 ucnv_countStandards(void)
391 {
392 UErrorCode err = U_ZERO_ERROR;
393 return ucnv_io_countStandards(&err);
394 }
395
396 U_CAPI void U_EXPORT2
397 ucnv_getSubstChars (const UConverter * converter,
398 char *mySubChar,
399 int8_t * len,
400 UErrorCode * err)
401 {
402 if (U_FAILURE (*err))
403 return;
404
405 if (*len < converter->subCharLen) /*not enough space in subChars */
406 {
407 *err = U_INDEX_OUTOFBOUNDS_ERROR;
408 return;
409 }
410
411 uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
412 *len = converter->subCharLen; /*store # of bytes copied to buffer */
413 uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen); /*fills in the subchars */
414 *len = converter->subCharLen; /*store # of bytes copied to buffer */
415 }
416
417 U_CAPI void U_EXPORT2
418 ucnv_setSubstChars (UConverter * converter,
419 const char *mySubChar,
420 int8_t len,
421 UErrorCode * err)
422 {
423 if (U_FAILURE (*err))
424 return;
425
426 /*Makes sure that the subChar is within the codepages char length boundaries */
427 if ((len > converter->sharedData->staticData->maxBytesPerChar)
428 || (len < converter->sharedData->staticData->minBytesPerChar))
429 {
430 *err = U_ILLEGAL_ARGUMENT_ERROR;
431 return;
432 }
433
434 uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
435 converter->subCharLen = len; /*sets the new len */
436
437 /*
438 * There is currently (2001Feb) no separate API to set/get subChar1.
439 * In order to always have subChar written after it is explicitly set,
440 * we set subChar1 to 0.
441 */
442 converter->subChar1 = 0;
443
444 return;
445 }
446
447 /*resets the internal states of a converter
448 *goal : have the same behaviour than a freshly created converter
449 */
450 static void _reset(UConverter *converter, UConverterResetChoice choice,
451 UBool callCallback) {
452 if(converter == NULL) {
453 return;
454 }
455
456 if(callCallback) {
457 /* first, notify the callback functions that the converter is reset */
458 UConverterToUnicodeArgs toUArgs = {
459 sizeof(UConverterToUnicodeArgs),
460 TRUE,
461 NULL,
462 NULL,
463 NULL,
464 NULL,
465 NULL,
466 NULL
467 };
468 UConverterFromUnicodeArgs fromUArgs = {
469 sizeof(UConverterFromUnicodeArgs),
470 TRUE,
471 NULL,
472 NULL,
473 NULL,
474 NULL,
475 NULL,
476 NULL
477 };
478 UErrorCode errorCode;
479
480 toUArgs.converter = fromUArgs.converter = converter;
481 if(choice<=UCNV_RESET_TO_UNICODE) {
482 errorCode = U_ZERO_ERROR;
483 converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
484 }
485 if(choice!=UCNV_RESET_TO_UNICODE) {
486 errorCode = U_ZERO_ERROR;
487 converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
488 }
489 }
490
491 /* now reset the converter itself */
492 if(choice<=UCNV_RESET_TO_UNICODE) {
493 converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
494 converter->mode = 0;
495 converter->toULength = 0;
496 converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
497 converter->preToULength = 0;
498 }
499 if(choice!=UCNV_RESET_TO_UNICODE) {
500 converter->fromUnicodeStatus = 0;
501 converter->fromUChar32 = 0;
502 converter->invalidUCharLength = converter->charErrorBufferLength = 0;
503 converter->preFromUFirstCP = U_SENTINEL;
504 converter->preFromULength = 0;
505 }
506
507 if (converter->sharedData->impl->reset != NULL) {
508 /* call the custom reset function */
509 converter->sharedData->impl->reset(converter, choice);
510 }
511 }
512
513 U_CAPI void U_EXPORT2
514 ucnv_reset(UConverter *converter)
515 {
516 _reset(converter, UCNV_RESET_BOTH, TRUE);
517 }
518
519 U_CAPI void U_EXPORT2
520 ucnv_resetToUnicode(UConverter *converter)
521 {
522 _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
523 }
524
525 U_CAPI void U_EXPORT2
526 ucnv_resetFromUnicode(UConverter *converter)
527 {
528 _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
529 }
530
531 U_CAPI int8_t U_EXPORT2
532 ucnv_getMaxCharSize (const UConverter * converter)
533 {
534 return converter->maxBytesPerUChar;
535 }
536
537
538 U_CAPI int8_t U_EXPORT2
539 ucnv_getMinCharSize (const UConverter * converter)
540 {
541 return converter->sharedData->staticData->minBytesPerChar;
542 }
543
544 U_CAPI const char* U_EXPORT2
545 ucnv_getName (const UConverter * converter, UErrorCode * err)
546
547 {
548 if (U_FAILURE (*err))
549 return NULL;
550 if(converter->sharedData->impl->getName){
551 const char* temp= converter->sharedData->impl->getName(converter);
552 if(temp)
553 return temp;
554 }
555 return converter->sharedData->staticData->name;
556 }
557
558 U_CAPI int32_t U_EXPORT2
559 ucnv_getCCSID(const UConverter * converter,
560 UErrorCode * err)
561 {
562 int32_t ccsid;
563 if (U_FAILURE (*err))
564 return -1;
565
566 ccsid = converter->sharedData->staticData->codepage;
567 if (ccsid == 0) {
568 /* Rare case. This is for cases like gb18030,
569 which doesn't have an IBM cannonical name, but does have an IBM alias. */
570 const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
571 if (U_SUCCESS(*err) && standardName) {
572 const char *ccsidStr = uprv_strchr(standardName, '-');
573 if (ccsidStr) {
574 ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */
575 }
576 }
577 }
578 return ccsid;
579 }
580
581
582 U_CAPI UConverterPlatform U_EXPORT2
583 ucnv_getPlatform (const UConverter * converter,
584 UErrorCode * err)
585 {
586 if (U_FAILURE (*err))
587 return UCNV_UNKNOWN;
588
589 return (UConverterPlatform)converter->sharedData->staticData->platform;
590 }
591
592 U_CAPI void U_EXPORT2
593 ucnv_getToUCallBack (const UConverter * converter,
594 UConverterToUCallback *action,
595 const void **context)
596 {
597 *action = converter->fromCharErrorBehaviour;
598 *context = converter->toUContext;
599 }
600
601 U_CAPI void U_EXPORT2
602 ucnv_getFromUCallBack (const UConverter * converter,
603 UConverterFromUCallback *action,
604 const void **context)
605 {
606 *action = converter->fromUCharErrorBehaviour;
607 *context = converter->fromUContext;
608 }
609
610 U_CAPI void U_EXPORT2
611 ucnv_setToUCallBack (UConverter * converter,
612 UConverterToUCallback newAction,
613 const void* newContext,
614 UConverterToUCallback *oldAction,
615 const void** oldContext,
616 UErrorCode * err)
617 {
618 if (U_FAILURE (*err))
619 return;
620 if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
621 converter->fromCharErrorBehaviour = newAction;
622 if (oldContext) *oldContext = converter->toUContext;
623 converter->toUContext = newContext;
624 }
625
626 U_CAPI void U_EXPORT2
627 ucnv_setFromUCallBack (UConverter * converter,
628 UConverterFromUCallback newAction,
629 const void* newContext,
630 UConverterFromUCallback *oldAction,
631 const void** oldContext,
632 UErrorCode * err)
633 {
634 if (U_FAILURE (*err))
635 return;
636 if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
637 converter->fromUCharErrorBehaviour = newAction;
638 if (oldContext) *oldContext = converter->fromUContext;
639 converter->fromUContext = newContext;
640 }
641
/*
 * Adjusts a run of freshly written offsets.
 *
 * offsets           first offset to adjust
 * length            number of offsets to adjust
 * sourceIndex       index to add to each offset, or a negative value if
 *                   the conversion function does not handle offsets
 * errorInputLength  length of the input sequence that caused an error,
 *                   if any; it is subtracted from sourceIndex
 *
 * With delta = sourceIndex - errorInputLength (or -1 for a negative
 * sourceIndex): delta == 0 leaves the offsets alone, delta > 0 is added
 * to every non-negative offset, and delta < 0 sets every offset to -1.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t delta;
    int32_t i;

    /* a negative sourceIndex means "no offsets available" */
    delta = (sourceIndex>=0) ? sourceIndex-errorInputLength : -1;

    if(delta==0) {
        /* most common case, nothing to do */
        return;
    }

    if(delta>0) {
        /* shift each valid offset; offsets that are already <0 stay as they are */
        for(i=0; i<length; ++i) {
            if(offsets[i]>=0) {
                offsets[i]+=delta;
            }
        }
    } else {
        /*
         * either offsets are not handled at all, or the error input
         * sequence started in a previous buffer: mark everything as unknown
         */
        for(i=0; i<length; ++i) {
            offsets[i]=-1;
        }
    }
}
686
687 /* ucnv_fromUnicode --------------------------------------------------------- */
688
689 /*
690 * Implementation note for m:n conversions
691 *
692 * While collecting source units to find the longest match for m:n conversion,
693 * some source units may need to be stored for a partial match.
694 * When a second buffer does not yield a match on all of the previously stored
695 * source units, then they must be "replayed", i.e., fed back into the converter.
696 *
697 * The code relies on the fact that replaying will not nest -
698 * converting a replay buffer will not result in a replay.
699 * This is because a replay is necessary only after the _continuation_ of a
700 * partial match failed, but a replay buffer is converted as a whole.
701 * It may result in some of its units being stored again for a partial match,
702 * but there will not be a continuation _during_ the replay which could fail.
703 *
704 * It is conceivable that a callback function could call the converter
705 * recursively in a way that causes another replay to be stored, but that
706 * would be an error in the callback function.
707 * Such violations will cause assertion failures in a debug build,
708 * and wrong output, but they will not cause a crash.
709 */
710
/*
 * Runs a from-Unicode conversion described by pArgs, including error
 * callback handling, offset adjustment and m:n replay handling.
 *
 * pArgs  conversion arguments; source, target and offsets are advanced
 *        as input is consumed and output is written. While a replay
 *        buffer is being converted, source/sourceLimit/flush temporarily
 *        point at that buffer and are restored before returning.
 * err    in/out error code. For U_INVALID_CHAR_FOUND,
 *        U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND the converter's
 *        fromUCharErrorBehaviour callback is called once; if the error is
 *        still set afterwards, or for any other failure (including
 *        U_BUFFER_OVERFLOW_ERROR), the function returns with that error.
 *
 * The function returns successfully once all input is consumed; if the
 * flush flag is set and the conversion function saw the end of the input,
 * the from-Unicode state is reset first without calling the callback.
 */
711 static void
712 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
713 UConverterFromUnicode fromUnicode;
714 UConverter *cnv;
715 const UChar *s;
716 char *t;
717 int32_t *offsets;
718 int32_t sourceIndex;
719 int32_t errorInputLength;
720 UBool converterSawEndOfInput, calledCallback;
721
722 /* variables for m:n conversion */
723 UChar replay[UCNV_EXT_MAX_UCHARS];
724 const UChar *realSource, *realSourceLimit;
725 int32_t realSourceIndex;
726 UBool realFlush;
727
728 cnv=pArgs->converter;
729 s=pArgs->source;
730 t=pArgs->target;
731 offsets=pArgs->offsets;
732
733 /* get the converter implementation function */
734 sourceIndex=0;
735 if(offsets==NULL) {
736 fromUnicode=cnv->sharedData->impl->fromUnicode;
737 } else {
738 fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
739 if(fromUnicode==NULL) {
740 /* there is no WithOffsets implementation */
741 fromUnicode=cnv->sharedData->impl->fromUnicode;
742 /* we will write -1 for each offset */
743 sourceIndex=-1;
744 }
745 }
746
/* realSource!=NULL is used below as the "currently replaying" indicator */
747 if(cnv->preFromULength>=0) {
748 /* normal mode */
749 realSource=NULL;
750
751 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
752 realSourceLimit=NULL;
753 realFlush=FALSE;
754 realSourceIndex=0;
755 } else {
756 /*
757 * Previous m:n conversion stored source units from a partial match
758 * and failed to consume all of them.
759 * We need to "replay" them from a temporary buffer and convert them first.
760 */
761 realSource=pArgs->source;
762 realSourceLimit=pArgs->sourceLimit;
763 realFlush=pArgs->flush;
764 realSourceIndex=sourceIndex;
765
/* preFromULength is negative here: its negation is the number of stored units */
766 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
767 pArgs->source=replay;
768 pArgs->sourceLimit=replay-cnv->preFromULength;
769 pArgs->flush=FALSE;
770 sourceIndex=-1;
771
772 cnv->preFromULength=0;
773 }
774
775 /*
776 * loop for conversion and error handling
777 *
778 * loop {
779 * convert
780 * loop {
781 * update offsets
782 * handle end of input
783 * handle errors/call callback
784 * }
785 * }
786 */
787 for(;;) {
788 /* convert */
789 fromUnicode(pArgs, err);
790
791 /*
792 * set a flag for whether the converter
793 * successfully processed the end of the input
794 *
795 * need not check cnv->preFromULength==0 because a replay (<0) will cause
796 * s<sourceLimit before converterSawEndOfInput is checked
797 */
798 converterSawEndOfInput=
799 (UBool)(U_SUCCESS(*err) &&
800 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
801 cnv->fromUChar32==0);
802
803 /* no callback called yet for this iteration */
804 calledCallback=FALSE;
805
806 /* no sourceIndex adjustment for conversion, only for callback output */
807 errorInputLength=0;
808
809 /*
810 * loop for offsets and error handling
811 *
812 * iterates at most 3 times:
813 * 1. to clean up after the conversion function
814 * 2. after the callback
815 * 3. after the callback again if there was truncated input
816 */
817 for(;;) {
818 /* update offsets if we write any */
819 if(offsets!=NULL) {
/* number of target units written since t was last updated */
820 int32_t length=(int32_t)(pArgs->target-t);
821 if(length>0) {
822 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
823
824 /*
825 * if a converter handles offsets and updates the offsets
826 * pointer at the end, then pArgs->offset should not change
827 * here;
828 * however, some converters do not handle offsets at all
829 * (sourceIndex<0) or may not update the offsets pointer
830 */
831 pArgs->offsets=offsets+=length;
832 }
833
834 if(sourceIndex>=0) {
835 sourceIndex+=(int32_t)(pArgs->source-s);
836 }
837 }
838
839 if(cnv->preFromULength<0) {
840 /*
841 * switch the source to new replay units (cannot occur while replaying)
842 * after offset handling and before end-of-input and callback handling
843 */
844 if(realSource==NULL) {
845 realSource=pArgs->source;
846 realSourceLimit=pArgs->sourceLimit;
847 realFlush=pArgs->flush;
848 realSourceIndex=sourceIndex;
849
850 uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
851 pArgs->source=replay;
852 pArgs->sourceLimit=replay-cnv->preFromULength;
853 pArgs->flush=FALSE;
854 if((sourceIndex+=cnv->preFromULength)<0) {
855 sourceIndex=-1;
856 }
857
858 cnv->preFromULength=0;
859 } else {
860 /* see implementation note before _fromUnicodeWithCallback() */
861 U_ASSERT(realSource==NULL);
862 *err=U_INTERNAL_PROGRAM_ERROR;
863 }
864 }
865
866 /* update pointers */
867 s=pArgs->source;
868 t=pArgs->target;
869
870 if(U_SUCCESS(*err)) {
871 if(s<pArgs->sourceLimit) {
872 /*
873 * continue with the conversion loop while there is still input left
874 * (continue converting by breaking out of only the inner loop)
875 */
876 break;
877 } else if(realSource!=NULL) {
878 /* switch back from replaying to the real source and continue */
879 pArgs->source=realSource;
880 pArgs->sourceLimit=realSourceLimit;
881 pArgs->flush=realFlush;
882 sourceIndex=realSourceIndex;
883
884 realSource=NULL;
885 break;
886 } else if(pArgs->flush && cnv->fromUChar32!=0) {
887 /*
888 * the entire input stream is consumed
889 * and there is a partial, truncated input sequence left
890 */
891
892 /* inject an error and continue with callback handling */
893 *err=U_TRUNCATED_CHAR_FOUND;
894 calledCallback=FALSE; /* new error condition */
895 } else {
896 /* input consumed */
897 if(pArgs->flush) {
898 /*
899 * return to the conversion loop once more if the flush
900 * flag is set and the conversion function has not
901 * successfully processed the end of the input yet
902 *
903 * (continue converting by breaking out of only the inner loop)
904 */
905 if(!converterSawEndOfInput) {
906 break;
907 }
908
909 /* reset the converter without calling the callback function */
910 _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
911 }
912
913 /* done successfully */
914 return;
915 }
916 }
917
918 /* U_FAILURE(*err) */
919 {
920 UErrorCode e;
921
922 if( calledCallback ||
923 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
924 (e!=U_INVALID_CHAR_FOUND &&
925 e!=U_ILLEGAL_CHAR_FOUND &&
926 e!=U_TRUNCATED_CHAR_FOUND)
927 ) {
928 /*
929 * the callback did not or cannot resolve the error:
930 * set output pointers and return
931 *
932 * the check for buffer overflow is redundant but it is
933 * a high-runner case and hopefully documents the intent
934 * well
935 *
936 * if we were replaying, then the replay buffer must be
937 * copied back into the UConverter
938 * and the real arguments must be restored
939 */
940 if(realSource!=NULL) {
941 int32_t length;
942
943 U_ASSERT(cnv->preFromULength==0);
944
945 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
946 if(length>0) {
947 uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
/* stored negated: a negative preFromULength marks pending replay units */
948 cnv->preFromULength=(int8_t)-length;
949 }
950
951 pArgs->source=realSource;
952 pArgs->sourceLimit=realSourceLimit;
953 pArgs->flush=realFlush;
954 }
955
956 return;
957 }
958 }
959
960 /* callback handling */
961 {
962 UChar32 codePoint;
963
964 /* get and write the code point */
965 codePoint=cnv->fromUChar32;
966 errorInputLength=0;
967 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
968 cnv->invalidUCharLength=(int8_t)errorInputLength;
969
970 /* set the converter state to deal with the next character */
971 cnv->fromUChar32=0;
972
973 /* call the callback function */
/* only U_INVALID_CHAR_FOUND is reported as UCNV_UNASSIGNED; the other callback errors as UCNV_ILLEGAL */
974 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
975 cnv->invalidUCharBuffer, errorInputLength, codePoint,
976 *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
977 err);
978 }
979
980 /*
981 * loop back to the offset handling
982 *
983 * this flag will indicate after offset handling
984 * that a callback was called;
985 * if the callback did not resolve the error, then we return
986 */
987 calledCallback=TRUE;
988 }
989 }
990 }
991
992 U_CAPI void U_EXPORT2
993 ucnv_fromUnicode(UConverter *cnv,
994 char **target, const char *targetLimit,
995 const UChar **source, const UChar *sourceLimit,
996 int32_t *offsets,
997 UBool flush,
998 UErrorCode *err) {
999 UConverterFromUnicodeArgs args;
1000 const UChar *s;
1001 char *t;
1002
1003 /* check parameters */
1004 if(err==NULL || U_FAILURE(*err)) {
1005 return;
1006 }
1007
1008 if(cnv==NULL || target==NULL || source==NULL) {
1009 *err=U_ILLEGAL_ARGUMENT_ERROR;
1010 return;
1011 }
1012
1013 s=*source;
1014 t=*target;
1015 if(sourceLimit<s || targetLimit<t) {
1016 *err=U_ILLEGAL_ARGUMENT_ERROR;
1017 return;
1018 }
1019
1020 /*
1021 * Make sure that the buffer sizes do not exceed the number range for
1022 * int32_t because some functions use the size (in units or bytes)
1023 * rather than comparing pointers, and because offsets are int32_t values.
1024 *
1025 * size_t is guaranteed to be unsigned and large enough for the job.
1026 *
1027 * Return with an error instead of adjusting the limits because we would
1028 * not be able to maintain the semantics that either the source must be
1029 * consumed or the target filled (unless an error occurs).
1030 * An adjustment would be targetLimit=t+0x7fffffff; for example.
1031 */
1032 if(
1033 ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1034 ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
1035 ) {
1036 *err=U_ILLEGAL_ARGUMENT_ERROR;
1037 return;
1038 }
1039
1040 /* flush the target overflow buffer */
1041 if(cnv->charErrorBufferLength>0) {
1042 char *overflow;
1043 int32_t i, length;
1044
1045 overflow=(char *)cnv->charErrorBuffer;
1046 length=cnv->charErrorBufferLength;
1047 i=0;
1048 do {
1049 if(t==targetLimit) {
1050 /* the overflow buffer contains too much, keep the rest */
1051 int32_t j=0;
1052
1053 do {
1054 overflow[j++]=overflow[i++];
1055 } while(i<length);
1056
1057 cnv->charErrorBufferLength=(int8_t)j;
1058 *target=t;
1059 *err=U_BUFFER_OVERFLOW_ERROR;
1060 return;
1061 }
1062
1063 /* copy the overflow contents to the target */
1064 *t++=overflow[i++];
1065 if(offsets!=NULL) {
1066 *offsets++=-1; /* no source index available for old output */
1067 }
1068 } while(i<length);
1069
1070 /* the overflow buffer is completely copied to the target */
1071 cnv->charErrorBufferLength=0;
1072 }
1073
1074 if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
1075 /* the overflow buffer is emptied and there is no new input: we are done */
1076 *target=t;
1077 return;
1078 }
1079
1080 /*
1081 * Do not simply return with a buffer overflow error if
1082 * !flush && t==targetLimit
1083 * because it is possible that the source will not generate any output.
1084 * For example, the skip callback may be called;
1085 * it does not output anything.
1086 */
1087
1088 /* prepare the converter arguments */
1089 args.converter=cnv;
1090 args.flush=flush;
1091 args.offsets=offsets;
1092 args.source=s;
1093 args.sourceLimit=sourceLimit;
1094 args.target=t;
1095 args.targetLimit=targetLimit;
1096 args.size=sizeof(args);
1097
1098 _fromUnicodeWithCallback(&args, err);
1099
1100 *source=args.source;
1101 *target=args.target;
1102 }
1103
1104 /* ucnv_toUnicode() --------------------------------------------------------- */
1105
/*
 * Conversion loop for the to-Unicode direction, including error-callback
 * handling and the replay of source bytes stored in cnv->preToU.
 *
 * The function selects the converter implementation function (the
 * WithOffsets variant when pArgs->offsets is set and one exists), and then
 * alternates between calling that function and post-processing its result:
 * updating offsets, switching to/from the replay buffer, detecting truncated
 * input at the end of a flushed stream, and calling
 * cnv->fromCharErrorBehaviour for the error codes that are eligible for
 * callback handling.
 *
 * pArgs  conversion arguments; source, target, offsets (and temporarily
 *        sourceLimit and flush during a replay) are updated in place
 * err    in/out error code; if it already holds a failure on entry, the
 *        conversion function is not called for the first iteration and the
 *        error is passed directly to the error handling below
 *
 * Returns when the input is consumed successfully, or when an error remains
 * that no callback was able to, or allowed to, resolve.
 */
1106 static void
1107 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1108 UConverterToUnicode toUnicode;
1109 UConverter *cnv;
1110 const char *s;
1111 UChar *t;
1112 int32_t *offsets;
1113 int32_t sourceIndex;
1114 int32_t errorInputLength;
1115 UBool converterSawEndOfInput, calledCallback;
1116
1117 /* variables for m:n conversion */
/*
 * replay[] receives a copy of cnv->preToU while those bytes are converted;
 * the real* variables hold the caller's source state during that time
 * (realSource!=NULL means "currently replaying")
 */
1118 char replay[UCNV_EXT_MAX_BYTES];
1119 const char *realSource, *realSourceLimit;
1120 int32_t realSourceIndex;
1121 UBool realFlush;
1122
1123 cnv=pArgs->converter;
1124 s=pArgs->source;
1125 t=pArgs->target;
1126 offsets=pArgs->offsets;
1127
1128 /* get the converter implementation function */
1129 sourceIndex=0;
1130 if(offsets==NULL) {
1131 toUnicode=cnv->sharedData->impl->toUnicode;
1132 } else {
1133 toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1134 if(toUnicode==NULL) {
1135 /* there is no WithOffsets implementation */
1136 toUnicode=cnv->sharedData->impl->toUnicode;
1137 /* we will write -1 for each offset */
1138 sourceIndex=-1;
1139 }
1140 }
1141
1142 if(cnv->preToULength>=0) {
1143 /* normal mode */
1144 realSource=NULL;
1145
1146 /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1147 realSourceLimit=NULL;
1148 realFlush=FALSE;
1149 realSourceIndex=0;
1150 } else {
1151 /*
1152 * Previous m:n conversion stored source units from a partial match
1153 * and failed to consume all of them.
1154 * We need to "replay" them from a temporary buffer and convert them first.
1155 */
1156 realSource=pArgs->source;
1157 realSourceLimit=pArgs->sourceLimit;
1158 realFlush=pArgs->flush;
1159 realSourceIndex=sourceIndex;
1160
/* preToULength is negative in this branch, so -preToULength is the number of stored bytes */
1161 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1162 pArgs->source=replay;
1163 pArgs->sourceLimit=replay-cnv->preToULength;
1164 pArgs->flush=FALSE;
1165 sourceIndex=-1;
1166
1167 cnv->preToULength=0;
1168 }
1169
1170 /*
1171 * loop for conversion and error handling
1172 *
1173 * loop {
1174 * convert
1175 * loop {
1176 * update offsets
1177 * handle end of input
1178 * handle errors/call callback
1179 * }
1180 * }
1181 */
1182 for(;;) {
1183 if(U_SUCCESS(*err)) {
1184 /* convert */
1185 toUnicode(pArgs, err);
1186
1187 /*
1188 * set a flag for whether the converter
1189 * successfully processed the end of the input
1190 *
1191 * need not check cnv->preToULength==0 because a replay (<0) will cause
1192 * s<sourceLimit before converterSawEndOfInput is checked
1193 */
1194 converterSawEndOfInput=
1195 (UBool)(U_SUCCESS(*err) &&
1196 pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1197 cnv->toULength==0);
1198 } else {
1199 /* handle error from getNextUChar() */
1200 converterSawEndOfInput=FALSE;
1201 }
1202
1203 /* no callback called yet for this iteration */
1204 calledCallback=FALSE;
1205
1206 /* no sourceIndex adjustment for conversion, only for callback output */
1207 errorInputLength=0;
1208
1209 /*
1210 * loop for offsets and error handling
1211 *
1212 * iterates at most 3 times:
1213 * 1. to clean up after the conversion function
1214 * 2. after the callback
1215 * 3. after the callback again if there was truncated input
1216 */
1217 for(;;) {
1218 /* update offsets if we write any */
1219 if(offsets!=NULL) {
/* number of UChars written since t was last synchronized with pArgs->target */
1220 int32_t length=(int32_t)(pArgs->target-t);
1221 if(length>0) {
1222 _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1223
1224 /*
1225 * if a converter handles offsets and updates the offsets
1226 * pointer at the end, then pArgs->offsets should not change
1227 * here;
1228 * however, some converters do not handle offsets at all
1229 * (sourceIndex<0) or may not update the offsets pointer
1230 */
1231 pArgs->offsets=offsets+=length;
1232 }
1233
1234 if(sourceIndex>=0) {
1235 sourceIndex+=(int32_t)(pArgs->source-s);
1236 }
1237 }
1238
1239 if(cnv->preToULength<0) {
1240 /*
1241 * switch the source to new replay units (cannot occur while replaying)
1242 * after offset handling and before end-of-input and callback handling
1243 */
1244 if(realSource==NULL) {
1245 realSource=pArgs->source;
1246 realSourceLimit=pArgs->sourceLimit;
1247 realFlush=pArgs->flush;
1248 realSourceIndex=sourceIndex;
1249
1250 uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1251 pArgs->source=replay;
1252 pArgs->sourceLimit=replay-cnv->preToULength;
1253 pArgs->flush=FALSE;
/* preToULength is negative: move sourceIndex back by the replayed length, pinning to -1 */
1254 if((sourceIndex+=cnv->preToULength)<0) {
1255 sourceIndex=-1;
1256 }
1257
1258 cnv->preToULength=0;
1259 } else {
1260 /* see implementation note before _fromUnicodeWithCallback() */
/* this branch is only reached with realSource!=NULL, so the assertion below fails when assertions are enabled */
1261 U_ASSERT(realSource==NULL);
1262 *err=U_INTERNAL_PROGRAM_ERROR;
1263 }
1264 }
1265
1266 /* update pointers */
1267 s=pArgs->source;
1268 t=pArgs->target;
1269
1270 if(U_SUCCESS(*err)) {
1271 if(s<pArgs->sourceLimit) {
1272 /*
1273 * continue with the conversion loop while there is still input left
1274 * (continue converting by breaking out of only the inner loop)
1275 */
1276 break;
1277 } else if(realSource!=NULL) {
1278 /* switch back from replaying to the real source and continue */
1279 pArgs->source=realSource;
1280 pArgs->sourceLimit=realSourceLimit;
1281 pArgs->flush=realFlush;
1282 sourceIndex=realSourceIndex;
1283
1284 realSource=NULL;
1285 break;
1286 } else if(pArgs->flush && cnv->toULength>0) {
1287 /*
1288 * the entire input stream is consumed
1289 * and there is a partial, truncated input sequence left
1290 */
1291
1292 /* inject an error and continue with callback handling */
1293 *err=U_TRUNCATED_CHAR_FOUND;
1294 calledCallback=FALSE; /* new error condition */
1295 } else {
1296 /* input consumed */
1297 if(pArgs->flush) {
1298 /*
1299 * return to the conversion loop once more if the flush
1300 * flag is set and the conversion function has not
1301 * successfully processed the end of the input yet
1302 *
1303 * (continue converting by breaking out of only the inner loop)
1304 */
1305 if(!converterSawEndOfInput) {
1306 break;
1307 }
1308
1309 /* reset the converter without calling the callback function */
1310 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1311 }
1312
1313 /* done successfully */
1314 return;
1315 }
1316 }
1317
1318 /* U_FAILURE(*err) */
1319 {
1320 UErrorCode e;
1321
/*
 * return without calling a callback if one was already called for this
 * error, if the error is a buffer overflow, or if the error is not one
 * of the five codes listed below
 */
1322 if( calledCallback ||
1323 (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1324 (e!=U_INVALID_CHAR_FOUND &&
1325 e!=U_ILLEGAL_CHAR_FOUND &&
1326 e!=U_TRUNCATED_CHAR_FOUND &&
1327 e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1328 e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1329 ) {
1330 /*
1331 * the callback did not or cannot resolve the error:
1332 * set output pointers and return
1333 *
1334 * the check for buffer overflow is redundant but it is
1335 * a high-runner case and hopefully documents the intent
1336 * well
1337 *
1338 * if we were replaying, then the replay buffer must be
1339 * copied back into the UConverter
1340 * and the real arguments must be restored
1341 */
1342 if(realSource!=NULL) {
1343 int32_t length;
1344
1345 U_ASSERT(cnv->preToULength==0);
1346
/* unconsumed replay bytes go back into cnv->preToU, with the length stored negated */
1347 length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1348 if(length>0) {
1349 uprv_memcpy(cnv->preToU, pArgs->source, length);
1350 cnv->preToULength=(int8_t)-length;
1351 }
1352
1353 pArgs->source=realSource;
1354 pArgs->sourceLimit=realSourceLimit;
1355 pArgs->flush=realFlush;
1356 }
1357
1358 return;
1359 }
1360 }
1361
1362 /* copy toUBytes[] to invalidCharBuffer[] */
1363 errorInputLength=cnv->invalidCharLength=cnv->toULength;
1364 if(errorInputLength>0) {
1365 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1366 }
1367
1368 /* set the converter state to deal with the next character */
1369 cnv->toULength=0;
1370
1371 /* call the callback function */
/*
 * the reason passed is UCNV_UNASSIGNED for U_INVALID_CHAR_FOUND and
 * U_UNSUPPORTED_ESCAPE_SEQUENCE, and UCNV_ILLEGAL for the other errors
 */
1372 cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1373 cnv->invalidCharBuffer, errorInputLength,
1374 (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
1375 UCNV_UNASSIGNED : UCNV_ILLEGAL,
1376 err);
1377
1378 /*
1379 * loop back to the offset handling
1380 *
1381 * this flag will indicate after offset handling
1382 * that a callback was called;
1383 * if the callback did not resolve the error, then we return
1384 */
1385 calledCallback=TRUE;
1386 }
1387 }
1388 }
1389
1390 U_CAPI void U_EXPORT2
1391 ucnv_toUnicode(UConverter *cnv,
1392 UChar **target, const UChar *targetLimit,
1393 const char **source, const char *sourceLimit,
1394 int32_t *offsets,
1395 UBool flush,
1396 UErrorCode *err) {
1397 UConverterToUnicodeArgs args;
1398 const char *s;
1399 UChar *t;
1400
1401 /* check parameters */
1402 if(err==NULL || U_FAILURE(*err)) {
1403 return;
1404 }
1405
1406 if(cnv==NULL || target==NULL || source==NULL) {
1407 *err=U_ILLEGAL_ARGUMENT_ERROR;
1408 return;
1409 }
1410
1411 s=*source;
1412 t=*target;
1413 if(sourceLimit<s || targetLimit<t) {
1414 *err=U_ILLEGAL_ARGUMENT_ERROR;
1415 return;
1416 }
1417
1418 /*
1419 * Make sure that the buffer sizes do not exceed the number range for
1420 * int32_t because some functions use the size (in units or bytes)
1421 * rather than comparing pointers, and because offsets are int32_t values.
1422 *
1423 * size_t is guaranteed to be unsigned and large enough for the job.
1424 *
1425 * Return with an error instead of adjusting the limits because we would
1426 * not be able to maintain the semantics that either the source must be
1427 * consumed or the target filled (unless an error occurs).
1428 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1429 */
1430 if(
1431 ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1432 ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)
1433 ) {
1434 *err=U_ILLEGAL_ARGUMENT_ERROR;
1435 return;
1436 }
1437
1438 /* flush the target overflow buffer */
1439 if(cnv->UCharErrorBufferLength>0) {
1440 UChar *overflow;
1441 int32_t i, length;
1442
1443 overflow=cnv->UCharErrorBuffer;
1444 length=cnv->UCharErrorBufferLength;
1445 i=0;
1446 do {
1447 if(t==targetLimit) {
1448 /* the overflow buffer contains too much, keep the rest */
1449 int32_t j=0;
1450
1451 do {
1452 overflow[j++]=overflow[i++];
1453 } while(i<length);
1454
1455 cnv->UCharErrorBufferLength=(int8_t)j;
1456 *target=t;
1457 *err=U_BUFFER_OVERFLOW_ERROR;
1458 return;
1459 }
1460
1461 /* copy the overflow contents to the target */
1462 *t++=overflow[i++];
1463 if(offsets!=NULL) {
1464 *offsets++=-1; /* no source index available for old output */
1465 }
1466 } while(i<length);
1467
1468 /* the overflow buffer is completely copied to the target */
1469 cnv->UCharErrorBufferLength=0;
1470 }
1471
1472 if(!flush && s==sourceLimit && cnv->preToULength>=0) {
1473 /* the overflow buffer is emptied and there is no new input: we are done */
1474 *target=t;
1475 return;
1476 }
1477
1478 /*
1479 * Do not simply return with a buffer overflow error if
1480 * !flush && t==targetLimit
1481 * because it is possible that the source will not generate any output.
1482 * For example, the skip callback may be called;
1483 * it does not output anything.
1484 */
1485
1486 /* prepare the converter arguments */
1487 args.converter=cnv;
1488 args.flush=flush;
1489 args.offsets=offsets;
1490 args.source=s;
1491 args.sourceLimit=sourceLimit;
1492 args.target=t;
1493 args.targetLimit=targetLimit;
1494 args.size=sizeof(args);
1495
1496 _toUnicodeWithCallback(&args, err);
1497
1498 *source=args.source;
1499 *target=args.target;
1500 }
1501
1502 /* ucnv_to/fromUChars() ----------------------------------------------------- */
1503
1504 U_CAPI int32_t U_EXPORT2
1505 ucnv_fromUChars(UConverter *cnv,
1506 char *dest, int32_t destCapacity,
1507 const UChar *src, int32_t srcLength,
1508 UErrorCode *pErrorCode) {
1509 const UChar *srcLimit;
1510 char *originalDest, *destLimit;
1511 int32_t destLength;
1512
1513 /* check arguments */
1514 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1515 return 0;
1516 }
1517
1518 if( cnv==NULL ||
1519 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1520 srcLength<-1 || (srcLength!=0 && src==NULL)
1521 ) {
1522 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1523 return 0;
1524 }
1525
1526 /* initialize */
1527 ucnv_resetFromUnicode(cnv);
1528 originalDest=dest;
1529 if(srcLength==-1) {
1530 srcLength=u_strlen(src);
1531 }
1532 if(srcLength>0) {
1533 srcLimit=src+srcLength;
1534 destLimit=dest+destCapacity;
1535
1536 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1537 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1538 destLimit=(char *)U_MAX_PTR(dest);
1539 }
1540
1541 /* perform the conversion */
1542 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1543 destLength=(int32_t)(dest-originalDest);
1544
1545 /* if an overflow occurs, then get the preflighting length */
1546 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1547 char buffer[1024];
1548
1549 destLimit=buffer+sizeof(buffer);
1550 do {
1551 dest=buffer;
1552 *pErrorCode=U_ZERO_ERROR;
1553 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1554 destLength+=(int32_t)(dest-buffer);
1555 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1556 }
1557 } else {
1558 destLength=0;
1559 }
1560
1561 return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1562 }
1563
1564 U_CAPI int32_t U_EXPORT2
1565 ucnv_toUChars(UConverter *cnv,
1566 UChar *dest, int32_t destCapacity,
1567 const char *src, int32_t srcLength,
1568 UErrorCode *pErrorCode) {
1569 const char *srcLimit;
1570 UChar *originalDest, *destLimit;
1571 int32_t destLength;
1572
1573 /* check arguments */
1574 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1575 return 0;
1576 }
1577
1578 if( cnv==NULL ||
1579 destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1580 srcLength<-1 || (srcLength!=0 && src==NULL))
1581 {
1582 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1583 return 0;
1584 }
1585
1586 /* initialize */
1587 ucnv_resetToUnicode(cnv);
1588 originalDest=dest;
1589 if(srcLength==-1) {
1590 srcLength=uprv_strlen(src);
1591 }
1592 if(srcLength>0) {
1593 srcLimit=src+srcLength;
1594 destLimit=dest+destCapacity;
1595
1596 /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1597 if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1598 destLimit=(UChar *)U_MAX_PTR(dest);
1599 }
1600
1601 /* perform the conversion */
1602 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1603 destLength=(int32_t)(dest-originalDest);
1604
1605 /* if an overflow occurs, then get the preflighting length */
1606 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1607 {
1608 UChar buffer[1024];
1609
1610 destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1611 do {
1612 dest=buffer;
1613 *pErrorCode=U_ZERO_ERROR;
1614 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1615 destLength+=(int32_t)(dest-buffer);
1616 }
1617 while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1618 }
1619 } else {
1620 destLength=0;
1621 }
1622
1623 return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1624 }
1625
1626 /* ucnv_getNextUChar() ------------------------------------------------------ */
1627
/*
 * Returns the next code point converted from the source bytes.
 *
 * UChars waiting in cnv->UCharErrorBuffer are returned first. Otherwise the
 * converter's native getNextUChar() implementation is tried if there is one
 * and the converter is at a character boundary; if that does not deliver a
 * code point, _toUnicodeWithCallback() converts into a small local buffer.
 * A lead surrogate is combined with a following trail surrogate if one is
 * available; any other leftover output is stored back at the beginning of
 * cnv->UCharErrorBuffer.
 *
 * cnv          converter; NULL yields U_ILLEGAL_ARGUMENT_ERROR
 * source       in/out pointer to the current input position
 * sourceLimit  end of the input; must not be before *source
 * err          in/out error code; U_INDEX_OUTOFBOUNDS_ERROR is set when no
 *              output was produced
 *
 * Returns the code point, or 0xffff if there is no output or on error.
 */
1628 U_CAPI UChar32 U_EXPORT2
1629 ucnv_getNextUChar(UConverter *cnv,
1630 const char **source, const char *sourceLimit,
1631 UErrorCode *err) {
1632 UConverterToUnicodeArgs args;
1633 UChar buffer[U16_MAX_LENGTH];
1634 const char *s;
1635 UChar32 c;
1636 int32_t i, length;
1637
1638 /* check parameters */
1639 if(err==NULL || U_FAILURE(*err)) {
1640 return 0xffff;
1641 }
1642
1643 if(cnv==NULL || source==NULL) {
1644 *err=U_ILLEGAL_ARGUMENT_ERROR;
1645 return 0xffff;
1646 }
1647
1648 s=*source;
1649 if(sourceLimit<s) {
1650 *err=U_ILLEGAL_ARGUMENT_ERROR;
1651 return 0xffff;
1652 }
1653
1654 /*
1655 * Make sure that the buffer sizes do not exceed the number range for
1656 * int32_t because some functions use the size (in units or bytes)
1657 * rather than comparing pointers, and because offsets are int32_t values.
1658 *
1659 * size_t is guaranteed to be unsigned and large enough for the job.
1660 *
1661 * Return with an error instead of adjusting the limits because we would
1662 * not be able to maintain the semantics that either the source must be
1663 * consumed or the target filled (unless an error occurs).
1664 * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1665 */
1666 if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1667 *err=U_ILLEGAL_ARGUMENT_ERROR;
1668 return 0xffff;
1669 }
1670
/* c<0 is used further down to mean "no code point taken from the overflow buffer" */
1671 c=U_SENTINEL;
1672
1673 /* flush the target overflow buffer */
1674 if(cnv->UCharErrorBufferLength>0) {
1675 UChar *overflow;
1676
1677 overflow=cnv->UCharErrorBuffer;
1678 i=0;
1679 length=cnv->UCharErrorBufferLength;
1680 U16_NEXT(overflow, i, length, c);
1681
1682 /* move the remaining overflow contents up to the beginning */
1683 if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1684 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1685 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1686 }
1687
1688 if(!U16_IS_LEAD(c) || i<length) {
1689 return c;
1690 }
1691 /*
1692 * Continue if the overflow buffer contained only a lead surrogate,
1693 * in case the converter outputs single surrogates from complete
1694 * input sequences.
1695 */
1696 }
1697
1698 /*
1699 * flush==TRUE is implied for ucnv_getNextUChar()
1700 *
1701 * do not simply return even if s==sourceLimit because the converter may
1702 * not have seen flush==TRUE before
1703 */
1704
1705 /* prepare the converter arguments */
1706 args.converter=cnv;
1707 args.flush=TRUE;
1708 args.offsets=NULL;
1709 args.source=s;
1710 args.sourceLimit=sourceLimit;
1711 args.target=buffer;
/* room for exactly one UChar in the first conversion pass */
1712 args.targetLimit=buffer+1;
1713 args.size=sizeof(args);
1714
1715 if(c<0) {
1716 /*
1717 * call the native getNextUChar() implementation if we are
1718 * at a character boundary (toULength==0)
1719 *
1720 * unlike with _toUnicode(), getNextUChar() implementations must set
1721 * U_TRUNCATED_CHAR_FOUND for truncated input,
1722 * in addition to setting toULength/toUBytes[]
1723 */
1724 if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1725 c=cnv->sharedData->impl->getNextUChar(&args, err);
1726 *source=s=args.source;
1727 if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1728 /* reset the converter without calling the callback function */
1729 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1730 return 0xffff; /* no output */
1731 } else if(U_SUCCESS(*err) && c>=0) {
1732 return c;
1733 /*
1734 * else fall through to use _toUnicode() because
1735 * UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1736 * U_FAILURE: call _toUnicode() for callback handling (do not output c)
1737 */
1738 }
1739 }
1740
1741 /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1742 _toUnicodeWithCallback(&args, err);
1743
/* an overflow only means that more output went to the overflow buffer; not an error for the caller */
1744 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1745 *err=U_ZERO_ERROR;
1746 }
1747
1748 i=0;
1749 length=(int32_t)(args.target-buffer);
1750 } else {
1751 /* write the lead surrogate from the overflow buffer */
1752 buffer[0]=(UChar)c;
1753 args.target=buffer+1;
1754 i=0;
1755 length=1;
1756 }
1757
1758 /* buffer contents starts at i and ends before length */
1759
1760 if(U_FAILURE(*err)) {
1761 c=0xffff; /* no output */
1762 } else if(length==0) {
1763 /* no input or only state changes */
1764 *err=U_INDEX_OUTOFBOUNDS_ERROR;
1765 /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1766 c=0xffff; /* no output */
1767 } else {
1768 c=buffer[0];
1769 i=1;
1770 if(!U16_IS_LEAD(c)) {
1771 /* consume c=buffer[0], done */
1772 } else {
1773 /* got a lead surrogate, see if a trail surrogate follows */
1774 UChar c2;
1775
1776 if(cnv->UCharErrorBufferLength>0) {
1777 /* got overflow output from the conversion */
1778 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1779 /* got a trail surrogate, too */
1780 c=U16_GET_SUPPLEMENTARY(c, c2);
1781
1782 /* move the remaining overflow contents up to the beginning */
1783 if((--cnv->UCharErrorBufferLength)>0) {
1784 uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1785 cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1786 }
1787 } else {
1788 /* c is an unpaired lead surrogate, just return it */
1789 }
1790 } else if(args.source<sourceLimit) {
1791 /* convert once more, to buffer[1] */
1792 args.targetLimit=buffer+2;
1793 _toUnicodeWithCallback(&args, err);
1794 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1795 *err=U_ZERO_ERROR;
1796 }
1797
1798 length=(int32_t)(args.target-buffer);
1799 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
1800 /* got a trail surrogate, too */
1801 c=U16_GET_SUPPLEMENTARY(c, c2);
1802 i=2;
1803 }
1804 }
1805 }
1806 }
1807
1808 /*
1809 * move leftover output from buffer[i..length[
1810 * into the beginning of the overflow buffer
1811 */
1812 if(i<length) {
1813 /* move further overflow back */
/* delta is the number of leftover UChars in buffer[]; from here on, length is the old overflow length */
1814 int32_t delta=length-i;
1815 if((length=cnv->UCharErrorBufferLength)>0) {
1816 uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
1817 length*U_SIZEOF_UCHAR);
1818 }
1819 cnv->UCharErrorBufferLength=(int8_t)(length+delta);
1820
1821 cnv->UCharErrorBuffer[0]=buffer[i++];
1822 if(delta>1) {
1823 cnv->UCharErrorBuffer[1]=buffer[i];
1824 }
1825 }
1826
1827 *source=args.source;
1828 return c;
1829 }
1830
1831 /* ucnv_convert() and siblings ---------------------------------------------- */
1832
1833 U_CAPI void U_EXPORT2
1834 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
1835 char **target, const char *targetLimit,
1836 const char **source, const char *sourceLimit,
1837 UChar *pivotStart, UChar **pivotSource,
1838 UChar **pivotTarget, const UChar *pivotLimit,
1839 UBool reset, UBool flush,
1840 UErrorCode *pErrorCode) {
1841 UChar pivotBuffer[CHUNK_SIZE];
1842 UChar *myPivotSource, *myPivotTarget;
1843
1844 /* error checking */
1845 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1846 return;
1847 }
1848
1849 if( targetCnv==NULL || sourceCnv==NULL ||
1850 source==NULL || *source==NULL ||
1851 target==NULL || *target==NULL || targetLimit==NULL
1852 ) {
1853 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1854 return;
1855 }
1856
1857 if(pivotStart==NULL) {
1858 /* use the stack pivot buffer */
1859 pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
1860 pivotSource=&myPivotSource;
1861 pivotTarget=&myPivotTarget;
1862 pivotLimit=pivotBuffer+CHUNK_SIZE;
1863 } else if( pivotStart>=pivotLimit ||
1864 pivotSource==NULL || *pivotSource==NULL ||
1865 pivotTarget==NULL || *pivotTarget==NULL ||
1866 pivotLimit==NULL
1867 ) {
1868 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1869 return;
1870 }
1871
1872 if(sourceLimit==NULL) {
1873 /* get limit of single-byte-NUL-terminated source string */
1874 sourceLimit=uprv_strchr(*source, 0);
1875 }
1876
1877 if(reset) {
1878 ucnv_resetToUnicode(sourceCnv);
1879 ucnv_resetFromUnicode(targetCnv);
1880 *pivotTarget=*pivotSource=pivotStart;
1881 }
1882
1883 /* conversion loop */
1884 for(;;) {
1885 if(reset) {
1886 /*
1887 * if we did a reset in this function, we know that there is nothing
1888 * to convert to the target yet, so we save a function call
1889 */
1890 reset=FALSE;
1891 } else {
1892 /*
1893 * convert to the target first in case the pivot is filled at entry
1894 * or the targetCnv has some output bytes in its state
1895 */
1896 ucnv_fromUnicode(targetCnv,
1897 target, targetLimit,
1898 (const UChar **)pivotSource, *pivotTarget,
1899 NULL,
1900 (UBool)(flush && *source==sourceLimit),
1901 pErrorCode);
1902 if(U_FAILURE(*pErrorCode)) {
1903 break;
1904 }
1905
1906 /* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
1907 *pivotSource=*pivotTarget=pivotStart;
1908 }
1909
1910 /* convert from the source to the pivot */
1911 ucnv_toUnicode(sourceCnv,
1912 pivotTarget, pivotLimit,
1913 source, sourceLimit,
1914 NULL,
1915 flush,
1916 pErrorCode);
1917 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1918 /* pivot overflow: continue with the conversion loop */
1919 *pErrorCode=U_ZERO_ERROR;
1920 } else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
1921 /* conversion error, or there was nothing left to convert */
1922 break;
1923 }
1924 /* else ucnv_toUnicode() wrote into the pivot buffer: continue */
1925 }
1926
1927 /*
1928 * The conversion loop is exited when one of the following is true:
1929 * - the entire source text has been converted successfully to the target buffer
1930 * - a target buffer overflow occurred
1931 * - a conversion error occurred
1932 */
1933
1934 /* terminate the target buffer if possible */
1935 if(flush && U_SUCCESS(*pErrorCode)) {
1936 if(*target!=targetLimit) {
1937 **target=0;
1938 if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
1939 *pErrorCode=U_ZERO_ERROR;
1940 }
1941 } else {
1942 *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
1943 }
1944 }
1945 }
1946
1947 /* internal implementation of ucnv_convert() etc. with preflighting */
1948 static int32_t
1949 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
1950 char *target, int32_t targetCapacity,
1951 const char *source, int32_t sourceLength,
1952 UErrorCode *pErrorCode) {
1953 UChar pivotBuffer[CHUNK_SIZE];
1954 UChar *pivot, *pivot2;
1955
1956 char *myTarget;
1957 const char *sourceLimit;
1958 const char *targetLimit;
1959 int32_t targetLength=0;
1960
1961 /* set up */
1962 if(sourceLength<0) {
1963 sourceLimit=uprv_strchr(source, 0);
1964 } else {
1965 sourceLimit=source+sourceLength;
1966 }
1967
1968 /* if there is no input data, we're done */
1969 if(source==sourceLimit) {
1970 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
1971 }
1972
1973 pivot=pivot2=pivotBuffer;
1974 myTarget=target;
1975 targetLength=0;
1976
1977 if(targetCapacity>0) {
1978 /* perform real conversion */
1979 targetLimit=target+targetCapacity;
1980 ucnv_convertEx(outConverter, inConverter,
1981 &myTarget, targetLimit,
1982 &source, sourceLimit,
1983 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
1984 FALSE,
1985 TRUE,
1986 pErrorCode);
1987 targetLength=myTarget-target;
1988 }
1989
1990 /*
1991 * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
1992 * to it but continue the conversion in order to store in targetCapacity
1993 * the number of bytes that was required.
1994 */
1995 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
1996 {
1997 char targetBuffer[CHUNK_SIZE];
1998
1999 targetLimit=targetBuffer+CHUNK_SIZE;
2000 do {
2001 *pErrorCode=U_ZERO_ERROR;
2002 myTarget=targetBuffer;
2003 ucnv_convertEx(outConverter, inConverter,
2004 &myTarget, targetLimit,
2005 &source, sourceLimit,
2006 pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2007 FALSE,
2008 TRUE,
2009 pErrorCode);
2010 targetLength+=(myTarget-targetBuffer);
2011 } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2012
2013 /* done with preflighting, set warnings and errors as appropriate */
2014 return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2015 }
2016
2017 /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2018 return targetLength;
2019 }
2020
2021 U_CAPI int32_t U_EXPORT2
2022 ucnv_convert(const char *toConverterName, const char *fromConverterName,
2023 char *target, int32_t targetCapacity,
2024 const char *source, int32_t sourceLength,
2025 UErrorCode *pErrorCode) {
2026 UConverter in, out; /* stack-allocated */
2027 UConverter *inConverter, *outConverter;
2028 int32_t targetLength;
2029
2030 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2031 return 0;
2032 }
2033
2034 if( source==NULL || sourceLength<-1 ||
2035 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2036 ) {
2037 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2038 return 0;
2039 }
2040
2041 /* if there is no input data, we're done */
2042 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2043 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2044 }
2045
2046 /* create the converters */
2047 inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2048 if(U_FAILURE(*pErrorCode)) {
2049 return 0;
2050 }
2051
2052 outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2053 if(U_FAILURE(*pErrorCode)) {
2054 ucnv_close(inConverter);
2055 return 0;
2056 }
2057
2058 targetLength=ucnv_internalConvert(outConverter, inConverter,
2059 target, targetCapacity,
2060 source, sourceLength,
2061 pErrorCode);
2062
2063 ucnv_close(inConverter);
2064 ucnv_close(outConverter);
2065
2066 return targetLength;
2067 }
2068
2069 /* @internal */
2070 static int32_t
2071 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2072 UConverterType algorithmicType,
2073 UConverter *cnv,
2074 char *target, int32_t targetCapacity,
2075 const char *source, int32_t sourceLength,
2076 UErrorCode *pErrorCode) {
2077 UConverter algoConverterStatic; /* stack-allocated */
2078 UConverter *algoConverter, *to, *from;
2079 int32_t targetLength;
2080
2081 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2082 return 0;
2083 }
2084
2085 if( cnv==NULL || source==NULL || sourceLength<-1 ||
2086 targetCapacity<0 || (targetCapacity>0 && target==NULL)
2087 ) {
2088 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2089 return 0;
2090 }
2091
2092 /* if there is no input data, we're done */
2093 if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2094 return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2095 }
2096
2097 /* create the algorithmic converter */
2098 algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2099 "", 0, pErrorCode);
2100 if(U_FAILURE(*pErrorCode)) {
2101 return 0;
2102 }
2103
2104 /* reset the other converter */
2105 if(convertToAlgorithmic) {
2106 /* cnv->Unicode->algo */
2107 ucnv_resetToUnicode(cnv);
2108 to=algoConverter;
2109 from=cnv;
2110 } else {
2111 /* algo->Unicode->cnv */
2112 ucnv_resetFromUnicode(cnv);
2113 from=algoConverter;
2114 to=cnv;
2115 }
2116
2117 targetLength=ucnv_internalConvert(to, from,
2118 target, targetCapacity,
2119 source, sourceLength,
2120 pErrorCode);
2121
2122 ucnv_close(algoConverter);
2123
2124 return targetLength;
2125 }
2126
2127 U_CAPI int32_t U_EXPORT2
2128 ucnv_toAlgorithmic(UConverterType algorithmicType,
2129 UConverter *cnv,
2130 char *target, int32_t targetCapacity,
2131 const char *source, int32_t sourceLength,
2132 UErrorCode *pErrorCode) {
2133 return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2134 target, targetCapacity,
2135 source, sourceLength,
2136 pErrorCode);
2137 }
2138
2139 U_CAPI int32_t U_EXPORT2
2140 ucnv_fromAlgorithmic(UConverter *cnv,
2141 UConverterType algorithmicType,
2142 char *target, int32_t targetCapacity,
2143 const char *source, int32_t sourceLength,
2144 UErrorCode *pErrorCode) {
2145 return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2146 target, targetCapacity,
2147 source, sourceLength,
2148 pErrorCode);
2149 }
2150
2151 U_CAPI UConverterType U_EXPORT2
2152 ucnv_getType(const UConverter* converter)
2153 {
2154 int8_t type = converter->sharedData->staticData->conversionType;
2155 #if !UCONFIG_NO_LEGACY_CONVERSION
2156 if(type == UCNV_MBCS) {
2157 return ucnv_MBCSGetType(converter);
2158 }
2159 #endif
2160 return (UConverterType)type;
2161 }
2162
2163 U_CAPI void U_EXPORT2
2164 ucnv_getStarters(const UConverter* converter,
2165 UBool starters[256],
2166 UErrorCode* err)
2167 {
2168 if (err == NULL || U_FAILURE(*err)) {
2169 return;
2170 }
2171
2172 if(converter->sharedData->impl->getStarters != NULL) {
2173 converter->sharedData->impl->getStarters(converter, starters, err);
2174 } else {
2175 *err = U_ILLEGAL_ARGUMENT_ERROR;
2176 }
2177 }
2178
2179 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2180 {
2181 UErrorCode errorCode;
2182 const char *name;
2183 int32_t i;
2184
2185 if(cnv==NULL) {
2186 return NULL;
2187 }
2188
2189 errorCode=U_ZERO_ERROR;
2190 name=ucnv_getName(cnv, &errorCode);
2191 if(U_FAILURE(errorCode)) {
2192 return NULL;
2193 }
2194
2195 for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2196 {
2197 if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2198 {
2199 return ambiguousConverters+i;
2200 }
2201 }
2202
2203 return NULL;
2204 }
2205
2206 U_CAPI void U_EXPORT2
2207 ucnv_fixFileSeparator(const UConverter *cnv,
2208 UChar* source,
2209 int32_t sourceLength) {
2210 const UAmbiguousConverter *a;
2211 int32_t i;
2212 UChar variant5c;
2213
2214 if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2215 {
2216 return;
2217 }
2218
2219 variant5c=a->variant5c;
2220 for(i=0; i<sourceLength; ++i) {
2221 if(source[i]==variant5c) {
2222 source[i]=0x5c;
2223 }
2224 }
2225 }
2226
2227 U_CAPI UBool U_EXPORT2
2228 ucnv_isAmbiguous(const UConverter *cnv) {
2229 return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2230 }
2231
2232 U_CAPI void U_EXPORT2
2233 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2234 {
2235 cnv->useFallback = usesFallback;
2236 }
2237
2238 U_CAPI UBool U_EXPORT2
2239 ucnv_usesFallback(const UConverter *cnv)
2240 {
2241 return cnv->useFallback;
2242 }
2243
2244 U_CAPI void U_EXPORT2
2245 ucnv_getInvalidChars (const UConverter * converter,
2246 char *errBytes,
2247 int8_t * len,
2248 UErrorCode * err)
2249 {
2250 if (err == NULL || U_FAILURE(*err))
2251 {
2252 return;
2253 }
2254 if (len == NULL || errBytes == NULL || converter == NULL)
2255 {
2256 *err = U_ILLEGAL_ARGUMENT_ERROR;
2257 return;
2258 }
2259 if (*len < converter->invalidCharLength)
2260 {
2261 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2262 return;
2263 }
2264 if ((*len = converter->invalidCharLength) > 0)
2265 {
2266 uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2267 }
2268 }
2269
2270 U_CAPI void U_EXPORT2
2271 ucnv_getInvalidUChars (const UConverter * converter,
2272 UChar *errChars,
2273 int8_t * len,
2274 UErrorCode * err)
2275 {
2276 if (err == NULL || U_FAILURE(*err))
2277 {
2278 return;
2279 }
2280 if (len == NULL || errChars == NULL || converter == NULL)
2281 {
2282 *err = U_ILLEGAL_ARGUMENT_ERROR;
2283 return;
2284 }
2285 if (*len < converter->invalidUCharLength)
2286 {
2287 *err = U_INDEX_OUTOFBOUNDS_ERROR;
2288 return;
2289 }
2290 if ((*len = converter->invalidUCharLength) > 0)
2291 {
2292 uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2293 }
2294 }
2295
2296 #define SIG_MAX_LEN 5
2297
2298 U_CAPI const char* U_EXPORT2
2299 ucnv_detectUnicodeSignature( const char* source,
2300 int32_t sourceLength,
2301 int32_t* signatureLength,
2302 UErrorCode* pErrorCode) {
2303 int32_t dummy;
2304
2305 /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2306 * bytes we don't misdetect something
2307 */
2308 char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2309 int i = 0;
2310
2311 if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2312 return NULL;
2313 }
2314
2315 if(source == NULL || sourceLength < -1){
2316 *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2317 return NULL;
2318 }
2319
2320 if(signatureLength == NULL) {
2321 signatureLength = &dummy;
2322 }
2323
2324 if(sourceLength==-1){
2325 sourceLength=uprv_strlen(source);
2326 }
2327
2328
2329 while(i<sourceLength&& i<SIG_MAX_LEN){
2330 start[i]=source[i];
2331 i++;
2332 }
2333
2334 if(start[0] == '\xFE' && start[1] == '\xFF') {
2335 *signatureLength=2;
2336 return "UTF-16BE";
2337 } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2338 if(start[2] == '\x00' && start[3] =='\x00') {
2339 *signatureLength=4;
2340 return "UTF-32LE";
2341 } else {
2342 *signatureLength=2;
2343 return "UTF-16LE";
2344 }
2345 } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2346 *signatureLength=3;
2347 return "UTF-8";
2348 } else if(start[0] == '\x00' && start[1] == '\x00' &&
2349 start[2] == '\xFE' && start[3]=='\xFF') {
2350 *signatureLength=4;
2351 return "UTF-32BE";
2352 } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2353 *signatureLength=3;
2354 return "SCSU";
2355 } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2356 *signatureLength=3;
2357 return "BOCU-1";
2358 } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2359 /*
2360 * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/
2361 * depending on the second UTF-16 code unit.
2362 * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2363 * if it occurs.
2364 *
2365 * So far we have +/v
2366 */
2367 if(start[3] == '\x38' && start[4] == '\x2D') {
2368 /* 5 bytes +/v8- */
2369 *signatureLength=5;
2370 return "UTF-7";
2371 } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2372 /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */
2373 *signatureLength=4;
2374 return "UTF-7";
2375 }
2376 }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2377 *signatureLength=4;
2378 return "UTF-EBCDIC";
2379 }
2380
2381
2382 /* no known Unicode signature byte sequence recognized */
2383 *signatureLength=0;
2384 return NULL;
2385 }
2386
2387 #endif
2388
2389 /*
2390 * Hey, Emacs, please set the following:
2391 *
2392 * Local Variables:
2393 * indent-tabs-mode: nil
2394 * End:
2395 *
2396 */