icuSources/common/ucnv.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1998-2004, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 *  ucnv.c:
  10 *  Implements APIs for the ICU's codeset conversion library;
  11 *  mostly calls through internal functions;
  12 *  created by Bertrand A. Damiba
  13 *
  14 * Modification History:
  15 *
  16 *   Date        Name        Description
  17 *   04/04/99    helena      Fixed internal header inclusion.
  18 *   05/09/00    helena      Added implementation to handle fallback mappings.
  19 *   06/20/2000  helena      OS/400 port changes; mostly typecast.
  20 */
  21
  22 #include "unicode/utypes.h"
  23
  24 #if !UCONFIG_NO_CONVERSION
  25
  26 #include "unicode/ustring.h"
  27 #include "unicode/ucnv.h"
  28 #include "unicode/ucnv_err.h"
  29 #include "unicode/uset.h"
  30 #include "putilimp.h"
  31 #include "cmemory.h"
  32 #include "cstring.h"
  33 #include "uassert.h"
  34 #include "utracimp.h"
  35 #include "ustr_imp.h"
  36 #include "ucnv_imp.h"
  37 #include "ucnv_io.h"
  38 #include "ucnv_cnv.h"
  39 #include "ucnv_bld.h"
  40
  41 /* size of intermediate and preflighting buffers in ucnv_convert() */
  42 #define CHUNK_SIZE 1024
  43
  44 typedef struct UAmbiguousConverter {
  45     const char *name;
  46     const UChar variant5c;
  47 } UAmbiguousConverter;
  48
  49 static const UAmbiguousConverter ambiguousConverters[]={
  50     { "ibm-942_P120-1999", 0xa5 },
  51     { "ibm-943_P130-1999", 0xa5 },
  52     { "ibm-897_P100-1995", 0xa5 },
  53     { "ibm-33722_P120-1999", 0xa5 },
  54     { "ibm-949_P110-1999", 0x20a9 },
  55     { "ibm-1363_P110-1997", 0x20a9 },
  56     { "ISO_2022,locale=ko,version=0", 0x20a9 }
  57 };
  58
  59 U_CAPI const char*  U_EXPORT2
  60 ucnv_getDefaultName ()
  61 {
  62     return ucnv_io_getDefaultConverterName();
  63 }
  64
  65 U_CAPI void U_EXPORT2
  66 ucnv_setDefaultName (const char *converterName)
  67 {
  68   ucnv_io_setDefaultConverterName(converterName);
  69 }
  70 /*Calls through createConverter */
  71 U_CAPI UConverter* U_EXPORT2
  72 ucnv_open (const char *name,
  73                        UErrorCode * err)
  74 {
  75     UConverter *r;
  76
  77     if (err == NULL || U_FAILURE (*err)) {
  78         return NULL;
  79     }
  80
  81     r =  ucnv_createConverter(NULL, name, err);
  82     return r;
  83 }
  84
  85 U_CAPI UConverter* U_EXPORT2
  86 ucnv_openPackage   (const char *packageName, const char *converterName, UErrorCode * err)
  87 {
  88     return ucnv_createConverterFromPackage(packageName, converterName,  err);
  89 }
  90
  91 /*Extracts the UChar* to a char* and calls through createConverter */
  92 U_CAPI UConverter*   U_EXPORT2
  93 ucnv_openU (const UChar * name,
  94                          UErrorCode * err)
  95 {
  96     char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH];
  97
  98     if (err == NULL || U_FAILURE(*err))
  99         return NULL;
 100     if (name == NULL)
 101         return ucnv_open (NULL, err);
 102     if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH)
 103     {
 104         *err = U_ILLEGAL_ARGUMENT_ERROR;
 105         return NULL;
 106     }
 107     return ucnv_open(u_austrcpy(asciiName, name), err);
 108 }
 109
 110 /*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls
 111  *through createConverter*/
 112 U_CAPI UConverter*   U_EXPORT2
 113 ucnv_openCCSID (int32_t codepage,
 114                 UConverterPlatform platform,
 115                 UErrorCode * err)
 116 {
 117     char myName[UCNV_MAX_CONVERTER_NAME_LENGTH];
 118     int32_t myNameLen;
 119
 120     if (err == NULL || U_FAILURE (*err))
 121         return NULL;
 122
 123     /* ucnv_copyPlatformString could return "ibm-" or "cp" */
 124     myNameLen = ucnv_copyPlatformString(myName, platform);
 125     T_CString_integerToString(myName + myNameLen, codepage, 10);
 126
 127     return ucnv_createConverter(NULL, myName, err);
 128 }
 129
 130 /* Creating a temporary stack-based object that can be used in one thread,
 131 and created from a converter that is shared across threads.
 132 */
 133
 134 U_CAPI UConverter* U_EXPORT2
 135 ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
 136 {
 137     UConverter *localConverter, *allocatedConverter;
 138     int32_t bufferSizeNeeded;
 139     char *stackBufferChars = (char *)stackBuffer;
 140     UErrorCode cbErr;
 141     UConverterToUnicodeArgs toUArgs = {
 142         sizeof(UConverterToUnicodeArgs),
 143             TRUE,
 144             NULL,
 145             NULL,
 146             NULL,
 147             NULL,
 148             NULL,
 149             NULL
 150     };
 151     UConverterFromUnicodeArgs fromUArgs = {
 152         sizeof(UConverterFromUnicodeArgs),
 153             TRUE,
 154             NULL,
 155             NULL,
 156             NULL,
 157             NULL,
 158             NULL,
 159             NULL
 160     };
 161
 162     UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE);
 163
 164     if (status == NULL || U_FAILURE(*status)){
 165         UTRACE_EXIT_STATUS(status? *status: U_ILLEGAL_ARGUMENT_ERROR);
 166         return 0;
 167     }
 168
 169     if (!pBufferSize || !cnv){
 170         *status = U_ILLEGAL_ARGUMENT_ERROR;
 171         UTRACE_EXIT_STATUS(*status);
 172         return 0;
 173     }
 174
 175     UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p",
 176                                     ucnv_getName(cnv, status), cnv, stackBuffer);
 177
 178     if (cnv->sharedData->impl->safeClone != NULL) {
 179         /* call the custom safeClone function for sizing */
 180         bufferSizeNeeded = 0;
 181         cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status);
 182     }
 183     else
 184     {
 185         /* inherent sizing */
 186         bufferSizeNeeded = sizeof(UConverter);
 187     }
 188
 189     if (*pBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */
 190         *pBufferSize = bufferSizeNeeded;
 191         UTRACE_EXIT_VALUE(bufferSizeNeeded);
 192         return 0;
 193     }
 194
 195
 196     /* Pointers on 64-bit platforms need to be aligned
 197      * on a 64-bit boundary in memory.
 198      */
 199     if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
 200         int32_t offsetUp = (int32_t)U_ALIGNMENT_OFFSET_UP(stackBufferChars);
 201         if(*pBufferSize > offsetUp) {
 202             *pBufferSize -= offsetUp;
 203             stackBufferChars += offsetUp;
 204         } else {
 205             /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */
 206             *pBufferSize = 1;
 207         }
 208     }
 209
 210     stackBuffer = (void *)stackBufferChars;
 211
 212     /* Now, see if we must allocate any memory */
 213     if (*pBufferSize < bufferSizeNeeded || stackBuffer == NULL)
 214     {
 215         /* allocate one here...*/
 216         localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded);
 217
 218         if(localConverter == NULL) {
 219             *status = U_MEMORY_ALLOCATION_ERROR;
 220             UTRACE_EXIT_STATUS(*status);
 221             return NULL;
 222         }
 223
 224         if (U_SUCCESS(*status)) {
 225             *status = U_SAFECLONE_ALLOCATED_WARNING;
 226         }
 227
 228         /* record the fact that memory was allocated */
 229         *pBufferSize = bufferSizeNeeded;
 230     } else {
 231         /* just use the stack buffer */
 232         localConverter = (UConverter*) stackBuffer;
 233         allocatedConverter = NULL;
 234     }
 235
 236     uprv_memset(localConverter, 0, bufferSizeNeeded);
 237
 238     /* Copy initial state */
 239     uprv_memcpy(localConverter, cnv, sizeof(UConverter));
 240     localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE;
 241
 242     /* now either call the safeclone fcn or not */
 243     if (cnv->sharedData->impl->safeClone != NULL) {
 244         /* call the custom safeClone function */
 245         localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status);
 246     }
 247
 248     if(localConverter==NULL || U_FAILURE(*status)) {
 249         uprv_free(allocatedConverter);
 250         UTRACE_EXIT_STATUS(*status);
 251         return NULL;
 252     }
 253
 254     /* increment refcount of shared data if needed */
 255     /*
 256     Checking whether it's an algorithic converter is okay
 257     in multithreaded applications because the value never changes.
 258     Don't check referenceCounter for any other value.
 259     */
 260     if (cnv->sharedData->referenceCounter != ~0) {
 261         ucnv_incrementRefCount(cnv->sharedData);
 262     }
 263
 264     if(localConverter == (UConverter*)stackBuffer) {
 265         /* we're using user provided data - set to not destroy */
 266         localConverter->isCopyLocal = TRUE;
 267     }
 268
 269     /* allow callback functions to handle any memory allocation */
 270     toUArgs.converter = fromUArgs.converter = localConverter;
 271     cbErr = U_ZERO_ERROR;
 272     cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr);
 273     cbErr = U_ZERO_ERROR;
 274     cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr);
 275
 276     UTRACE_EXIT_PTR_STATUS(localConverter, *status);
 277     return localConverter;
 278 }
 279
 280
 281
 282 /*Decreases the reference counter in the shared immutable section of the object
 283  *and frees the mutable part*/
 284
 285 U_CAPI void  U_EXPORT2
 286 ucnv_close (UConverter * converter)
 287 {
 288     /* first, notify the callback functions that the converter is closed */
 289     UConverterToUnicodeArgs toUArgs = {
 290         sizeof(UConverterToUnicodeArgs),
 291             TRUE,
 292             NULL,
 293             NULL,
 294             NULL,
 295             NULL,
 296             NULL,
 297             NULL
 298     };
 299     UConverterFromUnicodeArgs fromUArgs = {
 300         sizeof(UConverterFromUnicodeArgs),
 301             TRUE,
 302             NULL,
 303             NULL,
 304             NULL,
 305             NULL,
 306             NULL,
 307             NULL
 308     };
 309     UErrorCode errorCode = U_ZERO_ERROR;
 310
 311     UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE);
 312
 313     if (converter == NULL)
 314     {
 315         UTRACE_EXIT();
 316         return;
 317     }
 318
 319     UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b",
 320         ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal);
 321
 322     toUArgs.converter = fromUArgs.converter = converter;
 323
 324     converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode);
 325     errorCode = U_ZERO_ERROR;
 326     converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode);
 327
 328     if (converter->sharedData->impl->close != NULL) {
 329         converter->sharedData->impl->close(converter);
 330     }
 331
 332     /*
 333     Checking whether it's an algorithic converter is okay
 334     in multithreaded applications because the value never changes.
 335     Don't check referenceCounter for any other value.
 336     */
 337     if (converter->sharedData->referenceCounter != ~0) {
 338         ucnv_unloadSharedDataIfReady(converter->sharedData);
 339     }
 340
 341     if(!converter->isCopyLocal){
 342         uprv_free (converter);
 343     }
 344
 345     UTRACE_EXIT();
 346 }
 347
 348 /*returns a single Name from the list, will return NULL if out of bounds
 349  */
 350 U_CAPI const char*   U_EXPORT2
 351 ucnv_getAvailableName (int32_t n)
 352 {
 353   if (0 <= n && n <= 0xffff) {
 354     UErrorCode err = U_ZERO_ERROR;
 355     const char *name = ucnv_io_getAvailableConverter((uint16_t)n, &err);
 356     if (U_SUCCESS(err)) {
 357       return name;
 358     }
 359   }
 360   return NULL;
 361 }
 362
 363 U_CAPI int32_t   U_EXPORT2
 364 ucnv_countAvailable ()
 365 {
 366     UErrorCode err = U_ZERO_ERROR;
 367     return ucnv_io_countAvailableConverters(&err);
 368 }
 369
 370 U_CAPI uint16_t U_EXPORT2
 371 ucnv_countAliases(const char *alias, UErrorCode *pErrorCode)
 372 {
 373     return ucnv_io_countAliases(alias, pErrorCode);
 374 }
 375
 376
 377 U_CAPI const char* U_EXPORT2
 378 ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode)
 379 {
 380     return ucnv_io_getAlias(alias, n, pErrorCode);
 381 }
 382
 383 U_CAPI void U_EXPORT2
 384 ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode)
 385 {
 386     ucnv_io_getAliases(alias, 0, aliases, pErrorCode);
 387 }
 388
 389 U_CAPI uint16_t U_EXPORT2
 390 ucnv_countStandards(void)
 391 {
 392     UErrorCode err = U_ZERO_ERROR;
 393     return ucnv_io_countStandards(&err);
 394 }
 395
 396 U_CAPI void    U_EXPORT2
 397 ucnv_getSubstChars (const UConverter * converter,
 398                     char *mySubChar,
 399                     int8_t * len,
 400                     UErrorCode * err)
 401 {
 402     if (U_FAILURE (*err))
 403         return;
 404
 405     if (*len < converter->subCharLen) /*not enough space in subChars */
 406     {
 407         *err = U_INDEX_OUTOFBOUNDS_ERROR;
 408         return;
 409     }
 410
 411   uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen);   /*fills in the subchars */
 412   *len = converter->subCharLen; /*store # of bytes copied to buffer */
 413     uprv_memcpy (mySubChar, converter->subChar, converter->subCharLen);   /*fills in the subchars */
 414     *len = converter->subCharLen; /*store # of bytes copied to buffer */
 415 }
 416
 417 U_CAPI void    U_EXPORT2
 418 ucnv_setSubstChars (UConverter * converter,
 419                     const char *mySubChar,
 420                     int8_t len,
 421                     UErrorCode * err)
 422 {
 423     if (U_FAILURE (*err))
 424         return;
 425
 426     /*Makes sure that the subChar is within the codepages char length boundaries */
 427     if ((len > converter->sharedData->staticData->maxBytesPerChar)
 428      || (len < converter->sharedData->staticData->minBytesPerChar))
 429     {
 430         *err = U_ILLEGAL_ARGUMENT_ERROR;
 431         return;
 432     }
 433
 434     uprv_memcpy (converter->subChar, mySubChar, len); /*copies the subchars */
 435     converter->subCharLen = len;  /*sets the new len */
 436
 437     /*
 438     * There is currently (2001Feb) no separate API to set/get subChar1.
 439     * In order to always have subChar written after it is explicitly set,
 440     * we set subChar1 to 0.
 441     */
 442     converter->subChar1 = 0;
 443
 444     return;
 445 }
 446
 447 /*resets the internal states of a converter
 448  *goal : have the same behaviour than a freshly created converter
 449  */
 450 static void _reset(UConverter *converter, UConverterResetChoice choice,
 451                    UBool callCallback) {
 452     if(converter == NULL) {
 453         return;
 454     }
 455
 456     if(callCallback) {
 457         /* first, notify the callback functions that the converter is reset */
 458         UConverterToUnicodeArgs toUArgs = {
 459             sizeof(UConverterToUnicodeArgs),
 460                 TRUE,
 461                 NULL,
 462                 NULL,
 463                 NULL,
 464                 NULL,
 465                 NULL,
 466                 NULL
 467         };
 468         UConverterFromUnicodeArgs fromUArgs = {
 469             sizeof(UConverterFromUnicodeArgs),
 470                 TRUE,
 471                 NULL,
 472                 NULL,
 473                 NULL,
 474                 NULL,
 475                 NULL,
 476                 NULL
 477         };
 478         UErrorCode errorCode;
 479
 480         toUArgs.converter = fromUArgs.converter = converter;
 481         if(choice<=UCNV_RESET_TO_UNICODE) {
 482             errorCode = U_ZERO_ERROR;
 483             converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode);
 484         }
 485         if(choice!=UCNV_RESET_TO_UNICODE) {
 486             errorCode = U_ZERO_ERROR;
 487             converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode);
 488         }
 489     }
 490
 491     /* now reset the converter itself */
 492     if(choice<=UCNV_RESET_TO_UNICODE) {
 493         converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus;
 494         converter->mode = 0;
 495         converter->toULength = 0;
 496         converter->invalidCharLength = converter->UCharErrorBufferLength = 0;
 497         converter->preToULength = 0;
 498     }
 499     if(choice!=UCNV_RESET_TO_UNICODE) {
 500         converter->fromUnicodeStatus = 0;
 501         converter->fromUChar32 = 0;
 502         converter->invalidUCharLength = converter->charErrorBufferLength = 0;
 503         converter->preFromUFirstCP = U_SENTINEL;
 504         converter->preFromULength = 0;
 505     }
 506
 507     if (converter->sharedData->impl->reset != NULL) {
 508         /* call the custom reset function */
 509         converter->sharedData->impl->reset(converter, choice);
 510     }
 511 }
 512
 513 U_CAPI void  U_EXPORT2
 514 ucnv_reset(UConverter *converter)
 515 {
 516     _reset(converter, UCNV_RESET_BOTH, TRUE);
 517 }
 518
 519 U_CAPI void  U_EXPORT2
 520 ucnv_resetToUnicode(UConverter *converter)
 521 {
 522     _reset(converter, UCNV_RESET_TO_UNICODE, TRUE);
 523 }
 524
 525 U_CAPI void  U_EXPORT2
 526 ucnv_resetFromUnicode(UConverter *converter)
 527 {
 528     _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE);
 529 }
 530
 531 U_CAPI int8_t   U_EXPORT2
 532 ucnv_getMaxCharSize (const UConverter * converter)
 533 {
 534     return converter->maxBytesPerUChar;
 535 }
 536
 537
 538 U_CAPI int8_t   U_EXPORT2
 539 ucnv_getMinCharSize (const UConverter * converter)
 540 {
 541     return converter->sharedData->staticData->minBytesPerChar;
 542 }
 543
 544 U_CAPI const char*   U_EXPORT2
 545 ucnv_getName (const UConverter * converter, UErrorCode * err)
 546
 547 {
 548     if (U_FAILURE (*err))
 549         return NULL;
 550     if(converter->sharedData->impl->getName){
 551         const char* temp= converter->sharedData->impl->getName(converter);
 552         if(temp)
 553             return temp;
 554     }
 555     return converter->sharedData->staticData->name;
 556 }
 557
 558 U_CAPI int32_t U_EXPORT2
 559 ucnv_getCCSID(const UConverter * converter,
 560               UErrorCode * err)
 561 {
 562     int32_t ccsid;
 563     if (U_FAILURE (*err))
 564         return -1;
 565
 566     ccsid = converter->sharedData->staticData->codepage;
 567     if (ccsid == 0) {
 568         /* Rare case. This is for cases like gb18030,
 569         which doesn't have an IBM cannonical name, but does have an IBM alias. */
 570         const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err);
 571         if (U_SUCCESS(*err) && standardName) {
 572             const char *ccsidStr = uprv_strchr(standardName, '-');
 573             if (ccsidStr) {
 574                 ccsid = (int32_t)atol(ccsidStr+1);  /* +1 to skip '-' */
 575             }
 576         }
 577     }
 578     return ccsid;
 579 }
 580
 581
 582 U_CAPI UConverterPlatform   U_EXPORT2
 583 ucnv_getPlatform (const UConverter * converter,
 584                                       UErrorCode * err)
 585 {
 586     if (U_FAILURE (*err))
 587         return UCNV_UNKNOWN;
 588
 589     return (UConverterPlatform)converter->sharedData->staticData->platform;
 590 }
 591
 592 U_CAPI void U_EXPORT2
 593     ucnv_getToUCallBack (const UConverter * converter,
 594                          UConverterToUCallback *action,
 595                          const void **context)
 596 {
 597     *action = converter->fromCharErrorBehaviour;
 598     *context = converter->toUContext;
 599 }
 600
 601 U_CAPI void U_EXPORT2
 602     ucnv_getFromUCallBack (const UConverter * converter,
 603                            UConverterFromUCallback *action,
 604                            const void **context)
 605 {
 606     *action = converter->fromUCharErrorBehaviour;
 607     *context = converter->fromUContext;
 608 }
 609
 610 U_CAPI void    U_EXPORT2
 611 ucnv_setToUCallBack (UConverter * converter,
 612                             UConverterToUCallback newAction,
 613                             const void* newContext,
 614                             UConverterToUCallback *oldAction,
 615                             const void** oldContext,
 616                             UErrorCode * err)
 617 {
 618     if (U_FAILURE (*err))
 619         return;
 620     if (oldAction) *oldAction = converter->fromCharErrorBehaviour;
 621     converter->fromCharErrorBehaviour = newAction;
 622     if (oldContext) *oldContext = converter->toUContext;
 623     converter->toUContext = newContext;
 624 }
 625
 626 U_CAPI void  U_EXPORT2
 627 ucnv_setFromUCallBack (UConverter * converter,
 628                             UConverterFromUCallback newAction,
 629                             const void* newContext,
 630                             UConverterFromUCallback *oldAction,
 631                             const void** oldContext,
 632                             UErrorCode * err)
 633 {
 634     if (U_FAILURE (*err))
 635         return;
 636     if (oldAction) *oldAction = converter->fromUCharErrorBehaviour;
 637     converter->fromUCharErrorBehaviour = newAction;
 638     if (oldContext) *oldContext = converter->fromUContext;
 639     converter->fromUContext = newContext;
 640 }
 641
 /*
  * Adjusts the first `length` entries of `offsets` in place.
  *
  * sourceIndex >= 0: every non-negative offset is increased by
  *     sourceIndex - errorInputLength; if that difference is negative
  *     (the error input sequence started in a previous buffer),
  *     every entry is set to -1 instead.
  * sourceIndex < 0:  every entry is set to -1 because the conversion
  *     function does not handle offsets.
  */
 static void
 _updateOffsets(int32_t *offsets, int32_t length,
                int32_t sourceIndex, int32_t errorInputLength) {
     int32_t shift = (sourceIndex >= 0) ? sourceIndex - errorInputLength : -1;
     int32_t i;

     if (shift == 0) {
         /* most common case, nothing to do */
         return;
     }

     if (shift > 0) {
         /* move every valid offset forward; negative offsets stay as they are */
         for (i = 0; i < length; ++i) {
             if (offsets[i] >= 0) {
                 offsets[i] += shift;
             }
         }
     } else {
         /* no meaningful offsets can be reported */
         for (i = 0; i < length; ++i) {
             offsets[i] = -1;
         }
     }
 }
 686
 687 /* ucnv_fromUnicode --------------------------------------------------------- */
 688
 689 /*
 690  * Implementation note for m:n conversions
 691  *
 692  * While collecting source units to find the longest match for m:n conversion,
 693  * some source units may need to be stored for a partial match.
 694  * When a second buffer does not yield a match on all of the previously stored
 695  * source units, then they must be "replayed", i.e., fed back into the converter.
 696  *
 697  * The code relies on the fact that replaying will not nest -
 698  * converting a replay buffer will not result in a replay.
 699  * This is because a replay is necessary only after the _continuation_ of a
 700  * partial match failed, but a replay buffer is converted as a whole.
 701  * It may result in some of its units being stored again for a partial match,
 702  * but there will not be a continuation _during_ the replay which could fail.
 703  *
 704  * It is conceivable that a callback function could call the converter
 705  * recursively in a way that causes another replay to be stored, but that
 706  * would be an error in the callback function.
 707  * Such violations will cause assertion failures in a debug build,
 708  * and wrong output, but they will not cause a crash.
 709  */
 710
 711 static void
 712 _fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
 713     UConverterFromUnicode fromUnicode;
 714     UConverter *cnv;
 715     const UChar *s;
 716     char *t;
 717     int32_t *offsets;
 718     int32_t sourceIndex;
 719     int32_t errorInputLength;
 720     UBool converterSawEndOfInput, calledCallback;
 721
 722     /* variables for m:n conversion */
 723     UChar replay[UCNV_EXT_MAX_UCHARS];
 724     const UChar *realSource, *realSourceLimit;
 725     int32_t realSourceIndex;
 726     UBool realFlush;
 727
 728     cnv=pArgs->converter;
 729     s=pArgs->source;
 730     t=pArgs->target;
 731     offsets=pArgs->offsets;
 732
 733     /* get the converter implementation function */
 734     sourceIndex=0;
 735     if(offsets==NULL) {
 736         fromUnicode=cnv->sharedData->impl->fromUnicode;
 737     } else {
 738         fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
 739         if(fromUnicode==NULL) {
 740             /* there is no WithOffsets implementation */
 741             fromUnicode=cnv->sharedData->impl->fromUnicode;
 742             /* we will write -1 for each offset */
 743             sourceIndex=-1;
 744         }
 745     }
 746
 747     if(cnv->preFromULength>=0) {
 748         /* normal mode */
 749         realSource=NULL;
 750
 751         /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
 752         realSourceLimit=NULL;
 753         realFlush=FALSE;
 754         realSourceIndex=0;
 755     } else {
 756         /*
 757          * Previous m:n conversion stored source units from a partial match
 758          * and failed to consume all of them.
 759          * We need to "replay" them from a temporary buffer and convert them first.
 760          */
 761         realSource=pArgs->source;
 762         realSourceLimit=pArgs->sourceLimit;
 763         realFlush=pArgs->flush;
 764         realSourceIndex=sourceIndex;
 765
 766         uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
 767         pArgs->source=replay;
 768         pArgs->sourceLimit=replay-cnv->preFromULength;
 769         pArgs->flush=FALSE;
 770         sourceIndex=-1;
 771
 772         cnv->preFromULength=0;
 773     }
 774
 775     /*
 776      * loop for conversion and error handling
 777      *
 778      * loop {
 779      *   convert
 780      *   loop {
 781      *     update offsets
 782      *     handle end of input
 783      *     handle errors/call callback
 784      *   }
 785      * }
 786      */
 787     for(;;) {
 788         /* convert */
 789         fromUnicode(pArgs, err);
 790
 791         /*
 792          * set a flag for whether the converter
 793          * successfully processed the end of the input
 794          *
 795          * need not check cnv->preFromULength==0 because a replay (<0) will cause
 796          * s<sourceLimit before converterSawEndOfInput is checked
 797          */
 798         converterSawEndOfInput=
 799             (UBool)(U_SUCCESS(*err) &&
 800                     pArgs->flush && pArgs->source==pArgs->sourceLimit &&
 801                     cnv->fromUChar32==0);
 802
 803         /* no callback called yet for this iteration */
 804         calledCallback=FALSE;
 805
 806         /* no sourceIndex adjustment for conversion, only for callback output */
 807         errorInputLength=0;
 808
 809         /*
 810          * loop for offsets and error handling
 811          *
 812          * iterates at most 3 times:
 813          * 1. to clean up after the conversion function
 814          * 2. after the callback
 815          * 3. after the callback again if there was truncated input
 816          */
 817         for(;;) {
 818             /* update offsets if we write any */
 819             if(offsets!=NULL) {
 820                 int32_t length=(int32_t)(pArgs->target-t);
 821                 if(length>0) {
 822                     _updateOffsets(offsets, length, sourceIndex, errorInputLength);
 823
 824                     /*
 825                      * if a converter handles offsets and updates the offsets
 826                      * pointer at the end, then pArgs->offset should not change
 827                      * here;
 828                      * however, some converters do not handle offsets at all
 829                      * (sourceIndex<0) or may not update the offsets pointer
 830                      */
 831                     pArgs->offsets=offsets+=length;
 832                 }
 833
 834                 if(sourceIndex>=0) {
 835                     sourceIndex+=(int32_t)(pArgs->source-s);
 836                 }
 837             }
 838
 839             if(cnv->preFromULength<0) {
 840                 /*
 841                  * switch the source to new replay units (cannot occur while replaying)
 842                  * after offset handling and before end-of-input and callback handling
 843                  */
 844                 if(realSource==NULL) {
 845                     realSource=pArgs->source;
 846                     realSourceLimit=pArgs->sourceLimit;
 847                     realFlush=pArgs->flush;
 848                     realSourceIndex=sourceIndex;
 849
 850                     uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
 851                     pArgs->source=replay;
 852                     pArgs->sourceLimit=replay-cnv->preFromULength;
 853                     pArgs->flush=FALSE;
 854                     if((sourceIndex+=cnv->preFromULength)<0) {
 855                         sourceIndex=-1;
 856                     }
 857
 858                     cnv->preFromULength=0;
 859                 } else {
 860                     /* see implementation note before _fromUnicodeWithCallback() */
 861                     U_ASSERT(realSource==NULL);
 862                     *err=U_INTERNAL_PROGRAM_ERROR;
 863                 }
 864             }
 865
 866             /* update pointers */
 867             s=pArgs->source;
 868             t=pArgs->target;
 869
 870             if(U_SUCCESS(*err)) {
 871                 if(s<pArgs->sourceLimit) {
 872                     /*
 873                      * continue with the conversion loop while there is still input left
 874                      * (continue converting by breaking out of only the inner loop)
 875                      */
 876                     break;
 877                 } else if(realSource!=NULL) {
 878                     /* switch back from replaying to the real source and continue */
 879                     pArgs->source=realSource;
 880                     pArgs->sourceLimit=realSourceLimit;
 881                     pArgs->flush=realFlush;
 882                     sourceIndex=realSourceIndex;
 883
 884                     realSource=NULL;
 885                     break;
 886                 } else if(pArgs->flush && cnv->fromUChar32!=0) {
 887                     /*
 888                      * the entire input stream is consumed
 889                      * and there is a partial, truncated input sequence left
 890                      */
 891
 892                     /* inject an error and continue with callback handling */
 893                     *err=U_TRUNCATED_CHAR_FOUND;
 894                     calledCallback=FALSE; /* new error condition */
 895                 } else {
 896                     /* input consumed */
 897                     if(pArgs->flush) {
 898                         /*
 899                          * return to the conversion loop once more if the flush
 900                          * flag is set and the conversion function has not
 901                          * successfully processed the end of the input yet
 902                          *
 903                          * (continue converting by breaking out of only the inner loop)
 904                          */
 905                         if(!converterSawEndOfInput) {
 906                             break;
 907                         }
 908
 909                         /* reset the converter without calling the callback function */
 910                         _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
 911                     }
 912
 913                     /* done successfully */
 914                     return;
 915                 }
 916             }
 917
 918             /* U_FAILURE(*err) */
 919             {
 920                 UErrorCode e;
 921
 922                 if( calledCallback ||
 923                     (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
 924                     (e!=U_INVALID_CHAR_FOUND &&
 925                      e!=U_ILLEGAL_CHAR_FOUND &&
 926                      e!=U_TRUNCATED_CHAR_FOUND)
 927                 ) {
 928                     /*
 929                      * the callback did not or cannot resolve the error:
 930                      * set output pointers and return
 931                      *
 932                      * the check for buffer overflow is redundant but it is
 933                      * a high-runner case and hopefully documents the intent
 934                      * well
 935                      *
 936                      * if we were replaying, then the replay buffer must be
 937                      * copied back into the UConverter
 938                      * and the real arguments must be restored
 939                      */
 940                     if(realSource!=NULL) {
 941                         int32_t length;
 942
 943                         U_ASSERT(cnv->preFromULength==0);
 944
 945                         length=(int32_t)(pArgs->sourceLimit-pArgs->source);
 946                         if(length>0) {
 947                             uprv_memcpy(cnv->preFromU, pArgs->source, length*U_SIZEOF_UCHAR);
 948                             cnv->preFromULength=(int8_t)-length;
 949                         }
 950
 951                         pArgs->source=realSource;
 952                         pArgs->sourceLimit=realSourceLimit;
 953                         pArgs->flush=realFlush;
 954                     }
 955
 956                     return;
 957                 }
 958             }
 959
 960             /* callback handling */
 961             {
 962                 UChar32 codePoint;
 963
 964                 /* get and write the code point */
 965                 codePoint=cnv->fromUChar32;
 966                 errorInputLength=0;
 967                 U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
 968                 cnv->invalidUCharLength=(int8_t)errorInputLength;
 969
 970                 /* set the converter state to deal with the next character */
 971                 cnv->fromUChar32=0;
 972
 973                 /* call the callback function */
 974                 cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
 975                     cnv->invalidUCharBuffer, errorInputLength, codePoint,
 976                     *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
 977                     err);
 978             }
 979
 980             /*
 981              * loop back to the offset handling
 982              *
 983              * this flag will indicate after offset handling
 984              * that a callback was called;
 985              * if the callback did not resolve the error, then we return
 986              */
 987             calledCallback=TRUE;
 988         }
 989     }
 990 }
 991
 992 U_CAPI void U_EXPORT2
 993 ucnv_fromUnicode(UConverter *cnv,
 994                  char **target, const char *targetLimit,
 995                  const UChar **source, const UChar *sourceLimit,
 996                  int32_t *offsets,
 997                  UBool flush,
 998                  UErrorCode *err) {
 999     UConverterFromUnicodeArgs args;
1000     const UChar *s;
1001     char *t;
1002
1003     /* check parameters */
1004     if(err==NULL || U_FAILURE(*err)) {
1005         return;
1006     }
1007
1008     if(cnv==NULL || target==NULL || source==NULL) {
1009         *err=U_ILLEGAL_ARGUMENT_ERROR;
1010         return;
1011     }
1012
1013     s=*source;
1014     t=*target;
1015     if(sourceLimit<s || targetLimit<t) {
1016         *err=U_ILLEGAL_ARGUMENT_ERROR;
1017         return;
1018     }
1019
1020     /*
1021      * Make sure that the buffer sizes do not exceed the number range for
1022      * int32_t because some functions use the size (in units or bytes)
1023      * rather than comparing pointers, and because offsets are int32_t values.
1024      *
1025      * size_t is guaranteed to be unsigned and large enough for the job.
1026      *
1027      * Return with an error instead of adjusting the limits because we would
1028      * not be able to maintain the semantics that either the source must be
1029      * consumed or the target filled (unless an error occurs).
1030      * An adjustment would be targetLimit=t+0x7fffffff; for example.
1031      */
1032     if(
1033         ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) ||
1034         ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t)
1035     ) {
1036         *err=U_ILLEGAL_ARGUMENT_ERROR;
1037         return;
1038     }
1039
1040     /* flush the target overflow buffer */
1041     if(cnv->charErrorBufferLength>0) {
1042         char *overflow;
1043         int32_t i, length;
1044
1045         overflow=(char *)cnv->charErrorBuffer;
1046         length=cnv->charErrorBufferLength;
1047         i=0;
1048         do {
1049             if(t==targetLimit) {
1050                 /* the overflow buffer contains too much, keep the rest */
1051                 int32_t j=0;
1052
1053                 do {
1054                     overflow[j++]=overflow[i++];
1055                 } while(i<length);
1056
1057                 cnv->charErrorBufferLength=(int8_t)j;
1058                 *target=t;
1059                 *err=U_BUFFER_OVERFLOW_ERROR;
1060                 return;
1061             }
1062
1063             /* copy the overflow contents to the target */
1064             *t++=overflow[i++];
1065             if(offsets!=NULL) {
1066                 *offsets++=-1; /* no source index available for old output */
1067             }
1068         } while(i<length);
1069
1070         /* the overflow buffer is completely copied to the target */
1071         cnv->charErrorBufferLength=0;
1072     }
1073
1074     if(!flush && s==sourceLimit && cnv->preFromULength>=0) {
1075         /* the overflow buffer is emptied and there is no new input: we are done */
1076         *target=t;
1077         return;
1078     }
1079
1080     /*
1081      * Do not simply return with a buffer overflow error if
1082      * !flush && t==targetLimit
1083      * because it is possible that the source will not generate any output.
1084      * For example, the skip callback may be called;
1085      * it does not output anything.
1086      */
1087
1088     /* prepare the converter arguments */
1089     args.converter=cnv;
1090     args.flush=flush;
1091     args.offsets=offsets;
1092     args.source=s;
1093     args.sourceLimit=sourceLimit;
1094     args.target=t;
1095     args.targetLimit=targetLimit;
1096     args.size=sizeof(args);
1097
1098     _fromUnicodeWithCallback(&args, err);
1099
1100     *source=args.source;
1101     *target=args.target;
1102 }
1103
1104 /* ucnv_toUnicode() --------------------------------------------------------- */
1105
1106 static void
1107 _toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
1108     UConverterToUnicode toUnicode;
1109     UConverter *cnv;
1110     const char *s;
1111     UChar *t;
1112     int32_t *offsets;
1113     int32_t sourceIndex;
1114     int32_t errorInputLength;
1115     UBool converterSawEndOfInput, calledCallback;
1116
1117     /* variables for m:n conversion */
1118     char replay[UCNV_EXT_MAX_BYTES];
1119     const char *realSource, *realSourceLimit;
1120     int32_t realSourceIndex;
1121     UBool realFlush;
1122
1123     cnv=pArgs->converter;
1124     s=pArgs->source;
1125     t=pArgs->target;
1126     offsets=pArgs->offsets;
1127
1128     /* get the converter implementation function */
1129     sourceIndex=0;
1130     if(offsets==NULL) {
1131         toUnicode=cnv->sharedData->impl->toUnicode;
1132     } else {
1133         toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
1134         if(toUnicode==NULL) {
1135             /* there is no WithOffsets implementation */
1136             toUnicode=cnv->sharedData->impl->toUnicode;
1137             /* we will write -1 for each offset */
1138             sourceIndex=-1;
1139         }
1140     }
1141
1142     if(cnv->preToULength>=0) {
1143         /* normal mode */
1144         realSource=NULL;
1145
1146         /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
1147         realSourceLimit=NULL;
1148         realFlush=FALSE;
1149         realSourceIndex=0;
1150     } else {
1151         /*
1152          * Previous m:n conversion stored source units from a partial match
1153          * and failed to consume all of them.
1154          * We need to "replay" them from a temporary buffer and convert them first.
1155          */
1156         realSource=pArgs->source;
1157         realSourceLimit=pArgs->sourceLimit;
1158         realFlush=pArgs->flush;
1159         realSourceIndex=sourceIndex;
1160
1161         uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1162         pArgs->source=replay;
1163         pArgs->sourceLimit=replay-cnv->preToULength;
1164         pArgs->flush=FALSE;
1165         sourceIndex=-1;
1166
1167         cnv->preToULength=0;
1168     }
1169
1170     /*
1171      * loop for conversion and error handling
1172      *
1173      * loop {
1174      *   convert
1175      *   loop {
1176      *     update offsets
1177      *     handle end of input
1178      *     handle errors/call callback
1179      *   }
1180      * }
1181      */
1182     for(;;) {
1183         if(U_SUCCESS(*err)) {
1184             /* convert */
1185             toUnicode(pArgs, err);
1186
1187             /*
1188              * set a flag for whether the converter
1189              * successfully processed the end of the input
1190              *
1191              * need not check cnv->preToULength==0 because a replay (<0) will cause
1192              * s<sourceLimit before converterSawEndOfInput is checked
1193              */
1194             converterSawEndOfInput=
1195                 (UBool)(U_SUCCESS(*err) &&
1196                         pArgs->flush && pArgs->source==pArgs->sourceLimit &&
1197                         cnv->toULength==0);
1198         } else {
1199             /* handle error from getNextUChar() */
1200             converterSawEndOfInput=FALSE;
1201         }
1202
1203         /* no callback called yet for this iteration */
1204         calledCallback=FALSE;
1205
1206         /* no sourceIndex adjustment for conversion, only for callback output */
1207         errorInputLength=0;
1208
1209         /*
1210          * loop for offsets and error handling
1211          *
1212          * iterates at most 3 times:
1213          * 1. to clean up after the conversion function
1214          * 2. after the callback
1215          * 3. after the callback again if there was truncated input
1216          */
1217         for(;;) {
1218             /* update offsets if we write any */
1219             if(offsets!=NULL) {
1220                 int32_t length=(int32_t)(pArgs->target-t);
1221                 if(length>0) {
1222                     _updateOffsets(offsets, length, sourceIndex, errorInputLength);
1223
1224                     /*
1225                      * if a converter handles offsets and updates the offsets
1226                      * pointer at the end, then pArgs->offset should not change
1227                      * here;
1228                      * however, some converters do not handle offsets at all
1229                      * (sourceIndex<0) or may not update the offsets pointer
1230                      */
1231                     pArgs->offsets=offsets+=length;
1232                 }
1233
1234                 if(sourceIndex>=0) {
1235                     sourceIndex+=(int32_t)(pArgs->source-s);
1236                 }
1237             }
1238
1239             if(cnv->preToULength<0) {
1240                 /*
1241                  * switch the source to new replay units (cannot occur while replaying)
1242                  * after offset handling and before end-of-input and callback handling
1243                  */
1244                 if(realSource==NULL) {
1245                     realSource=pArgs->source;
1246                     realSourceLimit=pArgs->sourceLimit;
1247                     realFlush=pArgs->flush;
1248                     realSourceIndex=sourceIndex;
1249
1250                     uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
1251                     pArgs->source=replay;
1252                     pArgs->sourceLimit=replay-cnv->preToULength;
1253                     pArgs->flush=FALSE;
1254                     if((sourceIndex+=cnv->preToULength)<0) {
1255                         sourceIndex=-1;
1256                     }
1257
1258                     cnv->preToULength=0;
1259                 } else {
1260                     /* see implementation note before _fromUnicodeWithCallback() */
1261                     U_ASSERT(realSource==NULL);
1262                     *err=U_INTERNAL_PROGRAM_ERROR;
1263                 }
1264             }
1265
1266             /* update pointers */
1267             s=pArgs->source;
1268             t=pArgs->target;
1269
1270             if(U_SUCCESS(*err)) {
1271                 if(s<pArgs->sourceLimit) {
1272                     /*
1273                      * continue with the conversion loop while there is still input left
1274                      * (continue converting by breaking out of only the inner loop)
1275                      */
1276                     break;
1277                 } else if(realSource!=NULL) {
1278                     /* switch back from replaying to the real source and continue */
1279                     pArgs->source=realSource;
1280                     pArgs->sourceLimit=realSourceLimit;
1281                     pArgs->flush=realFlush;
1282                     sourceIndex=realSourceIndex;
1283
1284                     realSource=NULL;
1285                     break;
1286                 } else if(pArgs->flush && cnv->toULength>0) {
1287                     /*
1288                      * the entire input stream is consumed
1289                      * and there is a partial, truncated input sequence left
1290                      */
1291
1292                     /* inject an error and continue with callback handling */
1293                     *err=U_TRUNCATED_CHAR_FOUND;
1294                     calledCallback=FALSE; /* new error condition */
1295                 } else {
1296                     /* input consumed */
1297                     if(pArgs->flush) {
1298                         /*
1299                          * return to the conversion loop once more if the flush
1300                          * flag is set and the conversion function has not
1301                          * successfully processed the end of the input yet
1302                          *
1303                          * (continue converting by breaking out of only the inner loop)
1304                          */
1305                         if(!converterSawEndOfInput) {
1306                             break;
1307                         }
1308
1309                         /* reset the converter without calling the callback function */
1310                         _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1311                     }
1312
1313                     /* done successfully */
1314                     return;
1315                 }
1316             }
1317
1318             /* U_FAILURE(*err) */
1319             {
1320                 UErrorCode e;
1321
1322                 if( calledCallback ||
1323                     (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
1324                     (e!=U_INVALID_CHAR_FOUND &&
1325                      e!=U_ILLEGAL_CHAR_FOUND &&
1326                      e!=U_TRUNCATED_CHAR_FOUND &&
1327                      e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
1328                      e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
1329                 ) {
1330                     /*
1331                      * the callback did not or cannot resolve the error:
1332                      * set output pointers and return
1333                      *
1334                      * the check for buffer overflow is redundant but it is
1335                      * a high-runner case and hopefully documents the intent
1336                      * well
1337                      *
1338                      * if we were replaying, then the replay buffer must be
1339                      * copied back into the UConverter
1340                      * and the real arguments must be restored
1341                      */
1342                     if(realSource!=NULL) {
1343                         int32_t length;
1344
1345                         U_ASSERT(cnv->preToULength==0);
1346
1347                         length=(int32_t)(pArgs->sourceLimit-pArgs->source);
1348                         if(length>0) {
1349                             uprv_memcpy(cnv->preToU, pArgs->source, length);
1350                             cnv->preToULength=(int8_t)-length;
1351                         }
1352
1353                         pArgs->source=realSource;
1354                         pArgs->sourceLimit=realSourceLimit;
1355                         pArgs->flush=realFlush;
1356                     }
1357
1358                     return;
1359                 }
1360             }
1361
1362             /* copy toUBytes[] to invalidCharBuffer[] */
1363             errorInputLength=cnv->invalidCharLength=cnv->toULength;
1364             if(errorInputLength>0) {
1365                 uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
1366             }
1367
1368             /* set the converter state to deal with the next character */
1369             cnv->toULength=0;
1370
1371             /* call the callback function */
1372             cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
1373                 cnv->invalidCharBuffer, errorInputLength,
1374                 (*err==U_INVALID_CHAR_FOUND || *err==U_UNSUPPORTED_ESCAPE_SEQUENCE) ?
1375                     UCNV_UNASSIGNED : UCNV_ILLEGAL,
1376                 err);
1377
1378             /*
1379              * loop back to the offset handling
1380              *
1381              * this flag will indicate after offset handling
1382              * that a callback was called;
1383              * if the callback did not resolve the error, then we return
1384              */
1385             calledCallback=TRUE;
1386         }
1387     }
1388 }
1389
1390 U_CAPI void U_EXPORT2
1391 ucnv_toUnicode(UConverter *cnv,
1392                UChar **target, const UChar *targetLimit,
1393                const char **source, const char *sourceLimit,
1394                int32_t *offsets,
1395                UBool flush,
1396                UErrorCode *err) {
1397     UConverterToUnicodeArgs args;
1398     const char *s;
1399     UChar *t;
1400
1401     /* check parameters */
1402     if(err==NULL || U_FAILURE(*err)) {
1403         return;
1404     }
1405
1406     if(cnv==NULL || target==NULL || source==NULL) {
1407         *err=U_ILLEGAL_ARGUMENT_ERROR;
1408         return;
1409     }
1410
1411     s=*source;
1412     t=*target;
1413     if(sourceLimit<s || targetLimit<t) {
1414         *err=U_ILLEGAL_ARGUMENT_ERROR;
1415         return;
1416     }
1417
1418     /*
1419      * Make sure that the buffer sizes do not exceed the number range for
1420      * int32_t because some functions use the size (in units or bytes)
1421      * rather than comparing pointers, and because offsets are int32_t values.
1422      *
1423      * size_t is guaranteed to be unsigned and large enough for the job.
1424      *
1425      * Return with an error instead of adjusting the limits because we would
1426      * not be able to maintain the semantics that either the source must be
1427      * consumed or the target filled (unless an error occurs).
1428      * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1429      */
1430     if(
1431         ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) ||
1432         ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t)
1433     ) {
1434         *err=U_ILLEGAL_ARGUMENT_ERROR;
1435         return;
1436     }
1437
1438     /* flush the target overflow buffer */
1439     if(cnv->UCharErrorBufferLength>0) {
1440         UChar *overflow;
1441         int32_t i, length;
1442
1443         overflow=cnv->UCharErrorBuffer;
1444         length=cnv->UCharErrorBufferLength;
1445         i=0;
1446         do {
1447             if(t==targetLimit) {
1448                 /* the overflow buffer contains too much, keep the rest */
1449                 int32_t j=0;
1450
1451                 do {
1452                     overflow[j++]=overflow[i++];
1453                 } while(i<length);
1454
1455                 cnv->UCharErrorBufferLength=(int8_t)j;
1456                 *target=t;
1457                 *err=U_BUFFER_OVERFLOW_ERROR;
1458                 return;
1459             }
1460
1461             /* copy the overflow contents to the target */
1462             *t++=overflow[i++];
1463             if(offsets!=NULL) {
1464                 *offsets++=-1; /* no source index available for old output */
1465             }
1466         } while(i<length);
1467
1468         /* the overflow buffer is completely copied to the target */
1469         cnv->UCharErrorBufferLength=0;
1470     }
1471
1472     if(!flush && s==sourceLimit && cnv->preToULength>=0) {
1473         /* the overflow buffer is emptied and there is no new input: we are done */
1474         *target=t;
1475         return;
1476     }
1477
1478     /*
1479      * Do not simply return with a buffer overflow error if
1480      * !flush && t==targetLimit
1481      * because it is possible that the source will not generate any output.
1482      * For example, the skip callback may be called;
1483      * it does not output anything.
1484      */
1485
1486     /* prepare the converter arguments */
1487     args.converter=cnv;
1488     args.flush=flush;
1489     args.offsets=offsets;
1490     args.source=s;
1491     args.sourceLimit=sourceLimit;
1492     args.target=t;
1493     args.targetLimit=targetLimit;
1494     args.size=sizeof(args);
1495
1496     _toUnicodeWithCallback(&args, err);
1497
1498     *source=args.source;
1499     *target=args.target;
1500 }
1501
1502 /* ucnv_to/fromUChars() ----------------------------------------------------- */
1503
1504 U_CAPI int32_t U_EXPORT2
1505 ucnv_fromUChars(UConverter *cnv,
1506                 char *dest, int32_t destCapacity,
1507                 const UChar *src, int32_t srcLength,
1508                 UErrorCode *pErrorCode) {
1509     const UChar *srcLimit;
1510     char *originalDest, *destLimit;
1511     int32_t destLength;
1512
1513     /* check arguments */
1514     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1515         return 0;
1516     }
1517
1518     if( cnv==NULL ||
1519         destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1520         srcLength<-1 || (srcLength!=0 && src==NULL)
1521     ) {
1522         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1523         return 0;
1524     }
1525
1526     /* initialize */
1527     ucnv_resetFromUnicode(cnv);
1528     originalDest=dest;
1529     if(srcLength==-1) {
1530         srcLength=u_strlen(src);
1531     }
1532     if(srcLength>0) {
1533         srcLimit=src+srcLength;
1534         destLimit=dest+destCapacity;
1535
1536         /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1537         if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1538             destLimit=(char *)U_MAX_PTR(dest);
1539         }
1540
1541         /* perform the conversion */
1542         ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1543         destLength=(int32_t)(dest-originalDest);
1544
1545         /* if an overflow occurs, then get the preflighting length */
1546         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1547             char buffer[1024];
1548
1549             destLimit=buffer+sizeof(buffer);
1550             do {
1551                 dest=buffer;
1552                 *pErrorCode=U_ZERO_ERROR;
1553                 ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1554                 destLength+=(int32_t)(dest-buffer);
1555             } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1556         }
1557     } else {
1558         destLength=0;
1559     }
1560
1561     return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode);
1562 }
1563
1564 U_CAPI int32_t U_EXPORT2
1565 ucnv_toUChars(UConverter *cnv,
1566               UChar *dest, int32_t destCapacity,
1567               const char *src, int32_t srcLength,
1568               UErrorCode *pErrorCode) {
1569     const char *srcLimit;
1570     UChar *originalDest, *destLimit;
1571     int32_t destLength;
1572
1573     /* check arguments */
1574     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1575         return 0;
1576     }
1577
1578     if( cnv==NULL ||
1579         destCapacity<0 || (destCapacity>0 && dest==NULL) ||
1580         srcLength<-1 || (srcLength!=0 && src==NULL))
1581     {
1582         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1583         return 0;
1584     }
1585
1586     /* initialize */
1587     ucnv_resetToUnicode(cnv);
1588     originalDest=dest;
1589     if(srcLength==-1) {
1590         srcLength=uprv_strlen(src);
1591     }
1592     if(srcLength>0) {
1593         srcLimit=src+srcLength;
1594         destLimit=dest+destCapacity;
1595
1596         /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */
1597         if(destLimit<dest || (destLimit==NULL && dest!=NULL)) {
1598             destLimit=(UChar *)U_MAX_PTR(dest);
1599         }
1600
1601         /* perform the conversion */
1602         ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1603         destLength=(int32_t)(dest-originalDest);
1604
1605         /* if an overflow occurs, then get the preflighting length */
1606         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR)
1607         {
1608             UChar buffer[1024];
1609
1610             destLimit=buffer+sizeof(buffer)/U_SIZEOF_UCHAR;
1611             do {
1612                 dest=buffer;
1613                 *pErrorCode=U_ZERO_ERROR;
1614                 ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode);
1615                 destLength+=(int32_t)(dest-buffer);
1616             }
1617             while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
1618         }
1619     } else {
1620         destLength=0;
1621     }
1622
1623     return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode);
1624 }
1625
1626 /* ucnv_getNextUChar() ------------------------------------------------------ */
1627
1628 U_CAPI UChar32 U_EXPORT2
1629 ucnv_getNextUChar(UConverter *cnv,
1630                   const char **source, const char *sourceLimit,
1631                   UErrorCode *err) {
1632     UConverterToUnicodeArgs args;
1633     UChar buffer[U16_MAX_LENGTH];
1634     const char *s;
1635     UChar32 c;
1636     int32_t i, length;
1637
1638     /* check parameters */
1639     if(err==NULL || U_FAILURE(*err)) {
1640         return 0xffff;
1641     }
1642
1643     if(cnv==NULL || source==NULL) {
1644         *err=U_ILLEGAL_ARGUMENT_ERROR;
1645         return 0xffff;
1646     }
1647
1648     s=*source;
1649     if(sourceLimit<s) {
1650         *err=U_ILLEGAL_ARGUMENT_ERROR;
1651         return 0xffff;
1652     }
1653
1654     /*
1655      * Make sure that the buffer sizes do not exceed the number range for
1656      * int32_t because some functions use the size (in units or bytes)
1657      * rather than comparing pointers, and because offsets are int32_t values.
1658      *
1659      * size_t is guaranteed to be unsigned and large enough for the job.
1660      *
1661      * Return with an error instead of adjusting the limits because we would
1662      * not be able to maintain the semantics that either the source must be
1663      * consumed or the target filled (unless an error occurs).
1664      * An adjustment would be sourceLimit=t+0x7fffffff; for example.
1665      */
1666     if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) {
1667         *err=U_ILLEGAL_ARGUMENT_ERROR;
1668         return 0xffff;
1669     }
1670
1671     c=U_SENTINEL;
1672
1673     /* flush the target overflow buffer */
1674     if(cnv->UCharErrorBufferLength>0) {
1675         UChar *overflow;
1676
1677         overflow=cnv->UCharErrorBuffer;
1678         i=0;
1679         length=cnv->UCharErrorBufferLength;
1680         U16_NEXT(overflow, i, length, c);
1681
1682         /* move the remaining overflow contents up to the beginning */
1683         if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) {
1684             uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i,
1685                          cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1686         }
1687
1688         if(!U16_IS_LEAD(c) || i<length) {
1689             return c;
1690         }
1691         /*
1692          * Continue if the overflow buffer contained only a lead surrogate,
1693          * in case the converter outputs single surrogates from complete
1694          * input sequences.
1695          */
1696     }
1697
1698     /*
1699      * flush==TRUE is implied for ucnv_getNextUChar()
1700      *
1701      * do not simply return even if s==sourceLimit because the converter may
1702      * not have seen flush==TRUE before
1703      */
1704
1705     /* prepare the converter arguments */
1706     args.converter=cnv;
1707     args.flush=TRUE;
1708     args.offsets=NULL;
1709     args.source=s;
1710     args.sourceLimit=sourceLimit;
1711     args.target=buffer;
1712     args.targetLimit=buffer+1;
1713     args.size=sizeof(args);
1714
1715     if(c<0) {
1716         /*
1717          * call the native getNextUChar() implementation if we are
1718          * at a character boundary (toULength==0)
1719          *
1720          * unlike with _toUnicode(), getNextUChar() implementations must set
1721          * U_TRUNCATED_CHAR_FOUND for truncated input,
1722          * in addition to setting toULength/toUBytes[]
1723          */
1724         if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) {
1725             c=cnv->sharedData->impl->getNextUChar(&args, err);
1726             *source=s=args.source;
1727             if(*err==U_INDEX_OUTOFBOUNDS_ERROR) {
1728                 /* reset the converter without calling the callback function */
1729                 _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
1730                 return 0xffff; /* no output */
1731             } else if(U_SUCCESS(*err) && c>=0) {
1732                 return c;
1733             /*
1734              * else fall through to use _toUnicode() because
1735              *   UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all
1736              *   U_FAILURE: call _toUnicode() for callback handling (do not output c)
1737              */
1738             }
1739         }
1740
1741         /* convert to one UChar in buffer[0], or handle getNextUChar() errors */
1742         _toUnicodeWithCallback(&args, err);
1743
1744         if(*err==U_BUFFER_OVERFLOW_ERROR) {
1745             *err=U_ZERO_ERROR;
1746         }
1747
1748         i=0;
1749         length=(int32_t)(args.target-buffer);
1750     } else {
1751         /* write the lead surrogate from the overflow buffer */
1752         buffer[0]=(UChar)c;
1753         args.target=buffer+1;
1754         i=0;
1755         length=1;
1756     }
1757
1758     /* buffer contents starts at i and ends before length */
1759
1760     if(U_FAILURE(*err)) {
1761         c=0xffff; /* no output */
1762     } else if(length==0) {
1763         /* no input or only state changes */
1764         *err=U_INDEX_OUTOFBOUNDS_ERROR;
1765         /* no need to reset explicitly because _toUnicodeWithCallback() did it */
1766         c=0xffff; /* no output */
1767     } else {
1768         c=buffer[0];
1769         i=1;
1770         if(!U16_IS_LEAD(c)) {
1771             /* consume c=buffer[0], done */
1772         } else {
1773             /* got a lead surrogate, see if a trail surrogate follows */
1774             UChar c2;
1775
1776             if(cnv->UCharErrorBufferLength>0) {
1777                 /* got overflow output from the conversion */
1778                 if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) {
1779                     /* got a trail surrogate, too */
1780                     c=U16_GET_SUPPLEMENTARY(c, c2);
1781
1782                     /* move the remaining overflow contents up to the beginning */
1783                     if((--cnv->UCharErrorBufferLength)>0) {
1784                         uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1,
1785                                      cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR);
1786                     }
1787                 } else {
1788                     /* c is an unpaired lead surrogate, just return it */
1789                 }
1790             } else if(args.source<sourceLimit) {
1791                 /* convert once more, to buffer[1] */
1792                 args.targetLimit=buffer+2;
1793                 _toUnicodeWithCallback(&args, err);
1794                 if(*err==U_BUFFER_OVERFLOW_ERROR) {
1795                     *err=U_ZERO_ERROR;
1796                 }
1797
1798                 length=(int32_t)(args.target-buffer);
1799                 if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) {
1800                     /* got a trail surrogate, too */
1801                     c=U16_GET_SUPPLEMENTARY(c, c2);
1802                     i=2;
1803                 }
1804             }
1805         }
1806     }
1807
1808     /*
1809      * move leftover output from buffer[i..length[
1810      * into the beginning of the overflow buffer
1811      */
1812     if(i<length) {
1813         /* move further overflow back */
1814         int32_t delta=length-i;
1815         if((length=cnv->UCharErrorBufferLength)>0) {
1816             uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer,
1817                          length*U_SIZEOF_UCHAR);
1818         }
1819         cnv->UCharErrorBufferLength=(int8_t)(length+delta);
1820
1821         cnv->UCharErrorBuffer[0]=buffer[i++];
1822         if(delta>1) {
1823             cnv->UCharErrorBuffer[1]=buffer[i];
1824         }
1825     }
1826
1827     *source=args.source;
1828     return c;
1829 }
1830
1831 /* ucnv_convert() and siblings ---------------------------------------------- */
1832
1833 U_CAPI void U_EXPORT2
1834 ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
1835                char **target, const char *targetLimit,
1836                const char **source, const char *sourceLimit,
1837                UChar *pivotStart, UChar **pivotSource,
1838                UChar **pivotTarget, const UChar *pivotLimit,
1839                UBool reset, UBool flush,
1840                UErrorCode *pErrorCode) {
1841     UChar pivotBuffer[CHUNK_SIZE];
1842     UChar *myPivotSource, *myPivotTarget;
1843
1844     /* error checking */
1845     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1846         return;
1847     }
1848
1849     if( targetCnv==NULL || sourceCnv==NULL ||
1850         source==NULL || *source==NULL ||
1851         target==NULL || *target==NULL || targetLimit==NULL
1852     ) {
1853         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1854         return;
1855     }
1856
1857     if(pivotStart==NULL) {
1858         /* use the stack pivot buffer */
1859         pivotStart=myPivotSource=myPivotTarget=pivotBuffer;
1860         pivotSource=&myPivotSource;
1861         pivotTarget=&myPivotTarget;
1862         pivotLimit=pivotBuffer+CHUNK_SIZE;
1863     } else if(  pivotStart>=pivotLimit ||
1864                 pivotSource==NULL || *pivotSource==NULL ||
1865                 pivotTarget==NULL || *pivotTarget==NULL ||
1866                 pivotLimit==NULL
1867     ) {
1868         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1869         return;
1870     }
1871
1872     if(sourceLimit==NULL) {
1873         /* get limit of single-byte-NUL-terminated source string */
1874         sourceLimit=uprv_strchr(*source, 0);
1875     }
1876
1877     if(reset) {
1878         ucnv_resetToUnicode(sourceCnv);
1879         ucnv_resetFromUnicode(targetCnv);
1880         *pivotTarget=*pivotSource=pivotStart;
1881     }
1882
1883     /* conversion loop */
1884     for(;;) {
1885         if(reset) {
1886             /*
1887              * if we did a reset in this function, we know that there is nothing
1888              * to convert to the target yet, so we save a function call
1889              */
1890             reset=FALSE;
1891         } else {
1892             /*
1893              * convert to the target first in case the pivot is filled at entry
1894              * or the targetCnv has some output bytes in its state
1895              */
1896             ucnv_fromUnicode(targetCnv,
1897                              target, targetLimit,
1898                              (const UChar **)pivotSource, *pivotTarget,
1899                              NULL,
1900                              (UBool)(flush && *source==sourceLimit),
1901                              pErrorCode);
1902             if(U_FAILURE(*pErrorCode)) {
1903                 break;
1904             }
1905
1906             /* ucnv_fromUnicode() must have consumed the pivot contents since it returned with U_SUCCESS() */
1907             *pivotSource=*pivotTarget=pivotStart;
1908         }
1909
1910         /* convert from the source to the pivot */
1911         ucnv_toUnicode(sourceCnv,
1912                        pivotTarget, pivotLimit,
1913                        source, sourceLimit,
1914                        NULL,
1915                        flush,
1916                        pErrorCode);
1917         if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
1918             /* pivot overflow: continue with the conversion loop */
1919             *pErrorCode=U_ZERO_ERROR;
1920         } else if(U_FAILURE(*pErrorCode) || *pivotTarget==pivotStart) {
1921             /* conversion error, or there was nothing left to convert */
1922             break;
1923         }
1924         /* else ucnv_toUnicode() wrote into the pivot buffer: continue */
1925     }
1926
1927     /*
1928      * The conversion loop is exited when one of the following is true:
1929      * - the entire source text has been converted successfully to the target buffer
1930      * - a target buffer overflow occurred
1931      * - a conversion error occurred
1932      */
1933
1934     /* terminate the target buffer if possible */
1935     if(flush && U_SUCCESS(*pErrorCode)) {
1936         if(*target!=targetLimit) {
1937             **target=0;
1938             if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) {
1939                 *pErrorCode=U_ZERO_ERROR;
1940             }
1941         } else {
1942             *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
1943         }
1944     }
1945 }
1946
1947 /* internal implementation of ucnv_convert() etc. with preflighting */
1948 static int32_t
1949 ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter,
1950                      char *target, int32_t targetCapacity,
1951                      const char *source, int32_t sourceLength,
1952                      UErrorCode *pErrorCode) {
1953     UChar pivotBuffer[CHUNK_SIZE];
1954     UChar *pivot, *pivot2;
1955
1956     char *myTarget;
1957     const char *sourceLimit;
1958     const char *targetLimit;
1959     int32_t targetLength=0;
1960
1961     /* set up */
1962     if(sourceLength<0) {
1963         sourceLimit=uprv_strchr(source, 0);
1964     } else {
1965         sourceLimit=source+sourceLength;
1966     }
1967
1968     /* if there is no input data, we're done */
1969     if(source==sourceLimit) {
1970         return u_terminateChars(target, targetCapacity, 0, pErrorCode);
1971     }
1972
1973     pivot=pivot2=pivotBuffer;
1974     myTarget=target;
1975     targetLength=0;
1976
1977     if(targetCapacity>0) {
1978         /* perform real conversion */
1979         targetLimit=target+targetCapacity;
1980         ucnv_convertEx(outConverter, inConverter,
1981                        &myTarget, targetLimit,
1982                        &source, sourceLimit,
1983                        pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
1984                        FALSE,
1985                        TRUE,
1986                        pErrorCode);
1987         targetLength=myTarget-target;
1988     }
1989
1990     /*
1991      * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing
1992      * to it but continue the conversion in order to store in targetCapacity
1993      * the number of bytes that was required.
1994      */
1995     if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0)
1996     {
1997         char targetBuffer[CHUNK_SIZE];
1998
1999         targetLimit=targetBuffer+CHUNK_SIZE;
2000         do {
2001             *pErrorCode=U_ZERO_ERROR;
2002             myTarget=targetBuffer;
2003             ucnv_convertEx(outConverter, inConverter,
2004                            &myTarget, targetLimit,
2005                            &source, sourceLimit,
2006                            pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE,
2007                            FALSE,
2008                            TRUE,
2009                            pErrorCode);
2010             targetLength+=(myTarget-targetBuffer);
2011         } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR);
2012
2013         /* done with preflighting, set warnings and errors as appropriate */
2014         return u_terminateChars(target, targetCapacity, targetLength, pErrorCode);
2015     }
2016
2017     /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */
2018     return targetLength;
2019 }
2020
2021 U_CAPI int32_t U_EXPORT2
2022 ucnv_convert(const char *toConverterName, const char *fromConverterName,
2023              char *target, int32_t targetCapacity,
2024              const char *source, int32_t sourceLength,
2025              UErrorCode *pErrorCode) {
2026     UConverter in, out; /* stack-allocated */
2027     UConverter *inConverter, *outConverter;
2028     int32_t targetLength;
2029
2030     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2031         return 0;
2032     }
2033
2034     if( source==NULL || sourceLength<-1 ||
2035         targetCapacity<0 || (targetCapacity>0 && target==NULL)
2036     ) {
2037         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2038         return 0;
2039     }
2040
2041     /* if there is no input data, we're done */
2042     if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2043         return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2044     }
2045
2046     /* create the converters */
2047     inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode);
2048     if(U_FAILURE(*pErrorCode)) {
2049         return 0;
2050     }
2051
2052     outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode);
2053     if(U_FAILURE(*pErrorCode)) {
2054         ucnv_close(inConverter);
2055         return 0;
2056     }
2057
2058     targetLength=ucnv_internalConvert(outConverter, inConverter,
2059                                       target, targetCapacity,
2060                                       source, sourceLength,
2061                                       pErrorCode);
2062
2063     ucnv_close(inConverter);
2064     ucnv_close(outConverter);
2065
2066     return targetLength;
2067 }
2068
2069 /* @internal */
2070 static int32_t
2071 ucnv_convertAlgorithmic(UBool convertToAlgorithmic,
2072                         UConverterType algorithmicType,
2073                         UConverter *cnv,
2074                         char *target, int32_t targetCapacity,
2075                         const char *source, int32_t sourceLength,
2076                         UErrorCode *pErrorCode) {
2077     UConverter algoConverterStatic; /* stack-allocated */
2078     UConverter *algoConverter, *to, *from;
2079     int32_t targetLength;
2080
2081     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
2082         return 0;
2083     }
2084
2085     if( cnv==NULL || source==NULL || sourceLength<-1 ||
2086         targetCapacity<0 || (targetCapacity>0 && target==NULL)
2087     ) {
2088         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2089         return 0;
2090     }
2091
2092     /* if there is no input data, we're done */
2093     if(sourceLength==0 || (sourceLength<0 && *source==0)) {
2094         return u_terminateChars(target, targetCapacity, 0, pErrorCode);
2095     }
2096
2097     /* create the algorithmic converter */
2098     algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType,
2099                                                   "", 0, pErrorCode);
2100     if(U_FAILURE(*pErrorCode)) {
2101         return 0;
2102     }
2103
2104     /* reset the other converter */
2105     if(convertToAlgorithmic) {
2106         /* cnv->Unicode->algo */
2107         ucnv_resetToUnicode(cnv);
2108         to=algoConverter;
2109         from=cnv;
2110     } else {
2111         /* algo->Unicode->cnv */
2112         ucnv_resetFromUnicode(cnv);
2113         from=algoConverter;
2114         to=cnv;
2115     }
2116
2117     targetLength=ucnv_internalConvert(to, from,
2118                                       target, targetCapacity,
2119                                       source, sourceLength,
2120                                       pErrorCode);
2121
2122     ucnv_close(algoConverter);
2123
2124     return targetLength;
2125 }
2126
2127 U_CAPI int32_t U_EXPORT2
2128 ucnv_toAlgorithmic(UConverterType algorithmicType,
2129                    UConverter *cnv,
2130                    char *target, int32_t targetCapacity,
2131                    const char *source, int32_t sourceLength,
2132                    UErrorCode *pErrorCode) {
2133     return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv,
2134                                    target, targetCapacity,
2135                                    source, sourceLength,
2136                                    pErrorCode);
2137 }
2138
2139 U_CAPI int32_t U_EXPORT2
2140 ucnv_fromAlgorithmic(UConverter *cnv,
2141                      UConverterType algorithmicType,
2142                      char *target, int32_t targetCapacity,
2143                      const char *source, int32_t sourceLength,
2144                      UErrorCode *pErrorCode) {
2145     return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv,
2146                                    target, targetCapacity,
2147                                    source, sourceLength,
2148                                    pErrorCode);
2149 }
2150
2151 U_CAPI UConverterType  U_EXPORT2
2152 ucnv_getType(const UConverter* converter)
2153 {
2154     int8_t type = converter->sharedData->staticData->conversionType;
2155 #if !UCONFIG_NO_LEGACY_CONVERSION
2156     if(type == UCNV_MBCS) {
2157         return ucnv_MBCSGetType(converter);
2158     }
2159 #endif
2160     return (UConverterType)type;
2161 }
2162
2163 U_CAPI void  U_EXPORT2
2164 ucnv_getStarters(const UConverter* converter,
2165                  UBool starters[256],
2166                  UErrorCode* err)
2167 {
2168     if (err == NULL || U_FAILURE(*err)) {
2169         return;
2170     }
2171
2172     if(converter->sharedData->impl->getStarters != NULL) {
2173         converter->sharedData->impl->getStarters(converter, starters, err);
2174     } else {
2175         *err = U_ILLEGAL_ARGUMENT_ERROR;
2176     }
2177 }
2178
2179 static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv)
2180 {
2181     UErrorCode errorCode;
2182     const char *name;
2183     int32_t i;
2184
2185     if(cnv==NULL) {
2186         return NULL;
2187     }
2188
2189     errorCode=U_ZERO_ERROR;
2190     name=ucnv_getName(cnv, &errorCode);
2191     if(U_FAILURE(errorCode)) {
2192         return NULL;
2193     }
2194
2195     for(i=0; i<(int32_t)(sizeof(ambiguousConverters)/sizeof(UAmbiguousConverter)); ++i)
2196     {
2197         if(0==uprv_strcmp(name, ambiguousConverters[i].name))
2198         {
2199             return ambiguousConverters+i;
2200         }
2201     }
2202
2203     return NULL;
2204 }
2205
2206 U_CAPI void  U_EXPORT2
2207 ucnv_fixFileSeparator(const UConverter *cnv,
2208                       UChar* source,
2209                       int32_t sourceLength) {
2210     const UAmbiguousConverter *a;
2211     int32_t i;
2212     UChar variant5c;
2213
2214     if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL)
2215     {
2216         return;
2217     }
2218
2219     variant5c=a->variant5c;
2220     for(i=0; i<sourceLength; ++i) {
2221         if(source[i]==variant5c) {
2222             source[i]=0x5c;
2223         }
2224     }
2225 }
2226
2227 U_CAPI UBool  U_EXPORT2
2228 ucnv_isAmbiguous(const UConverter *cnv) {
2229     return (UBool)(ucnv_getAmbiguous(cnv)!=NULL);
2230 }
2231
2232 U_CAPI void  U_EXPORT2
2233 ucnv_setFallback(UConverter *cnv, UBool usesFallback)
2234 {
2235     cnv->useFallback = usesFallback;
2236 }
2237
2238 U_CAPI UBool  U_EXPORT2
2239 ucnv_usesFallback(const UConverter *cnv)
2240 {
2241     return cnv->useFallback;
2242 }
2243
2244 U_CAPI void  U_EXPORT2
2245 ucnv_getInvalidChars (const UConverter * converter,
2246                       char *errBytes,
2247                       int8_t * len,
2248                       UErrorCode * err)
2249 {
2250     if (err == NULL || U_FAILURE(*err))
2251     {
2252         return;
2253     }
2254     if (len == NULL || errBytes == NULL || converter == NULL)
2255     {
2256         *err = U_ILLEGAL_ARGUMENT_ERROR;
2257         return;
2258     }
2259     if (*len < converter->invalidCharLength)
2260     {
2261         *err = U_INDEX_OUTOFBOUNDS_ERROR;
2262         return;
2263     }
2264     if ((*len = converter->invalidCharLength) > 0)
2265     {
2266         uprv_memcpy (errBytes, converter->invalidCharBuffer, *len);
2267     }
2268 }
2269
2270 U_CAPI void  U_EXPORT2
2271 ucnv_getInvalidUChars (const UConverter * converter,
2272                        UChar *errChars,
2273                        int8_t * len,
2274                        UErrorCode * err)
2275 {
2276     if (err == NULL || U_FAILURE(*err))
2277     {
2278         return;
2279     }
2280     if (len == NULL || errChars == NULL || converter == NULL)
2281     {
2282         *err = U_ILLEGAL_ARGUMENT_ERROR;
2283         return;
2284     }
2285     if (*len < converter->invalidUCharLength)
2286     {
2287         *err = U_INDEX_OUTOFBOUNDS_ERROR;
2288         return;
2289     }
2290     if ((*len = converter->invalidUCharLength) > 0)
2291     {
2292         uprv_memcpy (errChars, converter->invalidUCharBuffer, sizeof(UChar) * (*len));
2293     }
2294 }
2295
2296 #define SIG_MAX_LEN 5
2297
2298 U_CAPI const char* U_EXPORT2
2299 ucnv_detectUnicodeSignature( const char* source,
2300                              int32_t sourceLength,
2301                              int32_t* signatureLength,
2302                              UErrorCode* pErrorCode) {
2303     int32_t dummy;
2304
2305     /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN
2306      * bytes we don't misdetect something
2307      */
2308     char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' };
2309     int i = 0;
2310
2311     if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){
2312         return NULL;
2313     }
2314
2315     if(source == NULL || sourceLength < -1){
2316         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
2317         return NULL;
2318     }
2319
2320     if(signatureLength == NULL) {
2321         signatureLength = &dummy;
2322     }
2323
2324     if(sourceLength==-1){
2325         sourceLength=uprv_strlen(source);
2326     }
2327
2328
2329     while(i<sourceLength&& i<SIG_MAX_LEN){
2330         start[i]=source[i];
2331         i++;
2332     }
2333
2334     if(start[0] == '\xFE' && start[1] == '\xFF') {
2335         *signatureLength=2;
2336         return  "UTF-16BE";
2337     } else if(start[0] == '\xFF' && start[1] == '\xFE') {
2338         if(start[2] == '\x00' && start[3] =='\x00') {
2339             *signatureLength=4;
2340             return "UTF-32LE";
2341         } else {
2342             *signatureLength=2;
2343             return  "UTF-16LE";
2344         }
2345     } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') {
2346         *signatureLength=3;
2347         return  "UTF-8";
2348     } else if(start[0] == '\x00' && start[1] == '\x00' &&
2349               start[2] == '\xFE' && start[3]=='\xFF') {
2350         *signatureLength=4;
2351         return  "UTF-32BE";
2352     } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') {
2353         *signatureLength=3;
2354         return "SCSU";
2355     } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') {
2356         *signatureLength=3;
2357         return "BOCU-1";
2358     } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') {
2359         /*
2360          * UTF-7: Initial U+FEFF is encoded as +/v8  or  +/v9  or  +/v+  or  +/v/
2361          * depending on the second UTF-16 code unit.
2362          * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF
2363          * if it occurs.
2364          *
2365          * So far we have +/v
2366          */
2367         if(start[3] == '\x38' && start[4] == '\x2D') {
2368             /* 5 bytes +/v8- */
2369             *signatureLength=5;
2370             return "UTF-7";
2371         } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') {
2372             /* 4 bytes +/v8  or  +/v9  or  +/v+  or  +/v/ */
2373             *signatureLength=4;
2374             return "UTF-7";
2375         }
2376     }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){
2377         *signatureLength=4;
2378         return "UTF-EBCDIC";
2379     }
2380
2381
2382     /* no known Unicode signature byte sequence recognized */
2383     *signatureLength=0;
2384     return NULL;
2385 }
2386
2387 #endif
2388
2389 /*
2390  * Hey, Emacs, please set the following:
2391  *
2392  * Local Variables:
2393  * indent-tabs-mode: nil
2394  * End:
2395  *
2396  */