icuSources/common/ucnvlat1.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2000-2003, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   file name:  ucnvlat1.c
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 *   created on: 2000feb07
  12 *   created by: Markus W. Scherer
  13 */
  14
  15 #include "unicode/utypes.h"
  16 #include "unicode/ucnv.h"
  17 #include "unicode/ucnv_err.h"
  18 #include "unicode/uset.h"
  19 #include "ucnv_bld.h"
  20 #include "ucnv_cnv.h"
  21
  22 /*
      * Control optimizations according to the platform.
      * Each switch guards, via #if, a hand-unrolled fast path in this file that
      * handles 16 code units per loop iteration. With a value of 0 the guarded
      * section is compiled out and only the one-unit-at-a-time loops remain.
      */
  23 #define LATIN1_UNROLL_TO_UNICODE 1
  24 #define LATIN1_UNROLL_FROM_UNICODE 1
  25 #define ASCII_UNROLL_TO_UNICODE 1
  26
  27 /* ISO 8859-1 --------------------------------------------------------------- */
  28
  29 /* This is a table-less and callback-less version of _MBCSSingleToBMPWithOffsets(). */
  30 static void
  31 _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
  32                             UErrorCode *pErrorCode) {
  33     const uint8_t *source;
  34     UChar *target;
  35     int32_t targetCapacity, length;
  36     int32_t *offsets;
  37
  38     int32_t sourceIndex;
  39
  40     /* set up the local pointers */
  41     source=(const uint8_t *)pArgs->source;
  42     target=pArgs->target;
  43     targetCapacity=pArgs->targetLimit-pArgs->target;
  44     offsets=pArgs->offsets;
  45
  46     sourceIndex=0;
  47
  48     /*
  49      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
  50      * for the minimum of the sourceLength and targetCapacity
  51      */
  52     length=(const uint8_t *)pArgs->sourceLimit-source;
  53     if(length<=targetCapacity) {
  54         targetCapacity=length;
  55     } else {
  56         /* target will be full */
  57         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  58         length=targetCapacity;
  59     }
  60
  61 #if LATIN1_UNROLL_TO_UNICODE
  62     if(targetCapacity>=16) {
  63         int32_t count, loops;
  64
  65         loops=count=targetCapacity>>4;
  66         length=targetCapacity&=0xf;
  67         do {
  68             *target++=*source++;
  69             *target++=*source++;
  70             *target++=*source++;
  71             *target++=*source++;
  72             *target++=*source++;
  73             *target++=*source++;
  74             *target++=*source++;
  75             *target++=*source++;
  76             *target++=*source++;
  77             *target++=*source++;
  78             *target++=*source++;
  79             *target++=*source++;
  80             *target++=*source++;
  81             *target++=*source++;
  82             *target++=*source++;
  83             *target++=*source++;
  84         } while(--count>0);
  85
  86         if(offsets!=NULL) {
  87             do {
  88                 *offsets++=sourceIndex++;
  89                 *offsets++=sourceIndex++;
  90                 *offsets++=sourceIndex++;
  91                 *offsets++=sourceIndex++;
  92                 *offsets++=sourceIndex++;
  93                 *offsets++=sourceIndex++;
  94                 *offsets++=sourceIndex++;
  95                 *offsets++=sourceIndex++;
  96                 *offsets++=sourceIndex++;
  97                 *offsets++=sourceIndex++;
  98                 *offsets++=sourceIndex++;
  99                 *offsets++=sourceIndex++;
 100                 *offsets++=sourceIndex++;
 101                 *offsets++=sourceIndex++;
 102                 *offsets++=sourceIndex++;
 103                 *offsets++=sourceIndex++;
 104             } while(--loops>0);
 105         }
 106     }
 107 #endif
 108
 109     /* conversion loop */
 110     while(targetCapacity>0) {
 111         *target++=*source++;
 112         --targetCapacity;
 113     }
 114
 115     /* write back the updated pointers */
 116     pArgs->source=(const char *)source;
 117     pArgs->target=target;
 118
 119     /* set offsets */
 120     if(offsets!=NULL) {
 121         while(length>0) {
 122             *offsets++=sourceIndex++;
 123             --length;
 124         }
 125         pArgs->offsets=offsets;
 126     }
 127 }
 128
 129 /* This is a table-less and callback-less version of _MBCSSingleGetNextUChar(). */
 130 static UChar32
 131 _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
 132                     UErrorCode *pErrorCode) {
 133     const uint8_t *source=(const uint8_t *)pArgs->source;
 134     if(source<(const uint8_t *)pArgs->sourceLimit) {
 135         pArgs->source=(const char *)(source+1);
 136         return *source;
 137     }
 138
 139     /* no output because of empty input */
 140     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 141     return 0xffff;
 142 }
 143
 144 /*
      * This is a table-less version of _MBCSSingleFromBMPWithOffsets().
      *
      * Converts UTF-16 code units from pArgs->source to single bytes in
      * pArgs->target. Code units up to 'max' are copied 1:1; 'max' is 0xff when
      * the converter uses _Latin1Data and 0x7f otherwise, so the US-ASCII
      * converter (see _ASCIIImpl) shares this function.
      *
      * Anything above 'max' is handed to cnv->fromUCharErrorBehaviour:
      *   - a non-surrogate code unit or a complete surrogate pair with
      *     UCNV_UNASSIGNED / U_INVALID_CHAR_FOUND,
      *   - an unmatched surrogate with UCNV_ILLEGAL / U_ILLEGAL_CHAR_FOUND.
      * A lead surrogate at the very end of the input is kept in
      * cnv->fromUSurrogateLead for the next call, unless pArgs->flush is set,
      * in which case U_TRUNCATED_CHAR_FOUND is reported.
      *
      * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when input remains but the
      * target is full. On return pArgs->source, pArgs->target and pArgs->offsets
      * are updated; offsets are only written when pArgs->offsets is not NULL.
      */
 145 static void
 146 _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 147                               UErrorCode *pErrorCode) {
 148     UConverter *cnv;
 149     const UChar *source, *sourceLimit, *lastSource;
 150     uint8_t *target;
 151     int32_t targetCapacity, length;
 152     int32_t *offsets;
 153
 154     UChar32 c, max;
 155
 156     int32_t sourceIndex;
 157
 158     UConverterCallbackReason reason;
 159     int32_t i;
 160
 161     /* set up the local pointers */
 162     cnv=pArgs->converter;
 163     source=pArgs->source;
 164     sourceLimit=pArgs->sourceLimit;
 165     target=(uint8_t *)pArgs->target;
 166     targetCapacity=pArgs->targetLimit-pArgs->target;
 167     offsets=pArgs->offsets;
 168
         /* pick the highest code unit that can be copied straight through */
 169     if(cnv->sharedData==&_Latin1Data) {
 170         max=0xff; /* Latin-1 */
 171     } else {
 172         max=0x7f; /* US-ASCII */
 173     }
 174
 175     /* get the converter state from UConverter */
 176     c=cnv->fromUSurrogateLead;
 177
 178     /* sourceIndex=-1 if the current character began in the previous buffer */
 179     sourceIndex= c==0 ? 0 : -1;
         /* lastSource marks the first source unit whose offset is not yet written */
 180     lastSource=source;
 181
 182     /*
 183      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 184      * for the minimum of the sourceLength and targetCapacity
 185      */
 186     length=sourceLimit-source;
 187     if(length<targetCapacity) {
 188         targetCapacity=length;
 189     }
 190
 191     /* conversion loop */
         /*
          * a lead surrogate is pending from an earlier call: jump directly into
          * the loop body at getTrail to examine the unit that follows it
          */
 192     if(c!=0 && targetCapacity>0) {
 193         goto getTrail;
 194     }
 195
 196 #if LATIN1_UNROLL_FROM_UNICODE
 197     /* unroll the loop with the most common case */
 198 unrolled:
 199     if(targetCapacity>=16) {
 200         int32_t count, loops;
 201         UChar u, oredChars;
 202
 203         loops=count=targetCapacity>>4;
 204         do {
                 /*
                  * copy 16 units unconditionally while OR-ing them together;
                  * since max is 0xff or 0x7f, the OR exceeds max exactly when
                  * at least one of the 16 units does
                  */
 205             oredChars=u=*source++;
 206             *target++=(uint8_t)u;
 207             oredChars|=u=*source++;
 208             *target++=(uint8_t)u;
 209             oredChars|=u=*source++;
 210             *target++=(uint8_t)u;
 211             oredChars|=u=*source++;
 212             *target++=(uint8_t)u;
 213             oredChars|=u=*source++;
 214             *target++=(uint8_t)u;
 215             oredChars|=u=*source++;
 216             *target++=(uint8_t)u;
 217             oredChars|=u=*source++;
 218             *target++=(uint8_t)u;
 219             oredChars|=u=*source++;
 220             *target++=(uint8_t)u;
 221             oredChars|=u=*source++;
 222             *target++=(uint8_t)u;
 223             oredChars|=u=*source++;
 224             *target++=(uint8_t)u;
 225             oredChars|=u=*source++;
 226             *target++=(uint8_t)u;
 227             oredChars|=u=*source++;
 228             *target++=(uint8_t)u;
 229             oredChars|=u=*source++;
 230             *target++=(uint8_t)u;
 231             oredChars|=u=*source++;
 232             *target++=(uint8_t)u;
 233             oredChars|=u=*source++;
 234             *target++=(uint8_t)u;
 235             oredChars|=u=*source++;
 236             *target++=(uint8_t)u;
 237
 238             /* were all 16 entries really valid? */
 239             if(oredChars>max) {
 240                 /* no, return to the first of these 16 */
 241                 source-=16;
 242                 target-=16;
 243                 break;
 244             }
 245         } while(--count>0);
             /* count becomes the number of 16-unit blocks that were kept */
 246         count=loops-count;
 247         targetCapacity-=16*count;
 248
 249         if(offsets!=NULL) {
                 /* offsets for the kept blocks are written here, so skip them in lastSource */
 250             lastSource+=16*count;
 251             while(count>0) {
 252                 *offsets++=sourceIndex++;
 253                 *offsets++=sourceIndex++;
 254                 *offsets++=sourceIndex++;
 255                 *offsets++=sourceIndex++;
 256                 *offsets++=sourceIndex++;
 257                 *offsets++=sourceIndex++;
 258                 *offsets++=sourceIndex++;
 259                 *offsets++=sourceIndex++;
 260                 *offsets++=sourceIndex++;
 261                 *offsets++=sourceIndex++;
 262                 *offsets++=sourceIndex++;
 263                 *offsets++=sourceIndex++;
 264                 *offsets++=sourceIndex++;
 265                 *offsets++=sourceIndex++;
 266                 *offsets++=sourceIndex++;
 267                 *offsets++=sourceIndex++;
 268                 --count;
 269             }
 270         }
 271
 272         c=0;
 273     }
 274 #endif
 275
         /* one-unit-at-a-time loop; also handles everything the fast path rejected */
 276     while(targetCapacity>0) {
 277         /*
 278          * Get a correct Unicode code point:
 279          * a single UChar for a BMP code point or
 280          * a matched surrogate pair for a "surrogate code point".
 281          */
 282         c=*source++;
 283         if(c<=max) {
 284             /* convert the Unicode code point */
 285             *target++=(uint8_t)c;
 286             --targetCapacity;
 287
 288             /* normal end of conversion: prepare for a new character */
 289             c=0;
 290         } else {
 291             if(!UTF_IS_SURROGATE(c)) {
 292                 /* callback(unassigned) */
 293                 reason=UCNV_UNASSIGNED;
 294                 *pErrorCode=U_INVALID_CHAR_FOUND;
 295             } else if(UTF_IS_SURROGATE_FIRST(c)) {
                     /* also the entry point for a lead surrogate carried over from an earlier call */
 296 getTrail:
 297                 if(source<sourceLimit) {
 298                     /* test the following code unit */
 299                     UChar trail=*source;
 300                     if(UTF_IS_SECOND_SURROGATE(trail)) {
 301                         ++source;
 302                         c=UTF16_GET_PAIR_VALUE(c, trail);
 303                         /* this codepage does not map supplementary code points */
 304                         /* callback(unassigned) */
 305                         reason=UCNV_UNASSIGNED;
 306                         *pErrorCode=U_INVALID_CHAR_FOUND;
 307                     } else {
 308                         /* this is an unmatched lead code unit (1st surrogate) */
 309                         /* callback(illegal) */
 310                         reason=UCNV_ILLEGAL;
 311                         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 312                     }
 313                 } else {
 314                     /* no more input */
                         /* c still holds the lead surrogate; it is stored or reported after the loop */
 315                     break;
 316                 }
 317             } else {
 318                 /* this is an unmatched trail code unit (2nd surrogate) */
 319                 /* callback(illegal) */
 320                 reason=UCNV_ILLEGAL;
 321                 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 322             }
 323
 324             /* call the callback function with all the preparations and post-processing */
 325             /* get the number of code units for c to correctly advance sourceIndex after the callback call */
 326             length=UTF_CHAR_LENGTH(c);
 327
 328             /* set offsets since the start or the last callback */
 329             if(offsets!=NULL) {
 330                 int32_t count=(int32_t)(source-lastSource);
 331
 332                 /* do not set the offset for the callback-causing character */
 333                 count-=length;
 334
 335                 while(count>0) {
 336                     *offsets++=sourceIndex++;
 337                     --count;
 338                 }
 339                 /* offset and sourceIndex are now set for the current character */
 340             }
 341
 342             /* update the arguments structure */
 343             pArgs->source=source;
 344             pArgs->target=(char *)target;
 345             pArgs->offsets=offsets;
 346
 347             /* set the converter state in UConverter to deal with the next character */
 348             cnv->fromUSurrogateLead=0;
 349
 350             /* write the code point as code units */
 351             i=0;
 352             UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c);
 353             cnv->invalidUCharLength=(int8_t)i;
 354             /* i==length */
 355
 356             /* call the callback function */
 357             cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode);
 358
 359             /* get the converter state from UConverter */
 360             c=cnv->fromUSurrogateLead;
 361
 362             /* update target and deal with offsets if necessary */
 363             offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex);
 364             target=(uint8_t *)pArgs->target;
 365
 366             /* update the source pointer and index */
 367             sourceIndex+=length+(pArgs->source-source);
 368             source=lastSource=pArgs->source;
                 /* recompute the shared counter: the callback may have moved source and target */
 369             targetCapacity=(uint8_t *)pArgs->targetLimit-target;
 370             length=sourceLimit-source;
 371             if(length<targetCapacity) {
 372                 targetCapacity=length;
 373             }
 374
 375             /*
 376              * If the callback overflowed the target, then we need to
 377              * stop here with an overflow indication.
 378              */
 379             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
 380                 break;
 381             } else if(U_FAILURE(*pErrorCode)) {
 382                 /* break on error */
 383                 c=0;
 384                 break;
 385             } else if(cnv->charErrorBufferLength>0) {
 386                 /* target is full */
 387                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 388                 break;
 389             }
 390
 391 #if LATIN1_UNROLL_FROM_UNICODE
                 /* the callback succeeded: try the 16-unit fast path again */
 392             goto unrolled;
 393 #endif
 394         }
 395     }
 396
 397     if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
 398         /* target is full */
 399         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 400     }
 401
 402     /* set offsets since the start or the last callback */
 403     if(offsets!=NULL) {
 404         size_t count=source-lastSource;
 405         while(count>0) {
 406             *offsets++=sourceIndex++;
 407             --count;
 408         }
 409     }
 410
 411     if(pArgs->flush && source>=sourceLimit) {
 412         /* reset the state for the next conversion */
 413         if(c!=0 && U_SUCCESS(*pErrorCode)) {
 414             /* a Unicode code point remains incomplete (only a first surrogate) */
 415             *pErrorCode=U_TRUNCATED_CHAR_FOUND;
 416         }
 417         cnv->fromUSurrogateLead=0;
 418     } else {
 419         /* set the converter state back into UConverter */
 420         cnv->fromUSurrogateLead=(UChar)c;
 421     }
 422
 423     /* write back the updated pointers */
 424     pArgs->source=source;
 425     pArgs->target=(char *)target;
 426     pArgs->offsets=offsets;
 427 }
 428
 429 static void
 430 _Latin1GetUnicodeSet(const UConverter *cnv,
 431                      USet *set,
 432                      UConverterUnicodeSet which,
 433                      UErrorCode *pErrorCode) {
 434     uset_addRange(set, 0, 0xff);
 435 }
 436
 /*
  * Implementation table for the ISO 8859-1 converter, tagged UCNV_LATIN_1.
  * Only the conversion functions, the get-next-character function and the
  * Unicode-set function are provided; every other slot is NULL.
  * NOTE(review): the to-Unicode and from-Unicode functions each appear twice,
  * presumably once for the plain slot and once for the with-offsets slot of
  * UConverterImpl -- confirm against the struct declaration in ucnv_cnv.h.
  */
 437 static const UConverterImpl _Latin1Impl={
 438     UCNV_LATIN_1,
 439
 440     NULL,
 441     NULL,
 442
 443     NULL,
 444     NULL,
 445     NULL,
 446
 447     _Latin1ToUnicodeWithOffsets,
 448     _Latin1ToUnicodeWithOffsets,
 449     _Latin1FromUnicodeWithOffsets,
 450     _Latin1FromUnicodeWithOffsets,
 451     _Latin1GetNextUChar,
 452
 453     NULL,
 454     NULL,
 455     NULL,
 456     NULL,
 457     _Latin1GetUnicodeSet
 458 };
 459
 /*
  * Constant descriptive data for the ISO 8859-1 converter; referenced from
  * _Latin1Data.
  * NOTE(review): the field meanings are not visible in this file. 819 is
  * presumably the code page number, "1, 1" the minimum and maximum bytes per
  * character, and { 0x1a, 0, 0, 0 }, 1 the substitution byte sequence with its
  * length -- confirm against the UConverterStaticData declaration.
  */
 460 static const UConverterStaticData _Latin1StaticData={
 461     sizeof(UConverterStaticData),
 462     "ISO-8859-1",
 463     819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
 464     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
 465     0,
 466     0,
 467     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
 468 };
 469
 /*
  * Shared data object for the ISO 8859-1 converter; it ties _Latin1StaticData
  * to _Latin1Impl. _Latin1FromUnicodeWithOffsets() compares a converter's
  * sharedData pointer against the address of this object to decide between
  * the Latin-1 limit (0xff) and the US-ASCII limit (0x7f).
  * NOTE(review): the meaning of the remaining fields is not visible here --
  * see the UConverterSharedData declaration.
  */
 470 const UConverterSharedData _Latin1Data={
 471     sizeof(UConverterSharedData), ~((uint32_t) 0),
 472     NULL, NULL, &_Latin1StaticData, FALSE, &_Latin1Impl,
 473     0
 474 };
 475
 476 /* US-ASCII ----------------------------------------------------------------- */
 477
 478 /*
      * This is a table-less version of _MBCSSingleToBMPWithOffsets().
      *
      * Converts US-ASCII bytes from pArgs->source to UChars in pArgs->target.
      * Bytes up to 0x7f are copied 1:1. A byte above 0x7f is removed from the
      * output again and passed to cnv->fromCharErrorBehaviour with UCNV_ILLEGAL
      * and U_ILLEGAL_CHAR_FOUND; conversion continues afterwards unless the
      * callback leaves a failure code or overflow output behind.
      *
      * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when input remains but the
      * target is full. On return pArgs->source, pArgs->target and pArgs->offsets
      * are updated; offsets are only written when pArgs->offsets is not NULL.
      */
 479 static void
 480 _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 481                            UErrorCode *pErrorCode) {
 482     const uint8_t *source, *sourceLimit, *lastSource;
 483     UChar *target;
 484     int32_t targetCapacity, length;
 485     int32_t *offsets;
 486
 487     int32_t sourceIndex;
 488
 489     /* set up the local pointers */
 490     source=(const uint8_t *)pArgs->source;
 491     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
 492     target=pArgs->target;
 493     targetCapacity=pArgs->targetLimit-pArgs->target;
 494     offsets=pArgs->offsets;
 495
 496     /* every character is a single byte, so offsets always start counting at 0 */
 497     sourceIndex=0;
         /* lastSource marks the first source byte whose offset is not yet written */
 498     lastSource=source;
 499
 500     /*
 501      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 502      * for the minimum of the sourceLength and targetCapacity
 503      */
 504     length=sourceLimit-source;
 505     if(length<targetCapacity) {
 506         targetCapacity=length;
 507     }
 508
 509 #if ASCII_UNROLL_TO_UNICODE
 510     /* unroll the loop with the most common case */
 511 unrolled:
 512     if(targetCapacity>=16) {
 513         int32_t count, loops;
 514         UChar oredChars;
 515
 516         loops=count=targetCapacity>>4;
 517         do {
                 /*
                  * copy 16 bytes unconditionally while OR-ing them together;
                  * the OR exceeds 0x7f exactly when at least one byte does
                  */
 518             oredChars=*target++=*source++;
 519             oredChars|=*target++=*source++;
 520             oredChars|=*target++=*source++;
 521             oredChars|=*target++=*source++;
 522             oredChars|=*target++=*source++;
 523             oredChars|=*target++=*source++;
 524             oredChars|=*target++=*source++;
 525             oredChars|=*target++=*source++;
 526             oredChars|=*target++=*source++;
 527             oredChars|=*target++=*source++;
 528             oredChars|=*target++=*source++;
 529             oredChars|=*target++=*source++;
 530             oredChars|=*target++=*source++;
 531             oredChars|=*target++=*source++;
 532             oredChars|=*target++=*source++;
 533             oredChars|=*target++=*source++;
 534
 535             /* were all 16 entries really valid? */
 536             if(oredChars>0x7f) {
 537                 /* no, return to the first of these 16 */
 538                 source-=16;
 539                 target-=16;
 540                 break;
 541             }
 542         } while(--count>0);
             /* count becomes the number of 16-byte blocks that were kept */
 543         count=loops-count;
 544         targetCapacity-=16*count;
 545
 546         if(offsets!=NULL) {
                 /* offsets for the kept blocks are written here, so skip them in lastSource */
 547             lastSource+=16*count;
 548             while(count>0) {
 549                 *offsets++=sourceIndex++;
 550                 *offsets++=sourceIndex++;
 551                 *offsets++=sourceIndex++;
 552                 *offsets++=sourceIndex++;
 553                 *offsets++=sourceIndex++;
 554                 *offsets++=sourceIndex++;
 555                 *offsets++=sourceIndex++;
 556                 *offsets++=sourceIndex++;
 557                 *offsets++=sourceIndex++;
 558                 *offsets++=sourceIndex++;
 559                 *offsets++=sourceIndex++;
 560                 *offsets++=sourceIndex++;
 561                 *offsets++=sourceIndex++;
 562                 *offsets++=sourceIndex++;
 563                 *offsets++=sourceIndex++;
 564                 *offsets++=sourceIndex++;
 565                 --count;
 566             }
 567         }
 568     }
 569 #endif
 570
 571     /* conversion loop */
 572     while(targetCapacity>0) {
             /* the byte is stored first and validated afterwards */
 573         if((*target++=*source++)<=0x7f) {
 574             --targetCapacity;
 575         } else {
 576             UConverter *cnv;
 577
 578             /* back out the illegal character */
 579             --target;
 580
 581             /* call the callback function with all the preparations and post-processing */
 582             cnv=pArgs->converter;
 583
 584             /* callback(illegal) */
 585             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 586
 587             /* set offsets since the start or the last callback */
 588             if(offsets!=NULL) {
 589                 int32_t count=(int32_t)(source-lastSource);
 590
 591                 /* predecrement: do not set the offset for the callback-causing character */
 592                 while(--count>0) {
 593                     *offsets++=sourceIndex++;
 594                 }
 595                 /* offset and sourceIndex are now set for the current character */
 596             }
 597
 598             /* update the arguments structure */
 599             pArgs->source=(const char *)source;
 600             pArgs->target=target;
 601             pArgs->offsets=offsets;
 602
 603             /* copy the current bytes to invalidCharBuffer */
 604             cnv->invalidCharBuffer[0]=*(source-1);
 605             cnv->invalidCharLength=1;
 606
 607             /* call the callback function */
 608             cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);
 609
 610             /* update target and deal with offsets if necessary */
 611             offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex);
 612             target=pArgs->target;
 613
 614             /* update the source pointer and index */
 615             sourceIndex+=1+((const uint8_t *)pArgs->source-source);
 616             source=lastSource=(const uint8_t *)pArgs->source;
                 /* recompute the shared counter: the callback may have moved source and target */
 617             targetCapacity=pArgs->targetLimit-target;
 618             length=sourceLimit-source;
 619             if(length<targetCapacity) {
 620                 targetCapacity=length;
 621             }
 622
 623             /*
 624              * If the callback overflowed the target, then we need to
 625              * stop here with an overflow indication.
 626              */
 627             if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
 628                 break;
 629             } else if(U_FAILURE(*pErrorCode)) {
 630                 /* break on error */
 631                 break;
 632             } else if(cnv->UCharErrorBufferLength>0) {
 633                 /* target is full */
 634                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 635                 break;
 636             }
 637
 638 #if ASCII_UNROLL_TO_UNICODE
                 /* the callback succeeded: try the 16-byte fast path again */
 639             goto unrolled;
 640 #endif
 641         }
 642     }
 643
 644     if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=pArgs->targetLimit) {
 645         /* target is full */
 646         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 647     }
 648
 649     /* set offsets since the start or the last callback */
 650     if(offsets!=NULL) {
 651         size_t count=source-lastSource;
 652         while(count>0) {
 653             *offsets++=sourceIndex++;
 654             --count;
 655         }
 656     }
 657
 658     /* write back the updated pointers */
 659     pArgs->source=(const char *)source;
 660     pArgs->target=target;
 661     pArgs->offsets=offsets;
 662 }
 663
 664 /* This is a table-less version of _MBCSSingleGetNextUChar(). */
 665 static UChar32
 666 _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
 667                    UErrorCode *pErrorCode) {
 668     UChar buffer[UTF_MAX_CHAR_LENGTH];
 669     const uint8_t *source;
 670     uint8_t b;
 671
 672     /* set up the local pointers */
 673     source=(const uint8_t *)pArgs->source;
 674
 675     /* conversion loop */
 676     while(source<(const uint8_t *)pArgs->sourceLimit) {
 677         b=*source++;
 678         pArgs->source=(const char *)source;
 679         if(b<=0x7f) {
 680             return b;
 681         } else {
 682             /* call the callback function with all the preparations and post-processing */
 683             UConverter *cnv=pArgs->converter;
 684
 685             /* callback(illegal) */
 686             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 687
 688             /* update the arguments structure */
 689             pArgs->target=buffer;
 690             pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
 691
 692             /* copy the current byte to invalidCharBuffer */
 693             cnv->invalidCharBuffer[0]=(char)b;
 694             cnv->invalidCharLength=1;
 695
 696             /* call the callback function */
 697             cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode);
 698
 699             /* update the source pointer */
 700             source=(const uint8_t *)pArgs->source;
 701
 702             /*
 703              * return the first character if the callback wrote some
 704              * we do not need to goto finish because the converter state is already set
 705              */
 706             if(U_SUCCESS(*pErrorCode)) {
 707                 int32_t length=pArgs->target-buffer;
 708                 if(length>0) {
 709                     return ucnv_getUChar32KeepOverflow(cnv, buffer, length);
 710                 }
 711                 /* else (callback did not write anything) continue */
 712             } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
 713                 *pErrorCode=U_ZERO_ERROR;
 714                 return ucnv_getUChar32KeepOverflow(cnv, buffer, UTF_MAX_CHAR_LENGTH);
 715             } else {
 716                 /* break on error */
 717                 /* ### what if a callback set an error but _also_ generated output?! */
 718                 return 0xffff;
 719             }
 720         }
 721     }
 722
 723     /* no output because of empty input or only skipping callbacks */
 724     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 725     return 0xffff;
 726 }
 727
 728 static void
 729 _ASCIIGetUnicodeSet(const UConverter *cnv,
 730                     USet *set,
 731                     UConverterUnicodeSet which,
 732                     UErrorCode *pErrorCode) {
 733     uset_addRange(set, 0, 0x7f);
 734 }
 735
 /*
  * Implementation table for the US-ASCII converter, tagged UCNV_US_ASCII.
  * The from-Unicode direction reuses _Latin1FromUnicodeWithOffsets(), which
  * applies the 0x7f limit to any converter whose sharedData is not
  * _Latin1Data. Slots that are not provided are NULL.
  * NOTE(review): the to-Unicode and from-Unicode functions each appear twice,
  * presumably once for the plain slot and once for the with-offsets slot of
  * UConverterImpl -- confirm against the struct declaration in ucnv_cnv.h.
  */
 736 static const UConverterImpl _ASCIIImpl={
 737     UCNV_US_ASCII,
 738
 739     NULL,
 740     NULL,
 741
 742     NULL,
 743     NULL,
 744     NULL,
 745
 746     _ASCIIToUnicodeWithOffsets,
 747     _ASCIIToUnicodeWithOffsets,
 748     _Latin1FromUnicodeWithOffsets,
 749     _Latin1FromUnicodeWithOffsets,
 750     _ASCIIGetNextUChar,
 751
 752     NULL,
 753     NULL,
 754     NULL,
 755     NULL,
 756     _ASCIIGetUnicodeSet
 757 };
 758
 /*
  * Constant descriptive data for the US-ASCII converter; referenced from
  * _ASCIIData.
  * NOTE(review): the field meanings are not visible in this file. 367 is
  * presumably the code page number, "1, 1" the minimum and maximum bytes per
  * character, and { 0x1a, 0, 0, 0 }, 1 the substitution byte sequence with its
  * length -- confirm against the UConverterStaticData declaration.
  */
 759 static const UConverterStaticData _ASCIIStaticData={
 760     sizeof(UConverterStaticData),
 761     "US-ASCII",
 762     367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
 763     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
 764     0,
 765     0,
 766     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
 767 };
 768
 /*
  * Shared data object for the US-ASCII converter; it ties _ASCIIStaticData to
  * _ASCIIImpl.
  * NOTE(review): the meaning of the remaining fields is not visible here --
  * see the UConverterSharedData declaration.
  */
 769 const UConverterSharedData _ASCIIData={
 770     sizeof(UConverterSharedData), ~((uint32_t) 0),
 771     NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl,
 772     0
 773 };