icuSources/common/ucnvlat1.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 2000-2015, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   file name:  ucnvlat1.cpp
   9 *   encoding:   UTF-8
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2000feb07
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18
  19 #if !UCONFIG_NO_CONVERSION
  20
  21 #include "unicode/ucnv.h"
  22 #include "unicode/uset.h"
  23 #include "unicode/utf8.h"
  24 #include "ucnv_bld.h"
  25 #include "ucnv_cnv.h"
  26
  27 /* control optimizations according to the platform */
  28 #define LATIN1_UNROLL_FROM_UNICODE 1
  29
  30 /* ISO 8859-1 --------------------------------------------------------------- */
  31
  32 /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
  33 U_CDECL_BEGIN
  34 static void U_CALLCONV
  35 _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
  36                             UErrorCode *pErrorCode) {
  37     const uint8_t *source;
  38     UChar *target;
  39     int32_t targetCapacity, length;
  40     int32_t *offsets;
  41
  42     int32_t sourceIndex;
  43
  44     /* set up the local pointers */
  45     source=(const uint8_t *)pArgs->source;
  46     target=pArgs->target;
  47     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  48     offsets=pArgs->offsets;
  49
  50     sourceIndex=0;
  51
  52     /*
  53      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
  54      * for the minimum of the sourceLength and targetCapacity
  55      */
  56     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
  57     if(length<=targetCapacity) {
  58         targetCapacity=length;
  59     } else {
  60         /* target will be full */
  61         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  62         length=targetCapacity;
  63     }
  64
  65     if(targetCapacity>=8) {
  66         /* This loop is unrolled for speed and improved pipelining. */
  67         int32_t count, loops;
  68
  69         loops=count=targetCapacity>>3;
  70         length=targetCapacity&=0x7;
  71         do {
  72             target[0]=source[0];
  73             target[1]=source[1];
  74             target[2]=source[2];
  75             target[3]=source[3];
  76             target[4]=source[4];
  77             target[5]=source[5];
  78             target[6]=source[6];
  79             target[7]=source[7];
  80             target+=8;
  81             source+=8;
  82         } while(--count>0);
  83
  84         if(offsets!=NULL) {
  85             do {
  86                 offsets[0]=sourceIndex++;
  87                 offsets[1]=sourceIndex++;
  88                 offsets[2]=sourceIndex++;
  89                 offsets[3]=sourceIndex++;
  90                 offsets[4]=sourceIndex++;
  91                 offsets[5]=sourceIndex++;
  92                 offsets[6]=sourceIndex++;
  93                 offsets[7]=sourceIndex++;
  94                 offsets+=8;
  95             } while(--loops>0);
  96         }
  97     }
  98
  99     /* conversion loop */
 100     while(targetCapacity>0) {
 101         *target++=*source++;
 102         --targetCapacity;
 103     }
 104
 105     /* write back the updated pointers */
 106     pArgs->source=(const char *)source;
 107     pArgs->target=target;
 108
 109     /* set offsets */
 110     if(offsets!=NULL) {
 111         while(length>0) {
 112             *offsets++=sourceIndex++;
 113             --length;
 114         }
 115         pArgs->offsets=offsets;
 116     }
 117 }
 118
 119 /* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
 120 static UChar32 U_CALLCONV
 121 _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
 122                     UErrorCode *pErrorCode) {
 123     const uint8_t *source=(const uint8_t *)pArgs->source;
 124     if(source<(const uint8_t *)pArgs->sourceLimit) {
 125         pArgs->source=(const char *)(source+1);
 126         return *source;
 127     }
 128
 129     /* no output because of empty input */
 130     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 131     return 0xffff;
 132 }
 133
  134 /*
       * This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets().
       *
       * Converts UTF-16 code units to single bytes. The same function serves two
       * converters: when cnv->sharedData is &_Latin1Data every code unit up to
       * 0xff is accepted, otherwise the limit is 0x7f (US-ASCII).
       *
       * pArgs      supplies converter, source/sourceLimit, target/targetLimit and
       *            an optional offsets array; source, target and offsets are
       *            written back on return.
       * pErrorCode receives
       *            - U_INVALID_CHAR_FOUND for a non-surrogate code unit above the
       *              limit or for a complete surrogate pair,
       *            - U_ILLEGAL_CHAR_FOUND for an unpaired surrogate,
       *            - U_BUFFER_OVERFLOW_ERROR when no other error occurred, input
       *              remains and the target has reached targetLimit.
       *            In the two character-error cases the offending code point is
       *            stored in cnv->fromUChar32.
       *
       * If the input ends right after a lead surrogate, that surrogate is stored
       * in cnv->fromUChar32 without setting an error, so that a later call can
       * continue at the getTrail label.
       */
  135 static void U_CALLCONV
  136 _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  137                               UErrorCode *pErrorCode) {
  138     UConverter *cnv;
  139     const UChar *source, *sourceLimit;
  140     uint8_t *target, *oldTarget;
  141     int32_t targetCapacity, length;
  142     int32_t *offsets;
  143
  144     UChar32 cp;
  145     UChar c, max;
  146
  147     int32_t sourceIndex;
  148
  149     /* set up the local pointers */
  150     cnv=pArgs->converter;
  151     source=pArgs->source;
  152     sourceLimit=pArgs->sourceLimit;
  153     target=oldTarget=(uint8_t *)pArgs->target;
  154     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  155     offsets=pArgs->offsets;
  156
         /* highest code unit that can be stored directly as one byte */
  157     if(cnv->sharedData==&_Latin1Data) {
  158         max=0xff; /* Latin-1 */
  159     } else {
  160         max=0x7f; /* US-ASCII */
  161     }
  162
  163     /* get the converter state from UConverter */
  164     cp=cnv->fromUChar32;
  165
  166     /* sourceIndex=-1 if the current character began in the previous buffer */
  167     sourceIndex= cp==0 ? 0 : -1;
  168
  169     /*
  170      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
  171      * for the minimum of the sourceLength and targetCapacity
  172      */
  173     length=(int32_t)(sourceLimit-source);
  174     if(length<targetCapacity) {
  175         targetCapacity=length;
  176     }
  177
  178     /*
          * cp!=0: a code unit was carried over in cnv->fromUChar32 (this function
          * stores a lead surrogate there when the input ends right after it);
          * continue with the trail-unit check instead of the normal loops
          */
  179     if(cp!=0 && targetCapacity>0) {
  180         goto getTrail;
  181     }
  182
  183 #if LATIN1_UNROLL_FROM_UNICODE
  184     /* unroll the loop with the most common case */
  185     if(targetCapacity>=16) {
  186         int32_t count, loops;
  187         UChar u, oredChars;
  188
             /*
              * Each iteration stores 16 bytes unconditionally and ORs the 16 code
              * units together. max is 0xff or 0x7f (all low bits set), so the OR
              * exceeds max exactly when at least one of the units does.
              */
  189         loops=count=targetCapacity>>4;
  190         do {
  191             oredChars=u=*source++;
  192             *target++=(uint8_t)u;
  193             oredChars|=u=*source++;
  194             *target++=(uint8_t)u;
  195             oredChars|=u=*source++;
  196             *target++=(uint8_t)u;
  197             oredChars|=u=*source++;
  198             *target++=(uint8_t)u;
  199             oredChars|=u=*source++;
  200             *target++=(uint8_t)u;
  201             oredChars|=u=*source++;
  202             *target++=(uint8_t)u;
  203             oredChars|=u=*source++;
  204             *target++=(uint8_t)u;
  205             oredChars|=u=*source++;
  206             *target++=(uint8_t)u;
  207             oredChars|=u=*source++;
  208             *target++=(uint8_t)u;
  209             oredChars|=u=*source++;
  210             *target++=(uint8_t)u;
  211             oredChars|=u=*source++;
  212             *target++=(uint8_t)u;
  213             oredChars|=u=*source++;
  214             *target++=(uint8_t)u;
  215             oredChars|=u=*source++;
  216             *target++=(uint8_t)u;
  217             oredChars|=u=*source++;
  218             *target++=(uint8_t)u;
  219             oredChars|=u=*source++;
  220             *target++=(uint8_t)u;
  221             oredChars|=u=*source++;
  222             *target++=(uint8_t)u;
  223
  224             /* were all 16 entries really valid? */
  225             if(oredChars>max) {
  226                 /*
                      * no, return to the first of these 16; the one-at-a-time
                      * loop below converts them again and stops at the first
                      * code unit above max
                      */
  227                 source-=16;
  228                 target-=16;
  229                 break;
  230             }
  231         } while(--count>0);
             /* count becomes the number of 16-unit groups that were accepted */
  232         count=loops-count;
  233         targetCapacity-=16*count;
  234
  235         if(offsets!=NULL) {
                 /*
                  * the accepted groups get their offsets here; moving oldTarget
                  * forward keeps the final offsets loop from counting them again
                  */
  236             oldTarget+=16*count;
  237             while(count>0) {
  238                 *offsets++=sourceIndex++;
  239                 *offsets++=sourceIndex++;
  240                 *offsets++=sourceIndex++;
  241                 *offsets++=sourceIndex++;
  242                 *offsets++=sourceIndex++;
  243                 *offsets++=sourceIndex++;
  244                 *offsets++=sourceIndex++;
  245                 *offsets++=sourceIndex++;
  246                 *offsets++=sourceIndex++;
  247                 *offsets++=sourceIndex++;
  248                 *offsets++=sourceIndex++;
  249                 *offsets++=sourceIndex++;
  250                 *offsets++=sourceIndex++;
  251                 *offsets++=sourceIndex++;
  252                 *offsets++=sourceIndex++;
  253                 *offsets++=sourceIndex++;
  254                 --count;
  255             }
  256         }
  257     }
  258 #endif
  259
  260     /*
          * conversion loop, one code unit at a time; note that source is advanced
          * past a code unit above max before the loop stops on it
          */
  261     c=0;
  262     while(targetCapacity>0 && (c=*source++)<=max) {
  263         /* convert the Unicode code point */
  264         *target++=(uint8_t)c;
  265         --targetCapacity;
  266     }
  267
  268     if(c>max) {
  269         cp=c;
  270         if(!U_IS_SURROGATE(cp)) {
  271             /* callback(unassigned) */
  272         } else if(U_IS_SURROGATE_LEAD(cp)) {
  273 getTrail:
  274             if(source<sourceLimit) {
  275                 /* test the following code unit */
  276                 UChar trail=*source;
  277                 if(U16_IS_TRAIL(trail)) {
  278                     ++source;
  279                     cp=U16_GET_SUPPLEMENTARY(cp, trail);
  280                     /* this codepage does not map supplementary code points */
  281                     /* callback(unassigned) */
  282                 } else {
  283                     /* this is an unmatched lead code unit (1st surrogate) */
  284                     /* callback(illegal) */
  285                 }
  286             } else {
  287                 /* no more input: keep the lead surrogate as state, no error */
  288                 cnv->fromUChar32=cp;
  289                 goto noMoreInput;
  290             }
  291         } else {
  292             /* this is an unmatched trail code unit (2nd surrogate) */
  293             /* callback(illegal) */
  294         }
  295
             /*
              * cp is still a surrogate only if it was unpaired; a combined
              * supplementary code point and any other unit above max are reported
              * as U_INVALID_CHAR_FOUND
              */
  296         *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
  297         cnv->fromUChar32=cp;
  298     }
  299 noMoreInput:
  300
  301     /*
          * set offsets since the start (or since the end of the unrolled section,
          * which has already advanced oldTarget)
          */
  302     if(offsets!=NULL) {
  303         size_t count=target-oldTarget;
  304         while(count>0) {
  305             *offsets++=sourceIndex++;
  306             --count;
  307         }
  308     }
  309
  310     if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
  311         /* target is full */
  312         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  313     }
  314
  315     /* write back the updated pointers */
  316     pArgs->source=source;
  317     pArgs->target=(char *)target;
  318     pArgs->offsets=offsets;
  319 }
 320
 321 /* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
 322 static void U_CALLCONV
 323 ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 324                     UConverterToUnicodeArgs *pToUArgs,
 325                     UErrorCode *pErrorCode) {
 326     UConverter *utf8;
 327     const uint8_t *source, *sourceLimit;
 328     uint8_t *target;
 329     int32_t targetCapacity;
 330
 331     UChar32 c;
 332     uint8_t b, t1;
 333
 334     /* set up the local pointers */
 335     utf8=pToUArgs->converter;
 336     source=(uint8_t *)pToUArgs->source;
 337     sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
 338     target=(uint8_t *)pFromUArgs->target;
 339     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 340
 341     /* get the converter state from the UTF-8 UConverter */
 342     c=(UChar32)utf8->toUnicodeStatus;
 343     if(c!=0 && source<sourceLimit) {
 344         if(targetCapacity==0) {
 345             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 346             return;
 347         } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
 348             ++source;
 349             *target++=(uint8_t)(((c&3)<<6)|t1);
 350             --targetCapacity;
 351
 352             utf8->toUnicodeStatus=0;
 353             utf8->toULength=0;
 354         } else {
 355             /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
 356             *pErrorCode=U_USING_DEFAULT_WARNING;
 357             return;
 358         }
 359     }
 360
 361     /*
 362      * Make sure that the last byte sequence before sourceLimit is complete
 363      * or runs into a lead byte.
 364      * In the conversion loop compare source with sourceLimit only once
 365      * per multi-byte character.
 366      * For Latin-1, adjust sourceLimit only for 1 trail byte because
 367      * the conversion loop handles at most 2-byte sequences.
 368      */
 369     if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
 370         --sourceLimit;
 371     }
 372
 373     /* conversion loop */
 374     while(source<sourceLimit) {
 375         if(targetCapacity>0) {
 376             b=*source++;
 377             if((int8_t)b>=0) {
 378                 /* convert ASCII */
 379                 *target++=(uint8_t)b;
 380                 --targetCapacity;
 381             } else if( /* handle U+0080..U+00FF inline */
 382                        b>=0xc2 && b<=0xc3 &&
 383                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
 384             ) {
 385                 ++source;
 386                 *target++=(uint8_t)(((b&3)<<6)|t1);
 387                 --targetCapacity;
 388             } else {
 389                 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
 390                 pToUArgs->source=(char *)(source-1);
 391                 pFromUArgs->target=(char *)target;
 392                 *pErrorCode=U_USING_DEFAULT_WARNING;
 393                 return;
 394             }
 395         } else {
 396             /* target is full */
 397             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 398             break;
 399         }
 400     }
 401
 402     /*
 403      * The sourceLimit may have been adjusted before the conversion loop
 404      * to stop before a truncated sequence.
 405      * If so, then collect the truncated sequence now.
 406      * For Latin-1, there is at most exactly one lead byte because of the
 407      * smaller sourceLimit adjustment logic.
 408      */
 409     if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
 410         utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
 411         utf8->toULength=1;
 412         utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1;
 413     }
 414
 415     /* write back the updated pointers */
 416     pToUArgs->source=(char *)source;
 417     pFromUArgs->target=(char *)target;
 418 }
 419
 420 static void U_CALLCONV
 421 _Latin1GetUnicodeSet(const UConverter *cnv,
 422                      const USetAdder *sa,
 423                      UConverterUnicodeSet which,
 424                      UErrorCode *pErrorCode) {
 425     (void)cnv;
 426     (void)which;
 427     (void)pErrorCode;
 428     sa->addRange(sa->set, 0, 0xff);
 429 }
 430 U_CDECL_END
 431
 432
  433 static const UConverterImpl _Latin1Impl={
         /*
          * Function table for the ISO 8859-1 converter; the entries are positional
          * and NULL marks a slot for which this file supplies no function.
          * NOTE(review): the meaning of each slot comes from the UConverterImpl
          * declaration, which is not in this file (presumably ucnv_cnv.h) —
          * confirm against it before adding or reordering entries.
          */
  434     UCNV_LATIN_1,
  435
  436     NULL,
  437     NULL,
  438
  439     NULL,
  440     NULL,
  441     NULL,
  442
         /* the same to-Unicode function fills two adjacent slots */
  443     _Latin1ToUnicodeWithOffsets,
  444     _Latin1ToUnicodeWithOffsets,
         /* likewise for from-Unicode; this function is shared with _ASCIIImpl */
  445     _Latin1FromUnicodeWithOffsets,
  446     _Latin1FromUnicodeWithOffsets,
  447     _Latin1GetNextUChar,
  448
  449     NULL,
  450     NULL,
  451     NULL,
  452     NULL,
  453     _Latin1GetUnicodeSet,
  454
  455     NULL,
         /* direct UTF-8 to Latin-1 conversion */
  456     ucnv_Latin1FromUTF8
  457 };
 458
  459 static const UConverterStaticData _Latin1StaticData={
         /*
          * Static description of the ISO 8859-1 converter; positional initializer.
          * NOTE(review): field meanings below are inferred from the values, not
          * from the UConverterStaticData declaration (not visible here) — confirm.
          */
  460     sizeof(UConverterStaticData),
  461     "ISO-8859-1", /* converter name */
         /* presumably: codepage number 819, platform, converter type, min/max bytes per char */
  462     819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
         /* presumably: substitution byte 0x1a and its length, followed by two flags */
  463     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
  464     0,
  465     0,
  466     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
  467 };
 468
  469 const UConverterSharedData _Latin1Data=
             /*
              * Shared-data object for ISO 8859-1, built from _Latin1StaticData and
              * _Latin1Impl. _Latin1FromUnicodeWithOffsets compares cnv->sharedData
              * with the address of this object to choose between the 0xff and 0x7f
              * limits.
              */
  470         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
 471
 472 /* US-ASCII ----------------------------------------------------------------- */
 473
 474 U_CDECL_BEGIN
 475 /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
 476 static void U_CALLCONV
 477 _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 478                            UErrorCode *pErrorCode) {
 479     const uint8_t *source, *sourceLimit;
 480     UChar *target, *oldTarget;
 481     int32_t targetCapacity, length;
 482     int32_t *offsets;
 483
 484     int32_t sourceIndex;
 485
 486     uint8_t c;
 487
 488     /* set up the local pointers */
 489     source=(const uint8_t *)pArgs->source;
 490     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
 491     target=oldTarget=pArgs->target;
 492     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 493     offsets=pArgs->offsets;
 494
 495     /* sourceIndex=-1 if the current character began in the previous buffer */
 496     sourceIndex=0;
 497
 498     /*
 499      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 500      * for the minimum of the sourceLength and targetCapacity
 501      */
 502     length=(int32_t)(sourceLimit-source);
 503     if(length<targetCapacity) {
 504         targetCapacity=length;
 505     }
 506
 507     if(targetCapacity>=8) {
 508         /* This loop is unrolled for speed and improved pipelining. */
 509         int32_t count, loops;
 510         UChar oredChars;
 511
 512         loops=count=targetCapacity>>3;
 513         do {
 514             oredChars=target[0]=source[0];
 515             oredChars|=target[1]=source[1];
 516             oredChars|=target[2]=source[2];
 517             oredChars|=target[3]=source[3];
 518             oredChars|=target[4]=source[4];
 519             oredChars|=target[5]=source[5];
 520             oredChars|=target[6]=source[6];
 521             oredChars|=target[7]=source[7];
 522
 523             /* were all 16 entries really valid? */
 524             if(oredChars>0x7f) {
 525                 /* no, return to the first of these 16 */
 526                 break;
 527             }
 528             source+=8;
 529             target+=8;
 530         } while(--count>0);
 531         count=loops-count;
 532         targetCapacity-=count*8;
 533
 534         if(offsets!=NULL) {
 535             oldTarget+=count*8;
 536             while(count>0) {
 537                 offsets[0]=sourceIndex++;
 538                 offsets[1]=sourceIndex++;
 539                 offsets[2]=sourceIndex++;
 540                 offsets[3]=sourceIndex++;
 541                 offsets[4]=sourceIndex++;
 542                 offsets[5]=sourceIndex++;
 543                 offsets[6]=sourceIndex++;
 544                 offsets[7]=sourceIndex++;
 545                 offsets+=8;
 546                 --count;
 547             }
 548         }
 549     }
 550
 551     /* conversion loop */
 552     c=0;
 553     while(targetCapacity>0 && (c=*source++)<=0x7f) {
 554         *target++=c;
 555         --targetCapacity;
 556     }
 557
 558     if(c>0x7f) {
 559         /* callback(illegal); copy the current bytes to toUBytes[] */
 560         UConverter *cnv=pArgs->converter;
 561         cnv->toUBytes[0]=c;
 562         cnv->toULength=1;
 563         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 564     } else if(source<sourceLimit && target>=pArgs->targetLimit) {
 565         /* target is full */
 566         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 567     }
 568
 569     /* set offsets since the start */
 570     if(offsets!=NULL) {
 571         size_t count=target-oldTarget;
 572         while(count>0) {
 573             *offsets++=sourceIndex++;
 574             --count;
 575         }
 576     }
 577
 578     /* write back the updated pointers */
 579     pArgs->source=(const char *)source;
 580     pArgs->target=target;
 581     pArgs->offsets=offsets;
 582 }
 583
 584 /* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
 585 static UChar32 U_CALLCONV
 586 _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
 587                    UErrorCode *pErrorCode) {
 588     const uint8_t *source;
 589     uint8_t b;
 590
 591     source=(const uint8_t *)pArgs->source;
 592     if(source<(const uint8_t *)pArgs->sourceLimit) {
 593         b=*source++;
 594         pArgs->source=(const char *)source;
 595         if(b<=0x7f) {
 596             return b;
 597         } else {
 598             UConverter *cnv=pArgs->converter;
 599             cnv->toUBytes[0]=b;
 600             cnv->toULength=1;
 601             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 602             return 0xffff;
 603         }
 604     }
 605
 606     /* no output because of empty input */
 607     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 608     return 0xffff;
 609 }
 610
 611 /* "Convert" UTF-8 to US-ASCII: Validate and copy. */
 612 static void U_CALLCONV
 613 ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 614                    UConverterToUnicodeArgs *pToUArgs,
 615                    UErrorCode *pErrorCode) {
 616     const uint8_t *source, *sourceLimit;
 617     uint8_t *target;
 618     int32_t targetCapacity, length;
 619
 620     uint8_t c;
 621
 622     if(pToUArgs->converter->toUnicodeStatus!=0) {
 623         /* no handling of partial UTF-8 characters here, fall back to pivoting */
 624         *pErrorCode=U_USING_DEFAULT_WARNING;
 625         return;
 626     }
 627
 628     /* set up the local pointers */
 629     source=(const uint8_t *)pToUArgs->source;
 630     sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
 631     target=(uint8_t *)pFromUArgs->target;
 632     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 633
 634     /*
 635      * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
 636      * for the minimum of the sourceLength and targetCapacity
 637      */
 638     length=(int32_t)(sourceLimit-source);
 639     if(length<targetCapacity) {
 640         targetCapacity=length;
 641     }
 642
 643     /* unroll the loop with the most common case */
 644     if(targetCapacity>=16) {
 645         int32_t count, loops;
 646         uint8_t oredChars;
 647
 648         loops=count=targetCapacity>>4;
 649         do {
 650             oredChars=*target++=*source++;
 651             oredChars|=*target++=*source++;
 652             oredChars|=*target++=*source++;
 653             oredChars|=*target++=*source++;
 654             oredChars|=*target++=*source++;
 655             oredChars|=*target++=*source++;
 656             oredChars|=*target++=*source++;
 657             oredChars|=*target++=*source++;
 658             oredChars|=*target++=*source++;
 659             oredChars|=*target++=*source++;
 660             oredChars|=*target++=*source++;
 661             oredChars|=*target++=*source++;
 662             oredChars|=*target++=*source++;
 663             oredChars|=*target++=*source++;
 664             oredChars|=*target++=*source++;
 665             oredChars|=*target++=*source++;
 666
 667             /* were all 16 entries really valid? */
 668             if(oredChars>0x7f) {
 669                 /* no, return to the first of these 16 */
 670                 source-=16;
 671                 target-=16;
 672                 break;
 673             }
 674         } while(--count>0);
 675         count=loops-count;
 676         targetCapacity-=16*count;
 677     }
 678
 679     /* conversion loop */
 680     c=0;
 681     while(targetCapacity>0 && (c=*source)<=0x7f) {
 682         ++source;
 683         *target++=c;
 684         --targetCapacity;
 685     }
 686
 687     if(c>0x7f) {
 688         /* non-ASCII character, handle in standard converter */
 689         *pErrorCode=U_USING_DEFAULT_WARNING;
 690     } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
 691         /* target is full */
 692         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 693     }
 694
 695     /* write back the updated pointers */
 696     pToUArgs->source=(const char *)source;
 697     pFromUArgs->target=(char *)target;
 698 }
 699
 700 static void U_CALLCONV
 701 _ASCIIGetUnicodeSet(const UConverter *cnv,
 702                     const USetAdder *sa,
 703                     UConverterUnicodeSet which,
 704                     UErrorCode *pErrorCode) {
 705     (void)cnv;
 706     (void)which;
 707     (void)pErrorCode;
 708     sa->addRange(sa->set, 0, 0x7f);
 709 }
 710 U_CDECL_END
 711
  712 static const UConverterImpl _ASCIIImpl={
         /*
          * Function table for the US-ASCII converter; the entries are positional
          * and NULL marks a slot for which this file supplies no function.
          * NOTE(review): the meaning of each slot comes from the UConverterImpl
          * declaration, which is not in this file (presumably ucnv_cnv.h) —
          * confirm against it before adding or reordering entries.
          */
  713     UCNV_US_ASCII,
  714
  715     NULL,
  716     NULL,
  717
  718     NULL,
  719     NULL,
  720     NULL,
  721
         /* the same to-Unicode function fills two adjacent slots */
  722     _ASCIIToUnicodeWithOffsets,
  723     _ASCIIToUnicodeWithOffsets,
         /*
          * from-Unicode reuses the Latin-1 function, which applies the 0x7f limit
          * whenever the converter's sharedData is not &_Latin1Data
          */
  724     _Latin1FromUnicodeWithOffsets,
  725     _Latin1FromUnicodeWithOffsets,
  726     _ASCIIGetNextUChar,
  727
  728     NULL,
  729     NULL,
  730     NULL,
  731     NULL,
  732     _ASCIIGetUnicodeSet,
  733
  734     NULL,
         /* direct UTF-8 to US-ASCII validation and copy */
  735     ucnv_ASCIIFromUTF8
  736 };
 737
  738 static const UConverterStaticData _ASCIIStaticData={
         /*
          * Static description of the US-ASCII converter; positional initializer.
          * NOTE(review): field meanings below are inferred from the values, not
          * from the UConverterStaticData declaration (not visible here) — confirm.
          */
  739     sizeof(UConverterStaticData),
  740     "US-ASCII", /* converter name */
         /* presumably: codepage number 367, platform, converter type, min/max bytes per char */
  741     367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
         /* presumably: substitution byte 0x1a and its length, followed by two flags */
  742     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
  743     0,
  744     0,
  745     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
  746 };
 747
  748 const UConverterSharedData _ASCIIData=
             /* shared-data object for US-ASCII, built from _ASCIIStaticData and _ASCIIImpl */
  749         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
 750
 751 #endif