icuSources/common/ucnvlat1.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 2000-2015, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   file name:  ucnvlat1.cpp
   9 *   encoding:   UTF-8
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2000feb07
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18
  19 #if !UCONFIG_NO_CONVERSION
  20
  21 #include "unicode/ucnv.h"
  22 #include "unicode/uset.h"
  23 #include "unicode/utf8.h"
  24 #include "ucnv_bld.h"
  25 #include "ucnv_cnv.h"
  26 #include "ustr_imp.h"
  27
  28 /* Platform tuning switch: a non-zero value compiles in the unrolled 16-code-unit loop of _Latin1FromUnicodeWithOffsets(). */
  29 #define LATIN1_UNROLL_FROM_UNICODE 1
  30
  31 /* ISO 8859-1 --------------------------------------------------------------- */
  32
  33 /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
  34 U_CDECL_BEGIN
  35 static void U_CALLCONV
  36 _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
  37                             UErrorCode *pErrorCode) {
  38     const uint8_t *source;
  39     UChar *target;
  40     int32_t targetCapacity, length;
  41     int32_t *offsets;
  42
  43     int32_t sourceIndex;
  44
  45     /* set up the local pointers */
  46     source=(const uint8_t *)pArgs->source;
  47     target=pArgs->target;
  48     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  49     offsets=pArgs->offsets;
  50
  51     sourceIndex=0;
  52
  53     /*
  54      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
  55      * for the minimum of the sourceLength and targetCapacity
  56      */
  57     length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
  58     if(length<=targetCapacity) {
  59         targetCapacity=length;
  60     } else {
  61         /* target will be full */
  62         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  63         length=targetCapacity;
  64     }
  65
  66     if(targetCapacity>=8) {
  67         /* This loop is unrolled for speed and improved pipelining. */
  68         int32_t count, loops;
  69
  70         loops=count=targetCapacity>>3;
  71         length=targetCapacity&=0x7;
  72         do {
  73             target[0]=source[0];
  74             target[1]=source[1];
  75             target[2]=source[2];
  76             target[3]=source[3];
  77             target[4]=source[4];
  78             target[5]=source[5];
  79             target[6]=source[6];
  80             target[7]=source[7];
  81             target+=8;
  82             source+=8;
  83         } while(--count>0);
  84
  85         if(offsets!=NULL) {
  86             do {
  87                 offsets[0]=sourceIndex++;
  88                 offsets[1]=sourceIndex++;
  89                 offsets[2]=sourceIndex++;
  90                 offsets[3]=sourceIndex++;
  91                 offsets[4]=sourceIndex++;
  92                 offsets[5]=sourceIndex++;
  93                 offsets[6]=sourceIndex++;
  94                 offsets[7]=sourceIndex++;
  95                 offsets+=8;
  96             } while(--loops>0);
  97         }
  98     }
  99
 100     /* conversion loop */
 101     while(targetCapacity>0) {
 102         *target++=*source++;
 103         --targetCapacity;
 104     }
 105
 106     /* write back the updated pointers */
 107     pArgs->source=(const char *)source;
 108     pArgs->target=target;
 109
 110     /* set offsets */
 111     if(offsets!=NULL) {
 112         while(length>0) {
 113             *offsets++=sourceIndex++;
 114             --length;
 115         }
 116         pArgs->offsets=offsets;
 117     }
 118 }
 119
 120 /* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
 121 static UChar32 U_CALLCONV
 122 _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
 123                     UErrorCode *pErrorCode) {
 124     const uint8_t *source=(const uint8_t *)pArgs->source;
 125     if(source<(const uint8_t *)pArgs->sourceLimit) {
 126         pArgs->source=(const char *)(source+1);
 127         return *source;
 128     }
 129
 130     /* no output because of empty input */
 131     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 132     return 0xffff;
 133 }
 134
 135 /* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
 136 static void U_CALLCONV
 137 _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 138                               UErrorCode *pErrorCode) {
 139     UConverter *cnv;
 140     const UChar *source, *sourceLimit;
 141     uint8_t *target, *oldTarget;
 142     int32_t targetCapacity, length;
 143     int32_t *offsets;
 144
 145     UChar32 cp;
 146     UChar c, max;
 147
 148     int32_t sourceIndex;
 149
 150     /* set up the local pointers */
 151     cnv=pArgs->converter;
 152     source=pArgs->source;
 153     sourceLimit=pArgs->sourceLimit;
 154     target=oldTarget=(uint8_t *)pArgs->target;
 155     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 156     offsets=pArgs->offsets;
 157
 158     if(cnv->sharedData==&_Latin1Data) {
 159         max=0xff; /* Latin-1 */
 160     } else {
 161         max=0x7f; /* US-ASCII */
 162     }
 163
 164     /* get the converter state from UConverter */
 165     cp=cnv->fromUChar32;
 166
 167     /* sourceIndex=-1 if the current character began in the previous buffer */
 168     sourceIndex= cp==0 ? 0 : -1;
 169
 170     /*
 171      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 172      * for the minimum of the sourceLength and targetCapacity
 173      */
 174     length=(int32_t)(sourceLimit-source);
 175     if(length<targetCapacity) {
 176         targetCapacity=length;
 177     }
 178
 179     /* conversion loop */
 180     if(cp!=0 && targetCapacity>0) {
 181         goto getTrail;
 182     }
 183
 184 #if LATIN1_UNROLL_FROM_UNICODE
 185     /* unroll the loop with the most common case */
 186     if(targetCapacity>=16) {
 187         int32_t count, loops;
 188         UChar u, oredChars;
 189
 190         loops=count=targetCapacity>>4;
 191         do {
 192             oredChars=u=*source++;
 193             *target++=(uint8_t)u;
 194             oredChars|=u=*source++;
 195             *target++=(uint8_t)u;
 196             oredChars|=u=*source++;
 197             *target++=(uint8_t)u;
 198             oredChars|=u=*source++;
 199             *target++=(uint8_t)u;
 200             oredChars|=u=*source++;
 201             *target++=(uint8_t)u;
 202             oredChars|=u=*source++;
 203             *target++=(uint8_t)u;
 204             oredChars|=u=*source++;
 205             *target++=(uint8_t)u;
 206             oredChars|=u=*source++;
 207             *target++=(uint8_t)u;
 208             oredChars|=u=*source++;
 209             *target++=(uint8_t)u;
 210             oredChars|=u=*source++;
 211             *target++=(uint8_t)u;
 212             oredChars|=u=*source++;
 213             *target++=(uint8_t)u;
 214             oredChars|=u=*source++;
 215             *target++=(uint8_t)u;
 216             oredChars|=u=*source++;
 217             *target++=(uint8_t)u;
 218             oredChars|=u=*source++;
 219             *target++=(uint8_t)u;
 220             oredChars|=u=*source++;
 221             *target++=(uint8_t)u;
 222             oredChars|=u=*source++;
 223             *target++=(uint8_t)u;
 224
 225             /* were all 16 entries really valid? */
 226             if(oredChars>max) {
 227                 /* no, return to the first of these 16 */
 228                 source-=16;
 229                 target-=16;
 230                 break;
 231             }
 232         } while(--count>0);
 233         count=loops-count;
 234         targetCapacity-=16*count;
 235
 236         if(offsets!=NULL) {
 237             oldTarget+=16*count;
 238             while(count>0) {
 239                 *offsets++=sourceIndex++;
 240                 *offsets++=sourceIndex++;
 241                 *offsets++=sourceIndex++;
 242                 *offsets++=sourceIndex++;
 243                 *offsets++=sourceIndex++;
 244                 *offsets++=sourceIndex++;
 245                 *offsets++=sourceIndex++;
 246                 *offsets++=sourceIndex++;
 247                 *offsets++=sourceIndex++;
 248                 *offsets++=sourceIndex++;
 249                 *offsets++=sourceIndex++;
 250                 *offsets++=sourceIndex++;
 251                 *offsets++=sourceIndex++;
 252                 *offsets++=sourceIndex++;
 253                 *offsets++=sourceIndex++;
 254                 *offsets++=sourceIndex++;
 255                 --count;
 256             }
 257         }
 258     }
 259 #endif
 260
 261     /* conversion loop */
 262     c=0;
 263     while(targetCapacity>0 && (c=*source++)<=max) {
 264         /* convert the Unicode code point */
 265         *target++=(uint8_t)c;
 266         --targetCapacity;
 267     }
 268
 269     if(c>max) {
 270         cp=c;
 271         if(!U_IS_SURROGATE(cp)) {
 272             /* callback(unassigned) */
 273         } else if(U_IS_SURROGATE_LEAD(cp)) {
 274 getTrail:
 275             if(source<sourceLimit) {
 276                 /* test the following code unit */
 277                 UChar trail=*source;
 278                 if(U16_IS_TRAIL(trail)) {
 279                     ++source;
 280                     cp=U16_GET_SUPPLEMENTARY(cp, trail);
 281                     /* this codepage does not map supplementary code points */
 282                     /* callback(unassigned) */
 283                 } else {
 284                     /* this is an unmatched lead code unit (1st surrogate) */
 285                     /* callback(illegal) */
 286                 }
 287             } else {
 288                 /* no more input */
 289                 cnv->fromUChar32=cp;
 290                 goto noMoreInput;
 291             }
 292         } else {
 293             /* this is an unmatched trail code unit (2nd surrogate) */
 294             /* callback(illegal) */
 295         }
 296
 297         *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
 298         cnv->fromUChar32=cp;
 299     }
 300 noMoreInput:
 301
 302     /* set offsets since the start */
 303     if(offsets!=NULL) {
 304         size_t count=target-oldTarget;
 305         while(count>0) {
 306             *offsets++=sourceIndex++;
 307             --count;
 308         }
 309     }
 310
 311     if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
 312         /* target is full */
 313         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 314     }
 315
 316     /* write back the updated pointers */
 317     pArgs->source=source;
 318     pArgs->target=(char *)target;
 319     pArgs->offsets=offsets;
 320 }
 321
 322 /* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
 323 static void U_CALLCONV
 324 ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 325                     UConverterToUnicodeArgs *pToUArgs,
 326                     UErrorCode *pErrorCode) {
 327     UConverter *utf8;
 328     const uint8_t *source, *sourceLimit;
 329     uint8_t *target;
 330     int32_t targetCapacity;
 331
 332     UChar32 c;
 333     uint8_t b, t1;
 334
 335     /* set up the local pointers */
 336     utf8=pToUArgs->converter;
 337     source=(uint8_t *)pToUArgs->source;
 338     sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
 339     target=(uint8_t *)pFromUArgs->target;
 340     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 341
 342     /* get the converter state from the UTF-8 UConverter */
 343     if (utf8->toULength > 0) {
 344         c=(UChar32)utf8->toUnicodeStatus;
 345     } else {
 346         c = 0;
 347     }
 348     if(c!=0 && source<sourceLimit) {
 349         if(targetCapacity==0) {
 350             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 351             return;
 352         } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
 353             ++source;
 354             *target++=(uint8_t)(((c&3)<<6)|t1);
 355             --targetCapacity;
 356
 357             utf8->toUnicodeStatus=0;
 358             utf8->toULength=0;
 359         } else {
 360             /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
 361             *pErrorCode=U_USING_DEFAULT_WARNING;
 362             return;
 363         }
 364     }
 365
 366     /*
 367      * Make sure that the last byte sequence before sourceLimit is complete
 368      * or runs into a lead byte.
 369      * In the conversion loop compare source with sourceLimit only once
 370      * per multi-byte character.
 371      * For Latin-1, adjust sourceLimit only for 1 trail byte because
 372      * the conversion loop handles at most 2-byte sequences.
 373      */
 374     if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
 375         --sourceLimit;
 376     }
 377
 378     /* conversion loop */
 379     while(source<sourceLimit) {
 380         if(targetCapacity>0) {
 381             b=*source++;
 382             if(U8_IS_SINGLE(b)) {
 383                 /* convert ASCII */
 384                 *target++=(uint8_t)b;
 385                 --targetCapacity;
 386             } else if( /* handle U+0080..U+00FF inline */
 387                        b>=0xc2 && b<=0xc3 &&
 388                        (t1=(uint8_t)(*source-0x80)) <= 0x3f
 389             ) {
 390                 ++source;
 391                 *target++=(uint8_t)(((b&3)<<6)|t1);
 392                 --targetCapacity;
 393             } else {
 394                 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
 395                 pToUArgs->source=(char *)(source-1);
 396                 pFromUArgs->target=(char *)target;
 397                 *pErrorCode=U_USING_DEFAULT_WARNING;
 398                 return;
 399             }
 400         } else {
 401             /* target is full */
 402             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 403             break;
 404         }
 405     }
 406
 407     /*
 408      * The sourceLimit may have been adjusted before the conversion loop
 409      * to stop before a truncated sequence.
 410      * If so, then collect the truncated sequence now.
 411      * For Latin-1, there is at most exactly one lead byte because of the
 412      * smaller sourceLimit adjustment logic.
 413      */
 414     if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
 415         utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
 416         utf8->toULength=1;
 417         utf8->mode=U8_COUNT_BYTES(b);
 418     }
 419
 420     /* write back the updated pointers */
 421     pToUArgs->source=(char *)source;
 422     pFromUArgs->target=(char *)target;
 423 }
 424
 425 static void U_CALLCONV
 426 _Latin1GetUnicodeSet(const UConverter *cnv,
 427                      const USetAdder *sa,
 428                      UConverterUnicodeSet which,
 429                      UErrorCode *pErrorCode) {
 430     (void)cnv;
 431     (void)which;
 432     (void)pErrorCode;
 433     sa->addRange(sa->set, 0, 0xff);
 434 }
 435 U_CDECL_END
 436
 437
 438 static const UConverterImpl _Latin1Impl={
 439     UCNV_LATIN_1,
 440
 441     NULL,
 442     NULL,
 443
 444     NULL,
 445     NULL,
 446     NULL,
 447
 448     _Latin1ToUnicodeWithOffsets,
 449     _Latin1ToUnicodeWithOffsets,
 450     _Latin1FromUnicodeWithOffsets,
 451     _Latin1FromUnicodeWithOffsets,
 452     _Latin1GetNextUChar,
 453
 454     NULL,
 455     NULL,
 456     NULL,
 457     NULL,
 458     _Latin1GetUnicodeSet,
 459
 460     NULL,
 461     ucnv_Latin1FromUTF8
 462 };
 463
 464 static const UConverterStaticData _Latin1StaticData={
 465     sizeof(UConverterStaticData),
 466     "ISO-8859-1",
 467     819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
 468     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
 469     0,
 470     0,
 471     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
 472 };
 473
 474 const UConverterSharedData _Latin1Data=
 475         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
 476
 477 /* US-ASCII ----------------------------------------------------------------- */
 478
 479 U_CDECL_BEGIN
 480 /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
 481 static void U_CALLCONV
 482 _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
 483                            UErrorCode *pErrorCode) {
 484     const uint8_t *source, *sourceLimit;
 485     UChar *target, *oldTarget;
 486     int32_t targetCapacity, length;
 487     int32_t *offsets;
 488
 489     int32_t sourceIndex;
 490
 491     uint8_t c;
 492
 493     /* set up the local pointers */
 494     source=(const uint8_t *)pArgs->source;
 495     sourceLimit=(const uint8_t *)pArgs->sourceLimit;
 496     target=oldTarget=pArgs->target;
 497     targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
 498     offsets=pArgs->offsets;
 499
 500     /* sourceIndex=-1 if the current character began in the previous buffer */
 501     sourceIndex=0;
 502
 503     /*
 504      * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
 505      * for the minimum of the sourceLength and targetCapacity
 506      */
 507     length=(int32_t)(sourceLimit-source);
 508     if(length<targetCapacity) {
 509         targetCapacity=length;
 510     }
 511
 512     if(targetCapacity>=8) {
 513         /* This loop is unrolled for speed and improved pipelining. */
 514         int32_t count, loops;
 515         UChar oredChars;
 516
 517         loops=count=targetCapacity>>3;
 518         do {
 519             oredChars=target[0]=source[0];
 520             oredChars|=target[1]=source[1];
 521             oredChars|=target[2]=source[2];
 522             oredChars|=target[3]=source[3];
 523             oredChars|=target[4]=source[4];
 524             oredChars|=target[5]=source[5];
 525             oredChars|=target[6]=source[6];
 526             oredChars|=target[7]=source[7];
 527
 528             /* were all 16 entries really valid? */
 529             if(oredChars>0x7f) {
 530                 /* no, return to the first of these 16 */
 531                 break;
 532             }
 533             source+=8;
 534             target+=8;
 535         } while(--count>0);
 536         count=loops-count;
 537         targetCapacity-=count*8;
 538
 539         if(offsets!=NULL) {
 540             oldTarget+=count*8;
 541             while(count>0) {
 542                 offsets[0]=sourceIndex++;
 543                 offsets[1]=sourceIndex++;
 544                 offsets[2]=sourceIndex++;
 545                 offsets[3]=sourceIndex++;
 546                 offsets[4]=sourceIndex++;
 547                 offsets[5]=sourceIndex++;
 548                 offsets[6]=sourceIndex++;
 549                 offsets[7]=sourceIndex++;
 550                 offsets+=8;
 551                 --count;
 552             }
 553         }
 554     }
 555
 556     /* conversion loop */
 557     c=0;
 558     while(targetCapacity>0 && (c=*source++)<=0x7f) {
 559         *target++=c;
 560         --targetCapacity;
 561     }
 562
 563     if(c>0x7f) {
 564         /* callback(illegal); copy the current bytes to toUBytes[] */
 565         UConverter *cnv=pArgs->converter;
 566         cnv->toUBytes[0]=c;
 567         cnv->toULength=1;
 568         *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 569     } else if(source<sourceLimit && target>=pArgs->targetLimit) {
 570         /* target is full */
 571         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 572     }
 573
 574     /* set offsets since the start */
 575     if(offsets!=NULL) {
 576         size_t count=target-oldTarget;
 577         while(count>0) {
 578             *offsets++=sourceIndex++;
 579             --count;
 580         }
 581     }
 582
 583     /* write back the updated pointers */
 584     pArgs->source=(const char *)source;
 585     pArgs->target=target;
 586     pArgs->offsets=offsets;
 587 }
 588
 589 /* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
 590 static UChar32 U_CALLCONV
 591 _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
 592                    UErrorCode *pErrorCode) {
 593     const uint8_t *source;
 594     uint8_t b;
 595
 596     source=(const uint8_t *)pArgs->source;
 597     if(source<(const uint8_t *)pArgs->sourceLimit) {
 598         b=*source++;
 599         pArgs->source=(const char *)source;
 600         if(b<=0x7f) {
 601             return b;
 602         } else {
 603             UConverter *cnv=pArgs->converter;
 604             cnv->toUBytes[0]=b;
 605             cnv->toULength=1;
 606             *pErrorCode=U_ILLEGAL_CHAR_FOUND;
 607             return 0xffff;
 608         }
 609     }
 610
 611     /* no output because of empty input */
 612     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
 613     return 0xffff;
 614 }
 615
 616 /* "Convert" UTF-8 to US-ASCII: Validate and copy. */
 617 static void U_CALLCONV
 618 ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
 619                    UConverterToUnicodeArgs *pToUArgs,
 620                    UErrorCode *pErrorCode) {
 621     const uint8_t *source, *sourceLimit;
 622     uint8_t *target;
 623     int32_t targetCapacity, length;
 624
 625     uint8_t c;
 626
 627     if(pToUArgs->converter->toULength > 0) {
 628         /* no handling of partial UTF-8 characters here, fall back to pivoting */
 629         *pErrorCode=U_USING_DEFAULT_WARNING;
 630         return;
 631     }
 632
 633     /* set up the local pointers */
 634     source=(const uint8_t *)pToUArgs->source;
 635     sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
 636     target=(uint8_t *)pFromUArgs->target;
 637     targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
 638
 639     /*
 640      * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
 641      * for the minimum of the sourceLength and targetCapacity
 642      */
 643     length=(int32_t)(sourceLimit-source);
 644     if(length<targetCapacity) {
 645         targetCapacity=length;
 646     }
 647
 648     /* unroll the loop with the most common case */
 649     if(targetCapacity>=16) {
 650         int32_t count, loops;
 651         uint8_t oredChars;
 652
 653         loops=count=targetCapacity>>4;
 654         do {
 655             oredChars=*target++=*source++;
 656             oredChars|=*target++=*source++;
 657             oredChars|=*target++=*source++;
 658             oredChars|=*target++=*source++;
 659             oredChars|=*target++=*source++;
 660             oredChars|=*target++=*source++;
 661             oredChars|=*target++=*source++;
 662             oredChars|=*target++=*source++;
 663             oredChars|=*target++=*source++;
 664             oredChars|=*target++=*source++;
 665             oredChars|=*target++=*source++;
 666             oredChars|=*target++=*source++;
 667             oredChars|=*target++=*source++;
 668             oredChars|=*target++=*source++;
 669             oredChars|=*target++=*source++;
 670             oredChars|=*target++=*source++;
 671
 672             /* were all 16 entries really valid? */
 673             if(oredChars>0x7f) {
 674                 /* no, return to the first of these 16 */
 675                 source-=16;
 676                 target-=16;
 677                 break;
 678             }
 679         } while(--count>0);
 680         count=loops-count;
 681         targetCapacity-=16*count;
 682     }
 683
 684     /* conversion loop */
 685     c=0;
 686     while(targetCapacity>0 && (c=*source)<=0x7f) {
 687         ++source;
 688         *target++=c;
 689         --targetCapacity;
 690     }
 691
 692     if(c>0x7f) {
 693         /* non-ASCII character, handle in standard converter */
 694         *pErrorCode=U_USING_DEFAULT_WARNING;
 695     } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
 696         /* target is full */
 697         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
 698     }
 699
 700     /* write back the updated pointers */
 701     pToUArgs->source=(const char *)source;
 702     pFromUArgs->target=(char *)target;
 703 }
 704
 705 static void U_CALLCONV
 706 _ASCIIGetUnicodeSet(const UConverter *cnv,
 707                     const USetAdder *sa,
 708                     UConverterUnicodeSet which,
 709                     UErrorCode *pErrorCode) {
 710     (void)cnv;
 711     (void)which;
 712     (void)pErrorCode;
 713     sa->addRange(sa->set, 0, 0x7f);
 714 }
 715 U_CDECL_END
 716
 717 static const UConverterImpl _ASCIIImpl={
 718     UCNV_US_ASCII,
 719
 720     NULL,
 721     NULL,
 722
 723     NULL,
 724     NULL,
 725     NULL,
 726
 727     _ASCIIToUnicodeWithOffsets,
 728     _ASCIIToUnicodeWithOffsets,
 729     _Latin1FromUnicodeWithOffsets,
 730     _Latin1FromUnicodeWithOffsets,
 731     _ASCIIGetNextUChar,
 732
 733     NULL,
 734     NULL,
 735     NULL,
 736     NULL,
 737     _ASCIIGetUnicodeSet,
 738
 739     NULL,
 740     ucnv_ASCIIFromUTF8
 741 };
 742
 743 static const UConverterStaticData _ASCIIStaticData={
 744     sizeof(UConverterStaticData),
 745     "US-ASCII",
 746     367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
 747     { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
 748     0,
 749     0,
 750     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
 751 };
 752
 753 const UConverterSharedData _ASCIIData=
 754         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
 755
 756 #endif