]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnvlat1.cpp
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / common / ucnvlat1.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4**********************************************************************
2ca993e8 5* Copyright (C) 2000-2015, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: ucnvlat1.cpp
f3c0d7a5 9* encoding: UTF-8
b75a7d8f
A
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2000feb07
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
374ca955
A
18
19#if !UCONFIG_NO_CONVERSION
20
b75a7d8f 21#include "unicode/ucnv.h"
b75a7d8f 22#include "unicode/uset.h"
4388f060 23#include "unicode/utf8.h"
b75a7d8f
A
24#include "ucnv_bld.h"
25#include "ucnv_cnv.h"
26
27/* control optimizations according to the platform */
b75a7d8f 28#define LATIN1_UNROLL_FROM_UNICODE 1
b75a7d8f
A
29
30/* ISO 8859-1 --------------------------------------------------------------- */
31
374ca955 32/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
f3c0d7a5
A
33U_CDECL_BEGIN
34static void U_CALLCONV
b75a7d8f
A
35_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
36 UErrorCode *pErrorCode) {
37 const uint8_t *source;
38 UChar *target;
39 int32_t targetCapacity, length;
40 int32_t *offsets;
41
42 int32_t sourceIndex;
43
44 /* set up the local pointers */
45 source=(const uint8_t *)pArgs->source;
46 target=pArgs->target;
73c04bcf 47 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f
A
48 offsets=pArgs->offsets;
49
50 sourceIndex=0;
51
52 /*
53 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
54 * for the minimum of the sourceLength and targetCapacity
55 */
73c04bcf 56 length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
b75a7d8f
A
57 if(length<=targetCapacity) {
58 targetCapacity=length;
59 } else {
60 /* target will be full */
61 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
62 length=targetCapacity;
63 }
64
46f4442e
A
65 if(targetCapacity>=8) {
66 /* This loop is unrolled for speed and improved pipelining. */
b75a7d8f
A
67 int32_t count, loops;
68
46f4442e
A
69 loops=count=targetCapacity>>3;
70 length=targetCapacity&=0x7;
b75a7d8f 71 do {
46f4442e
A
72 target[0]=source[0];
73 target[1]=source[1];
74 target[2]=source[2];
75 target[3]=source[3];
76 target[4]=source[4];
77 target[5]=source[5];
78 target[6]=source[6];
79 target[7]=source[7];
80 target+=8;
81 source+=8;
b75a7d8f
A
82 } while(--count>0);
83
84 if(offsets!=NULL) {
85 do {
46f4442e
A
86 offsets[0]=sourceIndex++;
87 offsets[1]=sourceIndex++;
88 offsets[2]=sourceIndex++;
89 offsets[3]=sourceIndex++;
90 offsets[4]=sourceIndex++;
91 offsets[5]=sourceIndex++;
92 offsets[6]=sourceIndex++;
93 offsets[7]=sourceIndex++;
94 offsets+=8;
b75a7d8f
A
95 } while(--loops>0);
96 }
97 }
b75a7d8f
A
98
99 /* conversion loop */
100 while(targetCapacity>0) {
101 *target++=*source++;
102 --targetCapacity;
103 }
104
105 /* write back the updated pointers */
106 pArgs->source=(const char *)source;
107 pArgs->target=target;
108
109 /* set offsets */
110 if(offsets!=NULL) {
111 while(length>0) {
112 *offsets++=sourceIndex++;
113 --length;
114 }
115 pArgs->offsets=offsets;
116 }
117}
118
374ca955 119/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
f3c0d7a5 120static UChar32 U_CALLCONV
b75a7d8f
A
121_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
122 UErrorCode *pErrorCode) {
123 const uint8_t *source=(const uint8_t *)pArgs->source;
124 if(source<(const uint8_t *)pArgs->sourceLimit) {
125 pArgs->source=(const char *)(source+1);
126 return *source;
127 }
128
129 /* no output because of empty input */
130 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
131 return 0xffff;
132}
133
374ca955 134/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
f3c0d7a5 135static void U_CALLCONV
b75a7d8f
A
136_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
137 UErrorCode *pErrorCode) {
138 UConverter *cnv;
374ca955
A
139 const UChar *source, *sourceLimit;
140 uint8_t *target, *oldTarget;
b75a7d8f
A
141 int32_t targetCapacity, length;
142 int32_t *offsets;
143
374ca955
A
144 UChar32 cp;
145 UChar c, max;
b75a7d8f
A
146
147 int32_t sourceIndex;
148
b75a7d8f
A
149 /* set up the local pointers */
150 cnv=pArgs->converter;
151 source=pArgs->source;
152 sourceLimit=pArgs->sourceLimit;
374ca955 153 target=oldTarget=(uint8_t *)pArgs->target;
73c04bcf 154 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f
A
155 offsets=pArgs->offsets;
156
157 if(cnv->sharedData==&_Latin1Data) {
158 max=0xff; /* Latin-1 */
159 } else {
160 max=0x7f; /* US-ASCII */
161 }
162
163 /* get the converter state from UConverter */
374ca955 164 cp=cnv->fromUChar32;
b75a7d8f
A
165
166 /* sourceIndex=-1 if the current character began in the previous buffer */
374ca955 167 sourceIndex= cp==0 ? 0 : -1;
b75a7d8f
A
168
169 /*
170 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
171 * for the minimum of the sourceLength and targetCapacity
172 */
73c04bcf 173 length=(int32_t)(sourceLimit-source);
b75a7d8f
A
174 if(length<targetCapacity) {
175 targetCapacity=length;
176 }
177
178 /* conversion loop */
374ca955 179 if(cp!=0 && targetCapacity>0) {
b75a7d8f
A
180 goto getTrail;
181 }
182
183#if LATIN1_UNROLL_FROM_UNICODE
184 /* unroll the loop with the most common case */
b75a7d8f
A
185 if(targetCapacity>=16) {
186 int32_t count, loops;
187 UChar u, oredChars;
188
189 loops=count=targetCapacity>>4;
190 do {
191 oredChars=u=*source++;
192 *target++=(uint8_t)u;
193 oredChars|=u=*source++;
194 *target++=(uint8_t)u;
195 oredChars|=u=*source++;
196 *target++=(uint8_t)u;
197 oredChars|=u=*source++;
198 *target++=(uint8_t)u;
199 oredChars|=u=*source++;
200 *target++=(uint8_t)u;
201 oredChars|=u=*source++;
202 *target++=(uint8_t)u;
203 oredChars|=u=*source++;
204 *target++=(uint8_t)u;
205 oredChars|=u=*source++;
206 *target++=(uint8_t)u;
207 oredChars|=u=*source++;
208 *target++=(uint8_t)u;
209 oredChars|=u=*source++;
210 *target++=(uint8_t)u;
211 oredChars|=u=*source++;
212 *target++=(uint8_t)u;
213 oredChars|=u=*source++;
214 *target++=(uint8_t)u;
215 oredChars|=u=*source++;
216 *target++=(uint8_t)u;
217 oredChars|=u=*source++;
218 *target++=(uint8_t)u;
219 oredChars|=u=*source++;
220 *target++=(uint8_t)u;
221 oredChars|=u=*source++;
222 *target++=(uint8_t)u;
223
224 /* were all 16 entries really valid? */
225 if(oredChars>max) {
226 /* no, return to the first of these 16 */
227 source-=16;
228 target-=16;
229 break;
230 }
231 } while(--count>0);
232 count=loops-count;
233 targetCapacity-=16*count;
234
235 if(offsets!=NULL) {
374ca955 236 oldTarget+=16*count;
b75a7d8f
A
237 while(count>0) {
238 *offsets++=sourceIndex++;
239 *offsets++=sourceIndex++;
240 *offsets++=sourceIndex++;
241 *offsets++=sourceIndex++;
242 *offsets++=sourceIndex++;
243 *offsets++=sourceIndex++;
244 *offsets++=sourceIndex++;
245 *offsets++=sourceIndex++;
246 *offsets++=sourceIndex++;
247 *offsets++=sourceIndex++;
248 *offsets++=sourceIndex++;
249 *offsets++=sourceIndex++;
250 *offsets++=sourceIndex++;
251 *offsets++=sourceIndex++;
252 *offsets++=sourceIndex++;
253 *offsets++=sourceIndex++;
254 --count;
255 }
256 }
b75a7d8f
A
257 }
258#endif
259
374ca955
A
260 /* conversion loop */
261 c=0;
262 while(targetCapacity>0 && (c=*source++)<=max) {
263 /* convert the Unicode code point */
264 *target++=(uint8_t)c;
265 --targetCapacity;
266 }
267
268 if(c>max) {
269 cp=c;
270 if(!U_IS_SURROGATE(cp)) {
271 /* callback(unassigned) */
272 } else if(U_IS_SURROGATE_LEAD(cp)) {
b75a7d8f 273getTrail:
374ca955
A
274 if(source<sourceLimit) {
275 /* test the following code unit */
276 UChar trail=*source;
277 if(U16_IS_TRAIL(trail)) {
278 ++source;
279 cp=U16_GET_SUPPLEMENTARY(cp, trail);
280 /* this codepage does not map supplementary code points */
281 /* callback(unassigned) */
b75a7d8f 282 } else {
374ca955
A
283 /* this is an unmatched lead code unit (1st surrogate) */
284 /* callback(illegal) */
b75a7d8f
A
285 }
286 } else {
374ca955
A
287 /* no more input */
288 cnv->fromUChar32=cp;
289 goto noMoreInput;
b75a7d8f 290 }
374ca955
A
291 } else {
292 /* this is an unmatched trail code unit (2nd surrogate) */
293 /* callback(illegal) */
b75a7d8f 294 }
b75a7d8f 295
374ca955
A
296 *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
297 cnv->fromUChar32=cp;
b75a7d8f 298 }
374ca955 299noMoreInput:
b75a7d8f 300
374ca955 301 /* set offsets since the start */
b75a7d8f 302 if(offsets!=NULL) {
374ca955 303 size_t count=target-oldTarget;
b75a7d8f
A
304 while(count>0) {
305 *offsets++=sourceIndex++;
306 --count;
307 }
308 }
309
374ca955
A
310 if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
311 /* target is full */
312 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
b75a7d8f
A
313 }
314
315 /* write back the updated pointers */
316 pArgs->source=source;
317 pArgs->target=(char *)target;
318 pArgs->offsets=offsets;
319}
320
46f4442e 321/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
f3c0d7a5 322static void U_CALLCONV
46f4442e
A
323ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
324 UConverterToUnicodeArgs *pToUArgs,
325 UErrorCode *pErrorCode) {
326 UConverter *utf8;
327 const uint8_t *source, *sourceLimit;
328 uint8_t *target;
329 int32_t targetCapacity;
330
331 UChar32 c;
332 uint8_t b, t1;
333
334 /* set up the local pointers */
335 utf8=pToUArgs->converter;
336 source=(uint8_t *)pToUArgs->source;
337 sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
338 target=(uint8_t *)pFromUArgs->target;
339 targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
340
341 /* get the converter state from the UTF-8 UConverter */
342 c=(UChar32)utf8->toUnicodeStatus;
343 if(c!=0 && source<sourceLimit) {
344 if(targetCapacity==0) {
345 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
346 return;
347 } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
348 ++source;
349 *target++=(uint8_t)(((c&3)<<6)|t1);
350 --targetCapacity;
351
352 utf8->toUnicodeStatus=0;
353 utf8->toULength=0;
354 } else {
355 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
356 *pErrorCode=U_USING_DEFAULT_WARNING;
357 return;
358 }
359 }
360
361 /*
362 * Make sure that the last byte sequence before sourceLimit is complete
363 * or runs into a lead byte.
364 * In the conversion loop compare source with sourceLimit only once
365 * per multi-byte character.
366 * For Latin-1, adjust sourceLimit only for 1 trail byte because
367 * the conversion loop handles at most 2-byte sequences.
368 */
369 if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
370 --sourceLimit;
371 }
372
373 /* conversion loop */
374 while(source<sourceLimit) {
375 if(targetCapacity>0) {
376 b=*source++;
377 if((int8_t)b>=0) {
378 /* convert ASCII */
379 *target++=(uint8_t)b;
380 --targetCapacity;
381 } else if( /* handle U+0080..U+00FF inline */
382 b>=0xc2 && b<=0xc3 &&
383 (t1=(uint8_t)(*source-0x80)) <= 0x3f
384 ) {
385 ++source;
386 *target++=(uint8_t)(((b&3)<<6)|t1);
387 --targetCapacity;
388 } else {
389 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
390 pToUArgs->source=(char *)(source-1);
391 pFromUArgs->target=(char *)target;
392 *pErrorCode=U_USING_DEFAULT_WARNING;
393 return;
394 }
395 } else {
396 /* target is full */
397 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
398 break;
399 }
400 }
401
402 /*
403 * The sourceLimit may have been adjusted before the conversion loop
404 * to stop before a truncated sequence.
405 * If so, then collect the truncated sequence now.
406 * For Latin-1, there is at most exactly one lead byte because of the
407 * smaller sourceLimit adjustment logic.
408 */
409 if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
410 utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
411 utf8->toULength=1;
51004dcb 412 utf8->mode=U8_COUNT_TRAIL_BYTES(b)+1;
46f4442e
A
413 }
414
415 /* write back the updated pointers */
416 pToUArgs->source=(char *)source;
417 pFromUArgs->target=(char *)target;
418}
419
f3c0d7a5 420static void U_CALLCONV
b75a7d8f 421_Latin1GetUnicodeSet(const UConverter *cnv,
73c04bcf 422 const USetAdder *sa,
b75a7d8f
A
423 UConverterUnicodeSet which,
424 UErrorCode *pErrorCode) {
f3c0d7a5
A
425 (void)cnv;
426 (void)which;
427 (void)pErrorCode;
374ca955 428 sa->addRange(sa->set, 0, 0xff);
b75a7d8f 429}
f3c0d7a5
A
430U_CDECL_END
431
b75a7d8f
A
432
433static const UConverterImpl _Latin1Impl={
434 UCNV_LATIN_1,
435
436 NULL,
437 NULL,
438
439 NULL,
440 NULL,
441 NULL,
442
443 _Latin1ToUnicodeWithOffsets,
444 _Latin1ToUnicodeWithOffsets,
445 _Latin1FromUnicodeWithOffsets,
446 _Latin1FromUnicodeWithOffsets,
447 _Latin1GetNextUChar,
448
449 NULL,
450 NULL,
451 NULL,
452 NULL,
46f4442e
A
453 _Latin1GetUnicodeSet,
454
455 NULL,
456 ucnv_Latin1FromUTF8
b75a7d8f
A
457};
458
459static const UConverterStaticData _Latin1StaticData={
460 sizeof(UConverterStaticData),
461 "ISO-8859-1",
462 819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
463 { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
464 0,
465 0,
466 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
467};
468
2ca993e8
A
469const UConverterSharedData _Latin1Data=
470 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
b75a7d8f
A
471
472/* US-ASCII ----------------------------------------------------------------- */
473
f3c0d7a5 474U_CDECL_BEGIN
374ca955 475/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
f3c0d7a5 476static void U_CALLCONV
b75a7d8f
A
477_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
478 UErrorCode *pErrorCode) {
374ca955
A
479 const uint8_t *source, *sourceLimit;
480 UChar *target, *oldTarget;
b75a7d8f
A
481 int32_t targetCapacity, length;
482 int32_t *offsets;
483
484 int32_t sourceIndex;
485
374ca955
A
486 uint8_t c;
487
b75a7d8f
A
488 /* set up the local pointers */
489 source=(const uint8_t *)pArgs->source;
490 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
374ca955 491 target=oldTarget=pArgs->target;
73c04bcf 492 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f
A
493 offsets=pArgs->offsets;
494
495 /* sourceIndex=-1 if the current character began in the previous buffer */
496 sourceIndex=0;
b75a7d8f
A
497
498 /*
499 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
500 * for the minimum of the sourceLength and targetCapacity
501 */
73c04bcf 502 length=(int32_t)(sourceLimit-source);
b75a7d8f
A
503 if(length<targetCapacity) {
504 targetCapacity=length;
505 }
506
46f4442e
A
507 if(targetCapacity>=8) {
508 /* This loop is unrolled for speed and improved pipelining. */
b75a7d8f
A
509 int32_t count, loops;
510 UChar oredChars;
511
46f4442e 512 loops=count=targetCapacity>>3;
b75a7d8f 513 do {
46f4442e
A
514 oredChars=target[0]=source[0];
515 oredChars|=target[1]=source[1];
516 oredChars|=target[2]=source[2];
517 oredChars|=target[3]=source[3];
518 oredChars|=target[4]=source[4];
519 oredChars|=target[5]=source[5];
520 oredChars|=target[6]=source[6];
521 oredChars|=target[7]=source[7];
b75a7d8f
A
522
523 /* were all 16 entries really valid? */
524 if(oredChars>0x7f) {
525 /* no, return to the first of these 16 */
b75a7d8f
A
526 break;
527 }
46f4442e
A
528 source+=8;
529 target+=8;
b75a7d8f
A
530 } while(--count>0);
531 count=loops-count;
46f4442e 532 targetCapacity-=count*8;
b75a7d8f
A
533
534 if(offsets!=NULL) {
46f4442e 535 oldTarget+=count*8;
b75a7d8f 536 while(count>0) {
46f4442e
A
537 offsets[0]=sourceIndex++;
538 offsets[1]=sourceIndex++;
539 offsets[2]=sourceIndex++;
540 offsets[3]=sourceIndex++;
541 offsets[4]=sourceIndex++;
542 offsets[5]=sourceIndex++;
543 offsets[6]=sourceIndex++;
544 offsets[7]=sourceIndex++;
545 offsets+=8;
b75a7d8f
A
546 --count;
547 }
548 }
549 }
b75a7d8f
A
550
551 /* conversion loop */
374ca955
A
552 c=0;
553 while(targetCapacity>0 && (c=*source++)<=0x7f) {
554 *target++=c;
555 --targetCapacity;
b75a7d8f
A
556 }
557
374ca955
A
558 if(c>0x7f) {
559 /* callback(illegal); copy the current bytes to toUBytes[] */
560 UConverter *cnv=pArgs->converter;
561 cnv->toUBytes[0]=c;
562 cnv->toULength=1;
563 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
564 } else if(source<sourceLimit && target>=pArgs->targetLimit) {
b75a7d8f
A
565 /* target is full */
566 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
567 }
568
374ca955 569 /* set offsets since the start */
b75a7d8f 570 if(offsets!=NULL) {
374ca955 571 size_t count=target-oldTarget;
b75a7d8f
A
572 while(count>0) {
573 *offsets++=sourceIndex++;
574 --count;
575 }
576 }
577
578 /* write back the updated pointers */
579 pArgs->source=(const char *)source;
580 pArgs->target=target;
581 pArgs->offsets=offsets;
582}
583
374ca955 584/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
f3c0d7a5 585static UChar32 U_CALLCONV
b75a7d8f
A
586_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
587 UErrorCode *pErrorCode) {
b75a7d8f
A
588 const uint8_t *source;
589 uint8_t b;
590
b75a7d8f 591 source=(const uint8_t *)pArgs->source;
374ca955 592 if(source<(const uint8_t *)pArgs->sourceLimit) {
b75a7d8f
A
593 b=*source++;
594 pArgs->source=(const char *)source;
595 if(b<=0x7f) {
596 return b;
597 } else {
b75a7d8f 598 UConverter *cnv=pArgs->converter;
374ca955
A
599 cnv->toUBytes[0]=b;
600 cnv->toULength=1;
b75a7d8f 601 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
374ca955 602 return 0xffff;
b75a7d8f
A
603 }
604 }
605
374ca955 606 /* no output because of empty input */
b75a7d8f
A
607 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
608 return 0xffff;
609}
610
46f4442e 611/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
f3c0d7a5 612static void U_CALLCONV
46f4442e
A
613ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
614 UConverterToUnicodeArgs *pToUArgs,
615 UErrorCode *pErrorCode) {
616 const uint8_t *source, *sourceLimit;
617 uint8_t *target;
618 int32_t targetCapacity, length;
619
620 uint8_t c;
621
622 if(pToUArgs->converter->toUnicodeStatus!=0) {
623 /* no handling of partial UTF-8 characters here, fall back to pivoting */
624 *pErrorCode=U_USING_DEFAULT_WARNING;
625 return;
626 }
627
628 /* set up the local pointers */
629 source=(const uint8_t *)pToUArgs->source;
630 sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
631 target=(uint8_t *)pFromUArgs->target;
632 targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
633
634 /*
635 * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
636 * for the minimum of the sourceLength and targetCapacity
637 */
638 length=(int32_t)(sourceLimit-source);
639 if(length<targetCapacity) {
640 targetCapacity=length;
641 }
642
643 /* unroll the loop with the most common case */
644 if(targetCapacity>=16) {
645 int32_t count, loops;
646 uint8_t oredChars;
647
648 loops=count=targetCapacity>>4;
649 do {
650 oredChars=*target++=*source++;
651 oredChars|=*target++=*source++;
652 oredChars|=*target++=*source++;
653 oredChars|=*target++=*source++;
654 oredChars|=*target++=*source++;
655 oredChars|=*target++=*source++;
656 oredChars|=*target++=*source++;
657 oredChars|=*target++=*source++;
658 oredChars|=*target++=*source++;
659 oredChars|=*target++=*source++;
660 oredChars|=*target++=*source++;
661 oredChars|=*target++=*source++;
662 oredChars|=*target++=*source++;
663 oredChars|=*target++=*source++;
664 oredChars|=*target++=*source++;
665 oredChars|=*target++=*source++;
666
667 /* were all 16 entries really valid? */
668 if(oredChars>0x7f) {
669 /* no, return to the first of these 16 */
670 source-=16;
671 target-=16;
672 break;
673 }
674 } while(--count>0);
675 count=loops-count;
676 targetCapacity-=16*count;
677 }
678
679 /* conversion loop */
680 c=0;
681 while(targetCapacity>0 && (c=*source)<=0x7f) {
682 ++source;
683 *target++=c;
684 --targetCapacity;
685 }
686
687 if(c>0x7f) {
688 /* non-ASCII character, handle in standard converter */
689 *pErrorCode=U_USING_DEFAULT_WARNING;
690 } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
691 /* target is full */
692 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
693 }
694
695 /* write back the updated pointers */
696 pToUArgs->source=(const char *)source;
697 pFromUArgs->target=(char *)target;
698}
699
f3c0d7a5 700static void U_CALLCONV
b75a7d8f 701_ASCIIGetUnicodeSet(const UConverter *cnv,
73c04bcf 702 const USetAdder *sa,
b75a7d8f
A
703 UConverterUnicodeSet which,
704 UErrorCode *pErrorCode) {
f3c0d7a5
A
705 (void)cnv;
706 (void)which;
707 (void)pErrorCode;
374ca955 708 sa->addRange(sa->set, 0, 0x7f);
b75a7d8f 709}
f3c0d7a5 710U_CDECL_END
b75a7d8f
A
711
712static const UConverterImpl _ASCIIImpl={
713 UCNV_US_ASCII,
714
715 NULL,
716 NULL,
717
718 NULL,
719 NULL,
720 NULL,
721
722 _ASCIIToUnicodeWithOffsets,
723 _ASCIIToUnicodeWithOffsets,
724 _Latin1FromUnicodeWithOffsets,
725 _Latin1FromUnicodeWithOffsets,
726 _ASCIIGetNextUChar,
727
728 NULL,
729 NULL,
730 NULL,
731 NULL,
46f4442e
A
732 _ASCIIGetUnicodeSet,
733
734 NULL,
735 ucnv_ASCIIFromUTF8
b75a7d8f
A
736};
737
738static const UConverterStaticData _ASCIIStaticData={
739 sizeof(UConverterStaticData),
740 "US-ASCII",
741 367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
742 { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
743 0,
744 0,
745 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
746};
747
2ca993e8
A
748const UConverterSharedData _ASCIIData=
749 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
374ca955
A
750
751#endif