]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnvlat1.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / common / ucnvlat1.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4**********************************************************************
2ca993e8 5* Copyright (C) 2000-2015, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: ucnvlat1.cpp
f3c0d7a5 9* encoding: UTF-8
b75a7d8f
A
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2000feb07
14* created by: Markus W. Scherer
15*/
16
17#include "unicode/utypes.h"
374ca955
A
18
19#if !UCONFIG_NO_CONVERSION
20
b75a7d8f 21#include "unicode/ucnv.h"
b75a7d8f 22#include "unicode/uset.h"
4388f060 23#include "unicode/utf8.h"
b75a7d8f
A
24#include "ucnv_bld.h"
25#include "ucnv_cnv.h"
0f5d89e8 26#include "ustr_imp.h"
b75a7d8f
A
27
28/* control optimizations according to the platform */
b75a7d8f 29#define LATIN1_UNROLL_FROM_UNICODE 1
b75a7d8f
A
30
31/* ISO 8859-1 --------------------------------------------------------------- */
32
374ca955 33/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
f3c0d7a5
A
34U_CDECL_BEGIN
35static void U_CALLCONV
b75a7d8f
A
36_Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
37 UErrorCode *pErrorCode) {
38 const uint8_t *source;
39 UChar *target;
40 int32_t targetCapacity, length;
41 int32_t *offsets;
42
43 int32_t sourceIndex;
44
45 /* set up the local pointers */
46 source=(const uint8_t *)pArgs->source;
47 target=pArgs->target;
73c04bcf 48 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f
A
49 offsets=pArgs->offsets;
50
51 sourceIndex=0;
52
53 /*
54 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
55 * for the minimum of the sourceLength and targetCapacity
56 */
73c04bcf 57 length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
b75a7d8f
A
58 if(length<=targetCapacity) {
59 targetCapacity=length;
60 } else {
61 /* target will be full */
62 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
63 length=targetCapacity;
64 }
65
46f4442e
A
66 if(targetCapacity>=8) {
67 /* This loop is unrolled for speed and improved pipelining. */
b75a7d8f
A
68 int32_t count, loops;
69
46f4442e
A
70 loops=count=targetCapacity>>3;
71 length=targetCapacity&=0x7;
b75a7d8f 72 do {
46f4442e
A
73 target[0]=source[0];
74 target[1]=source[1];
75 target[2]=source[2];
76 target[3]=source[3];
77 target[4]=source[4];
78 target[5]=source[5];
79 target[6]=source[6];
80 target[7]=source[7];
81 target+=8;
82 source+=8;
b75a7d8f
A
83 } while(--count>0);
84
85 if(offsets!=NULL) {
86 do {
46f4442e
A
87 offsets[0]=sourceIndex++;
88 offsets[1]=sourceIndex++;
89 offsets[2]=sourceIndex++;
90 offsets[3]=sourceIndex++;
91 offsets[4]=sourceIndex++;
92 offsets[5]=sourceIndex++;
93 offsets[6]=sourceIndex++;
94 offsets[7]=sourceIndex++;
95 offsets+=8;
b75a7d8f
A
96 } while(--loops>0);
97 }
98 }
b75a7d8f
A
99
100 /* conversion loop */
101 while(targetCapacity>0) {
102 *target++=*source++;
103 --targetCapacity;
104 }
105
106 /* write back the updated pointers */
107 pArgs->source=(const char *)source;
108 pArgs->target=target;
109
110 /* set offsets */
111 if(offsets!=NULL) {
112 while(length>0) {
113 *offsets++=sourceIndex++;
114 --length;
115 }
116 pArgs->offsets=offsets;
117 }
118}
119
374ca955 120/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
f3c0d7a5 121static UChar32 U_CALLCONV
b75a7d8f
A
122_Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
123 UErrorCode *pErrorCode) {
124 const uint8_t *source=(const uint8_t *)pArgs->source;
125 if(source<(const uint8_t *)pArgs->sourceLimit) {
126 pArgs->source=(const char *)(source+1);
127 return *source;
128 }
129
130 /* no output because of empty input */
131 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
132 return 0xffff;
133}
134
374ca955 135/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
f3c0d7a5 136static void U_CALLCONV
b75a7d8f
A
137_Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
138 UErrorCode *pErrorCode) {
139 UConverter *cnv;
374ca955
A
140 const UChar *source, *sourceLimit;
141 uint8_t *target, *oldTarget;
b75a7d8f
A
142 int32_t targetCapacity, length;
143 int32_t *offsets;
144
374ca955
A
145 UChar32 cp;
146 UChar c, max;
b75a7d8f
A
147
148 int32_t sourceIndex;
149
b75a7d8f
A
150 /* set up the local pointers */
151 cnv=pArgs->converter;
152 source=pArgs->source;
153 sourceLimit=pArgs->sourceLimit;
374ca955 154 target=oldTarget=(uint8_t *)pArgs->target;
73c04bcf 155 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f
A
156 offsets=pArgs->offsets;
157
158 if(cnv->sharedData==&_Latin1Data) {
159 max=0xff; /* Latin-1 */
160 } else {
161 max=0x7f; /* US-ASCII */
162 }
163
164 /* get the converter state from UConverter */
374ca955 165 cp=cnv->fromUChar32;
b75a7d8f
A
166
167 /* sourceIndex=-1 if the current character began in the previous buffer */
374ca955 168 sourceIndex= cp==0 ? 0 : -1;
b75a7d8f
A
169
170 /*
171 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
172 * for the minimum of the sourceLength and targetCapacity
173 */
73c04bcf 174 length=(int32_t)(sourceLimit-source);
b75a7d8f
A
175 if(length<targetCapacity) {
176 targetCapacity=length;
177 }
178
179 /* conversion loop */
374ca955 180 if(cp!=0 && targetCapacity>0) {
b75a7d8f
A
181 goto getTrail;
182 }
183
184#if LATIN1_UNROLL_FROM_UNICODE
185 /* unroll the loop with the most common case */
b75a7d8f
A
186 if(targetCapacity>=16) {
187 int32_t count, loops;
188 UChar u, oredChars;
189
190 loops=count=targetCapacity>>4;
191 do {
192 oredChars=u=*source++;
193 *target++=(uint8_t)u;
194 oredChars|=u=*source++;
195 *target++=(uint8_t)u;
196 oredChars|=u=*source++;
197 *target++=(uint8_t)u;
198 oredChars|=u=*source++;
199 *target++=(uint8_t)u;
200 oredChars|=u=*source++;
201 *target++=(uint8_t)u;
202 oredChars|=u=*source++;
203 *target++=(uint8_t)u;
204 oredChars|=u=*source++;
205 *target++=(uint8_t)u;
206 oredChars|=u=*source++;
207 *target++=(uint8_t)u;
208 oredChars|=u=*source++;
209 *target++=(uint8_t)u;
210 oredChars|=u=*source++;
211 *target++=(uint8_t)u;
212 oredChars|=u=*source++;
213 *target++=(uint8_t)u;
214 oredChars|=u=*source++;
215 *target++=(uint8_t)u;
216 oredChars|=u=*source++;
217 *target++=(uint8_t)u;
218 oredChars|=u=*source++;
219 *target++=(uint8_t)u;
220 oredChars|=u=*source++;
221 *target++=(uint8_t)u;
222 oredChars|=u=*source++;
223 *target++=(uint8_t)u;
224
225 /* were all 16 entries really valid? */
226 if(oredChars>max) {
227 /* no, return to the first of these 16 */
228 source-=16;
229 target-=16;
230 break;
231 }
232 } while(--count>0);
233 count=loops-count;
234 targetCapacity-=16*count;
235
236 if(offsets!=NULL) {
374ca955 237 oldTarget+=16*count;
b75a7d8f
A
238 while(count>0) {
239 *offsets++=sourceIndex++;
240 *offsets++=sourceIndex++;
241 *offsets++=sourceIndex++;
242 *offsets++=sourceIndex++;
243 *offsets++=sourceIndex++;
244 *offsets++=sourceIndex++;
245 *offsets++=sourceIndex++;
246 *offsets++=sourceIndex++;
247 *offsets++=sourceIndex++;
248 *offsets++=sourceIndex++;
249 *offsets++=sourceIndex++;
250 *offsets++=sourceIndex++;
251 *offsets++=sourceIndex++;
252 *offsets++=sourceIndex++;
253 *offsets++=sourceIndex++;
254 *offsets++=sourceIndex++;
255 --count;
256 }
257 }
b75a7d8f
A
258 }
259#endif
260
374ca955
A
261 /* conversion loop */
262 c=0;
263 while(targetCapacity>0 && (c=*source++)<=max) {
264 /* convert the Unicode code point */
265 *target++=(uint8_t)c;
266 --targetCapacity;
267 }
268
269 if(c>max) {
270 cp=c;
271 if(!U_IS_SURROGATE(cp)) {
272 /* callback(unassigned) */
273 } else if(U_IS_SURROGATE_LEAD(cp)) {
b75a7d8f 274getTrail:
374ca955
A
275 if(source<sourceLimit) {
276 /* test the following code unit */
277 UChar trail=*source;
278 if(U16_IS_TRAIL(trail)) {
279 ++source;
280 cp=U16_GET_SUPPLEMENTARY(cp, trail);
281 /* this codepage does not map supplementary code points */
282 /* callback(unassigned) */
b75a7d8f 283 } else {
374ca955
A
284 /* this is an unmatched lead code unit (1st surrogate) */
285 /* callback(illegal) */
b75a7d8f
A
286 }
287 } else {
374ca955
A
288 /* no more input */
289 cnv->fromUChar32=cp;
290 goto noMoreInput;
b75a7d8f 291 }
374ca955
A
292 } else {
293 /* this is an unmatched trail code unit (2nd surrogate) */
294 /* callback(illegal) */
b75a7d8f 295 }
b75a7d8f 296
374ca955
A
297 *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
298 cnv->fromUChar32=cp;
b75a7d8f 299 }
374ca955 300noMoreInput:
b75a7d8f 301
374ca955 302 /* set offsets since the start */
b75a7d8f 303 if(offsets!=NULL) {
374ca955 304 size_t count=target-oldTarget;
b75a7d8f
A
305 while(count>0) {
306 *offsets++=sourceIndex++;
307 --count;
308 }
309 }
310
374ca955
A
311 if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
312 /* target is full */
313 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
b75a7d8f
A
314 }
315
316 /* write back the updated pointers */
317 pArgs->source=source;
318 pArgs->target=(char *)target;
319 pArgs->offsets=offsets;
320}
321
46f4442e 322/* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
f3c0d7a5 323static void U_CALLCONV
46f4442e
A
324ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
325 UConverterToUnicodeArgs *pToUArgs,
326 UErrorCode *pErrorCode) {
327 UConverter *utf8;
328 const uint8_t *source, *sourceLimit;
329 uint8_t *target;
330 int32_t targetCapacity;
331
332 UChar32 c;
333 uint8_t b, t1;
334
335 /* set up the local pointers */
336 utf8=pToUArgs->converter;
337 source=(uint8_t *)pToUArgs->source;
338 sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
339 target=(uint8_t *)pFromUArgs->target;
340 targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
341
342 /* get the converter state from the UTF-8 UConverter */
0f5d89e8
A
343 if (utf8->toULength > 0) {
344 c=(UChar32)utf8->toUnicodeStatus;
345 } else {
346 c = 0;
347 }
46f4442e
A
348 if(c!=0 && source<sourceLimit) {
349 if(targetCapacity==0) {
350 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
351 return;
352 } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
353 ++source;
354 *target++=(uint8_t)(((c&3)<<6)|t1);
355 --targetCapacity;
356
357 utf8->toUnicodeStatus=0;
358 utf8->toULength=0;
359 } else {
360 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
361 *pErrorCode=U_USING_DEFAULT_WARNING;
362 return;
363 }
364 }
365
366 /*
367 * Make sure that the last byte sequence before sourceLimit is complete
368 * or runs into a lead byte.
369 * In the conversion loop compare source with sourceLimit only once
370 * per multi-byte character.
371 * For Latin-1, adjust sourceLimit only for 1 trail byte because
372 * the conversion loop handles at most 2-byte sequences.
373 */
374 if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
375 --sourceLimit;
376 }
377
378 /* conversion loop */
379 while(source<sourceLimit) {
380 if(targetCapacity>0) {
381 b=*source++;
0f5d89e8 382 if(U8_IS_SINGLE(b)) {
46f4442e
A
383 /* convert ASCII */
384 *target++=(uint8_t)b;
385 --targetCapacity;
386 } else if( /* handle U+0080..U+00FF inline */
387 b>=0xc2 && b<=0xc3 &&
388 (t1=(uint8_t)(*source-0x80)) <= 0x3f
389 ) {
390 ++source;
391 *target++=(uint8_t)(((b&3)<<6)|t1);
392 --targetCapacity;
393 } else {
394 /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
395 pToUArgs->source=(char *)(source-1);
396 pFromUArgs->target=(char *)target;
397 *pErrorCode=U_USING_DEFAULT_WARNING;
398 return;
399 }
400 } else {
401 /* target is full */
402 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
403 break;
404 }
405 }
406
407 /*
408 * The sourceLimit may have been adjusted before the conversion loop
409 * to stop before a truncated sequence.
410 * If so, then collect the truncated sequence now.
411 * For Latin-1, there is at most exactly one lead byte because of the
412 * smaller sourceLimit adjustment logic.
413 */
414 if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
415 utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
416 utf8->toULength=1;
0f5d89e8 417 utf8->mode=U8_COUNT_BYTES(b);
46f4442e
A
418 }
419
420 /* write back the updated pointers */
421 pToUArgs->source=(char *)source;
422 pFromUArgs->target=(char *)target;
423}
424
f3c0d7a5 425static void U_CALLCONV
b75a7d8f 426_Latin1GetUnicodeSet(const UConverter *cnv,
73c04bcf 427 const USetAdder *sa,
b75a7d8f
A
428 UConverterUnicodeSet which,
429 UErrorCode *pErrorCode) {
f3c0d7a5
A
430 (void)cnv;
431 (void)which;
432 (void)pErrorCode;
374ca955 433 sa->addRange(sa->set, 0, 0xff);
b75a7d8f 434}
f3c0d7a5
A
435U_CDECL_END
436
b75a7d8f
A
437
438static const UConverterImpl _Latin1Impl={
439 UCNV_LATIN_1,
440
441 NULL,
442 NULL,
443
444 NULL,
445 NULL,
446 NULL,
447
448 _Latin1ToUnicodeWithOffsets,
449 _Latin1ToUnicodeWithOffsets,
450 _Latin1FromUnicodeWithOffsets,
451 _Latin1FromUnicodeWithOffsets,
452 _Latin1GetNextUChar,
453
454 NULL,
455 NULL,
456 NULL,
457 NULL,
46f4442e
A
458 _Latin1GetUnicodeSet,
459
460 NULL,
461 ucnv_Latin1FromUTF8
b75a7d8f
A
462};
463
464static const UConverterStaticData _Latin1StaticData={
465 sizeof(UConverterStaticData),
466 "ISO-8859-1",
467 819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
468 { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
469 0,
470 0,
471 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
472};
473
2ca993e8
A
474const UConverterSharedData _Latin1Data=
475 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
b75a7d8f
A
476
477/* US-ASCII ----------------------------------------------------------------- */
478
f3c0d7a5 479U_CDECL_BEGIN
374ca955 480/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
f3c0d7a5 481static void U_CALLCONV
b75a7d8f
A
482_ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
483 UErrorCode *pErrorCode) {
374ca955
A
484 const uint8_t *source, *sourceLimit;
485 UChar *target, *oldTarget;
b75a7d8f
A
486 int32_t targetCapacity, length;
487 int32_t *offsets;
488
489 int32_t sourceIndex;
490
374ca955
A
491 uint8_t c;
492
b75a7d8f
A
493 /* set up the local pointers */
494 source=(const uint8_t *)pArgs->source;
495 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
374ca955 496 target=oldTarget=pArgs->target;
73c04bcf 497 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
b75a7d8f
A
498 offsets=pArgs->offsets;
499
500 /* sourceIndex=-1 if the current character began in the previous buffer */
501 sourceIndex=0;
b75a7d8f
A
502
503 /*
504 * since the conversion here is 1:1 UChar:uint8_t, we need only one counter
505 * for the minimum of the sourceLength and targetCapacity
506 */
73c04bcf 507 length=(int32_t)(sourceLimit-source);
b75a7d8f
A
508 if(length<targetCapacity) {
509 targetCapacity=length;
510 }
511
46f4442e
A
512 if(targetCapacity>=8) {
513 /* This loop is unrolled for speed and improved pipelining. */
b75a7d8f
A
514 int32_t count, loops;
515 UChar oredChars;
516
46f4442e 517 loops=count=targetCapacity>>3;
b75a7d8f 518 do {
46f4442e
A
519 oredChars=target[0]=source[0];
520 oredChars|=target[1]=source[1];
521 oredChars|=target[2]=source[2];
522 oredChars|=target[3]=source[3];
523 oredChars|=target[4]=source[4];
524 oredChars|=target[5]=source[5];
525 oredChars|=target[6]=source[6];
526 oredChars|=target[7]=source[7];
b75a7d8f
A
527
528 /* were all 16 entries really valid? */
529 if(oredChars>0x7f) {
530 /* no, return to the first of these 16 */
b75a7d8f
A
531 break;
532 }
46f4442e
A
533 source+=8;
534 target+=8;
b75a7d8f
A
535 } while(--count>0);
536 count=loops-count;
46f4442e 537 targetCapacity-=count*8;
b75a7d8f
A
538
539 if(offsets!=NULL) {
46f4442e 540 oldTarget+=count*8;
b75a7d8f 541 while(count>0) {
46f4442e
A
542 offsets[0]=sourceIndex++;
543 offsets[1]=sourceIndex++;
544 offsets[2]=sourceIndex++;
545 offsets[3]=sourceIndex++;
546 offsets[4]=sourceIndex++;
547 offsets[5]=sourceIndex++;
548 offsets[6]=sourceIndex++;
549 offsets[7]=sourceIndex++;
550 offsets+=8;
b75a7d8f
A
551 --count;
552 }
553 }
554 }
b75a7d8f
A
555
556 /* conversion loop */
374ca955
A
557 c=0;
558 while(targetCapacity>0 && (c=*source++)<=0x7f) {
559 *target++=c;
560 --targetCapacity;
b75a7d8f
A
561 }
562
374ca955
A
563 if(c>0x7f) {
564 /* callback(illegal); copy the current bytes to toUBytes[] */
565 UConverter *cnv=pArgs->converter;
566 cnv->toUBytes[0]=c;
567 cnv->toULength=1;
568 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
569 } else if(source<sourceLimit && target>=pArgs->targetLimit) {
b75a7d8f
A
570 /* target is full */
571 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
572 }
573
374ca955 574 /* set offsets since the start */
b75a7d8f 575 if(offsets!=NULL) {
374ca955 576 size_t count=target-oldTarget;
b75a7d8f
A
577 while(count>0) {
578 *offsets++=sourceIndex++;
579 --count;
580 }
581 }
582
583 /* write back the updated pointers */
584 pArgs->source=(const char *)source;
585 pArgs->target=target;
586 pArgs->offsets=offsets;
587}
588
374ca955 589/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
f3c0d7a5 590static UChar32 U_CALLCONV
b75a7d8f
A
591_ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
592 UErrorCode *pErrorCode) {
b75a7d8f
A
593 const uint8_t *source;
594 uint8_t b;
595
b75a7d8f 596 source=(const uint8_t *)pArgs->source;
374ca955 597 if(source<(const uint8_t *)pArgs->sourceLimit) {
b75a7d8f
A
598 b=*source++;
599 pArgs->source=(const char *)source;
600 if(b<=0x7f) {
601 return b;
602 } else {
b75a7d8f 603 UConverter *cnv=pArgs->converter;
374ca955
A
604 cnv->toUBytes[0]=b;
605 cnv->toULength=1;
b75a7d8f 606 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
374ca955 607 return 0xffff;
b75a7d8f
A
608 }
609 }
610
374ca955 611 /* no output because of empty input */
b75a7d8f
A
612 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
613 return 0xffff;
614}
615
46f4442e 616/* "Convert" UTF-8 to US-ASCII: Validate and copy. */
f3c0d7a5 617static void U_CALLCONV
46f4442e
A
618ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
619 UConverterToUnicodeArgs *pToUArgs,
620 UErrorCode *pErrorCode) {
621 const uint8_t *source, *sourceLimit;
622 uint8_t *target;
623 int32_t targetCapacity, length;
624
625 uint8_t c;
626
0f5d89e8 627 if(pToUArgs->converter->toULength > 0) {
46f4442e
A
628 /* no handling of partial UTF-8 characters here, fall back to pivoting */
629 *pErrorCode=U_USING_DEFAULT_WARNING;
630 return;
631 }
632
633 /* set up the local pointers */
634 source=(const uint8_t *)pToUArgs->source;
635 sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
636 target=(uint8_t *)pFromUArgs->target;
637 targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
638
639 /*
640 * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
641 * for the minimum of the sourceLength and targetCapacity
642 */
643 length=(int32_t)(sourceLimit-source);
644 if(length<targetCapacity) {
645 targetCapacity=length;
646 }
647
648 /* unroll the loop with the most common case */
649 if(targetCapacity>=16) {
650 int32_t count, loops;
651 uint8_t oredChars;
652
653 loops=count=targetCapacity>>4;
654 do {
655 oredChars=*target++=*source++;
656 oredChars|=*target++=*source++;
657 oredChars|=*target++=*source++;
658 oredChars|=*target++=*source++;
659 oredChars|=*target++=*source++;
660 oredChars|=*target++=*source++;
661 oredChars|=*target++=*source++;
662 oredChars|=*target++=*source++;
663 oredChars|=*target++=*source++;
664 oredChars|=*target++=*source++;
665 oredChars|=*target++=*source++;
666 oredChars|=*target++=*source++;
667 oredChars|=*target++=*source++;
668 oredChars|=*target++=*source++;
669 oredChars|=*target++=*source++;
670 oredChars|=*target++=*source++;
671
672 /* were all 16 entries really valid? */
673 if(oredChars>0x7f) {
674 /* no, return to the first of these 16 */
675 source-=16;
676 target-=16;
677 break;
678 }
679 } while(--count>0);
680 count=loops-count;
681 targetCapacity-=16*count;
682 }
683
684 /* conversion loop */
685 c=0;
686 while(targetCapacity>0 && (c=*source)<=0x7f) {
687 ++source;
688 *target++=c;
689 --targetCapacity;
690 }
691
692 if(c>0x7f) {
693 /* non-ASCII character, handle in standard converter */
694 *pErrorCode=U_USING_DEFAULT_WARNING;
695 } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
696 /* target is full */
697 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
698 }
699
700 /* write back the updated pointers */
701 pToUArgs->source=(const char *)source;
702 pFromUArgs->target=(char *)target;
703}
704
f3c0d7a5 705static void U_CALLCONV
b75a7d8f 706_ASCIIGetUnicodeSet(const UConverter *cnv,
73c04bcf 707 const USetAdder *sa,
b75a7d8f
A
708 UConverterUnicodeSet which,
709 UErrorCode *pErrorCode) {
f3c0d7a5
A
710 (void)cnv;
711 (void)which;
712 (void)pErrorCode;
374ca955 713 sa->addRange(sa->set, 0, 0x7f);
b75a7d8f 714}
f3c0d7a5 715U_CDECL_END
b75a7d8f
A
716
717static const UConverterImpl _ASCIIImpl={
718 UCNV_US_ASCII,
719
720 NULL,
721 NULL,
722
723 NULL,
724 NULL,
725 NULL,
726
727 _ASCIIToUnicodeWithOffsets,
728 _ASCIIToUnicodeWithOffsets,
729 _Latin1FromUnicodeWithOffsets,
730 _Latin1FromUnicodeWithOffsets,
731 _ASCIIGetNextUChar,
732
733 NULL,
734 NULL,
735 NULL,
736 NULL,
46f4442e
A
737 _ASCIIGetUnicodeSet,
738
739 NULL,
740 ucnv_ASCIIFromUTF8
b75a7d8f
A
741};
742
743static const UConverterStaticData _ASCIIStaticData={
744 sizeof(UConverterStaticData),
745 "US-ASCII",
746 367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
747 { 0x1a, 0, 0, 0 }, 1, FALSE, FALSE,
748 0,
749 0,
750 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
751};
752
2ca993e8
A
753const UConverterSharedData _ASCIIData=
754 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
374ca955
A
755
756#endif