]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnv_u16.c
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / ucnv_u16.c
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
3* Copyright (C) 2002-2003, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6* file name: ucnv_u16.c
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2002jul01
12* created by: Markus W. Scherer
13*
14* UTF-16 converter implementation. Used to be in ucnv_utf.c.
15*/
16
17#include "unicode/utypes.h"
18#include "unicode/ucnv.h"
19#include "unicode/ucnv_err.h"
20#include "ucnv_bld.h"
21#include "ucnv_cnv.h"
22#include "cmemory.h"
23
24/* UTF-16 Platform Endian --------------------------------------------------- */
25
26static void
27_UTF16PEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
28 UErrorCode *pErrorCode) {
29 UConverter *cnv = pArgs->converter;
30 const uint8_t *source = (const uint8_t *)pArgs->source;
31 UChar *target = pArgs->target;
32 int32_t *offsets = pArgs->offsets;
33 int32_t targetCapacity = pArgs->targetLimit - pArgs->target;
34 int32_t length = (const uint8_t *)pArgs->sourceLimit - source;
35 int32_t count;
36 int32_t sourceIndex = 0;
37
38 if(length <= 0 && cnv->toUnicodeStatus == 0) {
39 /* no input, nothing to do */
40 return;
41 }
42
43 if(targetCapacity <= 0) {
44 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
45 return;
46 }
47
48 /* complete a partial UChar from the last call */
49 if(length != 0 && cnv->toUnicodeStatus != 0) {
50 /*
51 * copy the byte from the last call and the first one here into the target,
52 * byte-wise to keep the platform endianness
53 */
54 uint8_t *p = (uint8_t *)target++;
55 *p++ = (uint8_t)cnv->toUnicodeStatus;
56 cnv->toUnicodeStatus = 0;
57 *p = *source++;
58 --length;
59 --targetCapacity;
60 if(offsets != NULL) {
61 *offsets++ = -1;
62 }
63 }
64
65 /* copy an even number of bytes for complete UChars */
66 count = 2 * targetCapacity;
67 if(count > length) {
68 count = length & ~1;
69 }
70 if(count > 0) {
71 uprv_memcpy(target, source, count);
72 source += count;
73 length -= count;
74 count >>= 1;
75 target += count;
76 targetCapacity -= count;
77 if(offsets != NULL) {
78 while(count > 0) {
79 *offsets++ = sourceIndex;
80 sourceIndex += 2;
81 --count;
82 }
83 }
84 }
85
86 /* check for a remaining source byte and store the status */
87 if(length >= 2) {
88 /* it must be targetCapacity==0 because otherwise the above would have copied more */
89 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
90 } else if(length == 1) {
91 if(pArgs->flush) {
92 /* a UChar remains incomplete */
93 *pErrorCode = U_TRUNCATED_CHAR_FOUND;
94 } else {
95 /* consume the last byte and store it, making sure that it will never set the status to 0 */
96 cnv->toUnicodeStatus = *source++ | 0x100;
97 }
98 } else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
99 /* a UChar remains incomplete */
100 *pErrorCode = U_TRUNCATED_CHAR_FOUND;
101 }
102
103 /* write back the updated pointers */
104 pArgs->source = (const char *)source;
105 pArgs->target = target;
106 pArgs->offsets = offsets;
107}
108
109static void
110_UTF16PEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
111 UErrorCode *pErrorCode) {
112 UConverter *cnv = pArgs->converter;
113 const UChar *source = pArgs->source;
114 uint8_t *target = (uint8_t *)pArgs->target;
115 int32_t *offsets = pArgs->offsets;
116 int32_t targetCapacity = pArgs->targetLimit - pArgs->target;
117 int32_t length = pArgs->sourceLimit - source;
118 int32_t count;
119 int32_t sourceIndex = 0;
120
121 if(length <= 0 && cnv->fromUnicodeStatus == 0) {
122 /* no input, nothing to do */
123 return;
124 }
125
126 if(targetCapacity <= 0) {
127 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
128 return;
129 }
130
131 /* complete a partial UChar from the last call */
132 if(cnv->fromUnicodeStatus != 0) {
133 *target++ = (uint8_t)cnv->fromUnicodeStatus;
134 cnv->fromUnicodeStatus = 0;
135 --targetCapacity;
136 if(offsets != NULL) {
137 *offsets++ = -1;
138 }
139 }
140
141 /* copy an even number of bytes for complete UChars */
142 count = 2 * length;
143 if(count > targetCapacity) {
144 count = targetCapacity & ~1;
145 }
146 if(count>0) {
147 uprv_memcpy(target, source, count);
148 target += count;
149 targetCapacity -= count;
150 count >>= 1;
151 source += count;
152 length -= count;
153 if(offsets != NULL) {
154 while(count > 0) {
155 *offsets++ = sourceIndex;
156 *offsets++ = sourceIndex++;
157 --count;
158 }
159 }
160 }
161
162 if(length > 0) {
163 /* it must be targetCapacity<=1 because otherwise the above would have copied more */
164 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
165 if(targetCapacity > 0) /* targetCapacity==1 */ {
166 /* copy one byte and keep the other in the status */
167 const uint8_t *p = (const uint8_t *)source++;
168 *target++ = *p++;
169 cnv->fromUnicodeStatus = *p | 0x100;
170 if(offsets != NULL) {
171 *offsets++ = sourceIndex;
172 }
173 }
174 }
175
176 /* write back the updated pointers */
177 pArgs->source = source;
178 pArgs->target = (char *)target;
179 pArgs->offsets = offsets;
180}
181
182/* UTF-16 Opposite Endian --------------------------------------------------- */
183
184/*
185 * For opposite-endian UTF-16, we keep a byte pointer to the UChars
186 * and copy two bytes at a time and reverse them.
187 */
188
189static void
190_UTF16OEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
191 UErrorCode *pErrorCode) {
192 UConverter *cnv = pArgs->converter;
193 const uint8_t *source = (const uint8_t *)pArgs->source;
194 UChar *target = pArgs->target;
195 uint8_t *target8 = (uint8_t *)target; /* byte pointer to the target */
196 int32_t *offsets = pArgs->offsets;
197 int32_t targetCapacity = pArgs->targetLimit - pArgs->target;
198 int32_t length = (const uint8_t *)pArgs->sourceLimit - source;
199 int32_t count;
200 int32_t sourceIndex = 0;
201
202 if(length <= 0 && cnv->toUnicodeStatus == 0) {
203 /* no input, nothing to do */
204 return;
205 }
206
207 if(targetCapacity <= 0) {
208 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
209 return;
210 }
211
212 /* complete a partial UChar from the last call */
213 if(length != 0 && cnv->toUnicodeStatus != 0) {
214 /*
215 * copy the byte from the last call and the first one here into the target,
216 * byte-wise, reversing the platform endianness
217 */
218 *target8++ = *source++;
219 *target8++ = (uint8_t)cnv->toUnicodeStatus;
220 cnv->toUnicodeStatus = 0;
221 ++target;
222 --length;
223 --targetCapacity;
224 if(offsets != NULL) {
225 *offsets++ = -1;
226 }
227 }
228
229 /* copy an even number of bytes for complete UChars */
230 count = 2 * targetCapacity;
231 if(count > length) {
232 count = length & ~1;
233 }
234 if(count>0) {
235 length -= count;
236 count >>= 1;
237 targetCapacity -= count;
238 if(offsets == NULL) {
239 while(count > 0) {
240 target8[1] = *source++;
241 target8[0] = *source++;
242 target8 += 2;
243 --count;
244 }
245 } else {
246 while(count>0) {
247 target8[1] = *source++;
248 target8[0] = *source++;
249 target8 += 2;
250 *offsets++ = sourceIndex;
251 sourceIndex += 2;
252 --count;
253 }
254 }
255 target=(UChar *)target8;
256 }
257
258 /* check for a remaining source byte and store the status */
259 if(length >= 2) {
260 /* it must be targetCapacity==0 because otherwise the above would have copied more */
261 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
262 } else if(length == 1) {
263 if(pArgs->flush) {
264 /* a UChar remains incomplete */
265 *pErrorCode = U_TRUNCATED_CHAR_FOUND;
266 } else {
267 /* consume the last byte and store it, making sure that it will never set the status to 0 */
268 cnv->toUnicodeStatus = *source++ | 0x100;
269 }
270 } else /* length==0 */ if(cnv->toUnicodeStatus!=0 && pArgs->flush) {
271 /* a UChar remains incomplete */
272 *pErrorCode = U_TRUNCATED_CHAR_FOUND;
273 }
274
275 /* write back the updated pointers */
276 pArgs->source = (const char *)source;
277 pArgs->target = target;
278 pArgs->offsets = offsets;
279}
280
281static void
282_UTF16OEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
283 UErrorCode *pErrorCode) {
284 UConverter *cnv = pArgs->converter;
285 const UChar *source = pArgs->source;
286 const uint8_t *source8 = (const uint8_t *)source; /* byte pointer to the source */
287 uint8_t *target = (uint8_t *)pArgs->target;
288 int32_t *offsets = pArgs->offsets;
289 int32_t targetCapacity = pArgs->targetLimit - pArgs->target;
290 int32_t length = pArgs->sourceLimit - source;
291 int32_t count;
292 int32_t sourceIndex = 0;
293
294 if(length <= 0 && cnv->fromUnicodeStatus == 0) {
295 /* no input, nothing to do */
296 return;
297 }
298
299 if(targetCapacity <= 0) {
300 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
301 return;
302 }
303
304 /* complete a partial UChar from the last call */
305 if(cnv->fromUnicodeStatus != 0) {
306 *target++ = (uint8_t)cnv->fromUnicodeStatus;
307 cnv->fromUnicodeStatus = 0;
308 --targetCapacity;
309 if(offsets != NULL) {
310 *offsets++ = -1;
311 }
312 }
313
314 /* copy an even number of bytes for complete UChars */
315 count = 2 * length;
316 if(count > targetCapacity) {
317 count = targetCapacity & ~1;
318 }
319 if(count > 0) {
320 targetCapacity -= count;
321 count >>= 1;
322 length -= count;
323 if(offsets == NULL) {
324 while(count > 0) {
325 target[1] = *source8++;
326 target[0] = *source8++;
327 target += 2;
328 --count;
329 }
330 } else {
331 while(count>0) {
332 target[1] = *source8++;
333 target[0] = *source8++;
334 target += 2;
335 *offsets++ = sourceIndex;
336 *offsets++ = sourceIndex++;
337 --count;
338 }
339 }
340 source=(const UChar *)source8;
341 }
342
343 if(length > 0) {
344 /* it must be targetCapacity<=1 because otherwise the above would have copied more */
345 *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
346 if(targetCapacity > 0) /* targetCapacity==1 */ {
347 /* copy one byte and keep the other in the status */
348 cnv->fromUnicodeStatus = *source8++ | 0x100;
349 *target++ = *source8;
350 ++source;
351 if(offsets != NULL) {
352 *offsets++ = sourceIndex;
353 }
354 }
355 }
356
357 /* write back the updated pointers */
358 pArgs->source = source;
359 pArgs->target = (char *)target;
360 pArgs->offsets = offsets;
361}
362
363/* UTF-16BE ----------------------------------------------------------------- */
364
/*
 * Bind the UTF-16BE/UTF-16LE function names to the platform-endian (PE)
 * or opposite-endian (OE) implementation, depending on the byte order of
 * the platform.
 */
#if U_IS_BIG_ENDIAN
    /* big-endian platform: BE is the platform order */
#   define _UTF16BEToUnicodeWithOffsets   _UTF16PEToUnicodeWithOffsets
#   define _UTF16BEFromUnicodeWithOffsets _UTF16PEFromUnicodeWithOffsets
#   define _UTF16LEToUnicodeWithOffsets   _UTF16OEToUnicodeWithOffsets
#   define _UTF16LEFromUnicodeWithOffsets _UTF16OEFromUnicodeWithOffsets
#else
    /* little-endian platform: LE is the platform order */
#   define _UTF16BEToUnicodeWithOffsets   _UTF16OEToUnicodeWithOffsets
#   define _UTF16BEFromUnicodeWithOffsets _UTF16OEFromUnicodeWithOffsets
#   define _UTF16LEToUnicodeWithOffsets   _UTF16PEToUnicodeWithOffsets
#   define _UTF16LEFromUnicodeWithOffsets _UTF16PEFromUnicodeWithOffsets
#endif
376
377static UChar32 T_UConverter_getNextUChar_UTF16_BE(UConverterToUnicodeArgs* args,
378 UErrorCode* err)
379{
380 UChar32 myUChar;
381 uint16_t first;
382 /*Checks boundaries and set appropriate error codes*/
383 if (args->source+2 > args->sourceLimit)
384 {
385 if (args->source >= args->sourceLimit)
386 {
387 /*Either caller has reached the end of the byte stream*/
388 *err = U_INDEX_OUTOFBOUNDS_ERROR;
389 }
390 else
391 {
392 /* a character was cut in half*/
393 *err = U_TRUNCATED_CHAR_FOUND;
394 }
395 return 0xffff;
396 }
397
398 /*Gets the corresponding codepoint*/
399 first = (uint16_t)(((uint16_t)(*(args->source)) << 8) |((uint8_t)*((args->source)+1)));
400 myUChar = first;
401 args->source += 2;
402
403 if(UTF_IS_FIRST_SURROGATE(first)) {
404 uint16_t second;
405
406 if (args->source+2 > args->sourceLimit) {
407 *err = U_TRUNCATED_CHAR_FOUND;
408 return 0xffff;
409 }
410
411 /* get the second surrogate and assemble the code point */
412 second = (uint16_t)(((uint16_t)(*(args->source)) << 8) |((uint8_t)*(args->source+1)));
413
414 /* ignore unmatched surrogates and just deliver the first one in such a case */
415 if(UTF_IS_SECOND_SURROGATE(second)) {
416 /* matched pair, get pair value */
417 myUChar = UTF16_GET_PAIR_VALUE(first, second);
418 args->source += 2;
419 }
420 }
421
422 return myUChar;
423}
424
425static const UConverterImpl _UTF16BEImpl={
426 UCNV_UTF16_BigEndian,
427
428 NULL,
429 NULL,
430
431 NULL,
432 NULL,
433 NULL,
434
435 _UTF16BEToUnicodeWithOffsets,
436 _UTF16BEToUnicodeWithOffsets,
437 _UTF16BEFromUnicodeWithOffsets,
438 _UTF16BEFromUnicodeWithOffsets,
439 T_UConverter_getNextUChar_UTF16_BE,
440
441 NULL,
442 NULL,
443 NULL,
444 NULL,
445 ucnv_getCompleteUnicodeSet
446};
447
448/* The 1200 CCSID refers to any version of Unicode with any endianess of UTF-16 */
449static const UConverterStaticData _UTF16BEStaticData={
450 sizeof(UConverterStaticData),
451 "UTF-16BE",
452 1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
453 { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE,
454 0,
455 0,
456 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
457};
458
459
460const UConverterSharedData _UTF16BEData={
461 sizeof(UConverterSharedData), ~((uint32_t) 0),
462 NULL, NULL, &_UTF16BEStaticData, FALSE, &_UTF16BEImpl,
463 0
464};
465
466/* UTF-16LE ----------------------------------------------------------------- */
467
468static UChar32 T_UConverter_getNextUChar_UTF16_LE(UConverterToUnicodeArgs* args,
469 UErrorCode* err)
470{
471 UChar32 myUChar;
472 uint16_t first;
473 /*Checks boundaries and set appropriate error codes*/
474 if (args->source+2 > args->sourceLimit)
475 {
476 if (args->source >= args->sourceLimit)
477 {
478 /*Either caller has reached the end of the byte stream*/
479 *err = U_INDEX_OUTOFBOUNDS_ERROR;
480 }
481 else
482 {
483 /* a character was cut in half*/
484 *err = U_TRUNCATED_CHAR_FOUND;
485 }
486
487 return 0xffff;
488 }
489
490 /*Gets the corresponding codepoint*/
491 first = (uint16_t)(((uint16_t)*((args->source)+1) << 8) | ((uint8_t)(*(args->source))));
492 myUChar=first;
493 /*updates the source*/
494 args->source += 2;
495
496 if (UTF_IS_FIRST_SURROGATE(first))
497 {
498 uint16_t second;
499
500 if (args->source+2 > args->sourceLimit)
501 {
502 *err = U_TRUNCATED_CHAR_FOUND;
503 return 0xffff;
504 }
505
506 /* get the second surrogate and assemble the code point */
507 second = (uint16_t)(((uint16_t)*(args->source+1) << 8) |((uint8_t)(*(args->source))));
508
509 /* ignore unmatched surrogates and just deliver the first one in such a case */
510 if(UTF_IS_SECOND_SURROGATE(second))
511 {
512 /* matched pair, get pair value */
513 myUChar = UTF16_GET_PAIR_VALUE(first, second);
514 args->source += 2;
515 }
516 }
517
518 return myUChar;
519}
520
521static const UConverterImpl _UTF16LEImpl={
522 UCNV_UTF16_LittleEndian,
523
524 NULL,
525 NULL,
526
527 NULL,
528 NULL,
529 NULL,
530
531 _UTF16LEToUnicodeWithOffsets,
532 _UTF16LEToUnicodeWithOffsets,
533 _UTF16LEFromUnicodeWithOffsets,
534 _UTF16LEFromUnicodeWithOffsets,
535 T_UConverter_getNextUChar_UTF16_LE,
536
537 NULL,
538 NULL,
539 NULL,
540 NULL,
541 ucnv_getCompleteUnicodeSet
542};
543
544
545/* The 1200 CCSID refers to any version of Unicode with any endianess of UTF-16 */
546static const UConverterStaticData _UTF16LEStaticData={
547 sizeof(UConverterStaticData),
548 "UTF-16LE",
549 1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2,
550 { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE,
551 0,
552 0,
553 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
554};
555
556
557const UConverterSharedData _UTF16LEData={
558 sizeof(UConverterSharedData), ~((uint32_t) 0),
559 NULL, NULL, &_UTF16LEStaticData, FALSE, &_UTF16LEImpl,
560 0
561};
562
563/* UTF-16 (Detect BOM) ------------------------------------------------------ */
564
565/*
566 * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE
567 * accordingly.
568 * This is a simpler version of the UTF-32 converter below, with
569 * fewer states for shorter BOMs.
570 *
571 * State values:
572 * 0 initial state
573 * 1 saw FE
574 * 2..4 -
575 * 5 saw FF
576 * 6..7 -
577 * 8 UTF-16BE mode
578 * 9 UTF-16LE mode
579 *
580 * During detection: state&3==number of matching bytes so far.
581 *
582 * On output, emit U+FEFF as the first code point.
583 */
584
585static void
586_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
587 if(choice<=UCNV_RESET_TO_UNICODE) {
588 /* reset toUnicode: state=0 */
589 cnv->mode=0;
590 }
591 if(choice!=UCNV_RESET_TO_UNICODE) {
592 /* reset fromUnicode: prepare to output the UTF-16PE BOM */
593 cnv->charErrorBufferLength=2;
594#if U_IS_BIG_ENDIAN
595 cnv->charErrorBuffer[0]=0xfe;
596 cnv->charErrorBuffer[1]=0xff;
597#else
598 cnv->charErrorBuffer[0]=0xff;
599 cnv->charErrorBuffer[1]=0xfe;
600#endif
601 }
602}
603
604static void
605_UTF16Open(UConverter *cnv,
606 const char *name,
607 const char *locale,
608 uint32_t options,
609 UErrorCode *pErrorCode) {
610 _UTF16Reset(cnv, UCNV_RESET_BOTH);
611}
612
/*
 * BOM byte sequences, one per group of four entries:
 * FE FF starting at index 0 and FF FE starting at index 4,
 * each padded with zeros.
 */
static const char utf16BOM[8] = {
    (char)0xfe, (char)0xff, 0, 0,
    (char)0xff, (char)0xfe, 0, 0
};
614
615static void
616_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
617 UErrorCode *pErrorCode) {
618 UConverter *cnv=pArgs->converter;
619 const char *source=pArgs->source;
620 const char *sourceLimit=pArgs->sourceLimit;
621 int32_t *offsets=pArgs->offsets;
622
623 int32_t state, offsetDelta;
624 char b;
625
626 state=cnv->mode;
627
628 /*
629 * If we detect a BOM in this buffer, then we must add the BOM size to the
630 * offsets because the actual converter function will not see and count the BOM.
631 * offsetDelta will have the number of the BOM bytes that are in the current buffer.
632 */
633 offsetDelta=0;
634
635 while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
636 switch(state) {
637 case 0:
638 b=*source;
639 if(b==(char)0xfe) {
640 state=1; /* could be FE FF */
641 } else if(b==(char)0xff) {
642 state=5; /* could be FF FE */
643 } else {
644 state=8; /* default to UTF-16BE */
645 continue;
646 }
647 ++source;
648 break;
649 case 1:
650 case 5:
651 if(*source==utf16BOM[state]) {
652 ++source;
653 if(state==1) {
654 state=8; /* detect UTF-16BE */
655 offsetDelta=source-pArgs->source;
656 } else if(state==5) {
657 state=9; /* detect UTF-16LE */
658 offsetDelta=source-pArgs->source;
659 }
660 } else {
661 /* switch to UTF-16BE and pass the previous bytes */
662 if(source!=pArgs->source) {
663 /* just reset the source */
664 source=pArgs->source;
665 } else {
666 UBool oldFlush=pArgs->flush;
667
668 /* the first byte is from a previous buffer, replay it first */
669 pArgs->source=utf16BOM+(state&4); /* select the correct BOM */
670 pArgs->sourceLimit=pArgs->source+1; /* replay previous byte */
671 pArgs->flush=FALSE; /* this sourceLimit is not the real source stream limit */
672
673 _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
674
675 /* restore real pointers; pArgs->source will be set in case 8/9 */
676 pArgs->sourceLimit=sourceLimit;
677 pArgs->flush=oldFlush;
678 }
679 state=8;
680 continue;
681 }
682 break;
683 case 8:
684 /* call UTF-16BE */
685 pArgs->source=source;
686 _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
687 source=pArgs->source;
688 break;
689 case 9:
690 /* call UTF-16LE */
691 pArgs->source=source;
692 _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
693 source=pArgs->source;
694 break;
695 default:
696 break; /* does not occur */
697 }
698 }
699
700 /* add BOM size to offsets - see comment at offsetDelta declaration */
701 if(offsets!=NULL && offsetDelta!=0) {
702 int32_t *offsetsLimit=pArgs->offsets;
703 while(offsets<offsetsLimit) {
704 *offsets++ += offsetDelta;
705 }
706 }
707
708 pArgs->source=source;
709
710 if(source==sourceLimit && pArgs->flush) {
711 /* handle truncated input */
712 switch(state) {
713 case 0:
714 break; /* no input at all, nothing to do */
715 case 8:
716 _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
717 break;
718 case 9:
719 _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
720 break;
721 default:
722 /* handle 0<state<8: call UTF-16BE with too-short input */
723 pArgs->source=utf16BOM+(state&4); /* select the correct BOM */
724 pArgs->sourceLimit=pArgs->source+(state&3); /* replay bytes */
725
726 /* no offsets: not enough for output */
727 _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
728 pArgs->source=source;
729 pArgs->sourceLimit=sourceLimit;
730 break;
731 }
732 cnv->mode=0; /* reset */
733 } else {
734 cnv->mode=state;
735 }
736}
737
738static UChar32
739_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
740 UErrorCode *pErrorCode) {
741 switch(pArgs->converter->mode) {
742 case 8:
743 return T_UConverter_getNextUChar_UTF16_BE(pArgs, pErrorCode);
744 case 9:
745 return T_UConverter_getNextUChar_UTF16_LE(pArgs, pErrorCode);
746 default:
747 return ucnv_getNextUCharFromToUImpl(pArgs, _UTF16ToUnicodeWithOffsets, TRUE, pErrorCode);
748 }
749}
750
751static const UConverterImpl _UTF16Impl = {
752 UCNV_UTF16,
753
754 NULL,
755 NULL,
756
757 _UTF16Open,
758 NULL,
759 _UTF16Reset,
760
761 _UTF16ToUnicodeWithOffsets,
762 _UTF16ToUnicodeWithOffsets,
763 _UTF16PEFromUnicodeWithOffsets,
764 _UTF16PEFromUnicodeWithOffsets,
765 _UTF16GetNextUChar,
766
767 NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
768 NULL,
769 NULL,
770 NULL,
771 ucnv_getCompleteUnicodeSet
772};
773
774static const UConverterStaticData _UTF16StaticData = {
775 sizeof(UConverterStaticData),
776 "UTF-16",
777 0, /* ### TODO review correctness of all Unicode CCSIDs */
778 UCNV_IBM, UCNV_UTF16, 2, 2,
779#if U_IS_BIG_ENDIAN
780 { 0xff, 0xfd, 0, 0 }, 2,
781#else
782 { 0xfd, 0xff, 0, 0 }, 2,
783#endif
784 FALSE, FALSE,
785 0,
786 0,
787 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
788};
789
790const UConverterSharedData _UTF16Data = {
791 sizeof(UConverterSharedData), ~((uint32_t) 0),
792 NULL, NULL, &_UTF16StaticData, FALSE, &_UTF16Impl,
793 0
794};