]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ucnvscsu.c
ICU-8.11.2.tar.gz
[apple/icu.git] / icuSources / common / ucnvscsu.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
73c04bcf 4* Copyright (C) 2000-2006, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8* file name: ucnvscsu.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2000nov18
14* created by: Markus W. Scherer
15*
16* This is an implementation of the Standard Compression Scheme for Unicode
17* as defined in http://www.unicode.org/unicode/reports/tr6/ .
18* Reserved commands and window settings are treated as illegal sequences and
19* will result in callback calls.
20*/
21
22#include "unicode/utypes.h"
374ca955
A
23
24#if !UCONFIG_NO_CONVERSION
25
b75a7d8f
A
26#include "unicode/ucnv.h"
27#include "unicode/ucnv_cb.h"
28#include "ucnv_bld.h"
29#include "ucnv_cnv.h"
30#include "cmemory.h"
31
32/* SCSU definitions --------------------------------------------------------- */
33
/*
 * SCSU command byte values.
 * The S* tags are commands in single-byte mode, the U* tags in Unicode mode.
 * Only the first and last member of each contiguous range (SQ0..SQ7,
 * SC0..SC7, SD0..SD7, UC0..UC7, UD0..UD7) is named.
 */
enum {
    SQ0=0x01, /* Quote from window pair 0 */
    SQ7=0x08, /* Quote from window pair 7 */
    SDX=0x0B, /* Define a window as extended */
    Srs=0x0C, /* reserved */
    SQU=0x0E, /* Quote a single Unicode character */
    SCU=0x0F, /* Change to Unicode mode */
    SC0=0x10, /* Select window 0 */
    SC7=0x17, /* Select window 7 */
    SD0=0x18, /* Define and select window 0 */
    SD7=0x1F, /* Define and select window 7 */

    UC0=0xE0, /* Select window 0 */
    UC7=0xE7, /* Select window 7 */
    UD0=0xE8, /* Define and select window 0 */
    UD7=0xEF, /* Define and select window 7 */
    UQU=0xF0, /* Quote a single Unicode character */
    UDX=0xF1, /* Define a Window as extended */
    Urs=0xF2  /* reserved */
};
55
/* thresholds for interpreting the one-byte argument of a window definition */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by
     * dynamic window, since in these areas no short run alphabets are
     * found. Therefore add gapOffset to all values from gapThreshold.
     */
    gapThreshold=0x68,
    gapOffset=0xAC00,

    /* values between reservedStart and fixedThreshold are reserved */
    reservedStart=0xA8,

    /* use table of predefined fixed offsets for values from fixedThreshold */
    fixedThreshold=0xF9
};
71
/* constant offsets for the 8 static windows (used for quoting, SQ0..SQ7) */
static const uint32_t staticOffsets[8]={
    0x0000, /* ASCII for quoted tags */
    0x0080, /* Latin - 1 Supplement (for access to punctuation) */
    0x0100, /* Latin Extended-A */
    0x0300, /* Combining Diacritical Marks */
    0x2000, /* General Punctuation */
    0x2080, /* Currency Symbols */
    0x2100, /* Letterlike Symbols and Number Forms */
    0x3000  /* CJK Symbols and punctuation */
};
83
/*
 * initial offsets for the 8 dynamic (sliding) windows;
 * copied into the per-converter offset arrays on reset
 */
static const uint32_t initialDynamicOffsets[8]={
    0x0080, /* Latin-1 */
    0x00C0, /* Latin Extended A */
    0x0400, /* Cyrillic */
    0x0600, /* Arabic */
    0x0900, /* Devanagari */
    0x3040, /* Hiragana */
    0x30A0, /* Katakana */
    0xFF00  /* Fullwidth ASCII */
};
95
/*
 * Table of fixed predefined offsets.
 * Indexed by (window definition byte - 0xF9), i.e. by the distance from
 * fixedThreshold.
 */
static const uint32_t fixedOffsets[]={
    /* 0xF9 */ 0x00C0, /* Latin-1 Letters + half of Latin Extended A */
    /* 0xFA */ 0x0250, /* IPA extensions */
    /* 0xFB */ 0x0370, /* Greek */
    /* 0xFC */ 0x0530, /* Armenian */
    /* 0xFD */ 0x3040, /* Hiragana */
    /* 0xFE */ 0x30A0, /* Katakana */
    /* 0xFF */ 0xFF60  /* Halfwidth Katakana */
};
106
/*
 * state values of the toUnicode state machine:
 * readCommand is the idle state between characters/commands,
 * the others name the argument byte that is expected next
 */
enum {
    readCommand,    /* expecting a command or a directly encoded character */
    quotePairOne,   /* expecting the first byte of a quoted UTF-16 code unit */
    quotePairTwo,   /* expecting the second byte of a UTF-16 code unit */
    quoteOne,       /* expecting the single byte after SQn */
    definePairOne,  /* expecting the first argument byte of SDX/UDX */
    definePairTwo,  /* expecting the second argument byte of SDX/UDX */
    defineOne       /* expecting the argument byte of SDn/UDn */
};
117
118typedef struct SCSUData {
119 /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
120 uint32_t toUDynamicOffsets[8];
121 uint32_t fromUDynamicOffsets[8];
122
123 /* state machine state - toUnicode */
124 UBool toUIsSingleByteMode;
125 uint8_t toUState;
126 int8_t toUQuoteWindow, toUDynamicWindow;
127 uint8_t toUByteOne;
128 uint8_t toUPadding[3];
129
130 /* state machine state - fromUnicode */
131 UBool fromUIsSingleByteMode;
132 int8_t fromUDynamicWindow;
133
134 /*
135 * windowUse[] keeps track of the use of the dynamic windows:
136 * At nextWindowUseIndex there is the least recently used window,
137 * and the following windows (in a wrapping manner) are more and more
138 * recently used.
139 * At nextWindowUseIndex-1 there is the most recently used window.
140 */
141 uint8_t locale;
142 int8_t nextWindowUseIndex;
143 int8_t windowUse[8];
144} SCSUData;
145
/*
 * initial contents of SCSUData.windowUse[]: dynamic window numbers ordered
 * from least recently used (index 0) to most recently used (index 7);
 * the _ja variant is used when the converter was opened with a "ja" locale
 */
static const int8_t initialWindowUse[8]={ 7, 0, 3, 2, 4, 5, 6, 1 };
static const int8_t initialWindowUse_ja[8]={ 3, 2, 4, 1, 0, 7, 5, 6 };
148
/* values for SCSUData.locale */
enum {
    lGeneric, l_ja
};
152
153/* SCSU setup functions ----------------------------------------------------- */
154
155static void
156_SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
157 SCSUData *scsu=(SCSUData *)cnv->extraInfo;
158
159 if(choice<=UCNV_RESET_TO_UNICODE) {
160 /* reset toUnicode */
161 uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
162
163 scsu->toUIsSingleByteMode=TRUE;
164 scsu->toUState=readCommand;
165 scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
166 scsu->toUByteOne=0;
167
168 cnv->toULength=0;
169 }
170 if(choice!=UCNV_RESET_TO_UNICODE) {
171 /* reset fromUnicode */
172 uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
173
174 scsu->fromUIsSingleByteMode=TRUE;
175 scsu->fromUDynamicWindow=0;
176
177 scsu->nextWindowUseIndex=0;
178 switch(scsu->locale) {
179 case l_ja:
180 uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
181 break;
182 default:
183 uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
184 break;
185 }
186
374ca955 187 cnv->fromUChar32=0;
b75a7d8f
A
188 }
189}
190
191static void
192_SCSUOpen(UConverter *cnv,
193 const char *name,
194 const char *locale,
195 uint32_t options,
196 UErrorCode *pErrorCode) {
197 cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
198 if(cnv->extraInfo!=NULL) {
199 if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
200 ((SCSUData *)cnv->extraInfo)->locale=l_ja;
201 } else {
202 ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
203 }
204 _SCSUReset(cnv, UCNV_RESET_BOTH);
205 } else {
206 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
207 }
73c04bcf
A
208
209 /* Set the substitution character U+fffd as a Unicode string. */
210 cnv->subUChars[0]=0xfffd;
211 cnv->subCharLen=-1;
b75a7d8f
A
212}
213
214static void
215_SCSUClose(UConverter *cnv) {
216 if(cnv->extraInfo!=NULL) {
217 if(!cnv->isExtraLocal) {
218 uprv_free(cnv->extraInfo);
219 }
220 cnv->extraInfo=NULL;
221 }
222}
223
224/* SCSU-to-Unicode conversion functions ------------------------------------- */
225
b75a7d8f
A
226static void
227_SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
228 UErrorCode *pErrorCode) {
229 UConverter *cnv;
230 SCSUData *scsu;
231 const uint8_t *source, *sourceLimit;
232 UChar *target;
233 const UChar *targetLimit;
234 int32_t *offsets;
235 UBool isSingleByteMode;
236 uint8_t state, byteOne;
237 int8_t quoteWindow, dynamicWindow;
238
239 int32_t sourceIndex, nextSourceIndex;
240
241 uint8_t b;
242
243 /* set up the local pointers */
244 cnv=pArgs->converter;
245 scsu=(SCSUData *)cnv->extraInfo;
246
247 source=(const uint8_t *)pArgs->source;
248 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
249 target=pArgs->target;
250 targetLimit=pArgs->targetLimit;
251 offsets=pArgs->offsets;
252
253 /* get the state machine state */
254 isSingleByteMode=scsu->toUIsSingleByteMode;
255 state=scsu->toUState;
256 quoteWindow=scsu->toUQuoteWindow;
257 dynamicWindow=scsu->toUDynamicWindow;
258 byteOne=scsu->toUByteOne;
259
260 /* sourceIndex=-1 if the current character began in the previous buffer */
261 sourceIndex=state==readCommand ? 0 : -1;
262 nextSourceIndex=0;
263
264 /*
265 * conversion "loop"
266 *
267 * For performance, this is not a normal C loop.
268 * Instead, there are two code blocks for the two SCSU modes.
269 * The function branches to either one, and a change of the mode is done with a goto to
270 * the other branch.
271 *
272 * Each branch has two conventional loops:
273 * - a fast-path loop for the most common codes in the mode
274 * - a loop for all other codes in the mode
275 * When the fast-path runs into a code that it cannot handle, its loop ends and it
276 * runs into the following loop to handle the other codes.
277 * The end of the input or output buffer is also handled by the slower loop.
278 * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
279 *
374ca955
A
280 * The callback handling is done by returning with an error code.
281 * The conversion framework actually calls the callback function.
b75a7d8f 282 */
b75a7d8f
A
283 if(isSingleByteMode) {
284 /* fast path for single-byte mode */
285 if(state==readCommand) {
286fastSingle:
287 while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
288 ++source;
289 ++nextSourceIndex;
290 if(b<=0x7f) {
291 /* write US-ASCII graphic character or DEL */
292 *target++=(UChar)b;
293 if(offsets!=NULL) {
294 *offsets++=sourceIndex;
295 }
296 } else {
297 /* write from dynamic window */
298 uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
299 if(c<=0xffff) {
300 *target++=(UChar)c;
301 if(offsets!=NULL) {
302 *offsets++=sourceIndex;
303 }
304 } else {
305 /* output surrogate pair */
306 *target++=(UChar)(0xd7c0+(c>>10));
307 if(target<targetLimit) {
308 *target++=(UChar)(0xdc00|(c&0x3ff));
309 if(offsets!=NULL) {
310 *offsets++=sourceIndex;
311 *offsets++=sourceIndex;
312 }
313 } else {
314 /* target overflow */
315 if(offsets!=NULL) {
316 *offsets++=sourceIndex;
317 }
318 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
319 cnv->UCharErrorBufferLength=1;
320 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
321 goto endloop;
322 }
323 }
324 }
325 sourceIndex=nextSourceIndex;
326 }
327 }
328
329 /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
330singleByteMode:
331 while(source<sourceLimit) {
332 if(target>=targetLimit) {
333 /* target is full */
334 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
335 break;
336 }
337 b=*source++;
338 ++nextSourceIndex;
339 switch(state) {
340 case readCommand:
341 /* redundant conditions are commented out */
342 /* here: b<0x20 because otherwise we would be in fastSingle */
343 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
344 /* CR/LF/TAB/NUL */
345 *target++=(UChar)b;
346 if(offsets!=NULL) {
347 *offsets++=sourceIndex;
348 }
349 sourceIndex=nextSourceIndex;
350 goto fastSingle;
351 } else if(SC0<=b) {
352 if(b<=SC7) {
353 dynamicWindow=(int8_t)(b-SC0);
354 sourceIndex=nextSourceIndex;
355 goto fastSingle;
356 } else /* if(SD0<=b && b<=SD7) */ {
357 dynamicWindow=(int8_t)(b-SD0);
358 state=defineOne;
359 }
360 } else if(/* SQ0<=b && */ b<=SQ7) {
361 quoteWindow=(int8_t)(b-SQ0);
362 state=quoteOne;
363 } else if(b==SDX) {
364 state=definePairOne;
365 } else if(b==SQU) {
366 state=quotePairOne;
367 } else if(b==SCU) {
368 sourceIndex=nextSourceIndex;
369 isSingleByteMode=FALSE;
370 goto fastUnicode;
371 } else /* Srs */ {
372 /* callback(illegal) */
374ca955
A
373 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
374 cnv->toUBytes[0]=b;
375 cnv->toULength=1;
376 goto endloop;
b75a7d8f 377 }
374ca955
A
378
379 /* store the first byte of a multibyte sequence in toUBytes[] */
380 cnv->toUBytes[0]=b;
381 cnv->toULength=1;
b75a7d8f
A
382 break;
383 case quotePairOne:
384 byteOne=b;
374ca955
A
385 cnv->toUBytes[1]=b;
386 cnv->toULength=2;
b75a7d8f
A
387 state=quotePairTwo;
388 break;
389 case quotePairTwo:
390 *target++=(UChar)((byteOne<<8)|b);
391 if(offsets!=NULL) {
392 *offsets++=sourceIndex;
393 }
394 sourceIndex=nextSourceIndex;
395 state=readCommand;
396 goto fastSingle;
397 case quoteOne:
398 if(b<0x80) {
399 /* all static offsets are in the BMP */
400 *target++=(UChar)(staticOffsets[quoteWindow]+b);
401 if(offsets!=NULL) {
402 *offsets++=sourceIndex;
403 }
404 } else {
405 /* write from dynamic window */
406 uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
407 if(c<=0xffff) {
408 *target++=(UChar)c;
409 if(offsets!=NULL) {
410 *offsets++=sourceIndex;
411 }
412 } else {
413 /* output surrogate pair */
414 *target++=(UChar)(0xd7c0+(c>>10));
415 if(target<targetLimit) {
416 *target++=(UChar)(0xdc00|(c&0x3ff));
417 if(offsets!=NULL) {
418 *offsets++=sourceIndex;
419 *offsets++=sourceIndex;
420 }
421 } else {
422 /* target overflow */
423 if(offsets!=NULL) {
424 *offsets++=sourceIndex;
425 }
426 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
427 cnv->UCharErrorBufferLength=1;
428 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
429 goto endloop;
430 }
431 }
432 }
433 sourceIndex=nextSourceIndex;
434 state=readCommand;
435 goto fastSingle;
436 case definePairOne:
437 dynamicWindow=(int8_t)((b>>5)&7);
438 byteOne=(uint8_t)(b&0x1f);
374ca955
A
439 cnv->toUBytes[1]=b;
440 cnv->toULength=2;
b75a7d8f
A
441 state=definePairTwo;
442 break;
443 case definePairTwo:
444 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
445 sourceIndex=nextSourceIndex;
446 state=readCommand;
447 goto fastSingle;
448 case defineOne:
449 if(b==0) {
450 /* callback(illegal): Reserved window offset value 0 */
374ca955
A
451 cnv->toUBytes[1]=b;
452 cnv->toULength=2;
453 goto endloop;
b75a7d8f
A
454 } else if(b<gapThreshold) {
455 scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
456 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
457 scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
458 } else if(b>=fixedThreshold) {
459 scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
460 } else {
461 /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
374ca955
A
462 cnv->toUBytes[1]=b;
463 cnv->toULength=2;
464 goto endloop;
b75a7d8f
A
465 }
466 sourceIndex=nextSourceIndex;
467 state=readCommand;
468 goto fastSingle;
469 }
470 }
471 } else {
472 /* fast path for Unicode mode */
473 if(state==readCommand) {
474fastUnicode:
475 while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
476 *target++=(UChar)((b<<8)|source[1]);
477 if(offsets!=NULL) {
478 *offsets++=sourceIndex;
479 }
480 sourceIndex=nextSourceIndex;
481 nextSourceIndex+=2;
482 source+=2;
483 }
484 }
485
486 /* normal state machine for Unicode mode */
487/* unicodeByteMode: */
488 while(source<sourceLimit) {
489 if(target>=targetLimit) {
490 /* target is full */
491 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
492 break;
493 }
494 b=*source++;
495 ++nextSourceIndex;
496 switch(state) {
497 case readCommand:
498 if((uint8_t)(b-UC0)>(Urs-UC0)) {
499 byteOne=b;
374ca955
A
500 cnv->toUBytes[0]=b;
501 cnv->toULength=1;
b75a7d8f
A
502 state=quotePairTwo;
503 } else if(/* UC0<=b && */ b<=UC7) {
504 dynamicWindow=(int8_t)(b-UC0);
505 sourceIndex=nextSourceIndex;
506 isSingleByteMode=TRUE;
507 goto fastSingle;
508 } else if(/* UD0<=b && */ b<=UD7) {
509 dynamicWindow=(int8_t)(b-UD0);
510 isSingleByteMode=TRUE;
374ca955
A
511 cnv->toUBytes[0]=b;
512 cnv->toULength=1;
b75a7d8f
A
513 state=defineOne;
514 goto singleByteMode;
515 } else if(b==UDX) {
516 isSingleByteMode=TRUE;
374ca955
A
517 cnv->toUBytes[0]=b;
518 cnv->toULength=1;
b75a7d8f
A
519 state=definePairOne;
520 goto singleByteMode;
521 } else if(b==UQU) {
374ca955
A
522 cnv->toUBytes[0]=b;
523 cnv->toULength=1;
b75a7d8f
A
524 state=quotePairOne;
525 } else /* Urs */ {
526 /* callback(illegal) */
374ca955
A
527 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
528 cnv->toUBytes[0]=b;
529 cnv->toULength=1;
530 goto endloop;
b75a7d8f
A
531 }
532 break;
533 case quotePairOne:
534 byteOne=b;
374ca955
A
535 cnv->toUBytes[1]=b;
536 cnv->toULength=2;
b75a7d8f
A
537 state=quotePairTwo;
538 break;
539 case quotePairTwo:
540 *target++=(UChar)((byteOne<<8)|b);
541 if(offsets!=NULL) {
542 *offsets++=sourceIndex;
543 }
544 sourceIndex=nextSourceIndex;
545 state=readCommand;
546 goto fastUnicode;
547 }
548 }
549 }
550endloop:
551
374ca955
A
552 /* set the converter state back into UConverter */
553 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
554 /* reset to deal with the next character */
555 state=readCommand;
556 } else if(state==readCommand) {
557 /* not in a multi-byte sequence, reset toULength */
558 cnv->toULength=0;
b75a7d8f 559 }
374ca955
A
560 scsu->toUIsSingleByteMode=isSingleByteMode;
561 scsu->toUState=state;
562 scsu->toUQuoteWindow=quoteWindow;
563 scsu->toUDynamicWindow=dynamicWindow;
564 scsu->toUByteOne=byteOne;
b75a7d8f 565
b75a7d8f
A
566 /* write back the updated pointers */
567 pArgs->source=(const char *)source;
568 pArgs->target=target;
569 pArgs->offsets=offsets;
570 return;
b75a7d8f
A
571}
572
573/*
574 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
575 * If a change is made in the original function, then either
576 * change this function the same way or
577 * re-copy the original function and remove the variables
578 * offsets, sourceIndex, and nextSourceIndex.
579 */
580static void
581_SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
582 UErrorCode *pErrorCode) {
583 UConverter *cnv;
584 SCSUData *scsu;
585 const uint8_t *source, *sourceLimit;
586 UChar *target;
587 const UChar *targetLimit;
b75a7d8f
A
588 UBool isSingleByteMode;
589 uint8_t state, byteOne;
590 int8_t quoteWindow, dynamicWindow;
591
592 uint8_t b;
593
594 /* set up the local pointers */
595 cnv=pArgs->converter;
596 scsu=(SCSUData *)cnv->extraInfo;
597
598 source=(const uint8_t *)pArgs->source;
599 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
600 target=pArgs->target;
601 targetLimit=pArgs->targetLimit;
602
603 /* get the state machine state */
604 isSingleByteMode=scsu->toUIsSingleByteMode;
605 state=scsu->toUState;
606 quoteWindow=scsu->toUQuoteWindow;
607 dynamicWindow=scsu->toUDynamicWindow;
608 byteOne=scsu->toUByteOne;
609
610 /*
611 * conversion "loop"
612 *
613 * For performance, this is not a normal C loop.
614 * Instead, there are two code blocks for the two SCSU modes.
615 * The function branches to either one, and a change of the mode is done with a goto to
616 * the other branch.
617 *
618 * Each branch has two conventional loops:
619 * - a fast-path loop for the most common codes in the mode
620 * - a loop for all other codes in the mode
621 * When the fast-path runs into a code that it cannot handle, its loop ends and it
622 * runs into the following loop to handle the other codes.
623 * The end of the input or output buffer is also handled by the slower loop.
624 * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
625 *
374ca955
A
626 * The callback handling is done by returning with an error code.
627 * The conversion framework actually calls the callback function.
b75a7d8f 628 */
b75a7d8f
A
629 if(isSingleByteMode) {
630 /* fast path for single-byte mode */
631 if(state==readCommand) {
632fastSingle:
633 while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
634 ++source;
635 if(b<=0x7f) {
636 /* write US-ASCII graphic character or DEL */
637 *target++=(UChar)b;
638 } else {
639 /* write from dynamic window */
640 uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
641 if(c<=0xffff) {
642 *target++=(UChar)c;
643 } else {
644 /* output surrogate pair */
645 *target++=(UChar)(0xd7c0+(c>>10));
646 if(target<targetLimit) {
647 *target++=(UChar)(0xdc00|(c&0x3ff));
648 } else {
649 /* target overflow */
650 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
651 cnv->UCharErrorBufferLength=1;
652 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
653 goto endloop;
654 }
655 }
656 }
657 }
658 }
659
660 /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
661singleByteMode:
662 while(source<sourceLimit) {
663 if(target>=targetLimit) {
664 /* target is full */
665 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
666 break;
667 }
668 b=*source++;
669 switch(state) {
670 case readCommand:
671 /* redundant conditions are commented out */
672 /* here: b<0x20 because otherwise we would be in fastSingle */
673 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
674 /* CR/LF/TAB/NUL */
675 *target++=(UChar)b;
676 goto fastSingle;
677 } else if(SC0<=b) {
678 if(b<=SC7) {
679 dynamicWindow=(int8_t)(b-SC0);
680 goto fastSingle;
681 } else /* if(SD0<=b && b<=SD7) */ {
682 dynamicWindow=(int8_t)(b-SD0);
683 state=defineOne;
684 }
685 } else if(/* SQ0<=b && */ b<=SQ7) {
686 quoteWindow=(int8_t)(b-SQ0);
687 state=quoteOne;
688 } else if(b==SDX) {
689 state=definePairOne;
690 } else if(b==SQU) {
691 state=quotePairOne;
692 } else if(b==SCU) {
693 isSingleByteMode=FALSE;
694 goto fastUnicode;
695 } else /* Srs */ {
696 /* callback(illegal) */
374ca955
A
697 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
698 cnv->toUBytes[0]=b;
699 cnv->toULength=1;
700 goto endloop;
b75a7d8f 701 }
374ca955
A
702
703 /* store the first byte of a multibyte sequence in toUBytes[] */
704 cnv->toUBytes[0]=b;
705 cnv->toULength=1;
b75a7d8f
A
706 break;
707 case quotePairOne:
708 byteOne=b;
374ca955
A
709 cnv->toUBytes[1]=b;
710 cnv->toULength=2;
b75a7d8f
A
711 state=quotePairTwo;
712 break;
713 case quotePairTwo:
714 *target++=(UChar)((byteOne<<8)|b);
715 state=readCommand;
716 goto fastSingle;
717 case quoteOne:
718 if(b<0x80) {
719 /* all static offsets are in the BMP */
720 *target++=(UChar)(staticOffsets[quoteWindow]+b);
721 } else {
722 /* write from dynamic window */
723 uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
724 if(c<=0xffff) {
725 *target++=(UChar)c;
726 } else {
727 /* output surrogate pair */
728 *target++=(UChar)(0xd7c0+(c>>10));
729 if(target<targetLimit) {
730 *target++=(UChar)(0xdc00|(c&0x3ff));
731 } else {
732 /* target overflow */
733 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
734 cnv->UCharErrorBufferLength=1;
735 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
736 goto endloop;
737 }
738 }
739 }
740 state=readCommand;
741 goto fastSingle;
742 case definePairOne:
743 dynamicWindow=(int8_t)((b>>5)&7);
744 byteOne=(uint8_t)(b&0x1f);
374ca955
A
745 cnv->toUBytes[1]=b;
746 cnv->toULength=2;
b75a7d8f
A
747 state=definePairTwo;
748 break;
749 case definePairTwo:
750 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
751 state=readCommand;
752 goto fastSingle;
753 case defineOne:
754 if(b==0) {
755 /* callback(illegal): Reserved window offset value 0 */
374ca955
A
756 cnv->toUBytes[1]=b;
757 cnv->toULength=2;
758 goto endloop;
b75a7d8f
A
759 } else if(b<gapThreshold) {
760 scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
761 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
762 scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
763 } else if(b>=fixedThreshold) {
764 scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
765 } else {
766 /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
374ca955
A
767 cnv->toUBytes[1]=b;
768 cnv->toULength=2;
769 goto endloop;
b75a7d8f
A
770 }
771 state=readCommand;
772 goto fastSingle;
773 }
774 }
775 } else {
776 /* fast path for Unicode mode */
777 if(state==readCommand) {
778fastUnicode:
779 while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
780 *target++=(UChar)((b<<8)|source[1]);
781 source+=2;
782 }
783 }
784
785 /* normal state machine for Unicode mode */
786/* unicodeByteMode: */
787 while(source<sourceLimit) {
788 if(target>=targetLimit) {
789 /* target is full */
790 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
791 break;
792 }
793 b=*source++;
794 switch(state) {
795 case readCommand:
796 if((uint8_t)(b-UC0)>(Urs-UC0)) {
797 byteOne=b;
374ca955
A
798 cnv->toUBytes[0]=b;
799 cnv->toULength=1;
b75a7d8f
A
800 state=quotePairTwo;
801 } else if(/* UC0<=b && */ b<=UC7) {
802 dynamicWindow=(int8_t)(b-UC0);
803 isSingleByteMode=TRUE;
804 goto fastSingle;
805 } else if(/* UD0<=b && */ b<=UD7) {
806 dynamicWindow=(int8_t)(b-UD0);
807 isSingleByteMode=TRUE;
374ca955
A
808 cnv->toUBytes[0]=b;
809 cnv->toULength=1;
b75a7d8f
A
810 state=defineOne;
811 goto singleByteMode;
812 } else if(b==UDX) {
813 isSingleByteMode=TRUE;
374ca955
A
814 cnv->toUBytes[0]=b;
815 cnv->toULength=1;
b75a7d8f
A
816 state=definePairOne;
817 goto singleByteMode;
818 } else if(b==UQU) {
374ca955
A
819 cnv->toUBytes[0]=b;
820 cnv->toULength=1;
b75a7d8f
A
821 state=quotePairOne;
822 } else /* Urs */ {
823 /* callback(illegal) */
374ca955
A
824 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
825 cnv->toUBytes[0]=b;
826 cnv->toULength=1;
827 goto endloop;
b75a7d8f
A
828 }
829 break;
830 case quotePairOne:
831 byteOne=b;
374ca955
A
832 cnv->toUBytes[1]=b;
833 cnv->toULength=2;
b75a7d8f
A
834 state=quotePairTwo;
835 break;
836 case quotePairTwo:
837 *target++=(UChar)((byteOne<<8)|b);
838 state=readCommand;
839 goto fastUnicode;
840 }
841 }
842 }
843endloop:
844
374ca955
A
845 /* set the converter state back into UConverter */
846 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
847 /* reset to deal with the next character */
848 state=readCommand;
849 } else if(state==readCommand) {
850 /* not in a multi-byte sequence, reset toULength */
851 cnv->toULength=0;
b75a7d8f 852 }
374ca955
A
853 scsu->toUIsSingleByteMode=isSingleByteMode;
854 scsu->toUState=state;
855 scsu->toUQuoteWindow=quoteWindow;
856 scsu->toUDynamicWindow=dynamicWindow;
857 scsu->toUByteOne=byteOne;
b75a7d8f 858
b75a7d8f
A
859 /* write back the updated pointers */
860 pArgs->source=(const char *)source;
861 pArgs->target=target;
862 return;
b75a7d8f
A
863}
864
865/* SCSU-from-Unicode conversion functions ----------------------------------- */
866
867/*
868 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
869 * reasonable results. The lookahead is minimal.
870 * Many cases are simple:
871 * A character fits directly into the current mode, a dynamic or static window,
872 * or is not compressible. These cases are tested first.
873 * Real compression heuristics are applied to the rest, in code branches for
874 * single/Unicode mode and BMP/supplementary code points.
875 * The heuristics used here are extremely simple.
876 */
877
/*
 * get the number of the window that this character is in, or -1
 *
 * offsets  start offsets of the 8 windows; each window covers 0x80 code points
 * c        code point to look up
 * returns  index of the first window containing c, or -1 if none does
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int i;
    for(i=0; i<8; ++i) {
        /* unsigned subtraction wraps for c<offsets[i], so one compare tests both bounds */
        if((uint32_t)(c-offsets[i])<=0x7f) {
            return (int8_t)(i);
        }
    }
    return -1;
}
889
890/* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
891static UBool
892isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
893 return (UBool)(c<=offset+0x7f &&
894 (c>=offset || (c<=0x7f &&
895 (c>=0x20 || (1UL<<c)&0x2601))));
896 /* binary 0010 0110 0000 0001,
897 check for b==0xd || b==0xa || b==9 || b==0 */
898}
899
900/*
901 * getNextDynamicWindow returns the next dynamic window to be redefined
902 */
903static int8_t
904getNextDynamicWindow(SCSUData *scsu) {
905 int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
906 if(++scsu->nextWindowUseIndex==8) {
907 scsu->nextWindowUseIndex=0;
908 }
909 return window;
910}
911
912/*
913 * useDynamicWindow() adjusts
914 * windowUse[] and nextWindowUseIndex for the algorithm to choose
915 * the next dynamic window to be defined;
916 * a subclass may override it and provide its own algorithm.
917 */
918static void
919useDynamicWindow(SCSUData *scsu, int8_t window) {
920 /*
921 * move the existing window, which just became the most recently used one,
922 * up in windowUse[] to nextWindowUseIndex-1
923 */
924
925 /* first, find the index of the window - backwards to favor the more recently used windows */
926 int i, j;
927
928 i=scsu->nextWindowUseIndex;
929 do {
930 if(--i<0) {
931 i=7;
932 }
933 } while(scsu->windowUse[i]!=window);
934
935 /* now copy each windowUse[i+1] to [i] */
936 j=i+1;
937 if(j==8) {
938 j=0;
939 }
940 while(j!=scsu->nextWindowUseIndex) {
941 scsu->windowUse[i]=scsu->windowUse[j];
942 i=j;
943 if(++j==8) { j=0; }
944 }
945
946 /* finally, set the window into the most recently used index */
947 scsu->windowUse[i]=window;
948}
949
950/*
951 * calculate the offset and the code for a dynamic window that contains the character
952 * takes fixed offsets into account
953 * the offset of the window is stored in the offset variable,
954 * the code is returned
955 *
956 * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code
957 */
958static int
959getDynamicOffset(uint32_t c, uint32_t *pOffset) {
960 int i;
961
962 for(i=0; i<7; ++i) {
963 if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
964 *pOffset=fixedOffsets[i];
965 return 0xf9+i;
966 }
967 }
968
969 if(c<0x80) {
970 /* No dynamic window for US-ASCII. */
971 return -1;
972 } else if(c<0x3400 ||
973 (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
974 (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
975 ) {
976 /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
977 *pOffset=c&0x7fffff80;
978 return (int)(c>>7);
979 } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
980 /* For these characters we need to take the gapOffset into account. */
981 *pOffset=c&0x7fffff80;
982 return (int)((c-gapOffset)>>7);
983 } else {
984 return -1;
985 }
986}
987
988/*
989 * Idea for compression:
990 * - save SCSUData and other state before really starting work
991 * - at endloop, see if compression could be better with just unicode mode
992 * - don't do this if a callback has been called
993 * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
994 * - different buffer handling!
995 *
996 * Drawback or need for corrective handling:
997 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
998 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
999 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
1000 *
1001 * How to achieve both?
1002 * - Only replace the result after an SDX or SCU?
1003 */
1004
1005static void
1006_SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1007 UErrorCode *pErrorCode) {
1008 UConverter *cnv;
1009 SCSUData *scsu;
1010 const UChar *source, *sourceLimit;
1011 uint8_t *target;
1012 int32_t targetCapacity;
1013 int32_t *offsets;
1014
1015 UBool isSingleByteMode;
1016 uint8_t dynamicWindow;
1017 uint32_t currentOffset;
1018
1019 uint32_t c, delta;
1020
1021 int32_t sourceIndex, nextSourceIndex;
1022
b75a7d8f
A
1023 int32_t length;
1024
1025 /* variables for compression heuristics */
1026 uint32_t offset;
1027 UChar lead, trail;
1028 int code;
1029 int8_t window;
1030
1031 /* set up the local pointers */
1032 cnv=pArgs->converter;
1033 scsu=(SCSUData *)cnv->extraInfo;
1034
1035 /* set up the local pointers */
1036 source=pArgs->source;
1037 sourceLimit=pArgs->sourceLimit;
1038 target=(uint8_t *)pArgs->target;
1039 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1040 offsets=pArgs->offsets;
1041
1042 /* get the state machine state */
1043 isSingleByteMode=scsu->fromUIsSingleByteMode;
1044 dynamicWindow=scsu->fromUDynamicWindow;
1045 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1046
374ca955 1047 c=cnv->fromUChar32;
b75a7d8f
A
1048
1049 /* sourceIndex=-1 if the current character began in the previous buffer */
1050 sourceIndex= c==0 ? 0 : -1;
1051 nextSourceIndex=0;
1052
1053 /* similar conversion "loop" as in toUnicode */
1054loop:
1055 if(isSingleByteMode) {
1056 if(c!=0 && targetCapacity>0) {
1057 goto getTrailSingle;
1058 }
1059
1060 /* state machine for single-byte mode */
1061/* singleByteMode: */
1062 while(source<sourceLimit) {
1063 if(targetCapacity<=0) {
1064 /* target is full */
1065 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1066 break;
1067 }
1068 c=*source++;
1069 ++nextSourceIndex;
1070
1071 if((c-0x20)<=0x5f) {
1072 /* pass US-ASCII graphic character through */
1073 *target++=(uint8_t)c;
1074 if(offsets!=NULL) {
1075 *offsets++=sourceIndex;
1076 }
1077 --targetCapacity;
1078 } else if(c<0x20) {
1079 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1080 /* CR/LF/TAB/NUL */
1081 *target++=(uint8_t)c;
1082 if(offsets!=NULL) {
1083 *offsets++=sourceIndex;
1084 }
1085 --targetCapacity;
1086 } else {
1087 /* quote C0 control character */
1088 c|=SQ0<<8;
1089 length=2;
1090 goto outputBytes;
1091 }
1092 } else if((delta=c-currentOffset)<=0x7f) {
1093 /* use the current dynamic window */
1094 *target++=(uint8_t)(delta|0x80);
1095 if(offsets!=NULL) {
1096 *offsets++=sourceIndex;
1097 }
1098 --targetCapacity;
1099 } else if(UTF_IS_SURROGATE(c)) {
1100 if(UTF_IS_SURROGATE_FIRST(c)) {
1101getTrailSingle:
1102 lead=(UChar)c;
1103 if(source<sourceLimit) {
1104 /* test the following code unit */
1105 trail=*source;
1106 if(UTF_IS_SECOND_SURROGATE(trail)) {
1107 ++source;
1108 ++nextSourceIndex;
1109 c=UTF16_GET_PAIR_VALUE(c, trail);
1110 /* convert this surrogate code point */
1111 /* exit this condition tree */
1112 } else {
1113 /* this is an unmatched lead code unit (1st surrogate) */
1114 /* callback(illegal) */
374ca955
A
1115 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1116 goto endloop;
b75a7d8f
A
1117 }
1118 } else {
1119 /* no more input */
1120 break;
1121 }
1122 } else {
1123 /* this is an unmatched trail code unit (2nd surrogate) */
1124 /* callback(illegal) */
374ca955
A
1125 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1126 goto endloop;
b75a7d8f
A
1127 }
1128
1129 /* compress supplementary character U+10000..U+10ffff */
1130 if((delta=c-currentOffset)<=0x7f) {
1131 /* use the current dynamic window */
1132 *target++=(uint8_t)(delta|0x80);
1133 if(offsets!=NULL) {
1134 *offsets++=sourceIndex;
1135 }
1136 --targetCapacity;
1137 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1138 /* there is a dynamic window that contains this character, change to it */
1139 dynamicWindow=window;
1140 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1141 useDynamicWindow(scsu, dynamicWindow);
1142 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1143 length=2;
1144 goto outputBytes;
1145 } else if((code=getDynamicOffset(c, &offset))>=0) {
1146 /* might check if there are more characters in this window to come */
1147 /* define an extended window with this character */
1148 code-=0x200;
1149 dynamicWindow=getNextDynamicWindow(scsu);
1150 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1151 useDynamicWindow(scsu, dynamicWindow);
1152 c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1153 length=4;
1154 goto outputBytes;
1155 } else {
1156 /* change to Unicode mode and output this (lead, trail) pair */
1157 isSingleByteMode=FALSE;
1158 *target++=(uint8_t)SCU;
1159 if(offsets!=NULL) {
1160 *offsets++=sourceIndex;
1161 }
1162 --targetCapacity;
1163 c=((uint32_t)lead<<16)|trail;
1164 length=4;
1165 goto outputBytes;
1166 }
1167 } else if(c<0xa0) {
1168 /* quote C1 control character */
1169 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1170 length=2;
1171 goto outputBytes;
1172 } else if(c==0xfeff || c>=0xfff0) {
1173 /* quote signature character=byte order mark and specials */
1174 c|=SQU<<16;
1175 length=3;
1176 goto outputBytes;
1177 } else {
1178 /* compress all other BMP characters */
1179 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1180 /* there is a window defined that contains this character - switch to it or quote from it? */
1181 if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1182 /* change to dynamic window */
1183 dynamicWindow=window;
1184 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1185 useDynamicWindow(scsu, dynamicWindow);
1186 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1187 length=2;
1188 goto outputBytes;
1189 } else {
1190 /* quote from dynamic window */
1191 c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1192 length=2;
1193 goto outputBytes;
1194 }
1195 } else if((window=getWindow(staticOffsets, c))>=0) {
1196 /* quote from static window */
1197 c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1198 length=2;
1199 goto outputBytes;
1200 } else if((code=getDynamicOffset(c, &offset))>=0) {
1201 /* define a dynamic window with this character */
1202 dynamicWindow=getNextDynamicWindow(scsu);
1203 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1204 useDynamicWindow(scsu, dynamicWindow);
1205 c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1206 length=3;
1207 goto outputBytes;
1208 } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1209 (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1210 ) {
1211 /*
1212 * this character is not compressible (a BMP ideograph or similar);
1213 * switch to Unicode mode if this is the last character in the block
1214 * or there is at least one more ideograph following immediately
1215 */
1216 isSingleByteMode=FALSE;
1217 c|=SCU<<16;
1218 length=3;
1219 goto outputBytes;
1220 } else {
1221 /* quote Unicode */
1222 c|=SQU<<16;
1223 length=3;
1224 goto outputBytes;
1225 }
1226 }
1227
1228 /* normal end of conversion: prepare for a new character */
1229 c=0;
1230 sourceIndex=nextSourceIndex;
1231 }
1232 } else {
1233 if(c!=0 && targetCapacity>0) {
1234 goto getTrailUnicode;
1235 }
1236
1237 /* state machine for Unicode mode */
1238/* unicodeByteMode: */
1239 while(source<sourceLimit) {
1240 if(targetCapacity<=0) {
1241 /* target is full */
1242 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1243 break;
1244 }
1245 c=*source++;
1246 ++nextSourceIndex;
1247
1248 if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1249 /* not compressible, write character directly */
1250 if(targetCapacity>=2) {
1251 *target++=(uint8_t)(c>>8);
1252 *target++=(uint8_t)c;
1253 if(offsets!=NULL) {
1254 *offsets++=sourceIndex;
1255 *offsets++=sourceIndex;
1256 }
1257 targetCapacity-=2;
1258 } else {
1259 length=2;
1260 goto outputBytes;
1261 }
1262 } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1263 /* compress BMP character if the following one is not an uncompressible ideograph */
1264 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1265 if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1266 /* ASCII digit or letter */
1267 isSingleByteMode=TRUE;
1268 c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1269 length=2;
1270 goto outputBytes;
1271 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1272 /* there is a dynamic window that contains this character, change to it */
1273 isSingleByteMode=TRUE;
1274 dynamicWindow=window;
1275 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1276 useDynamicWindow(scsu, dynamicWindow);
1277 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1278 length=2;
1279 goto outputBytes;
1280 } else if((code=getDynamicOffset(c, &offset))>=0) {
1281 /* define a dynamic window with this character */
1282 isSingleByteMode=TRUE;
1283 dynamicWindow=getNextDynamicWindow(scsu);
1284 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1285 useDynamicWindow(scsu, dynamicWindow);
1286 c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1287 length=3;
1288 goto outputBytes;
1289 }
1290 }
1291
1292 /* don't know how to compress this character, just write it directly */
1293 length=2;
1294 goto outputBytes;
1295 } else if(c<0xe000) {
1296 /* c is a surrogate */
1297 if(UTF_IS_SURROGATE_FIRST(c)) {
1298getTrailUnicode:
1299 lead=(UChar)c;
1300 if(source<sourceLimit) {
1301 /* test the following code unit */
1302 trail=*source;
1303 if(UTF_IS_SECOND_SURROGATE(trail)) {
1304 ++source;
1305 ++nextSourceIndex;
1306 c=UTF16_GET_PAIR_VALUE(c, trail);
1307 /* convert this surrogate code point */
1308 /* exit this condition tree */
1309 } else {
1310 /* this is an unmatched lead code unit (1st surrogate) */
1311 /* callback(illegal) */
374ca955
A
1312 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1313 goto endloop;
b75a7d8f
A
1314 }
1315 } else {
1316 /* no more input */
1317 break;
1318 }
1319 } else {
1320 /* this is an unmatched trail code unit (2nd surrogate) */
1321 /* callback(illegal) */
374ca955
A
1322 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1323 goto endloop;
b75a7d8f
A
1324 }
1325
1326 /* compress supplementary character */
1327 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1328 !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1329 ) {
1330 /*
1331 * there is a dynamic window that contains this character and
1332 * the following character is not uncompressible,
1333 * change to the window
1334 */
1335 isSingleByteMode=TRUE;
1336 dynamicWindow=window;
1337 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1338 useDynamicWindow(scsu, dynamicWindow);
1339 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1340 length=2;
1341 goto outputBytes;
1342 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1343 (code=getDynamicOffset(c, &offset))>=0
1344 ) {
1345 /* two supplementary characters in (probably) the same window - define an extended one */
1346 isSingleByteMode=TRUE;
1347 code-=0x200;
1348 dynamicWindow=getNextDynamicWindow(scsu);
1349 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1350 useDynamicWindow(scsu, dynamicWindow);
1351 c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1352 length=4;
1353 goto outputBytes;
1354 } else {
1355 /* don't know how to compress this character, just write it directly */
1356 c=((uint32_t)lead<<16)|trail;
1357 length=4;
1358 goto outputBytes;
1359 }
1360 } else /* 0xe000<=c<0xf300 */ {
1361 /* quote to avoid SCSU tags */
1362 c|=UQU<<16;
1363 length=3;
1364 goto outputBytes;
1365 }
1366
1367 /* normal end of conversion: prepare for a new character */
1368 c=0;
1369 sourceIndex=nextSourceIndex;
1370 }
1371 }
1372endloop:
1373
374ca955
A
1374 /* set the converter state back into UConverter */
1375 scsu->fromUIsSingleByteMode=isSingleByteMode;
1376 scsu->fromUDynamicWindow=dynamicWindow;
b75a7d8f 1377
374ca955 1378 cnv->fromUChar32=c;
b75a7d8f 1379
b75a7d8f
A
1380 /* write back the updated pointers */
1381 pArgs->source=source;
1382 pArgs->target=(char *)target;
1383 pArgs->offsets=offsets;
1384 return;
1385
1386outputBytes:
1387 /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1388 /* from the first if in the loop we know that targetCapacity>0 */
1389 if(length<=targetCapacity) {
1390 if(offsets==NULL) {
1391 switch(length) {
1392 /* each branch falls through to the next one */
1393 case 4:
1394 *target++=(uint8_t)(c>>24);
1395 case 3:
1396 *target++=(uint8_t)(c>>16);
1397 case 2:
1398 *target++=(uint8_t)(c>>8);
1399 case 1:
1400 *target++=(uint8_t)c;
1401 default:
1402 /* will never occur */
1403 break;
1404 }
1405 } else {
1406 switch(length) {
1407 /* each branch falls through to the next one */
1408 case 4:
1409 *target++=(uint8_t)(c>>24);
1410 *offsets++=sourceIndex;
1411 case 3:
1412 *target++=(uint8_t)(c>>16);
1413 *offsets++=sourceIndex;
1414 case 2:
1415 *target++=(uint8_t)(c>>8);
1416 *offsets++=sourceIndex;
1417 case 1:
1418 *target++=(uint8_t)c;
1419 *offsets++=sourceIndex;
1420 default:
1421 /* will never occur */
1422 break;
1423 }
1424 }
1425 targetCapacity-=length;
1426
1427 /* normal end of conversion: prepare for a new character */
1428 c=0;
1429 sourceIndex=nextSourceIndex;
1430 goto loop;
1431 } else {
1432 uint8_t *p;
1433
1434 /*
1435 * We actually do this backwards here:
1436 * In order to save an intermediate variable, we output
1437 * first to the overflow buffer what does not fit into the
1438 * regular target.
1439 */
73c04bcf
A
1440 /* we know that 0<=targetCapacity<length<=4 */
1441 /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
b75a7d8f
A
1442 length-=targetCapacity;
1443 p=(uint8_t *)cnv->charErrorBuffer;
1444 switch(length) {
1445 /* each branch falls through to the next one */
73c04bcf
A
1446 case 4:
1447 *p++=(uint8_t)(c>>24);
b75a7d8f
A
1448 case 3:
1449 *p++=(uint8_t)(c>>16);
1450 case 2:
1451 *p++=(uint8_t)(c>>8);
1452 case 1:
1453 *p=(uint8_t)c;
1454 default:
1455 /* will never occur */
1456 break;
1457 }
1458 cnv->charErrorBufferLength=(int8_t)length;
1459
1460 /* now output what fits into the regular target */
1461 c>>=8*length; /* length was reduced by targetCapacity */
1462 switch(targetCapacity) {
1463 /* each branch falls through to the next one */
1464 case 3:
1465 *target++=(uint8_t)(c>>16);
1466 if(offsets!=NULL) {
1467 *offsets++=sourceIndex;
1468 }
1469 case 2:
1470 *target++=(uint8_t)(c>>8);
1471 if(offsets!=NULL) {
1472 *offsets++=sourceIndex;
1473 }
1474 case 1:
1475 *target++=(uint8_t)c;
1476 if(offsets!=NULL) {
1477 *offsets++=sourceIndex;
1478 }
1479 default:
b75a7d8f
A
1480 break;
1481 }
1482
1483 /* target overflow */
1484 targetCapacity=0;
1485 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1486 c=0;
1487 goto endloop;
1488 }
b75a7d8f
A
1489}
1490
1491/*
1492 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1493 * If a change is made in the original function, then either
1494 * change this function the same way or
1495 * re-copy the original function and remove the variables
1496 * offsets, sourceIndex, and nextSourceIndex.
1497 */
1498static void
1499_SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1500 UErrorCode *pErrorCode) {
1501 UConverter *cnv;
1502 SCSUData *scsu;
1503 const UChar *source, *sourceLimit;
1504 uint8_t *target;
1505 int32_t targetCapacity;
1506
1507 UBool isSingleByteMode;
1508 uint8_t dynamicWindow;
1509 uint32_t currentOffset;
1510
1511 uint32_t c, delta;
1512
b75a7d8f
A
1513 int32_t length;
1514
1515 /* variables for compression heuristics */
1516 uint32_t offset;
1517 UChar lead, trail;
1518 int code;
1519 int8_t window;
1520
1521 /* set up the local pointers */
1522 cnv=pArgs->converter;
1523 scsu=(SCSUData *)cnv->extraInfo;
1524
1525 /* set up the local pointers */
1526 source=pArgs->source;
1527 sourceLimit=pArgs->sourceLimit;
1528 target=(uint8_t *)pArgs->target;
1529 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1530
1531 /* get the state machine state */
1532 isSingleByteMode=scsu->fromUIsSingleByteMode;
1533 dynamicWindow=scsu->fromUDynamicWindow;
1534 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1535
374ca955 1536 c=cnv->fromUChar32;
b75a7d8f
A
1537
1538 /* similar conversion "loop" as in toUnicode */
1539loop:
1540 if(isSingleByteMode) {
1541 if(c!=0 && targetCapacity>0) {
1542 goto getTrailSingle;
1543 }
1544
1545 /* state machine for single-byte mode */
1546/* singleByteMode: */
1547 while(source<sourceLimit) {
1548 if(targetCapacity<=0) {
1549 /* target is full */
1550 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1551 break;
1552 }
1553 c=*source++;
1554
1555 if((c-0x20)<=0x5f) {
1556 /* pass US-ASCII graphic character through */
1557 *target++=(uint8_t)c;
1558 --targetCapacity;
1559 } else if(c<0x20) {
1560 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1561 /* CR/LF/TAB/NUL */
1562 *target++=(uint8_t)c;
1563 --targetCapacity;
1564 } else {
1565 /* quote C0 control character */
1566 c|=SQ0<<8;
1567 length=2;
1568 goto outputBytes;
1569 }
1570 } else if((delta=c-currentOffset)<=0x7f) {
1571 /* use the current dynamic window */
1572 *target++=(uint8_t)(delta|0x80);
1573 --targetCapacity;
1574 } else if(UTF_IS_SURROGATE(c)) {
1575 if(UTF_IS_SURROGATE_FIRST(c)) {
1576getTrailSingle:
1577 lead=(UChar)c;
1578 if(source<sourceLimit) {
1579 /* test the following code unit */
1580 trail=*source;
1581 if(UTF_IS_SECOND_SURROGATE(trail)) {
1582 ++source;
1583 c=UTF16_GET_PAIR_VALUE(c, trail);
1584 /* convert this surrogate code point */
1585 /* exit this condition tree */
1586 } else {
1587 /* this is an unmatched lead code unit (1st surrogate) */
1588 /* callback(illegal) */
374ca955
A
1589 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1590 goto endloop;
b75a7d8f
A
1591 }
1592 } else {
1593 /* no more input */
1594 break;
1595 }
1596 } else {
1597 /* this is an unmatched trail code unit (2nd surrogate) */
1598 /* callback(illegal) */
374ca955
A
1599 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1600 goto endloop;
b75a7d8f
A
1601 }
1602
1603 /* compress supplementary character U+10000..U+10ffff */
1604 if((delta=c-currentOffset)<=0x7f) {
1605 /* use the current dynamic window */
1606 *target++=(uint8_t)(delta|0x80);
1607 --targetCapacity;
1608 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1609 /* there is a dynamic window that contains this character, change to it */
1610 dynamicWindow=window;
1611 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1612 useDynamicWindow(scsu, dynamicWindow);
1613 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1614 length=2;
1615 goto outputBytes;
1616 } else if((code=getDynamicOffset(c, &offset))>=0) {
1617 /* might check if there are more characters in this window to come */
1618 /* define an extended window with this character */
1619 code-=0x200;
1620 dynamicWindow=getNextDynamicWindow(scsu);
1621 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1622 useDynamicWindow(scsu, dynamicWindow);
1623 c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1624 length=4;
1625 goto outputBytes;
1626 } else {
1627 /* change to Unicode mode and output this (lead, trail) pair */
1628 isSingleByteMode=FALSE;
1629 *target++=(uint8_t)SCU;
1630 --targetCapacity;
1631 c=((uint32_t)lead<<16)|trail;
1632 length=4;
1633 goto outputBytes;
1634 }
1635 } else if(c<0xa0) {
1636 /* quote C1 control character */
1637 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1638 length=2;
1639 goto outputBytes;
1640 } else if(c==0xfeff || c>=0xfff0) {
1641 /* quote signature character=byte order mark and specials */
1642 c|=SQU<<16;
1643 length=3;
1644 goto outputBytes;
1645 } else {
1646 /* compress all other BMP characters */
1647 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1648 /* there is a window defined that contains this character - switch to it or quote from it? */
1649 if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1650 /* change to dynamic window */
1651 dynamicWindow=window;
1652 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1653 useDynamicWindow(scsu, dynamicWindow);
1654 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1655 length=2;
1656 goto outputBytes;
1657 } else {
1658 /* quote from dynamic window */
1659 c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1660 length=2;
1661 goto outputBytes;
1662 }
1663 } else if((window=getWindow(staticOffsets, c))>=0) {
1664 /* quote from static window */
1665 c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1666 length=2;
1667 goto outputBytes;
1668 } else if((code=getDynamicOffset(c, &offset))>=0) {
1669 /* define a dynamic window with this character */
1670 dynamicWindow=getNextDynamicWindow(scsu);
1671 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1672 useDynamicWindow(scsu, dynamicWindow);
1673 c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1674 length=3;
1675 goto outputBytes;
1676 } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1677 (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1678 ) {
1679 /*
1680 * this character is not compressible (a BMP ideograph or similar);
1681 * switch to Unicode mode if this is the last character in the block
1682 * or there is at least one more ideograph following immediately
1683 */
1684 isSingleByteMode=FALSE;
1685 c|=SCU<<16;
1686 length=3;
1687 goto outputBytes;
1688 } else {
1689 /* quote Unicode */
1690 c|=SQU<<16;
1691 length=3;
1692 goto outputBytes;
1693 }
1694 }
1695
1696 /* normal end of conversion: prepare for a new character */
1697 c=0;
1698 }
1699 } else {
1700 if(c!=0 && targetCapacity>0) {
1701 goto getTrailUnicode;
1702 }
1703
1704 /* state machine for Unicode mode */
1705/* unicodeByteMode: */
1706 while(source<sourceLimit) {
1707 if(targetCapacity<=0) {
1708 /* target is full */
1709 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1710 break;
1711 }
1712 c=*source++;
1713
1714 if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1715 /* not compressible, write character directly */
1716 if(targetCapacity>=2) {
1717 *target++=(uint8_t)(c>>8);
1718 *target++=(uint8_t)c;
1719 targetCapacity-=2;
1720 } else {
1721 length=2;
1722 goto outputBytes;
1723 }
1724 } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1725 /* compress BMP character if the following one is not an uncompressible ideograph */
1726 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1727 if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1728 /* ASCII digit or letter */
1729 isSingleByteMode=TRUE;
1730 c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1731 length=2;
1732 goto outputBytes;
1733 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1734 /* there is a dynamic window that contains this character, change to it */
1735 isSingleByteMode=TRUE;
1736 dynamicWindow=window;
1737 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1738 useDynamicWindow(scsu, dynamicWindow);
1739 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1740 length=2;
1741 goto outputBytes;
1742 } else if((code=getDynamicOffset(c, &offset))>=0) {
1743 /* define a dynamic window with this character */
1744 isSingleByteMode=TRUE;
1745 dynamicWindow=getNextDynamicWindow(scsu);
1746 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1747 useDynamicWindow(scsu, dynamicWindow);
1748 c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1749 length=3;
1750 goto outputBytes;
1751 }
1752 }
1753
1754 /* don't know how to compress this character, just write it directly */
1755 length=2;
1756 goto outputBytes;
1757 } else if(c<0xe000) {
1758 /* c is a surrogate */
1759 if(UTF_IS_SURROGATE_FIRST(c)) {
1760getTrailUnicode:
1761 lead=(UChar)c;
1762 if(source<sourceLimit) {
1763 /* test the following code unit */
1764 trail=*source;
1765 if(UTF_IS_SECOND_SURROGATE(trail)) {
1766 ++source;
1767 c=UTF16_GET_PAIR_VALUE(c, trail);
1768 /* convert this surrogate code point */
1769 /* exit this condition tree */
1770 } else {
1771 /* this is an unmatched lead code unit (1st surrogate) */
1772 /* callback(illegal) */
374ca955
A
1773 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1774 goto endloop;
b75a7d8f
A
1775 }
1776 } else {
1777 /* no more input */
1778 break;
1779 }
1780 } else {
1781 /* this is an unmatched trail code unit (2nd surrogate) */
1782 /* callback(illegal) */
374ca955
A
1783 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1784 goto endloop;
b75a7d8f
A
1785 }
1786
1787 /* compress supplementary character */
1788 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1789 !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1790 ) {
1791 /*
1792 * there is a dynamic window that contains this character and
1793 * the following character is not uncompressible,
1794 * change to the window
1795 */
1796 isSingleByteMode=TRUE;
1797 dynamicWindow=window;
1798 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1799 useDynamicWindow(scsu, dynamicWindow);
1800 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1801 length=2;
1802 goto outputBytes;
1803 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1804 (code=getDynamicOffset(c, &offset))>=0
1805 ) {
1806 /* two supplementary characters in (probably) the same window - define an extended one */
1807 isSingleByteMode=TRUE;
1808 code-=0x200;
1809 dynamicWindow=getNextDynamicWindow(scsu);
1810 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1811 useDynamicWindow(scsu, dynamicWindow);
1812 c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1813 length=4;
1814 goto outputBytes;
1815 } else {
1816 /* don't know how to compress this character, just write it directly */
1817 c=((uint32_t)lead<<16)|trail;
1818 length=4;
1819 goto outputBytes;
1820 }
1821 } else /* 0xe000<=c<0xf300 */ {
1822 /* quote to avoid SCSU tags */
1823 c|=UQU<<16;
1824 length=3;
1825 goto outputBytes;
1826 }
1827
1828 /* normal end of conversion: prepare for a new character */
1829 c=0;
1830 }
1831 }
1832endloop:
1833
374ca955
A
1834 /* set the converter state back into UConverter */
1835 scsu->fromUIsSingleByteMode=isSingleByteMode;
1836 scsu->fromUDynamicWindow=dynamicWindow;
b75a7d8f 1837
374ca955 1838 cnv->fromUChar32=c;
b75a7d8f 1839
b75a7d8f
A
1840 /* write back the updated pointers */
1841 pArgs->source=source;
1842 pArgs->target=(char *)target;
1843 return;
1844
1845outputBytes:
1846 /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1847 /* from the first if in the loop we know that targetCapacity>0 */
1848 if(length<=targetCapacity) {
1849 switch(length) {
1850 /* each branch falls through to the next one */
1851 case 4:
1852 *target++=(uint8_t)(c>>24);
1853 case 3:
1854 *target++=(uint8_t)(c>>16);
1855 case 2:
1856 *target++=(uint8_t)(c>>8);
1857 case 1:
1858 *target++=(uint8_t)c;
1859 default:
1860 /* will never occur */
1861 break;
1862 }
1863 targetCapacity-=length;
1864
1865 /* normal end of conversion: prepare for a new character */
1866 c=0;
1867 goto loop;
1868 } else {
1869 uint8_t *p;
1870
1871 /*
1872 * We actually do this backwards here:
1873 * In order to save an intermediate variable, we output
1874 * first to the overflow buffer what does not fit into the
1875 * regular target.
1876 */
73c04bcf
A
1877 /* we know that 0<=targetCapacity<length<=4 */
1878 /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
b75a7d8f
A
1879 length-=targetCapacity;
1880 p=(uint8_t *)cnv->charErrorBuffer;
1881 switch(length) {
1882 /* each branch falls through to the next one */
73c04bcf
A
1883 case 4:
1884 *p++=(uint8_t)(c>>24);
b75a7d8f
A
1885 case 3:
1886 *p++=(uint8_t)(c>>16);
1887 case 2:
1888 *p++=(uint8_t)(c>>8);
1889 case 1:
1890 *p=(uint8_t)c;
1891 default:
1892 /* will never occur */
1893 break;
1894 }
1895 cnv->charErrorBufferLength=(int8_t)length;
1896
1897 /* now output what fits into the regular target */
1898 c>>=8*length; /* length was reduced by targetCapacity */
1899 switch(targetCapacity) {
1900 /* each branch falls through to the next one */
1901 case 3:
1902 *target++=(uint8_t)(c>>16);
1903 case 2:
1904 *target++=(uint8_t)(c>>8);
1905 case 1:
1906 *target++=(uint8_t)c;
1907 default:
b75a7d8f
A
1908 break;
1909 }
1910
1911 /* target overflow */
1912 targetCapacity=0;
1913 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1914 c=0;
1915 goto endloop;
1916 }
b75a7d8f
A
1917}
1918
1919/* miscellaneous ------------------------------------------------------------ */
1920
1921static const char *
1922_SCSUGetName(const UConverter *cnv) {
1923 SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1924
1925 switch(scsu->locale) {
1926 case l_ja:
1927 return "SCSU,locale=ja";
1928 default:
1929 return "SCSU";
1930 }
1931}
1932
b75a7d8f 1933/* structure for SafeClone calculations */
374ca955 1934struct cloneSCSUStruct
b75a7d8f
A
1935{
1936 UConverter cnv;
1937 SCSUData mydata;
1938};
1939
1940static UConverter *
1941_SCSUSafeClone(const UConverter *cnv,
1942 void *stackBuffer,
1943 int32_t *pBufferSize,
1944 UErrorCode *status)
1945{
374ca955
A
1946 struct cloneSCSUStruct * localClone;
1947 int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
b75a7d8f
A
1948
1949 if (U_FAILURE(*status)){
1950 return 0;
1951 }
1952
1953 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1954 *pBufferSize = bufferSizeNeeded;
1955 return 0;
1956 }
1957
374ca955
A
1958 localClone = (struct cloneSCSUStruct *)stackBuffer;
1959 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
b75a7d8f
A
1960
1961 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
1962 localClone->cnv.extraInfo = &localClone->mydata;
1963 localClone->cnv.isExtraLocal = TRUE;
1964
1965 return &localClone->cnv;
1966}
1967
1968
b75a7d8f
A
1969static const UConverterImpl _SCSUImpl={
1970 UCNV_SCSU,
1971
1972 NULL,
1973 NULL,
1974
1975 _SCSUOpen,
1976 _SCSUClose,
1977 _SCSUReset,
1978
1979 _SCSUToUnicode,
1980 _SCSUToUnicodeWithOffsets,
1981 _SCSUFromUnicode,
1982 _SCSUFromUnicodeWithOffsets,
374ca955 1983 NULL,
b75a7d8f
A
1984
1985 NULL,
1986 _SCSUGetName,
73c04bcf 1987 NULL,
b75a7d8f
A
1988 _SCSUSafeClone,
1989 ucnv_getCompleteUnicodeSet
1990};
1991
1992static const UConverterStaticData _SCSUStaticData={
1993 sizeof(UConverterStaticData),
1994 "SCSU",
73c04bcf 1995 1212, /* CCSID for SCSU */
b75a7d8f
A
1996 UCNV_IBM, UCNV_SCSU,
1997 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
374ca955 1998 /*
73c04bcf
A
1999 * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
2000 * substitution string.
374ca955
A
2001 */
2002 { 0x0e, 0xff, 0xfd, 0 }, 3,
b75a7d8f
A
2003 FALSE, FALSE,
2004 0,
2005 0,
2006 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
2007};
2008
2009const UConverterSharedData _SCSUData={
2010 sizeof(UConverterSharedData), ~((uint32_t)0),
2011 NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
2012 0
2013};
2014
374ca955 2015#endif