]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucnvscsu.c
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / common / ucnvscsu.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 2000-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: ucnvscsu.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2000nov18
14 * created by: Markus W. Scherer
15 *
16 * This is an implementation of the Standard Compression Scheme for Unicode
17 * as defined in http://www.unicode.org/unicode/reports/tr6/ .
18 * Reserved commands and window settings are treated as illegal sequences and
19 * will result in callback calls.
20 */
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #include "unicode/ucnv.h"
27 #include "unicode/ucnv_cb.h"
28 #include "ucnv_bld.h"
29 #include "ucnv_cnv.h"
30 #include "cmemory.h"
31
32 /* SCSU definitions --------------------------------------------------------- */
33
/*
 * SCSU command (tag) byte values.
 * The S* values are the commands recognized in single-byte mode,
 * the U* values are the commands recognized in Unicode mode.
 */
enum {
    /* single-byte mode */
    SQ0 = 0x01,     /* first "quote from window pair" command (pair 0) */
    SQ7 = 0x08,     /* last "quote from window pair" command (pair 7) */
    SDX = 0x0B,     /* define a window as extended */
    Srs = 0x0C,     /* reserved */
    SQU = 0x0E,     /* quote a single Unicode character */
    SCU = 0x0F,     /* change to Unicode mode */
    SC0 = 0x10,     /* first "select window" command (window 0) */
    SC7 = 0x17,     /* last "select window" command (window 7) */
    SD0 = 0x18,     /* first "define and select window" command (window 0) */
    SD7 = 0x1F,     /* last "define and select window" command (window 7) */

    /* Unicode mode */
    UC0 = 0xE0,     /* first "select window" command (window 0) */
    UC7 = 0xE7,     /* last "select window" command (window 7) */
    UD0 = 0xE8,     /* first "define and select window" command (window 0) */
    UD7 = 0xEF,     /* last "define and select window" command (window 7) */
    UQU = 0xF0,     /* quote a single Unicode character */
    UDX = 0xF1,     /* define a window as extended */
    Urs = 0xF2      /* reserved */
};
55
/* Constants for interpreting the one-byte argument of a "define window" command. */
enum {
    /*
     * Unicode code points from 3400 to E000 are not addressable by
     * dynamic windows because no short-run alphabets are found there.
     * Argument values starting at gapThreshold therefore get gapOffset
     * added to the computed window offset.
     */
    gapThreshold   = 0x68,
    gapOffset      = 0xAC00,

    /* argument values from reservedStart up to (not including) fixedThreshold are reserved */
    reservedStart  = 0xA8,

    /* argument values from fixedThreshold on index the table of predefined fixed offsets */
    fixedThreshold = 0xF9
};
71
/* start offsets of the 8 static windows; these never change */
static const uint32_t staticOffsets[8] = {
    0x0000,     /* [0] ASCII for quoted tags */
    0x0080,     /* [1] Latin-1 Supplement (for access to punctuation) */
    0x0100,     /* [2] Latin Extended-A */
    0x0300,     /* [3] Combining Diacritical Marks */
    0x2000,     /* [4] General Punctuation */
    0x2080,     /* [5] Currency Symbols */
    0x2100,     /* [6] Letterlike Symbols and Number Forms */
    0x3000      /* [7] CJK Symbols and Punctuation */
};
83
/* start offsets that the 8 dynamic (sliding) windows have after a reset */
static const uint32_t initialDynamicOffsets[8] = {
    0x0080,     /* [0] Latin-1 */
    0x00C0,     /* [1] Latin Extended A */
    0x0400,     /* [2] Cyrillic */
    0x0600,     /* [3] Arabic */
    0x0900,     /* [4] Devanagari */
    0x3040,     /* [5] Hiragana */
    0x30A0,     /* [6] Katakana */
    0xFF00      /* [7] Fullwidth ASCII */
};
95
/*
 * Predefined window offsets, indexed by (argument byte - fixedThreshold),
 * i.e. entry 0 belongs to argument byte 0xF9 and the last entry to 0xFF.
 */
static const uint32_t fixedOffsets[] = {
    0x00C0,     /* 0xF9: Latin-1 letters + half of Latin Extended A */
    0x0250,     /* 0xFA: IPA Extensions */
    0x0370,     /* 0xFB: Greek */
    0x0530,     /* 0xFC: Armenian */
    0x3040,     /* 0xFD: Hiragana */
    0x30A0,     /* 0xFE: Katakana */
    0xFF60      /* 0xFF: Halfwidth Katakana */
};
106
/* states of the toUnicode state machine (stored in SCSUData.toUState) */
enum {
    readCommand,        /* not inside a multi-byte sequence */
    quotePairOne,       /* expecting the first byte of a quoted 16-bit unit */
    quotePairTwo,       /* expecting the second byte of a 16-bit unit */
    quoteOne,           /* expecting the single byte that follows SQn */
    definePairOne,      /* expecting the first argument byte of SDX/UDX */
    definePairTwo,      /* expecting the second argument byte of SDX/UDX */
    defineOne           /* expecting the argument byte of SDn/UDn */
};
117
118 typedef struct SCSUData {
119 /* dynamic window offsets, intitialize to default values from initialDynamicOffsets */
120 uint32_t toUDynamicOffsets[8];
121 uint32_t fromUDynamicOffsets[8];
122
123 /* state machine state - toUnicode */
124 UBool toUIsSingleByteMode;
125 uint8_t toUState;
126 int8_t toUQuoteWindow, toUDynamicWindow;
127 uint8_t toUByteOne;
128 uint8_t toUPadding[3];
129
130 /* state machine state - fromUnicode */
131 UBool fromUIsSingleByteMode;
132 int8_t fromUDynamicWindow;
133
134 /*
135 * windowUse[] keeps track of the use of the dynamic windows:
136 * At nextWindowUseIndex there is the least recently used window,
137 * and the following windows (in a wrapping manner) are more and more
138 * recently used.
139 * At nextWindowUseIndex-1 there is the most recently used window.
140 */
141 uint8_t locale;
142 int8_t nextWindowUseIndex;
143 int8_t windowUse[8];
144 } SCSUData;
145
/* initial contents of SCSUData.windowUse[], generic and Japanese variants */
static const int8_t initialWindowUse[8] = {
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8] = {
    3, 2, 4, 1, 0, 7, 5, 6
};

/* values for SCSUData.locale */
enum {
    lGeneric,
    l_ja
};
152
153 /* SCSU setup functions ----------------------------------------------------- */
154
155 static void
156 _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
157 SCSUData *scsu=(SCSUData *)cnv->extraInfo;
158
159 if(choice<=UCNV_RESET_TO_UNICODE) {
160 /* reset toUnicode */
161 uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
162
163 scsu->toUIsSingleByteMode=TRUE;
164 scsu->toUState=readCommand;
165 scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
166 scsu->toUByteOne=0;
167
168 cnv->toULength=0;
169 }
170 if(choice!=UCNV_RESET_TO_UNICODE) {
171 /* reset fromUnicode */
172 uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
173
174 scsu->fromUIsSingleByteMode=TRUE;
175 scsu->fromUDynamicWindow=0;
176
177 scsu->nextWindowUseIndex=0;
178 switch(scsu->locale) {
179 case l_ja:
180 uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
181 break;
182 default:
183 uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
184 break;
185 }
186
187 cnv->fromUChar32=0;
188 }
189 }
190
191 static void
192 _SCSUOpen(UConverter *cnv,
193 const char *name,
194 const char *locale,
195 uint32_t options,
196 UErrorCode *pErrorCode) {
197 cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
198 if(cnv->extraInfo!=NULL) {
199 if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
200 ((SCSUData *)cnv->extraInfo)->locale=l_ja;
201 } else {
202 ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
203 }
204 _SCSUReset(cnv, UCNV_RESET_BOTH);
205 } else {
206 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
207 }
208 }
209
210 static void
211 _SCSUClose(UConverter *cnv) {
212 if(cnv->extraInfo!=NULL) {
213 if(!cnv->isExtraLocal) {
214 uprv_free(cnv->extraInfo);
215 }
216 cnv->extraInfo=NULL;
217 }
218 }
219
220 /* SCSU-to-Unicode conversion functions ------------------------------------- */
221
222 static void
223 _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
224 UErrorCode *pErrorCode) {
225 UConverter *cnv;
226 SCSUData *scsu;
227 const uint8_t *source, *sourceLimit;
228 UChar *target;
229 const UChar *targetLimit;
230 int32_t *offsets;
231 UBool isSingleByteMode;
232 uint8_t state, byteOne;
233 int8_t quoteWindow, dynamicWindow;
234
235 int32_t sourceIndex, nextSourceIndex;
236
237 uint8_t b;
238
239 /* set up the local pointers */
240 cnv=pArgs->converter;
241 scsu=(SCSUData *)cnv->extraInfo;
242
243 source=(const uint8_t *)pArgs->source;
244 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
245 target=pArgs->target;
246 targetLimit=pArgs->targetLimit;
247 offsets=pArgs->offsets;
248
249 /* get the state machine state */
250 isSingleByteMode=scsu->toUIsSingleByteMode;
251 state=scsu->toUState;
252 quoteWindow=scsu->toUQuoteWindow;
253 dynamicWindow=scsu->toUDynamicWindow;
254 byteOne=scsu->toUByteOne;
255
256 /* sourceIndex=-1 if the current character began in the previous buffer */
257 sourceIndex=state==readCommand ? 0 : -1;
258 nextSourceIndex=0;
259
260 /*
261 * conversion "loop"
262 *
263 * For performance, this is not a normal C loop.
264 * Instead, there are two code blocks for the two SCSU modes.
265 * The function branches to either one, and a change of the mode is done with a goto to
266 * the other branch.
267 *
268 * Each branch has two conventional loops:
269 * - a fast-path loop for the most common codes in the mode
270 * - a loop for all other codes in the mode
271 * When the fast-path runs into a code that it cannot handle, its loop ends and it
272 * runs into the following loop to handle the other codes.
273 * The end of the input or output buffer is also handled by the slower loop.
274 * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
275 *
276 * The callback handling is done by returning with an error code.
277 * The conversion framework actually calls the callback function.
278 */
279 if(isSingleByteMode) {
280 /* fast path for single-byte mode */
281 if(state==readCommand) {
282 fastSingle:
283 while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
284 ++source;
285 ++nextSourceIndex;
286 if(b<=0x7f) {
287 /* write US-ASCII graphic character or DEL */
288 *target++=(UChar)b;
289 if(offsets!=NULL) {
290 *offsets++=sourceIndex;
291 }
292 } else {
293 /* write from dynamic window */
294 uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
295 if(c<=0xffff) {
296 *target++=(UChar)c;
297 if(offsets!=NULL) {
298 *offsets++=sourceIndex;
299 }
300 } else {
301 /* output surrogate pair */
302 *target++=(UChar)(0xd7c0+(c>>10));
303 if(target<targetLimit) {
304 *target++=(UChar)(0xdc00|(c&0x3ff));
305 if(offsets!=NULL) {
306 *offsets++=sourceIndex;
307 *offsets++=sourceIndex;
308 }
309 } else {
310 /* target overflow */
311 if(offsets!=NULL) {
312 *offsets++=sourceIndex;
313 }
314 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
315 cnv->UCharErrorBufferLength=1;
316 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
317 goto endloop;
318 }
319 }
320 }
321 sourceIndex=nextSourceIndex;
322 }
323 }
324
325 /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
326 singleByteMode:
327 while(source<sourceLimit) {
328 if(target>=targetLimit) {
329 /* target is full */
330 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
331 break;
332 }
333 b=*source++;
334 ++nextSourceIndex;
335 switch(state) {
336 case readCommand:
337 /* redundant conditions are commented out */
338 /* here: b<0x20 because otherwise we would be in fastSingle */
339 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
340 /* CR/LF/TAB/NUL */
341 *target++=(UChar)b;
342 if(offsets!=NULL) {
343 *offsets++=sourceIndex;
344 }
345 sourceIndex=nextSourceIndex;
346 goto fastSingle;
347 } else if(SC0<=b) {
348 if(b<=SC7) {
349 dynamicWindow=(int8_t)(b-SC0);
350 sourceIndex=nextSourceIndex;
351 goto fastSingle;
352 } else /* if(SD0<=b && b<=SD7) */ {
353 dynamicWindow=(int8_t)(b-SD0);
354 state=defineOne;
355 }
356 } else if(/* SQ0<=b && */ b<=SQ7) {
357 quoteWindow=(int8_t)(b-SQ0);
358 state=quoteOne;
359 } else if(b==SDX) {
360 state=definePairOne;
361 } else if(b==SQU) {
362 state=quotePairOne;
363 } else if(b==SCU) {
364 sourceIndex=nextSourceIndex;
365 isSingleByteMode=FALSE;
366 goto fastUnicode;
367 } else /* Srs */ {
368 /* callback(illegal) */
369 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
370 cnv->toUBytes[0]=b;
371 cnv->toULength=1;
372 goto endloop;
373 }
374
375 /* store the first byte of a multibyte sequence in toUBytes[] */
376 cnv->toUBytes[0]=b;
377 cnv->toULength=1;
378 break;
379 case quotePairOne:
380 byteOne=b;
381 cnv->toUBytes[1]=b;
382 cnv->toULength=2;
383 state=quotePairTwo;
384 break;
385 case quotePairTwo:
386 *target++=(UChar)((byteOne<<8)|b);
387 if(offsets!=NULL) {
388 *offsets++=sourceIndex;
389 }
390 sourceIndex=nextSourceIndex;
391 state=readCommand;
392 goto fastSingle;
393 case quoteOne:
394 if(b<0x80) {
395 /* all static offsets are in the BMP */
396 *target++=(UChar)(staticOffsets[quoteWindow]+b);
397 if(offsets!=NULL) {
398 *offsets++=sourceIndex;
399 }
400 } else {
401 /* write from dynamic window */
402 uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
403 if(c<=0xffff) {
404 *target++=(UChar)c;
405 if(offsets!=NULL) {
406 *offsets++=sourceIndex;
407 }
408 } else {
409 /* output surrogate pair */
410 *target++=(UChar)(0xd7c0+(c>>10));
411 if(target<targetLimit) {
412 *target++=(UChar)(0xdc00|(c&0x3ff));
413 if(offsets!=NULL) {
414 *offsets++=sourceIndex;
415 *offsets++=sourceIndex;
416 }
417 } else {
418 /* target overflow */
419 if(offsets!=NULL) {
420 *offsets++=sourceIndex;
421 }
422 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
423 cnv->UCharErrorBufferLength=1;
424 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
425 goto endloop;
426 }
427 }
428 }
429 sourceIndex=nextSourceIndex;
430 state=readCommand;
431 goto fastSingle;
432 case definePairOne:
433 dynamicWindow=(int8_t)((b>>5)&7);
434 byteOne=(uint8_t)(b&0x1f);
435 cnv->toUBytes[1]=b;
436 cnv->toULength=2;
437 state=definePairTwo;
438 break;
439 case definePairTwo:
440 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
441 sourceIndex=nextSourceIndex;
442 state=readCommand;
443 goto fastSingle;
444 case defineOne:
445 if(b==0) {
446 /* callback(illegal): Reserved window offset value 0 */
447 cnv->toUBytes[1]=b;
448 cnv->toULength=2;
449 goto endloop;
450 } else if(b<gapThreshold) {
451 scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
452 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
453 scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
454 } else if(b>=fixedThreshold) {
455 scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
456 } else {
457 /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
458 cnv->toUBytes[1]=b;
459 cnv->toULength=2;
460 goto endloop;
461 }
462 sourceIndex=nextSourceIndex;
463 state=readCommand;
464 goto fastSingle;
465 }
466 }
467 } else {
468 /* fast path for Unicode mode */
469 if(state==readCommand) {
470 fastUnicode:
471 while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
472 *target++=(UChar)((b<<8)|source[1]);
473 if(offsets!=NULL) {
474 *offsets++=sourceIndex;
475 }
476 sourceIndex=nextSourceIndex;
477 nextSourceIndex+=2;
478 source+=2;
479 }
480 }
481
482 /* normal state machine for Unicode mode */
483 /* unicodeByteMode: */
484 while(source<sourceLimit) {
485 if(target>=targetLimit) {
486 /* target is full */
487 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
488 break;
489 }
490 b=*source++;
491 ++nextSourceIndex;
492 switch(state) {
493 case readCommand:
494 if((uint8_t)(b-UC0)>(Urs-UC0)) {
495 byteOne=b;
496 cnv->toUBytes[0]=b;
497 cnv->toULength=1;
498 state=quotePairTwo;
499 } else if(/* UC0<=b && */ b<=UC7) {
500 dynamicWindow=(int8_t)(b-UC0);
501 sourceIndex=nextSourceIndex;
502 isSingleByteMode=TRUE;
503 goto fastSingle;
504 } else if(/* UD0<=b && */ b<=UD7) {
505 dynamicWindow=(int8_t)(b-UD0);
506 isSingleByteMode=TRUE;
507 cnv->toUBytes[0]=b;
508 cnv->toULength=1;
509 state=defineOne;
510 goto singleByteMode;
511 } else if(b==UDX) {
512 isSingleByteMode=TRUE;
513 cnv->toUBytes[0]=b;
514 cnv->toULength=1;
515 state=definePairOne;
516 goto singleByteMode;
517 } else if(b==UQU) {
518 cnv->toUBytes[0]=b;
519 cnv->toULength=1;
520 state=quotePairOne;
521 } else /* Urs */ {
522 /* callback(illegal) */
523 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
524 cnv->toUBytes[0]=b;
525 cnv->toULength=1;
526 goto endloop;
527 }
528 break;
529 case quotePairOne:
530 byteOne=b;
531 cnv->toUBytes[1]=b;
532 cnv->toULength=2;
533 state=quotePairTwo;
534 break;
535 case quotePairTwo:
536 *target++=(UChar)((byteOne<<8)|b);
537 if(offsets!=NULL) {
538 *offsets++=sourceIndex;
539 }
540 sourceIndex=nextSourceIndex;
541 state=readCommand;
542 goto fastUnicode;
543 }
544 }
545 }
546 endloop:
547
548 /* set the converter state back into UConverter */
549 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
550 /* reset to deal with the next character */
551 state=readCommand;
552 } else if(state==readCommand) {
553 /* not in a multi-byte sequence, reset toULength */
554 cnv->toULength=0;
555 }
556 scsu->toUIsSingleByteMode=isSingleByteMode;
557 scsu->toUState=state;
558 scsu->toUQuoteWindow=quoteWindow;
559 scsu->toUDynamicWindow=dynamicWindow;
560 scsu->toUByteOne=byteOne;
561
562 /* write back the updated pointers */
563 pArgs->source=(const char *)source;
564 pArgs->target=target;
565 pArgs->offsets=offsets;
566 return;
567 }
568
569 /*
570 * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
571 * If a change is made in the original function, then either
572 * change this function the same way or
573 * re-copy the original function and remove the variables
574 * offsets, sourceIndex, and nextSourceIndex.
575 */
576 static void
577 _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
578 UErrorCode *pErrorCode) {
579 UConverter *cnv;
580 SCSUData *scsu;
581 const uint8_t *source, *sourceLimit;
582 UChar *target;
583 const UChar *targetLimit;
584 UBool isSingleByteMode;
585 uint8_t state, byteOne;
586 int8_t quoteWindow, dynamicWindow;
587
588 uint8_t b;
589
590 /* set up the local pointers */
591 cnv=pArgs->converter;
592 scsu=(SCSUData *)cnv->extraInfo;
593
594 source=(const uint8_t *)pArgs->source;
595 sourceLimit=(const uint8_t *)pArgs->sourceLimit;
596 target=pArgs->target;
597 targetLimit=pArgs->targetLimit;
598
599 /* get the state machine state */
600 isSingleByteMode=scsu->toUIsSingleByteMode;
601 state=scsu->toUState;
602 quoteWindow=scsu->toUQuoteWindow;
603 dynamicWindow=scsu->toUDynamicWindow;
604 byteOne=scsu->toUByteOne;
605
606 /*
607 * conversion "loop"
608 *
609 * For performance, this is not a normal C loop.
610 * Instead, there are two code blocks for the two SCSU modes.
611 * The function branches to either one, and a change of the mode is done with a goto to
612 * the other branch.
613 *
614 * Each branch has two conventional loops:
615 * - a fast-path loop for the most common codes in the mode
616 * - a loop for all other codes in the mode
617 * When the fast-path runs into a code that it cannot handle, its loop ends and it
618 * runs into the following loop to handle the other codes.
619 * The end of the input or output buffer is also handled by the slower loop.
620 * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
621 *
622 * The callback handling is done by returning with an error code.
623 * The conversion framework actually calls the callback function.
624 */
625 if(isSingleByteMode) {
626 /* fast path for single-byte mode */
627 if(state==readCommand) {
628 fastSingle:
629 while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
630 ++source;
631 if(b<=0x7f) {
632 /* write US-ASCII graphic character or DEL */
633 *target++=(UChar)b;
634 } else {
635 /* write from dynamic window */
636 uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
637 if(c<=0xffff) {
638 *target++=(UChar)c;
639 } else {
640 /* output surrogate pair */
641 *target++=(UChar)(0xd7c0+(c>>10));
642 if(target<targetLimit) {
643 *target++=(UChar)(0xdc00|(c&0x3ff));
644 } else {
645 /* target overflow */
646 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
647 cnv->UCharErrorBufferLength=1;
648 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
649 goto endloop;
650 }
651 }
652 }
653 }
654 }
655
656 /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
657 singleByteMode:
658 while(source<sourceLimit) {
659 if(target>=targetLimit) {
660 /* target is full */
661 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
662 break;
663 }
664 b=*source++;
665 switch(state) {
666 case readCommand:
667 /* redundant conditions are commented out */
668 /* here: b<0x20 because otherwise we would be in fastSingle */
669 if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
670 /* CR/LF/TAB/NUL */
671 *target++=(UChar)b;
672 goto fastSingle;
673 } else if(SC0<=b) {
674 if(b<=SC7) {
675 dynamicWindow=(int8_t)(b-SC0);
676 goto fastSingle;
677 } else /* if(SD0<=b && b<=SD7) */ {
678 dynamicWindow=(int8_t)(b-SD0);
679 state=defineOne;
680 }
681 } else if(/* SQ0<=b && */ b<=SQ7) {
682 quoteWindow=(int8_t)(b-SQ0);
683 state=quoteOne;
684 } else if(b==SDX) {
685 state=definePairOne;
686 } else if(b==SQU) {
687 state=quotePairOne;
688 } else if(b==SCU) {
689 isSingleByteMode=FALSE;
690 goto fastUnicode;
691 } else /* Srs */ {
692 /* callback(illegal) */
693 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
694 cnv->toUBytes[0]=b;
695 cnv->toULength=1;
696 goto endloop;
697 }
698
699 /* store the first byte of a multibyte sequence in toUBytes[] */
700 cnv->toUBytes[0]=b;
701 cnv->toULength=1;
702 break;
703 case quotePairOne:
704 byteOne=b;
705 cnv->toUBytes[1]=b;
706 cnv->toULength=2;
707 state=quotePairTwo;
708 break;
709 case quotePairTwo:
710 *target++=(UChar)((byteOne<<8)|b);
711 state=readCommand;
712 goto fastSingle;
713 case quoteOne:
714 if(b<0x80) {
715 /* all static offsets are in the BMP */
716 *target++=(UChar)(staticOffsets[quoteWindow]+b);
717 } else {
718 /* write from dynamic window */
719 uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
720 if(c<=0xffff) {
721 *target++=(UChar)c;
722 } else {
723 /* output surrogate pair */
724 *target++=(UChar)(0xd7c0+(c>>10));
725 if(target<targetLimit) {
726 *target++=(UChar)(0xdc00|(c&0x3ff));
727 } else {
728 /* target overflow */
729 cnv->UCharErrorBuffer[0]=(UChar)(0xdc00|(c&0x3ff));
730 cnv->UCharErrorBufferLength=1;
731 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
732 goto endloop;
733 }
734 }
735 }
736 state=readCommand;
737 goto fastSingle;
738 case definePairOne:
739 dynamicWindow=(int8_t)((b>>5)&7);
740 byteOne=(uint8_t)(b&0x1f);
741 cnv->toUBytes[1]=b;
742 cnv->toULength=2;
743 state=definePairTwo;
744 break;
745 case definePairTwo:
746 scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
747 state=readCommand;
748 goto fastSingle;
749 case defineOne:
750 if(b==0) {
751 /* callback(illegal): Reserved window offset value 0 */
752 cnv->toUBytes[1]=b;
753 cnv->toULength=2;
754 goto endloop;
755 } else if(b<gapThreshold) {
756 scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
757 } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
758 scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
759 } else if(b>=fixedThreshold) {
760 scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
761 } else {
762 /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
763 cnv->toUBytes[1]=b;
764 cnv->toULength=2;
765 goto endloop;
766 }
767 state=readCommand;
768 goto fastSingle;
769 }
770 }
771 } else {
772 /* fast path for Unicode mode */
773 if(state==readCommand) {
774 fastUnicode:
775 while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
776 *target++=(UChar)((b<<8)|source[1]);
777 source+=2;
778 }
779 }
780
781 /* normal state machine for Unicode mode */
782 /* unicodeByteMode: */
783 while(source<sourceLimit) {
784 if(target>=targetLimit) {
785 /* target is full */
786 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
787 break;
788 }
789 b=*source++;
790 switch(state) {
791 case readCommand:
792 if((uint8_t)(b-UC0)>(Urs-UC0)) {
793 byteOne=b;
794 cnv->toUBytes[0]=b;
795 cnv->toULength=1;
796 state=quotePairTwo;
797 } else if(/* UC0<=b && */ b<=UC7) {
798 dynamicWindow=(int8_t)(b-UC0);
799 isSingleByteMode=TRUE;
800 goto fastSingle;
801 } else if(/* UD0<=b && */ b<=UD7) {
802 dynamicWindow=(int8_t)(b-UD0);
803 isSingleByteMode=TRUE;
804 cnv->toUBytes[0]=b;
805 cnv->toULength=1;
806 state=defineOne;
807 goto singleByteMode;
808 } else if(b==UDX) {
809 isSingleByteMode=TRUE;
810 cnv->toUBytes[0]=b;
811 cnv->toULength=1;
812 state=definePairOne;
813 goto singleByteMode;
814 } else if(b==UQU) {
815 cnv->toUBytes[0]=b;
816 cnv->toULength=1;
817 state=quotePairOne;
818 } else /* Urs */ {
819 /* callback(illegal) */
820 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
821 cnv->toUBytes[0]=b;
822 cnv->toULength=1;
823 goto endloop;
824 }
825 break;
826 case quotePairOne:
827 byteOne=b;
828 cnv->toUBytes[1]=b;
829 cnv->toULength=2;
830 state=quotePairTwo;
831 break;
832 case quotePairTwo:
833 *target++=(UChar)((byteOne<<8)|b);
834 state=readCommand;
835 goto fastUnicode;
836 }
837 }
838 }
839 endloop:
840
841 /* set the converter state back into UConverter */
842 if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
843 /* reset to deal with the next character */
844 state=readCommand;
845 } else if(state==readCommand) {
846 /* not in a multi-byte sequence, reset toULength */
847 cnv->toULength=0;
848 }
849 scsu->toUIsSingleByteMode=isSingleByteMode;
850 scsu->toUState=state;
851 scsu->toUQuoteWindow=quoteWindow;
852 scsu->toUDynamicWindow=dynamicWindow;
853 scsu->toUByteOne=byteOne;
854
855 /* write back the updated pointers */
856 pArgs->source=(const char *)source;
857 pArgs->target=target;
858 return;
859 }
860
861 /* SCSU-from-Unicode conversion functions ----------------------------------- */
862
863 /*
864 * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
865 * reasonable results. The lookahead is minimal.
866 * Many cases are simple:
867 * A character fits directly into the current mode, a dynamic or static window,
868 * or is not compressible. These cases are tested first.
869 * Real compression heuristics are applied to the rest, in code branches for
870 * single/Unicode mode and BMP/supplementary code points.
871 * The heuristics used here are extremely simple.
872 */
873
/*
 * Find the first of the 8 windows whose 128-code-point range
 * [offsets[i], offsets[i]+0x7f] contains c.
 * Returns the window number 0..7, or -1 if no window contains c.
 */
static int8_t
getWindow(const uint32_t offsets[8], uint32_t c) {
    int8_t window=0;

    while(window<8) {
        /* unsigned subtraction: c below the offset wraps to a large value and fails the test */
        uint32_t delta=c-offsets[window];
        if(delta<0x80) {
            return window;
        }
        ++window;
    }
    return -1;
}
885
886 /* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
887 static UBool
888 isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
889 return (UBool)(c<=offset+0x7f &&
890 (c>=offset || (c<=0x7f &&
891 (c>=0x20 || (1UL<<c)&0x2601))));
892 /* binary 0010 0110 0000 0001,
893 check for b==0xd || b==0xa || b==9 || b==0 */
894 }
895
896 /*
897 * getNextDynamicWindow returns the next dynamic window to be redefined
898 */
899 static int8_t
900 getNextDynamicWindow(SCSUData *scsu) {
901 int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
902 if(++scsu->nextWindowUseIndex==8) {
903 scsu->nextWindowUseIndex=0;
904 }
905 return window;
906 }
907
908 /*
909 * useDynamicWindow() adjusts
910 * windowUse[] and nextWindowUseIndex for the algorithm to choose
911 * the next dynamic window to be defined;
912 * a subclass may override it and provide its own algorithm.
913 */
914 static void
915 useDynamicWindow(SCSUData *scsu, int8_t window) {
916 /*
917 * move the existing window, which just became the most recently used one,
918 * up in windowUse[] to nextWindowUseIndex-1
919 */
920
921 /* first, find the index of the window - backwards to favor the more recently used windows */
922 int i, j;
923
924 i=scsu->nextWindowUseIndex;
925 do {
926 if(--i<0) {
927 i=7;
928 }
929 } while(scsu->windowUse[i]!=window);
930
931 /* now copy each windowUse[i+1] to [i] */
932 j=i+1;
933 if(j==8) {
934 j=0;
935 }
936 while(j!=scsu->nextWindowUseIndex) {
937 scsu->windowUse[i]=scsu->windowUse[j];
938 i=j;
939 if(++j==8) { j=0; }
940 }
941
942 /* finally, set the window into the most recently used index */
943 scsu->windowUse[i]=window;
944 }
945
946 /*
947 * calculate the offset and the code for a dynamic window that contains the character
948 * takes fixed offsets into account
949 * the offset of the window is stored in the offset variable,
950 * the code is returned
951 *
952 * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code
953 */
954 static int
955 getDynamicOffset(uint32_t c, uint32_t *pOffset) {
956 int i;
957
958 for(i=0; i<7; ++i) {
959 if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
960 *pOffset=fixedOffsets[i];
961 return 0xf9+i;
962 }
963 }
964
965 if(c<0x80) {
966 /* No dynamic window for US-ASCII. */
967 return -1;
968 } else if(c<0x3400 ||
969 (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
970 (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
971 ) {
972 /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
973 *pOffset=c&0x7fffff80;
974 return (int)(c>>7);
975 } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
976 /* For these characters we need to take the gapOffset into account. */
977 *pOffset=c&0x7fffff80;
978 return (int)((c-gapOffset)>>7);
979 } else {
980 return -1;
981 }
982 }
983
984 /*
985 * Idea for compression:
986 * - save SCSUData and other state before really starting work
987 * - at endloop, see if compression could be better with just unicode mode
988 * - don't do this if a callback has been called
989 * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
990 * - different buffer handling!
991 *
992 * Drawback or need for corrective handling:
993 * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
994 * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
995 * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
996 *
997 * How to achieve both?
998 * - Only replace the result after an SDX or SCU?
999 */
1000
1001 static void
1002 _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
1003 UErrorCode *pErrorCode) {
1004 UConverter *cnv;
1005 SCSUData *scsu;
1006 const UChar *source, *sourceLimit;
1007 uint8_t *target;
1008 int32_t targetCapacity;
1009 int32_t *offsets;
1010
1011 UBool isSingleByteMode;
1012 uint8_t dynamicWindow;
1013 uint32_t currentOffset;
1014
1015 uint32_t c, delta;
1016
1017 int32_t sourceIndex, nextSourceIndex;
1018
1019 int32_t length;
1020
1021 /* variables for compression heuristics */
1022 uint32_t offset;
1023 UChar lead, trail;
1024 int code;
1025 int8_t window;
1026
1027 /* set up the local pointers */
1028 cnv=pArgs->converter;
1029 scsu=(SCSUData *)cnv->extraInfo;
1030
1031 /* set up the local pointers */
1032 source=pArgs->source;
1033 sourceLimit=pArgs->sourceLimit;
1034 target=(uint8_t *)pArgs->target;
1035 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1036 offsets=pArgs->offsets;
1037
1038 /* get the state machine state */
1039 isSingleByteMode=scsu->fromUIsSingleByteMode;
1040 dynamicWindow=scsu->fromUDynamicWindow;
1041 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1042
1043 c=cnv->fromUChar32;
1044
1045 /* sourceIndex=-1 if the current character began in the previous buffer */
1046 sourceIndex= c==0 ? 0 : -1;
1047 nextSourceIndex=0;
1048
1049 /* similar conversion "loop" as in toUnicode */
1050 loop:
1051 if(isSingleByteMode) {
1052 if(c!=0 && targetCapacity>0) {
1053 goto getTrailSingle;
1054 }
1055
1056 /* state machine for single-byte mode */
1057 /* singleByteMode: */
1058 while(source<sourceLimit) {
1059 if(targetCapacity<=0) {
1060 /* target is full */
1061 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1062 break;
1063 }
1064 c=*source++;
1065 ++nextSourceIndex;
1066
1067 if((c-0x20)<=0x5f) {
1068 /* pass US-ASCII graphic character through */
1069 *target++=(uint8_t)c;
1070 if(offsets!=NULL) {
1071 *offsets++=sourceIndex;
1072 }
1073 --targetCapacity;
1074 } else if(c<0x20) {
1075 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1076 /* CR/LF/TAB/NUL */
1077 *target++=(uint8_t)c;
1078 if(offsets!=NULL) {
1079 *offsets++=sourceIndex;
1080 }
1081 --targetCapacity;
1082 } else {
1083 /* quote C0 control character */
1084 c|=SQ0<<8;
1085 length=2;
1086 goto outputBytes;
1087 }
1088 } else if((delta=c-currentOffset)<=0x7f) {
1089 /* use the current dynamic window */
1090 *target++=(uint8_t)(delta|0x80);
1091 if(offsets!=NULL) {
1092 *offsets++=sourceIndex;
1093 }
1094 --targetCapacity;
1095 } else if(UTF_IS_SURROGATE(c)) {
1096 if(UTF_IS_SURROGATE_FIRST(c)) {
1097 getTrailSingle:
1098 lead=(UChar)c;
1099 if(source<sourceLimit) {
1100 /* test the following code unit */
1101 trail=*source;
1102 if(UTF_IS_SECOND_SURROGATE(trail)) {
1103 ++source;
1104 ++nextSourceIndex;
1105 c=UTF16_GET_PAIR_VALUE(c, trail);
1106 /* convert this surrogate code point */
1107 /* exit this condition tree */
1108 } else {
1109 /* this is an unmatched lead code unit (1st surrogate) */
1110 /* callback(illegal) */
1111 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1112 goto endloop;
1113 }
1114 } else {
1115 /* no more input */
1116 break;
1117 }
1118 } else {
1119 /* this is an unmatched trail code unit (2nd surrogate) */
1120 /* callback(illegal) */
1121 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1122 goto endloop;
1123 }
1124
1125 /* compress supplementary character U+10000..U+10ffff */
1126 if((delta=c-currentOffset)<=0x7f) {
1127 /* use the current dynamic window */
1128 *target++=(uint8_t)(delta|0x80);
1129 if(offsets!=NULL) {
1130 *offsets++=sourceIndex;
1131 }
1132 --targetCapacity;
1133 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1134 /* there is a dynamic window that contains this character, change to it */
1135 dynamicWindow=window;
1136 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1137 useDynamicWindow(scsu, dynamicWindow);
1138 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1139 length=2;
1140 goto outputBytes;
1141 } else if((code=getDynamicOffset(c, &offset))>=0) {
1142 /* might check if there are more characters in this window to come */
1143 /* define an extended window with this character */
1144 code-=0x200;
1145 dynamicWindow=getNextDynamicWindow(scsu);
1146 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1147 useDynamicWindow(scsu, dynamicWindow);
1148 c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1149 length=4;
1150 goto outputBytes;
1151 } else {
1152 /* change to Unicode mode and output this (lead, trail) pair */
1153 isSingleByteMode=FALSE;
1154 *target++=(uint8_t)SCU;
1155 if(offsets!=NULL) {
1156 *offsets++=sourceIndex;
1157 }
1158 --targetCapacity;
1159 c=((uint32_t)lead<<16)|trail;
1160 length=4;
1161 goto outputBytes;
1162 }
1163 } else if(c<0xa0) {
1164 /* quote C1 control character */
1165 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1166 length=2;
1167 goto outputBytes;
1168 } else if(c==0xfeff || c>=0xfff0) {
1169 /* quote signature character=byte order mark and specials */
1170 c|=SQU<<16;
1171 length=3;
1172 goto outputBytes;
1173 } else {
1174 /* compress all other BMP characters */
1175 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1176 /* there is a window defined that contains this character - switch to it or quote from it? */
1177 if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1178 /* change to dynamic window */
1179 dynamicWindow=window;
1180 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1181 useDynamicWindow(scsu, dynamicWindow);
1182 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1183 length=2;
1184 goto outputBytes;
1185 } else {
1186 /* quote from dynamic window */
1187 c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1188 length=2;
1189 goto outputBytes;
1190 }
1191 } else if((window=getWindow(staticOffsets, c))>=0) {
1192 /* quote from static window */
1193 c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1194 length=2;
1195 goto outputBytes;
1196 } else if((code=getDynamicOffset(c, &offset))>=0) {
1197 /* define a dynamic window with this character */
1198 dynamicWindow=getNextDynamicWindow(scsu);
1199 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1200 useDynamicWindow(scsu, dynamicWindow);
1201 c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1202 length=3;
1203 goto outputBytes;
1204 } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1205 (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1206 ) {
1207 /*
1208 * this character is not compressible (a BMP ideograph or similar);
1209 * switch to Unicode mode if this is the last character in the block
1210 * or there is at least one more ideograph following immediately
1211 */
1212 isSingleByteMode=FALSE;
1213 c|=SCU<<16;
1214 length=3;
1215 goto outputBytes;
1216 } else {
1217 /* quote Unicode */
1218 c|=SQU<<16;
1219 length=3;
1220 goto outputBytes;
1221 }
1222 }
1223
1224 /* normal end of conversion: prepare for a new character */
1225 c=0;
1226 sourceIndex=nextSourceIndex;
1227 }
1228 } else {
1229 if(c!=0 && targetCapacity>0) {
1230 goto getTrailUnicode;
1231 }
1232
1233 /* state machine for Unicode mode */
1234 /* unicodeByteMode: */
1235 while(source<sourceLimit) {
1236 if(targetCapacity<=0) {
1237 /* target is full */
1238 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1239 break;
1240 }
1241 c=*source++;
1242 ++nextSourceIndex;
1243
1244 if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1245 /* not compressible, write character directly */
1246 if(targetCapacity>=2) {
1247 *target++=(uint8_t)(c>>8);
1248 *target++=(uint8_t)c;
1249 if(offsets!=NULL) {
1250 *offsets++=sourceIndex;
1251 *offsets++=sourceIndex;
1252 }
1253 targetCapacity-=2;
1254 } else {
1255 length=2;
1256 goto outputBytes;
1257 }
1258 } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1259 /* compress BMP character if the following one is not an uncompressible ideograph */
1260 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1261 if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1262 /* ASCII digit or letter */
1263 isSingleByteMode=TRUE;
1264 c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1265 length=2;
1266 goto outputBytes;
1267 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1268 /* there is a dynamic window that contains this character, change to it */
1269 isSingleByteMode=TRUE;
1270 dynamicWindow=window;
1271 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1272 useDynamicWindow(scsu, dynamicWindow);
1273 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1274 length=2;
1275 goto outputBytes;
1276 } else if((code=getDynamicOffset(c, &offset))>=0) {
1277 /* define a dynamic window with this character */
1278 isSingleByteMode=TRUE;
1279 dynamicWindow=getNextDynamicWindow(scsu);
1280 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1281 useDynamicWindow(scsu, dynamicWindow);
1282 c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1283 length=3;
1284 goto outputBytes;
1285 }
1286 }
1287
1288 /* don't know how to compress this character, just write it directly */
1289 length=2;
1290 goto outputBytes;
1291 } else if(c<0xe000) {
1292 /* c is a surrogate */
1293 if(UTF_IS_SURROGATE_FIRST(c)) {
1294 getTrailUnicode:
1295 lead=(UChar)c;
1296 if(source<sourceLimit) {
1297 /* test the following code unit */
1298 trail=*source;
1299 if(UTF_IS_SECOND_SURROGATE(trail)) {
1300 ++source;
1301 ++nextSourceIndex;
1302 c=UTF16_GET_PAIR_VALUE(c, trail);
1303 /* convert this surrogate code point */
1304 /* exit this condition tree */
1305 } else {
1306 /* this is an unmatched lead code unit (1st surrogate) */
1307 /* callback(illegal) */
1308 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1309 goto endloop;
1310 }
1311 } else {
1312 /* no more input */
1313 break;
1314 }
1315 } else {
1316 /* this is an unmatched trail code unit (2nd surrogate) */
1317 /* callback(illegal) */
1318 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1319 goto endloop;
1320 }
1321
1322 /* compress supplementary character */
1323 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1324 !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1325 ) {
1326 /*
1327 * there is a dynamic window that contains this character and
1328 * the following character is not uncompressible,
1329 * change to the window
1330 */
1331 isSingleByteMode=TRUE;
1332 dynamicWindow=window;
1333 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1334 useDynamicWindow(scsu, dynamicWindow);
1335 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1336 length=2;
1337 goto outputBytes;
1338 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1339 (code=getDynamicOffset(c, &offset))>=0
1340 ) {
1341 /* two supplementary characters in (probably) the same window - define an extended one */
1342 isSingleByteMode=TRUE;
1343 code-=0x200;
1344 dynamicWindow=getNextDynamicWindow(scsu);
1345 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1346 useDynamicWindow(scsu, dynamicWindow);
1347 c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1348 length=4;
1349 goto outputBytes;
1350 } else {
1351 /* don't know how to compress this character, just write it directly */
1352 c=((uint32_t)lead<<16)|trail;
1353 length=4;
1354 goto outputBytes;
1355 }
1356 } else /* 0xe000<=c<0xf300 */ {
1357 /* quote to avoid SCSU tags */
1358 c|=UQU<<16;
1359 length=3;
1360 goto outputBytes;
1361 }
1362
1363 /* normal end of conversion: prepare for a new character */
1364 c=0;
1365 sourceIndex=nextSourceIndex;
1366 }
1367 }
1368 endloop:
1369
1370 /* set the converter state back into UConverter */
1371 scsu->fromUIsSingleByteMode=isSingleByteMode;
1372 scsu->fromUDynamicWindow=dynamicWindow;
1373
1374 cnv->fromUChar32=c;
1375
1376 /* write back the updated pointers */
1377 pArgs->source=source;
1378 pArgs->target=(char *)target;
1379 pArgs->offsets=offsets;
1380 return;
1381
1382 outputBytes:
1383 /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1384 /* from the first if in the loop we know that targetCapacity>0 */
1385 if(length<=targetCapacity) {
1386 if(offsets==NULL) {
1387 switch(length) {
1388 /* each branch falls through to the next one */
1389 case 4:
1390 *target++=(uint8_t)(c>>24);
1391 case 3:
1392 *target++=(uint8_t)(c>>16);
1393 case 2:
1394 *target++=(uint8_t)(c>>8);
1395 case 1:
1396 *target++=(uint8_t)c;
1397 default:
1398 /* will never occur */
1399 break;
1400 }
1401 } else {
1402 switch(length) {
1403 /* each branch falls through to the next one */
1404 case 4:
1405 *target++=(uint8_t)(c>>24);
1406 *offsets++=sourceIndex;
1407 case 3:
1408 *target++=(uint8_t)(c>>16);
1409 *offsets++=sourceIndex;
1410 case 2:
1411 *target++=(uint8_t)(c>>8);
1412 *offsets++=sourceIndex;
1413 case 1:
1414 *target++=(uint8_t)c;
1415 *offsets++=sourceIndex;
1416 default:
1417 /* will never occur */
1418 break;
1419 }
1420 }
1421 targetCapacity-=length;
1422
1423 /* normal end of conversion: prepare for a new character */
1424 c=0;
1425 sourceIndex=nextSourceIndex;
1426 goto loop;
1427 } else {
1428 uint8_t *p;
1429
1430 /*
1431 * We actually do this backwards here:
1432 * In order to save an intermediate variable, we output
1433 * first to the overflow buffer what does not fit into the
1434 * regular target.
1435 */
1436 /* we know that 1<=targetCapacity<length<=4 */
1437 length-=targetCapacity;
1438 p=(uint8_t *)cnv->charErrorBuffer;
1439 switch(length) {
1440 /* each branch falls through to the next one */
1441 case 3:
1442 *p++=(uint8_t)(c>>16);
1443 case 2:
1444 *p++=(uint8_t)(c>>8);
1445 case 1:
1446 *p=(uint8_t)c;
1447 default:
1448 /* will never occur */
1449 break;
1450 }
1451 cnv->charErrorBufferLength=(int8_t)length;
1452
1453 /* now output what fits into the regular target */
1454 c>>=8*length; /* length was reduced by targetCapacity */
1455 switch(targetCapacity) {
1456 /* each branch falls through to the next one */
1457 case 3:
1458 *target++=(uint8_t)(c>>16);
1459 if(offsets!=NULL) {
1460 *offsets++=sourceIndex;
1461 }
1462 case 2:
1463 *target++=(uint8_t)(c>>8);
1464 if(offsets!=NULL) {
1465 *offsets++=sourceIndex;
1466 }
1467 case 1:
1468 *target++=(uint8_t)c;
1469 if(offsets!=NULL) {
1470 *offsets++=sourceIndex;
1471 }
1472 default:
1473 /* will never occur */
1474 break;
1475 }
1476
1477 /* target overflow */
1478 targetCapacity=0;
1479 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1480 c=0;
1481 goto endloop;
1482 }
1483 }
1484
1485 /*
1486 * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
1487 * If a change is made in the original function, then either
1488 * change this function the same way or
1489 * re-copy the original function and remove the variables
1490 * offsets, sourceIndex, and nextSourceIndex.
1491 */
1492 static void
1493 _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
1494 UErrorCode *pErrorCode) {
1495 UConverter *cnv;
1496 SCSUData *scsu;
1497 const UChar *source, *sourceLimit;
1498 uint8_t *target;
1499 int32_t targetCapacity;
1500
1501 UBool isSingleByteMode;
1502 uint8_t dynamicWindow;
1503 uint32_t currentOffset;
1504
1505 uint32_t c, delta;
1506
1507 int32_t length;
1508
1509 /* variables for compression heuristics */
1510 uint32_t offset;
1511 UChar lead, trail;
1512 int code;
1513 int8_t window;
1514
1515 /* set up the local pointers */
1516 cnv=pArgs->converter;
1517 scsu=(SCSUData *)cnv->extraInfo;
1518
1519 /* set up the local pointers */
1520 source=pArgs->source;
1521 sourceLimit=pArgs->sourceLimit;
1522 target=(uint8_t *)pArgs->target;
1523 targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
1524
1525 /* get the state machine state */
1526 isSingleByteMode=scsu->fromUIsSingleByteMode;
1527 dynamicWindow=scsu->fromUDynamicWindow;
1528 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1529
1530 c=cnv->fromUChar32;
1531
1532 /* similar conversion "loop" as in toUnicode */
1533 loop:
1534 if(isSingleByteMode) {
1535 if(c!=0 && targetCapacity>0) {
1536 goto getTrailSingle;
1537 }
1538
1539 /* state machine for single-byte mode */
1540 /* singleByteMode: */
1541 while(source<sourceLimit) {
1542 if(targetCapacity<=0) {
1543 /* target is full */
1544 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1545 break;
1546 }
1547 c=*source++;
1548
1549 if((c-0x20)<=0x5f) {
1550 /* pass US-ASCII graphic character through */
1551 *target++=(uint8_t)c;
1552 --targetCapacity;
1553 } else if(c<0x20) {
1554 if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
1555 /* CR/LF/TAB/NUL */
1556 *target++=(uint8_t)c;
1557 --targetCapacity;
1558 } else {
1559 /* quote C0 control character */
1560 c|=SQ0<<8;
1561 length=2;
1562 goto outputBytes;
1563 }
1564 } else if((delta=c-currentOffset)<=0x7f) {
1565 /* use the current dynamic window */
1566 *target++=(uint8_t)(delta|0x80);
1567 --targetCapacity;
1568 } else if(UTF_IS_SURROGATE(c)) {
1569 if(UTF_IS_SURROGATE_FIRST(c)) {
1570 getTrailSingle:
1571 lead=(UChar)c;
1572 if(source<sourceLimit) {
1573 /* test the following code unit */
1574 trail=*source;
1575 if(UTF_IS_SECOND_SURROGATE(trail)) {
1576 ++source;
1577 c=UTF16_GET_PAIR_VALUE(c, trail);
1578 /* convert this surrogate code point */
1579 /* exit this condition tree */
1580 } else {
1581 /* this is an unmatched lead code unit (1st surrogate) */
1582 /* callback(illegal) */
1583 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1584 goto endloop;
1585 }
1586 } else {
1587 /* no more input */
1588 break;
1589 }
1590 } else {
1591 /* this is an unmatched trail code unit (2nd surrogate) */
1592 /* callback(illegal) */
1593 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1594 goto endloop;
1595 }
1596
1597 /* compress supplementary character U+10000..U+10ffff */
1598 if((delta=c-currentOffset)<=0x7f) {
1599 /* use the current dynamic window */
1600 *target++=(uint8_t)(delta|0x80);
1601 --targetCapacity;
1602 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1603 /* there is a dynamic window that contains this character, change to it */
1604 dynamicWindow=window;
1605 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1606 useDynamicWindow(scsu, dynamicWindow);
1607 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1608 length=2;
1609 goto outputBytes;
1610 } else if((code=getDynamicOffset(c, &offset))>=0) {
1611 /* might check if there are more characters in this window to come */
1612 /* define an extended window with this character */
1613 code-=0x200;
1614 dynamicWindow=getNextDynamicWindow(scsu);
1615 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1616 useDynamicWindow(scsu, dynamicWindow);
1617 c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1618 length=4;
1619 goto outputBytes;
1620 } else {
1621 /* change to Unicode mode and output this (lead, trail) pair */
1622 isSingleByteMode=FALSE;
1623 *target++=(uint8_t)SCU;
1624 --targetCapacity;
1625 c=((uint32_t)lead<<16)|trail;
1626 length=4;
1627 goto outputBytes;
1628 }
1629 } else if(c<0xa0) {
1630 /* quote C1 control character */
1631 c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
1632 length=2;
1633 goto outputBytes;
1634 } else if(c==0xfeff || c>=0xfff0) {
1635 /* quote signature character=byte order mark and specials */
1636 c|=SQU<<16;
1637 length=3;
1638 goto outputBytes;
1639 } else {
1640 /* compress all other BMP characters */
1641 if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1642 /* there is a window defined that contains this character - switch to it or quote from it? */
1643 if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
1644 /* change to dynamic window */
1645 dynamicWindow=window;
1646 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1647 useDynamicWindow(scsu, dynamicWindow);
1648 c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1649 length=2;
1650 goto outputBytes;
1651 } else {
1652 /* quote from dynamic window */
1653 c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
1654 length=2;
1655 goto outputBytes;
1656 }
1657 } else if((window=getWindow(staticOffsets, c))>=0) {
1658 /* quote from static window */
1659 c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
1660 length=2;
1661 goto outputBytes;
1662 } else if((code=getDynamicOffset(c, &offset))>=0) {
1663 /* define a dynamic window with this character */
1664 dynamicWindow=getNextDynamicWindow(scsu);
1665 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1666 useDynamicWindow(scsu, dynamicWindow);
1667 c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1668 length=3;
1669 goto outputBytes;
1670 } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
1671 (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1672 ) {
1673 /*
1674 * this character is not compressible (a BMP ideograph or similar);
1675 * switch to Unicode mode if this is the last character in the block
1676 * or there is at least one more ideograph following immediately
1677 */
1678 isSingleByteMode=FALSE;
1679 c|=SCU<<16;
1680 length=3;
1681 goto outputBytes;
1682 } else {
1683 /* quote Unicode */
1684 c|=SQU<<16;
1685 length=3;
1686 goto outputBytes;
1687 }
1688 }
1689
1690 /* normal end of conversion: prepare for a new character */
1691 c=0;
1692 }
1693 } else {
1694 if(c!=0 && targetCapacity>0) {
1695 goto getTrailUnicode;
1696 }
1697
1698 /* state machine for Unicode mode */
1699 /* unicodeByteMode: */
1700 while(source<sourceLimit) {
1701 if(targetCapacity<=0) {
1702 /* target is full */
1703 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1704 break;
1705 }
1706 c=*source++;
1707
1708 if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
1709 /* not compressible, write character directly */
1710 if(targetCapacity>=2) {
1711 *target++=(uint8_t)(c>>8);
1712 *target++=(uint8_t)c;
1713 targetCapacity-=2;
1714 } else {
1715 length=2;
1716 goto outputBytes;
1717 }
1718 } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
1719 /* compress BMP character if the following one is not an uncompressible ideograph */
1720 if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
1721 if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
1722 /* ASCII digit or letter */
1723 isSingleByteMode=TRUE;
1724 c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
1725 length=2;
1726 goto outputBytes;
1727 } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
1728 /* there is a dynamic window that contains this character, change to it */
1729 isSingleByteMode=TRUE;
1730 dynamicWindow=window;
1731 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1732 useDynamicWindow(scsu, dynamicWindow);
1733 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1734 length=2;
1735 goto outputBytes;
1736 } else if((code=getDynamicOffset(c, &offset))>=0) {
1737 /* define a dynamic window with this character */
1738 isSingleByteMode=TRUE;
1739 dynamicWindow=getNextDynamicWindow(scsu);
1740 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1741 useDynamicWindow(scsu, dynamicWindow);
1742 c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1743 length=3;
1744 goto outputBytes;
1745 }
1746 }
1747
1748 /* don't know how to compress this character, just write it directly */
1749 length=2;
1750 goto outputBytes;
1751 } else if(c<0xe000) {
1752 /* c is a surrogate */
1753 if(UTF_IS_SURROGATE_FIRST(c)) {
1754 getTrailUnicode:
1755 lead=(UChar)c;
1756 if(source<sourceLimit) {
1757 /* test the following code unit */
1758 trail=*source;
1759 if(UTF_IS_SECOND_SURROGATE(trail)) {
1760 ++source;
1761 c=UTF16_GET_PAIR_VALUE(c, trail);
1762 /* convert this surrogate code point */
1763 /* exit this condition tree */
1764 } else {
1765 /* this is an unmatched lead code unit (1st surrogate) */
1766 /* callback(illegal) */
1767 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1768 goto endloop;
1769 }
1770 } else {
1771 /* no more input */
1772 break;
1773 }
1774 } else {
1775 /* this is an unmatched trail code unit (2nd surrogate) */
1776 /* callback(illegal) */
1777 *pErrorCode=U_ILLEGAL_CHAR_FOUND;
1778 goto endloop;
1779 }
1780
1781 /* compress supplementary character */
1782 if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
1783 !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
1784 ) {
1785 /*
1786 * there is a dynamic window that contains this character and
1787 * the following character is not uncompressible,
1788 * change to the window
1789 */
1790 isSingleByteMode=TRUE;
1791 dynamicWindow=window;
1792 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
1793 useDynamicWindow(scsu, dynamicWindow);
1794 c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
1795 length=2;
1796 goto outputBytes;
1797 } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
1798 (code=getDynamicOffset(c, &offset))>=0
1799 ) {
1800 /* two supplementary characters in (probably) the same window - define an extended one */
1801 isSingleByteMode=TRUE;
1802 code-=0x200;
1803 dynamicWindow=getNextDynamicWindow(scsu);
1804 currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
1805 useDynamicWindow(scsu, dynamicWindow);
1806 c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
1807 length=4;
1808 goto outputBytes;
1809 } else {
1810 /* don't know how to compress this character, just write it directly */
1811 c=((uint32_t)lead<<16)|trail;
1812 length=4;
1813 goto outputBytes;
1814 }
1815 } else /* 0xe000<=c<0xf300 */ {
1816 /* quote to avoid SCSU tags */
1817 c|=UQU<<16;
1818 length=3;
1819 goto outputBytes;
1820 }
1821
1822 /* normal end of conversion: prepare for a new character */
1823 c=0;
1824 }
1825 }
1826 endloop:
1827
1828 /* set the converter state back into UConverter */
1829 scsu->fromUIsSingleByteMode=isSingleByteMode;
1830 scsu->fromUDynamicWindow=dynamicWindow;
1831
1832 cnv->fromUChar32=c;
1833
1834 /* write back the updated pointers */
1835 pArgs->source=source;
1836 pArgs->target=(char *)target;
1837 return;
1838
1839 outputBytes:
1840 /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
1841 /* from the first if in the loop we know that targetCapacity>0 */
1842 if(length<=targetCapacity) {
1843 switch(length) {
1844 /* each branch falls through to the next one */
1845 case 4:
1846 *target++=(uint8_t)(c>>24);
1847 case 3:
1848 *target++=(uint8_t)(c>>16);
1849 case 2:
1850 *target++=(uint8_t)(c>>8);
1851 case 1:
1852 *target++=(uint8_t)c;
1853 default:
1854 /* will never occur */
1855 break;
1856 }
1857 targetCapacity-=length;
1858
1859 /* normal end of conversion: prepare for a new character */
1860 c=0;
1861 goto loop;
1862 } else {
1863 uint8_t *p;
1864
1865 /*
1866 * We actually do this backwards here:
1867 * In order to save an intermediate variable, we output
1868 * first to the overflow buffer what does not fit into the
1869 * regular target.
1870 */
1871 /* we know that 1<=targetCapacity<length<=4 */
1872 length-=targetCapacity;
1873 p=(uint8_t *)cnv->charErrorBuffer;
1874 switch(length) {
1875 /* each branch falls through to the next one */
1876 case 3:
1877 *p++=(uint8_t)(c>>16);
1878 case 2:
1879 *p++=(uint8_t)(c>>8);
1880 case 1:
1881 *p=(uint8_t)c;
1882 default:
1883 /* will never occur */
1884 break;
1885 }
1886 cnv->charErrorBufferLength=(int8_t)length;
1887
1888 /* now output what fits into the regular target */
1889 c>>=8*length; /* length was reduced by targetCapacity */
1890 switch(targetCapacity) {
1891 /* each branch falls through to the next one */
1892 case 3:
1893 *target++=(uint8_t)(c>>16);
1894 case 2:
1895 *target++=(uint8_t)(c>>8);
1896 case 1:
1897 *target++=(uint8_t)c;
1898 default:
1899 /* will never occur */
1900 break;
1901 }
1902
1903 /* target overflow */
1904 targetCapacity=0;
1905 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1906 c=0;
1907 goto endloop;
1908 }
1909 }
1910
1911 /* miscellaneous ------------------------------------------------------------ */
1912
1913 static const char *
1914 _SCSUGetName(const UConverter *cnv) {
1915 SCSUData *scsu=(SCSUData *)cnv->extraInfo;
1916
1917 switch(scsu->locale) {
1918 case l_ja:
1919 return "SCSU,locale=ja";
1920 default:
1921 return "SCSU";
1922 }
1923 }
1924
1925 static void
1926 _SCSUWriteSub(UConverterFromUnicodeArgs *pArgs,
1927 int32_t offsetIndex,
1928 UErrorCode *pErrorCode) {
1929 static const char squ_fffd[]={ (char)SQU, (char)0xffu, (char)0xfdu };
1930
1931 /*
1932 * The substitution character is U+fffd={ ff, fd }.
1933 * If the SCSU converter is in Unicode mode, then these two bytes just need to
1934 * be written. Otherwise, this character is quoted.
1935 */
1936 if(((SCSUData *)pArgs->converter->extraInfo)->fromUIsSingleByteMode) {
1937 /* single-byte mode: quote Unicode */
1938 ucnv_cbFromUWriteBytes(pArgs,
1939 squ_fffd, 3,
1940 offsetIndex, pErrorCode);
1941 } else {
1942 /* Unicode mode: just write U+fffd */
1943 ucnv_cbFromUWriteBytes(pArgs,
1944 squ_fffd+1, 2,
1945 offsetIndex, pErrorCode);
1946 }
1947 }
1948
1949 /* structure for SafeClone calculations */
1950 struct cloneSCSUStruct
1951 {
1952 UConverter cnv;
1953 SCSUData mydata;
1954 };
1955
1956 static UConverter *
1957 _SCSUSafeClone(const UConverter *cnv,
1958 void *stackBuffer,
1959 int32_t *pBufferSize,
1960 UErrorCode *status)
1961 {
1962 struct cloneSCSUStruct * localClone;
1963 int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
1964
1965 if (U_FAILURE(*status)){
1966 return 0;
1967 }
1968
1969 if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
1970 *pBufferSize = bufferSizeNeeded;
1971 return 0;
1972 }
1973
1974 localClone = (struct cloneSCSUStruct *)stackBuffer;
1975 /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
1976
1977 uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
1978 localClone->cnv.extraInfo = &localClone->mydata;
1979 localClone->cnv.isExtraLocal = TRUE;
1980
1981 return &localClone->cnv;
1982 }
1983
1984
1985 static const UConverterImpl _SCSUImpl={
1986 UCNV_SCSU,
1987
1988 NULL,
1989 NULL,
1990
1991 _SCSUOpen,
1992 _SCSUClose,
1993 _SCSUReset,
1994
1995 _SCSUToUnicode,
1996 _SCSUToUnicodeWithOffsets,
1997 _SCSUFromUnicode,
1998 _SCSUFromUnicodeWithOffsets,
1999 NULL,
2000
2001 NULL,
2002 _SCSUGetName,
2003 _SCSUWriteSub,
2004 _SCSUSafeClone,
2005 ucnv_getCompleteUnicodeSet
2006 };
2007
2008 static const UConverterStaticData _SCSUStaticData={
2009 sizeof(UConverterStaticData),
2010 "SCSU",
2011 0, /* CCSID for SCSU */
2012 UCNV_IBM, UCNV_SCSU,
2013 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */
2014 /*
2015 * ### TODO the subchar really must be written by an SCSU function
2016 * however, currently SCSU's fromUnicode() never causes errors, therefore
2017 * no callbacks will be called and no subchars written
2018 * See Jitterbug 2837 - RFE: forbid converting surrogate code points in all charsets
2019 */
2020 { 0x0e, 0xff, 0xfd, 0 }, 3,
2021 FALSE, FALSE,
2022 0,
2023 0,
2024 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
2025 };
2026
2027 const UConverterSharedData _SCSUData={
2028 sizeof(UConverterSharedData), ~((uint32_t)0),
2029 NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl,
2030 0
2031 };
2032
2033 #endif