2 **********************************************************************
3 * Copyright (C) 2010-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 * tab size: 8 (not used)
11 * created on: 2010Dec09
12 * created by: Michael Ow
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
19 #include "unicode/ucnv.h"
20 #include "unicode/uset.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "unicode/utf16.h"
/*
 * Identifiers for the sub-converters that make up the Compound Text
 * converter. The values are used as array indexes into both
 * escSeqCompoundText[] and UConverterDataCompoundText.myConverterArray[],
 * so they must stay dense and below NUM_OF_CONVERTERS.
 *
 * NOTE(review): this excerpt does not show the enum opener, the members
 * between COMPOUND_TEXT_TRIPLE_DOUBLE (11) and NUM_OF_CONVERTERS (20), or the
 * INVALID / DO_SEARCH values that other functions in this file use.
 * Presumably IBM_915 ... IBM_923 occupy 12..19 -- confirm against the full
 * declaration.
 */
35 COMPOUND_TEXT_SINGLE_0
= 0,
36 COMPOUND_TEXT_SINGLE_1
= 1,
37 COMPOUND_TEXT_SINGLE_2
= 2,
38 COMPOUND_TEXT_SINGLE_3
= 3,
40 COMPOUND_TEXT_DOUBLE_1
= 4,
41 COMPOUND_TEXT_DOUBLE_2
= 5,
42 COMPOUND_TEXT_DOUBLE_3
= 6,
43 COMPOUND_TEXT_DOUBLE_4
= 7,
44 COMPOUND_TEXT_DOUBLE_5
= 8,
45 COMPOUND_TEXT_DOUBLE_6
= 9,
46 COMPOUND_TEXT_DOUBLE_7
= 10,
48 COMPOUND_TEXT_TRIPLE_DOUBLE
= 11,
/* Number of slots in the per-converter tables (array dimension, not a state). */
58 NUM_OF_CONVERTERS
= 20
59 } COMPOUND_TEXT_CONVERTERS
;
/*
 * Exclusive upper bound of the converter indexes probed by the from-Unicode
 * fallback search (the loop there runs i = 1 .. SEARCH_LENGTH - 1).
 */
61 #define SEARCH_LENGTH 12
/*
 * Escape sequence that selects each sub-converter, indexed by
 * COMPOUND_TEXT_CONVERTERS value. Every row starts with ESC (0x1B) and is
 * padded with zero bytes; readers in this file walk a row until the first 0,
 * so a row is effectively a NUL-terminated byte string of at most 4 bytes.
 *
 * Rows  0-3  : COMPOUND_TEXT_SINGLE_0 .. COMPOUND_TEXT_SINGLE_3
 * Rows  4-10 : COMPOUND_TEXT_DOUBLE_1 .. COMPOUND_TEXT_DOUBLE_7
 * Row   11   : COMPOUND_TEXT_TRIPLE_DOUBLE
 * Rows 12-19 : enum members that are not visible in this excerpt
 *              (NOTE(review): presumably IBM_915 ... IBM_923 -- confirm order).
 */
63 static const uint8_t escSeqCompoundText
[NUM_OF_CONVERTERS
][5] = {
65 { 0x1B, 0x2D, 0x41, 0, 0 },
66 { 0x1B, 0x2D, 0x4D, 0, 0 },
67 { 0x1B, 0x2D, 0x46, 0, 0 },
68 { 0x1B, 0x2D, 0x47, 0, 0 },
71 { 0x1B, 0x24, 0x29, 0x41, 0 },
72 { 0x1B, 0x24, 0x29, 0x42, 0 },
73 { 0x1B, 0x24, 0x29, 0x43, 0 },
74 { 0x1B, 0x24, 0x29, 0x44, 0 },
75 { 0x1B, 0x24, 0x29, 0x47, 0 },
76 { 0x1B, 0x24, 0x29, 0x48, 0 },
77 { 0x1B, 0x24, 0x29, 0x49, 0 },
80 { 0x1B, 0x25, 0x47, 0, 0 },
83 { 0x1B, 0x2D, 0x4C, 0, 0 },
85 { 0x1B, 0x2D, 0x48, 0, 0 },
87 { 0x1B, 0x2D, 0x44, 0, 0 },
89 { 0x1B, 0x2D, 0x54, 0, 0 },
91 { 0x1B, 0x2D, 0x42, 0, 0 },
93 { 0x1B, 0x2D, 0x43, 0, 0 },
95 { 0x1B, 0x2D, 0x5F, 0, 0 },
97 { 0x1B, 0x2D, 0x62, 0, 0 },
/* The ESC byte that begins every escape sequence in the table above. */
100 #define ESC_START 0x1B
/*
 * True when codepoint is U+0000, U+0009, U+000A, U+0020..U+007F or
 * U+00A0..U+00FF. Despite the name this also accepts the upper Latin-1 range;
 * getState() maps these code points to COMPOUND_TEXT_SINGLE_0.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isASCIIRange(codepoint) \
        (((codepoint) == 0x0000) || ((codepoint) == 0x0009) || ((codepoint) == 0x000A) || \
         ((codepoint) >= 0x0020 && (codepoint) <= 0x007f) || ((codepoint) >= 0x00A0 && (codepoint) <= 0x00FF))
/*
 * True when codepoint is in U+0401..U+045F or is U+2116.
 * Used by getState() as the membership test for the isIBM915 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM915(codepoint) \
        (((codepoint) >= 0x0401 && (codepoint) <= 0x045F) || ((codepoint) == 0x2116))
/*
 * True when codepoint is in U+05D0..U+05EA or is U+2017 or U+203E.
 * Used by getState() as the membership test for the isIBM916 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM916(codepoint) \
        (((codepoint) >= 0x05D0 && (codepoint) <= 0x05EA) || ((codepoint) == 0x2017) || ((codepoint) == 0x203E))
/*
 * True when codepoint is one of the Arabic and Arabic presentation-form code
 * points listed below (plus U+200B). getState() maps these to
 * COMPOUND_TEXT_SINGLE_3.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isCompoundS3(codepoint) \
        (((codepoint) == 0x060C) || ((codepoint) == 0x061B) || ((codepoint) == 0x061F) || ((codepoint) >= 0x0621 && (codepoint) <= 0x063A) || \
         ((codepoint) >= 0x0640 && (codepoint) <= 0x0652) || ((codepoint) >= 0x0660 && (codepoint) <= 0x066D) || ((codepoint) == 0x200B) || \
         ((codepoint) >= 0x0FE70 && (codepoint) <= 0x0FE72) || ((codepoint) == 0x0FE74) || ((codepoint) >= 0x0FE76 && (codepoint) <= 0x0FEBE))
/*
 * True when codepoint is U+02BC, U+02BD, in U+0384..U+03CE, or U+2015.
 * getState() maps these to COMPOUND_TEXT_SINGLE_2.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isCompoundS2(codepoint) \
        (((codepoint) == 0x02BC) || ((codepoint) == 0x02BD) || ((codepoint) >= 0x0384 && (codepoint) <= 0x03CE) || ((codepoint) == 0x2015))
/*
 * True when codepoint is one of the Latin Extended-A code points listed
 * below. Used by getState() as the membership test for the isIBM914 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM914(codepoint) \
        (((codepoint) == 0x0100) || ((codepoint) == 0x0101) || ((codepoint) == 0x0112) || ((codepoint) == 0x0113) || ((codepoint) == 0x0116) || ((codepoint) == 0x0117) || \
         ((codepoint) == 0x0122) || ((codepoint) == 0x0123) || ((codepoint) >= 0x0128 && (codepoint) <= 0x012B) || ((codepoint) == 0x012E) || ((codepoint) == 0x012F) || \
         ((codepoint) >= 0x0136 && (codepoint) <= 0x0138) || ((codepoint) == 0x013B) || ((codepoint) == 0x013C) || ((codepoint) == 0x0145) || ((codepoint) == 0x0146) || \
         ((codepoint) >= 0x014A && (codepoint) <= 0x014D) || ((codepoint) == 0x0156) || ((codepoint) == 0x0157) || ((codepoint) >= 0x0166 && (codepoint) <= 0x016B) || \
         ((codepoint) == 0x0172) || ((codepoint) == 0x0173))
/*
 * True when codepoint is in U+0E01..U+0E3A or U+0E3F..U+0E5B.
 * Used by getState() as the membership test for the isIBM874 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM874(codepoint) \
        (((codepoint) >= 0x0E01 && (codepoint) <= 0x0E3A) || ((codepoint) >= 0x0E3F && (codepoint) <= 0x0E5B))
/*
 * True when codepoint is one of the Latin-2 (ISO 8859-2) letters and spacing
 * diacritics listed below. Used by getState() as the membership test for the
 * isIBM912 branch.
 *
 * Fix: the list previously tested U+0147 twice and never U+0148. The second
 * test now checks U+0148 (the lowercase partner of U+0147 in ISO 8859-2), in
 * line with the other upper/lower pairs in this list.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM912(codepoint) \
        (((codepoint) >= 0x0102 && (codepoint) <= 0x0107) || ((codepoint) >= 0x010C && (codepoint) <= 0x0111) || ((codepoint) >= 0x0118 && (codepoint) <= 0x011B) || \
         ((codepoint) == 0x0139) || ((codepoint) == 0x013A) || ((codepoint) == 0x013D) || ((codepoint) == 0x013E) || ((codepoint) >= 0x0141 && (codepoint) <= 0x0144) || \
         ((codepoint) == 0x0147) || ((codepoint) == 0x0148) || ((codepoint) == 0x0150) || ((codepoint) == 0x0151) || ((codepoint) == 0x0154) || ((codepoint) == 0x0155) || \
         ((codepoint) >= 0x0158 && (codepoint) <= 0x015B) || ((codepoint) == 0x015E) || ((codepoint) == 0x015F) || ((codepoint) >= 0x0160 && (codepoint) <= 0x0165) || \
         ((codepoint) == 0x016E) || ((codepoint) == 0x016F) || ((codepoint) == 0x0170) || ((codepoint) == 0x0171) || ((codepoint) >= 0x0179 && (codepoint) <= 0x017E) || \
         ((codepoint) == 0x02C7) || ((codepoint) == 0x02D8) || ((codepoint) == 0x02D9) || ((codepoint) == 0x02DB) || ((codepoint) == 0x02DD))
/*
 * True when codepoint is one of the Latin Extended-A code points listed
 * below. Used by getState() as the membership test for the isIBM913 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM913(codepoint) \
        (((codepoint) >= 0x0108 && (codepoint) <= 0x010B) || ((codepoint) == 0x011C) || \
         ((codepoint) == 0x011D) || ((codepoint) == 0x0120) || ((codepoint) == 0x0121) || \
         ((codepoint) >= 0x0124 && (codepoint) <= 0x0127) || ((codepoint) == 0x0134) || ((codepoint) == 0x0135) || \
         ((codepoint) == 0x015C) || ((codepoint) == 0x015D) || ((codepoint) == 0x016C) || ((codepoint) == 0x016D))
/*
 * True when codepoint is U+011E, U+011F, U+0130, U+0131, or in
 * U+0218..U+021B. getState() maps these to COMPOUND_TEXT_SINGLE_1.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isCompoundS1(codepoint) \
        (((codepoint) == 0x011E) || ((codepoint) == 0x011F) || ((codepoint) == 0x0130) || \
         ((codepoint) == 0x0131) || ((codepoint) >= 0x0218 && (codepoint) <= 0x021B))
/*
 * True when codepoint is in U+0174..U+0177 or is one of the Latin Extended
 * Additional code points listed below. Used by getState() as the membership
 * test for the isISO8859_14 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isISO8859_14(codepoint) \
        (((codepoint) >= 0x0174 && (codepoint) <= 0x0177) || ((codepoint) == 0x1E0A) || \
         ((codepoint) == 0x1E0B) || ((codepoint) == 0x1E1E) || ((codepoint) == 0x1E1F) || \
         ((codepoint) == 0x1E40) || ((codepoint) == 0x1E41) || ((codepoint) == 0x1E56) || \
         ((codepoint) == 0x1E57) || ((codepoint) == 0x1E60) || ((codepoint) == 0x1E61) || \
         ((codepoint) == 0x1E6A) || ((codepoint) == 0x1E6B) || ((codepoint) == 0x1EF2) || \
         ((codepoint) == 0x1EF3) || ((codepoint) >= 0x1E80 && (codepoint) <= 0x1E85))
/*
 * True when codepoint is U+0152, U+0153, U+0178 or U+20AC.
 * Used by getState() as the membership test for the isIBM923 branch.
 *
 * The argument is parenthesized so that expression arguments group correctly.
 * It is evaluated more than once: do not pass expressions with side effects.
 */
#define isIBM923(codepoint) \
        (((codepoint) == 0x0152) || ((codepoint) == 0x0153) || ((codepoint) == 0x0178) || ((codepoint) == 0x20AC))
/*
 * Per-converter private data, stored in UConverter.extraInfo by
 * _CompoundTextOpen() and released by _CompoundTextClose().
 */
/* Shared data of each sub-converter, indexed by COMPOUND_TEXT_CONVERTERS. */
161 UConverterSharedData
*myConverterArray
[NUM_OF_CONVERTERS
];
/* Currently selected sub-converter; saved at the end of each conversion call. */
162 COMPOUND_TEXT_CONVERTERS state
;
163 } UConverterDataCompoundText
;
/*
 * Forward declarations of the open / close / reset / getName callbacks that
 * are defined further down in this file.
 * NOTE(review): the return-type / linkage lines of these prototypes are not
 * visible in this excerpt.
 */
165 /*********** Compound Text Converter Protos ***********/
167 _CompoundTextOpen(UConverter
*cnv
, UConverterLoadArgs
*pArgs
, UErrorCode
*errorCode
);
170 _CompoundTextClose(UConverter
*converter
);
173 _CompoundTextReset(UConverter
*converter
, UConverterResetChoice choice
);
176 _CompoundTextgetName(const UConverter
* cnv
);
/*
 * Scans [source, sourceLimit) for the next ESC byte (0x1B).
 *
 * The scan starts at offset 1, so an ESC at source[0] is not reported.
 * The to-Unicode function adds the result to its source pointer to get the
 * end of the run handed to the sub-converter.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the function returns the offset of the ESC that was found, or
 * `length` when there is none -- confirm against the full source.
 */
179 static int32_t findNextEsc(const char *source
, const char *sourceLimit
) {
180 int32_t length
= sourceLimit
- source
;
182 for (i
= 1; i
< length
; i
++) {
183 if (*(source
+ i
) == 0x1B) {
/*
 * Chooses the sub-converter state for a code point using the hard-coded
 * range macros above.
 *
 * The result starts as DO_SEARCH and is replaced by the first matching
 * branch; the branches are tested in the order written, so an earlier macro
 * wins if two ranges ever overlap.
 *
 * NOTE(review): the assignments inside the isIBM912 / isIBM913 /
 * isISO8859_14 / isIBM923 / isIBM874 / isIBM914 / isIBM916 / isIBM915
 * branches and the final return are not visible in this excerpt.
 */
191 static COMPOUND_TEXT_CONVERTERS
getState(int codepoint
) {
192 COMPOUND_TEXT_CONVERTERS state
= DO_SEARCH
;
194 if (isASCIIRange(codepoint
)) {
195 state
= COMPOUND_TEXT_SINGLE_0
;
196 } else if (isIBM912(codepoint
)) {
198 }else if (isIBM913(codepoint
)) {
200 } else if (isISO8859_14(codepoint
)) {
202 } else if (isIBM923(codepoint
)) {
204 } else if (isIBM874(codepoint
)) {
206 } else if (isIBM914(codepoint
)) {
208 } else if (isCompoundS2(codepoint
)) {
209 state
= COMPOUND_TEXT_SINGLE_2
;
210 } else if (isCompoundS3(codepoint
)) {
211 state
= COMPOUND_TEXT_SINGLE_3
;
212 } else if (isIBM916(codepoint
)) {
214 } else if (isIBM915(codepoint
)) {
216 } else if (isCompoundS1(codepoint
)) {
217 state
= COMPOUND_TEXT_SINGLE_1
;
/*
 * Identifies which sub-converter an escape sequence selects.
 *
 * The candidate sequence is the concatenation of the bytes already buffered
 * from a previous call (toUBytesBuffer, toUBytesBufferLength bytes) and the
 * bytes at `source`. Each row of escSeqCompoundText is compared byte by byte:
 * positions below toUBytesBufferLength are read from the buffer, later
 * positions from source[n - offset].
 *
 * Sets *err to U_TRUNCATED_CHAR_FOUND when the input ends before a row has
 * been fully compared. `state` starts as INVALID and is set to the row index
 * on a match.
 *
 * NOTE(review): the break / matchFound bookkeeping and the return statement
 * are not visible in this excerpt.
 */
223 static COMPOUND_TEXT_CONVERTERS
findStateFromEscSeq(const char* source
, const char* sourceLimit
, const uint8_t* toUBytesBuffer
, int32_t toUBytesBufferLength
, UErrorCode
*err
) {
224 COMPOUND_TEXT_CONVERTERS state
= INVALID
;
225 UBool matchFound
= FALSE
;
226 int32_t i
, n
, offset
= toUBytesBufferLength
;
228 for (i
= 0; i
< NUM_OF_CONVERTERS
; i
++) {
/* Walk this row up to its zero padding. */
230 for (n
= 0; escSeqCompoundText
[i
][n
] != 0; n
++) {
/* Leading bytes come from the carried-over buffer, the rest from source. */
231 if (n
< toUBytesBufferLength
) {
232 if (toUBytesBuffer
[n
] != escSeqCompoundText
[i
][n
]) {
236 } else if ((source
+ (n
- offset
)) >= sourceLimit
) {
237 *err
= U_TRUNCATED_CHAR_FOUND
;
240 } else if (*(source
+ (n
- offset
)) != escSeqCompoundText
[i
][n
]) {
252 state
= (COMPOUND_TEXT_CONVERTERS
)i
;
/*
 * Open callback: allocates the UConverterDataCompoundText for `cnv` and loads
 * the shared data of every sub-converter by name.
 *
 * Slot COMPOUND_TEXT_SINGLE_0 stays NULL; the conversion functions handle
 * that state without a sub-converter. Every other slot is assigned the result
 * of ucnv_loadSharedData(), and all calls share the same errorCode.
 *
 * If any load failed, or the caller only asked whether the converter is
 * loadable (pArgs->onlyTestIsLoadable), the data is released again through
 * _CompoundTextClose(). Otherwise the initial state is set to 0
 * (COMPOUND_TEXT_SINGLE_0).
 *
 * *errorCode is set to U_MEMORY_ALLOCATION_ERROR -- NOTE(review): presumably
 * in the else branch of the allocation check, which is not visible in this
 * excerpt.
 */
259 _CompoundTextOpen(UConverter
*cnv
, UConverterLoadArgs
*pArgs
, UErrorCode
*errorCode
){
260 cnv
->extraInfo
= uprv_malloc (sizeof (UConverterDataCompoundText
));
261 if (cnv
->extraInfo
!= NULL
) {
262 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) cnv
->extraInfo
;
264 UConverterNamePieces stackPieces
;
265 UConverterLoadArgs stackArgs
={ (int32_t)sizeof(UConverterLoadArgs
) };
/* No sub-converter is needed for the single-byte ASCII/Latin-1 state. */
267 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_0
] = NULL
;
268 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_1
] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces
, &stackArgs
, errorCode
);
269 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_2
] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces
, &stackArgs
, errorCode
);
270 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_3
] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces
, &stackArgs
, errorCode
);
271 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_1
] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces
, &stackArgs
, errorCode
);
272 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_2
] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces
, &stackArgs
, errorCode
);
273 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_3
] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces
, &stackArgs
, errorCode
);
274 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_4
] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces
, &stackArgs
, errorCode
);
275 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_5
] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces
, &stackArgs
, errorCode
);
276 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_6
] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces
, &stackArgs
, errorCode
);
277 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_7
] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces
, &stackArgs
, errorCode
);
278 myConverterData
->myConverterArray
[COMPOUND_TEXT_TRIPLE_DOUBLE
] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces
, &stackArgs
, errorCode
);
/* Standard (non-internal) code pages used as additional sub-converters. */
280 myConverterData
->myConverterArray
[IBM_915
] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
281 myConverterData
->myConverterArray
[IBM_916
] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
282 myConverterData
->myConverterArray
[IBM_914
] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
283 myConverterData
->myConverterArray
[IBM_874
] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
284 myConverterData
->myConverterArray
[IBM_912
] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
285 myConverterData
->myConverterArray
[IBM_913
] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces
, &stackArgs
, errorCode
);
286 myConverterData
->myConverterArray
[ISO_8859_14
] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces
, &stackArgs
, errorCode
);
287 myConverterData
->myConverterArray
[IBM_923
] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces
, &stackArgs
, errorCode
);
/* Release everything again on failure or for a loadability-only probe. */
289 if (U_FAILURE(*errorCode
) || pArgs
->onlyTestIsLoadable
) {
290 _CompoundTextClose(cnv
);
294 myConverterData
->state
= (COMPOUND_TEXT_CONVERTERS
)0;
296 *errorCode
= U_MEMORY_ALLOCATION_ERROR
;
/*
 * Close callback: releases everything _CompoundTextOpen() attached to the
 * converter. Does nothing when converter->extraInfo is NULL.
 *
 * Each non-NULL entry of myConverterArray is handed to
 * ucnv_unloadSharedDataIfReady(), then the extraInfo block itself is freed.
 *
 * NOTE(review): whether converter->extraInfo is reset to NULL after the free
 * is not visible in this excerpt.
 */
302 _CompoundTextClose(UConverter
*converter
) {
303 UConverterDataCompoundText
* myConverterData
= (UConverterDataCompoundText
*)(converter
->extraInfo
);
306 if (converter
->extraInfo
!= NULL
) {
307 /*close the array of converter pointers and free the memory*/
308 for (i
= 0; i
< NUM_OF_CONVERTERS
; i
++) {
309 if (myConverterData
->myConverterArray
[i
] != NULL
) {
310 ucnv_unloadSharedDataIfReady(myConverterData
->myConverterArray
[i
]);
314 uprv_free(converter
->extraInfo
);
/*
 * Reset callback for the Compound Text converter.
 * NOTE(review): no statements of the body are visible in this excerpt, so
 * what (if anything) is reset cannot be confirmed from here.
 */
319 _CompoundTextReset(UConverter
*converter
, UConverterResetChoice choice
) {
/*
 * getName callback: returns the fixed converter name "x11-compound-text".
 * The cnv argument is not used by the visible code.
 */
323 _CompoundTextgetName(const UConverter
* cnv
){
324 return "x11-compound-text";
/*
 * Converts UTF-16 text in [args->source, args->sourceLimit) to Compound Text
 * bytes in [args->target, args->targetLimit).
 *
 * For each code point (surrogate pairs are combined first; an unpaired
 * surrogate sets *err to U_ILLEGAL_CHAR_FOUND and is stored in
 * cnv->fromUChar32):
 *   1. getState() picks a sub-converter from the hard-coded ranges.
 *   2. If that differs from the current state, the new state's escape
 *      sequence is appended to tmpTargetBuffer.
 *   3. The character bytes are appended:
 *        - DO_SEARCH: converters 1 .. SEARCH_LENGTH-1 are tried in order with
 *          ucnv_MBCSFromUChar32() and the first one returning a positive
 *          length is used (emitting its escape sequence on a state change);
 *        - COMPOUND_TEXT_SINGLE_0: the code point is written as one byte;
 *        - otherwise: the current state's converter is used.
 *      Multi-byte results are written most significant byte first.
 *   4. tmpTargetBuffer is copied to the target; bytes that do not fit are
 *      stored in the converter's charErrorBuffer and *err becomes
 *      U_BUFFER_OVERFLOW_ERROR.
 *
 * On exit the current state is saved in the converter data and
 * args->source / args->target are updated.
 *
 * NOTE(review): offsets are not produced (the offsets line below is commented
 * out). Several declarations, else branches and break statements are not
 * visible in this excerpt.
 */
328 UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs
* args
, UErrorCode
* err
){
329 UConverter
*cnv
= args
->converter
;
330 uint8_t *target
= (uint8_t *) args
->target
;
331 const uint8_t *targetLimit
= (const uint8_t *) args
->targetLimit
;
332 const UChar
* source
= args
->source
;
333 const UChar
* sourceLimit
= args
->sourceLimit
;
334 /* int32_t* offsets = args->offsets; */
336 UBool useFallback
= cnv
->useFallback
;
/* Staging area for one code point: optional escape sequence plus its bytes. */
337 uint8_t tmpTargetBuffer
[7];
338 int32_t tmpTargetBufferLength
= 0;
339 COMPOUND_TEXT_CONVERTERS currentState
, tmpState
;
341 int32_t pValueLength
= 0;
344 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) cnv
->extraInfo
;
346 currentState
= myConverterData
->state
;
348 /* check if the last codepoint of previous buffer was a lead surrogate*/
349 if((sourceChar
= cnv
->fromUChar32
)!=0 && target
< targetLimit
) {
353 while( source
< sourceLimit
){
354 if(target
< targetLimit
){
356 sourceChar
= *(source
++);
357 /*check if the char is a First surrogate*/
358 if(U16_IS_SURROGATE(sourceChar
)) {
359 if(U16_IS_SURROGATE_LEAD(sourceChar
)) {
361 /*look ahead to find the trail surrogate*/
362 if(source
< sourceLimit
) {
363 /* test the following code unit */
364 UChar trail
=(UChar
) *source
;
365 if(U16_IS_TRAIL(trail
)) {
367 sourceChar
=U16_GET_SUPPLEMENTARY(sourceChar
, trail
);
368 cnv
->fromUChar32
=0x00;
369 /* convert this supplementary code point */
370 /* exit this condition tree */
372 /* this is an unmatched lead code unit (1st surrogate) */
373 /* callback(illegal) */
374 *err
=U_ILLEGAL_CHAR_FOUND
;
375 cnv
->fromUChar32
=sourceChar
;
380 cnv
->fromUChar32
=sourceChar
;
384 /* this is an unmatched trail code unit (2nd surrogate) */
385 /* callback(illegal) */
386 *err
=U_ILLEGAL_CHAR_FOUND
;
387 cnv
->fromUChar32
=sourceChar
;
/* Start a fresh staging buffer and classify the code point. */
392 tmpTargetBufferLength
= 0;
393 tmpState
= getState(sourceChar
);
395 if (tmpState
!= DO_SEARCH
&& currentState
!= tmpState
) {
396 /* Get escape sequence if necessary */
397 currentState
= tmpState
;
398 for (i
= 0; escSeqCompoundText
[currentState
][i
] != 0; i
++) {
399 tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][i
];
403 if (tmpState
== DO_SEARCH
) {
404 /* Test all available converters */
405 for (i
= 1; i
< SEARCH_LENGTH
; i
++) {
406 pValueLength
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[i
], sourceChar
, &pValue
, useFallback
);
407 if (pValueLength
> 0) {
408 tmpState
= (COMPOUND_TEXT_CONVERTERS
)i
;
409 if (currentState
!= tmpState
) {
410 currentState
= tmpState
;
411 for (j
= 0; escSeqCompoundText
[currentState
][j
] != 0; j
++) {
412 tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][j
];
/* Emit the converted value most significant byte first. */
415 for (n
= (pValueLength
- 1); n
>= 0; n
--) {
416 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue
>> (n
* 8));
421 } else if (tmpState
== COMPOUND_TEXT_SINGLE_0
) {
422 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)sourceChar
;
424 pValueLength
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[currentState
], sourceChar
, &pValue
, useFallback
);
425 if (pValueLength
> 0) {
426 for (n
= (pValueLength
- 1); n
>= 0; n
--) {
427 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue
>> (n
* 8));
/* Flush the staged bytes to the caller's target buffer. */
432 for (i
= 0; i
< tmpTargetBufferLength
; i
++) {
433 if (target
< targetLimit
) {
434 *target
++ = tmpTargetBuffer
[i
];
436 *err
= U_BUFFER_OVERFLOW_ERROR
;
/* Whatever did not fit is kept in the converter's overflow buffer. */
441 if (*err
== U_BUFFER_OVERFLOW_ERROR
) {
442 for (; i
< tmpTargetBufferLength
; i
++) {
443 args
->converter
->charErrorBuffer
[args
->converter
->charErrorBufferLength
++] = tmpTargetBuffer
[i
];
447 *err
= U_BUFFER_OVERFLOW_ERROR
;
452 /*save the state and return */
453 myConverterData
->state
= currentState
;
454 args
->source
= source
;
455 args
->target
= (char*)target
;
/*
 * Converts Compound Text bytes in [args->source, args->sourceLimit) to UTF-16
 * in [args->target, args->targetLimit).
 *
 * Outline of the visible logic:
 *   - A copy of the caller's args (subArgs, limited to the smaller of
 *     args->size and sizeof(UConverterToUnicodeArgs)) is prepared for the
 *     sub-converter calls.
 *   - When the next byte (or the first byte carried over in
 *     converter->toUBytes) is ESC, findStateFromEscSeq() identifies the new
 *     state. A truncated sequence is copied into toUBytes; an unknown
 *     sequence sets *err to U_ILLEGAL_CHAR_FOUND. Otherwise the source
 *     pointer is advanced past the part of the escape sequence that was not
 *     already buffered, and toULength is cleared.
 *   - In state COMPOUND_TEXT_SINGLE_0 bytes are copied to the target one by
 *     one (masked with 0x00ff) until the next ESC or until the target is
 *     full (U_BUFFER_OVERFLOW_ERROR).
 *   - In any other state the run up to the next ESC (see findNextEsc) is
 *     converted by ucnv_MBCSToUnicodeWithOffsets(), with the converter's
 *     sharedData temporarily swapped to the sub-converter's shared data and
 *     restored afterwards. On overflow, pending UChars are copied from
 *     subArgs.converter's UCharErrorBuffer to args->converter's.
 *
 * On exit the current state is saved in the converter data and
 * args->target / args->source are updated.
 *
 * NOTE(review): the second parameter line (the error-code pointer `err`),
 * several declarations, else branches and break statements are not visible
 * in this excerpt.
 */
460 UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs
*args
,
462 const char *mySource
= (char *) args
->source
;
463 UChar
*myTarget
= args
->target
;
464 const char *mySourceLimit
= args
->sourceLimit
;
465 const char *tmpSourceLimit
= mySourceLimit
;
466 uint32_t mySourceChar
= 0x0000;
467 COMPOUND_TEXT_CONVERTERS currentState
, tmpState
;
468 int32_t sourceOffset
= 0;
469 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) args
->converter
->extraInfo
;
470 UConverterSharedData
* savedSharedData
= NULL
;
472 UConverterToUnicodeArgs subArgs
;
475 /* set up the subconverter arguments */
476 if(args
->size
<sizeof(UConverterToUnicodeArgs
)) {
477 minArgsSize
= args
->size
;
479 minArgsSize
= (int32_t)sizeof(UConverterToUnicodeArgs
);
482 uprv_memcpy(&subArgs
, args
, minArgsSize
);
483 subArgs
.size
= (uint16_t)minArgsSize
;
485 currentState
= tmpState
= myConverterData
->state
;
487 while(mySource
< mySourceLimit
){
488 if(myTarget
< args
->targetLimit
){
/* Bytes saved from a previous, truncated escape sequence take precedence. */
489 if (args
->converter
->toULength
> 0) {
490 mySourceChar
= args
->converter
->toUBytes
[0];
492 mySourceChar
= (uint8_t)*mySource
;
495 if (mySourceChar
== ESC_START
) {
496 tmpState
= findStateFromEscSeq(mySource
, mySourceLimit
, args
->converter
->toUBytes
, args
->converter
->toULength
, err
);
/* Incomplete escape sequence: stash the remaining input for the next call. */
498 if (*err
== U_TRUNCATED_CHAR_FOUND
) {
499 for (; mySource
< mySourceLimit
;) {
500 args
->converter
->toUBytes
[args
->converter
->toULength
++] = *mySource
++;
504 } else if (tmpState
== INVALID
) {
505 if (args
->converter
->toULength
== 0) {
506 mySource
++; /* skip over the 0x1b byte */
508 *err
= U_ILLEGAL_CHAR_FOUND
;
512 if (tmpState
!= currentState
) {
513 currentState
= tmpState
;
/* Skip the part of the escape sequence that was not already buffered. */
516 sourceOffset
= uprv_strlen((char*)escSeqCompoundText
[currentState
]) - args
->converter
->toULength
;
518 mySource
+= sourceOffset
;
520 args
->converter
->toULength
= 0;
/* Single-byte state: copy bytes straight through until the next ESC. */
523 if (currentState
== COMPOUND_TEXT_SINGLE_0
) {
524 while (mySource
< mySourceLimit
) {
525 if (*mySource
== ESC_START
) {
528 if (myTarget
< args
->targetLimit
) {
529 *myTarget
++ = 0x00ff&(*mySource
++);
531 *err
= U_BUFFER_OVERFLOW_ERROR
;
/* Other states: delegate the run up to the next ESC to the sub-converter. */
535 } else if (mySource
< mySourceLimit
){
536 sourceOffset
= findNextEsc(mySource
, mySourceLimit
);
538 tmpSourceLimit
= mySource
+ sourceOffset
;
540 subArgs
.source
= mySource
;
541 subArgs
.sourceLimit
= tmpSourceLimit
;
542 subArgs
.target
= myTarget
;
/* Temporarily point the converter at the sub-converter's shared data. */
543 savedSharedData
= subArgs
.converter
->sharedData
;
544 subArgs
.converter
->sharedData
= myConverterData
->myConverterArray
[currentState
];
546 ucnv_MBCSToUnicodeWithOffsets(&subArgs
, err
);
548 subArgs
.converter
->sharedData
= savedSharedData
;
550 mySource
= subArgs
.source
;
551 myTarget
= subArgs
.target
;
553 if (U_FAILURE(*err
)) {
554 if(*err
== U_BUFFER_OVERFLOW_ERROR
) {
555 if(subArgs
.converter
->UCharErrorBufferLength
> 0) {
556 uprv_memcpy(args
->converter
->UCharErrorBuffer
, subArgs
.converter
->UCharErrorBuffer
,
557 subArgs
.converter
->UCharErrorBufferLength
);
559 args
->converter
->UCharErrorBufferLength
=subArgs
.converter
->UCharErrorBufferLength
;
560 subArgs
.converter
->UCharErrorBufferLength
= 0;
566 *err
= U_BUFFER_OVERFLOW_ERROR
;
/* Persist the state and report how far source and target advanced. */
570 myConverterData
->state
= currentState
;
571 args
->target
= myTarget
;
572 args
->source
= mySource
;
/*
 * Adds to the set behind `sa` every code point this converter can convert.
 *
 * Sub-converters 1 .. NUM_OF_CONVERTERS-1 each contribute their own set via
 * ucnv_MBCSGetUnicodeSetForUnicode(); index 0 is skipped in the loop and
 * covered by the explicit additions that follow, which are the same values
 * and ranges the isASCIIRange macro accepts.
 *
 * NOTE(review): the `sa` parameter line and the declaration of `i` are not
 * visible in this excerpt.
 */
576 _CompoundText_GetUnicodeSet(const UConverter
*cnv
,
578 UConverterUnicodeSet which
,
579 UErrorCode
*pErrorCode
) {
580 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*)cnv
->extraInfo
;
583 for (i
= 1; i
< NUM_OF_CONVERTERS
; i
++) {
584 ucnv_MBCSGetUnicodeSetForUnicode(myConverterData
->myConverterArray
[i
], sa
, which
, pErrorCode
);
/* Code points handled directly in state COMPOUND_TEXT_SINGLE_0. */
586 sa
->add(sa
->set
, 0x0000);
587 sa
->add(sa
->set
, 0x0009);
588 sa
->add(sa
->set
, 0x000A);
589 sa
->addRange(sa
->set
, 0x0020, 0x007F);
590 sa
->addRange(sa
->set
, 0x00A0, 0x00FF);
/*
 * Function table that plugs the Compound Text callbacks into the converter
 * framework. The same to-Unicode and from-Unicode functions are each listed
 * twice -- NOTE(review): presumably once for the plain slot and once for the
 * with-offsets slot; the remaining initializer entries are not visible in
 * this excerpt.
 */
593 static const UConverterImpl _CompoundTextImpl
= {
604 UConverter_toUnicode_CompoundText_OFFSETS
,
605 UConverter_toUnicode_CompoundText_OFFSETS
,
606 UConverter_fromUnicode_CompoundText_OFFSETS
,
607 UConverter_fromUnicode_CompoundText_OFFSETS
,
611 _CompoundTextgetName
,
614 _CompoundText_GetUnicodeSet
/*
 * Static description of the Compound Text converter. The first field is the
 * structure size and the last is the zero-filled reserved area.
 * NOTE(review): the fields in between are not visible in this excerpt.
 */
616 static const UConverterStaticData _CompoundTextStaticData
= {
617 sizeof(UConverterStaticData
),
630 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
/*
 * The converter's shared-data object (external linkage), built from the
 * static description and the function table defined above.
 */
632 const UConverterSharedData _CompoundTextData
=
633 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData
, &_CompoundTextImpl
);
635 #endif /* !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */