2 **********************************************************************
3 * Copyright (C) 2010-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
8 * tab size: 8 (not used)
11 * created on: 2010Dec09
12 * created by: Michael Ow
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
19 #include "unicode/ucnv.h"
20 #include "unicode/uset.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "unicode/utf16.h"
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast cannot interact
 * with an operator written next to the macro invocation.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
/*
 * Converter-state identifiers. Each value is used as an index into both
 * escSeqCompoundText[] and UConverterDataCompoundText.myConverterArray[].
 * NUM_OF_CONVERTERS is the size of both arrays.
 * NOTE(review): this listing shows only values 0..11 and NUM_OF_CONVERTERS.
 * IBM_915, IBM_916, IBM_914, IBM_874, IBM_912, IBM_913, ISO_8859_14,
 * IBM_923, INVALID and DO_SEARCH are used elsewhere in this file, but
 * their definitions are not visible here.
 */
37 COMPOUND_TEXT_SINGLE_0
= 0,
38 COMPOUND_TEXT_SINGLE_1
= 1,
39 COMPOUND_TEXT_SINGLE_2
= 2,
40 COMPOUND_TEXT_SINGLE_3
= 3,
42 COMPOUND_TEXT_DOUBLE_1
= 4,
43 COMPOUND_TEXT_DOUBLE_2
= 5,
44 COMPOUND_TEXT_DOUBLE_3
= 6,
45 COMPOUND_TEXT_DOUBLE_4
= 7,
46 COMPOUND_TEXT_DOUBLE_5
= 8,
47 COMPOUND_TEXT_DOUBLE_6
= 9,
48 COMPOUND_TEXT_DOUBLE_7
= 10,
50 COMPOUND_TEXT_TRIPLE_DOUBLE
= 11,
60 NUM_OF_CONVERTERS
= 20
61 } COMPOUND_TEXT_CONVERTERS
;
/*
 * Exclusive upper bound for the converter indices probed by the
 * from-Unicode path when getState() yields DO_SEARCH: that loop tries
 * indices 1 .. SEARCH_LENGTH-1 with ucnv_MBCSFromUChar32().
 */
63 #define SEARCH_LENGTH 12
/*
 * Escape-sequence bytes for every converter state, indexed by the
 * COMPOUND_TEXT_CONVERTERS value. Every row starts with 0x1B and is
 * padded with zero bytes; readers in this file scan a row until the
 * first 0 byte (and uprv_strlen() is applied to a row), so the padding
 * acts as the terminator.
 * Rows 0-3 line up with COMPOUND_TEXT_SINGLE_0..3, rows 4-10 with
 * COMPOUND_TEXT_DOUBLE_1..7 and row 11 with COMPOUND_TEXT_TRIPLE_DOUBLE.
 * NOTE(review): rows 12-19 presumably belong to the IBM_xxx / ISO_8859_14
 * states whose enumerator values are not visible in this listing - confirm.
 */
65 static const uint8_t escSeqCompoundText
[NUM_OF_CONVERTERS
][5] = {
67 { 0x1B, 0x2D, 0x41, 0, 0 },
68 { 0x1B, 0x2D, 0x4D, 0, 0 },
69 { 0x1B, 0x2D, 0x46, 0, 0 },
70 { 0x1B, 0x2D, 0x47, 0, 0 },
73 { 0x1B, 0x24, 0x29, 0x41, 0 },
74 { 0x1B, 0x24, 0x29, 0x42, 0 },
75 { 0x1B, 0x24, 0x29, 0x43, 0 },
76 { 0x1B, 0x24, 0x29, 0x44, 0 },
77 { 0x1B, 0x24, 0x29, 0x47, 0 },
78 { 0x1B, 0x24, 0x29, 0x48, 0 },
79 { 0x1B, 0x24, 0x29, 0x49, 0 },
82 { 0x1B, 0x25, 0x47, 0, 0 },
85 { 0x1B, 0x2D, 0x4C, 0, 0 },
87 { 0x1B, 0x2D, 0x48, 0, 0 },
89 { 0x1B, 0x2D, 0x44, 0, 0 },
91 { 0x1B, 0x2D, 0x54, 0, 0 },
93 { 0x1B, 0x2D, 0x42, 0, 0 },
95 { 0x1B, 0x2D, 0x43, 0, 0 },
97 { 0x1B, 0x2D, 0x5F, 0, 0 },
99 { 0x1B, 0x2D, 0x62, 0, 0 },
/*
 * First byte of every row in escSeqCompoundText (0x1B). The to-Unicode
 * path compares input bytes against it to detect a state switch.
 */
102 #define ESC_START 0x1B
/*
 * Non-zero when codepoint is U+0000, U+0009, U+000A, or lies in
 * U+0020..U+007F or U+00A0..U+00FF. getState() maps these code points to
 * COMPOUND_TEXT_SINGLE_0.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isASCIIRange(codepoint) \
    (((codepoint) == 0x0000) || ((codepoint) == 0x0009) || ((codepoint) == 0x000A) || \
     ((codepoint) >= 0x0020 && (codepoint) <= 0x007f) || ((codepoint) >= 0x00A0 && (codepoint) <= 0x00FF))
/*
 * Non-zero when codepoint lies in U+0401..U+045F or equals U+2116.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM915(codepoint) \
    (((codepoint) >= 0x0401 && (codepoint) <= 0x045F) || ((codepoint) == 0x2116))
/*
 * Non-zero when codepoint lies in U+05D0..U+05EA or equals U+2017 or
 * U+203E.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM916(codepoint) \
    (((codepoint) >= 0x05D0 && (codepoint) <= 0x05EA) || ((codepoint) == 0x2017) || ((codepoint) == 0x203E))
/*
 * Non-zero for the code points that getState() maps to
 * COMPOUND_TEXT_SINGLE_3: U+060C, U+061B, U+061F, U+0621..U+063A,
 * U+0640..U+0652, U+0660..U+066D, U+200B, U+FE70..U+FE72, U+FE74 and
 * U+FE76..U+FEBE.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isCompoundS3(codepoint) \
    (((codepoint) == 0x060C) || ((codepoint) == 0x061B) || ((codepoint) == 0x061F) || ((codepoint) >= 0x0621 && (codepoint) <= 0x063A) || \
     ((codepoint) >= 0x0640 && (codepoint) <= 0x0652) || ((codepoint) >= 0x0660 && (codepoint) <= 0x066D) || ((codepoint) == 0x200B) || \
     ((codepoint) >= 0x0FE70 && (codepoint) <= 0x0FE72) || ((codepoint) == 0x0FE74) || ((codepoint) >= 0x0FE76 && (codepoint) <= 0x0FEBE))
/*
 * Non-zero for the code points that getState() maps to
 * COMPOUND_TEXT_SINGLE_2: U+02BC, U+02BD, U+0384..U+03CE and U+2015.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isCompoundS2(codepoint) \
    (((codepoint) == 0x02BC) || ((codepoint) == 0x02BD) || ((codepoint) >= 0x0384 && (codepoint) <= 0x03CE) || ((codepoint) == 0x2015))
/*
 * Non-zero when codepoint is one of the Latin Extended-A code points
 * listed below (U+0100, U+0101, U+0112, U+0113, ... U+0172, U+0173).
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM914(codepoint) \
    (((codepoint) == 0x0100) || ((codepoint) == 0x0101) || ((codepoint) == 0x0112) || ((codepoint) == 0x0113) || ((codepoint) == 0x0116) || ((codepoint) == 0x0117) || \
     ((codepoint) == 0x0122) || ((codepoint) == 0x0123) || ((codepoint) >= 0x0128 && (codepoint) <= 0x012B) || ((codepoint) == 0x012E) || ((codepoint) == 0x012F) || \
     ((codepoint) >= 0x0136 && (codepoint) <= 0x0138) || ((codepoint) == 0x013B) || ((codepoint) == 0x013C) || ((codepoint) == 0x0145) || ((codepoint) == 0x0146) || \
     ((codepoint) >= 0x014A && (codepoint) <= 0x014D) || ((codepoint) == 0x0156) || ((codepoint) == 0x0157) || ((codepoint) >= 0x0166 && (codepoint) <= 0x016B) || \
     ((codepoint) == 0x0172) || ((codepoint) == 0x0173))
/*
 * Non-zero when codepoint lies in U+0E01..U+0E3A or U+0E3F..U+0E5B.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM874(codepoint) \
    (((codepoint) >= 0x0E01 && (codepoint) <= 0x0E3A) || ((codepoint) >= 0x0E3F && (codepoint) <= 0x0E5B))
/*
 * Non-zero when codepoint is one of the Latin-2 specific code points
 * listed below (U+0102..U+0107, U+010C..U+0111, ... U+02DD).
 * Fix: the original tested U+0147 twice; the second test is now U+0148,
 * matching the upper/lower-case pairs used everywhere else in the list.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM912(codepoint) \
    (((codepoint) >= 0x0102 && (codepoint) <= 0x0107) || ((codepoint) >= 0x010C && (codepoint) <= 0x0111) || ((codepoint) >= 0x0118 && (codepoint) <= 0x011B) || \
     ((codepoint) == 0x0139) || ((codepoint) == 0x013A) || ((codepoint) == 0x013D) || ((codepoint) == 0x013E) || ((codepoint) >= 0x0141 && (codepoint) <= 0x0144) || \
     ((codepoint) == 0x0147) || ((codepoint) == 0x0148) || ((codepoint) == 0x0150) || ((codepoint) == 0x0151) || ((codepoint) == 0x0154) || ((codepoint) == 0x0155) || \
     ((codepoint) >= 0x0158 && (codepoint) <= 0x015B) || ((codepoint) == 0x015E) || ((codepoint) == 0x015F) || ((codepoint) >= 0x0160 && (codepoint) <= 0x0165) || \
     ((codepoint) == 0x016E) || ((codepoint) == 0x016F) || ((codepoint) == 0x0170) || ((codepoint) == 0x0171) || ((codepoint) >= 0x0179 && (codepoint) <= 0x017E) || \
     ((codepoint) == 0x02C7) || ((codepoint) == 0x02D8) || ((codepoint) == 0x02D9) || ((codepoint) == 0x02DB) || ((codepoint) == 0x02DD))
/*
 * Non-zero when codepoint lies in U+0108..U+010B or U+0124..U+0127, or
 * equals U+011C, U+011D, U+0120, U+0121, U+0134, U+0135, U+015C, U+015D,
 * U+016C or U+016D.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM913(codepoint) \
    (((codepoint) >= 0x0108 && (codepoint) <= 0x010B) || ((codepoint) == 0x011C) || \
     ((codepoint) == 0x011D) || ((codepoint) == 0x0120) || ((codepoint) == 0x0121) || \
     ((codepoint) >= 0x0124 && (codepoint) <= 0x0127) || ((codepoint) == 0x0134) || ((codepoint) == 0x0135) || \
     ((codepoint) == 0x015C) || ((codepoint) == 0x015D) || ((codepoint) == 0x016C) || ((codepoint) == 0x016D))
/*
 * Non-zero for the code points that getState() maps to
 * COMPOUND_TEXT_SINGLE_1: U+011E, U+011F, U+0130, U+0131 and
 * U+0218..U+021B.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isCompoundS1(codepoint) \
    (((codepoint) == 0x011E) || ((codepoint) == 0x011F) || ((codepoint) == 0x0130) || \
     ((codepoint) == 0x0131) || ((codepoint) >= 0x0218 && (codepoint) <= 0x021B))
/*
 * Non-zero when codepoint lies in U+0174..U+0177 or U+1E80..U+1E85, or
 * is one of the individually listed U+1Exx code points below.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isISO8859_14(codepoint) \
    (((codepoint) >= 0x0174 && (codepoint) <= 0x0177) || ((codepoint) == 0x1E0A) || \
     ((codepoint) == 0x1E0B) || ((codepoint) == 0x1E1E) || ((codepoint) == 0x1E1F) || \
     ((codepoint) == 0x1E40) || ((codepoint) == 0x1E41) || ((codepoint) == 0x1E56) || \
     ((codepoint) == 0x1E57) || ((codepoint) == 0x1E60) || ((codepoint) == 0x1E61) || \
     ((codepoint) == 0x1E6A) || ((codepoint) == 0x1E6B) || ((codepoint) == 0x1EF2) || \
     ((codepoint) == 0x1EF3) || ((codepoint) >= 0x1E80 && (codepoint) <= 0x1E85))
/*
 * Non-zero when codepoint equals U+0152, U+0153, U+0178 or U+20AC.
 * The parameter is parenthesized so any expression can be passed; it is
 * still evaluated several times, so do not pass one with side effects.
 */
#define isIBM923(codepoint) \
    (((codepoint) == 0x0152) || ((codepoint) == 0x0153) || ((codepoint) == 0x0178) || ((codepoint) == 0x20AC))
/*
 * Per-converter private data, stored in UConverter.extraInfo by
 * _CompoundTextOpen().
 *   myConverterArray - one shared-data pointer per COMPOUND_TEXT_CONVERTERS
 *                      index; the COMPOUND_TEXT_SINGLE_0 slot is set to NULL
 *                      at open time, the others come from
 *                      ucnv_loadSharedData().
 *   state            - the state read at the start of each from/to-Unicode
 *                      call and written back before it returns.
 */
163 UConverterSharedData
*myConverterArray
[NUM_OF_CONVERTERS
];
164 COMPOUND_TEXT_CONVERTERS state
;
165 } UConverterDataCompoundText
;
167 /*********** Compound Text Converter Protos ***********/
/*
 * Forward declarations of the open / close / reset / getName callbacks
 * defined further down in this file.
 * NOTE(review): the return-type lines of these declarations are not
 * visible in this listing.
 */
169 _CompoundTextOpen(UConverter
*cnv
, UConverterLoadArgs
*pArgs
, UErrorCode
*errorCode
);
172 _CompoundTextClose(UConverter
*converter
);
175 _CompoundTextReset(UConverter
*converter
, UConverterResetChoice choice
);
178 _CompoundTextgetName(const UConverter
* cnv
);
/*
 * Scans the bytes source[1] .. source[length-1] (length = sourceLimit -
 * source) for the value 0x1B. The byte at source[0] is never examined
 * because the loop index starts at 1.
 * The to-Unicode path adds the returned value to its current source
 * pointer to get the limit of the next sub-conversion.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the index of the 0x1B byte, or length if none - confirm.
 */
181 static int32_t findNextEsc(const char *source
, const char *sourceLimit
) {
182 int32_t length
= sourceLimit
- source
;
184 for (i
= 1; i
< length
; i
++) {
185 if (*(source
+ i
) == 0x1B) {
/*
 * Classifies a code point with the isXxx() range macros, tested in the
 * order written below, starting from the default DO_SEARCH.
 * Assignments visible in this listing:
 *   isASCIIRange -> COMPOUND_TEXT_SINGLE_0
 *   isCompoundS2 -> COMPOUND_TEXT_SINGLE_2
 *   isCompoundS3 -> COMPOUND_TEXT_SINGLE_3
 *   isCompoundS1 -> COMPOUND_TEXT_SINGLE_1
 * NOTE(review): the bodies of the isIBM912/913/923/874/914/916/915 and
 * isISO8859_14 branches, and the return statement, are not visible here.
 */
193 static COMPOUND_TEXT_CONVERTERS
getState(int codepoint
) {
194 COMPOUND_TEXT_CONVERTERS state
= DO_SEARCH
;
196 if (isASCIIRange(codepoint
)) {
197 state
= COMPOUND_TEXT_SINGLE_0
;
198 } else if (isIBM912(codepoint
)) {
200 }else if (isIBM913(codepoint
)) {
202 } else if (isISO8859_14(codepoint
)) {
204 } else if (isIBM923(codepoint
)) {
206 } else if (isIBM874(codepoint
)) {
208 } else if (isIBM914(codepoint
)) {
210 } else if (isCompoundS2(codepoint
)) {
211 state
= COMPOUND_TEXT_SINGLE_2
;
212 } else if (isCompoundS3(codepoint
)) {
213 state
= COMPOUND_TEXT_SINGLE_3
;
214 } else if (isIBM916(codepoint
)) {
216 } else if (isIBM915(codepoint
)) {
218 } else if (isCompoundS1(codepoint
)) {
219 state
= COMPOUND_TEXT_SINGLE_1
;
/*
 * Looks for the escSeqCompoundText row that matches the escape sequence
 * at the current input position. The first toUBytesBufferLength bytes of
 * the candidate are compared against toUBytesBuffer (bytes kept from a
 * previous call); the remaining bytes are compared against source, at
 * offset n - toUBytesBufferLength.
 * If source reaches sourceLimit before a row has been fully compared,
 * *err is set to U_TRUNCATED_CHAR_FOUND. The result starts as INVALID
 * and is set to the row index when a row matches.
 * NOTE(review): the break / matchFound bookkeeping and the return
 * statement are not visible in this listing.
 */
225 static COMPOUND_TEXT_CONVERTERS
findStateFromEscSeq(const char* source
, const char* sourceLimit
, const uint8_t* toUBytesBuffer
, int32_t toUBytesBufferLength
, UErrorCode
*err
) {
226 COMPOUND_TEXT_CONVERTERS state
= INVALID
;
227 UBool matchFound
= FALSE
;
228 int32_t i
, n
, offset
= toUBytesBufferLength
;
230 for (i
= 0; i
< NUM_OF_CONVERTERS
; i
++) {
/* Walk the zero-terminated escape sequence of candidate i. */
232 for (n
= 0; escSeqCompoundText
[i
][n
] != 0; n
++) {
/* Positions below toUBytesBufferLength come from the saved bytes. */
233 if (n
< toUBytesBufferLength
) {
234 if (toUBytesBuffer
[n
] != escSeqCompoundText
[i
][n
]) {
/* Input ends in the middle of a still-matching sequence. */
238 } else if ((source
+ (n
- offset
)) >= sourceLimit
) {
239 *err
= U_TRUNCATED_CHAR_FOUND
;
242 } else if (*(source
+ (n
- offset
)) != escSeqCompoundText
[i
][n
]) {
254 state
= (COMPOUND_TEXT_CONVERTERS
)i
;
/*
 * Open callback. Allocates a UConverterDataCompoundText with
 * uprv_malloc() and stores it in cnv->extraInfo. On success it fills
 * myConverterArray: the COMPOUND_TEXT_SINGLE_0 slot is NULL and every
 * other slot gets the result of ucnv_loadSharedData() for the named
 * converter. All loads share the same errorCode, which is checked once
 * after the last load.
 * If a load failed, or pArgs->onlyTestIsLoadable is set,
 * _CompoundTextClose(cnv) is called. The state field is set to 0, which
 * equals COMPOUND_TEXT_SINGLE_0.
 * *errorCode is set to U_MEMORY_ALLOCATION_ERROR on the path that
 * presumably handles a NULL allocation (the else line is not visible).
 */
261 _CompoundTextOpen(UConverter
*cnv
, UConverterLoadArgs
*pArgs
, UErrorCode
*errorCode
){
262 cnv
->extraInfo
= uprv_malloc (sizeof (UConverterDataCompoundText
));
263 if (cnv
->extraInfo
!= NULL
) {
264 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) cnv
->extraInfo
;
266 UConverterNamePieces stackPieces
;
267 UConverterLoadArgs stackArgs
={ (int32_t)sizeof(UConverterLoadArgs
) };
/* No sub-converter for state 0; the conversion paths handle it inline. */
269 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_0
] = NULL
;
270 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_1
] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces
, &stackArgs
, errorCode
);
271 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_2
] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces
, &stackArgs
, errorCode
);
272 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_3
] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces
, &stackArgs
, errorCode
);
273 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_1
] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces
, &stackArgs
, errorCode
);
274 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_2
] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces
, &stackArgs
, errorCode
);
275 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_3
] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces
, &stackArgs
, errorCode
);
276 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_4
] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces
, &stackArgs
, errorCode
);
277 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_5
] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces
, &stackArgs
, errorCode
);
278 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_6
] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces
, &stackArgs
, errorCode
);
279 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_7
] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces
, &stackArgs
, errorCode
);
280 myConverterData
->myConverterArray
[COMPOUND_TEXT_TRIPLE_DOUBLE
] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces
, &stackArgs
, errorCode
);
282 myConverterData
->myConverterArray
[IBM_915
] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
283 myConverterData
->myConverterArray
[IBM_916
] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
284 myConverterData
->myConverterArray
[IBM_914
] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
285 myConverterData
->myConverterArray
[IBM_874
] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
286 myConverterData
->myConverterArray
[IBM_912
] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
287 myConverterData
->myConverterArray
[IBM_913
] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces
, &stackArgs
, errorCode
);
288 myConverterData
->myConverterArray
[ISO_8859_14
] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces
, &stackArgs
, errorCode
);
289 myConverterData
->myConverterArray
[IBM_923
] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces
, &stackArgs
, errorCode
);
/* Single check covering all of the loads above. */
291 if (U_FAILURE(*errorCode
) || pArgs
->onlyTestIsLoadable
) {
292 _CompoundTextClose(cnv
);
296 myConverterData
->state
= 0;
298 *errorCode
= U_MEMORY_ALLOCATION_ERROR
;
/*
 * Close callback. When converter->extraInfo is non-NULL, calls
 * ucnv_unloadSharedDataIfReady() on every non-NULL entry of
 * myConverterArray and then releases extraInfo with uprv_free().
 * NOTE(review): whether extraInfo is reset to NULL afterwards is not
 * visible in this listing.
 */
304 _CompoundTextClose(UConverter
*converter
) {
305 UConverterDataCompoundText
* myConverterData
= (UConverterDataCompoundText
*)(converter
->extraInfo
);
308 if (converter
->extraInfo
!= NULL
) {
309 /*close the array of converter pointers and free the memory*/
310 for (i
= 0; i
< NUM_OF_CONVERTERS
; i
++) {
311 if (myConverterData
->myConverterArray
[i
] != NULL
) {
312 ucnv_unloadSharedDataIfReady(myConverterData
->myConverterArray
[i
]);
316 uprv_free(converter
->extraInfo
);
/*
 * Reset callback taking the converter and the reset choice.
 * NOTE(review): no statement of the body is visible in this listing, so
 * what (if anything) is reset cannot be stated here.
 */
321 _CompoundTextReset(UConverter
*converter
, UConverterResetChoice choice
) {
/*
 * getName callback: returns the constant string "x11-compound-text".
 * The cnv argument is not used by the visible code.
 */
325 _CompoundTextgetName(const UConverter
* cnv
){
326 return "x11-compound-text";
/*
 * From-Unicode conversion, driven by args->source / args->target.
 *
 * For each input unit:
 *  - Surrogates: a lead followed by a trail is combined with
 *    U16_GET_SUPPLEMENTARY(). An unmatched surrogate sets *err to
 *    U_ILLEGAL_CHAR_FOUND and is stored in cnv->fromUChar32. A lead at
 *    the very end of the input is also stored in cnv->fromUChar32.
 *  - getState() picks the target state. If it differs from the current
 *    state, that state's escape sequence is staged in tmpTargetBuffer.
 *  - DO_SEARCH: converters 1 .. SEARCH_LENGTH-1 are tried with
 *    ucnv_MBCSFromUChar32() and the first one that returns a positive
 *    length is used. COMPOUND_TEXT_SINGLE_0 stages (uint8_t)sourceChar
 *    directly. Otherwise the current state's converter is used. Bytes
 *    are taken from pValue, most significant first.
 *  - Staged bytes are copied to target. Bytes that do not fit go to
 *    converter->charErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
 *
 * On exit the state, args->source and args->target are written back.
 * Offsets are not produced (the offsets local is commented out).
 * NOTE(review): several declarations, else/break lines and the handling
 * of unconvertible characters are not visible in this listing.
 */
330 UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs
* args
, UErrorCode
* err
){
331 UConverter
*cnv
= args
->converter
;
332 uint8_t *target
= (uint8_t *) args
->target
;
333 const uint8_t *targetLimit
= (const uint8_t *) args
->targetLimit
;
334 const UChar
* source
= args
->source
;
335 const UChar
* sourceLimit
= args
->sourceLimit
;
336 /* int32_t* offsets = args->offsets; */
338 UBool useFallback
= cnv
->useFallback
;
339 uint8_t tmpTargetBuffer
[7];
340 int32_t tmpTargetBufferLength
= 0;
341 COMPOUND_TEXT_CONVERTERS currentState
, tmpState
;
343 int32_t pValueLength
= 0;
346 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) cnv
->extraInfo
;
348 currentState
= myConverterData
->state
;
350 /* check if the last codepoint of previous buffer was a lead surrogate*/
351 if((sourceChar
= cnv
->fromUChar32
)!=0 && target
< targetLimit
) {
355 while( source
< sourceLimit
){
356 if(target
< targetLimit
){
358 sourceChar
= *(source
++);
359 /*check if the char is a First surrogate*/
360 if(U16_IS_SURROGATE(sourceChar
)) {
361 if(U16_IS_SURROGATE_LEAD(sourceChar
)) {
363 /*look ahead to find the trail surrogate*/
364 if(source
< sourceLimit
) {
365 /* test the following code unit */
366 UChar trail
=(UChar
) *source
;
367 if(U16_IS_TRAIL(trail
)) {
369 sourceChar
=U16_GET_SUPPLEMENTARY(sourceChar
, trail
);
370 cnv
->fromUChar32
=0x00;
371 /* convert this supplementary code point */
372 /* exit this condition tree */
374 /* this is an unmatched lead code unit (1st surrogate) */
375 /* callback(illegal) */
376 *err
=U_ILLEGAL_CHAR_FOUND
;
377 cnv
->fromUChar32
=sourceChar
;
382 cnv
->fromUChar32
=sourceChar
;
386 /* this is an unmatched trail code unit (2nd surrogate) */
387 /* callback(illegal) */
388 *err
=U_ILLEGAL_CHAR_FOUND
;
389 cnv
->fromUChar32
=sourceChar
;
/* Stage the escape sequence (if any) and the encoded bytes for this code point. */
394 tmpTargetBufferLength
= 0;
395 tmpState
= getState(sourceChar
);
397 if (tmpState
!= DO_SEARCH
&& currentState
!= tmpState
) {
398 /* Get escape sequence if necessary */
399 currentState
= tmpState
;
400 for (i
= 0; escSeqCompoundText
[currentState
][i
] != 0; i
++) {
401 tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][i
];
405 if (tmpState
== DO_SEARCH
) {
406 /* Test all available converters */
407 for (i
= 1; i
< SEARCH_LENGTH
; i
++) {
408 pValueLength
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[i
], sourceChar
, &pValue
, useFallback
);
409 if (pValueLength
> 0) {
410 tmpState
= (COMPOUND_TEXT_CONVERTERS
)i
;
411 if (currentState
!= tmpState
) {
412 currentState
= tmpState
;
413 for (j
= 0; escSeqCompoundText
[currentState
][j
] != 0; j
++) {
414 tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][j
];
/* Emit the pValueLength bytes of pValue, most significant byte first. */
417 for (n
= (pValueLength
- 1); n
>= 0; n
--) {
418 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue
>> (n
* 8));
423 } else if (tmpState
== COMPOUND_TEXT_SINGLE_0
) {
424 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)sourceChar
;
426 pValueLength
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[currentState
], sourceChar
, &pValue
, useFallback
);
427 if (pValueLength
> 0) {
428 for (n
= (pValueLength
- 1); n
>= 0; n
--) {
429 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue
>> (n
* 8));
/* Flush the staged bytes to the caller's target buffer. */
434 for (i
= 0; i
< tmpTargetBufferLength
; i
++) {
435 if (target
< targetLimit
) {
436 *target
++ = tmpTargetBuffer
[i
];
438 *err
= U_BUFFER_OVERFLOW_ERROR
;
/* Bytes that did not fit are kept in the converter's charErrorBuffer. */
443 if (*err
== U_BUFFER_OVERFLOW_ERROR
) {
444 for (; i
< tmpTargetBufferLength
; i
++) {
445 args
->converter
->charErrorBuffer
[args
->converter
->charErrorBufferLength
++] = tmpTargetBuffer
[i
];
449 *err
= U_BUFFER_OVERFLOW_ERROR
;
454 /*save the state and return */
455 myConverterData
->state
= currentState
;
456 args
->source
= source
;
457 args
->target
= (char*)target
;
/*
 * To-Unicode conversion, driven by args->source / args->target.
 *
 *  - A copy of args (subArgs) is made with uprv_memcpy(), limited to the
 *    smaller of args->size and sizeof(UConverterToUnicodeArgs).
 *  - The next byte is taken from converter->toUBytes[0] when bytes were
 *    saved by a previous call (toULength > 0), otherwise from the source.
 *  - On ESC_START, findStateFromEscSeq() selects the new state.
 *    U_TRUNCATED_CHAR_FOUND: the remaining source bytes are appended to
 *    converter->toUBytes. INVALID: *err becomes U_ILLEGAL_CHAR_FOUND
 *    (after skipping the 0x1b byte when nothing was buffered).
 *    Otherwise the source pointer advances by the escape-sequence length
 *    minus the bytes already buffered, and toULength is cleared.
 *  - COMPOUND_TEXT_SINGLE_0: bytes are copied to the target as
 *    0x00ff & byte until the next ESC_START or the end of input.
 *  - Any other state: the run up to the next escape (findNextEsc) goes
 *    through ucnv_MBCSToUnicodeWithOffsets() on subArgs, with
 *    subArgs.converter->sharedData temporarily replaced by the current
 *    state's shared data and restored afterwards.
 *  - When that call reports U_BUFFER_OVERFLOW_ERROR, the UCharErrorBuffer
 *    content and length are copied to args->converter.
 *
 * On exit the state, args->target and args->source are written back.
 * NOTE(review): several declarations and else/break lines are not
 * visible in this listing.
 */
462 UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs
*args
,
464 const char *mySource
= (char *) args
->source
;
465 UChar
*myTarget
= args
->target
;
466 const char *mySourceLimit
= args
->sourceLimit
;
467 const char *tmpSourceLimit
= mySourceLimit
;
468 uint32_t mySourceChar
= 0x0000;
469 COMPOUND_TEXT_CONVERTERS currentState
, tmpState
;
470 int32_t sourceOffset
= 0;
471 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) args
->converter
->extraInfo
;
472 UConverterSharedData
* savedSharedData
= NULL
;
474 UConverterToUnicodeArgs subArgs
;
477 /* set up the subconverter arguments */
478 if(args
->size
<sizeof(UConverterToUnicodeArgs
)) {
479 minArgsSize
= args
->size
;
481 minArgsSize
= (int32_t)sizeof(UConverterToUnicodeArgs
);
484 uprv_memcpy(&subArgs
, args
, minArgsSize
);
485 subArgs
.size
= (uint16_t)minArgsSize
;
487 currentState
= tmpState
= myConverterData
->state
;
489 while(mySource
< mySourceLimit
){
490 if(myTarget
< args
->targetLimit
){
/* Prefer a byte saved from a previous, truncated escape sequence. */
491 if (args
->converter
->toULength
> 0) {
492 mySourceChar
= args
->converter
->toUBytes
[0];
494 mySourceChar
= (uint8_t)*mySource
;
497 if (mySourceChar
== ESC_START
) {
498 tmpState
= findStateFromEscSeq(mySource
, mySourceLimit
, args
->converter
->toUBytes
, args
->converter
->toULength
, err
);
/* Incomplete escape sequence: keep the remaining input bytes in toUBytes. */
500 if (*err
== U_TRUNCATED_CHAR_FOUND
) {
501 for (; mySource
< mySourceLimit
;) {
502 args
->converter
->toUBytes
[args
->converter
->toULength
++] = *mySource
++;
506 } else if (tmpState
== INVALID
) {
507 if (args
->converter
->toULength
== 0) {
508 mySource
++; /* skip over the 0x1b byte */
510 *err
= U_ILLEGAL_CHAR_FOUND
;
514 if (tmpState
!= currentState
) {
515 currentState
= tmpState
;
/* Skip the part of the escape sequence that is still in the source. */
518 sourceOffset
= uprv_strlen((char*)escSeqCompoundText
[currentState
]) - args
->converter
->toULength
;
520 mySource
+= sourceOffset
;
522 args
->converter
->toULength
= 0;
525 if (currentState
== COMPOUND_TEXT_SINGLE_0
) {
526 while (mySource
< mySourceLimit
) {
527 if (*mySource
== ESC_START
) {
530 if (myTarget
< args
->targetLimit
) {
531 *myTarget
++ = 0x00ff&(*mySource
++);
533 *err
= U_BUFFER_OVERFLOW_ERROR
;
537 } else if (mySource
< mySourceLimit
){
538 sourceOffset
= findNextEsc(mySource
, mySourceLimit
);
540 tmpSourceLimit
= mySource
+ sourceOffset
;
542 subArgs
.source
= mySource
;
543 subArgs
.sourceLimit
= tmpSourceLimit
;
544 subArgs
.target
= myTarget
;
/* Swap in the current state's shared data for the MBCS call, then restore it. */
545 savedSharedData
= subArgs
.converter
->sharedData
;
546 subArgs
.converter
->sharedData
= myConverterData
->myConverterArray
[currentState
];
548 ucnv_MBCSToUnicodeWithOffsets(&subArgs
, err
);
550 subArgs
.converter
->sharedData
= savedSharedData
;
552 mySource
= subArgs
.source
;
553 myTarget
= subArgs
.target
;
555 if (U_FAILURE(*err
)) {
556 if(*err
== U_BUFFER_OVERFLOW_ERROR
) {
557 if(subArgs
.converter
->UCharErrorBufferLength
> 0) {
558 uprv_memcpy(args
->converter
->UCharErrorBuffer
, subArgs
.converter
->UCharErrorBuffer
,
559 subArgs
.converter
->UCharErrorBufferLength
);
561 args
->converter
->UCharErrorBufferLength
=subArgs
.converter
->UCharErrorBufferLength
;
562 subArgs
.converter
->UCharErrorBufferLength
= 0;
568 *err
= U_BUFFER_OVERFLOW_ERROR
;
572 myConverterData
->state
= currentState
;
573 args
->target
= myTarget
;
574 args
->source
= mySource
;
/*
 * Adds to the set adder sa every code point this converter can handle.
 * ucnv_MBCSGetUnicodeSetForUnicode() is called for converter slots
 * 1 .. NUM_OF_CONVERTERS-1; slot 0 is skipped. Then U+0000, U+0009,
 * U+000A, U+0020..U+007F and U+00A0..U+00FF are added directly, the
 * same values that isASCIIRange() accepts.
 * NOTE(review): the declaration of the sa parameter is not visible in
 * this listing.
 */
578 _CompoundText_GetUnicodeSet(const UConverter
*cnv
,
580 UConverterUnicodeSet which
,
581 UErrorCode
*pErrorCode
) {
582 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*)cnv
->extraInfo
;
585 for (i
= 1; i
< NUM_OF_CONVERTERS
; i
++) {
586 ucnv_MBCSGetUnicodeSetForUnicode(myConverterData
->myConverterArray
[i
], sa
, which
, pErrorCode
);
588 sa
->add(sa
->set
, 0x0000);
589 sa
->add(sa
->set
, 0x0009);
590 sa
->add(sa
->set
, 0x000A);
591 sa
->addRange(sa
->set
, 0x0020, 0x007F);
592 sa
->addRange(sa
->set
, 0x00A0, 0x00FF);
/*
 * Callback table for this converter. The same to-Unicode function fills
 * two consecutive slots, and likewise the from-Unicode function.
 * NOTE(review): presumably these are the plain and with-offsets entry
 * points of UConverterImpl - confirm against its declaration. Most other
 * initializers of this table are not visible in this listing.
 */
595 static const UConverterImpl _CompoundTextImpl
= {
606 UConverter_toUnicode_CompoundText_OFFSETS
,
607 UConverter_toUnicode_CompoundText_OFFSETS
,
608 UConverter_fromUnicode_CompoundText_OFFSETS
,
609 UConverter_fromUnicode_CompoundText_OFFSETS
,
613 _CompoundTextgetName
,
616 _CompoundText_GetUnicodeSet
/*
 * Static description of the converter. Only the leading size field and
 * the trailing zero-filled reserved array are visible in this listing.
 */
618 static const UConverterStaticData _CompoundTextStaticData
= {
619 sizeof(UConverterStaticData
),
632 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
/*
 * Shared-data record for the converter (non-static, so visible to other
 * translation units). It references _CompoundTextStaticData.
 * NOTE(review): the remaining initializers are not visible in this
 * listing.
 */
634 const UConverterSharedData _CompoundTextData
= {
635 sizeof(UConverterSharedData
),
639 &_CompoundTextStaticData
,
645 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */