1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2010-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
10 * tab size: 8 (not used)
13 * created on: 2010Dec09
14 * created by: Michael Ow
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
21 #include "unicode/ucnv.h"
22 #include "unicode/uset.h"
23 #include "unicode/ucnv_err.h"
24 #include "unicode/ucnv_cb.h"
25 #include "unicode/utf16.h"
/*
 * Converter-state identifiers. Each value is used as an index into
 * escSeqCompoundText[] and into UConverterDataCompoundText.myConverterArray[],
 * both of which are sized by NUM_OF_CONVERTERS.
 * NOTE(review): other enumerators referenced in this file (IBM_915 ... IBM_923,
 * INVALID, DO_SEARCH) are not visible in this excerpt; presumably they sit
 * between COMPOUND_TEXT_TRIPLE_DOUBLE and NUM_OF_CONVERTERS - confirm.
 */
37 COMPOUND_TEXT_SINGLE_0
= 0,
38 COMPOUND_TEXT_SINGLE_1
= 1,
39 COMPOUND_TEXT_SINGLE_2
= 2,
40 COMPOUND_TEXT_SINGLE_3
= 3,
42 COMPOUND_TEXT_DOUBLE_1
= 4,
43 COMPOUND_TEXT_DOUBLE_2
= 5,
44 COMPOUND_TEXT_DOUBLE_3
= 6,
45 COMPOUND_TEXT_DOUBLE_4
= 7,
46 COMPOUND_TEXT_DOUBLE_5
= 8,
47 COMPOUND_TEXT_DOUBLE_6
= 9,
48 COMPOUND_TEXT_DOUBLE_7
= 10,
50 COMPOUND_TEXT_TRIPLE_DOUBLE
= 11,
60 NUM_OF_CONVERTERS
= 20
61 } COMPOUND_TEXT_CONVERTERS
;
/*
 * Exclusive upper bound of the converter index tried by the DO_SEARCH loop in
 * UConverter_fromUnicode_CompoundText_OFFSETS (the loop runs over indices 1..11).
 */
63 #define SEARCH_LENGTH 12
/*
 * Escape-sequence bytes for each converter state, indexed by the
 * COMPOUND_TEXT_CONVERTERS value. Every row starts with 0x1B and is padded
 * with 0 up to 5 bytes; readers in this file stop at the first 0 byte
 * (and uprv_strlen() is applied to a row), so a row is also a C string.
 */
65 static const uint8_t escSeqCompoundText
[NUM_OF_CONVERTERS
][5] = {
67 { 0x1B, 0x2D, 0x41, 0, 0 },
68 { 0x1B, 0x2D, 0x4D, 0, 0 },
69 { 0x1B, 0x2D, 0x46, 0, 0 },
70 { 0x1B, 0x2D, 0x47, 0, 0 },
73 { 0x1B, 0x24, 0x29, 0x41, 0 },
74 { 0x1B, 0x24, 0x29, 0x42, 0 },
75 { 0x1B, 0x24, 0x29, 0x43, 0 },
76 { 0x1B, 0x24, 0x29, 0x44, 0 },
77 { 0x1B, 0x24, 0x29, 0x47, 0 },
78 { 0x1B, 0x24, 0x29, 0x48, 0 },
79 { 0x1B, 0x24, 0x29, 0x49, 0 },
82 { 0x1B, 0x25, 0x47, 0, 0 },
85 { 0x1B, 0x2D, 0x4C, 0, 0 },
87 { 0x1B, 0x2D, 0x48, 0, 0 },
89 { 0x1B, 0x2D, 0x44, 0, 0 },
91 { 0x1B, 0x2D, 0x54, 0, 0 },
93 { 0x1B, 0x2D, 0x42, 0, 0 },
95 { 0x1B, 0x2D, 0x43, 0, 0 },
97 { 0x1B, 0x2D, 0x5F, 0, 0 },
99 { 0x1B, 0x2D, 0x62, 0, 0 },
/* The ESC byte: first byte of every row in escSeqCompoundText; the toUnicode path compares input bytes against it. */
102 #define ESC_START 0x1B
/*
 * Nonzero when codepoint is U+0000, U+0009, U+000A, or lies in
 * U+0020..U+007F or U+00A0..U+00FF. getState() maps these code points to
 * COMPOUND_TEXT_SINGLE_0.
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isASCIIRange(codepoint) \
    (((codepoint) == 0x0000) || ((codepoint) == 0x0009) || ((codepoint) == 0x000A) || \
     ((codepoint) >= 0x0020 && (codepoint) <= 0x007f) || ((codepoint) >= 0x00A0 && (codepoint) <= 0x00FF))
/*
 * Nonzero when codepoint lies in U+0401..U+045F or equals U+2116.
 * One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM915(codepoint) \
    (((codepoint) >= 0x0401 && (codepoint) <= 0x045F) || ((codepoint) == 0x2116))
/*
 * Nonzero when codepoint lies in U+05D0..U+05EA or equals U+2017 or U+203E.
 * One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM916(codepoint) \
    (((codepoint) >= 0x05D0 && (codepoint) <= 0x05EA) || ((codepoint) == 0x2017) || ((codepoint) == 0x203E))
/*
 * Nonzero when codepoint is U+060C, U+061B, U+061F, U+200B, U+FE74, or lies in
 * U+0621..U+063A, U+0640..U+0652, U+0660..U+066D, U+FE70..U+FE72 or
 * U+FE76..U+FEBE. getState() maps these code points to COMPOUND_TEXT_SINGLE_3.
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isCompoundS3(codepoint) \
    (((codepoint) == 0x060C) || ((codepoint) == 0x061B) || ((codepoint) == 0x061F) || \
     ((codepoint) >= 0x0621 && (codepoint) <= 0x063A) || \
     ((codepoint) >= 0x0640 && (codepoint) <= 0x0652) || \
     ((codepoint) >= 0x0660 && (codepoint) <= 0x066D) || ((codepoint) == 0x200B) || \
     ((codepoint) >= 0x0FE70 && (codepoint) <= 0x0FE72) || ((codepoint) == 0x0FE74) || \
     ((codepoint) >= 0x0FE76 && (codepoint) <= 0x0FEBE))
/*
 * Nonzero when codepoint is U+02BC, U+02BD, U+2015, or lies in U+0384..U+03CE.
 * getState() maps these code points to COMPOUND_TEXT_SINGLE_2.
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isCompoundS2(codepoint) \
    (((codepoint) == 0x02BC) || ((codepoint) == 0x02BD) || \
     ((codepoint) >= 0x0384 && (codepoint) <= 0x03CE) || ((codepoint) == 0x2015))
/*
 * Nonzero when codepoint is one of the Latin Extended-A code points listed
 * below (individual values and short ranges between U+0100 and U+0173).
 * One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM914(codepoint) \
    (((codepoint) == 0x0100) || ((codepoint) == 0x0101) || ((codepoint) == 0x0112) || ((codepoint) == 0x0113) || \
     ((codepoint) == 0x0116) || ((codepoint) == 0x0117) || ((codepoint) == 0x0122) || ((codepoint) == 0x0123) || \
     ((codepoint) >= 0x0128 && (codepoint) <= 0x012B) || ((codepoint) == 0x012E) || ((codepoint) == 0x012F) || \
     ((codepoint) >= 0x0136 && (codepoint) <= 0x0138) || ((codepoint) == 0x013B) || ((codepoint) == 0x013C) || \
     ((codepoint) == 0x0145) || ((codepoint) == 0x0146) || ((codepoint) >= 0x014A && (codepoint) <= 0x014D) || \
     ((codepoint) == 0x0156) || ((codepoint) == 0x0157) || ((codepoint) >= 0x0166 && (codepoint) <= 0x016B) || \
     ((codepoint) == 0x0172) || ((codepoint) == 0x0173))
/*
 * Nonzero when codepoint lies in U+0E01..U+0E3A or U+0E3F..U+0E5B.
 * One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM874(codepoint) \
    (((codepoint) >= 0x0E01 && (codepoint) <= 0x0E3A) || ((codepoint) >= 0x0E3F && (codepoint) <= 0x0E5B))
/*
 * Nonzero when codepoint is one of the Latin Extended-A / spacing-modifier
 * code points listed below. One of the range tests consulted by getState().
 * U+0147 and U+0148 are both accepted: the list previously tested U+0147
 * twice, which left U+0148 (the lowercase partner of U+0147) unmatched.
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM912(codepoint) \
    (((codepoint) >= 0x0102 && (codepoint) <= 0x0107) || ((codepoint) >= 0x010C && (codepoint) <= 0x0111) || \
     ((codepoint) >= 0x0118 && (codepoint) <= 0x011B) || \
     ((codepoint) == 0x0139) || ((codepoint) == 0x013A) || ((codepoint) == 0x013D) || ((codepoint) == 0x013E) || \
     ((codepoint) >= 0x0141 && (codepoint) <= 0x0144) || \
     ((codepoint) == 0x0147) || ((codepoint) == 0x0148) || ((codepoint) == 0x0150) || ((codepoint) == 0x0151) || \
     ((codepoint) == 0x0154) || ((codepoint) == 0x0155) || \
     ((codepoint) >= 0x0158 && (codepoint) <= 0x015B) || ((codepoint) == 0x015E) || ((codepoint) == 0x015F) || \
     ((codepoint) >= 0x0160 && (codepoint) <= 0x0165) || \
     ((codepoint) == 0x016E) || ((codepoint) == 0x016F) || ((codepoint) == 0x0170) || ((codepoint) == 0x0171) || \
     ((codepoint) >= 0x0179 && (codepoint) <= 0x017E) || \
     ((codepoint) == 0x02C7) || ((codepoint) == 0x02D8) || ((codepoint) == 0x02D9) || ((codepoint) == 0x02DB) || \
     ((codepoint) == 0x02DD))
/*
 * Nonzero when codepoint lies in U+0108..U+010B or U+0124..U+0127, or is one
 * of U+011C, U+011D, U+0120, U+0121, U+0134, U+0135, U+015C, U+015D, U+016C,
 * U+016D. One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM913(codepoint) \
    (((codepoint) >= 0x0108 && (codepoint) <= 0x010B) || ((codepoint) == 0x011C) || \
     ((codepoint) == 0x011D) || ((codepoint) == 0x0120) || ((codepoint) == 0x0121) || \
     ((codepoint) >= 0x0124 && (codepoint) <= 0x0127) || ((codepoint) == 0x0134) || ((codepoint) == 0x0135) || \
     ((codepoint) == 0x015C) || ((codepoint) == 0x015D) || ((codepoint) == 0x016C) || ((codepoint) == 0x016D))
/*
 * Nonzero when codepoint is U+011E, U+011F, U+0130, U+0131, or lies in
 * U+0218..U+021B. getState() maps these code points to COMPOUND_TEXT_SINGLE_1.
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isCompoundS1(codepoint) \
    (((codepoint) == 0x011E) || ((codepoint) == 0x011F) || ((codepoint) == 0x0130) || \
     ((codepoint) == 0x0131) || ((codepoint) >= 0x0218 && (codepoint) <= 0x021B))
/*
 * Nonzero when codepoint lies in U+0174..U+0177 or U+1E80..U+1E85, or is one
 * of U+1E0A, U+1E0B, U+1E1E, U+1E1F, U+1E40, U+1E41, U+1E56, U+1E57, U+1E60,
 * U+1E61, U+1E6A, U+1E6B, U+1EF2, U+1EF3.
 * One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isISO8859_14(codepoint) \
    (((codepoint) >= 0x0174 && (codepoint) <= 0x0177) || ((codepoint) == 0x1E0A) || \
     ((codepoint) == 0x1E0B) || ((codepoint) == 0x1E1E) || ((codepoint) == 0x1E1F) || \
     ((codepoint) == 0x1E40) || ((codepoint) == 0x1E41) || ((codepoint) == 0x1E56) || \
     ((codepoint) == 0x1E57) || ((codepoint) == 0x1E60) || ((codepoint) == 0x1E61) || \
     ((codepoint) == 0x1E6A) || ((codepoint) == 0x1E6B) || ((codepoint) == 0x1EF2) || \
     ((codepoint) == 0x1EF3) || ((codepoint) >= 0x1E80 && (codepoint) <= 0x1E85))
/*
 * Nonzero when codepoint is U+0152, U+0153, U+0178 or U+20AC.
 * One of the range tests consulted by getState().
 * The argument is parenthesized so that any expression may be passed, but it
 * is evaluated several times: do not pass an expression with side effects.
 */
#define isIBM923(codepoint) \
    (((codepoint) == 0x0152) || ((codepoint) == 0x0153) || ((codepoint) == 0x0178) || ((codepoint) == 0x20AC))
/*
 * Per-converter data kept in UConverter.extraInfo (allocated in
 * _CompoundTextOpen, freed in _CompoundTextClose):
 *   myConverterArray - shared data of each sub-converter, indexed by
 *                      COMPOUND_TEXT_CONVERTERS value;
 *   state            - converter state saved at the end of a conversion call
 *                      and read back at the start of the next one.
 * NOTE(review): the opening line of this typedef is not visible in this excerpt.
 */
163 UConverterSharedData
*myConverterArray
[NUM_OF_CONVERTERS
];
164 COMPOUND_TEXT_CONVERTERS state
;
165 } UConverterDataCompoundText
;
167 /*********** Compound Text Converter Protos ***********/
/* Forward declarations of the open/close/reset/getName callbacks defined later in this file. */
169 static void U_CALLCONV
170 _CompoundTextOpen(UConverter
*cnv
, UConverterLoadArgs
*pArgs
, UErrorCode
*errorCode
);
172 static void U_CALLCONV
173 _CompoundTextClose(UConverter
*converter
);
175 static void U_CALLCONV
176 _CompoundTextReset(UConverter
*converter
, UConverterResetChoice choice
);
178 static const char* U_CALLCONV
179 _CompoundTextgetName(const UConverter
* cnv
);
/*
 * Scans the bytes source[1] .. source[length-1], where length is
 * sourceLimit - source, for the value 0x1B (ESC). Index 0 is not examined.
 * The toUnicode path uses the result as the byte count of the run handed to
 * the sub-converter.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the offset of the ESC byte is returned, or length when none is
 * found - confirm.
 */
182 static int32_t findNextEsc(const char *source
, const char *sourceLimit
) {
183 int32_t length
= static_cast<int32_t>(sourceLimit
- source
);
185 for (i
= 1; i
< length
; i
++) {
186 if (*(source
+ i
) == 0x1B) {
/*
 * Classifies a code point by testing the is*() range macros in a fixed order.
 * The local state starts as DO_SEARCH and is overwritten by the first
 * matching branch (e.g. isASCIIRange -> COMPOUND_TEXT_SINGLE_0,
 * isCompoundS2 -> COMPOUND_TEXT_SINGLE_2, isCompoundS3 -> COMPOUND_TEXT_SINGLE_3,
 * isCompoundS1 -> COMPOUND_TEXT_SINGLE_1).
 * NOTE(review): the bodies of the remaining branches and the return statement
 * are not visible in this excerpt; presumably each assigns the like-named
 * enumerator and the function returns state - confirm.
 */
194 static COMPOUND_TEXT_CONVERTERS
getState(int codepoint
) {
195 COMPOUND_TEXT_CONVERTERS state
= DO_SEARCH
;
197 if (isASCIIRange(codepoint
)) {
198 state
= COMPOUND_TEXT_SINGLE_0
;
199 } else if (isIBM912(codepoint
)) {
201 }else if (isIBM913(codepoint
)) {
203 } else if (isISO8859_14(codepoint
)) {
205 } else if (isIBM923(codepoint
)) {
207 } else if (isIBM874(codepoint
)) {
209 } else if (isIBM914(codepoint
)) {
211 } else if (isCompoundS2(codepoint
)) {
212 state
= COMPOUND_TEXT_SINGLE_2
;
213 } else if (isCompoundS3(codepoint
)) {
214 state
= COMPOUND_TEXT_SINGLE_3
;
215 } else if (isIBM916(codepoint
)) {
217 } else if (isIBM915(codepoint
)) {
219 } else if (isCompoundS1(codepoint
)) {
220 state
= COMPOUND_TEXT_SINGLE_1
;
/*
 * Matches an incoming escape sequence against every row of escSeqCompoundText.
 * For position n of a candidate row, the byte compared is toUBytesBuffer[n]
 * while n < toUBytesBufferLength (bytes buffered earlier), otherwise
 * source[n - toUBytesBufferLength]. If that source position is at or beyond
 * sourceLimit, *err is set to U_TRUNCATED_CHAR_FOUND.
 * The local state starts as INVALID and is set to the row index i on a match.
 * NOTE(review): the mismatch handling, loop exits and the return statement
 * are not visible in this excerpt; presumably state is returned - confirm.
 */
226 static COMPOUND_TEXT_CONVERTERS
findStateFromEscSeq(const char* source
, const char* sourceLimit
, const uint8_t* toUBytesBuffer
, int32_t toUBytesBufferLength
, UErrorCode
*err
) {
227 COMPOUND_TEXT_CONVERTERS state
= INVALID
;
228 UBool matchFound
= FALSE
;
229 int32_t i
, n
, offset
= toUBytesBufferLength
;
231 for (i
= 0; i
< NUM_OF_CONVERTERS
; i
++) {
233 for (n
= 0; escSeqCompoundText
[i
][n
] != 0; n
++) {
234 if (n
< toUBytesBufferLength
) {
235 if (toUBytesBuffer
[n
] != escSeqCompoundText
[i
][n
]) {
239 } else if ((source
+ (n
- offset
)) >= sourceLimit
) {
240 *err
= U_TRUNCATED_CHAR_FOUND
;
243 } else if (*(source
+ (n
- offset
)) != escSeqCompoundText
[i
][n
]) {
255 state
= (COMPOUND_TEXT_CONVERTERS
)i
;
/*
 * Open callback: allocates a UConverterDataCompoundText with uprv_malloc,
 * stores it in cnv->extraInfo and fills myConverterArray by loading the
 * shared data of each sub-converter by name with ucnv_loadSharedData().
 * The COMPOUND_TEXT_SINGLE_0 slot is set to NULL.
 * If *errorCode indicates failure after the loads, or
 * pArgs->onlyTestIsLoadable is set, _CompoundTextClose(cnv) is called.
 * The saved state is initialised to enumerator value 0.
 * NOTE(review): the branch structure is only partly visible in this excerpt;
 * the assignment of U_MEMORY_ALLOCATION_ERROR is presumably the else branch
 * of the extraInfo != NULL check - confirm.
 */
261 static void U_CALLCONV
262 _CompoundTextOpen(UConverter
*cnv
, UConverterLoadArgs
*pArgs
, UErrorCode
*errorCode
){
263 cnv
->extraInfo
= uprv_malloc (sizeof (UConverterDataCompoundText
));
264 if (cnv
->extraInfo
!= NULL
) {
265 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) cnv
->extraInfo
;
267 UConverterNamePieces stackPieces
;
268 UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
;
270 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_0
] = NULL
;
271 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_1
] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces
, &stackArgs
, errorCode
);
272 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_2
] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces
, &stackArgs
, errorCode
);
273 myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_3
] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces
, &stackArgs
, errorCode
);
274 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_1
] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces
, &stackArgs
, errorCode
);
275 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_2
] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces
, &stackArgs
, errorCode
);
276 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_3
] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces
, &stackArgs
, errorCode
);
277 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_4
] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces
, &stackArgs
, errorCode
);
278 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_5
] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces
, &stackArgs
, errorCode
);
279 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_6
] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces
, &stackArgs
, errorCode
);
280 myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_7
] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces
, &stackArgs
, errorCode
);
281 myConverterData
->myConverterArray
[COMPOUND_TEXT_TRIPLE_DOUBLE
] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces
, &stackArgs
, errorCode
);
283 myConverterData
->myConverterArray
[IBM_915
] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
284 myConverterData
->myConverterArray
[IBM_916
] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
285 myConverterData
->myConverterArray
[IBM_914
] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
286 myConverterData
->myConverterArray
[IBM_874
] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
287 myConverterData
->myConverterArray
[IBM_912
] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces
, &stackArgs
, errorCode
);
288 myConverterData
->myConverterArray
[IBM_913
] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces
, &stackArgs
, errorCode
);
289 myConverterData
->myConverterArray
[ISO_8859_14
] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces
, &stackArgs
, errorCode
);
290 myConverterData
->myConverterArray
[IBM_923
] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces
, &stackArgs
, errorCode
);
292 if (U_FAILURE(*errorCode
) || pArgs
->onlyTestIsLoadable
) {
293 _CompoundTextClose(cnv
);
297 myConverterData
->state
= (COMPOUND_TEXT_CONVERTERS
)0;
299 *errorCode
= U_MEMORY_ALLOCATION_ERROR
;
/*
 * Close callback: when converter->extraInfo is non-NULL, passes every
 * non-NULL entry of myConverterArray to ucnv_unloadSharedDataIfReady(),
 * frees extraInfo with uprv_free() and resets the pointer to NULL.
 * NOTE(review): the declaration of i and the closing braces are not visible
 * in this excerpt.
 */
304 static void U_CALLCONV
305 _CompoundTextClose(UConverter
*converter
) {
306 UConverterDataCompoundText
* myConverterData
= (UConverterDataCompoundText
*)(converter
->extraInfo
);
309 if (converter
->extraInfo
!= NULL
) {
310 /*close the array of converter pointers and free the memory*/
311 for (i
= 0; i
< NUM_OF_CONVERTERS
; i
++) {
312 if (myConverterData
->myConverterArray
[i
] != NULL
) {
313 ucnv_unloadSharedDataIfReady(myConverterData
->myConverterArray
[i
]);
317 uprv_free(converter
->extraInfo
);
318 converter
->extraInfo
= NULL
;
/*
 * Reset callback taking the converter and the reset choice.
 * NOTE(review): the body is not visible in this excerpt, so its effect on
 * the converter cannot be described from here.
 */
322 static void U_CALLCONV
323 _CompoundTextReset(UConverter
*converter
, UConverterResetChoice choice
) {
/* Name callback: returns the constant string "x11-compound-text". */
328 static const char* U_CALLCONV
329 _CompoundTextgetName(const UConverter
* cnv
){
331 return "x11-compound-text";
/*
 * fromUnicode callback: converts UTF-16 from args->source into compound-text
 * bytes at args->target.
 *
 * Per input unit, as far as visible here:
 *  - surrogates are combined with U16_GET_SUPPLEMENTARY; an unmatched
 *    surrogate sets *err to U_ILLEGAL_CHAR_FOUND and is remembered in
 *    cnv->fromUChar32;
 *  - getState() picks the target state; if it differs from the current one
 *    the escape sequence from escSeqCompoundText is staged first;
 *  - for DO_SEARCH, sub-converters 1 .. SEARCH_LENGTH-1 are tried with
 *    ucnv_MBCSFromUChar32() and the first one returning a positive length wins;
 *  - for COMPOUND_TEXT_SINGLE_0 the code point is staged as a single byte;
 *    otherwise the bytes of pValue are staged most-significant first;
 *  - staged bytes are copied to target; bytes that do not fit are appended to
 *    the converter's charErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
 * On exit the state, args->source and args->target are written back.
 * NOTE(review): several lines (declarations, else branches, breaks) are not
 * visible in this excerpt.
 */
334 static void U_CALLCONV
335 UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs
* args
, UErrorCode
* err
){
336 UConverter
*cnv
= args
->converter
;
337 uint8_t *target
= (uint8_t *) args
->target
;
338 const uint8_t *targetLimit
= (const uint8_t *) args
->targetLimit
;
339 const UChar
* source
= args
->source
;
340 const UChar
* sourceLimit
= args
->sourceLimit
;
341 /* int32_t* offsets = args->offsets; */
343 UBool useFallback
= cnv
->useFallback
;
344 uint8_t tmpTargetBuffer
[7];
345 int32_t tmpTargetBufferLength
= 0;
346 COMPOUND_TEXT_CONVERTERS currentState
, tmpState
;
348 int32_t pValueLength
= 0;
351 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) cnv
->extraInfo
;
353 currentState
= myConverterData
->state
;
355 /* check if the last codepoint of previous buffer was a lead surrogate*/
356 if((sourceChar
= cnv
->fromUChar32
)!=0 && target
< targetLimit
) {
360 while( source
< sourceLimit
){
361 if(target
< targetLimit
){
363 sourceChar
= *(source
++);
364 /*check if the char is a First surrogate*/
365 if(U16_IS_SURROGATE(sourceChar
)) {
366 if(U16_IS_SURROGATE_LEAD(sourceChar
)) {
368 /*look ahead to find the trail surrogate*/
369 if(source
< sourceLimit
) {
370 /* test the following code unit */
371 UChar trail
=(UChar
) *source
;
372 if(U16_IS_TRAIL(trail
)) {
374 sourceChar
=U16_GET_SUPPLEMENTARY(sourceChar
, trail
);
375 cnv
->fromUChar32
=0x00;
376 /* convert this supplementary code point */
377 /* exit this condition tree */
379 /* this is an unmatched lead code unit (1st surrogate) */
380 /* callback(illegal) */
381 *err
=U_ILLEGAL_CHAR_FOUND
;
382 cnv
->fromUChar32
=sourceChar
;
387 cnv
->fromUChar32
=sourceChar
;
391 /* this is an unmatched trail code unit (2nd surrogate) */
392 /* callback(illegal) */
393 *err
=U_ILLEGAL_CHAR_FOUND
;
394 cnv
->fromUChar32
=sourceChar
;
/* Output for this code point is staged in tmpTargetBuffer before being copied to target. */
399 tmpTargetBufferLength
= 0;
400 tmpState
= getState(sourceChar
);
402 if (tmpState
!= DO_SEARCH
&& currentState
!= tmpState
) {
403 /* Get escape sequence if necessary */
404 currentState
= tmpState
;
405 for (i
= 0; escSeqCompoundText
[currentState
][i
] != 0; i
++) {
406 tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][i
];
410 if (tmpState
== DO_SEARCH
) {
411 /* Test all available converters */
412 for (i
= 1; i
< SEARCH_LENGTH
; i
++) {
413 pValueLength
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[i
], sourceChar
, &pValue
, useFallback
);
414 if (pValueLength
> 0) {
415 tmpState
= (COMPOUND_TEXT_CONVERTERS
)i
;
416 if (currentState
!= tmpState
) {
417 currentState
= tmpState
;
418 for (j
= 0; escSeqCompoundText
[currentState
][j
] != 0; j
++) {
419 tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][j
];
422 for (n
= (pValueLength
- 1); n
>= 0; n
--) {
423 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue
>> (n
* 8));
428 } else if (tmpState
== COMPOUND_TEXT_SINGLE_0
) {
429 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)sourceChar
;
431 pValueLength
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[currentState
], sourceChar
, &pValue
, useFallback
);
432 if (pValueLength
> 0) {
433 for (n
= (pValueLength
- 1); n
>= 0; n
--) {
434 tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue
>> (n
* 8));
/* Copy the staged bytes to target while there is room. */
439 for (i
= 0; i
< tmpTargetBufferLength
; i
++) {
440 if (target
< targetLimit
) {
441 *target
++ = tmpTargetBuffer
[i
];
443 *err
= U_BUFFER_OVERFLOW_ERROR
;
/* Staged bytes that did not fit are kept in the converter's charErrorBuffer. */
448 if (*err
== U_BUFFER_OVERFLOW_ERROR
) {
449 for (; i
< tmpTargetBufferLength
; i
++) {
450 args
->converter
->charErrorBuffer
[args
->converter
->charErrorBufferLength
++] = tmpTargetBuffer
[i
];
454 *err
= U_BUFFER_OVERFLOW_ERROR
;
459 /*save the state and return */
460 myConverterData
->state
= currentState
;
461 args
->source
= source
;
462 args
->target
= (char*)target
;
/*
 * toUnicode callback: converts compound-text bytes from args->source into
 * UTF-16 at args->target.
 *
 * As far as visible here:
 *  - a copy of args (at most sizeof(UConverterToUnicodeArgs) bytes) is made
 *    in subArgs for calls into the sub-converter;
 *  - when the current byte (or the first byte buffered in toUBytes) is
 *    ESC_START, findStateFromEscSeq() determines the new state; a truncated
 *    escape sequence is saved byte by byte into toUBytes, and an INVALID
 *    result sets *err to U_ILLEGAL_CHAR_FOUND;
 *  - after a recognised escape sequence, mySource advances by the sequence
 *    length minus the bytes already buffered, and toULength is cleared;
 *  - in COMPOUND_TEXT_SINGLE_0, bytes up to the next ESC_START are copied to
 *    the target masked with 0x00ff;
 *  - otherwise the run bounded by findNextEsc() is converted by
 *    ucnv_MBCSToUnicodeWithOffsets() using the sub-converter's shared data;
 *  - on U_BUFFER_OVERFLOW_ERROR the sub-converter's UCharErrorBuffer is
 *    copied into args->converter.
 * On exit the state, args->target and args->source are written back.
 * NOTE(review): several lines (second parameter, else branches, breaks) are
 * not visible in this excerpt.
 */
466 static void U_CALLCONV
467 UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs
*args
,
469 const char *mySource
= (char *) args
->source
;
470 UChar
*myTarget
= args
->target
;
471 const char *mySourceLimit
= args
->sourceLimit
;
472 const char *tmpSourceLimit
= mySourceLimit
;
473 uint32_t mySourceChar
= 0x0000;
474 COMPOUND_TEXT_CONVERTERS currentState
, tmpState
;
475 int32_t sourceOffset
= 0;
476 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*) args
->converter
->extraInfo
;
477 UConverterSharedData
* savedSharedData
= NULL
;
479 UConverterToUnicodeArgs subArgs
;
482 /* set up the subconverter arguments */
483 if(args
->size
<sizeof(UConverterToUnicodeArgs
)) {
484 minArgsSize
= args
->size
;
486 minArgsSize
= (int32_t)sizeof(UConverterToUnicodeArgs
);
489 uprv_memcpy(&subArgs
, args
, minArgsSize
);
490 subArgs
.size
= (uint16_t)minArgsSize
;
492 currentState
= tmpState
= myConverterData
->state
;
494 while(mySource
< mySourceLimit
){
495 if(myTarget
< args
->targetLimit
){
496 if (args
->converter
->toULength
> 0) {
497 mySourceChar
= args
->converter
->toUBytes
[0];
499 mySourceChar
= (uint8_t)*mySource
;
502 if (mySourceChar
== ESC_START
) {
503 tmpState
= findStateFromEscSeq(mySource
, mySourceLimit
, args
->converter
->toUBytes
, args
->converter
->toULength
, err
);
505 if (*err
== U_TRUNCATED_CHAR_FOUND
) {
506 for (; mySource
< mySourceLimit
;) {
507 args
->converter
->toUBytes
[args
->converter
->toULength
++] = *mySource
++;
511 } else if (tmpState
== INVALID
) {
512 if (args
->converter
->toULength
== 0) {
513 mySource
++; /* skip over the 0x1b byte */
515 *err
= U_ILLEGAL_CHAR_FOUND
;
519 if (tmpState
!= currentState
) {
520 currentState
= tmpState
;
523 sourceOffset
= static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText
[currentState
]) - args
->converter
->toULength
);
525 mySource
+= sourceOffset
;
527 args
->converter
->toULength
= 0;
530 if (currentState
== COMPOUND_TEXT_SINGLE_0
) {
531 while (mySource
< mySourceLimit
) {
532 if (*mySource
== ESC_START
) {
535 if (myTarget
< args
->targetLimit
) {
536 *myTarget
++ = 0x00ff&(*mySource
++);
538 *err
= U_BUFFER_OVERFLOW_ERROR
;
542 } else if (mySource
< mySourceLimit
){
543 sourceOffset
= findNextEsc(mySource
, mySourceLimit
);
545 tmpSourceLimit
= mySource
+ sourceOffset
;
547 subArgs
.source
= mySource
;
548 subArgs
.sourceLimit
= tmpSourceLimit
;
549 subArgs
.target
= myTarget
;
/* Swap in the sub-converter's shared data for the call below; the saved value is restored right after it. */
550 savedSharedData
= subArgs
.converter
->sharedData
;
551 subArgs
.converter
->sharedData
= myConverterData
->myConverterArray
[currentState
];
553 ucnv_MBCSToUnicodeWithOffsets(&subArgs
, err
);
555 subArgs
.converter
->sharedData
= savedSharedData
;
557 mySource
= subArgs
.source
;
558 myTarget
= subArgs
.target
;
560 if (U_FAILURE(*err
)) {
561 if(*err
== U_BUFFER_OVERFLOW_ERROR
) {
562 if(subArgs
.converter
->UCharErrorBufferLength
> 0) {
563 uprv_memcpy(args
->converter
->UCharErrorBuffer
, subArgs
.converter
->UCharErrorBuffer
,
564 subArgs
.converter
->UCharErrorBufferLength
);
566 args
->converter
->UCharErrorBufferLength
=subArgs
.converter
->UCharErrorBufferLength
;
567 subArgs
.converter
->UCharErrorBufferLength
= 0;
573 *err
= U_BUFFER_OVERFLOW_ERROR
;
577 myConverterData
->state
= currentState
;
578 args
->target
= myTarget
;
579 args
->source
= mySource
;
/*
 * getUnicodeSet callback: for every sub-converter index 1 ..
 * NUM_OF_CONVERTERS-1, adds that converter's set through
 * ucnv_MBCSGetUnicodeSetForUnicode(); then adds U+0000, U+0009, U+000A,
 * U+0020..U+007F and U+00A0..U+00FF (the same values isASCIIRange accepts).
 * NOTE(review): the declaration of the sa parameter and of i is not visible
 * in this excerpt.
 */
582 static void U_CALLCONV
583 _CompoundText_GetUnicodeSet(const UConverter
*cnv
,
585 UConverterUnicodeSet which
,
586 UErrorCode
*pErrorCode
) {
587 UConverterDataCompoundText
*myConverterData
= (UConverterDataCompoundText
*)cnv
->extraInfo
;
590 for (i
= 1; i
< NUM_OF_CONVERTERS
; i
++) {
591 ucnv_MBCSGetUnicodeSetForUnicode(myConverterData
->myConverterArray
[i
], sa
, which
, pErrorCode
);
593 sa
->add(sa
->set
, 0x0000);
594 sa
->add(sa
->set
, 0x0009);
595 sa
->add(sa
->set
, 0x000A);
596 sa
->addRange(sa
->set
, 0x0020, 0x007F);
597 sa
->addRange(sa
->set
, 0x00A0, 0x00FF);
/*
 * Callback table for this converter. The same toUnicode function is listed
 * twice in a row, and likewise the same fromUnicode function.
 * NOTE(review): the remaining initializer entries are not visible in this
 * excerpt; presumably each pair fills the plain and with-offsets slots - confirm.
 */
601 static const UConverterImpl _CompoundTextImpl
= {
612 UConverter_toUnicode_CompoundText_OFFSETS
,
613 UConverter_toUnicode_CompoundText_OFFSETS
,
614 UConverter_fromUnicode_CompoundText_OFFSETS
,
615 UConverter_fromUnicode_CompoundText_OFFSETS
,
619 _CompoundTextgetName
,
622 _CompoundText_GetUnicodeSet
,
/*
 * Static converter description; the first field is the structure size.
 * NOTE(review): the fields between the size and the reserved bytes are not
 * visible in this excerpt.
 */
627 static const UConverterStaticData _CompoundTextStaticData
= {
628 sizeof(UConverterStaticData
),
641 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
/* Shared-data object (non-static) for this converter, built from _CompoundTextStaticData and _CompoundTextImpl. */
643 const UConverterSharedData _CompoundTextData
=
644 UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData
, &_CompoundTextImpl
);
646 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */