1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 ********************************************************************** 
   5 *   Copyright (C) 2010-2015, International Business Machines 
   6 *   Corporation and others.  All Rights Reserved. 
   7 ********************************************************************** 
  10 *   tab size:   8 (not used) 
  13 *   created on: 2010Dec09 
  14 *   created by: Michael Ow 
  17 #include "unicode/utypes.h" 
  19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION 
  21 #include "unicode/ucnv.h" 
  22 #include "unicode/uset.h" 
  23 #include "unicode/ucnv_err.h" 
  24 #include "unicode/ucnv_cb.h" 
  25 #include "unicode/utf16.h" 
  37     COMPOUND_TEXT_SINGLE_0 
= 0, 
  38     COMPOUND_TEXT_SINGLE_1 
= 1, 
  39     COMPOUND_TEXT_SINGLE_2 
= 2, 
  40     COMPOUND_TEXT_SINGLE_3 
= 3, 
  42     COMPOUND_TEXT_DOUBLE_1 
= 4, 
  43     COMPOUND_TEXT_DOUBLE_2 
= 5, 
  44     COMPOUND_TEXT_DOUBLE_3 
= 6, 
  45     COMPOUND_TEXT_DOUBLE_4 
= 7, 
  46     COMPOUND_TEXT_DOUBLE_5 
= 8, 
  47     COMPOUND_TEXT_DOUBLE_6 
= 9, 
  48     COMPOUND_TEXT_DOUBLE_7 
= 10, 
  50     COMPOUND_TEXT_TRIPLE_DOUBLE 
= 11, 
  60     NUM_OF_CONVERTERS 
= 20 
  61 } COMPOUND_TEXT_CONVERTERS
; 
  /*
   * Exclusive upper bound of the index loop in
   * UConverter_fromUnicode_CompoundText_OFFSETS that probes
   * myConverterArray[1 .. SEARCH_LENGTH-1] when getState() returns DO_SEARCH.
   * With the value 12 the last index probed is 11
   * (COMPOUND_TEXT_TRIPLE_DOUBLE).
   */
  63 #define SEARCH_LENGTH 12 
  65 static const uint8_t escSeqCompoundText
[NUM_OF_CONVERTERS
][5] = { 
  67     { 0x1B, 0x2D, 0x41, 0, 0 }, 
  68     { 0x1B, 0x2D, 0x4D, 0, 0 }, 
  69     { 0x1B, 0x2D, 0x46, 0, 0 }, 
  70     { 0x1B, 0x2D, 0x47, 0, 0 }, 
  73     { 0x1B, 0x24, 0x29, 0x41, 0 }, 
  74     { 0x1B, 0x24, 0x29, 0x42, 0 }, 
  75     { 0x1B, 0x24, 0x29, 0x43, 0 }, 
  76     { 0x1B, 0x24, 0x29, 0x44, 0 }, 
  77     { 0x1B, 0x24, 0x29, 0x47, 0 }, 
  78     { 0x1B, 0x24, 0x29, 0x48, 0 }, 
  79     { 0x1B, 0x24, 0x29, 0x49, 0 }, 
  82     { 0x1B, 0x25, 0x47, 0, 0 }, 
  85     { 0x1B, 0x2D, 0x4C, 0, 0 }, 
  87     { 0x1B, 0x2D, 0x48, 0, 0 }, 
  89     { 0x1B, 0x2D, 0x44, 0, 0 }, 
  91     { 0x1B, 0x2D, 0x54, 0, 0 }, 
  93     { 0x1B, 0x2D, 0x42, 0, 0 }, 
  95     { 0x1B, 0x2D, 0x43, 0, 0 }, 
  97     { 0x1B, 0x2D, 0x5F, 0, 0 }, 
  99     { 0x1B, 0x2D, 0x62, 0, 0 }, 
 /*
  * Byte value 0x1B. The toUnicode path compares input bytes against it to
  * detect the start of an escape sequence; every row of escSeqCompoundText
  * begins with this byte.
  */
 102 #define ESC_START 0x1B 
 /*
  * Nonzero when codepoint is U+0000, U+0009, U+000A, or lies in
  * U+0020..U+007F or U+00A0..U+00FF.  getState() maps a match to
  * COMPOUND_TEXT_SINGLE_0.
  *
  * The parameter is parenthesized at every use so that an expression
  * argument (for example a conditional expression) is compared as a whole.
  * The argument is evaluated more than once: do not pass an expression
  * with side effects.
  */
 #define isASCIIRange(codepoint) \
         (((codepoint) == 0x0000) || ((codepoint) == 0x0009) || ((codepoint) == 0x000A) || \
          ((codepoint) >= 0x0020 && (codepoint) <= 0x007f) || \
          ((codepoint) >= 0x00A0 && (codepoint) <= 0x00FF))
 /*
  * Nonzero when codepoint lies in U+0401..U+045F or equals U+2116.
  * Consulted by getState() when classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM915(codepoint) \
         (((codepoint) >= 0x0401 && (codepoint) <= 0x045F) || ((codepoint) == 0x2116))
 /*
  * Nonzero when codepoint lies in U+05D0..U+05EA or equals U+2017 or U+203E.
  * Consulted by getState() when classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM916(codepoint) \
         (((codepoint) >= 0x05D0 && (codepoint) <= 0x05EA) || \
          ((codepoint) == 0x2017) || ((codepoint) == 0x203E))
 /*
  * Nonzero when codepoint is one of U+060C, U+061B, U+061F, U+200B, U+FE74,
  * or lies in U+0621..U+063A, U+0640..U+0652, U+0660..U+066D,
  * U+FE70..U+FE72 or U+FE76..U+FEBE.  getState() maps a match to
  * COMPOUND_TEXT_SINGLE_3.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isCompoundS3(codepoint) \
         (((codepoint) == 0x060C) || ((codepoint) == 0x061B) || ((codepoint) == 0x061F) || \
          ((codepoint) >= 0x0621 && (codepoint) <= 0x063A) || \
          ((codepoint) >= 0x0640 && (codepoint) <= 0x0652) || \
          ((codepoint) >= 0x0660 && (codepoint) <= 0x066D) || \
          ((codepoint) == 0x200B) || \
          ((codepoint) >= 0x0FE70 && (codepoint) <= 0x0FE72) || \
          ((codepoint) == 0x0FE74) || \
          ((codepoint) >= 0x0FE76 && (codepoint) <= 0x0FEBE))
 /*
  * Nonzero when codepoint equals U+02BC, U+02BD or U+2015, or lies in
  * U+0384..U+03CE.  getState() maps a match to COMPOUND_TEXT_SINGLE_2.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isCompoundS2(codepoint) \
         (((codepoint) == 0x02BC) || ((codepoint) == 0x02BD) || \
          ((codepoint) >= 0x0384 && (codepoint) <= 0x03CE) || \
          ((codepoint) == 0x2015))
 /*
  * Nonzero when codepoint is one of the Latin Extended-A values listed
  * below (U+0100..U+0173, non-contiguous).  Consulted by getState() when
  * classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM914(codepoint) \
         (((codepoint) == 0x0100) || ((codepoint) == 0x0101) || \
          ((codepoint) == 0x0112) || ((codepoint) == 0x0113) || \
          ((codepoint) == 0x0116) || ((codepoint) == 0x0117) || \
          ((codepoint) == 0x0122) || ((codepoint) == 0x0123) || \
          ((codepoint) >= 0x0128 && (codepoint) <= 0x012B) || \
          ((codepoint) == 0x012E) || ((codepoint) == 0x012F) || \
          ((codepoint) >= 0x0136 && (codepoint) <= 0x0138) || \
          ((codepoint) == 0x013B) || ((codepoint) == 0x013C) || \
          ((codepoint) == 0x0145) || ((codepoint) == 0x0146) || \
          ((codepoint) >= 0x014A && (codepoint) <= 0x014D) || \
          ((codepoint) == 0x0156) || ((codepoint) == 0x0157) || \
          ((codepoint) >= 0x0166 && (codepoint) <= 0x016B) || \
          ((codepoint) == 0x0172) || ((codepoint) == 0x0173))
 /*
  * Nonzero when codepoint lies in U+0E01..U+0E3A or U+0E3F..U+0E5B.
  * Consulted by getState() when classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM874(codepoint) \
         (((codepoint) >= 0x0E01 && (codepoint) <= 0x0E3A) || \
          ((codepoint) >= 0x0E3F && (codepoint) <= 0x0E5B))
 /*
  * Nonzero when codepoint is one of the Latin Extended-A / spacing-modifier
  * values listed below.  Consulted by getState() when classifying a code
  * point.
  *
  * U+0147 and U+0148 (N with caron, upper and lower case) are both listed;
  * testing U+0147 twice would leave U+0148 unclassified.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM912(codepoint) \
         (((codepoint) >= 0x0102 && (codepoint) <= 0x0107) || \
          ((codepoint) >= 0x010C && (codepoint) <= 0x0111) || \
          ((codepoint) >= 0x0118 && (codepoint) <= 0x011B) || \
          ((codepoint) == 0x0139) || ((codepoint) == 0x013A) || \
          ((codepoint) == 0x013D) || ((codepoint) == 0x013E) || \
          ((codepoint) >= 0x0141 && (codepoint) <= 0x0144) || \
          ((codepoint) == 0x0147) || ((codepoint) == 0x0148) || \
          ((codepoint) == 0x0150) || ((codepoint) == 0x0151) || \
          ((codepoint) == 0x0154) || ((codepoint) == 0x0155) || \
          ((codepoint) >= 0x0158 && (codepoint) <= 0x015B) || \
          ((codepoint) == 0x015E) || ((codepoint) == 0x015F) || \
          ((codepoint) >= 0x0160 && (codepoint) <= 0x0165) || \
          ((codepoint) == 0x016E) || ((codepoint) == 0x016F) || \
          ((codepoint) == 0x0170) || ((codepoint) == 0x0171) || \
          ((codepoint) >= 0x0179 && (codepoint) <= 0x017E) || \
          ((codepoint) == 0x02C7) || ((codepoint) == 0x02D8) || \
          ((codepoint) == 0x02D9) || ((codepoint) == 0x02DB) || \
          ((codepoint) == 0x02DD))
 /*
  * Nonzero when codepoint lies in U+0108..U+010B or U+0124..U+0127, or is
  * one of U+011C, U+011D, U+0120, U+0121, U+0134, U+0135, U+015C, U+015D,
  * U+016C, U+016D.  Consulted by getState() when classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM913(codepoint) \
         (((codepoint) >= 0x0108 && (codepoint) <= 0x010B) || \
          ((codepoint) == 0x011C) || ((codepoint) == 0x011D) || \
          ((codepoint) == 0x0120) || ((codepoint) == 0x0121) || \
          ((codepoint) >= 0x0124 && (codepoint) <= 0x0127) || \
          ((codepoint) == 0x0134) || ((codepoint) == 0x0135) || \
          ((codepoint) == 0x015C) || ((codepoint) == 0x015D) || \
          ((codepoint) == 0x016C) || ((codepoint) == 0x016D))
 /*
  * Nonzero when codepoint equals U+011E, U+011F, U+0130 or U+0131, or lies
  * in U+0218..U+021B.  getState() maps a match to COMPOUND_TEXT_SINGLE_1.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isCompoundS1(codepoint) \
         (((codepoint) == 0x011E) || ((codepoint) == 0x011F) || \
          ((codepoint) == 0x0130) || ((codepoint) == 0x0131) || \
          ((codepoint) >= 0x0218 && (codepoint) <= 0x021B))
 /*
  * Nonzero when codepoint lies in U+0174..U+0177 or U+1E80..U+1E85, or is
  * one of U+1E0A, U+1E0B, U+1E1E, U+1E1F, U+1E40, U+1E41, U+1E56, U+1E57,
  * U+1E60, U+1E61, U+1E6A, U+1E6B, U+1EF2, U+1EF3.  Consulted by getState()
  * when classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isISO8859_14(codepoint) \
         (((codepoint) >= 0x0174 && (codepoint) <= 0x0177) || \
          ((codepoint) == 0x1E0A) || ((codepoint) == 0x1E0B) || \
          ((codepoint) == 0x1E1E) || ((codepoint) == 0x1E1F) || \
          ((codepoint) == 0x1E40) || ((codepoint) == 0x1E41) || \
          ((codepoint) == 0x1E56) || ((codepoint) == 0x1E57) || \
          ((codepoint) == 0x1E60) || ((codepoint) == 0x1E61) || \
          ((codepoint) == 0x1E6A) || ((codepoint) == 0x1E6B) || \
          ((codepoint) == 0x1EF2) || ((codepoint) == 0x1EF3) || \
          ((codepoint) >= 0x1E80 && (codepoint) <= 0x1E85))
 /*
  * Nonzero when codepoint equals U+0152, U+0153, U+0178 or U+20AC.
  * Consulted by getState() when classifying a code point.
  *
  * The parameter is parenthesized at every use so expression arguments are
  * compared as a whole.  The argument is evaluated more than once.
  */
 #define isIBM923(codepoint) \
         (((codepoint) == 0x0152) || ((codepoint) == 0x0153) || \
          ((codepoint) == 0x0178) || ((codepoint) == 0x20AC))
 163     UConverterSharedData 
*myConverterArray
[NUM_OF_CONVERTERS
]; 
 164     COMPOUND_TEXT_CONVERTERS state
; 
 165 } UConverterDataCompoundText
; 
 167 /*********** Compound Text Converter Protos ***********/ 
 169 static void U_CALLCONV
 
 170 _CompoundTextOpen(UConverter 
*cnv
, UConverterLoadArgs 
*pArgs
, UErrorCode 
*errorCode
); 
 172 static void U_CALLCONV
 
 173  _CompoundTextClose(UConverter 
*converter
); 
 175 static void U_CALLCONV
 
 176 _CompoundTextReset(UConverter 
*converter
, UConverterResetChoice choice
); 
 178 static const char* U_CALLCONV
 
 179 _CompoundTextgetName(const UConverter
* cnv
); 
 182 static int32_t findNextEsc(const char *source
, const char *sourceLimit
) { 
 183     int32_t length 
= static_cast<int32_t>(sourceLimit 
- source
); 
 185     for (i 
= 1; i 
< length
; i
++) { 
 186         if (*(source 
+ i
) == 0x1B) { 
 194 static COMPOUND_TEXT_CONVERTERS 
getState(int codepoint
) { 
 195     COMPOUND_TEXT_CONVERTERS state 
= DO_SEARCH
; 
 197     if (isASCIIRange(codepoint
)) { 
 198         state 
= COMPOUND_TEXT_SINGLE_0
; 
 199     } else if (isIBM912(codepoint
)) { 
 201     }else if (isIBM913(codepoint
)) { 
 203     } else if (isISO8859_14(codepoint
)) { 
 205     } else if (isIBM923(codepoint
)) { 
 207     } else if (isIBM874(codepoint
)) { 
 209     } else if (isIBM914(codepoint
)) { 
 211     } else if (isCompoundS2(codepoint
)) { 
 212         state 
= COMPOUND_TEXT_SINGLE_2
; 
 213     } else if (isCompoundS3(codepoint
)) { 
 214         state 
= COMPOUND_TEXT_SINGLE_3
; 
 215     } else if (isIBM916(codepoint
)) { 
 217     } else if (isIBM915(codepoint
)) { 
 219     } else if (isCompoundS1(codepoint
)) { 
 220         state 
= COMPOUND_TEXT_SINGLE_1
; 
 226 static COMPOUND_TEXT_CONVERTERS 
findStateFromEscSeq(const char* source
, const char* sourceLimit
, const uint8_t* toUBytesBuffer
, int32_t toUBytesBufferLength
, UErrorCode 
*err
) { 
 227     COMPOUND_TEXT_CONVERTERS state 
= INVALID
; 
 228     UBool matchFound 
= FALSE
; 
 229     int32_t i
, n
, offset 
= toUBytesBufferLength
; 
 231     for (i 
= 0; i 
< NUM_OF_CONVERTERS
; i
++) { 
 233         for (n 
= 0; escSeqCompoundText
[i
][n
] != 0; n
++) { 
 234             if (n 
< toUBytesBufferLength
) { 
 235                 if (toUBytesBuffer
[n
] != escSeqCompoundText
[i
][n
]) { 
 239             } else if ((source 
+ (n 
- offset
)) >= sourceLimit
) { 
 240                 *err 
= U_TRUNCATED_CHAR_FOUND
; 
 243             } else if (*(source 
+ (n 
- offset
)) != escSeqCompoundText
[i
][n
]) { 
 255         state 
= (COMPOUND_TEXT_CONVERTERS
)i
; 
 261 static void U_CALLCONV
 
 262 _CompoundTextOpen(UConverter 
*cnv
, UConverterLoadArgs 
*pArgs
, UErrorCode 
*errorCode
){ 
 263     cnv
->extraInfo 
= uprv_malloc (sizeof (UConverterDataCompoundText
)); 
 264     if (cnv
->extraInfo 
!= NULL
) { 
 265         UConverterDataCompoundText 
*myConverterData 
= (UConverterDataCompoundText 
*) cnv
->extraInfo
; 
 267         UConverterNamePieces stackPieces
; 
 268         UConverterLoadArgs stackArgs
=UCNV_LOAD_ARGS_INITIALIZER
; 
 270         myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_0
] = NULL
; 
 271         myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_1
] = ucnv_loadSharedData("icu-internal-compound-s1", &stackPieces
, &stackArgs
, errorCode
); 
 272         myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_2
] = ucnv_loadSharedData("icu-internal-compound-s2", &stackPieces
, &stackArgs
, errorCode
); 
 273         myConverterData
->myConverterArray
[COMPOUND_TEXT_SINGLE_3
] = ucnv_loadSharedData("icu-internal-compound-s3", &stackPieces
, &stackArgs
, errorCode
); 
 274         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_1
] = ucnv_loadSharedData("icu-internal-compound-d1", &stackPieces
, &stackArgs
, errorCode
); 
 275         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_2
] = ucnv_loadSharedData("icu-internal-compound-d2", &stackPieces
, &stackArgs
, errorCode
); 
 276         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_3
] = ucnv_loadSharedData("icu-internal-compound-d3", &stackPieces
, &stackArgs
, errorCode
); 
 277         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_4
] = ucnv_loadSharedData("icu-internal-compound-d4", &stackPieces
, &stackArgs
, errorCode
); 
 278         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_5
] = ucnv_loadSharedData("icu-internal-compound-d5", &stackPieces
, &stackArgs
, errorCode
); 
 279         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_6
] = ucnv_loadSharedData("icu-internal-compound-d6", &stackPieces
, &stackArgs
, errorCode
); 
 280         myConverterData
->myConverterArray
[COMPOUND_TEXT_DOUBLE_7
] = ucnv_loadSharedData("icu-internal-compound-d7", &stackPieces
, &stackArgs
, errorCode
); 
 281         myConverterData
->myConverterArray
[COMPOUND_TEXT_TRIPLE_DOUBLE
] = ucnv_loadSharedData("icu-internal-compound-t", &stackPieces
, &stackArgs
, errorCode
); 
 283         myConverterData
->myConverterArray
[IBM_915
] = ucnv_loadSharedData("ibm-915_P100-1995", &stackPieces
, &stackArgs
, errorCode
); 
 284         myConverterData
->myConverterArray
[IBM_916
] = ucnv_loadSharedData("ibm-916_P100-1995", &stackPieces
, &stackArgs
, errorCode
); 
 285         myConverterData
->myConverterArray
[IBM_914
] = ucnv_loadSharedData("ibm-914_P100-1995", &stackPieces
, &stackArgs
, errorCode
); 
 286         myConverterData
->myConverterArray
[IBM_874
] = ucnv_loadSharedData("ibm-874_P100-1995", &stackPieces
, &stackArgs
, errorCode
); 
 287         myConverterData
->myConverterArray
[IBM_912
] = ucnv_loadSharedData("ibm-912_P100-1995", &stackPieces
, &stackArgs
, errorCode
); 
 288         myConverterData
->myConverterArray
[IBM_913
] = ucnv_loadSharedData("ibm-913_P100-2000", &stackPieces
, &stackArgs
, errorCode
); 
 289         myConverterData
->myConverterArray
[ISO_8859_14
] = ucnv_loadSharedData("iso-8859_14-1998", &stackPieces
, &stackArgs
, errorCode
); 
 290         myConverterData
->myConverterArray
[IBM_923
] = ucnv_loadSharedData("ibm-923_P100-1998", &stackPieces
, &stackArgs
, errorCode
); 
 292         if (U_FAILURE(*errorCode
) || pArgs
->onlyTestIsLoadable
) { 
 293             _CompoundTextClose(cnv
); 
 297         myConverterData
->state 
= (COMPOUND_TEXT_CONVERTERS
)0; 
 299         *errorCode 
= U_MEMORY_ALLOCATION_ERROR
; 
 304 static void U_CALLCONV
 
 305 _CompoundTextClose(UConverter 
*converter
) { 
 306     UConverterDataCompoundText
* myConverterData 
= (UConverterDataCompoundText
*)(converter
->extraInfo
); 
 309     if (converter
->extraInfo 
!= NULL
) { 
 310         /*close the array of converter pointers and free the memory*/ 
 311         for (i 
= 0; i 
< NUM_OF_CONVERTERS
; i
++) { 
 312             if (myConverterData
->myConverterArray
[i
] != NULL
) { 
 313                 ucnv_unloadSharedDataIfReady(myConverterData
->myConverterArray
[i
]); 
 317         uprv_free(converter
->extraInfo
); 
 318         converter
->extraInfo 
= NULL
; 
 322 static void U_CALLCONV
 
 323 _CompoundTextReset(UConverter 
*converter
, UConverterResetChoice choice
) { 
 328 static const char* U_CALLCONV
 
 329 _CompoundTextgetName(const UConverter
* cnv
){ 
 331     return "x11-compound-text"; 
 334 static void U_CALLCONV
 
 335 UConverter_fromUnicode_CompoundText_OFFSETS(UConverterFromUnicodeArgs
* args
, UErrorCode
* err
){ 
 336     UConverter 
*cnv 
= args
->converter
; 
 337     uint8_t *target 
= (uint8_t *) args
->target
; 
 338     const uint8_t *targetLimit 
= (const uint8_t *) args
->targetLimit
; 
 339     const UChar
* source 
= args
->source
; 
 340     const UChar
* sourceLimit 
= args
->sourceLimit
; 
 341     /* int32_t* offsets = args->offsets; */ 
 343     UBool useFallback 
= cnv
->useFallback
; 
 344     uint8_t tmpTargetBuffer
[7]; 
 345     int32_t tmpTargetBufferLength 
= 0; 
 346     COMPOUND_TEXT_CONVERTERS currentState
, tmpState
; 
 348     int32_t pValueLength 
= 0; 
 351     UConverterDataCompoundText 
*myConverterData 
= (UConverterDataCompoundText 
*) cnv
->extraInfo
; 
 353     currentState 
= myConverterData
->state
; 
 355     /* check if the last codepoint of previous buffer was a lead surrogate*/ 
 356     if((sourceChar 
= cnv
->fromUChar32
)!=0 && target
< targetLimit
) { 
 360     while( source 
< sourceLimit
){ 
 361         if(target 
< targetLimit
){ 
 363             sourceChar  
= *(source
++); 
 364             /*check if the char is a First surrogate*/ 
 365              if(U16_IS_SURROGATE(sourceChar
)) { 
 366                 if(U16_IS_SURROGATE_LEAD(sourceChar
)) { 
 368                     /*look ahead to find the trail surrogate*/ 
 369                     if(source 
< sourceLimit
) { 
 370                         /* test the following code unit */ 
 371                         UChar trail
=(UChar
) *source
; 
 372                         if(U16_IS_TRAIL(trail
)) { 
 374                             sourceChar
=U16_GET_SUPPLEMENTARY(sourceChar
, trail
); 
 375                             cnv
->fromUChar32
=0x00; 
 376                             /* convert this supplementary code point */ 
 377                             /* exit this condition tree */ 
 379                             /* this is an unmatched lead code unit (1st surrogate) */ 
 380                             /* callback(illegal) */ 
 381                             *err
=U_ILLEGAL_CHAR_FOUND
; 
 382                             cnv
->fromUChar32
=sourceChar
; 
 387                         cnv
->fromUChar32
=sourceChar
; 
 391                     /* this is an unmatched trail code unit (2nd surrogate) */ 
 392                     /* callback(illegal) */ 
 393                     *err
=U_ILLEGAL_CHAR_FOUND
; 
 394                     cnv
->fromUChar32
=sourceChar
; 
 399              tmpTargetBufferLength 
= 0; 
 400              tmpState 
= getState(sourceChar
); 
 402              if (tmpState 
!= DO_SEARCH 
&& currentState 
!= tmpState
) { 
 403                  /* Get escape sequence if necessary */ 
 404                  currentState 
= tmpState
; 
 405                  for (i 
= 0; escSeqCompoundText
[currentState
][i
] != 0; i
++) { 
 406                      tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][i
]; 
 410              if (tmpState 
== DO_SEARCH
) { 
 411                  /* Test all available converters */ 
 412                  for (i 
= 1; i 
< SEARCH_LENGTH
; i
++) { 
 413                      pValueLength 
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[i
], sourceChar
, &pValue
, useFallback
); 
 414                      if (pValueLength 
> 0) { 
 415                          tmpState 
= (COMPOUND_TEXT_CONVERTERS
)i
; 
 416                          if (currentState 
!= tmpState
) { 
 417                              currentState 
= tmpState
; 
 418                              for (j 
= 0; escSeqCompoundText
[currentState
][j
] != 0; j
++) { 
 419                                  tmpTargetBuffer
[tmpTargetBufferLength
++] = escSeqCompoundText
[currentState
][j
]; 
 422                          for (n 
= (pValueLength 
- 1); n 
>= 0; n
--) { 
 423                              tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue 
>> (n 
* 8)); 
 428              } else if (tmpState 
== COMPOUND_TEXT_SINGLE_0
) { 
 429                  tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)sourceChar
; 
 431                  pValueLength 
= ucnv_MBCSFromUChar32(myConverterData
->myConverterArray
[currentState
], sourceChar
, &pValue
, useFallback
); 
 432                  if (pValueLength 
> 0) { 
 433                      for (n 
= (pValueLength 
- 1); n 
>= 0; n
--) { 
 434                          tmpTargetBuffer
[tmpTargetBufferLength
++] = (uint8_t)(pValue 
>> (n 
* 8)); 
 439              for (i 
= 0; i 
< tmpTargetBufferLength
; i
++) { 
 440                  if (target 
< targetLimit
) { 
 441                      *target
++ = tmpTargetBuffer
[i
]; 
 443                      *err 
= U_BUFFER_OVERFLOW_ERROR
; 
 448              if (*err 
== U_BUFFER_OVERFLOW_ERROR
) { 
 449                  for (; i 
< tmpTargetBufferLength
; i
++) { 
 450                      args
->converter
->charErrorBuffer
[args
->converter
->charErrorBufferLength
++] = tmpTargetBuffer
[i
]; 
 454             *err 
= U_BUFFER_OVERFLOW_ERROR
; 
 459     /*save the state and return */ 
 460     myConverterData
->state 
= currentState
; 
 461     args
->source 
= source
; 
 462     args
->target 
= (char*)target
; 
 466 static void U_CALLCONV
 
 467 UConverter_toUnicode_CompoundText_OFFSETS(UConverterToUnicodeArgs 
*args
, 
 469     const char *mySource 
= (char *) args
->source
; 
 470     UChar 
*myTarget 
= args
->target
; 
 471     const char *mySourceLimit 
= args
->sourceLimit
; 
 472     const char *tmpSourceLimit 
= mySourceLimit
; 
 473     uint32_t mySourceChar 
= 0x0000; 
 474     COMPOUND_TEXT_CONVERTERS currentState
, tmpState
; 
 475     int32_t sourceOffset 
= 0; 
 476     UConverterDataCompoundText 
*myConverterData 
= (UConverterDataCompoundText 
*) args
->converter
->extraInfo
; 
 477     UConverterSharedData
* savedSharedData 
= NULL
; 
 479     UConverterToUnicodeArgs subArgs
; 
 482     /* set up the subconverter arguments */ 
 483     if(args
->size
<sizeof(UConverterToUnicodeArgs
)) { 
 484         minArgsSize 
= args
->size
; 
 486         minArgsSize 
= (int32_t)sizeof(UConverterToUnicodeArgs
); 
 489     uprv_memcpy(&subArgs
, args
, minArgsSize
); 
 490     subArgs
.size 
= (uint16_t)minArgsSize
; 
 492     currentState 
= tmpState 
=  myConverterData
->state
; 
 494     while(mySource 
< mySourceLimit
){ 
 495         if(myTarget 
< args
->targetLimit
){ 
 496             if (args
->converter
->toULength 
> 0) { 
 497                 mySourceChar 
= args
->converter
->toUBytes
[0]; 
 499                 mySourceChar 
= (uint8_t)*mySource
; 
 502             if (mySourceChar 
== ESC_START
) { 
 503                 tmpState 
= findStateFromEscSeq(mySource
, mySourceLimit
, args
->converter
->toUBytes
, args
->converter
->toULength
, err
); 
 505                 if (*err 
== U_TRUNCATED_CHAR_FOUND
) { 
 506                     for (; mySource 
< mySourceLimit
;) { 
 507                         args
->converter
->toUBytes
[args
->converter
->toULength
++] = *mySource
++; 
 511                 } else if (tmpState 
== INVALID
) { 
 512                     if (args
->converter
->toULength 
== 0) { 
 513                         mySource
++; /* skip over the 0x1b byte */ 
 515                     *err 
= U_ILLEGAL_CHAR_FOUND
; 
 519                 if (tmpState 
!= currentState
) { 
 520                     currentState 
= tmpState
; 
 523                 sourceOffset 
= static_cast<int32_t>(uprv_strlen((char*)escSeqCompoundText
[currentState
]) - args
->converter
->toULength
); 
 525                 mySource 
+= sourceOffset
; 
 527                 args
->converter
->toULength 
= 0; 
 530             if (currentState 
== COMPOUND_TEXT_SINGLE_0
) { 
 531                 while (mySource 
< mySourceLimit
) { 
 532                     if (*mySource 
== ESC_START
) { 
 535                     if (myTarget 
< args
->targetLimit
) { 
 536                         *myTarget
++ = 0x00ff&(*mySource
++); 
 538                         *err 
= U_BUFFER_OVERFLOW_ERROR
; 
 542             } else if (mySource 
< mySourceLimit
){ 
 543                 sourceOffset 
= findNextEsc(mySource
, mySourceLimit
); 
 545                 tmpSourceLimit 
= mySource 
+ sourceOffset
; 
 547                 subArgs
.source 
= mySource
; 
 548                 subArgs
.sourceLimit 
= tmpSourceLimit
; 
 549                 subArgs
.target 
= myTarget
; 
 550                 savedSharedData 
= subArgs
.converter
->sharedData
; 
 551                 subArgs
.converter
->sharedData 
= myConverterData
->myConverterArray
[currentState
]; 
 553                 ucnv_MBCSToUnicodeWithOffsets(&subArgs
, err
); 
 555                 subArgs
.converter
->sharedData 
= savedSharedData
; 
 557                 mySource 
= subArgs
.source
; 
 558                 myTarget 
= subArgs
.target
; 
 560                 if (U_FAILURE(*err
)) { 
 561                     if(*err 
== U_BUFFER_OVERFLOW_ERROR
) { 
 562                         if(subArgs
.converter
->UCharErrorBufferLength 
> 0) { 
 563                             uprv_memcpy(args
->converter
->UCharErrorBuffer
, subArgs
.converter
->UCharErrorBuffer
, 
 564                                         subArgs
.converter
->UCharErrorBufferLength
); 
 566                         args
->converter
->UCharErrorBufferLength
=subArgs
.converter
->UCharErrorBufferLength
; 
 567                         subArgs
.converter
->UCharErrorBufferLength 
= 0; 
 573             *err 
= U_BUFFER_OVERFLOW_ERROR
; 
 577     myConverterData
->state 
= currentState
; 
 578     args
->target 
= myTarget
; 
 579     args
->source 
= mySource
; 
 582 static void U_CALLCONV
 
 583 _CompoundText_GetUnicodeSet(const UConverter 
*cnv
, 
 585                     UConverterUnicodeSet which
, 
 586                     UErrorCode 
*pErrorCode
) { 
 587     UConverterDataCompoundText 
*myConverterData 
= (UConverterDataCompoundText 
*)cnv
->extraInfo
; 
 590     for (i 
= 1; i 
< NUM_OF_CONVERTERS
; i
++) { 
 591         ucnv_MBCSGetUnicodeSetForUnicode(myConverterData
->myConverterArray
[i
], sa
, which
, pErrorCode
); 
 593     sa
->add(sa
->set
, 0x0000); 
 594     sa
->add(sa
->set
, 0x0009); 
 595     sa
->add(sa
->set
, 0x000A); 
 596     sa
->addRange(sa
->set
, 0x0020, 0x007F); 
 597     sa
->addRange(sa
->set
, 0x00A0, 0x00FF); 
 601 static const UConverterImpl _CompoundTextImpl 
= { 
 612     UConverter_toUnicode_CompoundText_OFFSETS
, 
 613     UConverter_toUnicode_CompoundText_OFFSETS
, 
 614     UConverter_fromUnicode_CompoundText_OFFSETS
, 
 615     UConverter_fromUnicode_CompoundText_OFFSETS
, 
 619     _CompoundTextgetName
, 
 622     _CompoundText_GetUnicodeSet
, 
 627 static const UConverterStaticData _CompoundTextStaticData 
= { 
 628     sizeof(UConverterStaticData
), 
 641     { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
 643 const UConverterSharedData _CompoundTextData 
= 
 644         UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_CompoundTextStaticData
, &_CompoundTextImpl
); 
 646 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */