icuSources/common/ucnvhz.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2000-2015, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   file name:  ucnvhz.c
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 *   created on: 2000oct16
  12 *   created by: Ram Viswanadha
  13 *   10/31/2000  Ram     Implemented offsets logic function
  14 *
  15 */
  16
  17 #include "unicode/utypes.h"
  18
  19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  20
  21 #include "cmemory.h"
  22 #include "unicode/ucnv.h"
  23 #include "unicode/ucnv_cb.h"
  24 #include "unicode/uset.h"
  25 #include "unicode/utf16.h"
  26 #include "ucnv_bld.h"
  27 #include "ucnv_cnv.h"
  28 #include "ucnv_imp.h"
  29
  30 #define UCNV_TILDE 0x7E          /* ~ */
  31 #define UCNV_OPEN_BRACE 0x7B     /* { */
  32 #define UCNV_CLOSE_BRACE 0x7D   /* } */
  33 #define SB_ESCAPE    "\x7E\x7D"  /* "~}": written when the output switches to single-byte mode */
  34 #define DB_ESCAPE    "\x7E\x7B"  /* "~{": written when the output switches to double-byte mode */
  35 #define TILDE_ESCAPE "\x7E\x7E"  /* "~~": written for a literal '~' in the input */
  36 #define ESC_LEN       2          /* number of bytes in each of the escape strings above */
  37
  38
  /*
   * Appends the `len` bytes starting at `strToAppend` (an escape sequence) to
   * args->target.
   *
   *  - While targetIndex < targetLength the byte is stored at
   *    args->target[targetIndex], targetIndex is incremented and, when
   *    args->offsets is non-NULL, (sourceIndex - 1) is stored through the
   *    caller's local `offsets` cursor, which is then advanced.
   *  - Once the target is full, the remaining bytes are appended to
   *    args->converter->charErrorBuffer and *err is set to
   *    U_BUFFER_OVERFLOW_ERROR.
   *
   * Side effects on the arguments: `len` is counted down (it ends at -1),
   * `strToAppend` and `targetIndex` are advanced, so all three must be
   * modifiable lvalues.
   *
   * NOTE: the macro deliberately uses a variable named `offsets` (int32_t *)
   * from the invoking scope; it is not one of the macro parameters.
   *
   * The body is wrapped in do { } while (0) so that an invocation followed by
   * ';' is exactly one statement and is safe inside an unbraced if/else.
   */
  #define CONCAT_ESCAPE_MACRO(args, targetIndex, targetLength, strToAppend, err, len, sourceIndex) \
      do { \
          while ((len)-- > 0) { \
              if ((targetIndex) < (targetLength)) { \
                  (args)->target[(targetIndex)] = (unsigned char) *(strToAppend); \
                  if ((args)->offsets != NULL) { \
                      *(offsets++) = (sourceIndex) - 1; \
                  } \
                  (targetIndex)++; \
              } else { \
                  (args)->converter->charErrorBuffer[(int)(args)->converter->charErrorBufferLength++] = \
                      (unsigned char) *(strToAppend); \
                  *(err) = U_BUFFER_OVERFLOW_ERROR; \
              } \
              (strToAppend)++; \
          } \
      } while (0)
  55
  56
  57 typedef struct{
         /* Per-converter HZ state; _HZOpen allocates one and stores it in UConverter.extraInfo. */
  58     UConverter* gbConverter;     /* "GBK" sub-converter opened in _HZOpen, closed in _HZClose */
  59     int32_t targetIndex;         /* reset to 0 by _HZReset; not otherwise used in the code visible here */
  60     int32_t sourceIndex;         /* reset to 0 by _HZReset; not otherwise used in the code visible here */
  61     UBool isEscapeAppended;      /* fromUnicode: TRUE once a "~{" or "~}" mode escape has been written */
  62     UBool isStateDBCS;           /* toUnicode: TRUE after "~{" (two-byte state), FALSE after "~}" */
  63     UBool isTargetUCharDBCS;     /* fromUnicode: TRUE if the last converted character was double-byte */
  64     UBool isEmptySegment;        /* toUnicode: TRUE right after "~{"/"~}" until other input is seen */
  65 }UConverterDataHZ;
  66
  67
  68
  69 static void
  70 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
  71     UConverter *gbConverter;
  72     if(pArgs->onlyTestIsLoadable) {
  73         ucnv_canCreateConverter("GBK", errorCode);  /* errorCode carries result */
  74         return;
  75     }
  76     gbConverter = ucnv_open("GBK", errorCode);
  77     if(U_FAILURE(*errorCode)) {
  78         return;
  79     }
  80     cnv->toUnicodeStatus = 0;
  81     cnv->fromUnicodeStatus= 0;
  82     cnv->mode=0;
  83     cnv->fromUChar32=0x0000;
  84     cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));
  85     if(cnv->extraInfo != NULL){
  86         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;
  87     }
  88     else {
  89         ucnv_close(gbConverter);
  90         *errorCode = U_MEMORY_ALLOCATION_ERROR;
  91         return;
  92     }
  93 }
  94
  95 static void
  96 _HZClose(UConverter *cnv){
  97     if(cnv->extraInfo != NULL) {
  98         ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
  99         if(!cnv->isExtraLocal) {
 100             uprv_free(cnv->extraInfo);
 101         }
 102         cnv->extraInfo = NULL;
 103     }
 104 }
 105
 106 static void
 107 _HZReset(UConverter *cnv, UConverterResetChoice choice){
 108     if(choice<=UCNV_RESET_TO_UNICODE) {
 109         cnv->toUnicodeStatus = 0;
 110         cnv->mode=0;
 111         if(cnv->extraInfo != NULL){
 112             ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
 113             ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
 114         }
 115     }
 116     if(choice!=UCNV_RESET_TO_UNICODE) {
 117         cnv->fromUnicodeStatus= 0;
 118         cnv->fromUChar32=0x0000;
 119         if(cnv->extraInfo != NULL){
 120             ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
 121             ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
 122             ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
 123             ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
 124         }
 125     }
 126 }
 127
 128 /**************************************HZ Encoding*************************************************
 129 * Rules for HZ encoding
 130 *
 131 *   In ASCII mode, a byte is interpreted as an ASCII character, unless a
 132 *   '~' is encountered. The character '~' is an escape character. By
 133 *   convention, it must be immediately followed ONLY by '~', '{' or '\n'
 134 *   (<LF>), with the following special meaning.
 135
 136 *   1. The escape sequence '~~' is interpreted as a '~'.
 137 *   2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
 138 *   3. The escape sequence '~\n' is a line-continuation marker to be
 139 *     consumed with no output produced.
 140 *   In GB mode, characters are interpreted two bytes at a time as (pure)
 141 *   GB codes until the escape-from-GB code '~}' is read. This code
 142 *   switches the mode from GB back to ASCII.  (Note that the escape-
 143 *   from-GB code '~}' ($7E7D) is outside the defined GB range.)
 144 *
 145 *   Source: RFC 1842
 146 *
 147 *   Note that the formal syntax in RFC 1842 is invalid. I assume that the
 148 *   intended definition of single-byte-segment is as follows (pedberg):
 149 *   single-byte-segment = single-byte-seq 1*single-byte-char
 150 */
 151
 152
 153 static void
 154 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
 155                                                             UErrorCode* err){
         /*
          * Converts HZ bytes from args->source into UChars at args->target and,
          * when args->offsets is non-NULL, records the source offset of each
          * output UChar. On return args->source and args->target are advanced
          * past what was consumed and produced.
          *
          * State carried between calls:
          *   args->converter->mode            == UCNV_TILDE while a '~' is pending
          *   args->converter->toUnicodeStatus == (lead byte | 0x100) while a
          *                                       two-byte lead is pending, else 0
          *   myData->isStateDBCS              two-byte state (after "~{")
          *   myData->isEmptySegment           an escape was just seen, nothing since
          *
          * Errors reported through *err:
          *   U_ILLEGAL_ESCAPE_SEQUENCE  empty segment, or '~' followed by an
          *                              unsupported byte
          *   U_INVALID_CHAR_FOUND       the sub-converter lookup returned 0xfffe
          *   U_ILLEGAL_CHAR_FOUND       any other value >= 0xfffe (bad byte/pair)
          *   U_BUFFER_OVERFLOW_ERROR    target full while source bytes remain
          * For the first three the offending bytes are stored in
          * converter->toUBytes with their count in converter->toULength.
          */
 156     char tempBuf[2];
 157     const char *mySource = ( char *) args->source;
 158     UChar *myTarget = args->target;
 159     const char *mySourceLimit = args->sourceLimit;
 160     UChar32 targetUniChar = 0x0000;
 161     int32_t mySourceChar = 0x0000;
 162     UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
 163     tempBuf[0]=0;
 164     tempBuf[1]=0;
 165
 166     /* Calling code already handles this situation. */
 167     /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
 168         *err = U_ILLEGAL_ARGUMENT_ERROR;
 169         return;
 170     }*/
 171
 172     while(mySource< mySourceLimit){
 173
 174         if(myTarget < args->targetLimit){
 175
 176             mySourceChar= (unsigned char) *mySource++;
 177
 178             if(args->converter->mode == UCNV_TILDE) {
 179                 /* second byte after ~ */
 180                 args->converter->mode=0;
 181                 switch(mySourceChar) {
 182                 case 0x0A:
 183                     /* no output for ~\n (line-continuation marker) */
 184                     continue;
 185                 case UCNV_TILDE:
                         /* "~~" yields one '~'; its offset is that of the first '~' (2 bytes back) */
 186                     if(args->offsets) {
 187                         args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
 188                     }
 189                     *(myTarget++)=(UChar)mySourceChar;
 190                     myData->isEmptySegment = FALSE;
 191                     continue;
 192                 case UCNV_OPEN_BRACE:
 193                 case UCNV_CLOSE_BRACE:
                         /* "~{" enters the two-byte state, "~}" leaves it */
 194                     myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
 195                     if (myData->isEmptySegment) {
 196                         myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
 197                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 198                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
 199                         args->converter->toUBytes[0] = UCNV_TILDE;
 200                         args->converter->toUBytes[1] = mySourceChar;
 201                         args->converter->toULength = 2;
 202                         args->target = myTarget;
 203                         args->source = mySource;
 204                         return;
 205                     }
 206                     myData->isEmptySegment = TRUE;
 207                     continue;
 208                 default:
 209                      /* if the first byte is equal to TILDE and the trail byte
 210                      * is not a valid byte then it is an error condition
 211                      */
 212                     /*
 213                      * Ticket 5691: consistent illegal sequences:
 214                      * - We include at least the first byte in the illegal sequence.
 215                      * - If any of the non-initial bytes could be the start of a character,
 216                      *   we stop the illegal sequence before the first one of those.
 217                      */
 218                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
 219                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 220                     args->converter->toUBytes[0] = UCNV_TILDE;
 221                     if( myData->isStateDBCS ?
 222                             (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
 223                             mySourceChar <= 0x7f
 224                     ) {
 225                         /* The current byte could be the start of a character: Back it out. */
 226                         args->converter->toULength = 1;
 227                         --mySource;
 228                     } else {
 229                         /* Include the current byte in the illegal sequence. */
 230                         args->converter->toUBytes[1] = mySourceChar;
 231                         args->converter->toULength = 2;
 232                     }
 233                     args->target = myTarget;
 234                     args->source = mySource;
 235                     return;
 236                 }
 237             } else if(myData->isStateDBCS) {
 238                 if(args->converter->toUnicodeStatus == 0x00){
 239                     /* lead byte */
 240                     if(mySourceChar == UCNV_TILDE) {
 241                         args->converter->mode = UCNV_TILDE;
 242                     } else {
 243                         /* add another bit to distinguish a 0 byte from not having seen a lead byte */
 244                         args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
 245                         myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
 246                     }
 247                     continue;
 248                 }
 249                 else{
 250                     /* trail byte */
 251                     int leadIsOk, trailIsOk;
 252                     uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
 253                     targetUniChar = 0xffff;
 254                     /*
 255                      * Ticket 5691: consistent illegal sequences:
 256                      * - We include at least the first byte in the illegal sequence.
 257                      * - If any of the non-initial bytes could be the start of a character,
 258                      *   we stop the illegal sequence before the first one of those.
 259                      *
 260                      * In HZ DBCS, if the second byte is in the 21..7e range,
 261                      * we report only the first byte as the illegal sequence.
 262                      * Otherwise we convert or report the pair of bytes.
 263                      */
 264                     leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
 265                     trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
 266                     if (leadIsOk && trailIsOk) {
                             /* add 0x80 to both bytes to form the pair looked up in the GBK sub-converter */
 267                         tempBuf[0] = (char) (leadByte+0x80) ;
 268                         tempBuf[1] = (char) (mySourceChar+0x80);
 269                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
 270                             tempBuf, 2, args->converter->useFallback);
                             /* keep the original (unshifted) pair for error reporting below */
 271                         mySourceChar= (leadByte << 8) | mySourceChar;
 272                     } else if (trailIsOk) {
 273                         /* report a single illegal byte and continue with the following DBCS starter byte */
 274                         --mySource;
 275                         mySourceChar = (int32_t)leadByte;
 276                     } else {
 277                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
 278                         /* add another bit so that the code below writes 2 bytes in case of error */
 279                         mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
 280                     }
 281                     args->converter->toUnicodeStatus =0x00;
 282                 }
 283             }
 284             else{
 285                 if(mySourceChar == UCNV_TILDE) {
 286                     args->converter->mode = UCNV_TILDE;
 287                     continue;
 288                 } else if(mySourceChar <= 0x7f) {
 289                     targetUniChar = (UChar)mySourceChar;  /* ASCII */
 290                     myData->isEmptySegment = FALSE; /* the segment has something valid */
 291                 } else {
 292                     targetUniChar = 0xffff;
 293                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
 294                 }
 295             }
 296             if(targetUniChar < 0xfffe){
                     /* offset of the character's first byte: one byte back, or two in the two-byte state */
 297                 if(args->offsets) {
 298                     args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
 299                 }
 300
 301                 *(myTarget++)=(UChar)targetUniChar;
 302             }
 303             else /* targetUniChar>=0xfffe */ {
 304                 if(targetUniChar == 0xfffe){
 305                     *err = U_INVALID_CHAR_FOUND;
 306                 }
 307                 else{
 308                     *err = U_ILLEGAL_CHAR_FOUND;
 309                 }
                     /* mySourceChar > 0xff means two bytes are reported, otherwise one */
 310                 if(mySourceChar > 0xff){
 311                     args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
 312                     args->converter->toUBytes[1] = (uint8_t)mySourceChar;
 313                     args->converter->toULength=2;
 314                 }
 315                 else{
 316                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
 317                     args->converter->toULength=1;
 318                 }
 319                 break;
 320             }
 321         }
 322         else{
 323             *err =U_BUFFER_OVERFLOW_ERROR;
 324             break;
 325         }
 326     }
 327
 328     args->target = myTarget;
 329     args->source = mySource;
 330 }
 331
 332
 333 static void
 334 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
 335                                                       UErrorCode * err){
 336     const UChar *mySource = args->source;
 337     char *myTarget = args->target;
 338     int32_t* offsets = args->offsets;
 339     int32_t mySourceIndex = 0;
 340     int32_t myTargetIndex = 0;
 341     int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
 342     int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
 343     uint32_t targetUniChar = 0x0000;
 344     UChar32 mySourceChar = 0x0000;
 345     UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
 346     UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
 347     UBool oldIsTargetUCharDBCS;
 348     int len =0;
 349     const char* escSeq=NULL;
 350
 351     /* Calling code already handles this situation. */
 352     /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
 353         *err = U_ILLEGAL_ARGUMENT_ERROR;
 354         return;
 355     }*/
 356     if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
 357         goto getTrail;
 358     }
 359     /*writing the char to the output stream */
 360     while (mySourceIndex < mySourceLength){
 361         targetUniChar = missingCharMarker;
 362         if (myTargetIndex < targetLength){
 363
 364             mySourceChar = (UChar) mySource[mySourceIndex++];
 365
 366
 367             oldIsTargetUCharDBCS = isTargetUCharDBCS;
 368             if(mySourceChar ==UCNV_TILDE){
 369                 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
 370                 len = ESC_LEN;
 371                 escSeq = TILDE_ESCAPE;
 372                 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 373                 continue;
 374             } else if(mySourceChar <= 0x7f) {
 375                 targetUniChar = mySourceChar;
 376             } else {
 377                 int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
 378                     mySourceChar,&targetUniChar,args->converter->useFallback);
 379                 /* we can only use lead bytes 21..7D and trail bytes 21..7E */
 380                 if( length == 2 &&
 381                     (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
 382                     (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
 383                 ) {
 384                     targetUniChar -= 0x8080;
 385                 } else {
 386                     targetUniChar = missingCharMarker;
 387                 }
 388             }
 389             if (targetUniChar != missingCharMarker){
 390                myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
 391                  if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
 392                     /*Shifting from a double byte to single byte mode*/
 393                     if(!isTargetUCharDBCS){
 394                         len =ESC_LEN;
 395                         escSeq = SB_ESCAPE;
 396                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 397                         myConverterData->isEscapeAppended = TRUE;
 398                     }
 399                     else{ /* Shifting from a single byte to double byte mode*/
 400                         len =ESC_LEN;
 401                         escSeq = DB_ESCAPE;
 402                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 403                         myConverterData->isEscapeAppended = TRUE;
 404
 405                     }
 406                 }
 407
 408                 if(isTargetUCharDBCS){
 409                     if( myTargetIndex <targetLength){
 410                         myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
 411                         if(offsets){
 412                             *(offsets++) = mySourceIndex-1;
 413                         }
 414                         if(myTargetIndex < targetLength){
 415                             myTarget[myTargetIndex++] =(char) targetUniChar;
 416                             if(offsets){
 417                                 *(offsets++) = mySourceIndex-1;
 418                             }
 419                         }else{
 420                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 421                             *err = U_BUFFER_OVERFLOW_ERROR;
 422                         }
 423                     }else{
 424                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
 425                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 426                         *err = U_BUFFER_OVERFLOW_ERROR;
 427                     }
 428
 429                 }else{
 430                     if( myTargetIndex <targetLength){
 431                         myTarget[myTargetIndex++] = (char) (targetUniChar );
 432                         if(offsets){
 433                             *(offsets++) = mySourceIndex-1;
 434                         }
 435
 436                     }else{
 437                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 438                         *err = U_BUFFER_OVERFLOW_ERROR;
 439                     }
 440                 }
 441
 442             }
 443             else{
 444                 /* oops.. the code point is unassigned */
 445                 /*Handle surrogates */
 446                 /*check if the char is a First surrogate*/
 447                 if(U16_IS_SURROGATE(mySourceChar)) {
 448                     if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
 449                         args->converter->fromUChar32=mySourceChar;
 450 getTrail:
 451                         /*look ahead to find the trail surrogate*/
 452                         if(mySourceIndex <  mySourceLength) {
 453                             /* test the following code unit */
 454                             UChar trail=(UChar) args->source[mySourceIndex];
 455                             if(U16_IS_TRAIL(trail)) {
 456                                 ++mySourceIndex;
 457                                 mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
 458                                 args->converter->fromUChar32=0x00;
 459                                 /* there are no surrogates in GB2312*/
 460                                 *err = U_INVALID_CHAR_FOUND;
 461                                 /* exit this condition tree */
 462                             } else {
 463                                 /* this is an unmatched lead code unit (1st surrogate) */
 464                                 /* callback(illegal) */
 465                                 *err=U_ILLEGAL_CHAR_FOUND;
 466                             }
 467                         } else {
 468                             /* no more input */
 469                             *err = U_ZERO_ERROR;
 470                         }
 471                     } else {
 472                         /* this is an unmatched trail code unit (2nd surrogate) */
 473                         /* callback(illegal) */
 474                         *err=U_ILLEGAL_CHAR_FOUND;
 475                     }
 476                 } else {
 477                     /* callback(unassigned) for a BMP code point */
 478                     *err = U_INVALID_CHAR_FOUND;
 479                 }
 480
 481                 args->converter->fromUChar32=mySourceChar;
 482                 break;
 483             }
 484         }
 485         else{
 486             *err = U_BUFFER_OVERFLOW_ERROR;
 487             break;
 488         }
 489         targetUniChar=missingCharMarker;
 490     }
 491
 492     args->target += myTargetIndex;
 493     args->source += mySourceIndex;
 494     myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
 495 }
 496
 497 static void
 498 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
 499     UConverter *cnv = args->converter;
 500     UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
 501     char *p;
 502     char buffer[4];
 503     p = buffer;
 504
 505     if( convData->isTargetUCharDBCS){
 506         *p++= UCNV_TILDE;
 507         *p++= UCNV_CLOSE_BRACE;
 508         convData->isTargetUCharDBCS=FALSE;
 509     }
 510     *p++= (char)cnv->subChars[0];
 511
 512     ucnv_cbFromUWriteBytes(args,
 513                            buffer, (int32_t)(p - buffer),
 514                            offsetIndex, err);
 515 }
 516
 517 /*
 518  * Structure for cloning an HZ converter into a single memory block.
 519  * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
 520  * and then ucnv_safeClone() of the sub-converter may additionally align
 521  * subCnv inside the cloneHZStruct, for which we need the deadSpace after
 522  * subCnv. This is because UAlignedMemory may be larger than the actually
 523  * necessary alignment size for the platform.
 524  * The other cloneHZStruct fields will not be moved around,
 525  * and are aligned properly with cloneHZStruct's alignment.
 526  */
 527 struct cloneHZStruct
 528 {
 529     UConverter cnv;              /* the cloned HZ converter; _HZ_SafeClone returns its address */
 530     UConverter subCnv;           /* storage handed to ucnv_safeClone() for the cloned sub-converter */
 531     UAlignedMemory deadSpace;    /* slack so that subCnv may be re-aligned inside this struct */
 532     UConverterDataHZ mydata;     /* copy of the HZ state; cnv.extraInfo is pointed at it */
 533 };
 534
 535
 536 static UConverter *
 537 _HZ_SafeClone(const UConverter *cnv,
 538               void *stackBuffer,
 539               int32_t *pBufferSize,
 540               UErrorCode *status)
 541 {
 542     struct cloneHZStruct * localClone;
 543     int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
 544
 545     if (U_FAILURE(*status)){
 546         return 0;
 547     }
 548
 549     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
 550         *pBufferSize = bufferSizeNeeded;
 551         return 0;
 552     }
 553
 554     localClone = (struct cloneHZStruct *)stackBuffer;
 555     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
 556
 557     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
 558     localClone->cnv.extraInfo = &localClone->mydata;
 559     localClone->cnv.isExtraLocal = TRUE;
 560
 561     /* deep-clone the sub-converter */
 562     size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
 563     ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
 564         ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
 565
 566     return &localClone->cnv;
 567 }
 568
 569 static void
 570 _HZ_GetUnicodeSet(const UConverter *cnv,
 571                   const USetAdder *sa,
 572                   UConverterUnicodeSet which,
 573                   UErrorCode *pErrorCode) {
 574     /* HZ converts all of ASCII */
 575     sa->addRange(sa->set, 0, 0x7f);
 576
 577     /* add all of the code points that the sub-converter handles */
 578     ucnv_MBCSGetFilteredUnicodeSetForUnicode(
 579         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
 580         sa, which, UCNV_SET_FILTER_HZ,
 581         pErrorCode);
 582 }
 583
 584 static const UConverterImpl _HZImpl={
         /*
          * Function table for the HZ converter. Each conversion routine is listed
          * twice in a row, so the same function serves both entries of its pair.
          * NOTE(review): the meaning of each slot (and of the NULL slots) is
          * defined by UConverterImpl, which is declared outside this file -
          * confirm against that declaration before reordering anything here.
          */
 585
 586     UCNV_HZ,
 587
 588     NULL,
 589     NULL,
 590
 591     _HZOpen,
 592     _HZClose,
 593     _HZReset,
 594
 595     UConverter_toUnicode_HZ_OFFSETS_LOGIC,
 596     UConverter_toUnicode_HZ_OFFSETS_LOGIC,
 597     UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
 598     UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
 599     NULL,
 600
 601     NULL,
 602     NULL,
 603     _HZ_WriteSub,
 604     _HZ_SafeClone,
 605     _HZ_GetUnicodeSet
 606 };
 607
 608 static const UConverterStaticData _HZStaticData={
         /*
          * Static description of the HZ converter: its size, the name "HZ" and the
          * converter type UCNV_HZ, followed by further positional fields.
          * NOTE(review): field meanings come from UConverterStaticData, declared
          * outside this file. { 0x1a, 0, 0, 0 } is presumably the substitution
          * byte sequence (_HZ_WriteSub writes subChars[0]) - confirm.
          */
 609     sizeof(UConverterStaticData),
 610         "HZ",
 611          0,
 612          UCNV_IBM,
 613          UCNV_HZ,
 614          1,
 615          4,
 616         { 0x1a, 0, 0, 0 },
 617         1,
 618         FALSE,
 619         FALSE,
 620         0,
 621         0,
 622         { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
 623
 624 };
 625
 626
 627 const UConverterSharedData _HZData={
         /*
          * Shared-data object for the HZ converter; it ties together the static
          * description (_HZStaticData) and the function table (_HZImpl). It is
          * not declared static, so it has external linkage.
          * NOTE(review): the remaining positional fields are defined by
          * UConverterSharedData, declared outside this file - confirm there.
          */
 628     sizeof(UConverterSharedData),
 629         ~((uint32_t) 0),
 630         NULL,
 631         NULL,
 632         &_HZStaticData,
 633         FALSE,
 634         &_HZImpl,
 635         0
 636 };
 637
 638 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */