icuSources/common/ucnvhz.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2000-2009, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   file name:  ucnvhz.c
   7 *   encoding:   US-ASCII
   8 *   tab size:   8 (not used)
   9 *   indentation:4
  10 *
  11 *   created on: 2000oct16
  12 *   created by: Ram Viswanadha
  13 *   10/31/2000  Ram     Implemented offsets logic function
  14 *
  15 */
  16
  17 #include "unicode/utypes.h"
  18
  19 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
  20
  21 #include "cmemory.h"
  22 #include "unicode/ucnv.h"
  23 #include "unicode/ucnv_cb.h"
  24 #include "unicode/uset.h"
  25 #include "ucnv_bld.h"
  26 #include "ucnv_cnv.h"
  27 #include "ucnv_imp.h"
  28
  29 #define UCNV_TILDE 0x7E          /* ~ */
  30 #define UCNV_OPEN_BRACE 0x7B     /* { */
  31 #define UCNV_CLOSE_BRACE 0x7D   /* } */
  32 #define SB_ESCAPE    "\x7E\x7D"
  33 #define DB_ESCAPE    "\x7E\x7B"
  34 #define TILDE_ESCAPE "\x7E\x7E"
  35 #define ESC_LEN       2
  36
  37
  38 #define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){                             \
  39     while(len-->0){                                                                                                         \
  40         if(targetIndex < targetLength){                                                                                     \
  41             args->target[targetIndex] = (unsigned char) *strToAppend;                                                       \
  42             if(args->offsets!=NULL){                                                                                        \
  43                 *(offsets++) = sourceIndex-1;                                                                               \
  44             }                                                                                                               \
  45             targetIndex++;                                                                                                  \
  46         }                                                                                                                   \
  47         else{                                                                                                               \
  48             args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
  49             *err =U_BUFFER_OVERFLOW_ERROR;                                                                                  \
  50         }                                                                                                                   \
  51         strToAppend++;                                                                                                      \
  52     }                                                                                                                       \
  53 }
  54
  55
/*
 * Per-converter state of the HZ converter, stored in UConverter.extraInfo.
 * It is allocated and zeroed in _HZOpen, partially reset in _HZReset and
 * copied as a whole (memcpy) in _HZ_SafeClone.
 */
typedef struct{
    UConverter* gbConverter;    /* "GBK" sub-converter opened in _HZOpen; used for the double-byte mappings */
    int32_t targetIndex;        /* only zeroed (open/reset) in the code of this file */
    int32_t sourceIndex;        /* only zeroed (open/reset) in the code of this file */
    UBool isEscapeAppended;     /* fromUnicode: TRUE once a "~{" or "~}" mode escape has been written */
    UBool isStateDBCS;          /* toUnicode: TRUE after "~{", FALSE after "~}" */
    UBool isTargetUCharDBCS;    /* fromUnicode: TRUE if the last converted character was double-byte */
    UBool isEmptySegment;       /* toUnicode: TRUE right after "~{"/"~}" until a character (or another error) is seen */
}UConverterDataHZ;
  65
  66
  67
  68 static void
  69 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
  70     UConverter *gbConverter;
  71     if(pArgs->onlyTestIsLoadable) {
  72         ucnv_canCreateConverter("GBK", errorCode);  /* errorCode carries result */
  73         return;
  74     }
  75     gbConverter = ucnv_open("GBK", errorCode);
  76     if(U_FAILURE(*errorCode)) {
  77         return;
  78     }
  79     cnv->toUnicodeStatus = 0;
  80     cnv->fromUnicodeStatus= 0;
  81     cnv->mode=0;
  82     cnv->fromUChar32=0x0000;
  83     cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ));
  84     if(cnv->extraInfo != NULL){
  85         uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ));
  86         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;
  87     }
  88     else {
  89         ucnv_close(gbConverter);
  90         *errorCode = U_MEMORY_ALLOCATION_ERROR;
  91         return;
  92     }
  93 }
  94
  95 static void
  96 _HZClose(UConverter *cnv){
  97     if(cnv->extraInfo != NULL) {
  98         ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
  99         if(!cnv->isExtraLocal) {
 100             uprv_free(cnv->extraInfo);
 101         }
 102         cnv->extraInfo = NULL;
 103     }
 104 }
 105
 106 static void
 107 _HZReset(UConverter *cnv, UConverterResetChoice choice){
 108     if(choice<=UCNV_RESET_TO_UNICODE) {
 109         cnv->toUnicodeStatus = 0;
 110         cnv->mode=0;
 111         if(cnv->extraInfo != NULL){
 112             ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
 113             ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
 114         }
 115     }
 116     if(choice!=UCNV_RESET_TO_UNICODE) {
 117         cnv->fromUnicodeStatus= 0;
 118         cnv->fromUChar32=0x0000;
 119         if(cnv->extraInfo != NULL){
 120             ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
 121             ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
 122             ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
 123             ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
 124         }
 125     }
 126 }
 127
 128 /**************************************HZ Encoding*************************************************
 129 * Rules for HZ encoding
 130 *
 131 *   In ASCII mode, a byte is interpreted as an ASCII character, unless a
 132 *   '~' is encountered. The character '~' is an escape character. By
 133 *   convention, it must be immediately followed ONLY by '~', '{' or '\n'
 134 *   (<LF>), with the following special meaning.
 135
 136 *   1. The escape sequence '~~' is interpreted as a '~'.
 137 *   2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
 138 *   3. The escape sequence '~\n' is a line-continuation marker to be
 139 *     consumed with no output produced.
 140 *   In GB mode, characters are interpreted two bytes at a time as (pure)
 141 *   GB codes until the escape-from-GB code '~}' is read. This code
 142 *   switches the mode from GB back to ASCII.  (Note that the escape-
 143 *   from-GB code '~}' ($7E7D) is outside the defined GB range.)
 144 *
*   Source: RFC 1843 (the HZ rules quoted above); see also RFC 1842
 146 *
 147 *   Note that the formal syntax in RFC 1842 is invalid. I assume that the
 148 *   intended definition of single-byte-segment is as follows (pedberg):
 149 *   single-byte-segment = single-byte-seq 1*single-byte-char
 150 */
 151
 152
 153 static void
 154 UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
 155                                                             UErrorCode* err){
 156     char tempBuf[2];
 157     const char *mySource = ( char *) args->source;
 158     UChar *myTarget = args->target;
 159     const char *mySourceLimit = args->sourceLimit;
 160     UChar32 targetUniChar = 0x0000;
 161     int32_t mySourceChar = 0x0000;
 162     UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
 163     tempBuf[0]=0;
 164     tempBuf[1]=0;
 165
 166     /* Calling code already handles this situation. */
 167     /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
 168         *err = U_ILLEGAL_ARGUMENT_ERROR;
 169         return;
 170     }*/
 171
 172     while(mySource< mySourceLimit){
 173
 174         if(myTarget < args->targetLimit){
 175
 176             mySourceChar= (unsigned char) *mySource++;
 177
 178             if(args->converter->mode == UCNV_TILDE) {
 179                 /* second byte after ~ */
 180                 args->converter->mode=0;
 181                 switch(mySourceChar) {
 182                 case 0x0A:
 183                     /* no output for ~\n (line-continuation marker) */
 184                     continue;
 185                 case UCNV_TILDE:
 186                     if(args->offsets) {
 187                         args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
 188                     }
 189                     *(myTarget++)=(UChar)mySourceChar;
 190                     myData->isEmptySegment = FALSE;
 191                     continue;
 192                 case UCNV_OPEN_BRACE:
 193                 case UCNV_CLOSE_BRACE:
 194                     myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
 195                     if (myData->isEmptySegment) {
 196                         myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
 197                         *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 198                         args->converter->toUCallbackReason = UCNV_IRREGULAR;
 199                         args->converter->toUBytes[0] = UCNV_TILDE;
 200                         args->converter->toUBytes[1] = mySourceChar;
 201                         args->converter->toULength = 2;
 202                         args->target = myTarget;
 203                         args->source = mySource;
 204                         return;
 205                     }
 206                     myData->isEmptySegment = TRUE;
 207                     continue;
 208                 default:
 209                      /* if the first byte is equal to TILDE and the trail byte
 210                      * is not a valid byte then it is an error condition
 211                      */
 212                     /*
 213                      * Ticket 5691: consistent illegal sequences:
 214                      * - We include at least the first byte in the illegal sequence.
 215                      * - If any of the non-initial bytes could be the start of a character,
 216                      *   we stop the illegal sequence before the first one of those.
 217                      */
 218                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
 219                     *err = U_ILLEGAL_ESCAPE_SEQUENCE;
 220                     args->converter->toUBytes[0] = UCNV_TILDE;
 221                     if( myData->isStateDBCS ?
 222                             (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
 223                             mySourceChar <= 0x7f
 224                     ) {
 225                         /* The current byte could be the start of a character: Back it out. */
 226                         args->converter->toULength = 1;
 227                         --mySource;
 228                     } else {
 229                         /* Include the current byte in the illegal sequence. */
 230                         args->converter->toUBytes[1] = mySourceChar;
 231                         args->converter->toULength = 2;
 232                     }
 233                     args->target = myTarget;
 234                     args->source = mySource;
 235                     return;
 236                 }
 237             } else if(myData->isStateDBCS) {
 238                 if(args->converter->toUnicodeStatus == 0x00){
 239                     /* lead byte */
 240                     if(mySourceChar == UCNV_TILDE) {
 241                         args->converter->mode = UCNV_TILDE;
 242                     } else {
 243                         /* add another bit to distinguish a 0 byte from not having seen a lead byte */
 244                         args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
 245                         myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
 246                     }
 247                     continue;
 248                 }
 249                 else{
 250                     /* trail byte */
 251                     int leadIsOk, trailIsOk;
 252                     uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
 253                     targetUniChar = 0xffff;
 254                     /*
 255                      * Ticket 5691: consistent illegal sequences:
 256                      * - We include at least the first byte in the illegal sequence.
 257                      * - If any of the non-initial bytes could be the start of a character,
 258                      *   we stop the illegal sequence before the first one of those.
 259                      *
 260                      * In HZ DBCS, if the second byte is in the 21..7e range,
 261                      * we report only the first byte as the illegal sequence.
 262                      * Otherwise we convert or report the pair of bytes.
 263                      */
 264                     leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
 265                     trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
 266                     if (leadIsOk && trailIsOk) {
 267                         tempBuf[0] = (char) (leadByte+0x80) ;
 268                         tempBuf[1] = (char) (mySourceChar+0x80);
 269                         targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
 270                             tempBuf, 2, args->converter->useFallback);
 271                         mySourceChar= (leadByte << 8) | mySourceChar;
 272                     } else if (trailIsOk) {
 273                         /* report a single illegal byte and continue with the following DBCS starter byte */
 274                         --mySource;
 275                         mySourceChar = (int32_t)leadByte;
 276                     } else {
 277                         /* report a pair of illegal bytes if the second byte is not a DBCS starter */
 278                         /* add another bit so that the code below writes 2 bytes in case of error */
 279                         mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
 280                     }
 281                     args->converter->toUnicodeStatus =0x00;
 282                 }
 283             }
 284             else{
 285                 if(mySourceChar == UCNV_TILDE) {
 286                     args->converter->mode = UCNV_TILDE;
 287                     continue;
 288                 } else if(mySourceChar <= 0x7f) {
 289                     targetUniChar = (UChar)mySourceChar;  /* ASCII */
 290                     myData->isEmptySegment = FALSE; /* the segment has something valid */
 291                 } else {
 292                     targetUniChar = 0xffff;
 293                     myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
 294                 }
 295             }
 296             if(targetUniChar < 0xfffe){
 297                 if(args->offsets) {
 298                     args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
 299                 }
 300
 301                 *(myTarget++)=(UChar)targetUniChar;
 302             }
 303             else /* targetUniChar>=0xfffe */ {
 304                 if(targetUniChar == 0xfffe){
 305                     *err = U_INVALID_CHAR_FOUND;
 306                 }
 307                 else{
 308                     *err = U_ILLEGAL_CHAR_FOUND;
 309                 }
 310                 if(mySourceChar > 0xff){
 311                     args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
 312                     args->converter->toUBytes[1] = (uint8_t)mySourceChar;
 313                     args->converter->toULength=2;
 314                 }
 315                 else{
 316                     args->converter->toUBytes[0] = (uint8_t)mySourceChar;
 317                     args->converter->toULength=1;
 318                 }
 319                 break;
 320             }
 321         }
 322         else{
 323             *err =U_BUFFER_OVERFLOW_ERROR;
 324             break;
 325         }
 326     }
 327
 328     args->target = myTarget;
 329     args->source = mySource;
 330 }
 331
 332
 333 static void
 334 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
 335                                                       UErrorCode * err){
 336     const UChar *mySource = args->source;
 337     char *myTarget = args->target;
 338     int32_t* offsets = args->offsets;
 339     int32_t mySourceIndex = 0;
 340     int32_t myTargetIndex = 0;
 341     int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
 342     int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
 343     int32_t length=0;
 344     uint32_t targetUniChar = 0x0000;
 345     UChar32 mySourceChar = 0x0000;
 346     UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
 347     UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
 348     UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
 349     int len =0;
 350     const char* escSeq=NULL;
 351
 352     /* Calling code already handles this situation. */
 353     /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
 354         *err = U_ILLEGAL_ARGUMENT_ERROR;
 355         return;
 356     }*/
 357     if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
 358         goto getTrail;
 359     }
 360     /*writing the char to the output stream */
 361     while (mySourceIndex < mySourceLength){
 362         targetUniChar = missingCharMarker;
 363         if (myTargetIndex < targetLength){
 364
 365             mySourceChar = (UChar) mySource[mySourceIndex++];
 366
 367
 368             oldIsTargetUCharDBCS = isTargetUCharDBCS;
 369             if(mySourceChar ==UCNV_TILDE){
 370                 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
 371                 len = ESC_LEN;
 372                 escSeq = TILDE_ESCAPE;
 373                 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 374                 continue;
 375             } else if(mySourceChar <= 0x7f) {
 376                 length = 1;
 377                 targetUniChar = mySourceChar;
 378             } else {
 379                 length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
 380                     mySourceChar,&targetUniChar,args->converter->useFallback);
 381                 /* we can only use lead bytes 21..7D and trail bytes 21..7E */
 382                 if( length == 2 &&
 383                     (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
 384                     (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
 385                 ) {
 386                     targetUniChar -= 0x8080;
 387                 } else {
 388                     targetUniChar = missingCharMarker;
 389                 }
 390             }
 391             if (targetUniChar != missingCharMarker){
 392                myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
 393                  if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
 394                     /*Shifting from a double byte to single byte mode*/
 395                     if(!isTargetUCharDBCS){
 396                         len =ESC_LEN;
 397                         escSeq = SB_ESCAPE;
 398                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 399                         myConverterData->isEscapeAppended = TRUE;
 400                     }
 401                     else{ /* Shifting from a single byte to double byte mode*/
 402                         len =ESC_LEN;
 403                         escSeq = DB_ESCAPE;
 404                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 405                         myConverterData->isEscapeAppended = TRUE;
 406
 407                     }
 408                 }
 409
 410                 if(isTargetUCharDBCS){
 411                     if( myTargetIndex <targetLength){
 412                         myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
 413                         if(offsets){
 414                             *(offsets++) = mySourceIndex-1;
 415                         }
 416                         if(myTargetIndex < targetLength){
 417                             myTarget[myTargetIndex++] =(char) targetUniChar;
 418                             if(offsets){
 419                                 *(offsets++) = mySourceIndex-1;
 420                             }
 421                         }else{
 422                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 423                             *err = U_BUFFER_OVERFLOW_ERROR;
 424                         }
 425                     }else{
 426                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
 427                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 428                         *err = U_BUFFER_OVERFLOW_ERROR;
 429                     }
 430
 431                 }else{
 432                     if( myTargetIndex <targetLength){
 433                         myTarget[myTargetIndex++] = (char) (targetUniChar );
 434                         if(offsets){
 435                             *(offsets++) = mySourceIndex-1;
 436                         }
 437
 438                     }else{
 439                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 440                         *err = U_BUFFER_OVERFLOW_ERROR;
 441                     }
 442                 }
 443
 444             }
 445             else{
 446                 /* oops.. the code point is unassigned */
 447                 /*Handle surrogates */
 448                 /*check if the char is a First surrogate*/
 449                 if(UTF_IS_SURROGATE(mySourceChar)) {
 450                     if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
 451                         args->converter->fromUChar32=mySourceChar;
 452 getTrail:
 453                         /*look ahead to find the trail surrogate*/
 454                         if(mySourceIndex <  mySourceLength) {
 455                             /* test the following code unit */
 456                             UChar trail=(UChar) args->source[mySourceIndex];
 457                             if(UTF_IS_SECOND_SURROGATE(trail)) {
 458                                 ++mySourceIndex;
 459                                 mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
 460                                 args->converter->fromUChar32=0x00;
 461                                 /* there are no surrogates in GB2312*/
 462                                 *err = U_INVALID_CHAR_FOUND;
 463                                 /* exit this condition tree */
 464                             } else {
 465                                 /* this is an unmatched lead code unit (1st surrogate) */
 466                                 /* callback(illegal) */
 467                                 *err=U_ILLEGAL_CHAR_FOUND;
 468                             }
 469                         } else {
 470                             /* no more input */
 471                             *err = U_ZERO_ERROR;
 472                         }
 473                     } else {
 474                         /* this is an unmatched trail code unit (2nd surrogate) */
 475                         /* callback(illegal) */
 476                         *err=U_ILLEGAL_CHAR_FOUND;
 477                     }
 478                 } else {
 479                     /* callback(unassigned) for a BMP code point */
 480                     *err = U_INVALID_CHAR_FOUND;
 481                 }
 482
 483                 args->converter->fromUChar32=mySourceChar;
 484                 break;
 485             }
 486         }
 487         else{
 488             *err = U_BUFFER_OVERFLOW_ERROR;
 489             break;
 490         }
 491         targetUniChar=missingCharMarker;
 492     }
 493
 494     args->target += myTargetIndex;
 495     args->source += mySourceIndex;
 496     myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
 497 }
 498
 499 static void
 500 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
 501     UConverter *cnv = args->converter;
 502     UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
 503     char *p;
 504     char buffer[4];
 505     p = buffer;
 506
 507     if( convData->isTargetUCharDBCS){
 508         *p++= UCNV_TILDE;
 509         *p++= UCNV_CLOSE_BRACE;
 510         convData->isTargetUCharDBCS=FALSE;
 511     }
 512     *p++= (char)cnv->subChars[0];
 513
 514     ucnv_cbFromUWriteBytes(args,
 515                            buffer, (int32_t)(p - buffer),
 516                            offsetIndex, err);
 517 }
 518
/*
 * Structure for cloning an HZ converter into a single memory block.
 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * subCnv inside the cloneHZStruct, for which we need the deadSpace after
 * subCnv. This is because UAlignedMemory may be larger than the actually
 * necessary alignment size for the platform.
 * The other cloneHZStruct fields will not be moved around,
 * and are aligned properly with cloneHZStruct's alignment.
 */
struct cloneHZStruct
{
    UConverter cnv;             /* the cloned HZ converter itself; _HZ_SafeClone returns its address */
    UConverter subCnv;          /* buffer for the clone of the GBK sub-converter */
    UAlignedMemory deadSpace;   /* padding that subCnv may shift into when it gets aligned */
    UConverterDataHZ mydata;    /* HZ state of the clone; cnv.extraInfo points here */
};
 536
 537
 538 static UConverter *
 539 _HZ_SafeClone(const UConverter *cnv,
 540               void *stackBuffer,
 541               int32_t *pBufferSize,
 542               UErrorCode *status)
 543 {
 544     struct cloneHZStruct * localClone;
 545     int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
 546
 547     if (U_FAILURE(*status)){
 548         return 0;
 549     }
 550
 551     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
 552         *pBufferSize = bufferSizeNeeded;
 553         return 0;
 554     }
 555
 556     localClone = (struct cloneHZStruct *)stackBuffer;
 557     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
 558
 559     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
 560     localClone->cnv.extraInfo = &localClone->mydata;
 561     localClone->cnv.isExtraLocal = TRUE;
 562
 563     /* deep-clone the sub-converter */
 564     size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
 565     ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
 566         ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
 567
 568     return &localClone->cnv;
 569 }
 570
 571 static void
 572 _HZ_GetUnicodeSet(const UConverter *cnv,
 573                   const USetAdder *sa,
 574                   UConverterUnicodeSet which,
 575                   UErrorCode *pErrorCode) {
 576     /* HZ converts all of ASCII */
 577     sa->addRange(sa->set, 0, 0x7f);
 578
 579     /* add all of the code points that the sub-converter handles */
 580     ucnv_MBCSGetFilteredUnicodeSetForUnicode(
 581         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
 582         sa, which, UCNV_SET_FILTER_HZ,
 583         pErrorCode);
 584 }
 585
/*
 * Function table of the HZ converter. The initializer is positional, so the
 * order of the entries must match the declaration of UConverterImpl.
 * NOTE(review): UConverterImpl is declared outside this file; the slot
 * descriptions below are inferred from the names of the installed functions
 * and should be confirmed against that declaration.
 */
static const UConverterImpl _HZImpl={

    UCNV_HZ, /* converter type */

    NULL, /* presumably load/unload hooks: none needed */
    NULL,

    _HZOpen,
    _HZClose,
    _HZReset,

    /* the same offsets-aware function is installed twice per direction */
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    NULL, /* optional slot left empty - TODO confirm which hook this is */

    NULL, /* optional slots left empty - TODO confirm which hooks these are */
    NULL,
    _HZ_WriteSub,
    _HZ_SafeClone,
    _HZ_GetUnicodeSet
};
 609
/*
 * Static description of the HZ converter (positional initializer).
 * NOTE(review): UConverterStaticData is declared outside this file; the
 * field descriptions below are assumptions and should be confirmed against
 * that declaration.
 */
static const UConverterStaticData _HZStaticData={
    sizeof(UConverterStaticData), /* size of this structure */
        "HZ", /* converter name */
         0,
         UCNV_IBM, /* presumably the platform */
         UCNV_HZ, /* converter type, same value as in _HZImpl */
         1, /* presumably min bytes per character */
         4, /* presumably max bytes per character */
        { 0x1a, 0, 0, 0 }, /* presumably the substitution bytes; _HZ_WriteSub writes cnv->subChars[0] */
        1, /* presumably the number of substitution bytes in use */
        FALSE,
        FALSE,
        0,
        0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
 627
 628
/*
 * Shared data object of the HZ converter; this is the only symbol of the
 * file with external linkage. It ties together the static description
 * (_HZStaticData) and the function table (_HZImpl).
 * NOTE(review): UConverterSharedData is declared outside this file; the
 * meaning of the remaining positional fields should be confirmed against
 * that declaration.
 */
const UConverterSharedData _HZData={
    sizeof(UConverterSharedData), /* size of this structure */
        ~((uint32_t) 0), /* all bits set - presumably a reference counter that is never released; TODO confirm */
        NULL,
        NULL,
        &_HZStaticData,
        FALSE,
        &_HZImpl,
        0
};
 639
 640 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */