icuSources/common/ucnvhz.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 2000-2015, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   file name:  ucnvhz.c
   9 *   encoding:   UTF-8
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2000oct16
  14 *   created by: Ram Viswanadha
  15 *   10/31/2000  Ram     Implemented offsets logic function
  16 *
  17 */
  18
  19 #include "unicode/utypes.h"
  20
  21 #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
  22
  23 #include "cmemory.h"
  24 #include "unicode/ucnv.h"
  25 #include "unicode/ucnv_cb.h"
  26 #include "unicode/uset.h"
  27 #include "unicode/utf16.h"
  28 #include "ucnv_bld.h"
  29 #include "ucnv_cnv.h"
  30 #include "ucnv_imp.h"
  31
/* Byte values of the characters that make up HZ escape sequences. */
#define UCNV_TILDE 0x7E          /* ~ */
#define UCNV_OPEN_BRACE 0x7B     /* { */
#define UCNV_CLOSE_BRACE 0x7D   /* } */
/* Two-byte escape sequences written by the fromUnicode path. */
#define SB_ESCAPE    "\x7E\x7D"  /* "~}": emitted when shifting to single-byte output */
#define DB_ESCAPE    "\x7E\x7B"  /* "~{": emitted when shifting to double-byte output */
#define TILDE_ESCAPE "\x7E\x7E"  /* "~~": emitted for a literal '~' */
#define ESC_LEN       2          /* number of bytes in each of the escape strings above */
  39
  40
  41 #define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){                             \
  42     while(len-->0){                                                                                                         \
  43         if(targetIndex < targetLength){                                                                                     \
  44             args->target[targetIndex] = (unsigned char) *strToAppend;                                                       \
  45             if(args->offsets!=NULL){                                                                                        \
  46                 *(offsets++) = sourceIndex-1;                                                                               \
  47             }                                                                                                               \
  48             targetIndex++;                                                                                                  \
  49         }                                                                                                                   \
  50         else{                                                                                                               \
  51             args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
  52             *err =U_BUFFER_OVERFLOW_ERROR;                                                                                  \
  53         }                                                                                                                   \
  54         strToAppend++;                                                                                                      \
  55     }                                                                                                                       \
  56 }
  57
  58
/*
 * Per-converter HZ state, stored in UConverter.extraInfo.
 * Allocated zero-filled in _HZOpen and copied wholesale in _HZ_SafeClone.
 */
typedef struct{
    UConverter* gbConverter;    /* "GBK" sub-converter opened in _HZOpen; its sharedData does the double-byte lookups */
    int32_t targetIndex;        /* only reset (to 0) in _HZReset within this file */
    int32_t sourceIndex;        /* only reset (to 0) in _HZReset within this file */
    UBool isEscapeAppended;     /* fromUnicode: TRUE once a "~}" or "~{" escape has been written */
    UBool isStateDBCS;          /* toUnicode: TRUE after "~{" and FALSE after "~}" */
    UBool isTargetUCharDBCS;    /* fromUnicode: TRUE if the last converted character was written as two bytes */
    UBool isEmptySegment;       /* toUnicode: TRUE right after "~{" or "~}" until the segment yields anything */
}UConverterDataHZ;
  68
  69
  70 U_CDECL_BEGIN
  71 static void  U_CALLCONV
  72 _HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){
  73     UConverter *gbConverter;
  74     if(pArgs->onlyTestIsLoadable) {
  75         ucnv_canCreateConverter("GBK", errorCode);  /* errorCode carries result */
  76         return;
  77     }
  78     gbConverter = ucnv_open("GBK", errorCode);
  79     if(U_FAILURE(*errorCode)) {
  80         return;
  81     }
  82     cnv->toUnicodeStatus = 0;
  83     cnv->fromUnicodeStatus= 0;
  84     cnv->mode=0;
  85     cnv->fromUChar32=0x0000;
  86     cnv->extraInfo = uprv_calloc(1, sizeof(UConverterDataHZ));
  87     if(cnv->extraInfo != NULL){
  88         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter;
  89     }
  90     else {
  91         ucnv_close(gbConverter);
  92         *errorCode = U_MEMORY_ALLOCATION_ERROR;
  93         return;
  94     }
  95 }
  96
  97 static void  U_CALLCONV
  98 _HZClose(UConverter *cnv){
  99     if(cnv->extraInfo != NULL) {
 100         ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
 101         if(!cnv->isExtraLocal) {
 102             uprv_free(cnv->extraInfo);
 103         }
 104         cnv->extraInfo = NULL;
 105     }
 106 }
 107
 108 static void  U_CALLCONV
 109 _HZReset(UConverter *cnv, UConverterResetChoice choice){
 110     if(choice<=UCNV_RESET_TO_UNICODE) {
 111         cnv->toUnicodeStatus = 0;
 112         cnv->mode=0;
 113         if(cnv->extraInfo != NULL){
 114             ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
 115             ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
 116         }
 117     }
 118     if(choice!=UCNV_RESET_TO_UNICODE) {
 119         cnv->fromUnicodeStatus= 0;
 120         cnv->fromUChar32=0x0000;
 121         if(cnv->extraInfo != NULL){
 122             ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
 123             ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
 124             ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
 125             ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
 126         }
 127     }
 128 }
 129
 130 /**************************************HZ Encoding*************************************************
 131 * Rules for HZ encoding
 132 *
 133 *   In ASCII mode, a byte is interpreted as an ASCII character, unless a
 134 *   '~' is encountered. The character '~' is an escape character. By
 135 *   convention, it must be immediately followed ONLY by '~', '{' or '\n'
 136 *   (<LF>), with the following special meaning.
 137
 138 *   1. The escape sequence '~~' is interpreted as a '~'.
 139 *   2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
 140 *   3. The escape sequence '~\n' is a line-continuation marker to be
 141 *     consumed with no output produced.
 142 *   In GB mode, characters are interpreted two bytes at a time as (pure)
 143 *   GB codes until the escape-from-GB code '~}' is read. This code
 144 *   switches the mode from GB back to ASCII.  (Note that the escape-
 145 *   from-GB code '~}' ($7E7D) is outside the defined GB range.)
 146 *
 147 *   Source: RFC 1842
 148 *
 149 *   Note that the formal syntax in RFC 1842 is invalid. I assume that the
 150 *   intended definition of single-byte-segment is as follows (pedberg):
 151 *   single-byte-segment = single-byte-seq 1*single-byte-char
 152 */
 153
 154
/*
 * Converts HZ bytes from args->source into UChars at args->target.
 *
 * The loop runs until the source is exhausted, the target is full
 * (*err = U_BUFFER_OVERFLOW_ERROR), or an error is found. On every exit
 * args->source and args->target are updated to the current positions.
 * If args->offsets is non-NULL, one source offset is stored per output UChar.
 *
 * State that survives between calls:
 *   - args->converter->mode == UCNV_TILDE: a '~' was consumed and the byte
 *     that follows it has not been seen yet.
 *   - args->converter->toUnicodeStatus != 0: a DBCS lead byte is pending
 *     (stored as leadByte | 0x100).
 *   - myData->isStateDBCS: inside a "~{" ... "~}" segment.
 *   - myData->isEmptySegment: the last thing seen was "~{" or "~}" and the
 *     segment has produced nothing yet.
 *
 * Errors reported through *err, with the offending bytes placed in
 * args->converter->toUBytes / toULength:
 *   - U_ILLEGAL_ESCAPE_SEQUENCE: "~{" or "~}" closing an empty segment, or a
 *     '~' followed by a byte other than '\n', '~', '{' or '}'.
 *   - U_ILLEGAL_CHAR_FOUND: a non-ASCII byte in single-byte mode, or a DBCS
 *     lead/trail byte outside the accepted ranges (targetUniChar == 0xffff).
 *   - U_INVALID_CHAR_FOUND: the GBK lookup returned 0xfffe.
 */
static void  U_CALLCONV
UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                                            UErrorCode* err){
    char tempBuf[2];
    const char *mySource = ( char *) args->source;
    UChar *myTarget = args->target;
    const char *mySourceLimit = args->sourceLimit;
    UChar32 targetUniChar = 0x0000;
    int32_t mySourceChar = 0x0000;
    UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
    tempBuf[0]=0;
    tempBuf[1]=0;

    /* Calling code already handles this situation. */
    /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }*/

    while(mySource< mySourceLimit){

        if(myTarget < args->targetLimit){

            mySourceChar= (unsigned char) *mySource++;

            if(args->converter->mode == UCNV_TILDE) {
                /* second byte after ~ */
                args->converter->mode=0;
                switch(mySourceChar) {
                case 0x0A:
                    /* no output for ~\n (line-continuation marker) */
                    continue;
                case UCNV_TILDE:
                    /* "~~" yields a single '~'; the offset points at the first '~' */
                    if(args->offsets) {
                        args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
                    }
                    *(myTarget++)=(UChar)mySourceChar;
                    myData->isEmptySegment = FALSE;
                    continue;
                case UCNV_OPEN_BRACE:
                case UCNV_CLOSE_BRACE:
                    /* "~{" enters DBCS mode, "~}" leaves it */
                    myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
                    if (myData->isEmptySegment) {
                        myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
                        *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                        args->converter->toUCallbackReason = UCNV_IRREGULAR;
                        args->converter->toUBytes[0] = UCNV_TILDE;
                        args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
                        args->converter->toULength = 2;
                        args->target = myTarget;
                        args->source = mySource;
                        return;
                    }
                    myData->isEmptySegment = TRUE;
                    continue;
                default:
                     /* if the first byte is equal to TILDE and the trail byte
                     * is not a valid byte then it is an error condition
                     */
                    /*
                     * Ticket 5691: consistent illegal sequences:
                     * - We include at least the first byte in the illegal sequence.
                     * - If any of the non-initial bytes could be the start of a character,
                     *   we stop the illegal sequence before the first one of those.
                     */
                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
                    *err = U_ILLEGAL_ESCAPE_SEQUENCE;
                    args->converter->toUBytes[0] = UCNV_TILDE;
                    if( myData->isStateDBCS ?
                            (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
                            mySourceChar <= 0x7f
                    ) {
                        /* The current byte could be the start of a character: Back it out. */
                        args->converter->toULength = 1;
                        --mySource;
                    } else {
                        /* Include the current byte in the illegal sequence. */
                        args->converter->toUBytes[1] = static_cast<uint8_t>(mySourceChar);
                        args->converter->toULength = 2;
                    }
                    args->target = myTarget;
                    args->source = mySource;
                    return;
                }
            } else if(myData->isStateDBCS) {
                if(args->converter->toUnicodeStatus == 0x00){
                    /* lead byte */
                    if(mySourceChar == UCNV_TILDE) {
                        args->converter->mode = UCNV_TILDE;
                    } else {
                        /* add another bit to distinguish a 0 byte from not having seen a lead byte */
                        args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
                        myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
                    }
                    continue;
                }
                else{
                    /* trail byte */
                    int leadIsOk, trailIsOk;
                    uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
                    targetUniChar = 0xffff;
                    /*
                     * Ticket 5691: consistent illegal sequences:
                     * - We include at least the first byte in the illegal sequence.
                     * - If any of the non-initial bytes could be the start of a character,
                     *   we stop the illegal sequence before the first one of those.
                     *
                     * In HZ DBCS, if the second byte is in the 21..7e range,
                     * we report only the first byte as the illegal sequence.
                     * Otherwise we convert or report the pair of bytes.
                     */
                    /* unsigned-wraparound range checks: lead in 21..7d, trail in 21..7e */
                    leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
                    trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
                    if (leadIsOk && trailIsOk) {
                        /* adding 0x80 to each byte forms the byte pair that is looked up in the GBK table */
                        tempBuf[0] = (char) (leadByte+0x80) ;
                        tempBuf[1] = (char) (mySourceChar+0x80);
                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
                            tempBuf, 2, args->converter->useFallback);
                        /* keep both original bytes so the error path below can report the pair */
                        mySourceChar= (leadByte << 8) | mySourceChar;
                    } else if (trailIsOk) {
                        /* report a single illegal byte and continue with the following DBCS starter byte */
                        --mySource;
                        mySourceChar = (int32_t)leadByte;
                    } else {
                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
                        /* add another bit so that the code below writes 2 bytes in case of error */
                        mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
                    }
                    args->converter->toUnicodeStatus =0x00;
                }
            }
            else{
                if(mySourceChar == UCNV_TILDE) {
                    args->converter->mode = UCNV_TILDE;
                    continue;
                } else if(mySourceChar <= 0x7f) {
                    targetUniChar = (UChar)mySourceChar;  /* ASCII */
                    myData->isEmptySegment = FALSE; /* the segment has something valid */
                } else {
                    targetUniChar = 0xffff;
                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
                }
            }
            if(targetUniChar < 0xfffe){
                if(args->offsets) {
                    /* in DBCS mode the character started one byte earlier (at the lead byte) */
                    args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
                }

                *(myTarget++)=(UChar)targetUniChar;
            }
            else /* targetUniChar>=0xfffe */ {
                if(targetUniChar == 0xfffe){
                    *err = U_INVALID_CHAR_FOUND;
                }
                else{
                    *err = U_ILLEGAL_CHAR_FOUND;
                }
                /* mySourceChar > 0xff means two bytes were involved; report both */
                if(mySourceChar > 0xff){
                    args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
                    args->converter->toUBytes[1] = (uint8_t)mySourceChar;
                    args->converter->toULength=2;
                }
                else{
                    args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                    args->converter->toULength=1;
                }
                break;
            }
        }
        else{
            *err =U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }

    args->target = myTarget;
    args->source = mySource;
}
 333
 334
 335 static void  U_CALLCONV
 336 UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
 337                                                       UErrorCode * err){
 338     const UChar *mySource = args->source;
 339     char *myTarget = args->target;
 340     int32_t* offsets = args->offsets;
 341     int32_t mySourceIndex = 0;
 342     int32_t myTargetIndex = 0;
 343     int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
 344     int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
 345     uint32_t targetUniChar = 0x0000;
 346     UChar32 mySourceChar = 0x0000;
 347     UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
 348     UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
 349     UBool oldIsTargetUCharDBCS;
 350     int len =0;
 351     const char* escSeq=NULL;
 352
 353     /* Calling code already handles this situation. */
 354     /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
 355         *err = U_ILLEGAL_ARGUMENT_ERROR;
 356         return;
 357     }*/
 358     if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
 359         goto getTrail;
 360     }
 361     /*writing the char to the output stream */
 362     while (mySourceIndex < mySourceLength){
 363         targetUniChar = missingCharMarker;
 364         if (myTargetIndex < targetLength){
 365
 366             mySourceChar = (UChar) mySource[mySourceIndex++];
 367
 368
 369             oldIsTargetUCharDBCS = isTargetUCharDBCS;
 370             if(mySourceChar ==UCNV_TILDE){
 371                 /*concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);*/
 372                 len = ESC_LEN;
 373                 escSeq = TILDE_ESCAPE;
 374                 CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 375                 continue;
 376             } else if(mySourceChar <= 0x7f) {
 377                 targetUniChar = mySourceChar;
 378             } else {
 379                 int32_t length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
 380                     mySourceChar,&targetUniChar,args->converter->useFallback);
 381                 /* we can only use lead bytes 21..7D and trail bytes 21..7E */
 382                 if( length == 2 &&
 383                     (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) &&
 384                     (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1)
 385                 ) {
 386                     targetUniChar -= 0x8080;
 387                 } else {
 388                     targetUniChar = missingCharMarker;
 389                 }
 390             }
 391             if (targetUniChar != missingCharMarker){
 392                myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
 393                  if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
 394                     /*Shifting from a double byte to single byte mode*/
 395                     if(!isTargetUCharDBCS){
 396                         len =ESC_LEN;
 397                         escSeq = SB_ESCAPE;
 398                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 399                         myConverterData->isEscapeAppended = TRUE;
 400                     }
 401                     else{ /* Shifting from a single byte to double byte mode*/
 402                         len =ESC_LEN;
 403                         escSeq = DB_ESCAPE;
 404                         CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
 405                         myConverterData->isEscapeAppended = TRUE;
 406
 407                     }
 408                 }
 409
 410                 if(isTargetUCharDBCS){
 411                     if( myTargetIndex <targetLength){
 412                         myTarget[myTargetIndex++] =(char) (targetUniChar >> 8);
 413                         if(offsets){
 414                             *(offsets++) = mySourceIndex-1;
 415                         }
 416                         if(myTargetIndex < targetLength){
 417                             myTarget[myTargetIndex++] =(char) targetUniChar;
 418                             if(offsets){
 419                                 *(offsets++) = mySourceIndex-1;
 420                             }
 421                         }else{
 422                             args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 423                             *err = U_BUFFER_OVERFLOW_ERROR;
 424                         }
 425                     }else{
 426                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8);
 427                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 428                         *err = U_BUFFER_OVERFLOW_ERROR;
 429                     }
 430
 431                 }else{
 432                     if( myTargetIndex <targetLength){
 433                         myTarget[myTargetIndex++] = (char) (targetUniChar );
 434                         if(offsets){
 435                             *(offsets++) = mySourceIndex-1;
 436                         }
 437
 438                     }else{
 439                         args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
 440                         *err = U_BUFFER_OVERFLOW_ERROR;
 441                     }
 442                 }
 443
 444             }
 445             else{
 446                 /* oops.. the code point is unassigned */
 447                 /*Handle surrogates */
 448                 /*check if the char is a First surrogate*/
 449                 if(U16_IS_SURROGATE(mySourceChar)) {
 450                     if(U16_IS_SURROGATE_LEAD(mySourceChar)) {
 451                         args->converter->fromUChar32=mySourceChar;
 452 getTrail:
 453                         /*look ahead to find the trail surrogate*/
 454                         if(mySourceIndex <  mySourceLength) {
 455                             /* test the following code unit */
 456                             UChar trail=(UChar) args->source[mySourceIndex];
 457                             if(U16_IS_TRAIL(trail)) {
 458                                 ++mySourceIndex;
 459                                 mySourceChar=U16_GET_SUPPLEMENTARY(args->converter->fromUChar32, trail);
 460                                 args->converter->fromUChar32=0x00;
 461                                 /* there are no surrogates in GB2312*/
 462                                 *err = U_INVALID_CHAR_FOUND;
 463                                 /* exit this condition tree */
 464                             } else {
 465                                 /* this is an unmatched lead code unit (1st surrogate) */
 466                                 /* callback(illegal) */
 467                                 *err=U_ILLEGAL_CHAR_FOUND;
 468                             }
 469                         } else {
 470                             /* no more input */
 471                             *err = U_ZERO_ERROR;
 472                         }
 473                     } else {
 474                         /* this is an unmatched trail code unit (2nd surrogate) */
 475                         /* callback(illegal) */
 476                         *err=U_ILLEGAL_CHAR_FOUND;
 477                     }
 478                 } else {
 479                     /* callback(unassigned) for a BMP code point */
 480                     *err = U_INVALID_CHAR_FOUND;
 481                 }
 482
 483                 args->converter->fromUChar32=mySourceChar;
 484                 break;
 485             }
 486         }
 487         else{
 488             *err = U_BUFFER_OVERFLOW_ERROR;
 489             break;
 490         }
 491         targetUniChar=missingCharMarker;
 492     }
 493
 494     args->target += myTargetIndex;
 495     args->source += mySourceIndex;
 496     myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
 497 }
 498
 499 static void U_CALLCONV
 500 _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
 501     UConverter *cnv = args->converter;
 502     UConverterDataHZ *convData=(UConverterDataHZ *) cnv->extraInfo;
 503     char *p;
 504     char buffer[4];
 505     p = buffer;
 506
 507     if( convData->isTargetUCharDBCS){
 508         *p++= UCNV_TILDE;
 509         *p++= UCNV_CLOSE_BRACE;
 510         convData->isTargetUCharDBCS=FALSE;
 511     }
 512     *p++= (char)cnv->subChars[0];
 513
 514     ucnv_cbFromUWriteBytes(args,
 515                            buffer, (int32_t)(p - buffer),
 516                            offsetIndex, err);
 517 }
 518
 519 /*
 520  * Structure for cloning an HZ converter into a single memory block.
 521  * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
 522  * and then ucnv_safeClone() of the sub-converter may additionally align
 523  * subCnv inside the cloneHZStruct, for which we need the deadSpace after
 524  * subCnv. This is because UAlignedMemory may be larger than the actually
 525  * necessary alignment size for the platform.
 526  * The other cloneHZStruct fields will not be moved around,
 527  * and are aligned properly with cloneHZStruct's alignment.
 528  */
struct cloneHZStruct
{
    UConverter cnv;             /* the cloned HZ converter; _HZ_SafeClone returns its address */
    UConverter subCnv;          /* storage handed to ucnv_safeClone() for the GBK sub-converter clone */
    UAlignedMemory deadSpace;   /* slack after subCnv; counted in the size passed to ucnv_safeClone() */
    UConverterDataHZ mydata;    /* copy of the HZ state; the clone's extraInfo points here */
};
 536
 537
 538 static UConverter *  U_CALLCONV
 539 _HZ_SafeClone(const UConverter *cnv,
 540               void *stackBuffer,
 541               int32_t *pBufferSize,
 542               UErrorCode *status)
 543 {
 544     struct cloneHZStruct * localClone;
 545     int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
 546
 547     if (U_FAILURE(*status)){
 548         return 0;
 549     }
 550
 551     if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
 552         *pBufferSize = bufferSizeNeeded;
 553         return 0;
 554     }
 555
 556     localClone = (struct cloneHZStruct *)stackBuffer;
 557     /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
 558
 559     uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
 560     localClone->cnv.extraInfo = &localClone->mydata;
 561     localClone->cnv.isExtraLocal = TRUE;
 562
 563     /* deep-clone the sub-converter */
 564     size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
 565     ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
 566         ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
 567
 568     return &localClone->cnv;
 569 }
 570
 571 static void U_CALLCONV
 572 _HZ_GetUnicodeSet(const UConverter *cnv,
 573                   const USetAdder *sa,
 574                   UConverterUnicodeSet which,
 575                   UErrorCode *pErrorCode) {
 576     /* HZ converts all of ASCII */
 577     sa->addRange(sa->set, 0, 0x7f);
 578
 579     /* add all of the code points that the sub-converter handles */
 580     ucnv_MBCSGetFilteredUnicodeSetForUnicode(
 581         ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData,
 582         sa, which, UCNV_SET_FILTER_HZ,
 583         pErrorCode);
 584 }
 585 U_CDECL_END
/*
 * Function table for the HZ converter.
 * The initializer is positional; the slot meanings noted below are inferred
 * from the functions placed in them - confirm against the UConverterImpl
 * declaration (not visible in this file).
 */
static const UConverterImpl _HZImpl={

    UCNV_HZ,

    NULL,
    NULL,

    _HZOpen,
    _HZClose,
    _HZReset,

    /* the same routine is installed twice for each direction; it fills in offsets only when args->offsets is set */
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    NULL,

    NULL,
    NULL,
    _HZ_WriteSub,
    _HZ_SafeClone,
    _HZ_GetUnicodeSet,
    NULL,
    NULL
};
 611
/*
 * Static description of the HZ converter.
 * The initializer is positional; see the UConverterStaticData declaration
 * (not visible in this file) for the meaning of each field.
 */
static const UConverterStaticData _HZStaticData={
    sizeof(UConverterStaticData),
        "HZ",
         0,
         UCNV_IBM,
         UCNV_HZ,
         1,
         4,
        { 0x1a, 0, 0, 0 }, /* NOTE(review): presumably the substitution bytes (0x1a) - confirm against the struct declaration */
        1,
        FALSE,
        FALSE,
        0,
        0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
 629
/* Shared-data object for the HZ converter, tying together its static data and function table. */
const UConverterSharedData _HZData=
        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_HZStaticData, &_HZImpl);
 632
 633 #endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION */