[apple/icu.git] / icuSources / common / ucnvhz.c

/*  
**********************************************************************
*   Copyright (C) 2000-2006, 2008 International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   file name:  ucnvhz.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000oct16
*   created by: Ram Viswanadha
*   10/31/2000  Ram     Implemented offsets logic function
*   
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION

#include "cmemory.h"
#include "unicode/ucnv.h"
#include "unicode/ucnv_cb.h"
#include "unicode/uset.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"

#define UCNV_TILDE 0x7E          /* ~ */
#define UCNV_OPEN_BRACE 0x7B     /* { */
#define UCNV_CLOSE_BRACE 0x7D   /* } */
#define SB_ESCAPE    "\x7E\x7D"
#define DB_ESCAPE    "\x7E\x7B"
#define TILDE_ESCAPE "\x7E\x7E"
#define ESC_LEN       2


/*
 * Appends `len` bytes taken from `strToAppend` to the converter output.
 *
 * For each byte: if targetIndex is still below targetLength the byte is
 * stored at args->target[targetIndex] and targetIndex is advanced; otherwise
 * the byte is queued in args->converter->charErrorBuffer and *err is set to
 * U_BUFFER_OVERFLOW_ERROR.
 *
 * When args->offsets is non-NULL, the value (sourceIndex-1) is written through
 * a variable named `offsets`, which is NOT a macro parameter: it must exist in
 * the scope where the macro is expanded, and it is advanced by the macro.
 *
 * The macro modifies `len`, `strToAppend` and `targetIndex` in place, so the
 * caller must pass modifiable lvalues and must not rely on their old values.
 */
#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){                             \
    while(len-->0){                                                                                                         \
        if(targetIndex < targetLength){                                                                                     \
            args->target[targetIndex] = (unsigned char) *strToAppend;                                                       \
            if(args->offsets!=NULL){                                                                                        \
                *(offsets++) = sourceIndex-1;                                                                               \
            }                                                                                                               \
            targetIndex++;                                                                                                  \
        }                                                                                                                   \
        else{                                                                                                               \
            args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
            *err =U_BUFFER_OVERFLOW_ERROR;                                                                                  \
        }                                                                                                                   \
        strToAppend++;                                                                                                      \
    }                                                                                                                       \
}


/*
 * Per-converter state for HZ, stored in UConverter.extraInfo.
 */
typedef struct{
    UConverter* gbConverter;   /* sub-converter opened as "ibm-1386" in _HZOpen; does the actual GB mapping */
    int32_t targetIndex;       /* only cleared in _HZReset; not otherwise read in this file */
    int32_t sourceIndex;       /* only cleared in _HZReset; not otherwise read in this file */
    UBool isEscapeAppended;    /* from-Unicode: TRUE once a "~{" or "~}" escape has been written */
    UBool isStateDBCS;         /* to-Unicode: TRUE after "~{" and until "~}" is read */
    UBool isTargetUCharDBCS;   /* from-Unicode: TRUE when the last character written was double-byte */
    UBool isEmptySegment;      /* to-Unicode: set on "~{" / "~}", cleared by any other input; used to flag an empty segment at "~}" */
}UConverterDataHZ;


/*
 * Initializes an HZ converter: clears the conversion state fields, allocates
 * the zero-filled UConverterDataHZ block and opens the "ibm-1386"
 * sub-converter into it.
 *
 * On allocation failure cnv->extraInfo is left NULL and *errorCode is set to
 * U_MEMORY_ALLOCATION_ERROR. Any failure of ucnv_open() is reported through
 * *errorCode by that call. name, locale and options are not used here.
 */
static void 
_HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){
    UConverterDataHZ *hzData;

    cnv->toUnicodeStatus = 0;
    cnv->fromUnicodeStatus = 0;
    cnv->mode = 0;
    cnv->fromUChar32 = 0x0000;

    hzData = (UConverterDataHZ *)uprv_malloc(sizeof(UConverterDataHZ));
    cnv->extraInfo = hzData;
    if(hzData == NULL){
        *errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    uprv_memset(hzData, 0, sizeof(UConverterDataHZ));
    hzData->gbConverter = ucnv_open("ibm-1386", errorCode);
}

/*
 * Releases the HZ-specific state: closes the sub-converter, frees the state
 * block unless it lives inside a clone buffer (isExtraLocal), and clears
 * cnv->extraInfo. Does nothing when there is no state block.
 */
static void 
_HZClose(UConverter *cnv){
    UConverterDataHZ *hzData = (UConverterDataHZ *)cnv->extraInfo;

    if(hzData == NULL) {
        return;
    }

    ucnv_close(hzData->gbConverter);
    if(!cnv->isExtraLocal) {
        uprv_free(hzData);
    }
    cnv->extraInfo = NULL;
}

/*
 * Resets converter state. The to-Unicode side is reset when
 * choice <= UCNV_RESET_TO_UNICODE; the from-Unicode side is reset when
 * choice != UCNV_RESET_TO_UNICODE. The HZ state block is only touched when
 * it exists.
 */
static void 
_HZReset(UConverter *cnv, UConverterResetChoice choice){
    UConverterDataHZ *hzData = (UConverterDataHZ *)cnv->extraInfo;
    UBool resetToUnicode = (UBool)(choice <= UCNV_RESET_TO_UNICODE);
    UBool resetFromUnicode = (UBool)(choice != UCNV_RESET_TO_UNICODE);

    if(resetToUnicode) {
        cnv->toUnicodeStatus = 0;
        cnv->mode = 0;
        if(hzData != NULL){
            hzData->isStateDBCS = FALSE;
            hzData->isEmptySegment = FALSE;
        }
    }

    if(resetFromUnicode) {
        cnv->fromUnicodeStatus = 0;
        cnv->fromUChar32 = 0x0000;
        if(hzData != NULL){
            hzData->isEscapeAppended = FALSE;
            hzData->targetIndex = 0;
            hzData->sourceIndex = 0;
            hzData->isTargetUCharDBCS = FALSE;
        }
    }
}

/**************************************HZ Encoding*************************************************
* Rules for HZ encoding
* 
*   In ASCII mode, a byte is interpreted as an ASCII character, unless a
*   '~' is encountered. The character '~' is an escape character. By
*   convention, it must be immediately followed ONLY by '~', '{' or '\n'
*   (<LF>), with the following special meaning.

*   1. The escape sequence '~~' is interpreted as a '~'.
*   2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
*   3. The escape sequence '~\n' is a line-continuation marker to be
*     consumed with no output produced.
*   In GB mode, characters are interpreted two bytes at a time as (pure)
*   GB codes until the escape-from-GB code '~}' is read. This code
*   switches the mode from GB back to ASCII.  (Note that the escape-
*   from-GB code '~}' ($7E7D) is outside the defined GB range.)
*
*   Source: RFC 1842
*/


/*
 * Converts HZ bytes from args->source into UChars at args->target, optionally
 * recording source offsets in args->offsets.
 *
 * State carried between calls:
 *   - args->converter->mode == UCNV_TILDE : the previous byte was an
 *     unconsumed '~' (start of an escape).
 *   - args->converter->toUnicodeStatus    : first byte of a double-byte
 *     character whose second byte has not been read yet (0 if none).
 *   - myData->isStateDBCS                 : TRUE between "~{" and "~}".
 *   - myData->isEmptySegment              : TRUE right after "~{" or "~}"
 *     until anything else is seen; "~}" while it is TRUE is reported as an
 *     error.
 *
 * Errors set in *err: U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 * U_BUFFER_OVERFLOW_ERROR when the target is full, U_PARSE_ERROR for an empty
 * segment, U_INVALID_CHAR_FOUND / U_ILLEGAL_CHAR_FOUND for unmappable or
 * malformed input (offending bytes are stored in toUBytes/toULength), and
 * U_INTERNAL_PROGRAM_ERROR for an inconsistent DBCS error state.
 *
 * On return args->source and args->target are advanced past what was
 * consumed/produced (except on the argument-error return).
 */
static void 
UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
                                                            UErrorCode* err){
    char tempBuf[2];
    const char *mySource = ( char *) args->source;
    UChar *myTarget = args->target;
    const char *mySourceLimit = args->sourceLimit;
    UChar32 targetUniChar = 0x0000;
    UChar mySourceChar = 0x0000;
    UConverterDataHZ* myData = NULL;
    tempBuf[0]=0; 
    tempBuf[1]=0;
    if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    /* Read the converter's state only after args->converter has been validated. */
    myData=(UConverterDataHZ*)(args->converter->extraInfo);
    
    while(mySource< mySourceLimit){
        
        if(myTarget < args->targetLimit){
            
            mySourceChar= (unsigned char) *mySource++;

            switch(mySourceChar){
                case 0x0A:
                    /* LF ends a pending '~' escape. The LF itself is written to the output
                     * in both cases.
                     * NOTE(review): the rules comment above this function says "~\n"
                     * produces no output, and no offset is recorded for this UChar -
                     * confirm whether either is intended. */
                    if(args->converter->mode ==UCNV_TILDE){
                        args->converter->mode=0;
                        
                    }
                    *(myTarget++)=(UChar)mySourceChar;
                    myData->isEmptySegment = FALSE;
                    continue;
            
                case UCNV_TILDE:
                    if(args->converter->mode ==UCNV_TILDE){
                        /* "~~" -> a single '~'.
                         * NOTE(review): no offset is recorded for this UChar - confirm. */
                        *(myTarget++)=(UChar)mySourceChar;
                        args->converter->mode=0;
                        myData->isEmptySegment = FALSE;
                        continue;
                        
                    }
                    else if(args->converter->toUnicodeStatus !=0){
                        /* a lead byte is pending: treat '~' as its trail byte below */
                        args->converter->mode=0;
                        break;
                    }
                    else{
                        /* start of an escape; wait for the next byte */
                        args->converter->mode = UCNV_TILDE;
                        continue;
                    }
                
                
                case UCNV_OPEN_BRACE:
                    if(args->converter->mode == UCNV_TILDE){
                        /* "~{" : switch to double-byte mode */
                        args->converter->mode=0;
                        myData->isStateDBCS = TRUE;
                        myData->isEmptySegment = TRUE;
                        continue;
                    }
                    else{
                        break;
                    }
               
                
                case UCNV_CLOSE_BRACE:
                    if(args->converter->mode == UCNV_TILDE){
                        /* "~}" : switch back to single-byte mode */
                        args->converter->mode=0;
                         myData->isStateDBCS = FALSE;
                         if (myData->isEmptySegment) {
                            myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
                            *err = U_PARSE_ERROR;   /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
                            args->converter->toUBytes[0] = UCNV_TILDE;
                            args->converter->toUBytes[1] = mySourceChar;
                            args->converter->toULength = 2;
                            goto EXIT;
                         }
                         myData->isEmptySegment = TRUE;
                        continue;
                    }
                    else{
                        break;
                    }
                
                default:
                     /* if the first byte is equal to TILDE and the trail byte
                     * is not a valid byte then it is an error condition
                     */
                    if(args->converter->mode == UCNV_TILDE){
                        args->converter->mode=0;
                        mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
                        myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
                        /* targetUniChar still holds its value from the previous
                         * iteration (or 0) when the error code is chosen at SAVE_STATE */
                        goto SAVE_STATE;
                    }
                    
                    break;

            }
             
            myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
            if(myData->isStateDBCS){
                if(args->converter->toUnicodeStatus == 0x00){
                    /* first byte of a pair: remember it and wait for the second */
                    args->converter->toUnicodeStatus = (UChar) mySourceChar;
                    continue;
                }
                else{
                    /* second byte: set the high bit on both bytes and look the pair up
                     * in the sub-converter */
                    tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
                    tempBuf[1] = (char) (mySourceChar+0x80);
                    mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
                    args->converter->toUnicodeStatus =0x00;
                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
                        tempBuf, 2, args->converter->useFallback);
                }
            }
            else{
                /* NOTE(review): this to-Unicode path tests fromUnicodeStatus, which this
                 * file only ever sets to 0 - confirm that toUnicodeStatus was not meant. */
                if(args->converter->fromUnicodeStatus == 0x00){
                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
                        mySource - 1, 1, args->converter->useFallback);
                }
                else{
                    goto SAVE_STATE;
                }

            }
            if(targetUniChar < 0xfffe){
                if(args->offsets) {
                    /* offset of the first source byte of this character: one byte back,
                     * or two when in double-byte mode */
                    args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
                }

                *(myTarget++)=(UChar)targetUniChar;
            }
            else if(targetUniChar>=0xfffe){
SAVE_STATE:
                /* 0xfffe selects U_INVALID_CHAR_FOUND; anything else is reported as illegal */
                if(targetUniChar == 0xfffe){
                    *err = U_INVALID_CHAR_FOUND;
                }
                else{
                    *err = U_ILLEGAL_CHAR_FOUND;
                }
                if(myData->isStateDBCS){
                    /* this should never occur since isStateDBCS is set to true 
                     * only after tempBuf[0] and tempBuf[1]
                     * are set to the input ..  just to please BEAM 
                     */
                    if(tempBuf[0]==0 || tempBuf[1]==0){
                        *err = U_INTERNAL_PROGRAM_ERROR;
                    }else{
                        /* report the original (7-bit) source bytes */
                        args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
                        args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
                        args->converter->toULength=2;
                    }
                }
                else{
                    args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                    args->converter->toULength=1;
                }
                break;
            }
        }
        else{
            *err =U_BUFFER_OVERFLOW_ERROR;
            break;
        }
    }
EXIT:
    args->target = myTarget;
    args->source = mySource;
}


/*
 * Converts UChars from args->source into HZ bytes at args->target, optionally
 * recording source offsets in args->offsets.
 *
 * Each character is mapped through the GB sub-converter. A '~' is written as
 * "~~". Before a character whose byte width differs from the previous one
 * (or before the first character, while no escape has been written yet) the
 * matching escape is emitted: "~{" for double-byte, "~}" for single-byte.
 * Double-byte results are written with 0x80 subtracted from each byte.
 *
 * Bytes that do not fit in the target are queued in
 * args->converter->charErrorBuffer and *err becomes U_BUFFER_OVERFLOW_ERROR.
 * Unmappable input sets U_INVALID_CHAR_FOUND, unpaired surrogates set
 * U_ILLEGAL_CHAR_FOUND; in both cases the offending code point is left in
 * args->converter->fromUChar32. A lead surrogate at the end of the input is
 * kept in fromUChar32 with U_ZERO_ERROR so the next call can pair it.
 *
 * On return args->source and args->target are advanced past what was
 * consumed/produced (except on the argument-error return).
 */
static void 
UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
                                                      UErrorCode * err){
    const UChar *mySource = args->source;
    char *myTarget = args->target;
    int32_t* offsets = args->offsets;
    int32_t mySourceIndex = 0;
    int32_t myTargetIndex = 0;
    int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
    int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
    int32_t length=0;
    uint32_t targetUniChar = 0x0000;
    UChar32 mySourceChar = 0x0000;
    UConverterDataHZ *myConverterData = NULL;
    UBool isTargetUCharDBCS = FALSE;
    UBool oldIsTargetUCharDBCS = FALSE;
    int len =0;
    const char* escSeq=NULL;
    
    if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    /* Read the converter's state only after args->converter has been validated. */
    myConverterData=(UConverterDataHZ*)args->converter->extraInfo;
    isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
    oldIsTargetUCharDBCS = isTargetUCharDBCS;

    if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
        /* A lead surrogate is pending from the previous call. Seed mySourceChar
         * with it so that the shared exit path below (which stores mySourceChar
         * back into fromUChar32) keeps the lead instead of overwriting it with 0
         * when there is no more input or the lead turns out to be unmatched. */
        mySourceChar = args->converter->fromUChar32;
        goto getTrail;
    }
    /*writing the char to the output stream */
    while (mySourceIndex < mySourceLength){
        targetUniChar = missingCharMarker;
        if (myTargetIndex < targetLength){
            
            mySourceChar = (UChar) mySource[mySourceIndex++];
            

            oldIsTargetUCharDBCS = isTargetUCharDBCS;
            if(mySourceChar ==UCNV_TILDE){
                /* '~' is the escape character, so it is written doubled */
                len = ESC_LEN;
                escSeq = TILDE_ESCAPE;
                CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
                continue;
            }
            else{
                length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
                    mySourceChar,&targetUniChar,args->converter->useFallback);

            }
            /* only DBCS or SBCS characters are expected*/
            /* DB characters with high bit set to 1 are expected */
            if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){
                targetUniChar= missingCharMarker;
            }
            if (targetUniChar != missingCharMarker){
               myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);     
                 /* emit an escape when the byte width changes, or when none has been
                  * written yet */
                 if(oldIsTargetUCharDBCS != isTargetUCharDBCS || !myConverterData->isEscapeAppended ){
                    /*Shifting from a double byte to single byte mode*/
                    if(!isTargetUCharDBCS){
                        len =ESC_LEN;
                        escSeq = SB_ESCAPE;
                        CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
                        myConverterData->isEscapeAppended = TRUE;
                    }
                    else{ /* Shifting from a single byte to double byte mode*/
                        len =ESC_LEN;
                        escSeq = DB_ESCAPE;
                        CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
                        myConverterData->isEscapeAppended = TRUE;
                        
                    }
                }
            
                if(isTargetUCharDBCS){
                    /* write both bytes with 0x80 subtracted; whatever does not
                     * fit goes to the overflow buffer */
                    if( myTargetIndex <targetLength){
                        myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80);
                        if(offsets){
                            *(offsets++) = mySourceIndex-1;
                        }
                        if(myTargetIndex < targetLength){
                            myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80);
                            if(offsets){
                                *(offsets++) = mySourceIndex-1;
                            }
                        }else{
                            args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
                            *err = U_BUFFER_OVERFLOW_ERROR;
                        } 
                    }else{
                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80);
                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
                        *err = U_BUFFER_OVERFLOW_ERROR;
                    }

                }else{
                    if( myTargetIndex <targetLength){
                        myTarget[myTargetIndex++] = (char) (targetUniChar );
                        if(offsets){
                            *(offsets++) = mySourceIndex-1;
                        }
                        
                    }else{
                        args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
                        *err = U_BUFFER_OVERFLOW_ERROR;
                    }
                }

            }
            else{
                /* oops.. the code point is unassigned */
                /*Handle surrogates */
                /*check if the char is a First surrogate*/
                if(UTF_IS_SURROGATE(mySourceChar)) {
                    if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
                        args->converter->fromUChar32=mySourceChar;
getTrail:
                        /*look ahead to find the trail surrogate*/
                        if(mySourceIndex <  mySourceLength) {
                            /* test the following code unit */
                            UChar trail=(UChar) args->source[mySourceIndex];
                            if(UTF_IS_SECOND_SURROGATE(trail)) {
                                ++mySourceIndex;
                                mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
                                args->converter->fromUChar32=0x00;
                                /* there are no surrogates in GB2312*/
                                *err = U_INVALID_CHAR_FOUND;
                                /* exit this condition tree */
                            } else {
                                /* this is an unmatched lead code unit (1st surrogate) */
                                /* callback(illegal) */
                                *err=U_ILLEGAL_CHAR_FOUND;
                            }
                        } else {
                            /* no more input */
                            *err = U_ZERO_ERROR;
                        }
                    } else {
                        /* this is an unmatched trail code unit (2nd surrogate) */
                        /* callback(illegal) */
                        *err=U_ILLEGAL_CHAR_FOUND;
                    }
                } else {
                    /* callback(unassigned) for a BMP code point */
                    *err = U_INVALID_CHAR_FOUND;
                }

                /* hand the offending (or pending) code point back through the converter */
                args->converter->fromUChar32=mySourceChar;
                break;
            }
        }
        else{
            *err = U_BUFFER_OVERFLOW_ERROR;
            break;
        }
        targetUniChar=missingCharMarker;
    }

    args->target += myTargetIndex;
    args->source += mySourceIndex;
    myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
}

/*
 * Writes the substitution character for an unconvertible input.
 *
 * If the output is currently in double-byte mode, "~}" is written first and
 * the mode flag is cleared, so the single substitution byte
 * (cnv->subChars[0]) is emitted in single-byte mode. The bytes are handed to
 * ucnv_cbFromUWriteBytes() together with offsetIndex and err.
 */
static void
_HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) {
    UConverter *cnv = args->converter;
    UConverterDataHZ *hzData = (UConverterDataHZ *) cnv->extraInfo;
    char buffer[4];
    int32_t count = 0;

    if(hzData->isTargetUCharDBCS){
        buffer[count++] = UCNV_TILDE;
        buffer[count++] = UCNV_CLOSE_BRACE;
        hzData->isTargetUCharDBCS = FALSE;
    }
    buffer[count++] = (char)cnv->subChars[0];

    ucnv_cbFromUWriteBytes(args, buffer, count, offsetIndex, err);
}

/*
 * Structure for cloning an HZ converter into a single memory block.
 * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
 * and then ucnv_safeClone() of the sub-converter may additionally align
 * subCnv inside the cloneHZStruct, for which we need the deadSpace after
 * subCnv. This is because UAlignedMemory may be larger than the actually
 * necessary alignment size for the platform.
 * The other cloneHZStruct fields will not be moved around,
 * and are aligned properly with cloneHZStruct's alignment.
 */
struct cloneHZStruct
{
    UConverter cnv;            /* the cloned HZ converter itself; returned by _HZ_SafeClone */
    UConverter subCnv;         /* storage handed to ucnv_safeClone() for the sub-converter clone */
    UAlignedMemory deadSpace;  /* padding so the sub-converter clone may be re-aligned inside this block */
    UConverterDataHZ mydata;   /* the clone's HZ state; cnv.extraInfo points here */
};


/*
 * Completes a clone of an HZ converter inside the caller-provided buffer.
 *
 * Returns 0 without doing anything when *status already indicates failure.
 * When *pBufferSize is 0 this is a preflight request: the required size
 * (sizeof(struct cloneHZStruct)) is stored in *pBufferSize and 0 is returned.
 * Otherwise stackBuffer is treated as a struct cloneHZStruct: the HZ state is
 * copied into it, marked as living inside the buffer (isExtraLocal), and the
 * sub-converter is cloned into the embedded subCnv storage. The result of
 * that nested clone is reported through *status.
 */
static UConverter * 
_HZ_SafeClone(const UConverter *cnv, 
              void *stackBuffer, 
              int32_t *pBufferSize, 
              UErrorCode *status)
{
    struct cloneHZStruct *clone;
    const UConverterDataHZ *sourceData;
    int32_t subCnvSize;

    if (U_FAILURE(*status)){
        return 0;
    }

    if (*pBufferSize == 0){
        /* preflight: only report how much memory is needed */
        *pBufferSize = (int32_t)sizeof(struct cloneHZStruct);
        return 0;
    }

    /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
    clone = (struct cloneHZStruct *)stackBuffer;
    sourceData = (const UConverterDataHZ *)cnv->extraInfo;

    uprv_memcpy(&clone->mydata, sourceData, sizeof(UConverterDataHZ));
    clone->cnv.extraInfo = &clone->mydata;
    clone->cnv.isExtraLocal = TRUE;

    /* deep-clone the sub-converter; the available size includes the padding */
    subCnvSize = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory));
    clone->mydata.gbConverter =
        ucnv_safeClone(sourceData->gbConverter, &clone->subCnv, &subCnvSize, status);

    return &clone->cnv;
}

/*
 * Adds the code points this converter handles to the set behind `sa`:
 * the tilde (0x7e), which the HZ converter handles itself, followed by
 * whatever the sub-converter's own getUnicodeSet implementation adds.
 */
static void
_HZ_GetUnicodeSet(const UConverter *cnv,
                  const USetAdder *sa,
                  UConverterUnicodeSet which,
                  UErrorCode *pErrorCode) {
    UConverter *gbConverter;

    /* the tilde '~' is hardcoded in the converter */
    sa->add(sa->set, 0x7e);

    /* delegate everything else to the sub-converter */
    gbConverter = ((UConverterDataHZ*)cnv->extraInfo)->gbConverter;
    gbConverter->sharedData->impl->getUnicodeSet(gbConverter, sa, which, pErrorCode);
}

/*
 * Function table for the HZ converter. The same to-Unicode and from-Unicode
 * routines are registered twice each, and NULL marks slots for which this
 * file supplies no function.
 * NOTE(review): the meaning of each slot is fixed by the UConverterImpl
 * declaration, which is not in this file - check it before reordering.
 */
static const UConverterImpl _HZImpl={

    UCNV_HZ,
    
    NULL,
    NULL,
    
    _HZOpen,
    _HZClose,
    _HZReset,
    
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_toUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
    NULL,
    
    NULL,
    NULL,
    _HZ_WriteSub,
    _HZ_SafeClone,
    _HZ_GetUnicodeSet
};

/*
 * Static descriptor for the converter named "HZ" (type UCNV_HZ).
 * NOTE(review): the positional fields follow the UConverterStaticData
 * declaration, which is not in this file; { 0x1a, 0, 0, 0 } is presumably the
 * substitution byte sequence read by _HZ_WriteSub via subChars[0] - confirm.
 */
static const UConverterStaticData _HZStaticData={
    sizeof(UConverterStaticData),
        "HZ",
         0, 
         UCNV_IBM, 
         UCNV_HZ, 
         1, 
         4,
        { 0x1a, 0, 0, 0 },
        1,
        FALSE, 
        FALSE,
        0,
        0,
        { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */

};
            
            
/*
 * Shared-data record for the HZ converter (the only non-static symbol defined
 * in this file); it ties together the static descriptor (_HZStaticData) and
 * the function table (_HZImpl).
 * NOTE(review): the remaining positional fields follow the
 * UConverterSharedData declaration, which is not in this file.
 */
const UConverterSharedData _HZData={
    sizeof(UConverterSharedData),
        ~((uint32_t) 0),
        NULL, 
        NULL, 
        &_HZStaticData, 
        FALSE, 
        &_HZImpl, 
        0
};

#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
Commit	Line	Data
b75a7d8f A	1	/*
b75a7d8f A	2	**********************************************************************
d5d484b0	3	* Copyright (C) 2000-2006, 2008 International Business Machines
b75a7d8f A	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* file name: ucnvhz.c
	7	* encoding: US-ASCII
	8	* tab size: 8 (not used)
	9	* indentation:4
	10	*
	11	* created on: 2000oct16
	12	* created by: Ram Viswanadha
	13	* 10/31/2000 Ram Implemented offsets logic function
	14	*
	15	*/
	16
	17	#include "unicode/utypes.h"
	18
374ca955	19	#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f A	20
b75a7d8f A	21	#include "cmemory.h"
b75a7d8f A	22	#include "unicode/ucnv.h"
	23	#include "unicode/ucnv_cb.h"
	24	#include "unicode/uset.h"
	25	#include "ucnv_bld.h"
	26	#include "ucnv_cnv.h"
	27
	28	#define UCNV_TILDE 0x7E /* ~ */
	29	#define UCNV_OPEN_BRACE 0x7B /* { */
	30	#define UCNV_CLOSE_BRACE 0x7D /* } */
	31	#define SB_ESCAPE "\x7E\x7D"
	32	#define DB_ESCAPE "\x7E\x7B"
	33	#define TILDE_ESCAPE "\x7E\x7E"
	34	#define ESC_LEN 2
	35
	36
	37	#define CONCAT_ESCAPE_MACRO( args, targetIndex,targetLength,strToAppend, err, len,sourceIndex){ \
	38	while(len-->0){ \
	39	if(targetIndex < targetLength){ \
	40	args->target[targetIndex] = (unsigned char) *strToAppend; \
	41	if(args->offsets!=NULL){ \
	42	*(offsets++) = sourceIndex-1; \
	43	} \
	44	targetIndex++; \
	45	} \
	46	else{ \
	47	args->converter->charErrorBuffer[(int)args->converter->charErrorBufferLength++] = (unsigned char) *strToAppend; \
	48	*err =U_BUFFER_OVERFLOW_ERROR; \
	49	} \
	50	strToAppend++; \
	51	} \
	52	}
	53
	54
	55	typedef struct{
73c04bcf	56	UConverter* gbConverter;
b75a7d8f A	57	int32_t targetIndex;
	58	int32_t sourceIndex;
	59	UBool isEscapeAppended;
b75a7d8f A	60	UBool isStateDBCS;
b75a7d8f A	61	UBool isTargetUCharDBCS;
d5d484b0	62	UBool isEmptySegment;
b75a7d8f A	63	}UConverterDataHZ;
	64
	65
	66
	67	static void
	68	_HZOpen(UConverter cnv, const char name,const char locale,uint32_t options, UErrorCode errorCode){
	69	cnv->toUnicodeStatus = 0;
	70	cnv->fromUnicodeStatus= 0;
	71	cnv->mode=0;
374ca955	72	cnv->fromUChar32=0x0000;
73c04bcf	73	cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ));
b75a7d8f	74	if(cnv->extraInfo != NULL){
73c04bcf	75	uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ));
b75a7d8f	76	((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode);
b75a7d8f	77	}
b75a7d8f A	78	else {
	79	*errorCode = U_MEMORY_ALLOCATION_ERROR;
	80	return;
	81	}
	82	}
	83
	84	static void
	85	_HZClose(UConverter *cnv){
	86	if(cnv->extraInfo != NULL) {
	87	ucnv_close (((UConverterDataHZ *) (cnv->extraInfo))->gbConverter);
	88	if(!cnv->isExtraLocal) {
	89	uprv_free(cnv->extraInfo);
	90	}
	91	cnv->extraInfo = NULL;
	92	}
	93	}
	94
	95	static void
	96	_HZReset(UConverter *cnv, UConverterResetChoice choice){
	97	if(choice<=UCNV_RESET_TO_UNICODE) {
	98	cnv->toUnicodeStatus = 0;
	99	cnv->mode=0;
	100	if(cnv->extraInfo != NULL){
	101	((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE;
d5d484b0	102	((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE;
b75a7d8f A	103	}
	104	}
	105	if(choice!=UCNV_RESET_TO_UNICODE) {
	106	cnv->fromUnicodeStatus= 0;
374ca955	107	cnv->fromUChar32=0x0000;
b75a7d8f A	108	if(cnv->extraInfo != NULL){
	109	((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE;
	110	((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0;
	111	((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0;
	112	((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE;
	113	}
	114	}
	115	}
	116
	117	/************************************HZ Encoding***********************************************
	118	* Rules for HZ encoding
	119	*
	120	* In ASCII mode, a byte is interpreted as an ASCII character, unless a
	121	* '~' is encountered. The character '~' is an escape character. By
	122	* convention, it must be immediately followed ONLY by '~', '{' or '\n'
	123	* (<LF>), with the following special meaning.
	124
	125	* 1. The escape sequence '~~' is interpreted as a '~'.
	126	* 2. The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
	127	* 3. The escape sequence '~\n' is a line-continuation marker to be
	128	* consumed with no output produced.
	129	* In GB mode, characters are interpreted two bytes at a time as (pure)
	130	* GB codes until the escape-from-GB code '~}' is read. This code
	131	* switches the mode from GB back to ASCII. (Note that the escape-
	132	* from-GB code '~}' ($7E7D) is outside the defined GB range.)
	133	*
	134	* Source: RFC 1842
	135	*/
	136
	137
	138	static void
	139	UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
	140	UErrorCode* err){
374ca955	141	char tempBuf[2];
b75a7d8f A	142	const char mySource = ( char ) args->source;
b75a7d8f A	143	UChar *myTarget = args->target;
b75a7d8f A	144	const char *mySourceLimit = args->sourceLimit;
	145	UChar32 targetUniChar = 0x0000;
	146	UChar mySourceChar = 0x0000;
	147	UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
73c04bcf A	148	tempBuf[0]=0;
73c04bcf A	149	tempBuf[1]=0;
374ca955	150	if ((args->converter == NULL) \|\| (args->targetLimit < args->target) \|\| (mySourceLimit < args->source)){
b75a7d8f A	151	*err = U_ILLEGAL_ARGUMENT_ERROR;
	152	return;
	153	}
	154
374ca955	155	while(mySource< mySourceLimit){
b75a7d8f A	156
	157	if(myTarget < args->targetLimit){
	158
	159	mySourceChar= (unsigned char) *mySource++;
	160
	161	switch(mySourceChar){
	162	case 0x0A:
	163	if(args->converter->mode ==UCNV_TILDE){
	164	args->converter->mode=0;
	165
	166	}
	167	*(myTarget++)=(UChar)mySourceChar;
d5d484b0	168	myData->isEmptySegment = FALSE;
b75a7d8f A	169	continue;
	170
	171	case UCNV_TILDE:
	172	if(args->converter->mode ==UCNV_TILDE){
	173	*(myTarget++)=(UChar)mySourceChar;
	174	args->converter->mode=0;
d5d484b0	175	myData->isEmptySegment = FALSE;
b75a7d8f A	176	continue;
	177
	178	}
	179	else if(args->converter->toUnicodeStatus !=0){
	180	args->converter->mode=0;
	181	break;
	182	}
	183	else{
	184	args->converter->mode = UCNV_TILDE;
	185	continue;
	186	}
	187
	188
	189	case UCNV_OPEN_BRACE:
	190	if(args->converter->mode == UCNV_TILDE){
	191	args->converter->mode=0;
	192	myData->isStateDBCS = TRUE;
d5d484b0	193	myData->isEmptySegment = TRUE;
b75a7d8f A	194	continue;
	195	}
	196	else{
	197	break;
	198	}
	199
	200
	201	case UCNV_CLOSE_BRACE:
	202	if(args->converter->mode == UCNV_TILDE){
	203	args->converter->mode=0;
	204	myData->isStateDBCS = FALSE;
d5d484b0 A	205	if (myData->isEmptySegment) {
	206	myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
	207	err = U_PARSE_ERROR; / temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
	208	args->converter->toUBytes[0] = UCNV_TILDE;
	209	args->converter->toUBytes[1] = mySourceChar;
	210	args->converter->toULength = 2;
	211	goto EXIT;
	212	}
	213	myData->isEmptySegment = TRUE;
b75a7d8f A	214	continue;
	215	}
	216	else{
	217	break;
	218	}
	219
	220	default:
	221	/* if the first byte is equal to TILDE and the trail byte
	222	* is not a valid byte then it is an error condition
	223	*/
	224	if(args->converter->mode == UCNV_TILDE){
	225	args->converter->mode=0;
	226	mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) \| ((mySourceChar & 0x00ff)+0x80));
d5d484b0	227	myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
b75a7d8f A	228	goto SAVE_STATE;
	229	}
	230
	231	break;
	232
	233	}
	234
d5d484b0	235	myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
b75a7d8f A	236	if(myData->isStateDBCS){
	237	if(args->converter->toUnicodeStatus == 0x00){
	238	args->converter->toUnicodeStatus = (UChar) mySourceChar;
	239	continue;
	240	}
	241	else{
	242	tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
	243	tempBuf[1] = (char) (mySourceChar+0x80);
	244	mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) \| ((mySourceChar & 0x00ff)+0x80));
	245	args->converter->toUnicodeStatus =0x00;
374ca955 A	246	targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
374ca955 A	247	tempBuf, 2, args->converter->useFallback);
b75a7d8f A	248	}
	249	}
	250	else{
	251	if(args->converter->fromUnicodeStatus == 0x00){
374ca955 A	252	targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
374ca955 A	253	mySource - 1, 1, args->converter->useFallback);
b75a7d8f A	254	}
	255	else{
	256	goto SAVE_STATE;
	257	}
	258
	259	}
	260	if(targetUniChar < 0xfffe){
	261	if(args->offsets) {
	262	args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 1-(myData->isStateDBCS));
	263	}
	264
	265	*(myTarget++)=(UChar)targetUniChar;
	266	}
	267	else if(targetUniChar>=0xfffe){
	268	SAVE_STATE:
374ca955 A	269	if(targetUniChar == 0xfffe){
	270	*err = U_INVALID_CHAR_FOUND;
	271	}
	272	else{
	273	*err = U_ILLEGAL_CHAR_FOUND;
	274	}
	275	if(myData->isStateDBCS){
73c04bcf A	276	/* this should never occur since isStateDBCS is set to true
	277	* only after tempBuf[0] and tempBuf[1]
	278	* are set to the input .. just to please BEAM
	279	*/
	280	if(tempBuf[0]==0 \|\| tempBuf[1]==0){
	281	*err = U_INTERNAL_PROGRAM_ERROR;
	282	}else{
	283	args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
	284	args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
	285	args->converter->toULength=2;
	286	}
374ca955 A	287	}
	288	else{
	289	args->converter->toUBytes[0] = (uint8_t)mySourceChar;
	290	args->converter->toULength=1;
b75a7d8f	291	}
374ca955	292	break;
b75a7d8f A	293	}
	294	}
	295	else{
	296	*err =U_BUFFER_OVERFLOW_ERROR;
	297	break;
	298	}
	299	}
d5d484b0	300	EXIT:
b75a7d8f A	301	args->target = myTarget;
	302	args->source = mySource;
	303	}
	304
	305
	306	static void
	307	UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args,
	308	UErrorCode * err){
	309	const UChar *mySource = args->source;
374ca955	310	char *myTarget = args->target;
b75a7d8f A	311	int32_t* offsets = args->offsets;
	312	int32_t mySourceIndex = 0;
	313	int32_t myTargetIndex = 0;
374ca955	314	int32_t targetLength = (int32_t)(args->targetLimit - myTarget);
b75a7d8f A	315	int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source);
	316	int32_t length=0;
	317	uint32_t targetUniChar = 0x0000;
73c04bcf	318	UChar32 mySourceChar = 0x0000;
b75a7d8f A	319	UConverterDataHZ myConverterData=(UConverterDataHZ)args->converter->extraInfo;
	320	UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS;
	321	UBool oldIsTargetUCharDBCS = isTargetUCharDBCS;
b75a7d8f A	322	int len =0;
	323	const char* escSeq=NULL;
	324
374ca955	325	if ((args->converter == NULL) \|\| (args->targetLimit < myTarget) \|\| (args->sourceLimit < args->source)){
b75a7d8f A	326	*err = U_ILLEGAL_ARGUMENT_ERROR;
	327	return;
	328	}
374ca955	329	if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) {
b75a7d8f A	330	goto getTrail;
	331	}
	332	/writing the char to the output stream /
	333	while (mySourceIndex < mySourceLength){
	334	targetUniChar = missingCharMarker;
	335	if (myTargetIndex < targetLength){
	336
73c04bcf	337	mySourceChar = (UChar) mySource[mySourceIndex++];
b75a7d8f A	338
	339
	340	oldIsTargetUCharDBCS = isTargetUCharDBCS;
	341	if(mySourceChar ==UCNV_TILDE){
	342	/concatEscape(args, &myTargetIndex, &targetLength,"\x7E\x7E",err,2,&mySourceIndex);/
	343	len = ESC_LEN;
	344	escSeq = TILDE_ESCAPE;
	345	CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
	346	continue;
	347	}
	348	else{
374ca955	349	length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData,
b75a7d8f A	350	mySourceChar,&targetUniChar,args->converter->useFallback);
	351
	352	}
	353	/* only DBCS or SBCS characters are expected*/
	354	/* DB haracters with high bit set to 1 are expected */
	355	if(length > 2 \|\| length==0 \|\|(((targetUniChar & 0x8080) != 0x8080)&& length==2)){
	356	targetUniChar= missingCharMarker;
	357	}
	358	if (targetUniChar != missingCharMarker){
	359	myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF);
	360	if(oldIsTargetUCharDBCS != isTargetUCharDBCS \|\| !myConverterData->isEscapeAppended ){
	361	/Shifting from a double byte to single byte mode/
	362	if(!isTargetUCharDBCS){
	363	len =ESC_LEN;
	364	escSeq = SB_ESCAPE;
	365	CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
73c04bcf	366	myConverterData->isEscapeAppended = TRUE;
b75a7d8f A	367	}
	368	else{ /* Shifting from a single byte to double byte mode*/
	369	len =ESC_LEN;
	370	escSeq = DB_ESCAPE;
	371	CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex);
73c04bcf	372	myConverterData->isEscapeAppended = TRUE;
b75a7d8f A	373
	374	}
	375	}
	376
	377	if(isTargetUCharDBCS){
	378	if( myTargetIndex <targetLength){
374ca955	379	myTarget[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80);
b75a7d8f A	380	if(offsets){
	381	*(offsets++) = mySourceIndex-1;
	382	}
	383	if(myTargetIndex < targetLength){
374ca955	384	myTarget[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80);
b75a7d8f A	385	if(offsets){
	386	*(offsets++) = mySourceIndex-1;
	387	}
	388	}else{
	389	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
	390	*err = U_BUFFER_OVERFLOW_ERROR;
	391	}
	392	}else{
	393	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80);
	394	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80);
	395	*err = U_BUFFER_OVERFLOW_ERROR;
	396	}
	397
	398	}else{
	399	if( myTargetIndex <targetLength){
374ca955	400	myTarget[myTargetIndex++] = (char) (targetUniChar );
b75a7d8f A	401	if(offsets){
	402	*(offsets++) = mySourceIndex-1;
	403	}
	404
	405	}else{
	406	args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar;
	407	*err = U_BUFFER_OVERFLOW_ERROR;
	408	}
	409	}
	410
	411	}
	412	else{
374ca955	413	/* oops.. the code point is unassigned */
b75a7d8f A	414	/Handle surrogates /
	415	/check if the char is a First surrogate/
	416	if(UTF_IS_SURROGATE(mySourceChar)) {
	417	if(UTF_IS_SURROGATE_FIRST(mySourceChar)) {
374ca955	418	args->converter->fromUChar32=mySourceChar;
b75a7d8f A	419	getTrail:
	420	/look ahead to find the trail surrogate/
	421	if(mySourceIndex < mySourceLength) {
	422	/* test the following code unit */
	423	UChar trail=(UChar) args->source[mySourceIndex];
	424	if(UTF_IS_SECOND_SURROGATE(trail)) {
	425	++mySourceIndex;
374ca955 A	426	mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail);
374ca955 A	427	args->converter->fromUChar32=0x00;
b75a7d8f A	428	/* there are no surrogates in GB2312*/
b75a7d8f A	429	*err = U_INVALID_CHAR_FOUND;
b75a7d8f A	430	/* exit this condition tree */
	431	} else {
	432	/* this is an unmatched lead code unit (1st surrogate) */
	433	/* callback(illegal) */
b75a7d8f A	434	*err=U_ILLEGAL_CHAR_FOUND;
	435	}
	436	} else {
	437	/* no more input */
	438	*err = U_ZERO_ERROR;
b75a7d8f A	439	}
	440	} else {
	441	/* this is an unmatched trail code unit (2nd surrogate) */
	442	/* callback(illegal) */
b75a7d8f A	443	*err=U_ILLEGAL_CHAR_FOUND;
b75a7d8f A	444	}
374ca955 A	445	} else {
	446	/* callback(unassigned) for a BMP code point */
	447	*err = U_INVALID_CHAR_FOUND;
b75a7d8f A	448	}
b75a7d8f A	449
374ca955 A	450	args->converter->fromUChar32=mySourceChar;
374ca955 A	451	break;
b75a7d8f A	452	}
	453	}
	454	else{
	455	*err = U_BUFFER_OVERFLOW_ERROR;
	456	break;
	457	}
	458	targetUniChar=missingCharMarker;
	459	}
b75a7d8f A	460
	461	args->target += myTargetIndex;
	462	args->source += mySourceIndex;
	463	myConverterData->isTargetUCharDBCS = isTargetUCharDBCS;
	464	}
	465
	466	static void
	467	_HZ_WriteSub(UConverterFromUnicodeArgs args, int32_t offsetIndex, UErrorCode err) {
	468	UConverter *cnv = args->converter;
	469	UConverterDataHZ convData=(UConverterDataHZ ) cnv->extraInfo;
	470	char *p;
	471	char buffer[4];
	472	p = buffer;
	473
	474	if( convData->isTargetUCharDBCS){
	475	*p++= UCNV_TILDE;
	476	*p++= UCNV_CLOSE_BRACE;
	477	convData->isTargetUCharDBCS=FALSE;
	478	}
73c04bcf	479	*p++= (char)cnv->subChars[0];
b75a7d8f A	480
	481	ucnv_cbFromUWriteBytes(args,
	482	buffer, (int32_t)(p - buffer),
	483	offsetIndex, err);
	484	}
	485
73c04bcf A	486	/*
	487	* Structure for cloning an HZ converter into a single memory block.
	488	* ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct,
	489	* and then ucnv_safeClone() of the sub-converter may additionally align
	490	* subCnv inside the cloneHZStruct, for which we need the deadSpace after
	491	* subCnv. This is because UAlignedMemory may be larger than the actually
	492	* necessary alignment size for the platform.
	493	* The other cloneHZStruct fields will not be moved around,
	494	* and are aligned properly with cloneHZStruct's alignment.
	495	*/
374ca955	496	struct cloneHZStruct
b75a7d8f A	497	{
b75a7d8f A	498	UConverter cnv;
b75a7d8f	499	UConverter subCnv;
73c04bcf	500	UAlignedMemory deadSpace;
b75a7d8f A	501	UConverterDataHZ mydata;
	502	};
	503
	504
	505	static UConverter *
	506	_HZ_SafeClone(const UConverter *cnv,
	507	void *stackBuffer,
	508	int32_t *pBufferSize,
	509	UErrorCode *status)
	510	{
374ca955 A	511	struct cloneHZStruct * localClone;
374ca955 A	512	int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct);
b75a7d8f A	513
	514	if (U_FAILURE(*status)){
	515	return 0;
	516	}
	517
	518	if (pBufferSize == 0){ / 'preflighting' request - set needed size into pBufferSize /
	519	*pBufferSize = bufferSizeNeeded;
	520	return 0;
	521	}
	522
374ca955	523	localClone = (struct cloneHZStruct *)stackBuffer;
73c04bcf	524	/* ucnv.c/ucnv_safeClone() copied the main UConverter already */
b75a7d8f A	525
	526	uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ));
	527	localClone->cnv.extraInfo = &localClone->mydata;
	528	localClone->cnv.isExtraLocal = TRUE;
	529
	530	/* deep-clone the sub-converter */
73c04bcf	531	size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */
b75a7d8f A	532	((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter =
	533	ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status);
	534
	535	return &localClone->cnv;
	536	}
	537
	538	static void
	539	_HZ_GetUnicodeSet(const UConverter *cnv,
73c04bcf	540	const USetAdder *sa,
b75a7d8f A	541	UConverterUnicodeSet which,
	542	UErrorCode *pErrorCode) {
	543	/* the tilde '~' is hardcoded in the converter */
374ca955	544	sa->add(sa->set, 0x7e);
b75a7d8f A	545
	546	/* add all of the code points that the sub-converter handles */
	547	((UConverterDataHZ*)cnv->extraInfo)->
	548	gbConverter->sharedData->impl->
	549	getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter,
374ca955	550	sa, which, pErrorCode);
b75a7d8f A	551	}
	552
	553	static const UConverterImpl _HZImpl={
	554
	555	UCNV_HZ,
	556
	557	NULL,
	558	NULL,
	559
	560	_HZOpen,
	561	_HZClose,
	562	_HZReset,
	563
	564	UConverter_toUnicode_HZ_OFFSETS_LOGIC,
	565	UConverter_toUnicode_HZ_OFFSETS_LOGIC,
	566	UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
	567	UConverter_fromUnicode_HZ_OFFSETS_LOGIC,
	568	NULL,
	569
	570	NULL,
	571	NULL,
	572	_HZ_WriteSub,
	573	_HZ_SafeClone,
	574	_HZ_GetUnicodeSet
	575	};
	576
	577	static const UConverterStaticData _HZStaticData={
	578	sizeof(UConverterStaticData),
	579	"HZ",
	580	0,
	581	UCNV_IBM,
	582	UCNV_HZ,
	583	1,
	584	4,
	585	{ 0x1a, 0, 0, 0 },
	586	1,
	587	FALSE,
	588	FALSE,
	589	0,
	590	0,
	591	{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */
	592
	593	};
	594
	595
	596	const UConverterSharedData _HZData={
	597	sizeof(UConverterSharedData),
	598	~((uint32_t) 0),
	599	NULL,
	600	NULL,
	601	&_HZStaticData,
	602	FALSE,
	603	&_HZImpl,
	604	0
	605	};
	606
	607	#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */