X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..b25be06635768807f8f693286fa73bb2297bb06c:/icuSources/common/ucnvhz.c?ds=sidebyside diff --git a/icuSources/common/ucnvhz.c b/icuSources/common/ucnvhz.c index 1dffd44d..074b4f4a 100644 --- a/icuSources/common/ucnvhz.c +++ b/icuSources/common/ucnvhz.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2003, International Business Machines +* Copyright (C) 2000-2009, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnvhz.c @@ -16,15 +16,15 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_LEGACY_CONVERSION +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION #include "cmemory.h" -#include "unicode/ucnv_err.h" #include "unicode/ucnv.h" #include "unicode/ucnv_cb.h" #include "unicode/uset.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" +#include "ucnv_imp.h" #define UCNV_TILDE 0x7E /* ~ */ #define UCNV_OPEN_BRACE 0x7B /* { */ @@ -54,33 +54,39 @@ typedef struct{ + UConverter* gbConverter; int32_t targetIndex; int32_t sourceIndex; UBool isEscapeAppended; - UConverter* gbConverter; UBool isStateDBCS; UBool isTargetUCharDBCS; + UBool isEmptySegment; }UConverterDataHZ; static void -_HZOpen(UConverter *cnv, const char *name,const char *locale,uint32_t options, UErrorCode *errorCode){ +_HZOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode){ + UConverter *gbConverter; + if(pArgs->onlyTestIsLoadable) { + ucnv_canCreateConverter("GBK", errorCode); /* errorCode carries result */ + return; + } + gbConverter = ucnv_open("GBK", errorCode); + if(U_FAILURE(*errorCode)) { + return; + } cnv->toUnicodeStatus = 0; cnv->fromUnicodeStatus= 0; cnv->mode=0; - cnv->fromUSurrogateLead=0x0000; - cnv->extraInfo = uprv_malloc (sizeof (UConverterDataHZ)); + cnv->fromUChar32=0x0000; + cnv->extraInfo = uprv_malloc(sizeof(UConverterDataHZ)); if(cnv->extraInfo != NULL){ - ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = ucnv_open("ibm-1386",errorCode); - ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; - ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; - ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; - ((UConverterDataHZ*)cnv->extraInfo)->sourceIndex = 0; - ((UConverterDataHZ*)cnv->extraInfo)->isTargetUCharDBCS = FALSE; + uprv_memset(cnv->extraInfo, 0, sizeof(UConverterDataHZ)); + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter = gbConverter; } - /* test for NULL */ else { + ucnv_close(gbConverter); *errorCode = U_MEMORY_ALLOCATION_ERROR; return; } @@ -104,11 +110,12 @@ _HZReset(UConverter *cnv, UConverterResetChoice choice){ cnv->mode=0; if(cnv->extraInfo != NULL){ ((UConverterDataHZ*)cnv->extraInfo)->isStateDBCS = FALSE; + ((UConverterDataHZ*)cnv->extraInfo)->isEmptySegment = FALSE; } } if(choice!=UCNV_RESET_TO_UNICODE) { cnv->fromUnicodeStatus= 0; - cnv->fromUSurrogateLead=0x0000; + cnv->fromUChar32=0x0000; if(cnv->extraInfo != NULL){ ((UConverterDataHZ*)cnv->extraInfo)->isEscapeAppended = FALSE; ((UConverterDataHZ*)cnv->extraInfo)->targetIndex = 0; @@ -136,122 +143,155 @@ _HZReset(UConverter *cnv, UConverterResetChoice choice){ * from-GB code '~}' ($7E7D) is outside the defined GB range.) * * Source: RFC 1842 +* +* Note that the formal syntax in RFC 1842 is invalid. I assume that the +* intended definition of single-byte-segment is as follows (pedberg): +* single-byte-segment = single-byte-seq 1*single-byte-char */ static void UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err){ - char tempBuf[3]; - const char* pBuf; + char tempBuf[2]; const char *mySource = ( char *) args->source; UChar *myTarget = args->target; - char *tempLimit = &tempBuf[3]; const char *mySourceLimit = args->sourceLimit; UChar32 targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; + int32_t mySourceChar = 0x0000; UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); - - if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){ + tempBuf[0]=0; + tempBuf[1]=0; + + /* Calling code already handles this situation. */ + /*if ((args->converter == NULL) || (args->targetLimit < args->target) || (mySourceLimit < args->source)){ *err = U_ILLEGAL_ARGUMENT_ERROR; return; - } + }*/ - while(mySource< args->sourceLimit){ + while(mySource< mySourceLimit){ if(myTarget < args->targetLimit){ mySourceChar= (unsigned char) *mySource++; - switch(mySourceChar){ + if(args->converter->mode == UCNV_TILDE) { + /* second byte after ~ */ + args->converter->mode=0; + switch(mySourceChar) { case 0x0A: - if(args->converter->mode ==UCNV_TILDE){ - args->converter->mode=0; - - } - *(myTarget++)=(UChar)mySourceChar; + /* no output for ~\n (line-continuation marker) */ continue; - case UCNV_TILDE: - if(args->converter->mode ==UCNV_TILDE){ - *(myTarget++)=(UChar)mySourceChar; - args->converter->mode=0; - continue; - - } - else if(args->converter->toUnicodeStatus !=0){ - args->converter->mode=0; - break; + if(args->offsets) { + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); } - else{ - args->converter->mode = UCNV_TILDE; - continue; - } - - + *(myTarget++)=(UChar)mySourceChar; + myData->isEmptySegment = FALSE; + continue; case UCNV_OPEN_BRACE: - if(args->converter->mode == UCNV_TILDE){ - args->converter->mode=0; - myData->isStateDBCS = TRUE; - continue; - } - else{ - break; - } - - case UCNV_CLOSE_BRACE: - if(args->converter->mode == UCNV_TILDE){ - args->converter->mode=0; - myData->isStateDBCS = FALSE; - continue; - } - else{ - break; + myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); + if (myData->isEmptySegment) { + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUCallbackReason = UCNV_IRREGULAR; + args->converter->toUBytes[0] = UCNV_TILDE; + args->converter->toUBytes[1] = mySourceChar; + args->converter->toULength = 2; + args->target = myTarget; + args->source = mySource; + return; } - + myData->isEmptySegment = TRUE; + continue; default: /* if the first byte is equal to TILDE and the trail byte * is not a valid byte then it is an error condition */ - if(args->converter->mode == UCNV_TILDE){ - args->converter->mode=0; - mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); - goto SAVE_STATE; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUBytes[0] = UCNV_TILDE; + if( myData->isStateDBCS ? + (0x21 <= mySourceChar && mySourceChar <= 0x7e) : + mySourceChar <= 0x7f + ) { + /* The current byte could be the start of a character: Back it out. */ + args->converter->toULength = 1; + --mySource; + } else { + /* Include the current byte in the illegal sequence. */ + args->converter->toUBytes[1] = mySourceChar; + args->converter->toULength = 2; } - - break; - - } - - if(myData->isStateDBCS){ + args->target = myTarget; + args->source = mySource; + return; + } + } else if(myData->isStateDBCS) { if(args->converter->toUnicodeStatus == 0x00){ - args->converter->toUnicodeStatus = (UChar) mySourceChar; + /* lead byte */ + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + } else { + /* add another bit to distinguish a 0 byte from not having seen a lead byte */ + args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); + myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ + } continue; } else{ - tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ; - tempBuf[1] = (char) (mySourceChar+0x80); - mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); + /* trail byte */ + int leadIsOk, trailIsOk; + uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; + targetUniChar = 0xffff; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In HZ DBCS, if the second byte is in the 21..7e range, + * we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); + trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + tempBuf[0] = (char) (leadByte+0x80) ; + tempBuf[1] = (char) (mySourceChar+0x80); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, + tempBuf, 2, args->converter->useFallback); + mySourceChar= (leadByte << 8) | mySourceChar; + } else if (trailIsOk) { + /* report a single illegal byte and continue with the following DBCS starter byte */ + --mySource; + mySourceChar = (int32_t)leadByte; + } else { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + } args->converter->toUnicodeStatus =0x00; - pBuf = &tempBuf[0]; - tempLimit = &tempBuf[2]+1; - targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, - &pBuf,tempLimit,args->converter->useFallback); } } else{ - if(args->converter->fromUnicodeStatus == 0x00){ - tempBuf[0] = (char) mySourceChar; - pBuf = &tempBuf[0]; - tempLimit = &tempBuf[1]; - targetUniChar = _MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, - &pBuf,tempLimit,args->converter->useFallback); - } - else{ - goto SAVE_STATE; + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = (UChar)mySourceChar; /* ASCII */ + myData->isEmptySegment = FALSE; /* the segment has something valid */ + } else { + targetUniChar = 0xffff; + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ } - } if(targetUniChar < 0xfffe){ if(args->offsets) { @@ -260,64 +300,23 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, *(myTarget++)=(UChar)targetUniChar; } - else if(targetUniChar>=0xfffe){ -SAVE_STATE: - { - const char *saveSource = args->source; - UChar *saveTarget = args->target; - int32_t *saveOffsets = args->offsets; - - UConverterCallbackReason reason; - int32_t currentOffset ; - int32_t saveIndex = (int32_t)(myTarget - args->target); - - args->converter->invalidCharLength=0; - - if(targetUniChar == 0xfffe){ - reason = UCNV_UNASSIGNED; - *err = U_INVALID_CHAR_FOUND; - } - else{ - reason = UCNV_ILLEGAL; - *err = U_ILLEGAL_CHAR_FOUND; - } - if(myData->isStateDBCS){ - - args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[0]-0x80); - args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)(tempBuf[1]-0x80); - currentOffset= (int32_t)(mySource - args->source -2); - - } - else{ - args->converter->invalidCharBuffer[args->converter->invalidCharLength++] = (char)mySourceChar; - currentOffset= (int32_t)(mySource - args->source -1); - } - args->offsets = args->offsets?args->offsets+(myTarget - args->target):0; - args->target = myTarget; - args->source = mySource; - myTarget = saveTarget; - args->converter->fromCharErrorBehaviour ( - args->converter->toUContext, - args, - args->converter->invalidCharBuffer, - args->converter->invalidCharLength, - reason, - err); - - if(args->offsets){ - args->offsets = saveOffsets; - - for (;saveIndex < (args->target - myTarget);saveIndex++) { - args->offsets[saveIndex] += currentOffset; - } - } - args->source = saveSource; - myTarget = args->target; - args->target = saveTarget; - args->offsets = saveOffsets; - if(U_FAILURE(*err)) - break; + else /* targetUniChar>=0xfffe */ { + if(targetUniChar == 0xfffe){ + *err = U_INVALID_CHAR_FOUND; + } + else{ + *err = U_ILLEGAL_CHAR_FOUND; } + if(mySourceChar > 0xff){ + args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); + args->converter->toUBytes[1] = (uint8_t)mySourceChar; + args->converter->toULength=2; + } + else{ + args->converter->toUBytes[0] = (uint8_t)mySourceChar; + args->converter->toULength=1; + } + break; } } else{ @@ -325,18 +324,6 @@ SAVE_STATE: break; } } - if((args->flush==TRUE) - && (mySource == mySourceLimit) - && ( args->converter->toUnicodeStatus !=0x00)){ - *err = U_TRUNCATED_CHAR_FOUND; - args->converter->toUnicodeStatus = 0x00; - } - /* Reset the state of converter if we consumed - * the source and flush is true - */ - if( (mySource == mySourceLimit) && args->flush){ - _HZReset(args->converter, UCNV_RESET_TO_UNICODE); - } args->target = myTarget; args->source = mySource; @@ -347,28 +334,27 @@ static void UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, UErrorCode * err){ const UChar *mySource = args->source; - unsigned char *myTarget = (unsigned char *) args->target; + char *myTarget = args->target; int32_t* offsets = args->offsets; int32_t mySourceIndex = 0; int32_t myTargetIndex = 0; - int32_t targetLength = (int32_t)(args->targetLimit - args->target); + int32_t targetLength = (int32_t)(args->targetLimit - myTarget); int32_t mySourceLength = (int32_t)(args->sourceLimit - args->source); int32_t length=0; uint32_t targetUniChar = 0x0000; - UChar32 mySourceChar = 0x0000,c=0x0000; + UChar32 mySourceChar = 0x0000; UConverterDataHZ *myConverterData=(UConverterDataHZ*)args->converter->extraInfo; UBool isTargetUCharDBCS = (UBool) myConverterData->isTargetUCharDBCS; UBool oldIsTargetUCharDBCS = isTargetUCharDBCS; - UConverterCallbackReason reason; - UBool isEscapeAppended =FALSE; int len =0; const char* escSeq=NULL; - if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)){ + /* Calling code already handles this situation. */ + /*if ((args->converter == NULL) || (args->targetLimit < myTarget) || (args->sourceLimit < args->source)){ *err = U_ILLEGAL_ARGUMENT_ERROR; return; - } - if(args->converter->fromUSurrogateLead!=0 && myTargetIndex < targetLength) { + }*/ + if(args->converter->fromUChar32!=0 && myTargetIndex < targetLength) { goto getTrail; } /*writing the char to the output stream */ @@ -376,7 +362,7 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, targetUniChar = missingCharMarker; if (myTargetIndex < targetLength){ - c=mySourceChar = (UChar) args->source[mySourceIndex++]; + mySourceChar = (UChar) mySource[mySourceIndex++]; oldIsTargetUCharDBCS = isTargetUCharDBCS; @@ -386,16 +372,21 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, escSeq = TILDE_ESCAPE; CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); continue; - } - else{ - length= _MBCSFromUChar32(myConverterData->gbConverter->sharedData, + } else if(mySourceChar <= 0x7f) { + length = 1; + targetUniChar = mySourceChar; + } else { + length= ucnv_MBCSFromUChar32(myConverterData->gbConverter->sharedData, mySourceChar,&targetUniChar,args->converter->useFallback); - - } - /* only DBCS or SBCS characters are expected*/ - /* DB haracters with high bit set to 1 are expected */ - if(length > 2 || length==0 ||(((targetUniChar & 0x8080) != 0x8080)&& length==2)){ - targetUniChar= missingCharMarker; + /* we can only use lead bytes 21..7D and trail bytes 21..7E */ + if( length == 2 && + (uint16_t)(targetUniChar - 0xa1a1) <= (0xfdfe - 0xa1a1) && + (uint8_t)(targetUniChar - 0xa1) <= (0xfe - 0xa1) + ) { + targetUniChar -= 0x8080; + } else { + targetUniChar = missingCharMarker; + } } if (targetUniChar != missingCharMarker){ myConverterData->isTargetUCharDBCS = isTargetUCharDBCS = (UBool)(targetUniChar>0x00FF); @@ -405,41 +396,41 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, len =ESC_LEN; escSeq = SB_ESCAPE; CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - myConverterData->isEscapeAppended =isEscapeAppended =TRUE; + myConverterData->isEscapeAppended = TRUE; } else{ /* Shifting from a single byte to double byte mode*/ len =ESC_LEN; escSeq = DB_ESCAPE; CONCAT_ESCAPE_MACRO(args, myTargetIndex, targetLength, escSeq,err,len,mySourceIndex); - myConverterData->isEscapeAppended =isEscapeAppended =TRUE; + myConverterData->isEscapeAppended = TRUE; } } if(isTargetUCharDBCS){ if( myTargetIndex target[myTargetIndex++] =(char) ((targetUniChar >> 8) -0x80); + myTarget[myTargetIndex++] =(char) (targetUniChar >> 8); if(offsets){ *(offsets++) = mySourceIndex-1; } if(myTargetIndex < targetLength){ - args->target[myTargetIndex++] =(char) ((targetUniChar & 0x00FF) -0x80); + myTarget[myTargetIndex++] =(char) targetUniChar; if(offsets){ *(offsets++) = mySourceIndex-1; } }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80); + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; *err = U_BUFFER_OVERFLOW_ERROR; } }else{ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) ((targetUniChar >> 8) -0x80); - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) ((targetUniChar & 0x00FF) -0x80); + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =(char) (targetUniChar >> 8); + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = (char) targetUniChar; *err = U_BUFFER_OVERFLOW_ERROR; } }else{ if( myTargetIndex target[myTargetIndex++] = (char) (targetUniChar ); + myTarget[myTargetIndex++] = (char) (targetUniChar ); if(offsets){ *(offsets++) = mySourceIndex-1; } @@ -452,16 +443,12 @@ UConverter_fromUnicode_HZ_OFFSETS_LOGIC (UConverterFromUnicodeArgs * args, } else{ - /* oops.. the code point is unassingned - * set the error and reason - */ - reason =UCNV_UNASSIGNED; - *err =U_INVALID_CHAR_FOUND; + /* oops.. the code point is unassigned */ /*Handle surrogates */ /*check if the char is a First surrogate*/ if(UTF_IS_SURROGATE(mySourceChar)) { if(UTF_IS_SURROGATE_FIRST(mySourceChar)) { - args->converter->fromUSurrogateLead=(UChar)mySourceChar; + args->converter->fromUChar32=mySourceChar; getTrail: /*look ahead to find the trail surrogate*/ if(mySourceIndex < mySourceLength) { @@ -469,87 +456,32 @@ getTrail: UChar trail=(UChar) args->source[mySourceIndex]; if(UTF_IS_SECOND_SURROGATE(trail)) { ++mySourceIndex; - mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUSurrogateLead, trail); - args->converter->fromUSurrogateLead=0x00; + mySourceChar=UTF16_GET_PAIR_VALUE(args->converter->fromUChar32, trail); + args->converter->fromUChar32=0x00; /* there are no surrogates in GB2312*/ *err = U_INVALID_CHAR_FOUND; - reason=UCNV_UNASSIGNED; /* exit this condition tree */ } else { /* this is an unmatched lead code unit (1st surrogate) */ /* callback(illegal) */ - reason=UCNV_ILLEGAL; *err=U_ILLEGAL_CHAR_FOUND; } } else { /* no more input */ *err = U_ZERO_ERROR; - break; } } else { /* this is an unmatched trail code unit (2nd surrogate) */ /* callback(illegal) */ - reason=UCNV_ILLEGAL; *err=U_ILLEGAL_CHAR_FOUND; } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; } - { - int32_t saveIndex=0; - int32_t currentOffset = (args->offsets) ? *(offsets-1)+1:0; - char * saveTarget = args->target; - const UChar* saveSource = args->source; - int32_t *saveOffsets = args->offsets; - - args->converter->invalidUCharLength = 0; - - if(mySourceChar>0xffff){ - args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((mySourceChar)>>10)+0xd7c0); - args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(uint16_t)(((mySourceChar)&0x3ff)|0xdc00); - } - else{ - args->converter->invalidUCharBuffer[args->converter->invalidUCharLength++] =(UChar)mySourceChar; - } - - myConverterData->isTargetUCharDBCS = (UBool)isTargetUCharDBCS; - args->target += myTargetIndex; - args->source += mySourceIndex; - args->offsets = args->offsets?offsets:0; - - - saveIndex = myTargetIndex; - /*copies current values for the ErrorFunctor to update */ - /*Calls the ErrorFunctor */ - args->converter->fromUCharErrorBehaviour ( args->converter->fromUContext, - args, - args->converter->invalidUCharBuffer, - args->converter->invalidUCharLength, - (UChar32) (mySourceChar), - reason, - err); - /*Update the local Indexes so that the conversion - *can restart at the right points - */ - myTargetIndex = (int32_t)(args->target - (char*)myTarget); - mySourceIndex = (int32_t)(args->source - mySource); - args->offsets = saveOffsets; - saveIndex = myTargetIndex - saveIndex; - if(args->offsets){ - args->offsets = saveOffsets; - while(saveIndex-->0){ - *offsets = currentOffset; - offsets++; - } - } - isTargetUCharDBCS=myConverterData->isTargetUCharDBCS; - args->source = saveSource; - args->target = saveTarget; - args->offsets = saveOffsets; - args->converter->fromUSurrogateLead=0x00; - if (U_FAILURE (*err)) - break; - - } + args->converter->fromUChar32=mySourceChar; + break; } } else{ @@ -558,19 +490,6 @@ getTrail: } targetUniChar=missingCharMarker; } - /*If at the end of conversion we are still carrying state information - *flush is TRUE, we can deduce that the input stream is truncated - */ - if (args->converter->fromUSurrogateLead !=0 && (mySourceIndex == mySourceLength) && args->flush){ - *err = U_TRUNCATED_CHAR_FOUND; - args->converter->toUnicodeStatus = 0x00; - } - /* Reset the state of converter if we consumed - * the source and flush is true - */ - if( (mySourceIndex == mySourceLength) && args->flush){ - _HZReset(args->converter, UCNV_RESET_FROM_UNICODE); - } args->target += myTargetIndex; args->source += mySourceIndex; @@ -590,20 +509,28 @@ _HZ_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *e *p++= UCNV_CLOSE_BRACE; convData->isTargetUCharDBCS=FALSE; } - *p++= cnv->subChar[0]; + *p++= (char)cnv->subChars[0]; ucnv_cbFromUWriteBytes(args, buffer, (int32_t)(p - buffer), offsetIndex, err); } -/* structure for SafeClone calculations */ -struct cloneStruct +/* + * Structure for cloning an HZ converter into a single memory block. + * ucnv_safeClone() of the HZ converter will align the entire cloneHZStruct, + * and then ucnv_safeClone() of the sub-converter may additionally align + * subCnv inside the cloneHZStruct, for which we need the deadSpace after + * subCnv. This is because UAlignedMemory may be larger than the actually + * necessary alignment size for the platform. + * The other cloneHZStruct fields will not be moved around, + * and are aligned properly with cloneHZStruct's alignment. + */ +struct cloneHZStruct { UConverter cnv; - UAlignedMemory deadSpace1; UConverter subCnv; - UAlignedMemory deadSpace2; + UAlignedMemory deadSpace; UConverterDataHZ mydata; }; @@ -614,8 +541,8 @@ _HZ_SafeClone(const UConverter *cnv, int32_t *pBufferSize, UErrorCode *status) { - struct cloneStruct * localClone; - int32_t size, bufferSizeNeeded = sizeof(struct cloneStruct); + struct cloneHZStruct * localClone; + int32_t size, bufferSizeNeeded = sizeof(struct cloneHZStruct); if (U_FAILURE(*status)){ return 0; @@ -626,16 +553,15 @@ _HZ_SafeClone(const UConverter *cnv, return 0; } - localClone = (struct cloneStruct *)stackBuffer; - uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter)); - localClone->cnv.isCopyLocal = TRUE; + localClone = (struct cloneHZStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataHZ)); localClone->cnv.extraInfo = &localClone->mydata; localClone->cnv.isExtraLocal = TRUE; /* deep-clone the sub-converter */ - size = (int32_t)sizeof(UConverter); + size = (int32_t)(sizeof(UConverter) + sizeof(UAlignedMemory)); /* include size of padding */ ((UConverterDataHZ*)localClone->cnv.extraInfo)->gbConverter = ucnv_safeClone(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, &localClone->subCnv, &size, status); @@ -644,17 +570,17 @@ _HZ_SafeClone(const UConverter *cnv, static void _HZ_GetUnicodeSet(const UConverter *cnv, - USet *set, + const USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - /* the tilde '~' is hardcoded in the converter */ - uset_add(set, 0x7e); + /* HZ converts all of ASCII */ + sa->addRange(sa->set, 0, 0x7f); /* add all of the code points that the sub-converter handles */ - ((UConverterDataHZ*)cnv->extraInfo)-> - gbConverter->sharedData->impl-> - getUnicodeSet(((UConverterDataHZ*)cnv->extraInfo)->gbConverter, - set, which, pErrorCode); + ucnv_MBCSGetFilteredUnicodeSetForUnicode( + ((UConverterDataHZ*)cnv->extraInfo)->gbConverter->sharedData, + sa, which, UCNV_SET_FILTER_HZ, + pErrorCode); } static const UConverterImpl _HZImpl={