X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..73c04bcfe1096173b00431f0cdc742894b15eef0:/icuSources/common/ucnvlat1.c diff --git a/icuSources/common/ucnvlat1.c b/icuSources/common/ucnvlat1.c index 56f2e674..bbaece60 100644 --- a/icuSources/common/ucnvlat1.c +++ b/icuSources/common/ucnvlat1.c @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 2000-2003, International Business Machines +* Copyright (C) 2000-2004, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: ucnvlat1.cpp @@ -13,8 +13,10 @@ */ #include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + #include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" #include "unicode/uset.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" @@ -26,7 +28,7 @@ /* ISO 8859-1 --------------------------------------------------------------- */ -/* This is a table-less and callback-less version of _MBCSSingleToBMPWithOffsets(). */ +/* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ static void _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, UErrorCode *pErrorCode) { @@ -40,7 +42,7 @@ _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, /* set up the local pointers */ source=(const uint8_t *)pArgs->source; target=pArgs->target; - targetCapacity=pArgs->targetLimit-pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); offsets=pArgs->offsets; sourceIndex=0; @@ -49,7 +51,7 @@ _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, * since the conversion here is 1:1 UChar:uint8_t, we need only one counter * for the minimum of the sourceLength and targetCapacity */ - length=(const uint8_t *)pArgs->sourceLimit-source; + length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); if(length<=targetCapacity) { targetCapacity=length; } else { @@ -126,7 +128,7 @@ _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, } } -/* This is a table-less and callback-less version of _MBCSSingleGetNextUChar(). */ +/* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */ static UChar32 _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *pErrorCode) { @@ -141,29 +143,27 @@ _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs, return 0xffff; } -/* This is a table-less version of _MBCSSingleFromBMPWithOffsets(). */ +/* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */ static void _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, UErrorCode *pErrorCode) { UConverter *cnv; - const UChar *source, *sourceLimit, *lastSource; - uint8_t *target; + const UChar *source, *sourceLimit; + uint8_t *target, *oldTarget; int32_t targetCapacity, length; int32_t *offsets; - UChar32 c, max; + UChar32 cp; + UChar c, max; int32_t sourceIndex; - UConverterCallbackReason reason; - int32_t i; - /* set up the local pointers */ cnv=pArgs->converter; source=pArgs->source; sourceLimit=pArgs->sourceLimit; - target=(uint8_t *)pArgs->target; - targetCapacity=pArgs->targetLimit-pArgs->target; + target=oldTarget=(uint8_t *)pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); offsets=pArgs->offsets; if(cnv->sharedData==&_Latin1Data) { @@ -173,29 +173,27 @@ _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, } /* get the converter state from UConverter */ - c=cnv->fromUSurrogateLead; + cp=cnv->fromUChar32; /* sourceIndex=-1 if the current character began in the previous buffer */ - sourceIndex= c==0 ? 0 : -1; - lastSource=source; + sourceIndex= cp==0 ? 0 : -1; /* * since the conversion here is 1:1 UChar:uint8_t, we need only one counter * for the minimum of the sourceLength and targetCapacity */ - length=sourceLimit-source; + length=(int32_t)(sourceLimit-source); if(length0) { + if(cp!=0 && targetCapacity>0) { goto getTrail; } #if LATIN1_UNROLL_FROM_UNICODE /* unroll the loop with the most common case */ -unrolled: if(targetCapacity>=16) { int32_t count, loops; UChar u, oredChars; @@ -247,7 +245,7 @@ unrolled: targetCapacity-=16*count; if(offsets!=NULL) { - lastSource+=16*count; + oldTarget+=16*count; while(count>0) { *offsets++=sourceIndex++; *offsets++=sourceIndex++; @@ -268,156 +266,62 @@ unrolled: --count; } } - - c=0; } #endif - while(targetCapacity>0) { - /* - * Get a correct Unicode code point: - * a single UChar for a BMP code point or - * a matched surrogate pair for a "surrogate code point". - */ - c=*source++; - if(c<=max) { - /* convert the Unicode code point */ - *target++=(uint8_t)c; - --targetCapacity; - - /* normal end of conversion: prepare for a new character */ - c=0; - } else { - if(!UTF_IS_SURROGATE(c)) { - /* callback(unassigned) */ - reason=UCNV_UNASSIGNED; - *pErrorCode=U_INVALID_CHAR_FOUND; - } else if(UTF_IS_SURROGATE_FIRST(c)) { + /* conversion loop */ + c=0; + while(targetCapacity>0 && (c=*source++)<=max) { + /* convert the Unicode code point */ + *target++=(uint8_t)c; + --targetCapacity; + } + + if(c>max) { + cp=c; + if(!U_IS_SURROGATE(cp)) { + /* callback(unassigned) */ + } else if(U_IS_SURROGATE_LEAD(cp)) { getTrail: - if(source0) { - *offsets++=sourceIndex++; - --count; - } - /* offset and sourceIndex are now set for the current character */ - } - - /* update the arguments structure */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - - /* set the converter state in UConverter to deal with the next character */ - cnv->fromUSurrogateLead=0; - - /* write the code point as code units */ - i=0; - UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c); - cnv->invalidUCharLength=(int8_t)i; - /* i==length */ - - /* call the callback function */ - cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, reason, pErrorCode); - - /* get the converter state from UConverter */ - c=cnv->fromUSurrogateLead; - - /* update target and deal with offsets if necessary */ - offsets=ucnv_updateCallbackOffsets(offsets, ((uint8_t *)pArgs->target)-target, sourceIndex); - target=(uint8_t *)pArgs->target; - - /* update the source pointer and index */ - sourceIndex+=length+(pArgs->source-source); - source=lastSource=pArgs->source; - targetCapacity=(uint8_t *)pArgs->targetLimit-target; - length=sourceLimit-source; - if(lengthcharErrorBufferLength>0) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; + /* no more input */ + cnv->fromUChar32=cp; + goto noMoreInput; } - -#if LATIN1_UNROLL_FROM_UNICODE - goto unrolled; -#endif + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ } - } - if(U_SUCCESS(*pErrorCode) && source=(uint8_t *)pArgs->targetLimit) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND; + cnv->fromUChar32=cp; } +noMoreInput: - /* set offsets since the start or the last callback */ + /* set offsets since the start */ if(offsets!=NULL) { - size_t count=source-lastSource; + size_t count=target-oldTarget; while(count>0) { *offsets++=sourceIndex++; --count; } } - if(pArgs->flush && source>=sourceLimit) { - /* reset the state for the next conversion */ - if(c!=0 && U_SUCCESS(*pErrorCode)) { - /* a Unicode code point remains incomplete (only a first surrogate) */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - cnv->fromUSurrogateLead=0; - } else { - /* set the converter state back into UConverter */ - cnv->fromUSurrogateLead=(UChar)c; + if(U_SUCCESS(*pErrorCode) && source=(uint8_t *)pArgs->targetLimit) { + /* target is full */ + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } /* write back the updated pointers */ @@ -428,10 +332,10 @@ getTrail: static void _Latin1GetUnicodeSet(const UConverter *cnv, - USet *set, + const USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - uset_addRange(set, 0, 0xff); + sa->addRange(sa->set, 0, 0xff); } static const UConverterImpl _Latin1Impl={ @@ -475,40 +379,40 @@ const UConverterSharedData _Latin1Data={ /* US-ASCII ----------------------------------------------------------------- */ -/* This is a table-less version of _MBCSSingleToBMPWithOffsets(). */ +/* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */ static void _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, UErrorCode *pErrorCode) { - const uint8_t *source, *sourceLimit, *lastSource; - UChar *target; + const uint8_t *source, *sourceLimit; + UChar *target, *oldTarget; int32_t targetCapacity, length; int32_t *offsets; int32_t sourceIndex; + uint8_t c; + /* set up the local pointers */ source=(const uint8_t *)pArgs->source; sourceLimit=(const uint8_t *)pArgs->sourceLimit; - target=pArgs->target; - targetCapacity=pArgs->targetLimit-pArgs->target; + target=oldTarget=pArgs->target; + targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target); offsets=pArgs->offsets; /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex=0; - lastSource=source; /* * since the conversion here is 1:1 UChar:uint8_t, we need only one counter * for the minimum of the sourceLength and targetCapacity */ - length=sourceLimit-source; + length=(int32_t)(sourceLimit-source); if(length=16) { int32_t count, loops; UChar oredChars; @@ -544,7 +448,7 @@ unrolled: targetCapacity-=16*count; if(offsets!=NULL) { - lastSource+=16*count; + oldTarget+=16*count; while(count>0) { *offsets++=sourceIndex++; *offsets++=sourceIndex++; @@ -569,86 +473,26 @@ unrolled: #endif /* conversion loop */ - while(targetCapacity>0) { - if((*target++=*source++)<=0x7f) { - --targetCapacity; - } else { - UConverter *cnv; - - /* back out the illegal character */ - --target; - - /* call the callback function with all the preparations and post-processing */ - cnv=pArgs->converter; - - /* callback(illegal) */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - - /* set offsets since the start or the last callback */ - if(offsets!=NULL) { - int32_t count=(int32_t)(source-lastSource); - - /* predecrement: do not set the offset for the callback-causing character */ - while(--count>0) { - *offsets++=sourceIndex++; - } - /* offset and sourceIndex are now set for the current character */ - } - - /* update the arguments structure */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - - /* copy the current bytes to invalidCharBuffer */ - cnv->invalidCharBuffer[0]=*(source-1); - cnv->invalidCharLength=1; - - /* call the callback function */ - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode); - - /* update target and deal with offsets if necessary */ - offsets=ucnv_updateCallbackOffsets(offsets, pArgs->target-target, sourceIndex); - target=pArgs->target; - - /* update the source pointer and index */ - sourceIndex+=1+((const uint8_t *)pArgs->source-source); - source=lastSource=(const uint8_t *)pArgs->source; - targetCapacity=pArgs->targetLimit-target; - length=sourceLimit-source; - if(lengthUCharErrorBufferLength>0) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - break; - } - -#if ASCII_UNROLL_TO_UNICODE - goto unrolled; -#endif - } + c=0; + while(targetCapacity>0 && (c=*source++)<=0x7f) { + *target++=c; + --targetCapacity; } - if(U_SUCCESS(*pErrorCode) && source=pArgs->targetLimit) { + if(c>0x7f) { + /* callback(illegal); copy the current bytes to toUBytes[] */ + UConverter *cnv=pArgs->converter; + cnv->toUBytes[0]=c; + cnv->toULength=1; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + } else if(source=pArgs->targetLimit) { /* target is full */ *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } - /* set offsets since the start or the last callback */ + /* set offsets since the start */ if(offsets!=NULL) { - size_t count=source-lastSource; + size_t count=target-oldTarget; while(count>0) { *offsets++=sourceIndex++; --count; @@ -661,76 +505,39 @@ unrolled: pArgs->offsets=offsets; } -/* This is a table-less version of _MBCSSingleGetNextUChar(). */ +/* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */ static UChar32 _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *pErrorCode) { - UChar buffer[UTF_MAX_CHAR_LENGTH]; const uint8_t *source; uint8_t b; - /* set up the local pointers */ source=(const uint8_t *)pArgs->source; - - /* conversion loop */ - while(source<(const uint8_t *)pArgs->sourceLimit) { + if(source<(const uint8_t *)pArgs->sourceLimit) { b=*source++; pArgs->source=(const char *)source; if(b<=0x7f) { return b; } else { - /* call the callback function with all the preparations and post-processing */ UConverter *cnv=pArgs->converter; - - /* callback(illegal) */ + cnv->toUBytes[0]=b; + cnv->toULength=1; *pErrorCode=U_ILLEGAL_CHAR_FOUND; - - /* update the arguments structure */ - pArgs->target=buffer; - pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH; - - /* copy the current byte to invalidCharBuffer */ - cnv->invalidCharBuffer[0]=(char)b; - cnv->invalidCharLength=1; - - /* call the callback function */ - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, 1, UCNV_ILLEGAL, pErrorCode); - - /* update the source pointer */ - source=(const uint8_t *)pArgs->source; - - /* - * return the first character if the callback wrote some - * we do not need to goto finish because the converter state is already set - */ - if(U_SUCCESS(*pErrorCode)) { - int32_t length=pArgs->target-buffer; - if(length>0) { - return ucnv_getUChar32KeepOverflow(cnv, buffer, length); - } - /* else (callback did not write anything) continue */ - } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - *pErrorCode=U_ZERO_ERROR; - return ucnv_getUChar32KeepOverflow(cnv, buffer, UTF_MAX_CHAR_LENGTH); - } else { - /* break on error */ - /* ### what if a callback set an error but _also_ generated output?! */ - return 0xffff; - } + return 0xffff; } } - /* no output because of empty input or only skipping callbacks */ + /* no output because of empty input */ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; return 0xffff; } static void _ASCIIGetUnicodeSet(const UConverter *cnv, - USet *set, + const USetAdder *sa, UConverterUnicodeSet which, UErrorCode *pErrorCode) { - uset_addRange(set, 0, 0x7f); + sa->addRange(sa->set, 0, 0x7f); } static const UConverterImpl _ASCIIImpl={ @@ -771,3 +578,5 @@ const UConverterSharedData _ASCIIData={ NULL, NULL, &_ASCIIStaticData, FALSE, &_ASCIIImpl, 0 }; + +#endif