X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..2ca993e82fb37b597a3c73ecd1586a139a6579c5:/icuSources/common/ucnvscsu.c diff --git a/icuSources/common/ucnvscsu.c b/icuSources/common/ucnvscsu.c index e02f9a70..15ed9c49 100644 --- a/icuSources/common/ucnvscsu.c +++ b/icuSources/common/ucnvscsu.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2000-2003, International Business Machines +* Copyright (C) 2000-2016, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -20,8 +20,12 @@ */ #include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + #include "unicode/ucnv.h" #include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" #include "ucnv_bld.h" #include "ucnv_cnv.h" #include "cmemory.h" @@ -181,16 +185,18 @@ _SCSUReset(UConverter *cnv, UConverterResetChoice choice) { break; } - cnv->fromUSurrogateLead=0; + cnv->fromUChar32=0; } } static void _SCSUOpen(UConverter *cnv, - const char *name, - const char *locale, - uint32_t options, + UConverterLoadArgs *pArgs, UErrorCode *pErrorCode) { + const char *locale=pArgs->locale; + if(pArgs->onlyTestIsLoadable) { + return; + } cnv->extraInfo=uprv_malloc(sizeof(SCSUData)); if(cnv->extraInfo!=NULL) { if(locale!=NULL && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) { @@ -202,6 +208,10 @@ _SCSUOpen(UConverter *cnv, } else { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; } + + /* Set the substitution character U+fffd as a Unicode string. */ + cnv->subUChars[0]=0xfffd; + cnv->subCharLen=-1; } static void @@ -216,8 +226,6 @@ _SCSUClose(UConverter *cnv) { /* SCSU-to-Unicode conversion functions ------------------------------------- */ -/* ### TODO check operator precedence | << + < */ - static void _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, UErrorCode *pErrorCode) { @@ -272,11 +280,9 @@ _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, * The end of the input or output buffer is also handled by the slower loop. * The slow loop jumps (goto) to the fast-path loop again as soon as possible. * - * The callback handling is done by jumping (goto) to the callback section at the end - * of the function. From there, it either jumps to here to continue or to - * the endloop section to clean up and return. + * The callback handling is done by returning with an error code. + * The conversion framework actually calls the callback function. */ -loop: if(isSingleByteMode) { /* fast path for single-byte mode */ if(state==readCommand) { @@ -367,13 +373,20 @@ singleByteMode: goto fastUnicode; } else /* Srs */ { /* callback(illegal) */ - cnv->invalidCharBuffer[0]=b; - cnv->invalidCharLength=1; - goto callback; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; } + + /* store the first byte of a multibyte sequence in toUBytes[] */ + cnv->toUBytes[0]=b; + cnv->toULength=1; break; case quotePairOne: byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; state=quotePairTwo; break; case quotePairTwo: @@ -426,6 +439,8 @@ singleByteMode: case definePairOne: dynamicWindow=(int8_t)((b>>5)&7); byteOne=(uint8_t)(b&0x1f); + cnv->toUBytes[1]=b; + cnv->toULength=2; state=definePairTwo; break; case definePairTwo: @@ -436,10 +451,9 @@ singleByteMode: case defineOne: if(b==0) { /* callback(illegal): Reserved window offset value 0 */ - cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow); - cnv->invalidCharBuffer[1]=b; - cnv->invalidCharLength=2; - goto callback; + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { @@ -448,10 +462,9 @@ singleByteMode: scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; } else { /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ - cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow); - cnv->invalidCharBuffer[1]=b; - cnv->invalidCharLength=2; - goto callback; + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; } sourceIndex=nextSourceIndex; state=readCommand; @@ -487,6 +500,8 @@ fastUnicode: case readCommand: if((uint8_t)(b-UC0)>(Urs-UC0)) { byteOne=b; + cnv->toUBytes[0]=b; + cnv->toULength=1; state=quotePairTwo; } else if(/* UC0<=b && */ b<=UC7) { dynamicWindow=(int8_t)(b-UC0); @@ -496,23 +511,32 @@ fastUnicode: } else if(/* UD0<=b && */ b<=UD7) { dynamicWindow=(int8_t)(b-UD0); isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; state=defineOne; goto singleByteMode; } else if(b==UDX) { isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; state=definePairOne; goto singleByteMode; } else if(b==UQU) { + cnv->toUBytes[0]=b; + cnv->toULength=1; state=quotePairOne; } else /* Urs */ { /* callback(illegal) */ - cnv->invalidCharBuffer[0]=b; - cnv->invalidCharLength=1; - goto callback; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; } break; case quotePairOne: byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; state=quotePairTwo; break; case quotePairTwo: @@ -528,80 +552,25 @@ fastUnicode: } endloop: - if(pArgs->flush && source>=sourceLimit) { - /* reset the state for the next conversion */ - if(state!=readCommand && U_SUCCESS(*pErrorCode)) { - /* a character byte sequence remains incomplete */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - _SCSUReset(cnv, UCNV_RESET_TO_UNICODE); - } else { - /* set the converter state back into UConverter */ - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=state; - scsu->toUQuoteWindow=quoteWindow; - scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=byteOne; + /* set the converter state back into UConverter */ + if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { + /* reset to deal with the next character */ + state=readCommand; + } else if(state==readCommand) { + /* not in a multi-byte sequence, reset toULength */ + cnv->toULength=0; } + scsu->toUIsSingleByteMode=isSingleByteMode; + scsu->toUState=state; + scsu->toUQuoteWindow=quoteWindow; + scsu->toUDynamicWindow=dynamicWindow; + scsu->toUByteOne=byteOne; -finish: /* write back the updated pointers */ pArgs->source=(const char *)source; pArgs->target=target; pArgs->offsets=offsets; return; - -callback: - /* call the callback function with all the preparations and post-processing */ - /* update the arguments structure */ - pArgs->source=(const char *)source; - pArgs->target=target; - pArgs->offsets=offsets; - /* the current bytes were copied to invalidCharBuffer before the goto callback jump */ - - /* set the converter state in UConverter to deal with the next character */ - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=readCommand; - scsu->toUQuoteWindow=quoteWindow; - scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=0; - - /* call the callback function */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode); - - /* get the converter state from UConverter */ - isSingleByteMode=scsu->toUIsSingleByteMode; - state=scsu->toUState; - quoteWindow=scsu->toUQuoteWindow; - dynamicWindow=scsu->toUDynamicWindow; - byteOne=scsu->toUByteOne; - - /* update target and deal with offsets if necessary */ - offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(pArgs->target-target), sourceIndex); - target=pArgs->target; - - /* update the source pointer and index */ - sourceIndex=(int32_t)(nextSourceIndex+((const uint8_t *)pArgs->source-source)); - source=(const uint8_t *)pArgs->source; - - /* - * If the callback overflowed the target, then we need to - * stop here with an overflow indication. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - goto endloop; - } else if(cnv->UCharErrorBufferLength>0) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } else if(U_FAILURE(*pErrorCode)) { - /* break on error */ - _SCSUReset(cnv, UCNV_RESET_TO_UNICODE); - goto finish; - } else { - goto loop; - } } /* @@ -619,7 +588,6 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs, const uint8_t *source, *sourceLimit; UChar *target; const UChar *targetLimit; - UBool isSingleByteMode; uint8_t state, byteOne; int8_t quoteWindow, dynamicWindow; @@ -658,11 +626,9 @@ _SCSUToUnicode(UConverterToUnicodeArgs *pArgs, * The end of the input or output buffer is also handled by the slower loop. * The slow loop jumps (goto) to the fast-path loop again as soon as possible. * - * The callback handling is done by jumping (goto) to the callback section at the end - * of the function. From there, it either jumps to here to continue or to - * the endloop section to clean up and return. + * The callback handling is done by returning with an error code. + * The conversion framework actually calls the callback function. */ -loop: if(isSingleByteMode) { /* fast path for single-byte mode */ if(state==readCommand) { @@ -731,13 +697,20 @@ singleByteMode: goto fastUnicode; } else /* Srs */ { /* callback(illegal) */ - cnv->invalidCharBuffer[0]=b; - cnv->invalidCharLength=1; - goto callback; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; } + + /* store the first byte of a multibyte sequence in toUBytes[] */ + cnv->toUBytes[0]=b; + cnv->toULength=1; break; case quotePairOne: byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; state=quotePairTwo; break; case quotePairTwo: @@ -772,6 +745,8 @@ singleByteMode: case definePairOne: dynamicWindow=(int8_t)((b>>5)&7); byteOne=(uint8_t)(b&0x1f); + cnv->toUBytes[1]=b; + cnv->toULength=2; state=definePairTwo; break; case definePairTwo: @@ -781,10 +756,9 @@ singleByteMode: case defineOne: if(b==0) { /* callback(illegal): Reserved window offset value 0 */ - cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow); - cnv->invalidCharBuffer[1]=b; - cnv->invalidCharLength=2; - goto callback; + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; } else if(btoUDynamicOffsets[dynamicWindow]=b<<7UL; } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) { @@ -793,10 +767,9 @@ singleByteMode: scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold]; } else { /* callback(illegal): Reserved window offset value 0xa8..0xf8 */ - cnv->invalidCharBuffer[0]=(char)(SD0+dynamicWindow); - cnv->invalidCharBuffer[1]=b; - cnv->invalidCharLength=2; - goto callback; + cnv->toUBytes[1]=b; + cnv->toULength=2; + goto endloop; } state=readCommand; goto fastSingle; @@ -825,6 +798,8 @@ fastUnicode: case readCommand: if((uint8_t)(b-UC0)>(Urs-UC0)) { byteOne=b; + cnv->toUBytes[0]=b; + cnv->toULength=1; state=quotePairTwo; } else if(/* UC0<=b && */ b<=UC7) { dynamicWindow=(int8_t)(b-UC0); @@ -833,23 +808,32 @@ fastUnicode: } else if(/* UD0<=b && */ b<=UD7) { dynamicWindow=(int8_t)(b-UD0); isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; state=defineOne; goto singleByteMode; } else if(b==UDX) { isSingleByteMode=TRUE; + cnv->toUBytes[0]=b; + cnv->toULength=1; state=definePairOne; goto singleByteMode; } else if(b==UQU) { + cnv->toUBytes[0]=b; + cnv->toULength=1; state=quotePairOne; } else /* Urs */ { /* callback(illegal) */ - cnv->invalidCharBuffer[0]=b; - cnv->invalidCharLength=1; - goto callback; + *pErrorCode=U_ILLEGAL_CHAR_FOUND; + cnv->toUBytes[0]=b; + cnv->toULength=1; + goto endloop; } break; case quotePairOne: byteOne=b; + cnv->toUBytes[1]=b; + cnv->toULength=2; state=quotePairTwo; break; case quotePairTwo: @@ -861,80 +845,24 @@ fastUnicode: } endloop: - if(pArgs->flush && source>=sourceLimit) { - /* reset the state for the next conversion */ - if(state!=readCommand && U_SUCCESS(*pErrorCode)) { - /* a character byte sequence remains incomplete */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - _SCSUReset(cnv, UCNV_RESET_TO_UNICODE); - } else { - /* set the converter state back into UConverter */ - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=state; - scsu->toUQuoteWindow=quoteWindow; - scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=byteOne; + /* set the converter state back into UConverter */ + if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) { + /* reset to deal with the next character */ + state=readCommand; + } else if(state==readCommand) { + /* not in a multi-byte sequence, reset toULength */ + cnv->toULength=0; } + scsu->toUIsSingleByteMode=isSingleByteMode; + scsu->toUState=state; + scsu->toUQuoteWindow=quoteWindow; + scsu->toUDynamicWindow=dynamicWindow; + scsu->toUByteOne=byteOne; -finish: /* write back the updated pointers */ pArgs->source=(const char *)source; pArgs->target=target; return; - -callback: - /* call the callback function with all the preparations and post-processing */ - /* update the arguments structure */ - pArgs->source=(const char *)source; - pArgs->target=target; - /* the current bytes were copied to invalidCharBuffer before the goto callback jump */ - - /* set the converter state in UConverter to deal with the next character */ - scsu->toUIsSingleByteMode=isSingleByteMode; - scsu->toUState=readCommand; - scsu->toUQuoteWindow=quoteWindow; - scsu->toUDynamicWindow=dynamicWindow; - scsu->toUByteOne=0; - - /* call the callback function */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, cnv->invalidCharBuffer, cnv->invalidCharLength, UCNV_ILLEGAL, pErrorCode); - - /* get the converter state from UConverter */ - isSingleByteMode=scsu->toUIsSingleByteMode; - state=scsu->toUState; - quoteWindow=scsu->toUQuoteWindow; - dynamicWindow=scsu->toUDynamicWindow; - byteOne=scsu->toUByteOne; - - target=pArgs->target; - - source=(const uint8_t *)pArgs->source; - - /* - * If the callback overflowed the target, then we need to - * stop here with an overflow indication. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - goto endloop; - } else if(cnv->UCharErrorBufferLength>0) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } else if(U_FAILURE(*pErrorCode)) { - /* break on error */ - _SCSUReset(cnv, UCNV_RESET_TO_UNICODE); - goto finish; - } else { - goto loop; - } -} - -static UChar32 -_SCSUGetNextUChar(UConverterToUnicodeArgs *pArgs, - UErrorCode *pErrorCode) { - return ucnv_getNextUCharFromToUImpl(pArgs, _SCSUToUnicode, TRUE, pErrorCode); } /* SCSU-from-Unicode conversion functions ----------------------------------- */ @@ -1095,7 +1023,6 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, int32_t sourceIndex, nextSourceIndex; - uint32_t i; int32_t length; /* variables for compression heuristics */ @@ -1120,7 +1047,7 @@ _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, dynamicWindow=scsu->fromUDynamicWindow; currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - c=cnv->fromUSurrogateLead; + c=cnv->fromUChar32; /* sourceIndex=-1 if the current character began in the previous buffer */ sourceIndex= c==0 ? 0 : -1; @@ -1172,23 +1099,24 @@ loop: *offsets++=sourceIndex; } --targetCapacity; - } else if(UTF_IS_SURROGATE(c)) { - if(UTF_IS_SURROGATE_FIRST(c)) { + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { getTrailSingle: lead=(UChar)c; if(sourceflush && source>=sourceLimit) { - /* reset the state for the next conversion */ - if(c!=0 && U_SUCCESS(*pErrorCode)) { - /* a character byte sequence remains incomplete */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE); - } else { - /* set the converter state back into UConverter */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; + /* set the converter state back into UConverter */ + scsu->fromUIsSingleByteMode=isSingleByteMode; + scsu->fromUDynamicWindow=dynamicWindow; - cnv->fromUSurrogateLead=(UChar)c; - } + cnv->fromUChar32=c; -finish: /* write back the updated pointers */ pArgs->source=source; pArgs->target=(char *)target; @@ -1474,12 +1395,16 @@ outputBytes: /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(c>>24); + U_FALLTHROUGH; case 3: *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; case 2: *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; case 1: *target++=(uint8_t)c; + U_FALLTHROUGH; default: /* will never occur */ break; @@ -1490,15 +1415,19 @@ outputBytes: case 4: *target++=(uint8_t)(c>>24); *offsets++=sourceIndex; + U_FALLTHROUGH; case 3: *target++=(uint8_t)(c>>16); *offsets++=sourceIndex; + U_FALLTHROUGH; case 2: *target++=(uint8_t)(c>>8); *offsets++=sourceIndex; + U_FALLTHROUGH; case 1: *target++=(uint8_t)c; *offsets++=sourceIndex; + U_FALLTHROUGH; default: /* will never occur */ break; @@ -1519,17 +1448,24 @@ outputBytes: * first to the overflow buffer what does not fit into the * regular target. */ - /* we know that 1<=targetCapacitycharErrorBuffer; switch(length) { /* each branch falls through to the next one */ + case 4: + *p++=(uint8_t)(c>>24); + U_FALLTHROUGH; case 3: *p++=(uint8_t)(c>>16); + U_FALLTHROUGH; case 2: *p++=(uint8_t)(c>>8); + U_FALLTHROUGH; case 1: *p=(uint8_t)c; + U_FALLTHROUGH; default: /* will never occur */ break; @@ -1545,18 +1481,20 @@ outputBytes: if(offsets!=NULL) { *offsets++=sourceIndex; } + U_FALLTHROUGH; case 2: *target++=(uint8_t)(c>>8); if(offsets!=NULL) { *offsets++=sourceIndex; } + U_FALLTHROUGH; case 1: *target++=(uint8_t)c; if(offsets!=NULL) { *offsets++=sourceIndex; } + U_FALLTHROUGH; default: - /* will never occur */ break; } @@ -1566,59 +1504,6 @@ outputBytes: c=0; goto endloop; } - -callback: - /* call the callback function with all the preparations and post-processing */ - /* update the arguments structure */ - pArgs->source=source; - pArgs->target=(char *)target; - pArgs->offsets=offsets; - /* set the converter state in UConverter to deal with the next character */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; - cnv->fromUSurrogateLead=0; - - /* write the code point as code units */ - i=0; - UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c); - cnv->invalidUCharLength=(int8_t)i; - - /* call the callback function */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode); - - /* get the converter state from UConverter */ - isSingleByteMode=scsu->fromUIsSingleByteMode; - dynamicWindow=scsu->fromUDynamicWindow; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - c=cnv->fromUSurrogateLead; - - /* update target and deal with offsets if necessary */ - offsets=ucnv_updateCallbackOffsets(offsets, (int32_t)(((uint8_t *)pArgs->target)-target), sourceIndex); - target=(uint8_t *)pArgs->target; - - /* update the source pointer and index */ - sourceIndex=(int32_t)(nextSourceIndex+(pArgs->source-source)); - source=pArgs->source; - targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target); - - /* - * If the callback overflowed the target, then we need to - * stop here with an overflow indication. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - goto endloop; - } else if(cnv->charErrorBufferLength>0) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } else if(U_FAILURE(*pErrorCode)) { - /* break on error */ - _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE); - goto finish; - } else { - goto loop; - } } /* @@ -1643,7 +1528,6 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, uint32_t c, delta; - uint32_t i; int32_t length; /* variables for compression heuristics */ @@ -1667,7 +1551,7 @@ _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs, dynamicWindow=scsu->fromUDynamicWindow; currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - c=cnv->fromUSurrogateLead; + c=cnv->fromUChar32; /* similar conversion "loop" as in toUnicode */ loop: @@ -1705,22 +1589,23 @@ loop: /* use the current dynamic window */ *target++=(uint8_t)(delta|0x80); --targetCapacity; - } else if(UTF_IS_SURROGATE(c)) { - if(UTF_IS_SURROGATE_FIRST(c)) { + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c)) { getTrailSingle: lead=(UChar)c; if(sourceflush && source>=sourceLimit) { - /* reset the state for the next conversion */ - if(c!=0 && U_SUCCESS(*pErrorCode)) { - /* a character byte sequence remains incomplete */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } - _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE); - } else { - /* set the converter state back into UConverter */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; + /* set the converter state back into UConverter */ + scsu->fromUIsSingleByteMode=isSingleByteMode; + scsu->fromUDynamicWindow=dynamicWindow; - cnv->fromUSurrogateLead=(UChar)c; - } + cnv->fromUChar32=c; -finish: /* write back the updated pointers */ pArgs->source=source; pArgs->target=(char *)target; @@ -1990,12 +1868,16 @@ outputBytes: /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(c>>24); + U_FALLTHROUGH; case 3: *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; case 2: *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; case 1: *target++=(uint8_t)c; + U_FALLTHROUGH; default: /* will never occur */ break; @@ -2014,17 +1896,24 @@ outputBytes: * first to the overflow buffer what does not fit into the * regular target. */ - /* we know that 1<=targetCapacitycharErrorBuffer; switch(length) { /* each branch falls through to the next one */ + case 4: + *p++=(uint8_t)(c>>24); + U_FALLTHROUGH; case 3: *p++=(uint8_t)(c>>16); + U_FALLTHROUGH; case 2: *p++=(uint8_t)(c>>8); + U_FALLTHROUGH; case 1: *p=(uint8_t)c; + U_FALLTHROUGH; default: /* will never occur */ break; @@ -2037,12 +1926,14 @@ outputBytes: /* each branch falls through to the next one */ case 3: *target++=(uint8_t)(c>>16); + U_FALLTHROUGH; case 2: *target++=(uint8_t)(c>>8); + U_FALLTHROUGH; case 1: *target++=(uint8_t)c; + U_FALLTHROUGH; default: - /* will never occur */ break; } @@ -2052,54 +1943,6 @@ outputBytes: c=0; goto endloop; } - -callback: - /* call the callback function with all the preparations and post-processing */ - /* update the arguments structure */ - pArgs->source=source; - pArgs->target=(char *)target; - /* set the converter state in UConverter to deal with the next character */ - scsu->fromUIsSingleByteMode=isSingleByteMode; - scsu->fromUDynamicWindow=dynamicWindow; - cnv->fromUSurrogateLead=0; - - /* write the code point as code units */ - i=0; - UTF_APPEND_CHAR_UNSAFE(cnv->invalidUCharBuffer, i, c); - cnv->invalidUCharLength=(int8_t)i; - - /* call the callback function */ - *pErrorCode=U_ILLEGAL_CHAR_FOUND; - cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, cnv->invalidUCharBuffer, i, c, UCNV_ILLEGAL, pErrorCode); - - /* get the converter state from UConverter */ - isSingleByteMode=scsu->fromUIsSingleByteMode; - dynamicWindow=scsu->fromUDynamicWindow; - currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]; - c=cnv->fromUSurrogateLead; - - target=(uint8_t *)pArgs->target; - - source=pArgs->source; - targetCapacity=(int32_t)((uint8_t *)pArgs->targetLimit-target); - - /* - * If the callback overflowed the target, then we need to - * stop here with an overflow indication. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - goto endloop; - } else if(cnv->charErrorBufferLength>0) { - /* target is full */ - *pErrorCode=U_BUFFER_OVERFLOW_ERROR; - goto endloop; - } else if(U_FAILURE(*pErrorCode)) { - /* break on error */ - _SCSUReset(cnv, UCNV_RESET_FROM_UNICODE); - goto finish; - } else { - goto loop; - } } /* miscellaneous ------------------------------------------------------------ */ @@ -2116,32 +1959,8 @@ _SCSUGetName(const UConverter *cnv) { } } -static void -_SCSUWriteSub(UConverterFromUnicodeArgs *pArgs, - int32_t offsetIndex, - UErrorCode *pErrorCode) { - static const char squ_fffd[]={ (char)SQU, (char)0xffu, (char)0xfdu }; - - /* - * The substitution character is U+fffd={ ff, fd }. - * If the SCSU converter is in Unicode mode, then these two bytes just need to - * be written. Otherwise, this character is quoted. - */ - if(((SCSUData *)pArgs->converter->extraInfo)->fromUIsSingleByteMode) { - /* single-byte mode: quote Unicode */ - ucnv_cbFromUWriteBytes(pArgs, - squ_fffd, 3, - offsetIndex, pErrorCode); - } else { - /* Unicode mode: just write U+fffd */ - ucnv_cbFromUWriteBytes(pArgs, - squ_fffd+1, 2, - offsetIndex, pErrorCode); - } -} - /* structure for SafeClone calculations */ -struct cloneStruct +struct cloneSCSUStruct { UConverter cnv; SCSUData mydata; @@ -2153,8 +1972,8 @@ _SCSUSafeClone(const UConverter *cnv, int32_t *pBufferSize, UErrorCode *status) { - struct cloneStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneStruct); + struct cloneSCSUStruct * localClone; + int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct); if (U_FAILURE(*status)){ return 0; @@ -2165,9 +1984,8 @@ _SCSUSafeClone(const UConverter *cnv, return 0; } - localClone = (struct cloneStruct *)stackBuffer; - uprv_memcpy(&localClone->cnv, cnv, sizeof(UConverter)); - localClone->cnv.isCopyLocal = TRUE; + localClone = (struct cloneSCSUStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData)); localClone->cnv.extraInfo = &localClone->mydata; @@ -2177,9 +1995,6 @@ _SCSUSafeClone(const UConverter *cnv, } - - - static const UConverterImpl _SCSUImpl={ UCNV_SCSU, @@ -2194,11 +2009,11 @@ static const UConverterImpl _SCSUImpl={ _SCSUToUnicodeWithOffsets, _SCSUFromUnicode, _SCSUFromUnicodeWithOffsets, - _SCSUGetNextUChar, + NULL, NULL, _SCSUGetName, - _SCSUWriteSub, + NULL, _SCSUSafeClone, ucnv_getCompleteUnicodeSet }; @@ -2206,20 +2021,21 @@ static const UConverterImpl _SCSUImpl={ static const UConverterStaticData _SCSUStaticData={ sizeof(UConverterStaticData), "SCSU", - 0, /* CCSID for SCSU */ + 1212, /* CCSID for SCSU */ UCNV_IBM, UCNV_SCSU, 1, 3, /* one UChar generates at least 1 byte and at most 3 bytes */ - { 0x0e, 0xff, 0xfd, 0 }, 3, /* ### the subchar really must be written by an SCSU function! */ + /* + * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode + * substitution string. + */ + { 0x0e, 0xff, 0xfd, 0 }, 3, FALSE, FALSE, 0, 0, { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ }; -const UConverterSharedData _SCSUData={ - sizeof(UConverterSharedData), ~((uint32_t)0), - NULL, NULL, &_SCSUStaticData, FALSE, &_SCSUImpl, - 0 -}; +const UConverterSharedData _SCSUData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl); -/* ### clarify: if an error occurs, does a converter reset itself? or is it in a defined or undefined state? */ +#endif