X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..57a6839dcb3bba09e8228b822b290604668416fe:/icuSources/common/ucnvmbcs.c diff --git a/icuSources/common/ucnvmbcs.c b/icuSources/common/ucnvmbcs.c index 538a18c5..143daf69 100644 --- a/icuSources/common/ucnvmbcs.c +++ b/icuSources/common/ucnvmbcs.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2000-2010, International Business Machines +* Copyright (C) 2000-2013, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -48,13 +48,15 @@ #include "unicode/ucnv_cb.h" #include "unicode/udata.h" #include "unicode/uset.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" #include "ucnv_bld.h" #include "ucnvmbcs.h" #include "ucnv_ext.h" #include "ucnv_cnv.h" -#include "umutex.h" #include "cmemory.h" #include "cstring.h" +#include "cmutex.h" /* control optimizations according to the platform */ #define MBCS_UNROLL_SINGLE_TO_BMP 1 @@ -379,10 +381,11 @@ static const UConverterImpl _DBCSUTF8Impl; * as of the re-released mapping tables from 2000-nov-30. */ static const uint32_t -gb18030Ranges[13][4]={ +gb18030Ranges[14][4]={ {0x10000, 0x10FFFF, LINEAR(0x90308130), LINEAR(0xE3329A35)}, {0x9FA6, 0xD7FF, LINEAR(0x82358F33), LINEAR(0x8336C738)}, - {0x0452, 0x200F, LINEAR(0x8130D330), LINEAR(0x8136A531)}, + {0x0452, 0x1E3E, LINEAR(0x8130D330), LINEAR(0x8135F436)}, + {0x1E40, 0x200F, LINEAR(0x8135F438), LINEAR(0x8136A531)}, {0xE865, 0xF92B, LINEAR(0x8336D030), LINEAR(0x84308534)}, {0x2643, 0x2E80, LINEAR(0x8137A839), LINEAR(0x8138FD38)}, {0xFA2A, 0xFE2F, LINEAR(0x84309C38), LINEAR(0x84318537)}, @@ -823,9 +826,9 @@ ucnv_MBCSGetFilteredUnicodeSetForUnicode(const UConverterSharedData *sharedData, switch(st3Multiplier) { case 4: b|=*stage3++; - case 3: + case 3: /*fall through*/ b|=*stage3++; - case 2: + case 2: /*fall through*/ b|=stage3[0]|stage3[1]; stage3+=2; default: @@ -1340,7 +1343,6 @@ reconstituteData(UConverterMBCSTable *mbcsTable, UErrorCode *pErrorCode) { uint16_t *stage1; uint32_t *stage2; - uint8_t *bytes; uint32_t dataLength=stage1Length*2+fullStage2Length*4+mbcsTable->fromUBytesLength; mbcsTable->reconstitutedData=(uint8_t *)uprv_malloc(dataLength); if(mbcsTable->reconstitutedData==NULL) { @@ -1359,7 +1361,7 @@ reconstituteData(UConverterMBCSTable *mbcsTable, stage2Length*4); mbcsTable->fromUnicodeTable=stage1; - mbcsTable->fromUnicodeBytes=bytes=(uint8_t *)(stage2+fullStage2Length); + mbcsTable->fromUnicodeBytes=(uint8_t *)(stage2+fullStage2Length); /* indexes into stage 2 count from the bottom of the fromUnicodeTable */ stage2=(uint32_t *)stage1; @@ -3351,16 +3353,16 @@ ucnv_MBCSDoubleFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, * If it does, then surrogates are not paired but mapped separately. * Note that in this case unmatched surrogates are not detected. */ - if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { - if(UTF_IS_SURROGATE_FIRST(c)) { + if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { + if(U16_IS_SURROGATE_LEAD(c)) { getTrail: if(sourceoptions, si_value); - so_value_length = getSISOBytes(SO, cnv->options, so_value); + siLength = getSISOBytes(SI, cnv->options, siBytes); + soLength = getSISOBytes(SO, cnv->options, soBytes); /* conversion loop */ /* @@ -4105,12 +4108,12 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, length=1; } else { /* change from double-byte mode to single-byte */ - if (si_value_length == 1) { - value|=(uint32_t)si_value[0]<<8; + if (siLength == 1) { + value|=(uint32_t)siBytes[0]<<8; length = 2; - } else if (si_value_length == 2) { - value|=(uint32_t)si_value[1]<<8; - value|=(uint32_t)si_value[0]<<16; + } else if (siLength == 2) { + value|=(uint32_t)siBytes[1]<<8; + value|=(uint32_t)siBytes[0]<<16; length = 3; } prevLength=1; @@ -4120,12 +4123,12 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, length=2; } else { /* change from single-byte mode to double-byte */ - if (so_value_length == 1) { - value|=(uint32_t)so_value[0]<<16; + if (soLength == 1) { + value|=(uint32_t)soBytes[0]<<16; length = 3; - } else if (so_value_length == 2) { - value|=(uint32_t)so_value[1]<<16; - value|=(uint32_t)so_value[0]<<24; + } else if (soLength == 2) { + value|=(uint32_t)soBytes[1]<<16; + value|=(uint32_t)soBytes[0]<<24; length = 4; } prevLength=2; @@ -4234,16 +4237,16 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, * If it does, then surrogates are not paired but mapped separately. * Note that in this case unmatched surrogates are not detected. */ - if(UTF_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { - if(UTF_IS_SURROGATE_FIRST(c)) { + if(U16_IS_SURROGATE(c) && !(unicodeMask&UCNV_HAS_SURROGATES)) { + if(U16_IS_SURROGATE_LEAD(c)) { getTrail: if(sourcefromUnicodeStatus=prevLength; /* save the old state */ @@ -4337,12 +4340,12 @@ getTrail: length=1; } else { /* change from double-byte mode to single-byte */ - if (si_value_length == 1) { - value|=(uint32_t)si_value[0]<<8; + if (siLength == 1) { + value|=(uint32_t)siBytes[0]<<8; length = 2; - } else if (si_value_length == 2) { - value|=(uint32_t)si_value[1]<<8; - value|=(uint32_t)si_value[0]<<16; + } else if (siLength == 2) { + value|=(uint32_t)siBytes[1]<<8; + value|=(uint32_t)siBytes[0]<<16; length = 3; } prevLength=1; @@ -4352,12 +4355,12 @@ getTrail: length=2; } else { /* change from single-byte mode to double-byte */ - if (so_value_length == 1) { - value|=(uint32_t)so_value[0]<<16; + if (soLength == 1) { + value|=(uint32_t)soBytes[0]<<16; length = 3; - } else if (so_value_length == 2) { - value|=(uint32_t)so_value[1]<<16; - value|=(uint32_t)so_value[0]<<24; + } else if (soLength == 2) { + value|=(uint32_t)soBytes[1]<<16; + value|=(uint32_t)soBytes[0]<<24; length = 4; } prevLength=2; @@ -4494,11 +4497,11 @@ unassigned: /* each branch falls through to the next one */ case 4: *target++=(uint8_t)(value>>24); - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(value>>16); - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(value>>8); - case 1: + case 1: /*fall through*/ *target++=(uint8_t)value; default: /* will never occur */ @@ -4510,13 +4513,13 @@ unassigned: case 4: *target++=(uint8_t)(value>>24); *offsets++=sourceIndex; - case 3: + case 3: /*fall through*/ *target++=(uint8_t)(value>>16); *offsets++=sourceIndex; - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(value>>8); *offsets++=sourceIndex; - case 1: + case 1: /*fall through*/ *target++=(uint8_t)value; *offsets++=sourceIndex; default: @@ -4541,9 +4544,9 @@ unassigned: /* each branch falls through to the next one */ case 3: *charErrorBuffer++=(uint8_t)(value>>16); - case 2: + case 2: /*fall through*/ *charErrorBuffer++=(uint8_t)(value>>8); - case 1: + case 1: /*fall through*/ *charErrorBuffer=(uint8_t)value; default: /* will never occur */ @@ -4560,12 +4563,12 @@ unassigned: if(offsets!=NULL) { *offsets++=sourceIndex; } - case 2: + case 2: /*fall through*/ *target++=(uint8_t)(value>>8); if(offsets!=NULL) { *offsets++=sourceIndex; } - case 1: + case 1: /*fall through*/ *target++=(uint8_t)value; if(offsets!=NULL) { *offsets++=sourceIndex; @@ -4612,14 +4615,14 @@ unassigned: ) { /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */ if(targetCapacity>0) { - *target++=(uint8_t)si_value[0]; - if (si_value_length == 2) { + *target++=(uint8_t)siBytes[0]; + if (siLength == 2) { if (targetCapacity<2) { - cnv->charErrorBuffer[0]=(uint8_t)si_value[1]; + cnv->charErrorBuffer[0]=(uint8_t)siBytes[1]; cnv->charErrorBufferLength=1; *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } else { - *target++=(uint8_t)si_value[1]; + *target++=(uint8_t)siBytes[1]; } } if(offsets!=NULL) { @@ -4628,11 +4631,11 @@ unassigned: } } else { /* target is full */ - cnv->charErrorBuffer[0]=(uint8_t)si_value[0]; - if (si_value_length == 2) { - cnv->charErrorBuffer[1]=(uint8_t)si_value[1]; + cnv->charErrorBuffer[0]=(uint8_t)siBytes[0]; + if (siLength == 2) { + cnv->charErrorBuffer[1]=(uint8_t)siBytes[1]; } - cnv->charErrorBufferLength=si_value_length; + cnv->charErrorBufferLength=siLength; *pErrorCode=U_BUFFER_OVERFLOW_ERROR; } prevLength=1; /* we switched into SBCS */ @@ -4928,7 +4931,7 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, if(U8_IS_TRAIL(b)) { ++i; } else { - if(isourceLimit)) { + if(U_SUCCESS(*pErrorCode) && + cnv->preFromUFirstCP<0 && + source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { c=utf8->toUBytes[0]=b=*source++; toULength=1; - toULimit=utf8_countTrailBytes[b]+1; + toULimit=U8_COUNT_TRAIL_BYTES(b)+1; while(sourcetoUBytes[toULength++]=b=*source++; c=(c<<6)+b; @@ -5177,7 +5183,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, uint32_t stage2Entry; uint32_t asciiRoundtrips; - uint16_t value, minValue; + uint16_t value; UBool hasSupplementary; /* set up the local pointers */ @@ -5197,13 +5203,6 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, } asciiRoundtrips=cnv->sharedData->mbcs.asciiRoundtrips; - if(cnv->useFallback) { - /* use all roundtrip and fallback results */ - minValue=0x800; - } else { - /* use only roundtrips and fallbacks from private-use characters */ - minValue=0xc00; - } hasSupplementary=(UBool)(cnv->sharedData->mbcs.unicodeMask&UCNV_HAS_SUPPLEMENTARY); /* get the converter state from the UTF-8 UConverter */ @@ -5232,7 +5231,7 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs, if(U8_IS_TRAIL(b)) { ++i; } else { - if(isourceLimit)) { + if(U_SUCCESS(*pErrorCode) && + cnv->preFromUFirstCP<0 && + source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) { c=utf8->toUBytes[0]=b=*source++; toULength=1; - toULimit=utf8_countTrailBytes[b]+1; + toULimit=U8_COUNT_TRAIL_BYTES(b)+1; while(sourcetoUBytes[toULength++]=b=*source++; c=(c<<6)+b;