/*
******************************************************************************
*
-* Copyright (C) 2003-2004, International Business Machines
+* Copyright (C) 2003-2009, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
const uint32_t *toUTable, *toUSection;
uint32_t value, matchValue;
- int32_t i, j, index, length, matchLength;
+ int32_t i, j, idx, length, matchLength;
uint8_t b;
if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) {
/* initialize */
toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t);
- index=0;
+ idx=0;
matchValue=0;
i=j=matchLength=0;
/* match input units until there is a full match or the input is consumed */
for(;;) {
/* go to the next section */
- toUSection=toUTable+index;
+ toUSection=toUTable+idx;
/* read first pair of the section */
value=*toUSection++;
} else {
if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
/* partial match, continue */
- index=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
+ idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
} else {
if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
TO_U_USE_FALLBACK(useFallback)) &&
int32_t **offsets, int32_t srcIndex,
UBool flush,
UErrorCode *pErrorCode) {
- uint32_t value;
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
int32_t match;
/* try to match */
ucnv_extSimpleMatchToU(const int32_t *cx,
const char *source, int32_t length,
UBool useFallback) {
- uint32_t value;
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
int32_t match;
if(length<=0) {
ucnv_extContinueMatchToU(UConverter *cnv,
UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
UErrorCode *pErrorCode) {
- uint32_t value;
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
int32_t match, length;
match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
const uint32_t *fromUTableValues, *fromUSectionValues;
uint32_t value, matchValue;
- int32_t i, j, index, length, matchLength;
+ int32_t i, j, idx, length, matchLength;
UChar c;
if(cx==NULL) {
}
/* trie lookup of firstCP */
- index=firstCP>>10; /* stage 1 index */
- if(index>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
+ idx=firstCP>>10; /* stage 1 index */
+ if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
return 0; /* the first code point is outside the trie */
}
stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
- index=UCNV_EXT_FROM_U(stage12, stage3, index, firstCP);
+ idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP);
stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
- value=stage3b[index];
+ value=stage3b[idx];
if(value==0) {
return 0;
}
+ /*
+ * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
+ * Do not interpret values with reserved bits used, for forward compatibility,
+ * and do not even remember intermediate results with reserved bits used.
+ */
+
if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
/* partial match, enter the loop below */
- index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+ idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
/* initialize */
fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar);
/* match input units until there is a full match or the input is consumed */
for(;;) {
/* go to the next section */
- fromUSectionUChars=fromUTableUChars+index;
- fromUSectionValues=fromUTableValues+index;
+ fromUSectionUChars=fromUTableUChars+idx;
+ fromUSectionValues=fromUTableValues+idx;
/* read first pair of the section */
length=*fromUSectionUChars++;
value=*fromUSectionValues++;
if( value!=0 &&
(UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
- FROM_U_USE_FALLBACK(useFallback, firstCP))
+ FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
) {
/* remember longest match so far */
matchValue=value;
}
/* search for the current UChar */
- index=ucnv_extFindFromU(fromUSectionUChars, length, c);
- if(index<0) {
+ idx=ucnv_extFindFromU(fromUSectionUChars, length, c);
+ if(idx<0) {
/* no match here, stop with the longest match so far */
break;
} else {
- value=fromUSectionValues[index];
+ value=fromUSectionValues[idx];
if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
/* partial match, continue */
- index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+ idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
} else {
- if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
- FROM_U_USE_FALLBACK(useFallback, firstCP)
+ if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+ FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
) {
/* full match, stop with result */
matchValue=value;
return 0;
}
} else /* result from firstCP trie lookup */ {
- if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
- FROM_U_USE_FALLBACK(useFallback, firstCP)
+ if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+ FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
) {
/* full match, stop with result */
matchValue=value;
}
}
- if(matchValue&UCNV_EXT_FROM_U_RESERVED_MASK) {
- /* do not interpret values with reserved bits used, for forward compatibility */
- return 0;
- }
-
/* return result */
if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
return 1; /* assert matchLength==2 */
}
- *pMatchValue=UCNV_EXT_FROM_U_MASK_ROUNDTRIP(matchValue);
+ *pMatchValue=matchValue;
return matchLength;
}
+/*
+ * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
+ */
static U_INLINE void
ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
uint32_t value,
int32_t **offsets, int32_t srcIndex,
UBool flush,
UErrorCode *pErrorCode) {
- uint32_t value;
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
int32_t match;
/* try to match */
}
}
+/*
+ * Used by ISO 2022 implementation.
+ * @return number of bytes in *pValue; the negated byte count if the mapping is a fallback; 0 for no mapping
+ */
U_CFUNC int32_t
ucnv_extSimpleMatchFromU(const int32_t *cx,
UChar32 cp, uint32_t *pValue,
if(match>=2) {
/* write result for simple, single-character conversion */
int32_t length;
-
+ int isRoundtrip;
+
+ isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
length=UCNV_EXT_FROM_U_GET_LENGTH(value);
value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
*pValue=value;
- return length;
+ return isRoundtrip ? length : -length;
#if 0 /* not currently used */
} else if(length==4) {
/* de-serialize a 4-byte result */
((uint32_t)result[1]<<16)|
((uint32_t)result[2]<<8)|
result[3];
- return 4;
+ return isRoundtrip ? 4 : -4;
#endif
}
}
ucnv_extContinueMatchFromU(UConverter *cnv,
UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
UErrorCode *pErrorCode) {
- uint32_t value;
+ uint32_t value = 0; /* initialize output-only param to 0 to silence gcc */
int32_t match;
match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
const int32_t *cx,
const USetAdder *sa,
- UConverterUnicodeSet which,
+ UBool useFallback,
int32_t minLength,
UChar32 c,
UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
value=*fromUSectionValues++;
if( value!=0 &&
- UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
+ (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
if(c>=0) {
/* no mapping, do nothing */
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
ucnv_extGetUnicodeSetString(
- sharedData, cx, sa, which, minLength,
+ sharedData, cx, sa, useFallback, minLength,
U_SENTINEL, s, length+1,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
- } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
+ } else if((useFallback ?
+ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
+ ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+ UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
sa->addString(sa->set, s, length+1);
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
const USetAdder *sa,
UConverterUnicodeSet which,
+ UConverterSetFilter filter,
UErrorCode *pErrorCode) {
const int32_t *cx;
const uint16_t *stage12, *stage3, *ps2, *ps3;
uint32_t value;
int32_t st1, stage1Length, st2, st3, minLength;
+ UBool useFallback;
UChar s[UCNV_EXT_MAX_UCHARS];
UChar32 c;
stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
+ useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
+
/* enumerate the from-Unicode trie table */
c=0; /* keep track of the current code point while enumerating */
- if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
+ if(filter==UCNV_SET_FILTER_2022_CN) {
+ minLength=3;
+ } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
+ filter!=UCNV_SET_FILTER_NONE
+ ) {
/* DBCS-only, ignore single-byte results */
minLength=2;
} else {
length=0;
U16_APPEND_UNSAFE(s, length, c);
ucnv_extGetUnicodeSetString(
- sharedData, cx, sa, which, minLength,
+ sharedData, cx, sa, useFallback, minLength,
c, s, length,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
- } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
+ } else if((useFallback ?
+ (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
+ ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+ UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
) {
+ switch(filter) {
+ case UCNV_SET_FILTER_2022_CN:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_SJIS:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_GR94DBCS:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
+ (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+ continue;
+ }
+ break;
+ case UCNV_SET_FILTER_HZ:
+ if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+ (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
+ (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+ continue;
+ }
+ break;
+ default:
+ /*
+ * UCNV_SET_FILTER_NONE,
+ * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
+ */
+ break;
+ }
sa->add(sa->set, c);
}
} while((++c&0xf)!=0);