+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
-* Copyright (C) 2003-2011, International Business Machines
+* Copyright (C) 2003-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ucnv_ext.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION
#include "unicode/uset.h"
+#include "unicode/ustring.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "ucnv_ext.h"
/* from Unicode ------------------------------------------------------------- */
+// Tells whether a from-Unicode extension result word may be used for
+// conversion. Accepted are roundtrips and "good one-way" mappings (any
+// status bit set), plus those fallbacks that FROM_U_USE_FALLBACK() allows
+// for the given useFallback setting and first code point.
+// A word with any reserved bit set is always rejected.
+static inline UBool
+extFromUUseMapping(UBool useFallback, uint32_t value, UChar32 firstCP) {
+    // Either the mapping carries a status bit, or it is an allowed fallback.
+    const UBool statusOrFallback=
+        (value&UCNV_EXT_FROM_U_STATUS_MASK)!=0 ||
+        FROM_U_USE_FALLBACK(useFallback, firstCP);
+    // Reserved bits veto the mapping regardless of the above.
+    return statusOrFallback && (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0;
+}
+
/*
* @return index of the UChar, if found; else <0
*/
/* read first pair of the section */
length=*fromUSectionUChars++;
value=*fromUSectionValues++;
- if( value!=0 &&
- (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
- FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
- (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
- ) {
+ if(value!=0 && extFromUUseMapping(useFallback, value, firstCP)) {
/* remember longest match so far */
matchValue=value;
matchLength=2+i+j;
/* partial match, continue */
idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
} else {
- if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
- FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
- (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
- ) {
+ if(extFromUUseMapping(useFallback, value, firstCP)) {
/* full match, stop with result */
matchValue=value;
matchLength=2+i+j;
return 0;
}
} else /* result from firstCP trie lookup */ {
- if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
- FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
- (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
- ) {
+ if(extFromUUseMapping(useFallback, value, firstCP)) {
/* full match, stop with result */
matchValue=value;
matchLength=2;
switch(length) {
case 3:
*p++=(uint8_t)(value>>16);
- case 2: /*fall through*/
+ U_FALLTHROUGH;
+ case 2:
*p++=(uint8_t)(value>>8);
- case 1: /*fall through*/
+ U_FALLTHROUGH;
+ case 1:
*p++=(uint8_t)value;
+ U_FALLTHROUGH;
default:
break; /* will never occur */
}
} else {
/* the match did not use all of preFromU[] - keep the rest for replay */
int32_t length=cnv->preFromULength-match;
- uprv_memmove(cnv->preFromU, cnv->preFromU+match, length*U_SIZEOF_UCHAR);
+ u_memmove(cnv->preFromU, cnv->preFromU+match, length);
cnv->preFromULength=(int8_t)-length;
}
}
}
+// Tells whether the mapping described by a from-Unicode result word
+// should be added to the Unicode set that is being built.
+//
+// which:     UCNV_ROUNDTRIP_SET, or else UCNV_ROUNDTRIP_AND_FALLBACK_SET
+// minLength: smallest output length that qualifies an entry
+// value:     the from-Unicode result word to test
+static UBool
+extSetUseMapping(UConverterUnicodeSet which, int32_t minLength, uint32_t value) {
+    UBool flagsAcceptable;
+    if(which==UCNV_ROUNDTRIP_SET) {
+        // Only entries with the roundtrip flag set and no reserved bits set.
+        // Fallbacks are left out even where ucnv_fromUnicode() would use
+        // them (fallbacks from PUA); see the API docs for
+        // ucnv_getUnicodeSet(). By analogy, "good one-way" mappings are
+        // left out as well.
+        flagsAcceptable=
+            (value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+                UCNV_EXT_FROM_U_ROUNDTRIP_FLAG;
+    } else /* UCNV_ROUNDTRIP_AND_FALLBACK_SET */ {
+        // Any entry qualifies as long as no reserved bit is set.
+        flagsAcceptable=(value&UCNV_EXT_FROM_U_RESERVED_MASK)==0;
+    }
+    // The length test keeps out <subchar1> entries and other (future?)
+    // pseudo-entries that have an output length of 0.
+    return flagsAcceptable && UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength;
+}
+
static void
ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
const int32_t *cx,
const USetAdder *sa,
- UBool useFallback,
+ UConverterUnicodeSet which,
int32_t minLength,
- UChar32 c,
+ UChar32 firstCP,
UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
int32_t sectionIndex,
UErrorCode *pErrorCode) {
count=*fromUSectionUChars++;
value=*fromUSectionValues++;
- if( value!=0 &&
- (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
- UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
- ) {
- if(c>=0) {
+ if(extSetUseMapping(which, minLength, value)) {
+ if(length==U16_LENGTH(firstCP)) {
/* add the initial code point */
- sa->add(sa->set, c);
+ sa->add(sa->set, firstCP);
} else {
/* add the string so far */
sa->addString(sa->set, s, length);
/* no mapping, do nothing */
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
ucnv_extGetUnicodeSetString(
- sharedData, cx, sa, useFallback, minLength,
- U_SENTINEL, s, length+1,
+ sharedData, cx, sa, which, minLength,
+ firstCP, s, length+1,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
- } else if((useFallback ?
- (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
- ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
- UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
- ) {
+ } else if(extSetUseMapping(which, minLength, value)) {
sa->addString(sa->set, s, length+1);
}
}
uint32_t value;
int32_t st1, stage1Length, st2, st3, minLength;
- UBool useFallback;
UChar s[UCNV_EXT_MAX_UCHARS];
UChar32 c;
stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
- useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
-
/* enumerate the from-Unicode trie table */
c=0; /* keep track of the current code point while enumerating */
/* read the stage 3 block */
ps3=stage3+st3;
- /*
- * Add code points for which the roundtrip flag is set.
- * Do not add <subchar1> entries or other (future?) pseudo-entries
- * with an output length of 0, or entries with reserved bits set.
- * Recurse for partial results.
- */
do {
value=stage3b[*ps3++];
if(value==0) {
/* no mapping, do nothing */
} else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
+ // Recurse for partial results.
length=0;
U16_APPEND_UNSAFE(s, length, c);
ucnv_extGetUnicodeSetString(
- sharedData, cx, sa, useFallback, minLength,
+ sharedData, cx, sa, which, minLength,
c, s, length,
(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
pErrorCode);
- } else if((useFallback ?
- (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
- ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
- UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
- UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
- ) {
+ } else if(extSetUseMapping(which, minLength, value)) {
switch(filter) {
case UCNV_SET_FILTER_2022_CN:
if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {