git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnv_ext.c
ICU-461.17.tar.gz
[apple/icu.git] / icuSources / common / ucnv_ext.c
index 18fe3f948e9f224e5cf38fb4a938efdcead67eef..8e8b326e9fa1f05d6bef042e518b2f9c721dc530 100644 (file)
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2003-2004, International Business Machines
+*   Copyright (C) 2003-2009, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -125,7 +125,7 @@ ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
     const uint32_t *toUTable, *toUSection;
 
     uint32_t value, matchValue;
-    int32_t i, j, index, length, matchLength;
+    int32_t i, j, idx, length, matchLength;
     uint8_t b;
 
     if(cx==NULL || cx[UCNV_EXT_TO_U_LENGTH]<=0) {
@@ -134,7 +134,7 @@ ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
 
     /* initialize */
     toUTable=UCNV_EXT_ARRAY(cx, UCNV_EXT_TO_U_INDEX, uint32_t);
-    index=0;
+    idx=0;
 
     matchValue=0;
     i=j=matchLength=0;
@@ -158,7 +158,7 @@ ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
     /* match input units until there is a full match or the input is consumed */
     for(;;) {
         /* go to the next section */
-        toUSection=toUTable+index;
+        toUSection=toUTable+idx;
 
         /* read first pair of the section */
         value=*toUSection++;
@@ -202,7 +202,7 @@ ucnv_extMatchToU(const int32_t *cx, int8_t sisoState,
         } else {
             if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
                 /* partial match, continue */
-                index=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
+                idx=(int32_t)UCNV_EXT_TO_U_GET_PARTIAL_INDEX(value);
             } else {
                 if( (UCNV_EXT_TO_U_IS_ROUNDTRIP(value) ||
                      TO_U_USE_FALLBACK(useFallback)) &&
@@ -279,7 +279,7 @@ ucnv_extInitialMatchToU(UConverter *cnv, const int32_t *cx,
                         int32_t **offsets, int32_t srcIndex,
                         UBool flush,
                         UErrorCode *pErrorCode) {
-    uint32_t value;
+    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
     int32_t match;
 
     /* try to match */
@@ -329,7 +329,7 @@ U_CFUNC UChar32
 ucnv_extSimpleMatchToU(const int32_t *cx,
                        const char *source, int32_t length,
                        UBool useFallback) {
-    uint32_t value;
+    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
     int32_t match;
 
     if(length<=0) {
@@ -367,7 +367,7 @@ U_CFUNC void
 ucnv_extContinueMatchToU(UConverter *cnv,
                          UConverterToUnicodeArgs *pArgs, int32_t srcIndex,
                          UErrorCode *pErrorCode) {
-    uint32_t value;
+    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
     int32_t match, length;
 
     match=ucnv_extMatchToU(cnv->sharedData->mbcs.extIndexes, (int8_t)UCNV_SISO_STATE(cnv),
@@ -528,7 +528,7 @@ ucnv_extMatchFromU(const int32_t *cx,
     const uint32_t *fromUTableValues, *fromUSectionValues;
 
     uint32_t value, matchValue;
-    int32_t i, j, index, length, matchLength;
+    int32_t i, j, idx, length, matchLength;
     UChar c;
 
     if(cx==NULL) {
@@ -536,24 +536,30 @@ ucnv_extMatchFromU(const int32_t *cx,
     }
 
     /* trie lookup of firstCP */
-    index=firstCP>>10; /* stage 1 index */
-    if(index>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
+    idx=firstCP>>10; /* stage 1 index */
+    if(idx>=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH]) {
         return 0; /* the first code point is outside the trie */
     }
 
     stage12=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_12_INDEX, uint16_t);
     stage3=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3_INDEX, uint16_t);
-    index=UCNV_EXT_FROM_U(stage12, stage3, index, firstCP);
+    idx=UCNV_EXT_FROM_U(stage12, stage3, idx, firstCP);
 
     stage3b=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_STAGE_3B_INDEX, uint32_t);
-    value=stage3b[index];
+    value=stage3b[idx];
     if(value==0) {
         return 0;
     }
 
+    /*
+     * Tests for (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0:
+     * Do not interpret values with reserved bits used, for forward compatibility,
+     * and do not even remember intermediate results with reserved bits used.
+     */
+
     if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
         /* partial match, enter the loop below */
-        index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+        idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
 
         /* initialize */
         fromUTableUChars=UCNV_EXT_ARRAY(cx, UCNV_EXT_FROM_U_UCHARS_INDEX, UChar);
@@ -567,15 +573,16 @@ ucnv_extMatchFromU(const int32_t *cx,
         /* match input units until there is a full match or the input is consumed */
         for(;;) {
             /* go to the next section */
-            fromUSectionUChars=fromUTableUChars+index;
-            fromUSectionValues=fromUTableValues+index;
+            fromUSectionUChars=fromUTableUChars+idx;
+            fromUSectionValues=fromUTableValues+idx;
 
             /* read first pair of the section */
             length=*fromUSectionUChars++;
             value=*fromUSectionValues++;
             if( value!=0 &&
                 (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
-                 FROM_U_USE_FALLBACK(useFallback, firstCP))
+                 FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+                (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
             ) {
                 /* remember longest match so far */
                 matchValue=value;
@@ -603,18 +610,19 @@ ucnv_extMatchFromU(const int32_t *cx,
             }
 
             /* search for the current UChar */
-            index=ucnv_extFindFromU(fromUSectionUChars, length, c);
-            if(index<0) {
+            idx=ucnv_extFindFromU(fromUSectionUChars, length, c);
+            if(idx<0) {
                 /* no match here, stop with the longest match so far */
                 break;
             } else {
-                value=fromUSectionValues[index];
+                value=fromUSectionValues[idx];
                 if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
                     /* partial match, continue */
-                    index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
+                    idx=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
                 } else {
-                    if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
-                         FROM_U_USE_FALLBACK(useFallback, firstCP)
+                    if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+                         FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+                        (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
                     ) {
                         /* full match, stop with result */
                         matchValue=value;
@@ -632,8 +640,9 @@ ucnv_extMatchFromU(const int32_t *cx,
             return 0;
         }
     } else /* result from firstCP trie lookup */ {
-        if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
-             FROM_U_USE_FALLBACK(useFallback, firstCP)
+        if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+             FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+            (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
         ) {
             /* full match, stop with result */
             matchValue=value;
@@ -644,20 +653,18 @@ ucnv_extMatchFromU(const int32_t *cx,
         }
     }
 
-    if(matchValue&UCNV_EXT_FROM_U_RESERVED_MASK) {
-        /* do not interpret values with reserved bits used, for forward compatibility */
-        return 0;
-    }
-
     /* return result */
     if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
         return 1; /* assert matchLength==2 */
     }
 
-    *pMatchValue=UCNV_EXT_FROM_U_MASK_ROUNDTRIP(matchValue);
+    *pMatchValue=matchValue;
     return matchLength;
 }
 
+/*
+ * @param value fromUnicode mapping table value; ignores roundtrip and reserved bits
+ */
 static U_INLINE void
 ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
                    uint32_t value,
@@ -741,7 +748,7 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
                           int32_t **offsets, int32_t srcIndex,
                           UBool flush,
                           UErrorCode *pErrorCode) {
-    uint32_t value;
+    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
     int32_t match;
 
     /* try to match */
@@ -792,6 +799,10 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
     }
 }
 
+/*
+ * Used by ISO 2022 implementation.
+ * @return number of bytes in *pValue; negative number if fallback; 0 for no mapping
+ */
 U_CFUNC int32_t
 ucnv_extSimpleMatchFromU(const int32_t *cx,
                          UChar32 cp, uint32_t *pValue,
@@ -809,13 +820,15 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
     if(match>=2) {
         /* write result for simple, single-character conversion */
         int32_t length;
-        
+        int isRoundtrip;
+
+        isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
         length=UCNV_EXT_FROM_U_GET_LENGTH(value);
         value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
 
         if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
             *pValue=value;
-            return length;
+            return isRoundtrip ? length : -length;
 #if 0 /* not currently used */
         } else if(length==4) {
             /* de-serialize a 4-byte result */
@@ -825,7 +838,7 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
                 ((uint32_t)result[1]<<16)|
                 ((uint32_t)result[2]<<8)|
                 result[3];
-            return 4;
+            return isRoundtrip ? 4 : -4;
 #endif
         }
     }
@@ -848,7 +861,7 @@ U_CFUNC void
 ucnv_extContinueMatchFromU(UConverter *cnv,
                            UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
                            UErrorCode *pErrorCode) {
-    uint32_t value;
+    uint32_t value = 0;  /* initialize output-only param to 0 to silence gcc */
     int32_t match;
 
     match=ucnv_extMatchFromU(cnv->sharedData->mbcs.extIndexes,
@@ -933,7 +946,7 @@ static void
 ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
                             const int32_t *cx,
                             const USetAdder *sa,
-                            UConverterUnicodeSet which,
+                            UBool useFallback,
                             int32_t minLength,
                             UChar32 c,
                             UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
@@ -953,7 +966,7 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
     value=*fromUSectionValues++;
 
     if( value!=0 &&
-        UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
+        (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
         UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
     ) {
         if(c>=0) {
@@ -974,12 +987,14 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
             /* no mapping, do nothing */
         } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
             ucnv_extGetUnicodeSetString(
-                sharedData, cx, sa, which, minLength,
+                sharedData, cx, sa, useFallback, minLength,
                 U_SENTINEL, s, length+1,
                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
                 pErrorCode);
-        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
-                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
+        } else if((useFallback ?
+                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
+                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
         ) {
             sa->addString(sa->set, s, length+1);
@@ -991,6 +1006,7 @@ U_CFUNC void
 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
                       const USetAdder *sa,
                       UConverterUnicodeSet which,
+                      UConverterSetFilter filter,
                       UErrorCode *pErrorCode) {
     const int32_t *cx;
     const uint16_t *stage12, *stage3, *ps2, *ps3;
@@ -998,6 +1014,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
 
     uint32_t value;
     int32_t st1, stage1Length, st2, st3, minLength;
+    UBool useFallback;
 
     UChar s[UCNV_EXT_MAX_UCHARS];
     UChar32 c;
@@ -1014,10 +1031,16 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
 
     stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
 
+    useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
+
     /* enumerate the from-Unicode trie table */
     c=0; /* keep track of the current code point while enumerating */
 
-    if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
+    if(filter==UCNV_SET_FILTER_2022_CN) {
+        minLength=3;
+    } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
+               filter!=UCNV_SET_FILTER_NONE
+    ) {
         /* DBCS-only, ignore single-byte results */
         minLength=2;
     } else {
@@ -1051,14 +1074,48 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
                             length=0;
                             U16_APPEND_UNSAFE(s, length, c);
                             ucnv_extGetUnicodeSetString(
-                                sharedData, cx, sa, which, minLength,
+                                sharedData, cx, sa, useFallback, minLength,
                                 c, s, length,
                                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
                                 pErrorCode);
-                        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
-                                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
+                        } else if((useFallback ?
+                                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
+                                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+                                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
                                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
                         ) {
+                            switch(filter) {
+                            case UCNV_SET_FILTER_2022_CN:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
+                                    continue;
+                                }
+                                break;
+                            case UCNV_SET_FILTER_SJIS:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
+                                    continue;
+                                }
+                                break;
+                            case UCNV_SET_FILTER_GR94DBCS:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
+                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+                                    continue;
+                                }
+                                break;
+                            case UCNV_SET_FILTER_HZ:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
+                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+                                    continue;
+                                }
+                                break;
+                            default:
+                                /*
+                                 * UCNV_SET_FILTER_NONE,
+                                 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
+                                 */
+                                break;
+                            }
                             sa->add(sa->set, c);
                         }
                     } while((++c&0xf)!=0);