git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnv_ext.c
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / common / ucnv_ext.c
index 18fe3f948e9f224e5cf38fb4a938efdcead67eef..38616f8af96a56a294758e72138ddea48cde49bc 100644 (file)
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2003-2004, International Business Machines
+*   Copyright (C) 2003-2007, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -551,6 +551,12 @@ ucnv_extMatchFromU(const int32_t *cx,
         return 0;
     }
 
+    /*
+     * The (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 tests below are for forward
+     * compatibility: values with reserved bits set are not interpreted, and
+     * intermediate results with reserved bits set are not even remembered.
+     */
+
     if(UCNV_EXT_TO_U_IS_PARTIAL(value)) {
         /* partial match, enter the loop below */
         index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
@@ -575,7 +581,8 @@ ucnv_extMatchFromU(const int32_t *cx,
             value=*fromUSectionValues++;
             if( value!=0 &&
                 (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
-                 FROM_U_USE_FALLBACK(useFallback, firstCP))
+                 FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+                (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
             ) {
                 /* remember longest match so far */
                 matchValue=value;
@@ -613,8 +620,9 @@ ucnv_extMatchFromU(const int32_t *cx,
                     /* partial match, continue */
                     index=(int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value);
                 } else {
-                    if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
-                         FROM_U_USE_FALLBACK(useFallback, firstCP)
+                    if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+                         FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+                        (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
                     ) {
                         /* full match, stop with result */
                         matchValue=value;
@@ -632,8 +640,9 @@ ucnv_extMatchFromU(const int32_t *cx,
             return 0;
         }
     } else /* result from firstCP trie lookup */ {
-        if( UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
-             FROM_U_USE_FALLBACK(useFallback, firstCP)
+        if( (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) ||
+             FROM_U_USE_FALLBACK(useFallback, firstCP)) &&
+            (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0
         ) {
             /* full match, stop with result */
             matchValue=value;
@@ -644,20 +653,18 @@ ucnv_extMatchFromU(const int32_t *cx,
         }
     }
 
-    if(matchValue&UCNV_EXT_FROM_U_RESERVED_MASK) {
-        /* do not interpret values with reserved bits used, for forward compatibility */
-        return 0;
-    }
-
     /* return result */
     if(matchValue==UCNV_EXT_FROM_U_SUBCHAR1) {
         return 1; /* assert matchLength==2 */
     }
 
-    *pMatchValue=UCNV_EXT_FROM_U_MASK_ROUNDTRIP(matchValue);
+    *pMatchValue=matchValue;
     return matchLength;
 }
 
+/*
+ * @param value fromUnicode mapping table value; its roundtrip flag and reserved bits are ignored
+ */
 static U_INLINE void
 ucnv_extWriteFromU(UConverter *cnv, const int32_t *cx,
                    uint32_t value,
@@ -792,6 +799,10 @@ ucnv_extInitialMatchFromU(UConverter *cnv, const int32_t *cx,
     }
 }
 
+/*
+ * Used by ISO 2022 implementation.
+ * @return number of bytes in *pValue; the negated byte count if the mapping is a fallback; 0 for no mapping
+ */
 U_CFUNC int32_t
 ucnv_extSimpleMatchFromU(const int32_t *cx,
                          UChar32 cp, uint32_t *pValue,
@@ -809,13 +820,15 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
     if(match>=2) {
         /* write result for simple, single-character conversion */
         int32_t length;
-        
+        int isRoundtrip;
+
+        isRoundtrip=UCNV_EXT_FROM_U_IS_ROUNDTRIP(value);
         length=UCNV_EXT_FROM_U_GET_LENGTH(value);
         value=(uint32_t)UCNV_EXT_FROM_U_GET_DATA(value);
 
         if(length<=UCNV_EXT_FROM_U_MAX_DIRECT_LENGTH) {
             *pValue=value;
-            return length;
+            return isRoundtrip ? length : -length;
 #if 0 /* not currently used */
         } else if(length==4) {
             /* de-serialize a 4-byte result */
@@ -825,7 +838,7 @@ ucnv_extSimpleMatchFromU(const int32_t *cx,
                 ((uint32_t)result[1]<<16)|
                 ((uint32_t)result[2]<<8)|
                 result[3];
-            return 4;
+            return isRoundtrip ? 4 : -4;
 #endif
         }
     }
@@ -933,7 +946,7 @@ static void
 ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
                             const int32_t *cx,
                             const USetAdder *sa,
-                            UConverterUnicodeSet which,
+                            UBool useFallback,
                             int32_t minLength,
                             UChar32 c,
                             UChar s[UCNV_EXT_MAX_UCHARS], int32_t length,
@@ -953,7 +966,7 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
     value=*fromUSectionValues++;
 
     if( value!=0 &&
-        UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) &&
+        (UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) || useFallback) &&
         UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
     ) {
         if(c>=0) {
@@ -974,12 +987,14 @@ ucnv_extGetUnicodeSetString(const UConverterSharedData *sharedData,
             /* no mapping, do nothing */
         } else if(UCNV_EXT_FROM_U_IS_PARTIAL(value)) {
             ucnv_extGetUnicodeSetString(
-                sharedData, cx, sa, which, minLength,
+                sharedData, cx, sa, useFallback, minLength,
                 U_SENTINEL, s, length+1,
                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
                 pErrorCode);
-        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
-                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
+        } else if((useFallback ?
+                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
+                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
         ) {
             sa->addString(sa->set, s, length+1);
@@ -991,6 +1006,7 @@ U_CFUNC void
 ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
                       const USetAdder *sa,
                       UConverterUnicodeSet which,
+                      UConverterSetFilter filter,
                       UErrorCode *pErrorCode) {
     const int32_t *cx;
     const uint16_t *stage12, *stage3, *ps2, *ps3;
@@ -998,6 +1014,7 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
 
     uint32_t value;
     int32_t st1, stage1Length, st2, st3, minLength;
+    UBool useFallback;
 
     UChar s[UCNV_EXT_MAX_UCHARS];
     UChar32 c;
@@ -1014,10 +1031,16 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
 
     stage1Length=cx[UCNV_EXT_FROM_U_STAGE_1_LENGTH];
 
+    useFallback=(UBool)(which==UCNV_ROUNDTRIP_AND_FALLBACK_SET);
+
     /* enumerate the from-Unicode trie table */
     c=0; /* keep track of the current code point while enumerating */
 
-    if(sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY) {
+    if(filter==UCNV_SET_FILTER_2022_CN) {
+        minLength=3;
+    } else if( sharedData->mbcs.outputType==MBCS_OUTPUT_DBCS_ONLY ||
+               filter!=UCNV_SET_FILTER_NONE
+    ) {
         /* DBCS-only, ignore single-byte results */
         minLength=2;
     } else {
@@ -1051,14 +1074,48 @@ ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
                             length=0;
                             U16_APPEND_UNSAFE(s, length, c);
                             ucnv_extGetUnicodeSetString(
-                                sharedData, cx, sa, which, minLength,
+                                sharedData, cx, sa, useFallback, minLength,
                                 c, s, length,
                                 (int32_t)UCNV_EXT_FROM_U_GET_PARTIAL_INDEX(value),
                                 pErrorCode);
-                        } else if(((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
-                                           UCNV_EXT_FROM_U_ROUNDTRIP_FLAG) &&
+                        } else if((useFallback ?
+                                      (value&UCNV_EXT_FROM_U_RESERVED_MASK)==0 :
+                                      ((value&(UCNV_EXT_FROM_U_ROUNDTRIP_FLAG|UCNV_EXT_FROM_U_RESERVED_MASK))==
+                                          UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)) &&
                                   UCNV_EXT_FROM_U_GET_LENGTH(value)>=minLength
                         ) {
+                            switch(filter) {
+                            case UCNV_SET_FILTER_2022_CN:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==3 && UCNV_EXT_FROM_U_GET_DATA(value)<=0x82ffff)) {
+                                    continue;
+                                }
+                                break;
+                            case UCNV_SET_FILTER_SJIS:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 && (value=UCNV_EXT_FROM_U_GET_DATA(value))>=0x8140 && value<=0xeffc)) {
+                                    continue;
+                                }
+                                break;
+                            case UCNV_SET_FILTER_GR94DBCS:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfefe - 0xa1a1) &&
+                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+                                    continue;
+                                }
+                                break;
+                            case UCNV_SET_FILTER_HZ:
+                                if(!(UCNV_EXT_FROM_U_GET_LENGTH(value)==2 &&
+                                     (uint16_t)((value=UCNV_EXT_FROM_U_GET_DATA(value))-0xa1a1)<=(0xfdfe - 0xa1a1) &&
+                                     (uint8_t)(value-0xa1)<=(0xfe - 0xa1))) {
+                                    continue;
+                                }
+                                break;
+                            default:
+                                /*
+                                 * UCNV_SET_FILTER_NONE,
+                                 * or UCNV_SET_FILTER_DBCS_ONLY which is handled via minLength
+                                 */
+                                break;
+                            }
                             sa->add(sa->set, c);
                         }
                     } while((++c&0xf)!=0);