]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnvmbcs.c
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / common / ucnvmbcs.c
index 9b55c17d05da023be765e670f1a9ded5fffcde97..538a18c521de5d070c24d2b4dd01dc2b5b3054c2 100644 (file)
@@ -1,7 +1,7 @@
 /*
 ******************************************************************************
 *
-*   Copyright (C) 2000-2008, International Business Machines
+*   Copyright (C) 2000-2010, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 ******************************************************************************
@@ -398,6 +398,76 @@ gb18030Ranges[13][4]={
 /* bit flag for UConverter.options indicating GB 18030 special handling */
 #define _MBCS_OPTION_GB18030 0x8000
 
+/* bit flags for UConverter.options indicating KEIS, JEF, JIPS special handling */
+#define _MBCS_OPTION_KEIS 0x01000
+#define _MBCS_OPTION_JEF  0x02000
+#define _MBCS_OPTION_JIPS 0x04000
+
+#define KEIS_SO_CHAR_1 0x0A
+#define KEIS_SO_CHAR_2 0x42
+#define KEIS_SI_CHAR_1 0x0A
+#define KEIS_SI_CHAR_2 0x41
+
+#define JEF_SO_CHAR 0x28
+#define JEF_SI_CHAR 0x29
+
+#define JIPS_SO_CHAR_1 0x1A
+#define JIPS_SO_CHAR_2 0x70
+#define JIPS_SI_CHAR_1 0x1A
+#define JIPS_SI_CHAR_2 0x71
+/* Selects which shift sequence getSISOBytes() returns. */
+enum SISO_Option {
+    SI, /* sequence emitted when changing from double-byte mode to single-byte */
+    SO  /* sequence emitted when changing from single-byte mode to double-byte */
+};
+typedef enum SISO_Option SISO_Option;
+/* Writes the SI or SO byte sequence selected by option into value[] and returns the number of bytes written (1 or 2), or 0 if option is neither SI nor SO; value must have room for 2 bytes because the two-byte sequences store value[1]. */
+static int32_t getSISOBytes(SISO_Option option, uint32_t cnvOption, uint8_t *value) {
+    int32_t SISOLength = 0;
+    /* The flags in cnvOption are tested in the order KEIS, JEF, JIPS; if none is set, the single byte UCNV_SI / UCNV_SO is used. */
+    switch (option) {
+        case SI:
+            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+                value[0] = KEIS_SI_CHAR_1;
+                value[1] = KEIS_SI_CHAR_2;
+                SISOLength = 2;
+            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+                value[0] = JEF_SI_CHAR;
+                SISOLength = 1;
+            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+                value[0] = JIPS_SI_CHAR_1;
+                value[1] = JIPS_SI_CHAR_2;
+                SISOLength = 2;
+            } else {
+                value[0] = UCNV_SI;
+                SISOLength = 1;
+            }
+            break;
+        case SO:
+            if ((cnvOption&_MBCS_OPTION_KEIS)!=0) {
+                value[0] = KEIS_SO_CHAR_1;
+                value[1] = KEIS_SO_CHAR_2;
+                SISOLength = 2;
+            } else if ((cnvOption&_MBCS_OPTION_JEF)!=0) {
+                value[0] = JEF_SO_CHAR;
+                SISOLength = 1;
+            } else if ((cnvOption&_MBCS_OPTION_JIPS)!=0) {
+                value[0] = JIPS_SO_CHAR_1;
+                value[1] = JIPS_SO_CHAR_2;
+                SISOLength = 2;
+            } else {
+                value[0] = UCNV_SO;
+                SISOLength = 1;
+            }
+            break;
+        default:
+            /* Should never happen: SISO_Option defines only SI and SO. value[] is left untouched and SISOLength stays 0. */
+            break;
+    }
+    /* A result of 0 means nothing was written to value[]. */
+    return SISOLength;
+}
+
 /* Miscellaneous ------------------------------------------------------------ */
 
 /**
@@ -1400,6 +1470,7 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
         /* TODO parse package name out of the prefix of the base name in the extension .cnv file? */
         args.size=sizeof(UConverterLoadArgs);
         args.nestedLoads=2;
+        args.onlyTestIsLoadable=pArgs->onlyTestIsLoadable;
         args.reserved=pArgs->reserved;
         args.options=pArgs->options;
         args.pkg=pArgs->pkg;
@@ -1415,6 +1486,16 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
             *pErrorCode=U_INVALID_TABLE_FORMAT;
             return;
         }
+        if(pArgs->onlyTestIsLoadable) {
+            /*
+             * Exit as soon as we know that we can load the converter
+             * and the format is valid and supported.
+             * The worst that can happen in the following code is a memory
+             * allocation error.
+             */
+            ucnv_unload(baseSharedData);
+            return;
+        }
 
         /* copy the base table data */
         uprv_memcpy(mbcsTable, &baseSharedData->mbcs, sizeof(UConverterMBCSTable));
@@ -1529,6 +1610,15 @@ ucnv_MBCSLoad(UConverterSharedData *sharedData,
             *pErrorCode=U_INVALID_TABLE_FORMAT;
             return;
         }
+        if(pArgs->onlyTestIsLoadable) {
+            /*
+             * Exit as soon as we know that we can load the converter
+             * and the format is valid and supported.
+             * The worst that can happen in the following code is a memory
+             * allocation error.
+             */
+            return;
+        }
 
         mbcsTable->countStates=(uint8_t)header->countStates;
         mbcsTable->countToUFallbacks=header->countToUFallbacks;
@@ -1660,24 +1750,26 @@ ucnv_MBCSUnload(UConverterSharedData *sharedData) {
 
 static void
 ucnv_MBCSOpen(UConverter *cnv,
-          const char *name,
-          const char *locale,
-          uint32_t options,
-          UErrorCode *pErrorCode) {
+              UConverterLoadArgs *pArgs,
+              UErrorCode *pErrorCode) {
     UConverterMBCSTable *mbcsTable;
     const int32_t *extIndexes;
     uint8_t outputType;
     int8_t maxBytesPerUChar;
 
+    if(pArgs->onlyTestIsLoadable) {
+        return;
+    }
+
     mbcsTable=&cnv->sharedData->mbcs;
     outputType=mbcsTable->outputType;
 
     if(outputType==MBCS_OUTPUT_DBCS_ONLY) {
         /* the swaplfnl option does not apply, remove it */
-        cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+        cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
     }
 
-    if((options&UCNV_OPTION_SWAP_LFNL)!=0) {
+    if((pArgs->options&UCNV_OPTION_SWAP_LFNL)!=0) {
         /* do this because double-checked locking is broken */
         UBool isCached;
 
@@ -1692,16 +1784,25 @@ ucnv_MBCSOpen(UConverter *cnv,
                 }
 
                 /* the option does not apply, remove it */
-                cnv->options=options&=~UCNV_OPTION_SWAP_LFNL;
+                cnv->options=pArgs->options&=~UCNV_OPTION_SWAP_LFNL;
             }
         }
     }
 
-    if(uprv_strstr(name, "18030")!=NULL) {
-        if(uprv_strstr(name, "gb18030")!=NULL || uprv_strstr(name, "GB18030")!=NULL) {
+    if(uprv_strstr(pArgs->name, "18030")!=NULL) {
+        if(uprv_strstr(pArgs->name, "gb18030")!=NULL || uprv_strstr(pArgs->name, "GB18030")!=NULL) {
             /* set a flag for GB 18030 mode, which changes the callback behavior */
             cnv->options|=_MBCS_OPTION_GB18030;
         }
+    } else if((uprv_strstr(pArgs->name, "KEIS")!=NULL) || (uprv_strstr(pArgs->name, "keis")!=NULL)) {
+        /* set a flag for KEIS converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_KEIS;
+    } else if((uprv_strstr(pArgs->name, "JEF")!=NULL) || (uprv_strstr(pArgs->name, "jef")!=NULL)) {
+        /* set a flag for JEF converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_JEF;
+    } else if((uprv_strstr(pArgs->name, "JIPS")!=NULL) || (uprv_strstr(pArgs->name, "jips")!=NULL)) {
+        /* set a flag for JIPS converter, which changes the SI/SO character sequence */
+        cnv->options|=_MBCS_OPTION_JIPS;
     }
 
     /* fix maxBytesPerUChar depending on outputType and options etc. */
@@ -2052,7 +2153,7 @@ unrolled:
 #endif
 
     /* conversion loop */
-    while(targetCapacity>0) {
+    while(targetCapacity > 0 && source < sourceLimit) {
         entry=stateTable[0][*source++];
         /* MBCS_ENTRY_IS_FINAL(entry) */
 
@@ -3837,7 +3938,10 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     uint32_t stage2Entry;
     uint32_t asciiRoundtrips;
     uint32_t value;
-    int32_t length, prevLength;
+    uint8_t si_value[2] = {0, 0}; 
+    uint8_t so_value[2] = {0, 0}; 
+    uint8_t si_value_length, so_value_length;
+    int32_t length = 0, prevLength;
     uint8_t unicodeMask;
 
     cnv=pArgs->converter;
@@ -3908,6 +4012,10 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
     sourceIndex= c==0 ? 0 : -1;
     nextSourceIndex=0;
 
+    /* Get the SI/SO character for the converter */
+    si_value_length = getSISOBytes(SI, cnv->options, si_value);
+    so_value_length = getSISOBytes(SO, cnv->options, so_value);
+
     /* conversion loop */
     /*
      * This is another piece of ugly code:
@@ -3997,8 +4105,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                             length=1;
                         } else {
                             /* change from double-byte mode to single-byte */
-                            value|=(uint32_t)UCNV_SI<<8;
-                            length=2;
+                            if (si_value_length == 1) {
+                                value|=(uint32_t)si_value[0]<<8;
+                                length = 2;
+                            } else if (si_value_length == 2) {
+                                value|=(uint32_t)si_value[1]<<8;
+                                value|=(uint32_t)si_value[0]<<16;
+                                length = 3;
+                            }
                             prevLength=1;
                         }
                     } else {
@@ -4006,8 +4120,14 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                             length=2;
                         } else {
                             /* change from single-byte mode to double-byte */
-                            value|=(uint32_t)UCNV_SO<<16;
-                            length=3;
+                            if (so_value_length == 1) {
+                                value|=(uint32_t)so_value[0]<<16;
+                                length = 3;
+                            } else if (so_value_length == 2) {
+                                value|=(uint32_t)so_value[1]<<16;
+                                value|=(uint32_t)so_value[0]<<24;
+                                length = 4;
+                            }
                             prevLength=2;
                         }
                     }
@@ -4217,8 +4337,14 @@ getTrail:
                             length=1;
                         } else {
                             /* change from double-byte mode to single-byte */
-                            value|=(uint32_t)UCNV_SI<<8;
-                            length=2;
+                            if (si_value_length == 1) {
+                                value|=(uint32_t)si_value[0]<<8;
+                                length = 2;
+                            } else if (si_value_length == 2) {
+                                value|=(uint32_t)si_value[1]<<8;
+                                value|=(uint32_t)si_value[0]<<16;
+                                length = 3;
+                            }
                             prevLength=1;
                         }
                     } else {
@@ -4226,8 +4352,14 @@ getTrail:
                             length=2;
                         } else {
                             /* change from single-byte mode to double-byte */
-                            value|=(uint32_t)UCNV_SO<<16;
-                            length=3;
+                            if (so_value_length == 1) {
+                                value|=(uint32_t)so_value[0]<<16;
+                                length = 3;
+                            } else if (so_value_length == 2) {
+                                value|=(uint32_t)so_value[1]<<16;
+                                value|=(uint32_t)so_value[0]<<24;
+                                length = 4;
+                            }
                             prevLength=2;
                         }
                     }
@@ -4480,15 +4612,27 @@ unassigned:
     ) {
         /* EBCDIC_STATEFUL ending with DBCS: emit an SI to return the output stream to SBCS */
         if(targetCapacity>0) {
-            *target++=(uint8_t)UCNV_SI;
+            *target++=(uint8_t)si_value[0];
+            if (si_value_length == 2) {
+                if (targetCapacity<2) {
+                    cnv->charErrorBuffer[0]=(uint8_t)si_value[1];
+                    cnv->charErrorBufferLength=1;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                } else {
+                    *target++=(uint8_t)si_value[1];
+                }
+            }
             if(offsets!=NULL) {
                 /* set the last source character's index (sourceIndex points at sourceLimit now) */
                 *offsets++=prevSourceIndex;
             }
         } else {
             /* target is full */
-            cnv->charErrorBuffer[0]=(char)UCNV_SI;
-            cnv->charErrorBufferLength=1;
+            cnv->charErrorBuffer[0]=(uint8_t)si_value[0];
+            if (si_value_length == 2) {
+                cnv->charErrorBuffer[1]=(uint8_t)si_value[1];
+            }
+            cnv->charErrorBufferLength=si_value_length;
             *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         }
         prevLength=1; /* we switched into SBCS */
@@ -4881,7 +5025,14 @@ ucnv_SBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                     c=b;
 moreBytes:
                     while(toULength<toULimit) {
-                        if(source<sourceLimit) {
+                        /*
+                         * The sourceLimit may have been adjusted before the conversion loop
+                         * to stop before a truncated sequence.
+                         * Here we need to use the real limit in case we have two truncated
+                         * sequences at the end.
+                         * See ticket #7492.
+                         */
+                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                             b=*source;
                             if(U8_IS_TRAIL(b)) {
                                 ++source;
@@ -5158,7 +5309,14 @@ ucnv_DBCSFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
                     c=b;
 moreBytes:
                     while(toULength<toULimit) {
-                        if(source<sourceLimit) {
+                        /*
+                         * The sourceLimit may have been adjusted before the conversion loop
+                         * to stop before a truncated sequence.
+                         * Here we need to use the real limit in case we have two truncated
+                         * sequences at the end.
+                         * See ticket #7492.
+                         */
+                        if(source<(uint8_t *)pToUArgs->sourceLimit) {
                             b=*source;
                             if(U8_IS_TRAIL(b)) {
                                 ++source;