git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnv_u7.c
ICU-491.11.3.tar.gz
[apple/icu.git] / icuSources / common / ucnv_u7.c
index f15da503312b9d7fd938c6c6019a1cf39f1ada72..42943f4129a8b867763c83e41d6a94197dcb9dac 100644 (file)
@@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2004, International Business Machines
+*   Copyright (C) 2002-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_u7.c
@@ -21,6 +21,7 @@
 #include "unicode/ucnv.h"
 #include "ucnv_bld.h"
 #include "ucnv_cnv.h"
+#include "uassert.h"
 
 /* UTF-7 -------------------------------------------------------------------- */
 
@@ -190,12 +191,11 @@ _UTF7Reset(UConverter *cnv, UConverterResetChoice choice) {
 
 static void
 _UTF7Open(UConverter *cnv,
-          const char *name,
-          const char *locale,
-          uint32_t options,
+          UConverterLoadArgs *pArgs,
           UErrorCode *pErrorCode) {
-    if((options&0xf)<=1) {
-        cnv->fromUnicodeStatus=(options&0xf)<<28;
+    if(UCNV_GET_VERSION(cnv)<=1) {
+        /* TODO(markus): Should just use cnv->options rather than copying the version number. */
+        cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
         _UTF7Reset(cnv, UCNV_RESET_BOTH);
     } else {
         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
@@ -259,8 +259,8 @@ directMode:
          * In Direct Mode, only the sourceIndex is used.
          */
         byteIndex=0;
-        length=sourceLimit-source;
-        targetCapacity=targetLimit-target;
+        length=(int32_t)(sourceLimit-source);
+        targetCapacity=(int32_t)(targetLimit-target);
         if(length>targetCapacity) {
             length=targetCapacity;
         }
@@ -309,12 +309,51 @@ unicodeMode:
             if(target<targetLimit) {
                 bytes[byteIndex++]=b=*source++;
                 ++nextSourceIndex;
-                if(b>=126) {
-                    /* illegal - test other illegal US-ASCII values by base64Value==-3 */
+                base64Value = -3; /* initialize as illegal */
+                if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
+                    /* either
+                     * base64Value==-1 for any legal character except base64 and minus sign, or
+                     * base64Value==-3 for illegal characters:
+                     * 1. In either case, leave Unicode mode.
+                     * 2.1. If we ended with an incomplete UChar or none after the +, then
+                     *      generate an error for the preceding erroneous sequence and deal with
+                     *      the current (possibly illegal) character next time through.
+                     * 2.2. Else the current char comes after a complete UChar, which was already
+                     *      pushed to the output buf, so:
+                     * 2.2.1. If the current char is legal, just save it for processing next time.
+                     *        It may be, for example, a plus sign that we need to deal with in direct mode.
+                     * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
+                     */
                     inDirectMode=TRUE;
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                    break;
-                } else if((base64Value=fromBase64[b])>=0) {
+                    if(base64Counter==-1) {
+                        /* illegal: + immediately followed by something other than base64 or minus sign */
+                        /* include the plus sign in the reported sequence, but not the subsequent char */
+                        --source;
+                        bytes[0]=PLUS;
+                        byteIndex=1;
+                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                        break;
+                    } else if(bits!=0) {
+                        /* bits are illegally left over, a UChar is incomplete */
+                        /* don't include current char (legal or illegal) in error seq */
+                        --source;
+                        --byteIndex;
+                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                        break;
+                    } else {
+                        /* previous UChar was complete */
+                        if(base64Value==-3) {
+                            /* current character is illegal, deal with it here */
+                            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                            break;
+                        } else {
+                            /* un-read the current character in case it is a plus sign */
+                            --source;
+                            sourceIndex=nextSourceIndex-1;
+                            goto directMode;
+                        }
+                    }
+                } else if(base64Value>=0) {
                     /* collect base64 bytes into UChars */
                     switch(base64Counter) {
                     case -1: /* -1 is immediately after the + */
@@ -365,7 +404,7 @@ unicodeMode:
                         /* will never occur */
                         break;
                     }
-                } else if(base64Value==-2) {
+                } else /*base64Value==-2*/ {
                     /* minus sign terminates the base64 sequence */
                     inDirectMode=TRUE;
                     if(base64Counter==-1) {
@@ -384,33 +423,6 @@ unicodeMode:
                     }
                     sourceIndex=nextSourceIndex;
                     goto directMode;
-                } else if(base64Value==-1) /* for any legal character except base64 and minus sign */ {
-                    /* leave the Unicode Mode */
-                    inDirectMode=TRUE;
-                    if(base64Counter==-1) {
-                        /* illegal: + immediately followed by something other than base64 or minus sign */
-                        /* include the plus sign in the reported sequence */
-                        --sourceIndex;
-                        bytes[0]=PLUS;
-                        bytes[1]=b;
-                        byteIndex=2;
-                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                        break;
-                    } else if(bits==0) {
-                        /* un-read the character in case it is a plus sign */
-                        --source;
-                        sourceIndex=nextSourceIndex-1;
-                        goto directMode;
-                    } else {
-                        /* bits are illegally left over, a UChar is incomplete */
-                        *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                        break;
-                    }
-                } else /* base64Value==-3 for illegal characters */ {
-                    /* illegal */
-                    inDirectMode=TRUE;
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
-                    break;
                 }
             } else {
                 /* target is full */
@@ -475,6 +487,7 @@ _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
         inDirectMode=(UBool)((status>>24)&1);
         base64Counter=(int8_t)(status>>16);
         bits=(uint8_t)status;
+        U_ASSERT(bits<=sizeof(toBase64)/sizeof(toBase64[0]));
     }
 
     /* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
@@ -482,8 +495,8 @@ _UTF7FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 
     if(inDirectMode) {
 directMode:
-        length=sourceLimit-source;
-        targetCapacity=targetLimit-target;
+        length=(int32_t)(sourceLimit-source);
+        targetCapacity=(int32_t)(targetLimit-target);
         if(length>targetCapacity) {
             length=targetCapacity;
         }
@@ -677,14 +690,26 @@ unicodeMode:
 
     if(pArgs->flush && source>=sourceLimit) {
         /* flush remaining bits to the target */
-        if(!inDirectMode && base64Counter!=0) {
+        if(!inDirectMode) {
+            if (base64Counter!=0) {
+                if(target<targetLimit) {
+                    *target++=toBase64[bits];
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex-1;
+                    }
+                } else {
+                    cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                }
+            }
+            /* Add final MINUS to terminate unicodeMode */
             if(target<targetLimit) {
-                *target++=toBase64[bits];
+                *target++=MINUS;
                 if(offsets!=NULL) {
                     *offsets++=sourceIndex-1;
                 }
             } else {
-                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
+                cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
                 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
             }
         }
@@ -917,8 +942,8 @@ directMode:
          * In Direct Mode, only the sourceIndex is used.
          */
         byteIndex=0;
-        length=sourceLimit-source;
-        targetCapacity=targetLimit-target;
+        length=(int32_t)(sourceLimit-source);
+        targetCapacity=(int32_t)(targetLimit-target);
         if(length>targetCapacity) {
             length=targetCapacity;
         }
@@ -1167,8 +1192,8 @@ _IMAPFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 
     if(inDirectMode) {
 directMode:
-        length=sourceLimit-source;
-        targetCapacity=targetLimit-target;
+        length=(int32_t)(sourceLimit-source);
+        targetCapacity=(int32_t)(targetLimit-target);
         if(length>targetCapacity) {
             length=targetCapacity;
         }