/*
**********************************************************************
-* Copyright (C) 2002-2004, International Business Machines
+* Copyright (C) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u7.c
#include "unicode/ucnv.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
+#include "uassert.h"
/* UTF-7 -------------------------------------------------------------------- */
static void
_UTF7Open(UConverter *cnv,
- const char *name,
- const char *locale,
- uint32_t options,
+ UConverterLoadArgs *pArgs,
UErrorCode *pErrorCode) {
- if((options&0xf)<=1) {
- cnv->fromUnicodeStatus=(options&0xf)<<28;
+ if(UCNV_GET_VERSION(cnv)<=1) {
+ /* TODO(markus): Should just use cnv->options rather than copying the version number. */
+ cnv->fromUnicodeStatus=UCNV_GET_VERSION(cnv)<<28;
_UTF7Reset(cnv, UCNV_RESET_BOTH);
} else {
*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
* In Direct Mode, only the sourceIndex is used.
*/
byteIndex=0;
- length=sourceLimit-source;
- targetCapacity=targetLimit-target;
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
if(length>targetCapacity) {
length=targetCapacity;
}
if(target<targetLimit) {
bytes[byteIndex++]=b=*source++;
++nextSourceIndex;
- if(b>=126) {
- /* illegal - test other illegal US-ASCII values by base64Value==-3 */
+ base64Value = -3; /* initialize as illegal */
+ if(b>=126 || (base64Value=fromBase64[b])==-3 || base64Value==-1) {
+ /* either
+ * base64Value==-1 for any legal character except base64 and minus sign, or
+ * base64Value==-3 for illegal characters:
+ * 1. In either case, leave Unicode mode.
+ * 2.1. If we ended with an incomplete UChar or none after the +, then
+ * generate an error for the preceding erroneous sequence and deal with
+ * the current (possibly illegal) character next time through.
+ * 2.2. Else the current char comes after a complete UChar, which was already
+ * pushed to the output buf, so:
+ * 2.2.1. If the current char is legal, just save it for processing next time.
+ * It may be for example, a plus which we need to deal with in direct mode.
+ * 2.2.2. Else if the current char is illegal, we might as well deal with it here.
+ */
inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else if((base64Value=fromBase64[b])>=0) {
+ if(base64Counter==-1) {
+ /* illegal: + immediately followed by something other than base64 or minus sign */
+ /* include the plus sign in the reported sequence, but not the subsequent char */
+ --source;
+ bytes[0]=PLUS;
+ byteIndex=1;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else if(bits!=0) {
+ /* bits are illegally left over, a UChar is incomplete */
+ /* don't include current char (legal or illegal) in error seq */
+ --source;
+ --byteIndex;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else {
+ /* previous UChar was complete */
+ if(base64Value==-3) {
+ /* current character is illegal, deal with it here */
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ } else {
+ /* un-read the current character in case it is a plus sign */
+ --source;
+ sourceIndex=nextSourceIndex-1;
+ goto directMode;
+ }
+ }
+ } else if(base64Value>=0) {
/* collect base64 bytes into UChars */
switch(base64Counter) {
case -1: /* -1 is immediately after the + */
/* will never occur */
break;
}
- } else if(base64Value==-2) {
+ } else /*base64Value==-2*/ {
/* minus sign terminates the base64 sequence */
inDirectMode=TRUE;
if(base64Counter==-1) {
}
sourceIndex=nextSourceIndex;
goto directMode;
- } else if(base64Value==-1) /* for any legal character except base64 and minus sign */ {
- /* leave the Unicode Mode */
- inDirectMode=TRUE;
- if(base64Counter==-1) {
- /* illegal: + immediately followed by something other than base64 or minus sign */
- /* include the plus sign in the reported sequence */
- --sourceIndex;
- bytes[0]=PLUS;
- bytes[1]=b;
- byteIndex=2;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- } else if(bits==0) {
- /* un-read the character in case it is a plus sign */
- --source;
- sourceIndex=nextSourceIndex-1;
- goto directMode;
- } else {
- /* bits are illegally left over, a UChar is incomplete */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else /* base64Value==-3 for illegal characters */ {
- /* illegal */
- inDirectMode=TRUE;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
}
} else {
/* target is full */
inDirectMode=(UBool)((status>>24)&1);
base64Counter=(int8_t)(status>>16);
bits=(uint8_t)status;
+ U_ASSERT(bits<=sizeof(toBase64)/sizeof(toBase64[0]));
}
/* UTF-7 always encodes UTF-16 code units, therefore we need only a simple sourceIndex */
if(inDirectMode) {
directMode:
- length=sourceLimit-source;
- targetCapacity=targetLimit-target;
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
if(length>targetCapacity) {
length=targetCapacity;
}
if(pArgs->flush && source>=sourceLimit) {
/* flush remaining bits to the target */
- if(!inDirectMode && base64Counter!=0) {
+ if(!inDirectMode) {
+ if (base64Counter!=0) {
+ if(target<targetLimit) {
+ *target++=toBase64[bits];
+ if(offsets!=NULL) {
+ *offsets++=sourceIndex-1;
+ }
+ } else {
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ /* Add final MINUS to terminate unicodeMode */
if(target<targetLimit) {
- *target++=toBase64[bits];
+ *target++=MINUS;
if(offsets!=NULL) {
*offsets++=sourceIndex-1;
}
} else {
- cnv->charErrorBuffer[cnv->charErrorBufferLength++]=toBase64[bits];
+ cnv->charErrorBuffer[cnv->charErrorBufferLength++]=MINUS;
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
}
}
* In Direct Mode, only the sourceIndex is used.
*/
byteIndex=0;
- length=sourceLimit-source;
- targetCapacity=targetLimit-target;
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
if(length>targetCapacity) {
length=targetCapacity;
}
if(inDirectMode) {
directMode:
- length=sourceLimit-source;
- targetCapacity=targetLimit-target;
+ length=(int32_t)(sourceLimit-source);
+ targetCapacity=(int32_t)(targetLimit-target);
if(length>targetCapacity) {
length=targetCapacity;
}