/*
**********************************************************************
-* Copyright (C) 2002-2004, International Business Machines
+* Copyright (C) 2002-2006, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u16.c
#include "ucnv_cnv.h"
#include "cmemory.h"
+enum { /* flag value kept in UConverter.fromUnicodeStatus by the UTF-16 converters in this file */
+ UCNV_NEED_TO_WRITE_BOM=1 /* BOM still pending: set when fromUnicode is reset, cleared to 0 once the two BOM bytes have been written */
+};
+
/* UTF-16BE ----------------------------------------------------------------- */
#if U_IS_BIG_ENDIAN
# define _UTF16PEFromUnicodeWithOffsets _UTF16LEFromUnicodeWithOffsets
#endif
+
static void
_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
UErrorCode *pErrorCode) {
UConverter *cnv;
const UChar *source;
- uint8_t *target;
+ char *target;
int32_t *offsets;
- int32_t targetCapacity, length, count, sourceIndex;
+ uint32_t targetCapacity, length, sourceIndex;
UChar c, trail;
char overflow[4];
source=pArgs->source;
- length=pArgs->sourceLimit-source;
+ length=(int32_t)(pArgs->sourceLimit-source);
if(length<=0) {
/* no input, nothing to do */
return;
}
- targetCapacity=pArgs->targetLimit-pArgs->target;
- if(targetCapacity<=0) {
+ cnv=pArgs->converter;
+
+ /* write the BOM if necessary */
+ if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xfe, (char)0xff };
+ ucnv_fromUWriteBytes(cnv,
+ bom, 2,
+ &pArgs->target, pArgs->targetLimit,
+ &pArgs->offsets, -1,
+ pErrorCode);
+ cnv->fromUnicodeStatus=0;
+ }
+
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
return;
}
- cnv=pArgs->converter;
- target=(uint8_t *)pArgs->target;
+ targetCapacity=(uint32_t)(pArgs->targetLimit-target);
offsets=pArgs->offsets;
sourceIndex=0;
cnv->fromUChar32=c=0;
}
- /* copy an even number of bytes for complete UChars */
- count=2*length;
- if(count>targetCapacity) {
- count=targetCapacity&~1;
- }
- /* count is even */
if(c==0) {
+ /* copy an even number of bytes for complete UChars */
+ uint32_t count=2*length;
+ if(count>targetCapacity) {
+ count=targetCapacity&~1;
+ }
+ /* count is even */
targetCapacity-=count;
count>>=1;
length-=count;
(char **)&target, pArgs->targetLimit,
&offsets, sourceIndex,
pErrorCode);
- targetCapacity=pArgs->targetLimit-(char *)target;
+ targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
}
if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
UChar *target;
int32_t *offsets;
- int32_t targetCapacity, length, count, sourceIndex;
+ uint32_t targetCapacity, length, count, sourceIndex;
UChar c, trail;
cnv=pArgs->converter;
source=(const uint8_t *)pArgs->source;
- length=(const uint8_t *)pArgs->sourceLimit-source;
+ length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
if(length<=0 && cnv->toUnicodeStatus==0) {
/* no input, nothing to do */
return;
}
- targetCapacity=pArgs->targetLimit-pArgs->target;
- if(targetCapacity<=0) {
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
return;
}
- target=pArgs->target;
+ targetCapacity=(uint32_t)(pArgs->targetLimit-target);
offsets=pArgs->offsets;
sourceIndex=0;
c=0;
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
static const UConverterStaticData _UTF16BEStaticData={
UErrorCode *pErrorCode) {
UConverter *cnv;
const UChar *source;
- uint8_t *target;
+ char *target;
int32_t *offsets;
- int32_t targetCapacity, length, count, sourceIndex;
+ uint32_t targetCapacity, length, sourceIndex;
UChar c, trail;
char overflow[4];
source=pArgs->source;
- length=pArgs->sourceLimit-source;
+ length=(int32_t)(pArgs->sourceLimit-source);
if(length<=0) {
/* no input, nothing to do */
return;
}
- targetCapacity=pArgs->targetLimit-pArgs->target;
- if(targetCapacity<=0) {
+ cnv=pArgs->converter;
+
+ /* write the BOM if necessary */
+ if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xff, (char)0xfe };
+ ucnv_fromUWriteBytes(cnv,
+ bom, 2,
+ &pArgs->target, pArgs->targetLimit,
+ &pArgs->offsets, -1,
+ pErrorCode);
+ cnv->fromUnicodeStatus=0;
+ }
+
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
return;
}
- cnv=pArgs->converter;
- target=(uint8_t *)pArgs->target;
+ targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
offsets=pArgs->offsets;
sourceIndex=0;
cnv->fromUChar32=c=0;
}
- /* copy an even number of bytes for complete UChars */
- count=2*length;
- if(count>targetCapacity) {
- count=targetCapacity&~1;
- }
- /* count is even */
if(c==0) {
+ /* copy an even number of bytes for complete UChars */
+ uint32_t count=2*length;
+ if(count>targetCapacity) {
+ count=targetCapacity&~1;
+ }
+ /* count is even */
targetCapacity-=count;
count>>=1;
length-=count;
/* output length bytes with overflow (length>targetCapacity>0) */
ucnv_fromUWriteBytes(cnv,
overflow, length,
- (char **)&target, pArgs->targetLimit,
+ &target, pArgs->targetLimit,
&offsets, sourceIndex,
pErrorCode);
- targetCapacity=pArgs->targetLimit-(char *)target;
+ targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
}
if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
/* write back the updated pointers */
pArgs->source=source;
- pArgs->target=(char *)target;
+ pArgs->target=target;
pArgs->offsets=offsets;
}
UChar *target;
int32_t *offsets;
- int32_t targetCapacity, length, count, sourceIndex;
+ uint32_t targetCapacity, length, count, sourceIndex;
UChar c, trail;
cnv=pArgs->converter;
source=(const uint8_t *)pArgs->source;
- length=(const uint8_t *)pArgs->sourceLimit-source;
+ length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
if(length<=0 && cnv->toUnicodeStatus==0) {
/* no input, nothing to do */
return;
}
- targetCapacity=pArgs->targetLimit-pArgs->target;
- if(targetCapacity<=0) {
+ target=pArgs->target;
+ if(target >= pArgs->targetLimit) {
*pErrorCode=U_BUFFER_OVERFLOW_ERROR;
return;
}
- target=pArgs->target;
+ targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
offsets=pArgs->offsets;
sourceIndex=0;
c=0;
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
}
if(choice!=UCNV_RESET_TO_UNICODE) {
/* reset fromUnicode: prepare to output the UTF-16PE BOM */
- cnv->charErrorBufferLength=2;
-#if U_IS_BIG_ENDIAN
- cnv->charErrorBuffer[0]=0xfe;
- cnv->charErrorBuffer[1]=0xff;
-#else
- cnv->charErrorBuffer[0]=0xff;
- cnv->charErrorBuffer[1]=0xfe;
-#endif
+ cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
}
}
++source;
if(state==1) {
state=8; /* detect UTF-16BE */
- offsetDelta=source-pArgs->source;
+ offsetDelta=(int32_t)(source-pArgs->source);
} else if(state==5) {
state=9; /* detect UTF-16LE */
- offsetDelta=source-pArgs->source;
+ offsetDelta=(int32_t)(source-pArgs->source);
}
} else {
/* switch to UTF-16BE and pass the previous bytes */
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
static const UConverterStaticData _UTF16StaticData = {
sizeof(UConverterStaticData),
"UTF-16",
- 0, /* ### TODO review correctness of all Unicode CCSIDs */
+ 1204, /* CCSID for BOM sensitive UTF-16 */
UCNV_IBM, UCNV_UTF16, 2, 2,
#if U_IS_BIG_ENDIAN
{ 0xff, 0xfd, 0, 0 }, 2,