/*
**********************************************************************
-* Copyright (C) 2002-2004, International Business Machines
+* Copyright (C) 2002-2006, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u32.c
/* -SURROGATE_LOW_START + HALF_BASE */
#define SURROGATE_LOW_BASE 9216
+enum {
+ UCNV_NEED_TO_WRITE_BOM=1 /* fromUnicodeStatus value: set on fromUnicode reset; the fromUnicode functions emit the BOM before the first output and then clear the status to 0 */
+};
+
/* UTF-32BE ----------------------------------------------------------------- */
static void
unsigned char *toUBytes = args->converter->toUBytes;
uint32_t ch, i;
- /* UTF-8 returns here for only non-offset, this needs to change.*/
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
args->converter->toUnicodeStatus = 0;
uint32_t ch, i;
int32_t offsetNum = 0;
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
args->converter->toUnicodeStatus = 0;
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
+ unsigned char *myTarget;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned int indexToWrite;
unsigned char temp[sizeof(uint32_t)];
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
temp[0] = 0;
if (args->converter->fromUChar32) {
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
- int32_t *myOffsets = args->offsets;
+ unsigned char *myTarget;
+ int32_t *myOffsets;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned int indexToWrite;
unsigned char temp[sizeof(uint32_t)];
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
+ myOffsets = args->offsets;
temp[0] = 0;
if (args->converter->fromUChar32) {
*err = U_BUFFER_OVERFLOW_ERROR;
}
}
- offsetNum++;
+ offsetNum = offsetNum + 1 + (temp[1] != 0);
}
if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
unsigned char *toUBytes = args->converter->toUBytes;
uint32_t ch, i;
- /* UTF-8 returns here for only non-offset, this needs to change.*/
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
/* Stores the previously calculated ch from a previous call*/
ch = args->converter->toUnicodeStatus - 1;
uint32_t ch, i;
int32_t offsetNum = 0;
- /* UTF-8 returns here for only non-offset, this needs to change.*/
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
/* Stores the previously calculated ch from a previous call*/
ch = args->converter->toUnicodeStatus - 1;
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
+ unsigned char *myTarget;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned int indexToWrite;
unsigned char temp[sizeof(uint32_t)];
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
temp[3] = 0;
if (args->converter->fromUChar32)
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
- int32_t *myOffsets = args->offsets;
+ unsigned char *myTarget;
+ int32_t *myOffsets;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned char temp[sizeof(uint32_t)];
int32_t offsetNum = 0;
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
+ myOffsets = args->offsets;
temp[3] = 0;
if (args->converter->fromUChar32)
*err = U_BUFFER_OVERFLOW_ERROR;
}
}
- offsetNum++;
+ offsetNum = offsetNum + 1 + (temp[2] != 0);
}
if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
/* The 1232 CCSID refers to any version of Unicode with any endianness of UTF-32 */
}
if(choice!=UCNV_RESET_TO_UNICODE) {
/* reset fromUnicode: prepare to output the UTF-32PE BOM */
- cnv->charErrorBufferLength=4;
-#if U_IS_BIG_ENDIAN
- cnv->charErrorBuffer[0]=0;
- cnv->charErrorBuffer[1]=0;
- cnv->charErrorBuffer[2]=0xfe;
- cnv->charErrorBuffer[3]=0xff;
-#else
- cnv->charErrorBuffer[0]=0xff;
- cnv->charErrorBuffer[1]=0xfe;
- cnv->charErrorBuffer[2]=0;
- cnv->charErrorBuffer[3]=0;
-#endif
+ cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
}
}
++source;
if(state==4) {
state=8; /* detect UTF-32BE */
- offsetDelta=source-pArgs->source;
+ offsetDelta=(int32_t)(source-pArgs->source);
} else if(state==8) {
state=9; /* detect UTF-32LE */
- offsetDelta=source-pArgs->source;
+ offsetDelta=(int32_t)(source-pArgs->source);
}
} else {
/* switch to UTF-32BE and pass the previous bytes */
- int32_t count=source-pArgs->source; /* number of bytes from this buffer */
+ int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
/* reset the source */
source=pArgs->source;
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
+/* The 1236 CCSID refers to any version of Unicode with a BOM sensitive endianness of UTF-32 */
static const UConverterStaticData _UTF32StaticData = {
sizeof(UConverterStaticData),
"UTF-32",
- 0, /* ### TODO review correctness of all Unicode CCSIDs */
+ 1236,
UCNV_IBM, UCNV_UTF32, 4, 4,
#if U_IS_BIG_ENDIAN
{ 0, 0, 0xff, 0xfd }, 4,