/*
**********************************************************************
-* Copyright (C) 2002-2004, International Business Machines
+* Copyright (C) 2002-2015, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: ucnv_u32.c
#include "unicode/utypes.h"
-#if !UCONFIG_NO_CONVERSION
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
#include "unicode/ucnv.h"
+#include "unicode/utf.h"
#include "ucnv_bld.h"
#include "ucnv_cnv.h"
#include "cmemory.h"
/* -SURROGATE_LOW_START + HALF_BASE */
#define SURROGATE_LOW_BASE 9216
+enum {
+ UCNV_NEED_TO_WRITE_BOM=1 /* fromUnicodeStatus value: a BOM is still pending; fromUnicode writes it before the first non-empty output, then resets the status to 0 */
+};
+
/* UTF-32BE ----------------------------------------------------------------- */
static void
unsigned char *toUBytes = args->converter->toUBytes;
uint32_t ch, i;
- /* UTF-8 returns here for only non-offset, this needs to change.*/
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
args->converter->toUnicodeStatus = 0;
uint32_t ch, i;
int32_t offsetNum = 0;
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit) {
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
ch = args->converter->toUnicodeStatus - 1;/*Stores the previously calculated ch from a previous call*/
args->converter->toUnicodeStatus = 0;
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
+ unsigned char *myTarget;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned int indexToWrite;
unsigned char temp[sizeof(uint32_t)];
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
temp[0] = 0;
if (args->converter->fromUChar32) {
while (mySource < sourceLimit && myTarget < targetLimit) {
ch = *(mySource++);
- if (UTF_IS_SURROGATE(ch)) {
+ if (U_IS_SURROGATE(ch)) {
if (U_IS_LEAD(ch)) {
lowsurogate:
if (mySource < sourceLimit) {
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
- int32_t *myOffsets = args->offsets;
+ unsigned char *myTarget;
+ int32_t *myOffsets;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned int indexToWrite;
unsigned char temp[sizeof(uint32_t)];
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ 0, 0, (char)0xfe, (char)0xff };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
+ myOffsets = args->offsets;
temp[0] = 0;
if (args->converter->fromUChar32) {
while (mySource < sourceLimit && myTarget < targetLimit) {
ch = *(mySource++);
- if (UTF_IS_SURROGATE(ch)) {
+ if (U_IS_SURROGATE(ch)) {
if (U_IS_LEAD(ch)) {
lowsurogate:
if (mySource < sourceLimit) {
*err = U_BUFFER_OVERFLOW_ERROR;
}
}
- offsetNum++;
+ offsetNum = offsetNum + 1 + (temp[1] != 0);
}
if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err)) {
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
-const UConverterSharedData _UTF32BEData = {
- sizeof(UConverterSharedData), ~((uint32_t) 0),
- NULL, NULL, &_UTF32BEStaticData, FALSE, &_UTF32BEImpl,
- 0
-};
+const UConverterSharedData _UTF32BEData =
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32BEStaticData, &_UTF32BEImpl);
/* UTF-32LE ---------------------------------------------------------- */
unsigned char *toUBytes = args->converter->toUBytes;
uint32_t ch, i;
- /* UTF-8 returns here for only non-offset, this needs to change.*/
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
/* Stores the previously calculated ch from a previous call*/
ch = args->converter->toUnicodeStatus - 1;
uint32_t ch, i;
int32_t offsetNum = 0;
- /* UTF-8 returns here for only non-offset, this needs to change.*/
+ /* Restore state of current sequence */
if (args->converter->toUnicodeStatus && myTarget < targetLimit)
{
i = args->converter->toULength; /* restore # of bytes consumed */
+ args->converter->toULength = 0;
/* Stores the previously calculated ch from a previous call*/
ch = args->converter->toUnicodeStatus - 1;
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
+ unsigned char *myTarget;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned int indexToWrite;
unsigned char temp[sizeof(uint32_t)];
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
temp[3] = 0;
if (args->converter->fromUChar32)
{
ch = *(mySource++);
- if (UTF_IS_SURROGATE(ch)) {
- if (U_IS_LEAD(ch))
+ if (U16_IS_SURROGATE(ch)) {
+ if (U16_IS_LEAD(ch))
{
lowsurogate:
if (mySource < sourceLimit)
{
ch2 = *mySource;
- if (U_IS_TRAIL(ch2)) {
+ if (U16_IS_TRAIL(ch2)) {
ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
mySource++;
}
UErrorCode * err)
{
const UChar *mySource = args->source;
- unsigned char *myTarget = (unsigned char *) args->target;
- int32_t *myOffsets = args->offsets;
+ unsigned char *myTarget;
+ int32_t *myOffsets;
const UChar *sourceLimit = args->sourceLimit;
const unsigned char *targetLimit = (unsigned char *) args->targetLimit;
UChar32 ch, ch2;
unsigned char temp[sizeof(uint32_t)];
int32_t offsetNum = 0;
+ if(mySource >= sourceLimit) {
+ /* no input, nothing to do */
+ return;
+ }
+
+ /* write the BOM if necessary */
+ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+ static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 };
+ ucnv_fromUWriteBytes(args->converter,
+ bom, 4,
+ &args->target, args->targetLimit,
+ &args->offsets, -1,
+ err);
+ args->converter->fromUnicodeStatus=0;
+ }
+
+ myTarget = (unsigned char *) args->target;
+ myOffsets = args->offsets;
temp[3] = 0;
if (args->converter->fromUChar32)
{
ch = *(mySource++);
- if (UTF_IS_SURROGATE(ch)) {
- if (U_IS_LEAD(ch))
+ if (U16_IS_SURROGATE(ch)) {
+ if (U16_IS_LEAD(ch))
{
lowsurogate:
if (mySource < sourceLimit)
{
ch2 = *mySource;
- if (U_IS_TRAIL(ch2))
+ if (U16_IS_TRAIL(ch2))
{
ch = ((ch - SURROGATE_HIGH_START) << HALF_SHIFT) + ch2 + SURROGATE_LOW_BASE;
mySource++;
*err = U_BUFFER_OVERFLOW_ERROR;
}
}
- offsetNum++;
+ offsetNum = offsetNum + 1 + (temp[2] != 0);
}
if (mySource < sourceLimit && myTarget >= targetLimit && U_SUCCESS(*err))
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
/* The 1232 CCSID refers to any version of Unicode with any endianess of UTF-32 */
};
-const UConverterSharedData _UTF32LEData = {
- sizeof(UConverterSharedData), ~((uint32_t) 0),
- NULL, NULL, &_UTF32LEStaticData, FALSE, &_UTF32LEImpl,
- 0
-};
+const UConverterSharedData _UTF32LEData =
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32LEStaticData, &_UTF32LEImpl);
/* UTF-32 (Detect BOM) ------------------------------------------------------ */
}
if(choice!=UCNV_RESET_TO_UNICODE) {
/* reset fromUnicode: prepare to output the UTF-32PE BOM */
- cnv->charErrorBufferLength=4;
-#if U_IS_BIG_ENDIAN
- cnv->charErrorBuffer[0]=0;
- cnv->charErrorBuffer[1]=0;
- cnv->charErrorBuffer[2]=0xfe;
- cnv->charErrorBuffer[3]=0xff;
-#else
- cnv->charErrorBuffer[0]=0xff;
- cnv->charErrorBuffer[1]=0xfe;
- cnv->charErrorBuffer[2]=0;
- cnv->charErrorBuffer[3]=0;
-#endif
+ cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
}
}
static void
_UTF32Open(UConverter *cnv,
- const char *name,
- const char *locale,
- uint32_t options,
+ UConverterLoadArgs *pArgs, /* not read here: opening only resets both conversion directions */
UErrorCode *pErrorCode) {
_UTF32Reset(cnv, UCNV_RESET_BOTH);
}
++source;
if(state==4) {
state=8; /* detect UTF-32BE */
- offsetDelta=source-pArgs->source;
+ offsetDelta=(int32_t)(source-pArgs->source);
} else if(state==8) {
state=9; /* detect UTF-32LE */
- offsetDelta=source-pArgs->source;
+ offsetDelta=(int32_t)(source-pArgs->source);
}
} else {
/* switch to UTF-32BE and pass the previous bytes */
- int32_t count=source-pArgs->source; /* number of bytes from this buffer */
+ int32_t count=(int32_t)(source-pArgs->source); /* number of bytes from this buffer */
/* reset the source */
source=pArgs->source;
NULL,
NULL,
NULL,
- ucnv_getCompleteUnicodeSet
+ ucnv_getNonSurrogateUnicodeSet
};
+/* The 1236 CCSID refers to any version of Unicode with a BOM-sensitive endianness of UTF-32 */
static const UConverterStaticData _UTF32StaticData = {
sizeof(UConverterStaticData),
"UTF-32",
- 0, /* ### TODO review correctness of all Unicode CCSIDs */
+ 1236,
UCNV_IBM, UCNV_UTF32, 4, 4,
#if U_IS_BIG_ENDIAN
{ 0, 0, 0xff, 0xfd }, 4,
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
};
-const UConverterSharedData _UTF32Data = {
- sizeof(UConverterSharedData), ~((uint32_t) 0),
- NULL, NULL, &_UTF32StaticData, FALSE, &_UTF32Impl,
- 0
-};
+const UConverterSharedData _UTF32Data =
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF32StaticData, &_UTF32Impl);
#endif