]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ucnv_u16.c
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / common / ucnv_u16.c
index 86fd19991c3ccb4a075b350d4f9eaba6dbf9068d..6fc9e2a4493d71264bc7b4408b515ee159ce53f2 100644 (file)
@@ -1,6 +1,6 @@
 /*  
 **********************************************************************
-*   Copyright (C) 2002-2004, International Business Machines
+*   Copyright (C) 2002-2006, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *   file name:  ucnv_u16.c
 #include "ucnv_cnv.h"
 #include "cmemory.h"
 
+enum { /* values stored in cnv->fromUnicodeStatus by the UTF-16 converters in this file */
+    UCNV_NEED_TO_WRITE_BOM=1 /* set by _UTF16Reset(); the FromUnicode functions write the BOM when they see it, then reset the status to 0 */
+};
+
 /* UTF-16BE ----------------------------------------------------------------- */
 
 #if U_IS_BIG_ENDIAN
 #   define _UTF16PEFromUnicodeWithOffsets   _UTF16LEFromUnicodeWithOffsets
 #endif
 
+
 static void
 _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                                UErrorCode *pErrorCode) {
     UConverter *cnv;
     const UChar *source;
-    uint8_t *target;
+    char *target;
     int32_t *offsets;
 
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, sourceIndex;
     UChar c, trail;
     char overflow[4];
 
     source=pArgs->source;
-    length=pArgs->sourceLimit-source;
+    length=(int32_t)(pArgs->sourceLimit-source);
     if(length<=0) {
         /* no input, nothing to do */
         return;
     }
 
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    cnv=pArgs->converter;
+
+    /* write the BOM if necessary */
+    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+        static const char bom[]={ (char)0xfe, (char)0xff };
+        ucnv_fromUWriteBytes(cnv,
+                             bom, 2,
+                             &pArgs->target, pArgs->targetLimit,
+                             &pArgs->offsets, -1,
+                             pErrorCode);
+        cnv->fromUnicodeStatus=0;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         return;
     }
 
-    cnv=pArgs->converter;
-    target=(uint8_t *)pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-target);
     offsets=pArgs->offsets;
     sourceIndex=0;
 
@@ -83,13 +100,13 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
         cnv->fromUChar32=c=0;
     }
 
-    /* copy an even number of bytes for complete UChars */
-    count=2*length;
-    if(count>targetCapacity) {
-        count=targetCapacity&~1;
-    }
-    /* count is even */
     if(c==0) {
+        /* copy an even number of bytes for complete UChars */
+        uint32_t count=2*length;
+        if(count>targetCapacity) {
+            count=targetCapacity&~1;
+        }
+        /* count is even */
         targetCapacity-=count;
         count>>=1;
         length-=count;
@@ -210,7 +227,7 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                              (char **)&target, pArgs->targetLimit,
                              &offsets, sourceIndex,
                              pErrorCode);
-        targetCapacity=pArgs->targetLimit-(char *)target;
+        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
     }
 
     if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
@@ -231,24 +248,24 @@ _UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     UChar *target;
     int32_t *offsets;
 
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, count, sourceIndex;
     UChar c, trail;
 
     cnv=pArgs->converter;
     source=(const uint8_t *)pArgs->source;
-    length=(const uint8_t *)pArgs->sourceLimit-source;
+    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
     if(length<=0 && cnv->toUnicodeStatus==0) {
         /* no input, nothing to do */
         return;
     }
 
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         return;
     }
 
-    target=pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-target);
     offsets=pArgs->offsets;
     sourceIndex=0;
     c=0;
@@ -551,7 +568,7 @@ static const UConverterImpl _UTF16BEImpl={
     NULL,
     NULL,
     NULL,
-    ucnv_getCompleteUnicodeSet
+    ucnv_getNonSurrogateUnicodeSet
 };
 
 static const UConverterStaticData _UTF16BEStaticData={
@@ -578,28 +595,40 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                                UErrorCode *pErrorCode) {
     UConverter *cnv;
     const UChar *source;
-    uint8_t *target;
+    char *target;
     int32_t *offsets;
 
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, sourceIndex;
     UChar c, trail;
     char overflow[4];
 
     source=pArgs->source;
-    length=pArgs->sourceLimit-source;
+    length=(int32_t)(pArgs->sourceLimit-source);
     if(length<=0) {
         /* no input, nothing to do */
         return;
     }
 
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    cnv=pArgs->converter;
+
+    /* write the BOM if necessary */
+    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+        static const char bom[]={ (char)0xff, (char)0xfe };
+        ucnv_fromUWriteBytes(cnv,
+                             bom, 2,
+                             &pArgs->target, pArgs->targetLimit,
+                             &pArgs->offsets, -1,
+                             pErrorCode);
+        cnv->fromUnicodeStatus=0;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         return;
     }
 
-    cnv=pArgs->converter;
-    target=(uint8_t *)pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
     offsets=pArgs->offsets;
     sourceIndex=0;
 
@@ -625,13 +654,13 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
         cnv->fromUChar32=c=0;
     }
 
-    /* copy an even number of bytes for complete UChars */
-    count=2*length;
-    if(count>targetCapacity) {
-        count=targetCapacity&~1;
-    }
-    /* count is even */
     if(c==0) {
+        /* copy an even number of bytes for complete UChars */
+        uint32_t count=2*length;
+        if(count>targetCapacity) {
+            count=targetCapacity&~1;
+        }
+        /* count is even */
         targetCapacity-=count;
         count>>=1;
         length-=count;
@@ -749,10 +778,10 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
         /* output length bytes with overflow (length>targetCapacity>0) */
         ucnv_fromUWriteBytes(cnv,
                              overflow, length,
-                             (char **)&target, pArgs->targetLimit,
+                             &target, pArgs->targetLimit,
                              &offsets, sourceIndex,
                              pErrorCode);
-        targetCapacity=pArgs->targetLimit-(char *)target;
+        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
     }
 
     if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
@@ -761,7 +790,7 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
 
     /* write back the updated pointers */
     pArgs->source=source;
-    pArgs->target=(char *)target;
+    pArgs->target=target;
     pArgs->offsets=offsets;
 }
 
@@ -773,24 +802,24 @@ _UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
     UChar *target;
     int32_t *offsets;
 
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, count, sourceIndex;
     UChar c, trail;
 
     cnv=pArgs->converter;
     source=(const uint8_t *)pArgs->source;
-    length=(const uint8_t *)pArgs->sourceLimit-source;
+    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
     if(length<=0 && cnv->toUnicodeStatus==0) {
         /* no input, nothing to do */
         return;
     }
 
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
         *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
         return;
     }
 
-    target=pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
     offsets=pArgs->offsets;
     sourceIndex=0;
     c=0;
@@ -1093,7 +1122,7 @@ static const UConverterImpl _UTF16LEImpl={
     NULL,
     NULL,
     NULL,
-    ucnv_getCompleteUnicodeSet
+    ucnv_getNonSurrogateUnicodeSet
 };
 
 
@@ -1144,14 +1173,7 @@ _UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
     }
     if(choice!=UCNV_RESET_TO_UNICODE) {
         /* reset fromUnicode: prepare to output the UTF-16PE BOM */
-        cnv->charErrorBufferLength=2;
-#if U_IS_BIG_ENDIAN
-        cnv->charErrorBuffer[0]=0xfe;
-        cnv->charErrorBuffer[1]=0xff;
-#else
-        cnv->charErrorBuffer[0]=0xff;
-        cnv->charErrorBuffer[1]=0xfe;
-#endif
+        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
     }
 }
 
@@ -1206,10 +1228,10 @@ _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                 ++source;
                 if(state==1) {
                     state=8; /* detect UTF-16BE */
-                    offsetDelta=source-pArgs->source;
+                    offsetDelta=(int32_t)(source-pArgs->source);
                 } else if(state==5) {
                     state=9; /* detect UTF-16LE */
-                    offsetDelta=source-pArgs->source;
+                    offsetDelta=(int32_t)(source-pArgs->source);
                 }
             } else {
                 /* switch to UTF-16BE and pass the previous bytes */
@@ -1322,13 +1344,13 @@ static const UConverterImpl _UTF16Impl = {
     NULL,
     NULL,
     NULL,
-    ucnv_getCompleteUnicodeSet
+    ucnv_getNonSurrogateUnicodeSet
 };
 
 static const UConverterStaticData _UTF16StaticData = {
     sizeof(UConverterStaticData),
     "UTF-16",
-    0, /* ### TODO review correctness of all Unicode CCSIDs */
+    1204, /* CCSID for BOM sensitive UTF-16 */
     UCNV_IBM, UCNV_UTF16, 2, 2,
 #if U_IS_BIG_ENDIAN
     { 0xff, 0xfd, 0, 0 }, 2,