ICU-8.11.tar.gz

[apple/icu.git] / icuSources / common / ucnv_u16.c
diff --git a/icuSources/common/ucnv_u16.c b/icuSources/common/ucnv_u16.c

index 86fd19991c3ccb4a075b350d4f9eaba6dbf9068d..6fc9e2a4493d71264bc7b4408b515ee159ce53f2 100644 (file)
--- a/icuSources/common/ucnv_u16.c
+++ b/icuSources/common/ucnv_u16.c
@@ -1,6 +1,6 @@
  /*  
  **********************************************************************
-*   Copyright (C) 2002-2004, International Business Machines
+*   Copyright (C) 2002-2006, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   file name:  ucnv_u16.c
@@ -23,6 +23,10 @@
  #include "ucnv_cnv.h"
  #include "cmemory.h"
  
+enum {
+    UCNV_NEED_TO_WRITE_BOM=1 /* cnv->fromUnicodeStatus value: set by _UTF16Reset; the UTF-16BE/LE FromUnicode functions then emit the BOM and reset the status to 0 */
+};
+
  /* UTF-16BE ----------------------------------------------------------------- */
  
  #if U_IS_BIG_ENDIAN
@@ -31,33 +35,46 @@
  #   define _UTF16PEFromUnicodeWithOffsets   _UTF16LEFromUnicodeWithOffsets
  #endif
  
+
  static void
  _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                                 UErrorCode *pErrorCode) {
      UConverter *cnv;
      const UChar *source;
-    uint8_t *target;
+    char *target;
      int32_t *offsets;
  
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, sourceIndex;
      UChar c, trail;
      char overflow[4];
  
      source=pArgs->source;
-    length=pArgs->sourceLimit-source;
+    length=(int32_t)(pArgs->sourceLimit-source);
      if(length<=0) {
          /* no input, nothing to do */
          return;
      }
  
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    cnv=pArgs->converter;
+
+    /* write the BOM if necessary */
+    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+        static const char bom[]={ (char)0xfe, (char)0xff };
+        ucnv_fromUWriteBytes(cnv,
+                             bom, 2,
+                             &pArgs->target, pArgs->targetLimit,
+                             &pArgs->offsets, -1,
+                             pErrorCode);
+        cnv->fromUnicodeStatus=0;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          return;
      }
  
-    cnv=pArgs->converter;
-    target=(uint8_t *)pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-target);
      offsets=pArgs->offsets;
      sourceIndex=0;
  
@@ -83,13 +100,13 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
          cnv->fromUChar32=c=0;
      }
  
-    /* copy an even number of bytes for complete UChars */
-    count=2*length;
-    if(count>targetCapacity) {
-        count=targetCapacity&~1;
-    }
-    /* count is even */
      if(c==0) {
+        /* copy an even number of bytes for complete UChars */
+        uint32_t count=2*length;
+        if(count>targetCapacity) {
+            count=targetCapacity&~1;
+        }
+        /* count is even */
          targetCapacity-=count;
          count>>=1;
          length-=count;
@@ -210,7 +227,7 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                               (char **)&target, pArgs->targetLimit,
                               &offsets, sourceIndex,
                               pErrorCode);
-        targetCapacity=pArgs->targetLimit-(char *)target;
+        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
      }
  
      if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
@@ -231,24 +248,24 @@ _UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
      UChar *target;
      int32_t *offsets;
  
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, count, sourceIndex;
      UChar c, trail;
  
      cnv=pArgs->converter;
      source=(const uint8_t *)pArgs->source;
-    length=(const uint8_t *)pArgs->sourceLimit-source;
+    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
      if(length<=0 && cnv->toUnicodeStatus==0) {
          /* no input, nothing to do */
          return;
      }
  
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          return;
      }
  
-    target=pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-target);
      offsets=pArgs->offsets;
      sourceIndex=0;
      c=0;
@@ -551,7 +568,7 @@ static const UConverterImpl _UTF16BEImpl={
      NULL,
      NULL,
      NULL,
-    ucnv_getCompleteUnicodeSet
+    ucnv_getNonSurrogateUnicodeSet
  };
  
  static const UConverterStaticData _UTF16BEStaticData={
@@ -578,28 +595,40 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
                                 UErrorCode *pErrorCode) {
      UConverter *cnv;
      const UChar *source;
-    uint8_t *target;
+    char *target;
      int32_t *offsets;
  
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, sourceIndex;
      UChar c, trail;
      char overflow[4];
  
      source=pArgs->source;
-    length=pArgs->sourceLimit-source;
+    length=(int32_t)(pArgs->sourceLimit-source);
      if(length<=0) {
          /* no input, nothing to do */
          return;
      }
  
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    cnv=pArgs->converter;
+
+    /* write the BOM if necessary */
+    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+        static const char bom[]={ (char)0xff, (char)0xfe };
+        ucnv_fromUWriteBytes(cnv,
+                             bom, 2,
+                             &pArgs->target, pArgs->targetLimit,
+                             &pArgs->offsets, -1,
+                             pErrorCode);
+        cnv->fromUnicodeStatus=0;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          return;
      }
  
-    cnv=pArgs->converter;
-    target=(uint8_t *)pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
      offsets=pArgs->offsets;
      sourceIndex=0;
  
@@ -625,13 +654,13 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
          cnv->fromUChar32=c=0;
      }
  
-    /* copy an even number of bytes for complete UChars */
-    count=2*length;
-    if(count>targetCapacity) {
-        count=targetCapacity&~1;
-    }
-    /* count is even */
      if(c==0) {
+        /* copy an even number of bytes for complete UChars */
+        uint32_t count=2*length;
+        if(count>targetCapacity) {
+            count=targetCapacity&~1;
+        }
+        /* count is even */
          targetCapacity-=count;
          count>>=1;
          length-=count;
@@ -749,10 +778,10 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
          /* output length bytes with overflow (length>targetCapacity>0) */
          ucnv_fromUWriteBytes(cnv,
                               overflow, length,
-                             (char **)&target, pArgs->targetLimit,
+                             &target, pArgs->targetLimit,
                               &offsets, sourceIndex,
                               pErrorCode);
-        targetCapacity=pArgs->targetLimit-(char *)target;
+        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
      }
  
      if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
@@ -761,7 +790,7 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  
      /* write back the updated pointers */
      pArgs->source=source;
-    pArgs->target=(char *)target;
+    pArgs->target=target;
      pArgs->offsets=offsets;
  }
  
@@ -773,24 +802,24 @@ _UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
      UChar *target;
      int32_t *offsets;
  
-    int32_t targetCapacity, length, count, sourceIndex;
+    uint32_t targetCapacity, length, count, sourceIndex;
      UChar c, trail;
  
      cnv=pArgs->converter;
      source=(const uint8_t *)pArgs->source;
-    length=(const uint8_t *)pArgs->sourceLimit-source;
+    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
      if(length<=0 && cnv->toUnicodeStatus==0) {
          /* no input, nothing to do */
          return;
      }
  
-    targetCapacity=pArgs->targetLimit-pArgs->target;
-    if(targetCapacity<=0) {
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
          *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
          return;
      }
  
-    target=pArgs->target;
+    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
      offsets=pArgs->offsets;
      sourceIndex=0;
      c=0;
@@ -1093,7 +1122,7 @@ static const UConverterImpl _UTF16LEImpl={
      NULL,
      NULL,
      NULL,
-    ucnv_getCompleteUnicodeSet
+    ucnv_getNonSurrogateUnicodeSet
  };
  
  
@@ -1144,14 +1173,7 @@ _UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
      }
      if(choice!=UCNV_RESET_TO_UNICODE) {
          /* reset fromUnicode: prepare to output the UTF-16PE BOM */
-        cnv->charErrorBufferLength=2;
-#if U_IS_BIG_ENDIAN
-        cnv->charErrorBuffer[0]=0xfe;
-        cnv->charErrorBuffer[1]=0xff;
-#else
-        cnv->charErrorBuffer[0]=0xff;
-        cnv->charErrorBuffer[1]=0xfe;
-#endif
+        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
      }
  }
  
@@ -1206,10 +1228,10 @@ _UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                  ++source;
                  if(state==1) {
                      state=8; /* detect UTF-16BE */
-                    offsetDelta=source-pArgs->source;
+                    offsetDelta=(int32_t)(source-pArgs->source);
                  } else if(state==5) {
                      state=9; /* detect UTF-16LE */
-                    offsetDelta=source-pArgs->source;
+                    offsetDelta=(int32_t)(source-pArgs->source);
                  }
              } else {
                  /* switch to UTF-16BE and pass the previous bytes */
@@ -1322,13 +1344,13 @@ static const UConverterImpl _UTF16Impl = {
      NULL,
      NULL,
      NULL,
-    ucnv_getCompleteUnicodeSet
+    ucnv_getNonSurrogateUnicodeSet
  };
  
  static const UConverterStaticData _UTF16StaticData = {
      sizeof(UConverterStaticData),
      "UTF-16",
-    0, /* ### TODO review correctness of all Unicode CCSIDs */
+    1204, /* CCSID for BOM sensitive UTF-16 */
      UCNV_IBM, UCNV_UTF16, 2, 2,
  #if U_IS_BIG_ENDIAN
      { 0xff, 0xfd, 0, 0 }, 2,