ICU-6.2.4.tar.gz

[apple/icu.git] / icuSources / common / ucnv_cnv.c
diff --git a/icuSources/common/ucnv_cnv.c b/icuSources/common/ucnv_cnv.c

index ae0d61ed3a9a9b39f817b7cd90fd1e001792f19f..48c2201063b6d8944009d214b2870d395b89a7e4 100644 (file)
--- a/icuSources/common/ucnv_cnv.c
+++ b/icuSources/common/ucnv_cnv.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1999-2003, International Business Machines
+*   Copyright (C) 1999-2004, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -16,244 +16,159 @@
  */
  
  #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
  #include "unicode/ucnv_err.h"
  #include "unicode/ucnv.h"
  #include "unicode/uset.h"
  #include "ucnv_cnv.h"
+#include "ucnv_bld.h"
  #include "cmemory.h"
  
-/*Empties the internal unicode output buffer */
-void  ucnv_flushInternalUnicodeBuffer (UConverter * _this,
-                                  UChar * myTarget,
-                                  int32_t * myTargetIndex,
-                                  int32_t targetLength,
-                                  int32_t** offsets,
-                                  UErrorCode * err)
-{
-    int32_t myUCharErrorBufferLength = _this->UCharErrorBufferLength;
-    
-    if (myUCharErrorBufferLength <= targetLength)
-    {
-        /*we have enough space
-        *So we just copy the whole Error Buffer in to the output stream
-        */
-        uprv_memcpy (myTarget,
-            _this->UCharErrorBuffer,
-            sizeof (UChar) * myUCharErrorBufferLength);
-        if (offsets) 
-        {
-            int32_t i=0;
-            for (i=0; i<myUCharErrorBufferLength;i++) (*offsets)[i] = -1; 
-            *offsets += myUCharErrorBufferLength;
-        }
-        *myTargetIndex += myUCharErrorBufferLength;
-        _this->UCharErrorBufferLength = 0;
-    }
-    else
-    {
-        /* We don't have enough space so we copy as much as we can
-        * on the output stream and update the object
-        * by updating the internal buffer*/
-        uprv_memcpy (myTarget, _this->UCharErrorBuffer, sizeof (UChar) * targetLength);
-        if (offsets) 
-        {
-            int32_t i=0;
-            for (i=0; i< targetLength;i++) (*offsets)[i] = -1; 
-            *offsets += targetLength;
-        }
-        uprv_memmove (_this->UCharErrorBuffer,
-                    _this->UCharErrorBuffer + targetLength,
-                    sizeof (UChar) * (myUCharErrorBufferLength - targetLength));
-        _this->UCharErrorBufferLength -= (int8_t) targetLength;
-        *myTargetIndex = targetLength;
-        *err = U_BUFFER_OVERFLOW_ERROR;
-    }
+U_CFUNC void
+ucnv_getCompleteUnicodeSet(const UConverter *cnv,
+                   USetAdder *sa,
+                   UConverterUnicodeSet which,
+                   UErrorCode *pErrorCode) {
+    sa->addRange(sa->set, 0, 0x10ffff);
+}
+
+U_CFUNC void
+ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
+                               USetAdder *sa,
+                               UConverterUnicodeSet which,
+                               UErrorCode *pErrorCode) {
+    sa->addRange(sa->set, 0, 0xd7ff);
+    sa->addRange(sa->set, 0xe000, 0x10ffff);
  }
  
-/*Empties the internal codepage output buffer */
-void  ucnv_flushInternalCharBuffer (UConverter * _this,
-                               char *myTarget,
-                               int32_t * myTargetIndex,
-                               int32_t targetLength,
-                               int32_t** offsets,
-                               UErrorCode * err)
-{
-    int32_t myCharErrorBufferLength = _this->charErrorBufferLength;
-    
-    /*we have enough space */
-    if (myCharErrorBufferLength <= targetLength)
-    {
-        uprv_memcpy (myTarget, _this->charErrorBuffer, myCharErrorBufferLength);
-        if (offsets) 
-        {
-            int32_t i=0;
-            for (i=0; i<myCharErrorBufferLength;i++) (*offsets)[i] = -1; 
-            *offsets += myCharErrorBufferLength;
+U_CFUNC void
+ucnv_fromUWriteBytes(UConverter *cnv,
+                     const char *bytes, int32_t length,
+                     char **target, const char *targetLimit,
+                     int32_t **offsets,
+                     int32_t sourceIndex,
+                     UErrorCode *pErrorCode) {
+    char *t=*target;
+    int32_t *o;
+
+    /* write bytes */
+    if(offsets==NULL || (o=*offsets)==NULL) {
+        while(length>0 && t<targetLimit) {
+            *t++=*bytes++;
+            --length;
          }
-        
-        *myTargetIndex += myCharErrorBufferLength;
-        _this->charErrorBufferLength = 0;
+    } else {
+        /* output with offsets */
+        while(length>0 && t<targetLimit) {
+            *t++=*bytes++;
+            *o++=sourceIndex;
+            --length;
+        }
+        *offsets=o;
      }
-    else
-    {
-        /* We don't have enough space so we copy as much as we can
-        * on the output stream and update the object
-        */
-        uprv_memcpy (myTarget, _this->charErrorBuffer, targetLength);
-        if (offsets) 
-        {
-            int32_t i=0;
-            for (i=0; i< targetLength;i++) (*offsets)[i] = -1; 
-            *offsets += targetLength;
+    *target=t;
+
+    /* write overflow */
+    if(length>0) {
+        if(cnv!=NULL) {
+            t=(char *)cnv->charErrorBuffer;
+            cnv->charErrorBufferLength=(int8_t)length;
+            do {
+                *t++=(uint8_t)*bytes++;
+            } while(--length>0);
          }
-        uprv_memmove (_this->charErrorBuffer,
-            _this->charErrorBuffer + targetLength,
-            (myCharErrorBufferLength - targetLength));
-        _this->charErrorBufferLength -= (int8_t) targetLength;
-        *myTargetIndex = targetLength;
-        *err = U_BUFFER_OVERFLOW_ERROR;
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
      }
  }
  
-/**
- * This function is useful for implementations of getNextUChar().
- * After a call to a callback function or to toUnicode(), an output buffer
- * begins with a Unicode code point that needs to be returned as UChar32,
- * and all following code units must be prepended to the - potentially
- * prefilled - overflow buffer in the UConverter.
- * The buffer should be at least of capacity UTF_MAX_CHAR_LENGTH so that a
- * complete UChar32's UChars fit into it.
- *
- * @param cnv    The converter that will get remaining UChars copied to its overflow area.
- * @param buffer An array of UChars that was passed into a callback function
- *               or a toUnicode() function.
- * @param length The number of code units (UChars) that are actually in the buffer.
- *               This must be >0.
- * @return The code point from the first UChars in the buffer.
- */
-U_CFUNC UChar32
-ucnv_getUChar32KeepOverflow(UConverter *cnv, const UChar *buffer, int32_t length) {
-    UChar32 c;
-    int32_t i;
-
-    if(length<=0) {
-        return 0xffff;
+U_CFUNC void
+ucnv_toUWriteUChars(UConverter *cnv,
+                    const UChar *uchars, int32_t length,
+                    UChar **target, const UChar *targetLimit,
+                    int32_t **offsets,
+                    int32_t sourceIndex,
+                    UErrorCode *pErrorCode) {
+    UChar *t=*target;
+    int32_t *o;
+
+    /* write UChars */
+    if(offsets==NULL || (o=*offsets)==NULL) {
+        while(length>0 && t<targetLimit) {
+            *t++=*uchars++;
+            --length;
+        }
+    } else {
+        /* output with offsets */
+        while(length>0 && t<targetLimit) {
+            *t++=*uchars++;
+            *o++=sourceIndex;
+            --length;
+        }
+        *offsets=o;
      }
+    *target=t;
  
-    /* get the first code point in the buffer */
-    i=0;
-    UTF_NEXT_CHAR(buffer, i, length, c);
-    if(i<length) {
-        /* there are UChars left in the buffer that need to go into the overflow buffer */
-        UChar *overflow=cnv->UCharErrorBuffer;
-        int32_t j=cnv->UCharErrorBufferLength;
-
-        if(j>0) {
-            /* move the overflow buffer contents to make room for the extra UChars */
-            int32_t k;
-
-            cnv->UCharErrorBufferLength=(int8_t)(k=(length-i)+j);
+    /* write overflow */
+    if(length>0) {
+        if(cnv!=NULL) {
+            t=cnv->UCharErrorBuffer;
+            cnv->UCharErrorBufferLength=(int8_t)length;
              do {
-                overflow[--k]=overflow[--j];
-            } while(j>0);
-        } else {
-            cnv->UCharErrorBufferLength=(int8_t)(length-i);
+                *t++=*uchars++;
+            } while(--length>0);
          }
-
-        /* copy the remaining UChars to the beginning of the overflow buffer */
-        do {
-            overflow[j++]=buffer[i++];
-        } while(i<length);
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
      }
-    return c;
  }
  
-/* update target offsets after a callback call */
-U_CFUNC int32_t *
-ucnv_updateCallbackOffsets(int32_t *offsets, int32_t length, int32_t sourceIndex) {
-    if(offsets!=NULL) {
-        if(sourceIndex>=0) {
-            /* add the sourceIndex to the relative offsets that the callback wrote */
-            while(length>0) {
-                *offsets+=sourceIndex;
-                ++offsets;
-                --length;
-            }
-        } else {
-            /* sourceIndex==-1, set -1 offsets */
-            while(length>0) {
-                *offsets=-1;
-                ++offsets;
-                --length;
+U_CFUNC void
+ucnv_toUWriteCodePoint(UConverter *cnv,
+                       UChar32 c,
+                       UChar **target, const UChar *targetLimit,
+                       int32_t **offsets,
+                       int32_t sourceIndex,
+                       UErrorCode *pErrorCode) {
+    UChar *t;
+    int32_t *o;
+
+    t=*target;
+
+    if(t<targetLimit) {
+        if(c<=0xffff) {
+            *t++=(UChar)c;
+            c=U_SENTINEL;
+        } else /* c is a supplementary code point */ {
+            *t++=U16_LEAD(c);
+            c=U16_TRAIL(c);
+            if(t<targetLimit) {
+                *t++=(UChar)c;
+                c=U_SENTINEL;
              }
          }
-        return offsets;
-    } else {
-        return NULL;
-    }
-}
-
-/*
- * This is a simple implementation of ucnv_getNextUChar() that uses the
- * converter's toUnicode() function. See ucnv_cnv.h for details.
- */
-U_CFUNC UChar32
-ucnv_getNextUCharFromToUImpl(UConverterToUnicodeArgs *pArgs,
-                             T_ToUnicodeFunction toU,
-                             UBool collectPairs,
-                             UErrorCode *pErrorCode) {
-    UChar buffer[UTF_MAX_CHAR_LENGTH];
-    const char *realLimit=pArgs->sourceLimit;
-
-    pArgs->target=buffer;
-    pArgs->targetLimit=buffer+UTF_MAX_CHAR_LENGTH;
-
-    while(pArgs->source<realLimit) {
-        /* feed in one byte at a time to make sure to get only one character out */
-        pArgs->sourceLimit=pArgs->source+1;
-        pArgs->flush= (UBool)(pArgs->sourceLimit==realLimit);
-
-        /* convert this byte and check the result */
-        toU(pArgs, pErrorCode);
-        if(U_SUCCESS(*pErrorCode)) {
-            int32_t length=(int32_t)(pArgs->target-buffer);
  
-            /* this test is UTF-16 specific */
-            if(/* some output and
-                  (source consumed or don't collect surrogate pairs or not a surrogate or a surrogate pair) */
-               length>0 &&
-               (pArgs->flush || !collectPairs || !UTF_IS_FIRST_SURROGATE(buffer[0]) || length==2)
-            ) {
-                return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, length);
+        /* write offsets */
+        if(offsets!=NULL && (o=*offsets)!=NULL) {
+            *o++=sourceIndex;
+            if((*target+1)<t) {
+                *o++=sourceIndex;
              }
-            /* else continue with the loop */
-        } else if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-            *pErrorCode=U_ZERO_ERROR;
-            return ucnv_getUChar32KeepOverflow(pArgs->converter, buffer, UTF_MAX_CHAR_LENGTH);
-        } else {
-            /* U_FAILURE() */
-            return 0xffff;
+            *offsets=o;
          }
      }
  
-    /* no output because of empty input or only state changes and skipping callbacks */
-    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-    return 0xffff;
-}
+    *target=t;
  
-U_CFUNC void
-ucnv_getCompleteUnicodeSet(const UConverter *cnv,
-                   USet *set,
-                   UConverterUnicodeSet which,
-                   UErrorCode *pErrorCode) {
-    uset_addRange(set, 0, 0x10ffff);
+    /* write overflow from c */
+    if(c>=0) {
+        if(cnv!=NULL) {
+            int8_t i=0;
+            U16_APPEND_UNSAFE(cnv->UCharErrorBuffer, i, c);
+            cnv->UCharErrorBufferLength=i;
+        }
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    }
  }
  
-U_CFUNC void
-ucnv_getNonSurrogateUnicodeSet(const UConverter *cnv,
-                               USet *set,
-                               UConverterUnicodeSet which,
-                               UErrorCode *pErrorCode) {
-    uset_addRange(set, 0, 0xd7ff);
-    uset_addRange(set, 0xe000, 0x10ffff);
-}
+#endif