icuSources/common/ucnv_cb.c

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 2000-2006, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6  *  ucnv_cb.c:
   7  *  External APIs for the ICU's codeset conversion library
   8  *  Helena Shih
   9  *
  10  * Modification History:
  11  *
  12  *   Date        Name        Description
  13  *   7/28/2000   srl         Implementation
  14  */
  15
  16 /**
  17  * @name Character Conversion C API
  18  *
  19  */
  20
  21 #include "unicode/utypes.h"
  22
  23 #if !UCONFIG_NO_CONVERSION
  24
  25 #include "unicode/ucnv_cb.h"
  26 #include "ucnv_bld.h"
  27 #include "ucnv_cnv.h"
  28 #include "cmemory.h"
  29
  30 /* need to update the offsets when the target moves. */
  31 /* Note: Recursion may occur in the cb functions, be sure to update the offsets correctly
  32 if you don't use ucnv_cbXXX functions.  Make sure you don't use the same callback within
  33 the same call stack if the complexity arises. */
  34 U_CAPI void  U_EXPORT2
  35 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
  36                        const char* source,
  37                        int32_t length,
  38                        int32_t offsetIndex,
  39                        UErrorCode * err)
  40 {
  41     if(U_FAILURE(*err)) {
  42         return;
  43     }
  44
  45     ucnv_fromUWriteBytes(
  46         args->converter,
  47         source, length,
  48         &args->target, args->targetLimit,
  49         &args->offsets, offsetIndex,
  50         err);
  51 }
  52
  53 U_CAPI void  U_EXPORT2
  54 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
  55                              const UChar** source,
  56                              const UChar*  sourceLimit,
  57                              int32_t offsetIndex,
  58                              UErrorCode * err)
  59 {
  60     /*
  61     This is a fun one.  Recursion can occur - we're basically going to
  62     just retry shoving data through the same converter. Note, if you got
  63     here through some kind of invalid sequence, you maybe should emit a
  64     reset sequence of some kind and/or call ucnv_reset().  Since this
  65     IS an actual conversion, take care that you've changed the callback
  66     or the data, or you'll get an infinite loop.
  67
  68     Please set the err value to something reasonable before calling
  69     into this.
  70     */
  71
  72     char *oldTarget;
  73
  74     if(U_FAILURE(*err))
  75     {
  76         return;
  77     }
  78
  79     oldTarget = args->target;
  80
  81     ucnv_fromUnicode(args->converter,
  82         &args->target,
  83         args->targetLimit,
  84         source,
  85         sourceLimit,
  86         NULL, /* no offsets */
  87         FALSE, /* no flush */
  88         err);
  89
  90     if(args->offsets)
  91     {
  92         while (args->target != oldTarget)  /* if it moved at all.. */
  93         {
  94             *(args->offsets)++ = offsetIndex;
  95             oldTarget++;
  96         }
  97     }
  98
  99     /*
 100     Note, if you did something like used a Stop subcallback, things would get interesting.
 101     In fact, here's where we want to return the partially consumed in-source!
 102     */
 103     if(*err == U_BUFFER_OVERFLOW_ERROR)
 104     /* && (*source < sourceLimit && args->target >= args->targetLimit)
 105     -- S. Hrcek */
 106     {
 107         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
 108         It's a fixed size. If we overflow it... Hmm */
 109         char *newTarget;
 110         const char *newTargetLimit;
 111         UErrorCode err2 = U_ZERO_ERROR;
 112
 113         int8_t errBuffLen;
 114
 115         errBuffLen  = args->converter->charErrorBufferLength;
 116
 117         /* start the new target at the first free slot in the errbuff.. */
 118         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
 119
 120         newTargetLimit = (char *)(args->converter->charErrorBuffer +
 121             sizeof(args->converter->charErrorBuffer));
 122
 123         if(newTarget >= newTargetLimit)
 124         {
 125             *err = U_INTERNAL_PROGRAM_ERROR;
 126             return;
 127         }
 128
 129         /* We're going to tell the converter that the errbuff len is empty.
 130         This prevents the existing errbuff from being 'flushed' out onto
 131         itself.  If the errbuff is needed by the converter this time,
 132         we're hosed - we're out of space! */
 133
 134         args->converter->charErrorBufferLength = 0;
 135
 136         ucnv_fromUnicode(args->converter,
 137                          &newTarget,
 138                          newTargetLimit,
 139                          source,
 140                          sourceLimit,
 141                          NULL,
 142                          FALSE,
 143                          &err2);
 144
 145         /* We can go ahead and overwrite the  length here. We know just how
 146         to recalculate it. */
 147
 148         args->converter->charErrorBufferLength = (int8_t)(
 149             newTarget - (char*)args->converter->charErrorBuffer);
 150
 151         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
 152         {
 153             /* now we're REALLY in trouble.
 154             Internal program error - callback shouldn't have written this much
 155             data!
 156             */
 157             *err = U_INTERNAL_PROGRAM_ERROR;
 158             return;
 159         }
 160         /*else {*/
 161             /* sub errs could be invalid/truncated/illegal chars or w/e.
 162             These might want to be passed on up.. But the problem is, we already
 163             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
 164             other errs.. */
 165
 166             /*
 167             if(U_FAILURE(err2))
 168             ??
 169             */
 170         /*}*/
 171     }
 172 }
 173
 174 U_CAPI void  U_EXPORT2
 175 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
 176                            int32_t offsetIndex,
 177                            UErrorCode * err)
 178 {
 179     UConverter *converter;
 180     int32_t length;
 181
 182     if(U_FAILURE(*err)) {
 183         return;
 184     }
 185     converter = args->converter;
 186     length = converter->subCharLen;
 187
 188     if(length == 0) {
 189         return;
 190     }
 191
 192     if(length < 0) {
 193         /*
 194          * Write/convert the substitution string. Its real length is -length.
 195          * Unlike the escape callback, we need not change the converter's
 196          * callback function because ucnv_setSubstString() verified that
 197          * the string can be converted, so we will not get a conversion error
 198          * and will not recurse.
 199          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
 200          */
 201         const UChar *source = (const UChar *)converter->subChars;
 202         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
 203         return;
 204     }
 205
 206     if(converter->sharedData->impl->writeSub!=NULL) {
 207         converter->sharedData->impl->writeSub(args, offsetIndex, err);
 208     }
 209     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
 210         /*
 211         TODO: Is this untestable because the MBCS converter has a writeSub function to call
 212         and the other converters don't use subChar1?
 213         */
 214         ucnv_cbFromUWriteBytes(args,
 215                                (const char *)&converter->subChar1, 1,
 216                                offsetIndex, err);
 217     }
 218     else {
 219         ucnv_cbFromUWriteBytes(args,
 220                                (const char *)converter->subChars, length,
 221                                offsetIndex, err);
 222     }
 223 }
 224
 225 U_CAPI void  U_EXPORT2
 226 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
 227                             const UChar* source,
 228                             int32_t length,
 229                             int32_t offsetIndex,
 230                             UErrorCode * err)
 231 {
 232     if(U_FAILURE(*err)) {
 233         return;
 234     }
 235
 236     ucnv_toUWriteUChars(
 237         args->converter,
 238         source, length,
 239         &args->target, args->targetLimit,
 240         &args->offsets, offsetIndex,
 241         err);
 242 }
 243
 244 U_CAPI void  U_EXPORT2
 245 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
 246                          int32_t offsetIndex,
 247                        UErrorCode * err)
 248 {
 249     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
 250
 251     /* could optimize this case, just one uchar */
 252     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
 253         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
 254     } else {
 255         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
 256     }
 257 }
 258
 259 #endif