icuSources/common/ucnv_cb.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 2000-2006, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8  *  ucnv_cb.c:
   9  *  External APIs for the ICU's codeset conversion library
  10  *  Helena Shih
  11  *
  12  * Modification History:
  13  *
  14  *   Date        Name        Description
  15  *   7/28/2000   srl         Implementation
  16  */
  17
  18 /**
  19  * @name Character Conversion C API
  20  *
  21  */
  22
  23 #include "unicode/utypes.h"
  24
  25 #if !UCONFIG_NO_CONVERSION
  26
  27 #include "unicode/ucnv_cb.h"
  28 #include "ucnv_bld.h"
  29 #include "ucnv_cnv.h"
  30 #include "cmemory.h"
  31
  /* The offsets must be updated whenever the target pointer moves. */
  /* Note: The callback functions may recurse. If you write output without using
     the ucnv_cbXXX functions, be sure to update the offsets correctly yourself,
     and avoid re-entering the same callback within a single call stack when the
     conversion becomes complex. */
  36 U_CAPI void  U_EXPORT2
  37 ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
  38                        const char* source,
  39                        int32_t length,
  40                        int32_t offsetIndex,
  41                        UErrorCode * err)
  42 {
  43     if(U_FAILURE(*err)) {
  44         return;
  45     }
  46
  47     ucnv_fromUWriteBytes(
  48         args->converter,
  49         source, length,
  50         &args->target, args->targetLimit,
  51         &args->offsets, offsetIndex,
  52         err);
  53 }
  54
  55 U_CAPI void  U_EXPORT2
  56 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
  57                              const UChar** source,
  58                              const UChar*  sourceLimit,
  59                              int32_t offsetIndex,
  60                              UErrorCode * err)
  61 {
  62     /*
  63     This is a fun one.  Recursion can occur - we're basically going to
  64     just retry shoving data through the same converter. Note, if you got
  65     here through some kind of invalid sequence, you maybe should emit a
  66     reset sequence of some kind and/or call ucnv_reset().  Since this
  67     IS an actual conversion, take care that you've changed the callback
  68     or the data, or you'll get an infinite loop.
  69
  70     Please set the err value to something reasonable before calling
  71     into this.
  72     */
  73
  74     char *oldTarget;
  75
  76     if(U_FAILURE(*err))
  77     {
  78         return;
  79     }
  80
  81     oldTarget = args->target;
  82
  83     ucnv_fromUnicode(args->converter,
  84         &args->target,
  85         args->targetLimit,
  86         source,
  87         sourceLimit,
  88         NULL, /* no offsets */
  89         FALSE, /* no flush */
  90         err);
  91
  92     if(args->offsets)
  93     {
  94         while (args->target != oldTarget)  /* if it moved at all.. */
  95         {
  96             *(args->offsets)++ = offsetIndex;
  97             oldTarget++;
  98         }
  99     }
 100
 101     /*
 102     Note, if you did something like used a Stop subcallback, things would get interesting.
 103     In fact, here's where we want to return the partially consumed in-source!
 104     */
 105     if(*err == U_BUFFER_OVERFLOW_ERROR)
 106     /* && (*source < sourceLimit && args->target >= args->targetLimit)
 107     -- S. Hrcek */
 108     {
 109         /* Overflowed the target.  Now, we'll write into the charErrorBuffer.
 110         It's a fixed size. If we overflow it... Hmm */
 111         char *newTarget;
 112         const char *newTargetLimit;
 113         UErrorCode err2 = U_ZERO_ERROR;
 114
 115         int8_t errBuffLen;
 116
 117         errBuffLen  = args->converter->charErrorBufferLength;
 118
 119         /* start the new target at the first free slot in the errbuff.. */
 120         newTarget = (char *)(args->converter->charErrorBuffer + errBuffLen);
 121
 122         newTargetLimit = (char *)(args->converter->charErrorBuffer +
 123             sizeof(args->converter->charErrorBuffer));
 124
 125         if(newTarget >= newTargetLimit)
 126         {
 127             *err = U_INTERNAL_PROGRAM_ERROR;
 128             return;
 129         }
 130
 131         /* We're going to tell the converter that the errbuff len is empty.
 132         This prevents the existing errbuff from being 'flushed' out onto
 133         itself.  If the errbuff is needed by the converter this time,
 134         we're hosed - we're out of space! */
 135
 136         args->converter->charErrorBufferLength = 0;
 137
 138         ucnv_fromUnicode(args->converter,
 139                          &newTarget,
 140                          newTargetLimit,
 141                          source,
 142                          sourceLimit,
 143                          NULL,
 144                          FALSE,
 145                          &err2);
 146
 147         /* We can go ahead and overwrite the  length here. We know just how
 148         to recalculate it. */
 149
 150         args->converter->charErrorBufferLength = (int8_t)(
 151             newTarget - (char*)args->converter->charErrorBuffer);
 152
 153         if((newTarget >= newTargetLimit) || (err2 == U_BUFFER_OVERFLOW_ERROR))
 154         {
 155             /* now we're REALLY in trouble.
 156             Internal program error - callback shouldn't have written this much
 157             data!
 158             */
 159             *err = U_INTERNAL_PROGRAM_ERROR;
 160             return;
 161         }
 162         /*else {*/
 163             /* sub errs could be invalid/truncated/illegal chars or w/e.
 164             These might want to be passed on up.. But the problem is, we already
 165             need to pass U_BUFFER_OVERFLOW_ERROR. That has to override these
 166             other errs.. */
 167
 168             /*
 169             if(U_FAILURE(err2))
 170             ??
 171             */
 172         /*}*/
 173     }
 174 }
 175
 176 U_CAPI void  U_EXPORT2
 177 ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
 178                            int32_t offsetIndex,
 179                            UErrorCode * err)
 180 {
 181     UConverter *converter;
 182     int32_t length;
 183
 184     if(U_FAILURE(*err)) {
 185         return;
 186     }
 187     converter = args->converter;
 188     length = converter->subCharLen;
 189
 190     if(length == 0) {
 191         return;
 192     }
 193
 194     if(length < 0) {
 195         /*
 196          * Write/convert the substitution string. Its real length is -length.
 197          * Unlike the escape callback, we need not change the converter's
 198          * callback function because ucnv_setSubstString() verified that
 199          * the string can be converted, so we will not get a conversion error
 200          * and will not recurse.
 201          * At worst we should get a U_BUFFER_OVERFLOW_ERROR.
 202          */
 203         const UChar *source = (const UChar *)converter->subChars;
 204         ucnv_cbFromUWriteUChars(args, &source, source - length, offsetIndex, err);
 205         return;
 206     }
 207
 208     if(converter->sharedData->impl->writeSub!=NULL) {
 209         converter->sharedData->impl->writeSub(args, offsetIndex, err);
 210     }
 211     else if(converter->subChar1!=0 && (uint16_t)converter->invalidUCharBuffer[0]<=(uint16_t)0xffu) {
 212         /*
 213         TODO: Is this untestable because the MBCS converter has a writeSub function to call
 214         and the other converters don't use subChar1?
 215         */
 216         ucnv_cbFromUWriteBytes(args,
 217                                (const char *)&converter->subChar1, 1,
 218                                offsetIndex, err);
 219     }
 220     else {
 221         ucnv_cbFromUWriteBytes(args,
 222                                (const char *)converter->subChars, length,
 223                                offsetIndex, err);
 224     }
 225 }
 226
 227 U_CAPI void  U_EXPORT2
 228 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
 229                             const UChar* source,
 230                             int32_t length,
 231                             int32_t offsetIndex,
 232                             UErrorCode * err)
 233 {
 234     if(U_FAILURE(*err)) {
 235         return;
 236     }
 237
 238     ucnv_toUWriteUChars(
 239         args->converter,
 240         source, length,
 241         &args->target, args->targetLimit,
 242         &args->offsets, offsetIndex,
 243         err);
 244 }
 245
 246 U_CAPI void  U_EXPORT2
 247 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
 248                          int32_t offsetIndex,
 249                        UErrorCode * err)
 250 {
 251     static const UChar kSubstituteChar1 = 0x1A, kSubstituteChar = 0xFFFD;
 252
 253     /* could optimize this case, just one uchar */
 254     if(args->converter->invalidCharLength == 1 && args->converter->subChar1 != 0) {
 255         ucnv_cbToUWriteUChars(args, &kSubstituteChar1, 1, offsetIndex, err);
 256     } else {
 257         ucnv_cbToUWriteUChars(args, &kSubstituteChar, 1, offsetIndex, err);
 258     }
 259 }
 260
 261 #endif