icuSources/common/unicode/ucnv_err.h

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 1999-2009, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8  *
   9  *
  10  *   ucnv_err.h:
  11  */
  12
  13 /**
  14  * \file
  15  * \brief C UConverter predefined error callbacks
  16  *
  17  *  <h2>Error Behaviour Functions</h2>
  18  *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
  19  *  These are provided as part of ICU and many are stable, but they
  20  *  can also be considered only as an example of what can be done with
  21  *  callbacks.  You may of course write your own.
  22  *
  23  *  If you want to write your own, you may also find the functions from
  24  *  ucnv_cb.h useful when writing your own callbacks.
  25  *
  26  *  These functions, although public, should NEVER be called directly.
  27  *  They should be used as parameters to the ucnv_setFromUCallBack
  28  *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
  29  *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
  30  *
  31  *  usage example:  'STOP' doesn't need any context, but newContext
  32  *    could be set to something other than 'NULL' if needed. The available
  33  *    contexts in this header can modify the default behavior of the callback.
  34  *
  35  *  \code
  36  *  UErrorCode err = U_ZERO_ERROR;
  37  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
  38  *  const void *oldContext;
  39  *  UConverterFromUCallback oldAction;
  40  *
  41  *
  42  *  if (U_SUCCESS(err))
  43  *  {
  44  *      ucnv_setFromUCallBack(myConverter,
  45  *                       UCNV_FROM_U_CALLBACK_STOP,
  46  *                       NULL,
  47  *                       &oldAction,
  48  *                       &oldContext,
  49  *                       &err);
  50  *  }
  51  *  \endcode
  52  *
  53  *  The code above tells "myConverter" to stop when it encounters an
  54  *  ILLEGAL/TRUNCATED/INVALID sequence while converting from
  55  *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed;
  56  *  ucnv_setToUCallBack would need to be called in order to change
  57  *  that behavior too.
  58  *
  59  *  Here is an example with a context:
  60  *
  61  *  \code
  62  *  UErrorCode err = U_ZERO_ERROR;
  63  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
  64  *  const void *oldContext;
  65  *  UConverterToUCallback oldAction;
  66  *
  67  *
  68  *  if (U_SUCCESS(err))
  69  *  {
  70  *      ucnv_setToUCallBack(myConverter,
  71  *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
  72  *                       UCNV_SUB_STOP_ON_ILLEGAL,
  73  *                       &oldAction,
  74  *                       &oldContext,
  75  *                       &err);
  76  *  }
  77  *  \endcode
  78  *
  79  *  The code above tells "myConverter" to stop when it encounters an
  80  *  ILLEGAL/TRUNCATED/INVALID sequence while converting from
  81  *  Codepage -> Unicode. Any unmapped but legal characters will be
  82  *  replaced with the default substitution character.
  83  */
  84
  85 #ifndef UCNV_ERR_H
  86 #define UCNV_ERR_H
  87
  88 #include "unicode/utypes.h"
  89
  90 #if !UCONFIG_NO_CONVERSION
  91
  92 /** Forward declaration of the UConverter structure; its definition is not part of this header, which only uses pointers to it. @stable ICU 2.0 */
  93 struct UConverter;
  94
  95 /** Typedef so the converter type can be written as UConverter without the struct keyword. @stable ICU 2.0 */
  96 typedef struct UConverter UConverter;
  97
  98 /**
  99  * Context option for the FROM_U and TO_U SUBSTITUTE callbacks: stop at an ILLEGAL_SEQUENCE and return the error to the caller instead of substituting it.
 100  * @stable ICU 2.0
 101  */
 102 #define UCNV_SUB_STOP_ON_ILLEGAL "i"
 103
 104 /**
 105  * Context option for the FROM_U and TO_U SKIP callbacks: stop at an ILLEGAL_SEQUENCE and return the error to the caller instead of skipping it.
 106  * @stable ICU 2.0
 107  */
 108 #define UCNV_SKIP_STOP_ON_ILLEGAL "i"
 109
 110 /**
 111  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX). Defined as NULL, so passing a NULL context selects this style.
 112  * @stable ICU 2.0
 113  */
 114 #define UCNV_ESCAPE_ICU       NULL
 115 /**
 116  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
 117  * @stable ICU 2.0
 118  */
 119 #define UCNV_ESCAPE_JAVA      "J"
 120 /**
 121  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
 122  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
 123  * @stable ICU 2.0
 124  */
 125 #define UCNV_ESCAPE_C         "C"
 126 /**
 127  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
 128  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
 129  * @stable ICU 2.0
 130  */
 131 #define UCNV_ESCAPE_XML_DEC   "D"
 132 /**
 133  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
 134  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
 135  * @stable ICU 2.0
 136  */
 137 #define UCNV_ESCAPE_XML_HEX   "X"
 138 /**
 139  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
 140  * @stable ICU 2.0
 141  */
 142 #define UCNV_ESCAPE_UNICODE   "U"
 143
 144 /**
 145  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
 146  * a backslash, 1..6 hex digits, and a space)
 147  * @stable ICU 4.0
 148  */
 149 #define UCNV_ESCAPE_CSS2   "S"
 150
 151 /**
 152  * The process condition code passed to the callbacks as their 'reason' argument.
 153  * Codes which are greater than UCNV_IRREGULAR should be
 154  * passed on to any chained callbacks.
 155  * @stable ICU 2.0
 156  */
 157 typedef enum {
 158     UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
 159                              The error code U_INVALID_CHAR_FOUND will be set. */
 160     UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
 161                              \\x81\\x2E is illegal in SJIS because \\x2E
 162                              is not a valid trail byte for the \\x81
 163                              lead byte.
 164                              Also, starting with Unicode 3.0.1, non-shortest byte sequences
 165                              in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
 166                              are also illegal, not just irregular.
 167                              The error code U_ILLEGAL_CHAR_FOUND will be set. */
 168     UCNV_IRREGULAR = 2,   /**< The code point is not a regular sequence in
 169                              the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
 170                              are irregular UTF-8 byte sequences for single surrogate
 171                              code points.
 172                              The error code U_INVALID_CHAR_FOUND will be set. */
 173     UCNV_RESET = 3,       /**< The callback is called with this reason when a
 174                              'reset' has occurred. The callback should reset all
 175                              state. */
 176     UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
 177                              callback should release any allocated memory.*/
 178     UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
 179                               converter. The pointer available as the
 180                               'context' is an alias to the original converter's
 181                               context pointer. If the context must be owned
 182                               by the new converter, the callback must clone
 183                               the data and call ucnv_setFromUCallBack
 184                               (or ucnv_setToUCallBack) with the correct pointer.
 185                               @stable ICU 2.2
 186                            */
 187 } UConverterCallbackReason;
 188
 189
 190 /**
 191  * The structure passed to the fromUnicode callback functions as their argument block (source is UChar text, target is char bytes).
 192  * @stable ICU 2.0
 193  */
 194 typedef struct {
 195     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
 196     UBool flush;                /**< The internal state of the converter will be reset and data flushed if set to TRUE. @stable ICU 2.0    */
 197     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
 198     const UChar *source;        /**< Pointer to the source buffer. @stable ICU 2.0    */
 199     const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of the source buffer. @stable ICU 2.0    */
 200     char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
 201     const char *targetLimit;    /**< Pointer to the limit (end + 1) of the target buffer. @stable ICU 2.0     */
 202     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets; written as *offsets = value; offsets++;. @stable ICU 2.0  */
 203 } UConverterFromUnicodeArgs;
 204
 205
 206 /**
 207  * The structure passed to the toUnicode callback functions as their argument block (source is char bytes, target is UChar text).
 208  * @stable ICU 2.0
 209  */
 210 typedef struct {
 211     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
 212     UBool flush;                /**< The internal state of the converter will be reset and data flushed if set to TRUE. @stable ICU 2.0   */
 213     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
 214     const char *source;         /**< Pointer to the source buffer. @stable ICU 2.0    */
 215     const char *sourceLimit;    /**< Pointer to the limit (end + 1) of the source buffer. @stable ICU 2.0    */
 216     UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
 217     const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of the target buffer. @stable ICU 2.0     */
 218     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets; written as *offsets = value; offsets++;. @stable ICU 2.0  */
 219 } UConverterToUnicodeArgs;
 220
 221
 222 /**
 223  * DO NOT CALL THIS FUNCTION DIRECTLY!
 224  * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 225  * returning the error code back to the caller immediately.
 226  *
 227  * @param context Pointer to the callback's private data
 228  * @param fromUArgs Information about the conversion in progress
 229  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 230  * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 231  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
 232  * @param reason Defines the reason the callback was invoked
 233  * @param err This should always be set to a failure status prior to calling.
 234  * @stable ICU 2.0
 235  */
 236 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
 237                   const void *context,
 238                   UConverterFromUnicodeArgs *fromUArgs,
 239                   const UChar* codeUnits,
 240                   int32_t length,
 241                   UChar32 codePoint,
 242                   UConverterCallbackReason reason,
 243                   UErrorCode * err);
 244
 245
 246
 247 /**
 248  * DO NOT CALL THIS FUNCTION DIRECTLY!
 249  * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 250  * returning the error code back to the caller immediately.
 251  *
 252  * @param context Pointer to the callback's private data
 253  * @param toUArgs Information about the conversion in progress
 254  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 255  * @param length Size (in bytes) of the concerned codepage sequence pointed to by codeUnits
 256  * @param reason Defines the reason the callback was invoked
 257  * @param err This should always be set to a failure status prior to calling.
 258  * @stable ICU 2.0
 259  */
 260 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
 261                   const void *context,
 262                   UConverterToUnicodeArgs *toUArgs,
 263                   const char* codeUnits,
 264                   int32_t length,
 265                   UConverterCallbackReason reason,
 266                   UErrorCode * err);
 267
 268 /**
 269  * DO NOT CALL THIS FUNCTION DIRECTLY!
 270  * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
 271  * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 272  * simply ignoring those characters.
 273  *
 274  * @param context  The function currently recognizes the callback options:
 275  *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 276  *                      returning the error code back to the caller immediately.
 277  *                 NULL: Skips any ILLEGAL_SEQUENCE
 278  * @param fromUArgs Information about the conversion in progress
 279  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 280  * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 281  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
 282  * @param reason Defines the reason the callback was invoked
 283  * @param err Return value will be set to success if the callback was handled,
 284  *      otherwise this value will be set to a failure status.
 285  * @stable ICU 2.0
 286  */
 287 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
 288                   const void *context,
 289                   UConverterFromUnicodeArgs *fromUArgs,
 290                   const UChar* codeUnits,
 291                   int32_t length,
 292                   UChar32 codePoint,
 293                   UConverterCallbackReason reason,
 294                   UErrorCode * err);
 295
 296 /**
 297  * DO NOT CALL THIS FUNCTION DIRECTLY!
 298  * This From Unicode callback will substitute the ILLEGAL_SEQUENCE, or only the
 299  * UNASSIGNED_SEQUENCE depending on the context parameter, with the
 300  * current substitution string for the converter. This is the default
 301  * callback.
 302  *
 303  * @param context The function currently recognizes the callback options:
 304  *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 305  *                      returning the error code back to the caller immediately.
 306  *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 307  * @param fromUArgs Information about the conversion in progress
 308  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 309  * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 310  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
 311  * @param reason Defines the reason the callback was invoked
 312  * @param err Return value will be set to success if the callback was handled,
 313  *      otherwise this value will be set to a failure status.
 314  * @see ucnv_setSubstChars
 315  * @stable ICU 2.0
 316  */
 317 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
 318                   const void *context,
 319                   UConverterFromUnicodeArgs *fromUArgs,
 320                   const UChar* codeUnits,
 321                   int32_t length,
 322                   UChar32 codePoint,
 323                   UConverterCallbackReason reason,
 324                   UErrorCode * err);
 325
 326 /**
 327  * DO NOT CALL THIS FUNCTION DIRECTLY!
 328  * This From Unicode callback will substitute the ILLEGAL SEQUENCE with an
 329  * escaped (hexadecimal or decimal) representation of the illegal code points.
 330  *
 331  * @param context The function currently recognizes the callback options:
 332  *        <ul>
 333  *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 334  *          representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
 335  *          In the event the converter doesn't support the characters {%,U}[A-F][0-9],
 336  *          it will substitute the illegal sequence with the substitution characters.
 337  *          Note that a supplementary code point (a 32-bit value such as U+23456, encoded in UTF-16 as a surrogate pair) is represented as
 338  *          %UD84D%UDC56</li>
 339  *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 340  *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
 341  *          In the event the converter doesn't support the characters {\,u}[A-F][0-9],
 342  *          it will substitute the illegal sequence with the substitution characters.
 343  *          Note that a supplementary code point (a 32-bit value such as U+23456, encoded in UTF-16 as a surrogate pair) is represented as
 344  *          \\uD84D\\uDC56</li>
 345  *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 346  *          representation in the format \\uXXXX (e.g. "\\uFFFE\\u00AC\\uC8FE").
 347  *          In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
 348  *          it will substitute the illegal sequence with the substitution characters.
 349  *          Note that a supplementary code point (a 32-bit value such as U+23456, encoded in UTF-16 as a surrogate pair) is represented as
 350  *          \\U00023456</li>
 351  *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
 352  *          representation in the format \htmlonly&amp;#DDDDDDDD; (e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
 353  *          In the event the converter doesn't support the characters {&amp;,#}[0-9],
 354  *          it will substitute the illegal sequence with the substitution characters.
 355  *          Note that a supplementary code point (a 32-bit value such as U+23456, encoded in UTF-16 as a surrogate pair) is represented as
 356  *          &amp;#144470; and zero padding is ignored.</li>
 357  *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 358  *          representation in the format \htmlonly&amp;#xXXXX; (e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
 359  *          In the event the converter doesn't support the characters {&amp;,#,x}[A-F][0-9],
 360  *          it will substitute the illegal sequence with the substitution characters.
 361  *          Note that a supplementary code point (a 32-bit value such as U+23456, encoded in UTF-16 as a surrogate pair) is represented as
 362  *          \htmlonly&amp;#x23456;\endhtmlonly</li>
 363  *        <li>UCNV_ESCAPE_UNICODE and UCNV_ESCAPE_CSS2 are also documented as FROM_U_CALLBACK_ESCAPE context options; see those macros for their formats.</li></ul>
 364  * @param fromUArgs Information about the conversion in progress
 365  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 366  * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 367  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode code point.
 368  * @param reason Defines the reason the callback was invoked
 369  * @param err Return value will be set to success if the callback was handled,
 370  *      otherwise this value will be set to a failure status.
 371  * @stable ICU 2.0
 372  */
 373 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
 374                   const void *context,
 375                   UConverterFromUnicodeArgs *fromUArgs,
 376                   const UChar* codeUnits,
 377                   int32_t length,
 378                   UChar32 codePoint,
 379                   UConverterCallbackReason reason,
 380                   UErrorCode * err);
 381
 382
 383 /**
 384  * DO NOT CALL THIS FUNCTION DIRECTLY!
 385  * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
 386  * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 387  * simply ignoring those characters.
 388  *
 389  * @param context  The function currently recognizes the callback options:
 390  *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 391  *                      returning the error code back to the caller immediately.
 392  *                 NULL: Skips any ILLEGAL_SEQUENCE
 393  * @param toUArgs Information about the conversion in progress
 394  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 395  * @param length Size (in bytes) of the concerned codepage sequence pointed to by codeUnits
 396  * @param reason Defines the reason the callback was invoked
 397  * @param err Return value will be set to success if the callback was handled,
 398  *      otherwise this value will be set to a failure status.
 399  * @stable ICU 2.0
 400  */
 401 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
 402                   const void *context,
 403                   UConverterToUnicodeArgs *toUArgs,
 404                   const char* codeUnits,
 405                   int32_t length,
 406                   UConverterCallbackReason reason,
 407                   UErrorCode * err);
 408
 409 /**
 410  * DO NOT CALL THIS FUNCTION DIRECTLY!
 411  * This To Unicode callback will substitute the ILLEGAL_SEQUENCE, or only the
 412  * UNASSIGNED_SEQUENCE depending on the context parameter, with the
 413  * Unicode substitution character, U+FFFD.
 414  *
 415  * @param context  The function currently recognizes the callback options:
 416  *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 417  *                      returning the error code back to the caller immediately.
 418  *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 419  * @param toUArgs Information about the conversion in progress
 420  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 421  * @param length Size (in bytes) of the concerned codepage sequence pointed to by codeUnits
 422  * @param reason Defines the reason the callback was invoked
 423  * @param err Return value will be set to success if the callback was handled,
 424  *      otherwise this value will be set to a failure status.
 425  * @stable ICU 2.0
 426  */
 427 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
 428                   const void *context,
 429                   UConverterToUnicodeArgs *toUArgs,
 430                   const char* codeUnits,
 431                   int32_t length,
 432                   UConverterCallbackReason reason,
 433                   UErrorCode * err);
 434
 435 /**
 436  * DO NOT CALL THIS FUNCTION DIRECTLY!
 437  * This To Unicode callback will substitute the ILLEGAL SEQUENCE with the
 438  * hexadecimal representation of the illegal bytes
 439  *  (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
 440  *
 441  * @param context This function currently recognizes the callback options:
 442  *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
 443  *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
 444  * @param toUArgs Information about the conversion in progress
 445  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 446  * @param length Size (in bytes) of the concerned codepage sequence pointed to by codeUnits
 447  * @param reason Defines the reason the callback was invoked
 448  * @param err Return value will be set to success if the callback was handled,
 449  *      otherwise this value will be set to a failure status.
 450  * @stable ICU 2.0
 451  */
 452
 453 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
 454                   const void *context,
 455                   UConverterToUnicodeArgs *toUArgs,
 456                   const char* codeUnits,
 457                   int32_t length,
 458                   UConverterCallbackReason reason,
 459                   UErrorCode * err);
 460
 461 #endif
 462
 463 #endif
 464
 465 /*UCNV_ERR_H*/