icuSources/common/unicode/ucnv_err.h

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1999-2005, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6  *
   7  *
   8  *   ucnv_err.h:
   9  */
  10
  11 /**
  12  * \file
  13  * \brief C UConverter predefined error callbacks
  14  *
  15  *  <h2>Error Behaviour Functions</h2>
  16  *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
  17  *  These are provided as part of ICU and many are stable, but they
  18  *  can also be considered only as an example of what can be done with
  19  *  callbacks.  You may of course write your own.
  20  *
  21  *  If you want to write your own, you may also find the functions from
  22  *  ucnv_cb.h useful when writing your own callbacks.
  23  *
  24  *  These functions, although public, should NEVER be called directly.
  25  *  They should be used as parameters to the ucnv_setFromUCallBack
  26  *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
  27  *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
  28  *
  29  *  usage example:  'STOP' doesn't need any context, but newContext
  30  *    could be set to something other than 'NULL' if needed. The available
  31  *    contexts in this header can modify the default behavior of the callback.
  32  *
  33  *  \code
  34  *  UErrorCode err = U_ZERO_ERROR;
  35  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
  36  *  const void *oldContext;
  37  *  UConverterFromUCallback oldAction;
  38  *
  39  *
  40  *  if (U_SUCCESS(err))
  41  *  {
  42  *      ucnv_setFromUCallBack(myConverter,
  43  *                       UCNV_FROM_U_CALLBACK_STOP,
  44  *                       NULL,
  45  *                       &oldAction,
  46  *                       &oldContext,
  47  *                       &err);
  48  *  }
  49  *  \endcode
  50  *
  51  *  The code above tells "myConverter" to stop when it encounters
  52  *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
  53  *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
  54  *  and ucnv_setToUCallBack would need to be called in order to change
  55  *  that behavior too.
  56  *
  57  *  Here is an example with a context:
  58  *
  59  *  \code
  60  *  UErrorCode err = U_ZERO_ERROR;
  61  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
  62  *  const void *oldContext;
  63  *  UConverterToUCallback oldAction;
  64  *
  65  *
  66  *  if (U_SUCCESS(err))
  67  *  {
  68  *      ucnv_setToUCallBack(myConverter,
  69  *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
  70  *                       UCNV_SUB_STOP_ON_ILLEGAL,
  71  *                       &oldAction,
  72  *                       &oldContext,
  73  *                       &err);
  74  *  }
  75  *  \endcode
  76  *
  77  *  The code above tells "myConverter" to stop when it encounters
  78  *  ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
  79  *  Codepage -> Unicode. Any unmapped but legal characters will be
  80  *  replaced with the default substitution character.
  81  */
  82
  83 #ifndef UCNV_ERR_H
  84 #define UCNV_ERR_H
  85
  86 #include "unicode/utypes.h"
  87
  88 #if !UCONFIG_NO_CONVERSION
  89
  90 /** Forward declaration of the UConverter structure; it is not defined in this header. @stable ICU 2.0 */
  91 struct UConverter;
  92
  93 /** Typedef so that UConverter can be named without the struct keyword. @stable ICU 2.0 */
  94 typedef struct UConverter UConverter;
  95
  96 /**
  97  * FROM_U, TO_U context option for the substitute callbacks: stop at an ILLEGAL_SEQUENCE instead of substituting it
  98  * @stable ICU 2.0
  99  */
 100 #define UCNV_SUB_STOP_ON_ILLEGAL "i"
 101
 102 /**
 103  * FROM_U, TO_U context option for the skip callbacks: stop at an ILLEGAL_SEQUENCE instead of skipping it
 104  * @stable ICU 2.0
 105  */
 106 #define UCNV_SKIP_STOP_ON_ILLEGAL "i"
 107
 108 /**
 109  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
 110  * @stable ICU 2.0
 111  */
 112 #define UCNV_ESCAPE_ICU       NULL
 113 /**
 114  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
 115  * @stable ICU 2.0
 116  */
 117 #define UCNV_ESCAPE_JAVA      "J"
 118 /**
 119  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
 120  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\\xXXXX)
 121  * @stable ICU 2.0
 122  */
 123 #define UCNV_ESCAPE_C         "C"
 124 /**
 125  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
 126  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
 127  * @stable ICU 2.0
 128  */
 129 #define UCNV_ESCAPE_XML_DEC   "D"
 130 /**
 131  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
 132  * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
 133  * @stable ICU 2.0
 134  */
 135 #define UCNV_ESCAPE_XML_HEX   "X"
 136 /**
 137  * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
 138  * @stable ICU 2.0
 139  */
 140 #define UCNV_ESCAPE_UNICODE   "U"
 141
 142 /**
 143  * The process condition code to be used with the callbacks.
 144  * Codes which are greater than UCNV_IRREGULAR should be
 145  * passed on to any chained callbacks.
 146  * @stable ICU 2.0
 147  */
 148 typedef enum {
 149     UCNV_UNASSIGNED = 0,  /**< The code point is unassigned.
 150                              The error code U_INVALID_CHAR_FOUND will be set. */
 151     UCNV_ILLEGAL = 1,     /**< The code point is illegal. For example,
 152                              \\x81\\x2E is illegal in SJIS because \\x2E
 153                              is not a valid trail byte for the \\x81
 154                              lead byte.
 155                              Also, starting with Unicode 3.0.1, non-shortest byte sequences
 156                              in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
 157                              are also illegal, not just irregular.
 158                              The error code U_ILLEGAL_CHAR_FOUND will be set. */
 159     UCNV_IRREGULAR = 2,   /**< The code point is not a regular sequence in
 160                              the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
 161                              are irregular UTF-8 byte sequences for single surrogate
 162                              code points.
 163                              The error code U_INVALID_CHAR_FOUND will be set. */
 164     UCNV_RESET = 3,       /**< The callback is called with this reason when a
 165                              'reset' has occurred. The callback should reset all
 166                              state. */
 167     UCNV_CLOSE = 4,        /**< Called when the converter is closed. The
 168                              callback should release any allocated memory.*/
 169     UCNV_CLONE = 5         /**< Called when ucnv_safeClone() is called on the
 170                               converter. The pointer available as the
 171                               'context' is an alias to the original converter's
 172                               context pointer. If the context must be owned
 173                               by the new converter, the callback must clone
 174                               the data and call ucnv_setFromUCallBack
 175                               (or ucnv_setToUCallBack) with the correct pointer.
 176                               @stable ICU 2.2
 177                            */
 178 } UConverterCallbackReason;
 179
 180
 181 /**
 182  * The structure for the fromUnicode callback function parameter.
 183  * @stable ICU 2.0
 184  */
 185 typedef struct {
 186     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
 187     UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0    */
 188     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
 189     const UChar *source;        /**< Pointer to the source buffer. @stable ICU 2.0    */
 190     const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
 191     char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
 192     const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
 193     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets (written as *offsets = value; offsets++;). @stable ICU 2.0  */
 194 } UConverterFromUnicodeArgs;
 195
 196
 197 /**
 198  * The structure for the toUnicode callback function parameter.
 199  * @stable ICU 2.0
 200  */
 201 typedef struct {
 202     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
 203     UBool flush;                /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0   */
 204     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
 205     const char *source;         /**< Pointer to the source buffer. @stable ICU 2.0    */
 206     const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
 207     UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
 208     const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
 209     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets (written as *offsets = value; offsets++;). @stable ICU 2.0  */
 210 } UConverterToUnicodeArgs;
 211
 212
 213 /**
 214  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
 215  * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 216  * returning the error code back to the caller immediately.
 217  *
 218  * @param context Pointer to the callback's private data
 219  * @param fromUArgs Information about the conversion in progress
 220  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 221  * @param length Size (in UChars) of the concerned Unicode sequence
 222  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 223  * @param reason Defines the reason the callback was invoked
 224  * @param err This should always be set to a failure status prior to calling.
 225  * @stable ICU 2.0
 226  */
 227 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
 228                   const void *context,
 229                   UConverterFromUnicodeArgs *fromUArgs,
 230                   const UChar* codeUnits,
 231                   int32_t length,
 232                   UChar32 codePoint,
 233                   UConverterCallbackReason reason,
 234                   UErrorCode * err);
 235
 236
 237
 238 /**
 239  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
 240  * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 241  * returning the error code back to the caller immediately.
 242  *
 243  * @param context Pointer to the callback's private data
 244  * @param toUArgs Information about the conversion in progress
 245  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 246  * @param length Size (in bytes) of the concerned codepage sequence
 247  * @param reason Defines the reason the callback was invoked
 248  * @param err This should always be set to a failure status prior to calling.
 249  * @stable ICU 2.0
 250  */
 251 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
 252                   const void *context,
 253                   UConverterToUnicodeArgs *toUArgs,
 254                   const char* codeUnits,
 255                   int32_t length,
 256                   UConverterCallbackReason reason,
 257                   UErrorCode * err);
 258
 259 /**
 260  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
 261  * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
 262  * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 263  * simply ignoring those characters.
 264  *
 265  * @param context  The function currently recognizes the callback options:
 266  *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 267  *                      returning the error code back to the caller immediately.
 268  *                 NULL: Skips any ILLEGAL_SEQUENCE
 269  * @param fromUArgs Information about the conversion in progress
 270  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 271  * @param length Size (in UChars) of the concerned Unicode sequence
 272  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 273  * @param reason Defines the reason the callback was invoked
 274  * @param err Return value will be set to success if the callback was handled,
 275  *      otherwise this value will be set to a failure status.
 276  * @stable ICU 2.0
 277  */
 278 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
 279                   const void *context,
 280                   UConverterFromUnicodeArgs *fromUArgs,
 281                   const UChar* codeUnits,
 282                   int32_t length,
 283                   UChar32 codePoint,
 284                   UConverterCallbackReason reason,
 285                   UErrorCode * err);
 286
 287 /**
 288  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
 289  * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
 290  * UNASSIGNED_SEQUENCE depending on context parameter, with the
 291  * current substitution string for the converter. This is the default
 292  * callback.
 293  *
 294  * @param context The function currently recognizes the callback options:
 295  *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 296  *                      returning the error code back to the caller immediately.
 297  *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 298  * @param fromUArgs Information about the conversion in progress
 299  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 300  * @param length Size (in UChars) of the concerned Unicode sequence
 301  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 302  * @param reason Defines the reason the callback was invoked
 303  * @param err Return value will be set to success if the callback was handled,
 304  *      otherwise this value will be set to a failure status.
 305  * @see ucnv_setSubstChars
 306  * @stable ICU 2.0
 307  */
 308 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
 309                   const void *context,
 310                   UConverterFromUnicodeArgs *fromUArgs,
 311                   const UChar* codeUnits,
 312                   int32_t length,
 313                   UChar32 codePoint,
 314                   UConverterCallbackReason reason,
 315                   UErrorCode * err);
 316
 317 /**
 318  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setFromUCallBack() instead.
 319  * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
 320  * hexadecimal representation of the illegal codepoints
 321  *
 322  * @param context The function currently recognizes the callback options:
 323  *        <ul>
 324  *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 325  *          representation in the format %UXXXX, e.g. "%UFFFE%U00AC%UC8FE".
 326  *          In the event the converter doesn't support the characters {%,U}[A-F][0-9],
 327  *          it will substitute the illegal sequence with the substitution characters.
 328  *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
 329  *          %UD84D%UDC56</li>
 330  *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 331  *          representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE".
 332  *          In the event the converter doesn't support the characters {\,u}[A-F][0-9],
 333  *          it will substitute the illegal sequence with the substitution characters.
 334  *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
 335  *          \\uD84D\\uDC56</li>
 336  *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 337  *          representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE".
 338  *          In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
 339  *          it will substitute the illegal sequence with the substitution characters.
 340  *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
 341  *          \\U00023456</li>
 342  *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
 343  *          representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;"\endhtmlonly.
 344  *          In the event the converter doesn't support the characters {&amp;,#}[0-9],
 345  *          it will substitute the illegal sequence with the substitution characters.
 346  *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
 347  *          &amp;#144470; and Zero padding is ignored.</li>
 348  *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 349  *          representation in the format \htmlonly&amp;#xXXXX;, e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;"\endhtmlonly.
 350  *          In the event the converter doesn't support the characters {&,#,x}[0-9],
 351  *          it will substitute the illegal sequence with the substitution characters.
 352  *          Note that a supplementary code point (a surrogate pair, e.g. U+23456) is represented as
 353  *          \htmlonly&amp;#x23456;\endhtmlonly</li>
 354  *        </ul>
 355  * @param fromUArgs Information about the conversion in progress
 356  * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 357  * @param length Size (in UChars) of the concerned Unicode sequence
 358  * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 359  * @param reason Defines the reason the callback was invoked
 360  * @param err Return value will be set to success if the callback was handled,
 361  *      otherwise this value will be set to a failure status.
 362  * @stable ICU 2.0
 363  */
 364 U_STABLE void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
 365                   const void *context,
 366                   UConverterFromUnicodeArgs *fromUArgs,
 367                   const UChar* codeUnits,
 368                   int32_t length,
 369                   UChar32 codePoint,
 370                   UConverterCallbackReason reason,
 371                   UErrorCode * err);
 372
 373
 374 /**
 375  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
 376  * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
 377  * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 378  * simply ignoring those characters.
 379  *
 380  * @param context  The function currently recognizes the callback options:
 381  *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 382  *                      returning the error code back to the caller immediately.
 383  *                 NULL: Skips any ILLEGAL_SEQUENCE
 384  * @param toUArgs Information about the conversion in progress
 385  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 386  * @param length Size (in bytes) of the concerned codepage sequence
 387  * @param reason Defines the reason the callback was invoked
 388  * @param err Return value will be set to success if the callback was handled,
 389  *      otherwise this value will be set to a failure status.
 390  * @stable ICU 2.0
 391  */
 392 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
 393                   const void *context,
 394                   UConverterToUnicodeArgs *toUArgs,
 395                   const char* codeUnits,
 396                   int32_t length,
 397                   UConverterCallbackReason reason,
 398                   UErrorCode * err);
 399
 400 /**
 401  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
 402  * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
 403  * UNASSIGNED_SEQUENCE depending on context parameter, with the
 404  * Unicode substitution character, U+FFFD.
 405  *
 406  * @param context  The function currently recognizes the callback options:
 407  *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 408  *                      returning the error code back to the caller immediately.
 409  *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 410  * @param toUArgs Information about the conversion in progress
 411  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 412  * @param length Size (in bytes) of the concerned codepage sequence
 413  * @param reason Defines the reason the callback was invoked
 414  * @param err Return value will be set to success if the callback was handled,
 415  *      otherwise this value will be set to a failure status.
 416  * @stable ICU 2.0
 417  */
 418 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
 419                   const void *context,
 420                   UConverterToUnicodeArgs *toUArgs,
 421                   const char* codeUnits,
 422                   int32_t length,
 423                   UConverterCallbackReason reason,
 424                   UErrorCode * err);
 425
 426 /**
 427  * DO NOT CALL THIS FUNCTION DIRECTLY! Install it with ucnv_setToUCallBack() instead.
 428  * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
 429  * hexadecimal representation of the illegal bytes
 430  *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
 431  *
 432  * @param context This function currently recognizes the callback options:
 433  *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
 434  *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
 435  * @param toUArgs Information about the conversion in progress
 436  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 437  * @param length Size (in bytes) of the concerned codepage sequence
 438  * @param reason Defines the reason the callback was invoked
 439  * @param err Return value will be set to success if the callback was handled,
 440  *      otherwise this value will be set to a failure status.
 441  * @stable ICU 2.0
 442  */
 443
 444 U_STABLE void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
 445                   const void *context,
 446                   UConverterToUnicodeArgs *toUArgs,
 447                   const char* codeUnits,
 448                   int32_t length,
 449                   UConverterCallbackReason reason,
 450                   UErrorCode * err);
 451
 452 #endif
 453
 454 #endif
 455
 456 /*UCNV_ERR_H*/