[apple/icu.git] / icuSources / common / unicode / ucnv_err.h

/*
**********************************************************************
*   Copyright (C) 1999-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
 *
 *
 *   ucnv_err.h:
 */

/**
 * \file
 * \brief C UConverter predefined error callbacks
 *
 *  <h2>Error Behaviour Functions</h2>
 *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
 *  These are provided as part of ICU and many are stable, but they
 *  can also be considered only as an example of what can be done with
 *  callbacks.  You may of course write your own.
 *
 *  If you write your own callbacks, you may find the functions from
 *  ucnv_cb.h useful.
 *
 *  These functions, although public, should NEVER be called directly.
 *  They should be used as parameters to the ucnv_setFromUCallBack
 *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
 *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
 *
 *  usage example:  'STOP' doesn't need any context, but newContext
 *    could be set to something other than 'NULL' if needed. The available
 *    contexts in this header can modify the default behavior of the callback.
 *
 *  \code
 *  UErrorCode err = U_ZERO_ERROR;
 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
 *  const void *oldContext;
 *  UConverterFromUCallback oldAction;
 *
 *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setFromUCallBack(myConverter,
 *                       UCNV_FROM_U_CALLBACK_STOP,
 *                       NULL,
 *                       &oldAction,
 *                       &oldContext,
 *                       &err);
 *  }
 *  \endcode
 *
 *  The code above tells "myConverter" to stop when it encounters an
 *  ILLEGAL/TRUNCATED/INVALID sequence while converting from
 *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed;
 *  ucnv_setToUCallBack would need to be called in order to change
 *  that behavior too.
 *
 *  Here is an example with a context:
 *
 *  \code
 *  UErrorCode err = U_ZERO_ERROR;
 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
 *  const void *oldContext;
 *  UConverterToUCallback oldAction;
 *
 *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setToUCallBack(myConverter,
 *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
 *                       UCNV_SUB_STOP_ON_ILLEGAL,
 *                       &oldAction,
 *                       &oldContext,
 *                       &err);
 *  }
 *  \endcode
 *
 *  The code above tells "myConverter" to stop when it encounters an
 *  ILLEGAL/TRUNCATED/INVALID sequence while converting from
 *  Codepage -> Unicode. Any unmapped but legal characters will be
 *  replaced with the default substitution character.
 */

/* This file isn't designed to be included all by itself. */
#ifndef UCNV_H
# include "unicode/ucnv.h"
 /* and the rest of this file will be ignored. */
#endif

#ifndef UCNV_ERR_H
#define UCNV_ERR_H

#include "unicode/utypes.h"


/**
 * Context option for the FROM_U and TO_U substitute callbacks
 * (UCNV_FROM_U_CALLBACK_SUBSTITUTE, UCNV_TO_U_CALLBACK_SUBSTITUTE):
 * stop at an illegal sequence and return the error code to the caller
 * immediately. With a NULL context any illegal sequence is substituted.
 * @stable ICU 2.0
 */
#define UCNV_SUB_STOP_ON_ILLEGAL "i"

/**
 * Context option for the FROM_U and TO_U skip callbacks
 * (UCNV_FROM_U_CALLBACK_SKIP, UCNV_TO_U_CALLBACK_SKIP):
 * stop at an illegal sequence and return the error code to the caller
 * immediately. With a NULL context any illegal sequence is skipped.
 * @stable ICU 2.0
 */
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"

/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX).
 * Defined as NULL, so passing a NULL context selects this option.
 * Also listed among the options recognized by UCNV_TO_U_CALLBACK_ESCAPE.
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_ICU       NULL
/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\uXXXX).
 * Also listed among the options recognized by UCNV_TO_U_CALLBACK_ESCAPE.
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_JAVA      "J"
/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\uXXXX \UXXXXXXXX).
 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to C (\xXXXX).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_C         "C"
/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&amp;#DDDD;).
 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape (&amp;#DDDD;).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_XML_DEC   "D"
/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&amp;#xXXXX;).
 * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape (&amp;#xXXXX;).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_XML_HEX   "X"
/**
 * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX).
 * Also listed among the options recognized by UCNV_TO_U_CALLBACK_ESCAPE.
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_UNICODE   "U"

/**
 * The process condition code to be used with the callbacks.
 * Codes which are greater than UCNV_IRREGULAR should be
 * passed on to any chained callbacks.
 * @stable ICU 2.0
 */
typedef enum {
    /** The code point is unassigned.
        The error code U_INVALID_CHAR_FOUND will be set. */
    UCNV_UNASSIGNED = 0,

    /** The code point is illegal. For example,
        \x81\x2E is illegal in SJIS because \x2E
        is not a valid trail byte for the \x81
        lead byte.
        Also, starting with Unicode 3.0.1, non-shortest byte sequences
        in UTF-8 (like \xC1\xA1 instead of \x61 for U+0061)
        are also illegal, not just irregular.
        The error code U_ILLEGAL_CHAR_FOUND will be set. */
    UCNV_ILLEGAL = 1,

    /** The codepoint is not a regular sequence in
        the encoding. For example, \xED\xA0\x80..\xED\xBF\xBF
        are irregular UTF-8 byte sequences for single surrogate
        code points.
        The error code U_INVALID_CHAR_FOUND will be set. */
    UCNV_IRREGULAR = 2,

    /** The callback is called with this reason when a
        'reset' has occurred. Callback should reset all
        state. */
    UCNV_RESET = 3,

    /** Called when the converter is closed. The
        callback should release any allocated memory. */
    UCNV_CLOSE = 4,

    /** Called when ucnv_safeClone() is called on the
        converter. The pointer available as the
        'context' is an alias to the original converter's
        context pointer. If the context must be owned
        by the new converter, the callback must clone
        the data and call ucnv_setFromUCallBack
        (or ucnv_setToUCallBack) with the correct pointer.
        @draft ICU 2.2 */
    UCNV_CLONE = 5
} UConverterCallbackReason;


/**
 * The structure for the fromUnicode callback function parameter.
 * @stable ICU 2.0
 */
typedef struct {
    /** The size of this struct. @stable ICU 2.0 */
    uint16_t size;

    /** The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
    UBool flush;

    /** Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
    UConverter *converter;

    /** Pointer to the source buffer. @stable ICU 2.0 */
    const UChar *source;

    /** Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
    const UChar *sourceLimit;

    /** Pointer to the target buffer. @stable ICU 2.0 */
    char *target;

    /** Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
    const char *targetLimit;

    /** Pointer to the buffer that receives the offsets.
        Usage sketch: *offsets = value; offsets++;  @stable ICU 2.0 */
    int32_t *offsets;
} UConverterFromUnicodeArgs;


/**
 * The structure for the toUnicode callback function parameter.
 * @stable ICU 2.0
 */
typedef struct {
    /** The size of this struct. @stable ICU 2.0 */
    uint16_t size;

    /** The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
    UBool flush;

    /** Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
    UConverter *converter;

    /** Pointer to the source buffer. @stable ICU 2.0 */
    const char *source;

    /** Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
    const char *sourceLimit;

    /** Pointer to the target buffer. @stable ICU 2.0 */
    UChar *target;

    /** Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
    const UChar *targetLimit;

    /** Pointer to the buffer that receives the offsets.
        Usage sketch: *offsets = value; offsets++;  @stable ICU 2.0 */
    int32_t *offsets;
} UConverterToUnicodeArgs;


/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 * returning the error code back to the caller immediately.
 *
 * @param context Pointer to the callback's private data
 *      (the file overview notes that 'STOP' does not need any context)
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err This should always be set to a failure status prior to calling.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);


/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
 * returning the error code back to the caller immediately.
 *
 * @param context Pointer to the callback's private data
 *      (the file overview notes that 'STOP' does not need any context)
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err This should always be set to a failure status prior to calling.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 * simply ignoring those characters.
 *
 * @param context  The function currently recognizes the callback options:
 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Skips any ILLEGAL_SEQUENCE
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
 * UNASSIGNED_SEQUENCE depending on context parameter, with the
 * current substitution string for the converter. This is the default
 * callback.
 *
 * @param context The function currently recognizes the callback options:
 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @see ucnv_setSubstChars
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
 * hexadecimal (or, for UCNV_ESCAPE_XML_DEC, decimal) representation of the
 * illegal codepoints.
 *
 * @param context The function currently recognizes the callback options:
 *        <ul>
 *        <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format %UXXXX, e.g. "%UFFFE%U00AC%UC8FE".
 *          In the event the converter doesn't support the characters {%,U}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456)
 *          is represented as %UD84D%UDC56</li>
 *        <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format \uXXXX, e.g. "\uFFFE\u00AC\uC8FE".
 *          In the event the converter doesn't support the characters {\,u}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456)
 *          is represented as \uD84D\uDC56</li>
 *        <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format \uXXXX, e.g. "\uFFFE\u00AC\uC8FE".
 *          In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456)
 *          is represented as \U00023456</li>
 *        <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
 *          representation in the format &amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;".
 *          In the event the converter doesn't support the characters {&amp;,#}[0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456)
 *          is represented as &amp;#144470; and zero padding is ignored.</li>
 *        <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
 *          representation in the format &amp;#xXXXX;, e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;".
 *          In the event the converter doesn't support the characters {&amp;,#,x}[A-F][0-9],
 *          it will substitute the illegal sequence with the substitution characters.
 *          Note that a supplementary code point (a surrogate pair, e.g. U+23456)
 *          is represented as &amp;#x23456;</li>
 *        </ul>
 * @param fromUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
 * @param length Number of UChars in the concerned Unicode sequence pointed to by codeUnits
 * @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
                  const void *context,
                  UConverterFromUnicodeArgs *fromUArgs,
                  const UChar* codeUnits,
                  int32_t length,
                  UChar32 codePoint,
                  UConverterCallbackReason reason,
                  UErrorCode * err);


/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
 * skips only UNASSIGNED_SEQUENCE depending on the context parameter,
 * simply ignoring those characters.
 *
 * @param context  The function currently recognizes the callback options:
 *                 UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Skips any ILLEGAL_SEQUENCE
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
 * UNASSIGNED_SEQUENCE depending on context parameter, with the
 * Unicode substitution character, U+FFFD.
 *
 * @param context  The function currently recognizes the callback options:
 *                 UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
 *                      returning the error code back to the caller immediately.
 *                 NULL: Substitutes any ILLEGAL_SEQUENCE
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */
U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

/**
 * DO NOT CALL THIS FUNCTION DIRECTLY!
 * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
 * hexadecimal representation of the illegal bytes
 *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
 *
 * @param context This function currently recognizes the callback options:
 *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
 *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
 *      UCNV_ESCAPE_ICU is defined as NULL, so a NULL context selects it.
 * @param toUArgs Information about the conversion in progress
 * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
 * @param length Size (in bytes) of the concerned codepage sequence
 * @param reason Defines the reason the callback was invoked
 * @param err Return value will be set to success if the callback was handled,
 *      otherwise this value will be set to a failure status.
 * @stable ICU 2.0
 */

U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
                  const void *context,
                  UConverterToUnicodeArgs *toUArgs,
                  const char* codeUnits,
                  int32_t length,
                  UConverterCallbackReason reason,
                  UErrorCode * err);

#endif

/*UCNV_ERR_H*/
Commit	Line	Data
b75a7d8f A	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1999-2003, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	*
	7	*
	8	* ucnv_err.h:
	9	*/
	10
	11	/**
	12	* \file
	13	* \brief C UConverter predefined error callbacks
	14	*
	15	* <h2>Error Behaviour Functions</h2>
	16	* Defines some error behaviour functions called by ucnv_{from,to}Unicode
	17	* These are provided as part of ICU and many are stable, but they
	18	* can also be considered only as an example of what can be done with
	19	* callbacks. You may of course write your own.
	20	*
	21	* If you want to write your own, you may also find the functions from
	22	* ucnv_cb.h useful when writing your own callbacks.
	23	*
	24	* These functions, although public, should NEVER be called directly.
	25	* They should be used as parameters to the ucnv_setFromUCallback
	26	* and ucnv_setToUCallback functions, to set the behaviour of a converter
	27	* when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
	28	*
	29	* usage example: 'STOP' doesn't need any context, but newContext
	30	* could be set to something other than 'NULL' if needed. The available
	31	* contexts in this header can modify the default behavior of the callback.
	32	*
	33	* \code
	34	* UErrorCode err = U_ZERO_ERROR;
	35	* UConverter *myConverter = ucnv_open("ibm-949", &err);
	36	* const void *oldContext;
	37	* UConverterFromUCallback oldAction;
	38	*
	39	*
	40	* if (U_SUCCESS(err))
	41	* {
	42	* ucnv_setFromUCallBack(myConverter,
	43	* UCNV_FROM_U_CALLBACK_STOP,
	44	* NULL,
	45	* &oldAction,
	46	* &oldContext,
	47	* &status);
	48	* }
	49	* \endcode
	50	*
	51	* The code above tells "myConverter" to stop when it encounters an
	52	* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
	53	* Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
	54	* and ucnv_setToUCallBack would need to be called in order to change
	55	* that behavior too.
	56	*
	57	* Here is an example with a context:
	58	*
	59	* \code
	60	* UErrorCode err = U_ZERO_ERROR;
	61	* UConverter *myConverter = ucnv_open("ibm-949", &err);
	62	* const void *oldContext;
	63	* UConverterFromUCallback oldAction;
	64	*
65	*
66	* if (U_SUCCESS(err))
67	* {
68	* ucnv_setToUCallBack(myConverter,
69	* UCNV_TO_U_CALLBACK_SUBSTITUTE,
70	* UCNV_SUB_STOP_ON_ILLEGAL,
71	* &oldAction,
72	* &oldContext,
73	* &status);
74	* }
75	* \endcode
76	*
77	* The code above tells "myConverter" to stop when it encounters an
78	* ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
79	* Codepage -> Unicode. Any unmapped and legal characters will be
80	* substituted to be the default substitution character.
81	*/
82
83	/* This file isn't designed to be included all by itself. */
84	#ifndef UCNV_H
85	# include "unicode/ucnv.h"
86	/* and the rest of this file will be ignored. */
87	#endif
88
89	#ifndef UCNV_ERR_H
90	#define UCNV_ERR_H
91
92	#include "unicode/utypes.h"
93
94
95	/**
96	* FROM_U, TO_U context options for sub callback
97	* @stable ICU 2.0
98	*/
99	#define UCNV_SUB_STOP_ON_ILLEGAL "i"
100
101	/**
102	* FROM_U, TO_U context options for skip callback
103	* @stable ICU 2.0
104	*/
105	#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
106
107	/**
108	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
109	* @stable ICU 2.0
110	*/
111	#define UCNV_ESCAPE_ICU NULL
112	/**
113	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\uXXXX)
114	* @stable ICU 2.0
115	*/
116	#define UCNV_ESCAPE_JAVA "J"
117	/**
118	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\uXXXX \UXXXXXXXX)
119	* TO_U_CALLBACK_ESCAPE option to escape the character value accoding to C (\xXXXX)
120	* @stable ICU 2.0
121	*/
122	#define UCNV_ESCAPE_C "C"
123	/**
124	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape (&#DDDD;)
125	* TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Decimal escape (&#DDDD;)
126	* @stable ICU 2.0
127	*/
128	#define UCNV_ESCAPE_XML_DEC "D"
129	/**
130	* FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape (&#xXXXX;)
131	* TO_U_CALLBACK_ESCAPE context option to escape the character value accoding to XML Hex escape (&#xXXXX;)
132	* @stable ICU 2.0
133	*/
134	#define UCNV_ESCAPE_XML_HEX "X"
135	/**
136	* FROM_U_CALLBACK_ESCAPE context option to escape teh code unit according to Unicode (U+XXXXX)
137	* @stable ICU 2.0
138	*/
139	#define UCNV_ESCAPE_UNICODE "U"
140
141	/**
142	* The process condition code to be used with the callbacks.
143	* Codes which are greater than UCNV_IRREGULAR should be
144	* passed on to any chained callbacks.
145	* @stable ICU 2.0
146	*/
147	typedef enum {
148	UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
149	The error code U_INVALID_CHAR_FOUND will be set. */
150	UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
151	\x81\x2E is illegal in SJIS because \x2E
152	is not a valid trail byte for the \x81
153	lead byte.
154	Also, starting with Unicode 3.0.1, non-shortest byte sequences
155	in UTF-8 (like \xC1\xA1 instead of \x61 for U+0061)
156	are also illegal, not just irregular.
157	The error code U_ILLEGAL_CHAR_FOUND will be set. */
158	UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
159	the encoding. For example, \xED\xA0\x80..\xED\xBF\xBF
160	are irregular UTF-8 byte sequences for single surrogate
161	code points.
162	The error code U_INVALID_CHAR_FOUND will be set. */
163	UCNV_RESET = 3, /**< The callback is called with this reason when a
164	'reset' has occured. Callback should reset all
165	state. */
166	UCNV_CLOSE = 4, /**< Called when the converter is closed. The
167	callback should release any allocated memory.*/
168	UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
169	converter. the pointer available as the
170	'context' is an alias to the original converters'
171	context pointer. If the context must be owned
172	by the new converter, the callback must clone
173	the data and call ucnv_setFromUCallback
174	(or setToUCallback) with the correct pointer.
175	@draft ICU 2.2
176	*/
177	} UConverterCallbackReason;
178
179
180	/**
181	* The structure for the fromUnicode callback function parameter.
182	* @stable ICU 2.0
183	*/
184	typedef struct {
	185	uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
	186	UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
	187	UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
	188	const UChar *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
	189	const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
	190	char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
	191	const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
	192	int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
193	} UConverterFromUnicodeArgs;
194
195
196	/**
197	* The structure for the toUnicode callback function parameter.
198	* @stable ICU 2.0
199	*/
200	typedef struct {
	201	uint16_t size; /**< The size of this struct @stable ICU 2.0 */
	202	UBool flush; /**< The internal state of converter will be reset and data flushed if set to TRUE. @stable ICU 2.0 */
	203	UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
	204	const char *source; /**< Pointer to the source buffer. @stable ICU 2.0 */
	205	const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
	206	UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
	207	const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
	208	int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
209	} UConverterToUnicodeArgs;
210
211
212	/**
213	* DO NOT CALL THIS FUNCTION DIRECTLY!
214	* This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
215	* returning the error code back to the caller immediately.
216	*
217	* @param context Pointer to the callback's private data
218	* @param fromUArgs Information about the conversion in progress
219	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
220	* @param length Size (in bytes) of the concerned codepage sequence
221	* @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
222	* @param reason Defines the reason the callback was invoked
223	* @param err This should always be set to a failure status prior to calling.
224	* @stable ICU 2.0
225	*/
226	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
227	const void *context,
228	UConverterFromUnicodeArgs *fromUArgs,
229	const UChar* codeUnits,
230	int32_t length,
231	UChar32 codePoint,
232	UConverterCallbackReason reason,
233	UErrorCode * err);
234
235
236
237	/**
238	* DO NOT CALL THIS FUNCTION DIRECTLY!
239	* This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
240	* returning the error code back to the caller immediately.
241	*
242	* @param context Pointer to the callback's private data
243	* @param toUArgs Information about the conversion in progress
244	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
245	* @param length Size (in bytes) of the concerned codepage sequence
246	* @param reason Defines the reason the callback was invoked
247	* @param err This should always be set to a failure status prior to calling.
248	* @stable ICU 2.0
249	*/
250	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
251	const void *context,
252	UConverterToUnicodeArgs *toUArgs,
253	const char* codeUnits,
254	int32_t length,
255	UConverterCallbackReason reason,
256	UErrorCode * err);
257
258	/**
259	* DO NOT CALL THIS FUNCTION DIRECTLY! Pass it to ucnv_setFromUCallBack instead.
260	* This From Unicode callback skips any ILLEGAL_SEQUENCE, or
261	* skips only UNASSIGNED_SEQUENCE depending on the context parameter,
262	* simply ignoring those characters.
263	*
264	* @param context The function currently recognizes the callback options:
265	* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
266	* returning the error code back to the caller immediately.
267	* NULL: Skips any ILLEGAL_SEQUENCE
268	* @param fromUArgs Information about the conversion in progress
269	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
270	* @param length Number of UChars in the concerned Unicode sequence
271	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
272	* @param reason Defines the reason the callback was invoked
273	* @param err Return value will be set to success if the callback was handled,
274	* otherwise this value will be set to a failure status.
275	* @stable ICU 2.0
276	*/
277	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
278	const void *context,
279	UConverterFromUnicodeArgs *fromUArgs,
280	const UChar* codeUnits,
281	int32_t length,
282	UChar32 codePoint,
283	UConverterCallbackReason reason,
284	UErrorCode * err);
285
286	/**
287	* DO NOT CALL THIS FUNCTION DIRECTLY! Pass it to ucnv_setFromUCallBack instead.
288	* This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
289	* UNASSIGNED_SEQUENCE depending on context parameter, with the
290	* current substitution string for the converter. This is the default
291	* callback.
292	*
293	* @param context The function currently recognizes the callback options:
294	* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
295	* returning the error code back to the caller immediately.
296	* NULL: Substitutes any ILLEGAL_SEQUENCE
297	* @param fromUArgs Information about the conversion in progress
298	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
299	* @param length Number of UChars in the concerned Unicode sequence
300	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
301	* @param reason Defines the reason the callback was invoked
302	* @param err Return value will be set to success if the callback was handled,
303	* otherwise this value will be set to a failure status.
304	* @see ucnv_setSubstChars
305	* @stable ICU 2.0
306	*/
307	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
308	const void *context,
309	UConverterFromUnicodeArgs *fromUArgs,
310	const UChar* codeUnits,
311	int32_t length,
312	UChar32 codePoint,
313	UConverterCallbackReason reason,
314	UErrorCode * err);
315
316	/**
317	* DO NOT CALL THIS FUNCTION DIRECTLY! Pass it to ucnv_setFromUCallBack instead.
318	* This From Unicode callback will Substitute the ILLEGAL SEQUENCE with an
319	* escaped (hexadecimal or decimal) representation of the illegal codepoints.
320	*
321	* @param context The function currently recognizes the callback options:
322	* <ul>
323	* <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
324	* representation in the format %UXXXX (e.g. "%UFFFE%U00AC%UC8FE").
325	* In the event the converter doesn't support the characters {%,U}[A-F][0-9],
326	* it will substitute the illegal sequence with the substitution characters.
327	* Note that a supplementary code point (e.g. U+23456, a surrogate pair) is represented as
328	* %UD84D%UDC56</li>
329	* <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
330	* representation in the format \uXXXX (e.g. "\uFFFE\u00AC\uC8FE").
331	* In the event the converter doesn't support the characters {\,u}[A-F][0-9],
332	* it will substitute the illegal sequence with the substitution characters.
333	* Note that a supplementary code point (e.g. U+23456, a surrogate pair) is represented as
334	* \uD84D\uDC56</li>
335	* <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
336	* representation in the format \uXXXX (e.g. "\uFFFE\u00AC\uC8FE").
337	* In the event the converter doesn't support the characters {\,u,U}[A-F][0-9],
338	* it will substitute the illegal sequence with the substitution characters.
339	* Note that a supplementary code point (e.g. U+23456, a surrogate pair) is represented as
340	* \U00023456</li>
341	* <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
342	* representation in the format &#DDDDDDDD; (e.g. "&#65534;&#172;&#51454;").
343	* In the event the converter doesn't support the characters {&,#}[0-9],
344	* it will substitute the illegal sequence with the substitution characters.
345	* Note that a supplementary code point (e.g. U+23456, a surrogate pair) is represented as
346	* &#144470; and zero padding is ignored.</li>
347	* <li>UCNV_ESCAPE_XML_HEX: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
348	* representation in the format &#xXXXX; (e.g. "&#xFFFE;&#x00AC;&#xC8FE;").
349	* In the event the converter doesn't support the characters {&,#,x}[0-9],
350	* it will substitute the illegal sequence with the substitution characters.
351	* Note that a supplementary code point (e.g. U+23456, a surrogate pair) is represented as
352	* &#x23456;</li>
353	* </ul>
354	* @param fromUArgs Information about the conversion in progress
355	* @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
356	* @param length Number of UChars in the concerned Unicode sequence
357	* @param codePoint Single UChar32 (UTF-32) containing the concerned Unicode codepoint.
358	* @param reason Defines the reason the callback was invoked
359	* @param err Return value will be set to success if the callback was handled,
360	* otherwise this value will be set to a failure status.
361	* @stable ICU 2.0
362	*/
363	U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
364	const void *context,
365	UConverterFromUnicodeArgs *fromUArgs,
366	const UChar* codeUnits,
367	int32_t length,
368	UChar32 codePoint,
369	UConverterCallbackReason reason,
370	UErrorCode * err);
371
372
373	/**
374	* DO NOT CALL THIS FUNCTION DIRECTLY! Pass it to ucnv_setToUCallBack instead.
375	* This To Unicode callback skips any ILLEGAL_SEQUENCE, or
376	* skips only UNASSIGNED_SEQUENCE depending on the context parameter,
377	* simply ignoring those characters.
378	*
379	* @param context The function currently recognizes the callback options:
380	* UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
381	* returning the error code back to the caller immediately.
382	* NULL: Skips any ILLEGAL_SEQUENCE
383	* @param toUArgs Information about the conversion in progress
384	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
385	* @param length Size (in bytes) of the concerned codepage sequence
386	* @param reason Defines the reason the callback was invoked
387	* @param err Return value will be set to success if the callback was handled,
388	* otherwise this value will be set to a failure status.
389	* @stable ICU 2.0
390	*/
391	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
392	const void *context,
393	UConverterToUnicodeArgs *toUArgs,
394	const char* codeUnits,
395	int32_t length,
396	UConverterCallbackReason reason,
397	UErrorCode * err);
398
399	/**
400	* DO NOT CALL THIS FUNCTION DIRECTLY! Pass it to ucnv_setToUCallBack instead.
401	* This To Unicode callback will Substitute the ILLEGAL SEQUENCE, or
402	* UNASSIGNED_SEQUENCE depending on context parameter, with the
403	* Unicode substitution character, U+FFFD.
404	*
405	* @param context The function currently recognizes the callback options:
406	* UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
407	* returning the error code back to the caller immediately.
408	* NULL: Substitutes any ILLEGAL_SEQUENCE
409	* @param toUArgs Information about the conversion in progress
410	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
411	* @param length Size (in bytes) of the concerned codepage sequence
412	* @param reason Defines the reason the callback was invoked
413	* @param err Return value will be set to success if the callback was handled,
414	* otherwise this value will be set to a failure status.
415	* @stable ICU 2.0
416	*/
417	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
418	const void *context,
419	UConverterToUnicodeArgs *toUArgs,
420	const char* codeUnits,
421	int32_t length,
422	UConverterCallbackReason reason,
423	UErrorCode * err);
424
425	/**
426	* DO NOT CALL THIS FUNCTION DIRECTLY! Pass it to ucnv_setToUCallBack instead.
427	* This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
428	* hexadecimal representation of the illegal bytes
429	* (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
430	*
431	* @param context This function currently recognizes the callback options:
432	* UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
433	* UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
434	* @param toUArgs Information about the conversion in progress
435	* @param codeUnits Points to 'length' bytes of the concerned codepage sequence
436	* @param length Size (in bytes) of the concerned codepage sequence
437	* @param reason Defines the reason the callback was invoked
438	* @param err Return value will be set to success if the callback was handled,
439	* otherwise this value will be set to a failure status.
440	* @stable ICU 2.0
441	*/
442
443	U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
444	const void *context,
445	UConverterToUnicodeArgs *toUArgs,
446	const char* codeUnits,
447	int32_t length,
448	UConverterCallbackReason reason,
449	UErrorCode * err);
450
451	#endif
452
453	/*UCNV_ERR_H*/