2 *******************************************************************************
4 * Copyright (C) 2008-2010, International Business Machines
5 * Corporation, Google and others. All Rights Reserved.
7 *******************************************************************************
10 * Author : eldawy@google.com (Mohamed Eldawy)
13 * Purpose: To generate a list of encodings capable of handling
14 * a given Unicode text
16 * Started 09-April-2008
19 #ifndef __ICU_UCNV_SEL_H__
20 #define __ICU_UCNV_SEL_H__
22 #include "unicode/uset.h"
23 #include "unicode/utypes.h"
24 #include "unicode/utf16.h"
25 #include "unicode/uenum.h"
26 #include "unicode/ucnv.h"
27 #include "unicode/localpointer.h"
32 * A converter selector is built with a set of encoding/charset names
33 * and given an input string returns the set of names of the
34 * corresponding converters which can convert the string.
36 * A converter selector can be serialized into a buffer and reopened
37 * from the serialized form.
42 * The selector data structure
/**
 * The converter selector data structure.
 * Only the struct tag is declared in this header; callers handle it
 * through UConverterSelector pointers.
 */
typedef struct UConverterSelector UConverterSelector;
50 * If converterListSize is 0, build for all available converters.
51 * If excludedCodePoints is NULL, don't exclude any code points.
53 * @param converterList a pointer to the names of the encodings to be considered.
54 * Can be NULL if converterListSize==0.
55 * The list and the names will be cloned, and the caller
56 * retains ownership of the original.
57 * @param converterListSize number of encodings in above list.
58 * If 0, builds a selector for all available converters.
59 * @param excludedCodePoints a set of code points to be excluded from consideration.
60 * That is, excluded code points in a string do not change
61 * the selection result. (They might be handled by a callback.)
62 * Use NULL to exclude nothing.
63 * @param whichSet what converter set to use? Use this to determine whether
64 * to consider only roundtrip mappings or also fallbacks.
65 * @param status an in/out ICU UErrorCode
66 * @return the new selector
70 U_STABLE UConverterSelector
* U_EXPORT2
71 ucnvsel_open(const char* const* converterList
, int32_t converterListSize
,
72 const USet
* excludedCodePoints
,
73 const UConverterUnicodeSet whichSet
, UErrorCode
* status
);
77 * If any Enumerations were returned by ucnvsel_select*, they become invalid.
78 * They can be closed before or after calling ucnvsel_close,
79 * but should never be used after the selector is closed.
81 * @see ucnvsel_selectForString
82 * @see ucnvsel_selectForUTF8
84 * @param sel selector to close
88 U_STABLE
void U_EXPORT2
89 ucnvsel_close(UConverterSelector
*sel
);
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUConverterSelectorPointer
 * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);

U_NAMESPACE_END

/*
 * The C++-only section must end here: the declarations that follow are part
 * of the plain C API and have to stay visible when U_SHOW_CPLUSPLUS_API is 0.
 */
#endif  /* U_SHOW_CPLUSPLUS_API */
111 * Open a selector from its serialized form.
112 * The buffer must remain valid and unchanged for the lifetime of the selector.
113 * This is much faster than creating a selector from scratch.
114 * Using a serialized form from a different machine (endianness/charset) is supported.
116 * @param buffer pointer to the serialized form of a converter selector;
117 * must be 32-bit-aligned
118 * @param length the capacity of this buffer (can be equal to or larger than
119 * the actual data length)
120 * @param status an in/out ICU UErrorCode
121 * @return the new selector
125 U_STABLE UConverterSelector
* U_EXPORT2
126 ucnvsel_openFromSerialized(const void* buffer
, int32_t length
, UErrorCode
* status
);
129 * Serialize a selector into a linear buffer.
130 * The serialized form is portable to different machines.
132 * @param sel selector to consider
133 * @param buffer pointer to 32-bit-aligned memory to be filled with the
134 * serialized form of this converter selector
135 * @param bufferCapacity the capacity of this buffer
136 * @param status an in/out ICU UErrorCode
137 * @return the required buffer capacity to hold the serialized data (even if the call fails
138 * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
142 U_STABLE
int32_t U_EXPORT2
143 ucnvsel_serialize(const UConverterSelector
* sel
,
144 void* buffer
, int32_t bufferCapacity
, UErrorCode
* status
);
147 * Select converters that can map all characters in a UTF-16 string,
148 * ignoring the excluded code points.
150 * @param sel a selector
151 * @param s UTF-16 string
152 * @param length length of the string, or -1 if NUL-terminated
153 * @param status an in/out ICU UErrorCode
154 * @return an enumeration containing encoding names.
155 * The returned encoding names and their order will be the same as
156 * supplied when building the selector.
160 U_STABLE UEnumeration
* U_EXPORT2
161 ucnvsel_selectForString(const UConverterSelector
* sel
,
162 const UChar
*s
, int32_t length
, UErrorCode
*status
);
165 * Select converters that can map all characters in a UTF-8 string,
166 * ignoring the excluded code points.
168 * @param sel a selector
169 * @param s UTF-8 string
170 * @param length length of the string, or -1 if NUL-terminated
171 * @param status an in/out ICU UErrorCode
172 * @return an enumeration containing encoding names.
173 * The returned encoding names and their order will be the same as
174 * supplied when building the selector.
178 U_STABLE UEnumeration
* U_EXPORT2
179 ucnvsel_selectForUTF8(const UConverterSelector
* sel
,
180 const char *s
, int32_t length
, UErrorCode
*status
);
182 #endif /* __ICU_UCNV_SEL_H__ */