]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | ******************************************************************************* | |
4388f060 | 5 | * Copyright (C) 2010-2012, International Business Machines |
729e4ab9 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * file name: idna.h | |
f3c0d7a5 | 9 | * encoding: UTF-8 |
729e4ab9 A |
10 | * tab size: 8 (not used) |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2010mar05 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #ifndef __IDNA_H__ | |
18 | #define __IDNA_H__ | |
19 | ||
20 | /** | |
21 | * \file | |
22 | * \brief C++ API: Internationalizing Domain Names in Applications (IDNA) | |
23 | */ | |
24 | ||
25 | #include "unicode/utypes.h" | |
26 | ||
27 | #if !UCONFIG_NO_IDNA | |
28 | ||
29 | #include "unicode/bytestream.h" | |
30 | #include "unicode/stringpiece.h" | |
31 | #include "unicode/uidna.h" | |
32 | #include "unicode/unistr.h" | |
33 | ||
f3c0d7a5 | 34 | #if U_SHOW_CPLUSPLUS_API |
729e4ab9 A |
35 | U_NAMESPACE_BEGIN |
36 | ||
51004dcb | 37 | class IDNAInfo; |
729e4ab9 A |
38 | |
39 | /** | |
40 | * Abstract base class for IDNA processing. | |
41 | * See http://www.unicode.org/reports/tr46/ | |
42 | * and http://www.ietf.org/rfc/rfc3490.txt | |
43 | * | |
44 | * The IDNA class is not intended for public subclassing. | |
45 | * | |
46 | * This C++ API currently only implements UTS #46; obtain an instance from createUTS46Instance(). | |
47 | * The uidna.h C API implements both UTS #46 (functions using UIDNA service object) | |
48 | * and IDNA2003 (functions that do not use a service object). | |
4388f060 | 49 | * @stable ICU 4.6 |
729e4ab9 A |
50 | */ |
51 | class U_COMMON_API IDNA : public UObject { | |
52 | public: | |
4388f060 A |
53 | /** |
54 | * Destructor. Instances from createUTS46Instance() are owned by the caller, who deletes them when done. | |
55 | * @stable ICU 4.6 | |
56 | */ | |
57 | ~IDNA(); | |
58 | ||
729e4ab9 A |
59 | /** |
60 | * Returns an IDNA instance which implements UTS #46. | |
61 | * Returns an unmodifiable instance, owned by the caller. | |
62 | * Cache it for multiple operations, and delete it when done. | |
63 | * The instance is thread-safe, that is, it can be used concurrently. | |
64 | * | |
65 | * UTS #46 defines Unicode IDNA Compatibility Processing, | |
66 | * updated to the latest version of Unicode and compatible with both | |
67 | * IDNA2003 and IDNA2008. | |
68 | * | |
69 | * The worker functions use transitional processing, including deviation mappings, | |
70 | * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE | |
71 | * is used in which case the deviation characters are passed through without change. | |
72 | * | |
73 | * Disallowed characters are mapped to U+FFFD. | |
74 | * | |
75 | * For available options see the uidna.h header. | |
76 | * Operations with the UTS #46 instance do not support the | |
77 | * UIDNA_ALLOW_UNASSIGNED option. | |
78 | * | |
79 | * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). | |
80 | * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than | |
81 | * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. | |
82 | * | |
83 | * @param options Bit set to modify the processing and error checking. | |
84 | * See option bit set values in uidna.h. | |
85 | * @param errorCode Standard ICU error code. Its input value must | |
86 | * pass the U_SUCCESS() test, or else the function returns | |
87 | * immediately. Check for U_FAILURE() on output or use with | |
88 | * function chaining. (See User Guide for details.) | |
89 | * @return the UTS #46 IDNA instance, if successful | |
4388f060 | 90 | * @stable ICU 4.6 |
729e4ab9 A |
91 | */ |
92 | static IDNA * | |
93 | createUTS46Instance(uint32_t options, UErrorCode &errorCode); | |
94 | ||
95 | /** | |
96 | * Converts a single domain name label into its ASCII form for DNS lookup. | |
97 | * If any processing step fails, then info.hasErrors() will be TRUE and | |
98 | * the result might not be an ASCII string. | |
99 | * The label might be modified according to the types of errors. | |
100 | * Labels with severe errors will be left in (or turned into) their Unicode form. | |
101 | * | |
102 | * The UErrorCode indicates an error only in exceptional cases, | |
103 | * such as a U_MEMORY_ALLOCATION_ERROR. | |
104 | * | |
105 | * @param label Input domain name label | |
106 | * @param dest Destination string object | |
107 | * @param info Output container of IDNA processing details. | |
108 | * @param errorCode Standard ICU error code. Its input value must | |
109 | * pass the U_SUCCESS() test, or else the function returns | |
110 | * immediately. Check for U_FAILURE() on output or use with | |
111 | * function chaining. (See User Guide for details.) | |
112 | * @return dest | |
4388f060 | 113 | * @stable ICU 4.6 |
729e4ab9 A |
114 | */ |
115 | virtual UnicodeString & | |
116 | labelToASCII(const UnicodeString &label, UnicodeString &dest, | |
117 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
118 | ||
119 | /** | |
120 | * Converts a single domain name label into its Unicode form for human-readable display. | |
121 | * If any processing step fails, then info.hasErrors() will be TRUE. | |
122 | * The label might be modified according to the types of errors. | |
123 | * | |
124 | * The UErrorCode indicates an error only in exceptional cases, | |
125 | * such as a U_MEMORY_ALLOCATION_ERROR. | |
126 | * | |
127 | * @param label Input domain name label | |
128 | * @param dest Destination string object | |
129 | * @param info Output container of IDNA processing details. | |
130 | * @param errorCode Standard ICU error code. Its input value must | |
131 | * pass the U_SUCCESS() test, or else the function returns | |
132 | * immediately. Check for U_FAILURE() on output or use with | |
133 | * function chaining. (See User Guide for details.) | |
134 | * @return dest | |
4388f060 | 135 | * @stable ICU 4.6 |
729e4ab9 A |
136 | */ |
137 | virtual UnicodeString & | |
138 | labelToUnicode(const UnicodeString &label, UnicodeString &dest, | |
139 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
140 | ||
141 | /** | |
142 | * Converts a whole domain name into its ASCII form for DNS lookup. | |
143 | * If any processing step fails, then info.hasErrors() will be TRUE and | |
144 | * the result might not be an ASCII string. | |
145 | * The domain name might be modified according to the types of errors. | |
146 | * Labels with severe errors will be left in (or turned into) their Unicode form. | |
147 | * | |
148 | * The UErrorCode indicates an error only in exceptional cases, | |
149 | * such as a U_MEMORY_ALLOCATION_ERROR. | |
150 | * | |
151 | * @param name Input domain name | |
152 | * @param dest Destination string object | |
153 | * @param info Output container of IDNA processing details. | |
154 | * @param errorCode Standard ICU error code. Its input value must | |
155 | * pass the U_SUCCESS() test, or else the function returns | |
156 | * immediately. Check for U_FAILURE() on output or use with | |
157 | * function chaining. (See User Guide for details.) | |
158 | * @return dest | |
4388f060 | 159 | * @stable ICU 4.6 |
729e4ab9 A |
160 | */ |
161 | virtual UnicodeString & | |
162 | nameToASCII(const UnicodeString &name, UnicodeString &dest, | |
163 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
164 | ||
165 | /** | |
166 | * Converts a whole domain name into its Unicode form for human-readable display. | |
167 | * If any processing step fails, then info.hasErrors() will be TRUE. | |
168 | * The domain name might be modified according to the types of errors. | |
169 | * | |
170 | * The UErrorCode indicates an error only in exceptional cases, | |
171 | * such as a U_MEMORY_ALLOCATION_ERROR. | |
172 | * | |
173 | * @param name Input domain name | |
174 | * @param dest Destination string object | |
175 | * @param info Output container of IDNA processing details. | |
176 | * @param errorCode Standard ICU error code. Its input value must | |
177 | * pass the U_SUCCESS() test, or else the function returns | |
178 | * immediately. Check for U_FAILURE() on output or use with | |
179 | * function chaining. (See User Guide for details.) | |
180 | * @return dest | |
4388f060 | 181 | * @stable ICU 4.6 |
729e4ab9 A |
182 | */ |
183 | virtual UnicodeString & | |
184 | nameToUnicode(const UnicodeString &name, UnicodeString &dest, | |
185 | IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
186 | ||
187 | // UTF-8 versions of the processing methods ---------------------------- *** | |
188 | ||
189 | /** | |
190 | * Converts a single domain name label into its ASCII form for DNS lookup. | |
191 | * UTF-8 version of labelToASCII(), same behavior. | |
192 | * | |
193 | * @param label Input domain name label | |
194 | * @param dest Destination byte sink; Flush()ed if successful | |
195 | * @param info Output container of IDNA processing details. | |
196 | * @param errorCode Standard ICU error code. Its input value must | |
197 | * pass the U_SUCCESS() test, or else the function returns | |
198 | * immediately. Check for U_FAILURE() on output or use with | |
199 | * function chaining. (See User Guide for details.) | |
200 | * @note Returns nothing; the converted label is written to dest. | |
4388f060 | 201 | * @stable ICU 4.6 |
729e4ab9 A |
202 | */ |
203 | virtual void | |
f3c0d7a5 | 204 | labelToASCII_UTF8(StringPiece label, ByteSink &dest,
729e4ab9 A |
205 | IDNAInfo &info, UErrorCode &errorCode) const; |
206 | ||
207 | /** | |
208 | * Converts a single domain name label into its Unicode form for human-readable display. | |
209 | * UTF-8 version of labelToUnicode(), same behavior. | |
210 | * | |
211 | * @param label Input domain name label | |
212 | * @param dest Destination byte sink; Flush()ed if successful | |
213 | * @param info Output container of IDNA processing details. | |
214 | * @param errorCode Standard ICU error code. Its input value must | |
215 | * pass the U_SUCCESS() test, or else the function returns | |
216 | * immediately. Check for U_FAILURE() on output or use with | |
217 | * function chaining. (See User Guide for details.) | |
218 | * @note Returns nothing; the converted label is written to dest. | |
4388f060 | 219 | * @stable ICU 4.6 |
729e4ab9 A |
220 | */ |
221 | virtual void | |
f3c0d7a5 | 222 | labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
729e4ab9 A |
223 | IDNAInfo &info, UErrorCode &errorCode) const; |
224 | ||
225 | /** | |
226 | * Converts a whole domain name into its ASCII form for DNS lookup. | |
227 | * UTF-8 version of nameToASCII(), same behavior. | |
228 | * | |
229 | * @param name Input domain name | |
230 | * @param dest Destination byte sink; Flush()ed if successful | |
231 | * @param info Output container of IDNA processing details. | |
232 | * @param errorCode Standard ICU error code. Its input value must | |
233 | * pass the U_SUCCESS() test, or else the function returns | |
234 | * immediately. Check for U_FAILURE() on output or use with | |
235 | * function chaining. (See User Guide for details.) | |
236 | * @note Returns nothing; the converted domain name is written to dest. | |
4388f060 | 237 | * @stable ICU 4.6 |
729e4ab9 A |
238 | */ |
239 | virtual void | |
f3c0d7a5 | 240 | nameToASCII_UTF8(StringPiece name, ByteSink &dest,
729e4ab9 A |
241 | IDNAInfo &info, UErrorCode &errorCode) const; |
242 | ||
243 | /** | |
244 | * Converts a whole domain name into its Unicode form for human-readable display. | |
245 | * UTF-8 version of nameToUnicode(), same behavior. | |
246 | * | |
247 | * @param name Input domain name | |
248 | * @param dest Destination byte sink; Flush()ed if successful | |
249 | * @param info Output container of IDNA processing details. | |
250 | * @param errorCode Standard ICU error code. Its input value must | |
251 | * pass the U_SUCCESS() test, or else the function returns | |
252 | * immediately. Check for U_FAILURE() on output or use with | |
253 | * function chaining. (See User Guide for details.) | |
254 | * @note Returns nothing; the converted domain name is written to dest. | |
4388f060 | 255 | * @stable ICU 4.6 |
729e4ab9 A |
256 | */ |
257 | virtual void | |
f3c0d7a5 | 258 | nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
729e4ab9 | 259 | IDNAInfo &info, UErrorCode &errorCode) const;
729e4ab9 A |
260 | }; |
261 | ||
262 | class UTS46; | |
263 | ||
264 | /** | |
265 | * Output container for IDNA processing errors, filled in through the info parameter of the IDNA conversion methods. | |
266 | * The IDNAInfo class is not suitable for subclassing. | |
4388f060 | 267 | * @stable ICU 4.6 |
729e4ab9 A |
268 | */ |
269 | class U_COMMON_API IDNAInfo : public UMemory { | |
270 | public: | |
271 | /** | |
272 | * Constructor for stack allocation. Starts with no errors recorded and isTransitionalDifferent() returning FALSE. | |
4388f060 | 273 | * @stable ICU 4.6 |
729e4ab9 A |
274 | */ |
275 | IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {} | |
276 | /** | |
277 | * Were there IDNA processing errors? | |
278 | * @return TRUE if there were processing errors, that is, if getErrors() is nonzero | |
4388f060 | 279 | * @stable ICU 4.6 |
729e4ab9 A |
280 | */ |
281 | UBool hasErrors() const { return errors!=0; } | |
282 | /** | |
283 | * Returns a bit set indicating IDNA processing errors. | |
284 | * See UIDNA_ERROR_... constants in uidna.h. | |
285 | * @return bit set of processing errors; 0 when hasErrors() is FALSE | |
4388f060 | 286 | * @stable ICU 4.6 |
729e4ab9 A |
287 | */ |
288 | uint32_t getErrors() const { return errors; } | |
289 | /** | |
290 | * Returns TRUE if transitional and nontransitional processing produce different results. | |
291 | * This is the case when the input label or domain name contains | |
292 | * one or more deviation characters outside a Punycode label (see UTS #46). | |
293 | * <ul> | |
294 | * <li>With nontransitional processing, such characters are | |
295 | * copied to the destination string. | |
296 | * <li>With transitional processing, such characters are | |
297 | * mapped (sharp s/sigma) or removed (joiner/nonjoiner). | |
298 | * </ul> | |
299 | * @return TRUE if transitional and nontransitional processing produce different results | |
4388f060 | 300 | * @stable ICU 4.6 |
729e4ab9 A |
301 | */ |
302 | UBool isTransitionalDifferent() const { return isTransDiff; } | |
303 | ||
304 | private: | |
305 | friend class UTS46; | |
306 | ||
307 | IDNAInfo(const IDNAInfo &other); // no copying: the copy constructor is private | |
308 | IDNAInfo &operator=(const IDNAInfo &other); // no copying: the assignment operator is private | |
309 | ||
310 | void reset() { | |
311 | errors=labelErrors=0; | |
312 | isTransDiff=FALSE; | |
313 | isBiDi=FALSE; | |
314 | isOkBiDi=TRUE; | |
315 | } | |
316 | ||
317 | uint32_t errors, labelErrors; | |
318 | UBool isTransDiff; | |
319 | UBool isBiDi; | |
320 | UBool isOkBiDi; | |
321 | }; | |
322 | ||
323 | U_NAMESPACE_END | |
f3c0d7a5 | 324 | #endif // U_SHOW_CPLUSPLUS_API |
729e4ab9 A |
325 | |
326 | #endif // UCONFIG_NO_IDNA | |
327 | #endif // __IDNA_H__ |