// Source: git.saurik.com Git web view - apple/icu.git, blob icuSources/common/unicode/idna.h
// Snapshot: ICU-461.18.tar.gz
// Path: [apple/icu.git] / icuSources / common / unicode / idna.h
/*
*******************************************************************************
*   Copyright (C) 2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  idna.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010mar05
*   created by: Markus W. Scherer
*/

15 #ifndef __IDNA_H__
16 #define __IDNA_H__
17
18 /**
19 * \file
20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
21 */
22
23 #include "unicode/utypes.h"
24
25 #if !UCONFIG_NO_IDNA
26
27 #include "unicode/bytestream.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/uidna.h"
30 #include "unicode/unistr.h"
31
32 U_NAMESPACE_BEGIN
33
34 class U_COMMON_API IDNAInfo;
35
36 /**
37 * Abstract base class for IDNA processing.
38 * See http://www.unicode.org/reports/tr46/
39 * and http://www.ietf.org/rfc/rfc3490.txt
40 *
41 * The IDNA class is not intended for public subclassing.
42 *
43 * This C++ API currently only implements UTS #46.
44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
45 * and IDNA2003 (functions that do not use a service object).
46 * @draft ICU 4.6
47 */
48 class U_COMMON_API IDNA : public UObject {
49 public:
50 /**
51 * Returns an IDNA instance which implements UTS #46.
52 * Returns an unmodifiable instance, owned by the caller.
53 * Cache it for multiple operations, and delete it when done.
54 * The instance is thread-safe, that is, it can be used concurrently.
55 *
56 * UTS #46 defines Unicode IDNA Compatibility Processing,
57 * updated to the latest version of Unicode and compatible with both
58 * IDNA2003 and IDNA2008.
59 *
60 * The worker functions use transitional processing, including deviation mappings,
61 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
62 * is used in which case the deviation characters are passed through without change.
63 *
64 * Disallowed characters are mapped to U+FFFD.
65 *
66 * For available options see the uidna.h header.
67 * Operations with the UTS #46 instance do not support the
68 * UIDNA_ALLOW_UNASSIGNED option.
69 *
70 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
71 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
72 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
73 *
74 * @param options Bit set to modify the processing and error checking.
75 * See option bit set values in uidna.h.
76 * @param errorCode Standard ICU error code. Its input value must
77 * pass the U_SUCCESS() test, or else the function returns
78 * immediately. Check for U_FAILURE() on output or use with
79 * function chaining. (See User Guide for details.)
80 * @return the UTS #46 IDNA instance, if successful
81 * @draft ICU 4.6
82 */
83 static IDNA *
84 createUTS46Instance(uint32_t options, UErrorCode &errorCode);
85
86 /**
87 * Converts a single domain name label into its ASCII form for DNS lookup.
88 * If any processing step fails, then info.hasErrors() will be TRUE and
89 * the result might not be an ASCII string.
90 * The label might be modified according to the types of errors.
91 * Labels with severe errors will be left in (or turned into) their Unicode form.
92 *
93 * The UErrorCode indicates an error only in exceptional cases,
94 * such as a U_MEMORY_ALLOCATION_ERROR.
95 *
96 * @param label Input domain name label
97 * @param dest Destination string object
98 * @param info Output container of IDNA processing details.
99 * @param errorCode Standard ICU error code. Its input value must
100 * pass the U_SUCCESS() test, or else the function returns
101 * immediately. Check for U_FAILURE() on output or use with
102 * function chaining. (See User Guide for details.)
103 * @return dest
104 * @draft ICU 4.6
105 */
106 virtual UnicodeString &
107 labelToASCII(const UnicodeString &label, UnicodeString &dest,
108 IDNAInfo &info, UErrorCode &errorCode) const = 0;
109
110 /**
111 * Converts a single domain name label into its Unicode form for human-readable display.
112 * If any processing step fails, then info.hasErrors() will be TRUE.
113 * The label might be modified according to the types of errors.
114 *
115 * The UErrorCode indicates an error only in exceptional cases,
116 * such as a U_MEMORY_ALLOCATION_ERROR.
117 *
118 * @param label Input domain name label
119 * @param dest Destination string object
120 * @param info Output container of IDNA processing details.
121 * @param errorCode Standard ICU error code. Its input value must
122 * pass the U_SUCCESS() test, or else the function returns
123 * immediately. Check for U_FAILURE() on output or use with
124 * function chaining. (See User Guide for details.)
125 * @return dest
126 * @draft ICU 4.6
127 */
128 virtual UnicodeString &
129 labelToUnicode(const UnicodeString &label, UnicodeString &dest,
130 IDNAInfo &info, UErrorCode &errorCode) const = 0;
131
132 /**
133 * Converts a whole domain name into its ASCII form for DNS lookup.
134 * If any processing step fails, then info.hasErrors() will be TRUE and
135 * the result might not be an ASCII string.
136 * The domain name might be modified according to the types of errors.
137 * Labels with severe errors will be left in (or turned into) their Unicode form.
138 *
139 * The UErrorCode indicates an error only in exceptional cases,
140 * such as a U_MEMORY_ALLOCATION_ERROR.
141 *
142 * @param name Input domain name
143 * @param dest Destination string object
144 * @param info Output container of IDNA processing details.
145 * @param errorCode Standard ICU error code. Its input value must
146 * pass the U_SUCCESS() test, or else the function returns
147 * immediately. Check for U_FAILURE() on output or use with
148 * function chaining. (See User Guide for details.)
149 * @return dest
150 * @draft ICU 4.6
151 */
152 virtual UnicodeString &
153 nameToASCII(const UnicodeString &name, UnicodeString &dest,
154 IDNAInfo &info, UErrorCode &errorCode) const = 0;
155
156 /**
157 * Converts a whole domain name into its Unicode form for human-readable display.
158 * If any processing step fails, then info.hasErrors() will be TRUE.
159 * The domain name might be modified according to the types of errors.
160 *
161 * The UErrorCode indicates an error only in exceptional cases,
162 * such as a U_MEMORY_ALLOCATION_ERROR.
163 *
164 * @param name Input domain name
165 * @param dest Destination string object
166 * @param info Output container of IDNA processing details.
167 * @param errorCode Standard ICU error code. Its input value must
168 * pass the U_SUCCESS() test, or else the function returns
169 * immediately. Check for U_FAILURE() on output or use with
170 * function chaining. (See User Guide for details.)
171 * @return dest
172 * @draft ICU 4.6
173 */
174 virtual UnicodeString &
175 nameToUnicode(const UnicodeString &name, UnicodeString &dest,
176 IDNAInfo &info, UErrorCode &errorCode) const = 0;
177
178 // UTF-8 versions of the processing methods ---------------------------- ***
179
180 /**
181 * Converts a single domain name label into its ASCII form for DNS lookup.
182 * UTF-8 version of labelToASCII(), same behavior.
183 *
184 * @param label Input domain name label
185 * @param dest Destination byte sink; Flush()ed if successful
186 * @param info Output container of IDNA processing details.
187 * @param errorCode Standard ICU error code. Its input value must
188 * pass the U_SUCCESS() test, or else the function returns
189 * immediately. Check for U_FAILURE() on output or use with
190 * function chaining. (See User Guide for details.)
191 * @return dest
192 * @draft ICU 4.6
193 */
194 virtual void
195 labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
196 IDNAInfo &info, UErrorCode &errorCode) const;
197
198 /**
199 * Converts a single domain name label into its Unicode form for human-readable display.
200 * UTF-8 version of labelToUnicode(), same behavior.
201 *
202 * @param label Input domain name label
203 * @param dest Destination byte sink; Flush()ed if successful
204 * @param info Output container of IDNA processing details.
205 * @param errorCode Standard ICU error code. Its input value must
206 * pass the U_SUCCESS() test, or else the function returns
207 * immediately. Check for U_FAILURE() on output or use with
208 * function chaining. (See User Guide for details.)
209 * @return dest
210 * @draft ICU 4.6
211 */
212 virtual void
213 labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
214 IDNAInfo &info, UErrorCode &errorCode) const;
215
216 /**
217 * Converts a whole domain name into its ASCII form for DNS lookup.
218 * UTF-8 version of nameToASCII(), same behavior.
219 *
220 * @param name Input domain name
221 * @param dest Destination byte sink; Flush()ed if successful
222 * @param info Output container of IDNA processing details.
223 * @param errorCode Standard ICU error code. Its input value must
224 * pass the U_SUCCESS() test, or else the function returns
225 * immediately. Check for U_FAILURE() on output or use with
226 * function chaining. (See User Guide for details.)
227 * @return dest
228 * @draft ICU 4.6
229 */
230 virtual void
231 nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
232 IDNAInfo &info, UErrorCode &errorCode) const;
233
234 /**
235 * Converts a whole domain name into its Unicode form for human-readable display.
236 * UTF-8 version of nameToUnicode(), same behavior.
237 *
238 * @param name Input domain name
239 * @param dest Destination byte sink; Flush()ed if successful
240 * @param info Output container of IDNA processing details.
241 * @param errorCode Standard ICU error code. Its input value must
242 * pass the U_SUCCESS() test, or else the function returns
243 * immediately. Check for U_FAILURE() on output or use with
244 * function chaining. (See User Guide for details.)
245 * @return dest
246 * @draft ICU 4.6
247 */
248 virtual void
249 nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
250 IDNAInfo &info, UErrorCode &errorCode) const;
251
252 private:
253 // No ICU "poor man's RTTI" for this class nor its subclasses.
254 virtual UClassID getDynamicClassID() const;
255 };
256
257 class UTS46;
258
259 /**
260 * Output container for IDNA processing errors.
261 * The IDNAInfo class is not suitable for subclassing.
262 * @draft ICU 4.6
263 */
264 class U_COMMON_API IDNAInfo : public UMemory {
265 public:
266 /**
267 * Constructor for stack allocation.
268 * @draft ICU 4.6
269 */
270 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
271 /**
272 * Were there IDNA processing errors?
273 * @return TRUE if there were processing errors
274 * @draft ICU 4.6
275 */
276 UBool hasErrors() const { return errors!=0; }
277 /**
278 * Returns a bit set indicating IDNA processing errors.
279 * See UIDNA_ERROR_... constants in uidna.h.
280 * @return bit set of processing errors
281 * @draft ICU 4.6
282 */
283 uint32_t getErrors() const { return errors; }
284 /**
285 * Returns TRUE if transitional and nontransitional processing produce different results.
286 * This is the case when the input label or domain name contains
287 * one or more deviation characters outside a Punycode label (see UTS #46).
288 * <ul>
289 * <li>With nontransitional processing, such characters are
290 * copied to the destination string.
291 * <li>With transitional processing, such characters are
292 * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
293 * </ul>
294 * @return TRUE if transitional and nontransitional processing produce different results
295 * @draft ICU 4.6
296 */
297 UBool isTransitionalDifferent() const { return isTransDiff; }
298
299 private:
300 friend class UTS46;
301
302 IDNAInfo(const IDNAInfo &other); // no copying
303 IDNAInfo &operator=(const IDNAInfo &other); // no copying
304
305 void reset() {
306 errors=labelErrors=0;
307 isTransDiff=FALSE;
308 isBiDi=FALSE;
309 isOkBiDi=TRUE;
310 }
311
312 uint32_t errors, labelErrors;
313 UBool isTransDiff;
314 UBool isBiDi;
315 UBool isOkBiDi;
316 };
317
318 U_NAMESPACE_END
319
320 #endif // UCONFIG_NO_IDNA
321 #endif // __IDNA_H__