]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/idna.h
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / idna.h
CommitLineData
729e4ab9
A
1/*
2*******************************************************************************
4388f060 3* Copyright (C) 2010-2012, International Business Machines
729e4ab9
A
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: idna.h
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2010mar05
12* created by: Markus W. Scherer
13*/
14
15#ifndef __IDNA_H__
16#define __IDNA_H__
17
18/**
19 * \file
20 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
21 */
22
23#include "unicode/utypes.h"
24
25#if !UCONFIG_NO_IDNA
26
27#include "unicode/bytestream.h"
28#include "unicode/stringpiece.h"
29#include "unicode/uidna.h"
30#include "unicode/unistr.h"
31
32U_NAMESPACE_BEGIN
33
34class U_COMMON_API IDNAInfo;
35
36/**
37 * Abstract base class for IDNA processing.
38 * See http://www.unicode.org/reports/tr46/
39 * and http://www.ietf.org/rfc/rfc3490.txt
40 *
41 * The IDNA class is not intended for public subclassing.
42 *
43 * This C++ API currently only implements UTS #46.
44 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
45 * and IDNA2003 (functions that do not use a service object).
 *
 * Instances are obtained from createUTS46Instance(). The four UnicodeString
 * conversion functions are pure virtual; each has a UTF-8 counterpart that reads
 * a StringPiece and writes to a ByteSink.
4388f060 46 * @stable ICU 4.6
729e4ab9
A
47 */
48class U_COMMON_API IDNA : public UObject {
49public:
4388f060
A
50 /**
51 * Destructor.
52 * @stable ICU 4.6
53 */
54 ~IDNA();
55
729e4ab9
A
56 /**
57 * Returns an IDNA instance which implements UTS #46.
58 * Returns an unmodifiable instance, owned by the caller.
59 * Cache it for multiple operations, and delete it when done.
60 * The instance is thread-safe, that is, it can be used concurrently.
61 *
62 * UTS #46 defines Unicode IDNA Compatibility Processing,
63 * updated to the latest version of Unicode and compatible with both
64 * IDNA2003 and IDNA2008.
65 *
66 * The worker functions use transitional processing, including deviation mappings,
67 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
68 * is used in which case the deviation characters are passed through without change.
69 *
70 * Disallowed characters are mapped to U+FFFD.
71 *
72 * For available options see the uidna.h header.
73 * Operations with the UTS #46 instance do not support the
74 * UIDNA_ALLOW_UNASSIGNED option.
75 *
76 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
77 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
78 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
79 *
80 * @param options Bit set to modify the processing and error checking.
81 * See option bit set values in uidna.h.
82 * @param errorCode Standard ICU error code. Its input value must
83 * pass the U_SUCCESS() test, or else the function returns
84 * immediately. Check for U_FAILURE() on output or use with
85 * function chaining. (See User Guide for details.)
86 * @return the UTS #46 IDNA instance, if successful
4388f060 87 * @stable ICU 4.6
729e4ab9
A
88 */
89 static IDNA *
90 createUTS46Instance(uint32_t options, UErrorCode &errorCode);
91
92 /**
93 * Converts a single domain name label into its ASCII form for DNS lookup.
94 * If any processing step fails, then info.hasErrors() will be TRUE and
95 * the result might not be an ASCII string.
96 * The label might be modified according to the types of errors.
97 * Labels with severe errors will be left in (or turned into) their Unicode form.
98 *
99 * The UErrorCode indicates an error only in exceptional cases,
100 * such as a U_MEMORY_ALLOCATION_ERROR.
101 *
102 * @param label Input domain name label
103 * @param dest Destination string object
104 * @param info Output container of IDNA processing details.
105 * @param errorCode Standard ICU error code. Its input value must
106 * pass the U_SUCCESS() test, or else the function returns
107 * immediately. Check for U_FAILURE() on output or use with
108 * function chaining. (See User Guide for details.)
109 * @return dest
4388f060 110 * @stable ICU 4.6
729e4ab9
A
111 */
112 virtual UnicodeString &
113 labelToASCII(const UnicodeString &label, UnicodeString &dest,
114 IDNAInfo &info, UErrorCode &errorCode) const = 0;
115
116 /**
117 * Converts a single domain name label into its Unicode form for human-readable display.
118 * If any processing step fails, then info.hasErrors() will be TRUE.
119 * The label might be modified according to the types of errors.
120 *
121 * The UErrorCode indicates an error only in exceptional cases,
122 * such as a U_MEMORY_ALLOCATION_ERROR.
123 *
124 * @param label Input domain name label
125 * @param dest Destination string object
126 * @param info Output container of IDNA processing details.
127 * @param errorCode Standard ICU error code. Its input value must
128 * pass the U_SUCCESS() test, or else the function returns
129 * immediately. Check for U_FAILURE() on output or use with
130 * function chaining. (See User Guide for details.)
131 * @return dest
4388f060 132 * @stable ICU 4.6
729e4ab9
A
133 */
134 virtual UnicodeString &
135 labelToUnicode(const UnicodeString &label, UnicodeString &dest,
136 IDNAInfo &info, UErrorCode &errorCode) const = 0;
137
138 /**
139 * Converts a whole domain name into its ASCII form for DNS lookup.
140 * If any processing step fails, then info.hasErrors() will be TRUE and
141 * the result might not be an ASCII string.
142 * The domain name might be modified according to the types of errors.
143 * Labels with severe errors will be left in (or turned into) their Unicode form.
144 *
145 * The UErrorCode indicates an error only in exceptional cases,
146 * such as a U_MEMORY_ALLOCATION_ERROR.
147 *
148 * @param name Input domain name
149 * @param dest Destination string object
150 * @param info Output container of IDNA processing details.
151 * @param errorCode Standard ICU error code. Its input value must
152 * pass the U_SUCCESS() test, or else the function returns
153 * immediately. Check for U_FAILURE() on output or use with
154 * function chaining. (See User Guide for details.)
155 * @return dest
4388f060 156 * @stable ICU 4.6
729e4ab9
A
157 */
158 virtual UnicodeString &
159 nameToASCII(const UnicodeString &name, UnicodeString &dest,
160 IDNAInfo &info, UErrorCode &errorCode) const = 0;
161
162 /**
163 * Converts a whole domain name into its Unicode form for human-readable display.
164 * If any processing step fails, then info.hasErrors() will be TRUE.
165 * The domain name might be modified according to the types of errors.
166 *
167 * The UErrorCode indicates an error only in exceptional cases,
168 * such as a U_MEMORY_ALLOCATION_ERROR.
169 *
170 * @param name Input domain name
171 * @param dest Destination string object
172 * @param info Output container of IDNA processing details.
173 * @param errorCode Standard ICU error code. Its input value must
174 * pass the U_SUCCESS() test, or else the function returns
175 * immediately. Check for U_FAILURE() on output or use with
176 * function chaining. (See User Guide for details.)
177 * @return dest
4388f060 178 * @stable ICU 4.6
729e4ab9
A
179 */
180 virtual UnicodeString &
181 nameToUnicode(const UnicodeString &name, UnicodeString &dest,
182 IDNAInfo &info, UErrorCode &errorCode) const = 0;
183
184 // UTF-8 versions of the processing methods ---------------------------- ***
 // Unlike the UnicodeString functions above, these are declared without a
 // pure-specifier (virtual, but not pure virtual), and they return void.
185
186 /**
187 * Converts a single domain name label into its ASCII form for DNS lookup.
188 * UTF-8 version of labelToASCII(), same behavior.
189 *
190 * @param label Input domain name label
191 * @param dest Destination byte sink; Flush()ed if successful
192 * @param info Output container of IDNA processing details.
193 * @param errorCode Standard ICU error code. Its input value must
194 * pass the U_SUCCESS() test, or else the function returns
195 * immediately. Check for U_FAILURE() on output or use with
196 * function chaining. (See User Guide for details.)
197 * @note Returns nothing (void); the result is written to the dest byte sink.
4388f060 198 * @stable ICU 4.6
729e4ab9
A
199 */
200 virtual void
201 labelToASCII_UTF8(const StringPiece &label, ByteSink &dest,
202 IDNAInfo &info, UErrorCode &errorCode) const;
203
204 /**
205 * Converts a single domain name label into its Unicode form for human-readable display.
206 * UTF-8 version of labelToUnicode(), same behavior.
207 *
208 * @param label Input domain name label
209 * @param dest Destination byte sink; Flush()ed if successful
210 * @param info Output container of IDNA processing details.
211 * @param errorCode Standard ICU error code. Its input value must
212 * pass the U_SUCCESS() test, or else the function returns
213 * immediately. Check for U_FAILURE() on output or use with
214 * function chaining. (See User Guide for details.)
215 * @note Returns nothing (void); the result is written to the dest byte sink.
4388f060 216 * @stable ICU 4.6
729e4ab9
A
217 */
218 virtual void
219 labelToUnicodeUTF8(const StringPiece &label, ByteSink &dest,
220 IDNAInfo &info, UErrorCode &errorCode) const;
221
222 /**
223 * Converts a whole domain name into its ASCII form for DNS lookup.
224 * UTF-8 version of nameToASCII(), same behavior.
225 *
226 * @param name Input domain name
227 * @param dest Destination byte sink; Flush()ed if successful
228 * @param info Output container of IDNA processing details.
229 * @param errorCode Standard ICU error code. Its input value must
230 * pass the U_SUCCESS() test, or else the function returns
231 * immediately. Check for U_FAILURE() on output or use with
232 * function chaining. (See User Guide for details.)
233 * @note Returns nothing (void); the result is written to the dest byte sink.
4388f060 234 * @stable ICU 4.6
729e4ab9
A
235 */
236 virtual void
237 nameToASCII_UTF8(const StringPiece &name, ByteSink &dest,
238 IDNAInfo &info, UErrorCode &errorCode) const;
239
240 /**
241 * Converts a whole domain name into its Unicode form for human-readable display.
242 * UTF-8 version of nameToUnicode(), same behavior.
243 *
244 * @param name Input domain name
245 * @param dest Destination byte sink; Flush()ed if successful
246 * @param info Output container of IDNA processing details.
247 * @param errorCode Standard ICU error code. Its input value must
248 * pass the U_SUCCESS() test, or else the function returns
249 * immediately. Check for U_FAILURE() on output or use with
250 * function chaining. (See User Guide for details.)
251 * @note Returns nothing (void); the result is written to the dest byte sink.
4388f060 252 * @stable ICU 4.6
729e4ab9
A
253 */
254 virtual void
255 nameToUnicodeUTF8(const StringPiece &name, ByteSink &dest,
256 IDNAInfo &info, UErrorCode &errorCode) const;
257
258private:
259 // No ICU "poor man's RTTI" for this class nor its subclasses.
260 virtual UClassID getDynamicClassID() const;
261};
262
263class UTS46;
264
265/**
266 * Output container for IDNA processing errors.
267 * The IDNAInfo class is not suitable for subclassing.
 *
 * Besides the error bit set, it reports whether transitional and
 * nontransitional processing differ (see isTransitionalDifferent()).
 * Copying is disabled: the copy constructor and the assignment operator
 * are declared private.
4388f060 268 * @stable ICU 4.6
729e4ab9
A
269 */
270class U_COMMON_API IDNAInfo : public UMemory {
271public:
272 /**
273 * Constructor for stack allocation.
 * Initial state: errors and labelErrors are 0, isTransDiff and isBiDi
 * are FALSE, and isOkBiDi is TRUE, so hasErrors() returns FALSE.
4388f060 274 * @stable ICU 4.6
729e4ab9
A
275 */
276 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
277 /**
278 * Were there IDNA processing errors?
279 * @return TRUE if there were processing errors
4388f060 280 * @stable ICU 4.6
729e4ab9
A
281 */
282 UBool hasErrors() const { return errors!=0; }
283 /**
284 * Returns a bit set indicating IDNA processing errors.
285 * See UIDNA_ERROR_... constants in uidna.h.
286 * @return bit set of processing errors
4388f060 287 * @stable ICU 4.6
729e4ab9
A
288 */
289 uint32_t getErrors() const { return errors; }
290 /**
291 * Returns TRUE if transitional and nontransitional processing produce different results.
292 * This is the case when the input label or domain name contains
293 * one or more deviation characters outside a Punycode label (see UTS #46).
294 * <ul>
295 * <li>With nontransitional processing, such characters are
296 * copied to the destination string.
297 * <li>With transitional processing, such characters are
298 * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
299 * </ul>
300 * @return TRUE if transitional and nontransitional processing produce different results
4388f060 301 * @stable ICU 4.6
729e4ab9
A
302 */
303 UBool isTransitionalDifferent() const { return isTransDiff; }
304
305private:
306 friend class UTS46; // grants UTS46 access to reset() and the private fields below
307
308 IDNAInfo(const IDNAInfo &other); // no copying
309 IDNAInfo &operator=(const IDNAInfo &other); // no copying
310
311 void reset() { // restores the same field values the default constructor sets
312 errors=labelErrors=0;
313 isTransDiff=FALSE;
314 isBiDi=FALSE;
315 isOkBiDi=TRUE;
316 }
317
318 uint32_t errors, labelErrors; // errors: bit set returned by getErrors(); labelErrors: NOTE(review): presumably per-label error bits - confirm in the UTS46 implementation
319 UBool isTransDiff; // value returned by isTransitionalDifferent()
320 UBool isBiDi; // NOTE(review): no accessor in this header; presumably BiDi state maintained by UTS46 - confirm
321 UBool isOkBiDi; // NOTE(review): no accessor in this header; starts TRUE, presumably cleared by UTS46 on a BiDi problem - confirm
322};
323
324U_NAMESPACE_END
325
326#endif // UCONFIG_NO_IDNA
327#endif // __IDNA_H__