]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/idna.h
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / idna.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
729e4ab9
A
3/*
4*******************************************************************************
4388f060 5* Copyright (C) 2010-2012, International Business Machines
729e4ab9
A
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* file name: idna.h
f3c0d7a5 9* encoding: UTF-8
729e4ab9
A
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2010mar05
14* created by: Markus W. Scherer
15*/
16
17#ifndef __IDNA_H__
18#define __IDNA_H__
19
20/**
21 * \file
22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23 */
24
25#include "unicode/utypes.h"
26
27#if !UCONFIG_NO_IDNA
28
29#include "unicode/bytestream.h"
30#include "unicode/stringpiece.h"
31#include "unicode/uidna.h"
32#include "unicode/unistr.h"
33
f3c0d7a5 34#if U_SHOW_CPLUSPLUS_API
729e4ab9
A
35U_NAMESPACE_BEGIN
36
51004dcb 37class IDNAInfo;
729e4ab9
A
38
39/**
40 * Abstract base class for IDNA processing.
41 * See http://www.unicode.org/reports/tr46/
42 * and http://www.ietf.org/rfc/rfc3490.txt
43 *
44 * The IDNA class is not intended for public subclassing.
45 *
46 * This C++ API currently only implements UTS #46.
47 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
48 * and IDNA2003 (functions that do not use a service object).
4388f060 49 * @stable ICU 4.6
729e4ab9
A
50 */
51class U_COMMON_API IDNA : public UObject {
52public:
4388f060
A
53 /**
54 * Destructor.
55 * @stable ICU 4.6
56 */
57 ~IDNA();
58
729e4ab9
A
59 /**
60 * Returns an IDNA instance which implements UTS #46.
61 * Returns an unmodifiable instance, owned by the caller.
62 * Cache it for multiple operations, and delete it when done.
63 * The instance is thread-safe, that is, it can be used concurrently.
64 *
65 * UTS #46 defines Unicode IDNA Compatibility Processing,
66 * updated to the latest version of Unicode and compatible with both
67 * IDNA2003 and IDNA2008.
68 *
69 * The worker functions use transitional processing, including deviation mappings,
70 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
71 * is used in which case the deviation characters are passed through without change.
72 *
73 * Disallowed characters are mapped to U+FFFD.
74 *
75 * For available options see the uidna.h header.
76 * Operations with the UTS #46 instance do not support the
77 * UIDNA_ALLOW_UNASSIGNED option.
78 *
79 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
80 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
81 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
82 *
83 * @param options Bit set to modify the processing and error checking.
84 * See option bit set values in uidna.h.
85 * @param errorCode Standard ICU error code. Its input value must
86 * pass the U_SUCCESS() test, or else the function returns
87 * immediately. Check for U_FAILURE() on output or use with
88 * function chaining. (See User Guide for details.)
89 * @return the UTS #46 IDNA instance, if successful
4388f060 90 * @stable ICU 4.6
729e4ab9
A
91 */
92 static IDNA *
93 createUTS46Instance(uint32_t options, UErrorCode &errorCode);
94
95 /**
96 * Converts a single domain name label into its ASCII form for DNS lookup.
97 * If any processing step fails, then info.hasErrors() will be TRUE and
98 * the result might not be an ASCII string.
99 * The label might be modified according to the types of errors.
100 * Labels with severe errors will be left in (or turned into) their Unicode form.
101 *
102 * The UErrorCode indicates an error only in exceptional cases,
103 * such as a U_MEMORY_ALLOCATION_ERROR.
104 *
105 * @param label Input domain name label
106 * @param dest Destination string object
107 * @param info Output container of IDNA processing details.
108 * @param errorCode Standard ICU error code. Its input value must
109 * pass the U_SUCCESS() test, or else the function returns
110 * immediately. Check for U_FAILURE() on output or use with
111 * function chaining. (See User Guide for details.)
112 * @return dest
4388f060 113 * @stable ICU 4.6
729e4ab9
A
114 */
115 virtual UnicodeString &
116 labelToASCII(const UnicodeString &label, UnicodeString &dest,
117 IDNAInfo &info, UErrorCode &errorCode) const = 0;
118
119 /**
120 * Converts a single domain name label into its Unicode form for human-readable display.
121 * If any processing step fails, then info.hasErrors() will be TRUE.
122 * The label might be modified according to the types of errors.
123 *
124 * The UErrorCode indicates an error only in exceptional cases,
125 * such as a U_MEMORY_ALLOCATION_ERROR.
126 *
127 * @param label Input domain name label
128 * @param dest Destination string object
129 * @param info Output container of IDNA processing details.
130 * @param errorCode Standard ICU error code. Its input value must
131 * pass the U_SUCCESS() test, or else the function returns
132 * immediately. Check for U_FAILURE() on output or use with
133 * function chaining. (See User Guide for details.)
134 * @return dest
4388f060 135 * @stable ICU 4.6
729e4ab9
A
136 */
137 virtual UnicodeString &
138 labelToUnicode(const UnicodeString &label, UnicodeString &dest,
139 IDNAInfo &info, UErrorCode &errorCode) const = 0;
140
141 /**
142 * Converts a whole domain name into its ASCII form for DNS lookup.
143 * If any processing step fails, then info.hasErrors() will be TRUE and
144 * the result might not be an ASCII string.
145 * The domain name might be modified according to the types of errors.
146 * Labels with severe errors will be left in (or turned into) their Unicode form.
147 *
148 * The UErrorCode indicates an error only in exceptional cases,
149 * such as a U_MEMORY_ALLOCATION_ERROR.
150 *
151 * @param name Input domain name
152 * @param dest Destination string object
153 * @param info Output container of IDNA processing details.
154 * @param errorCode Standard ICU error code. Its input value must
155 * pass the U_SUCCESS() test, or else the function returns
156 * immediately. Check for U_FAILURE() on output or use with
157 * function chaining. (See User Guide for details.)
158 * @return dest
4388f060 159 * @stable ICU 4.6
729e4ab9
A
160 */
161 virtual UnicodeString &
162 nameToASCII(const UnicodeString &name, UnicodeString &dest,
163 IDNAInfo &info, UErrorCode &errorCode) const = 0;
164
165 /**
166 * Converts a whole domain name into its Unicode form for human-readable display.
167 * If any processing step fails, then info.hasErrors() will be TRUE.
168 * The domain name might be modified according to the types of errors.
169 *
170 * The UErrorCode indicates an error only in exceptional cases,
171 * such as a U_MEMORY_ALLOCATION_ERROR.
172 *
173 * @param name Input domain name
174 * @param dest Destination string object
175 * @param info Output container of IDNA processing details.
176 * @param errorCode Standard ICU error code. Its input value must
177 * pass the U_SUCCESS() test, or else the function returns
178 * immediately. Check for U_FAILURE() on output or use with
179 * function chaining. (See User Guide for details.)
180 * @return dest
4388f060 181 * @stable ICU 4.6
729e4ab9
A
182 */
183 virtual UnicodeString &
184 nameToUnicode(const UnicodeString &name, UnicodeString &dest,
185 IDNAInfo &info, UErrorCode &errorCode) const = 0;
186
187 // UTF-8 versions of the processing methods (declared virtual but, unlike the UnicodeString versions above, not pure virtual) --- ***
188
189 /**
190 * Converts a single domain name label into its ASCII form for DNS lookup.
191 * UTF-8 version of labelToASCII(), same behavior.
192 *
193 * @param label Input domain name label
194 * @param dest Destination byte sink; Flush()ed if successful
195 * @param info Output container of IDNA processing details.
196 * @param errorCode Standard ICU error code. Its input value must
197 * pass the U_SUCCESS() test, or else the function returns
198 * immediately. Check for U_FAILURE() on output or use with
199 * function chaining. (See User Guide for details.)
200 * No return value (declared void); the output goes to the dest byte sink.
4388f060 201 * @stable ICU 4.6
729e4ab9
A
202 */
203 virtual void
f3c0d7a5 204 labelToASCII_UTF8(StringPiece label, ByteSink &dest,
729e4ab9
A
205 IDNAInfo &info, UErrorCode &errorCode) const;
206
207 /**
208 * Converts a single domain name label into its Unicode form for human-readable display.
209 * UTF-8 version of labelToUnicode(), same behavior.
210 *
211 * @param label Input domain name label
212 * @param dest Destination byte sink; Flush()ed if successful
213 * @param info Output container of IDNA processing details.
214 * @param errorCode Standard ICU error code. Its input value must
215 * pass the U_SUCCESS() test, or else the function returns
216 * immediately. Check for U_FAILURE() on output or use with
217 * function chaining. (See User Guide for details.)
218 * No return value (declared void); the output goes to the dest byte sink.
4388f060 219 * @stable ICU 4.6
729e4ab9
A
220 */
221 virtual void
f3c0d7a5 222 labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
729e4ab9
A
223 IDNAInfo &info, UErrorCode &errorCode) const;
224
225 /**
226 * Converts a whole domain name into its ASCII form for DNS lookup.
227 * UTF-8 version of nameToASCII(), same behavior.
228 *
229 * @param name Input domain name
230 * @param dest Destination byte sink; Flush()ed if successful
231 * @param info Output container of IDNA processing details.
232 * @param errorCode Standard ICU error code. Its input value must
233 * pass the U_SUCCESS() test, or else the function returns
234 * immediately. Check for U_FAILURE() on output or use with
235 * function chaining. (See User Guide for details.)
236 * No return value (declared void); the output goes to the dest byte sink.
4388f060 237 * @stable ICU 4.6
729e4ab9
A
238 */
239 virtual void
f3c0d7a5 240 nameToASCII_UTF8(StringPiece name, ByteSink &dest,
729e4ab9
A
241 IDNAInfo &info, UErrorCode &errorCode) const;
242
243 /**
244 * Converts a whole domain name into its Unicode form for human-readable display.
245 * UTF-8 version of nameToUnicode(), same behavior.
246 *
247 * @param name Input domain name
248 * @param dest Destination byte sink; Flush()ed if successful
249 * @param info Output container of IDNA processing details.
250 * @param errorCode Standard ICU error code. Its input value must
251 * pass the U_SUCCESS() test, or else the function returns
252 * immediately. Check for U_FAILURE() on output or use with
253 * function chaining. (See User Guide for details.)
254 * No return value (declared void); the output goes to the dest byte sink.
4388f060 255 * @stable ICU 4.6
729e4ab9
A
256 */
257 virtual void
f3c0d7a5 258 nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
729e4ab9 259 IDNAInfo &info, UErrorCode &errorCode) const;
729e4ab9
A
260};
261
262class UTS46;
263
264/**
265 * Output container for IDNA processing errors.
266 * The IDNAInfo class is not suitable for subclassing.
4388f060 267 * @stable ICU 4.6
729e4ab9
A
268 */
269class U_COMMON_API IDNAInfo : public UMemory {
270public:
271 /**
272 * Constructor for stack allocation. Starts with no errors recorded, isTransDiff and isBiDi FALSE, and isOkBiDi TRUE.
4388f060 273 * @stable ICU 4.6
729e4ab9
A
274 */
275 IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
276 /**
277 * Were there IDNA processing errors?
278 * @return TRUE if there were processing errors (that is, if the error bit set is nonzero)
4388f060 279 * @stable ICU 4.6
729e4ab9
A
280 */
281 UBool hasErrors() const { return errors!=0; }
282 /**
283 * Returns a bit set indicating IDNA processing errors.
284 * See UIDNA_ERROR_... constants in uidna.h.
285 * @return bit set of processing errors
4388f060 286 * @stable ICU 4.6
729e4ab9
A
287 */
288 uint32_t getErrors() const { return errors; }
289 /**
290 * Returns TRUE if transitional and nontransitional processing produce different results.
291 * This is the case when the input label or domain name contains
292 * one or more deviation characters outside a Punycode label (see UTS #46).
293 * <ul>
294 * <li>With nontransitional processing, such characters are
295 * copied to the destination string.
296 * <li>With transitional processing, such characters are
297 * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
298 * </ul>
299 * @return TRUE if transitional and nontransitional processing produce different results
4388f060 300 * @stable ICU 4.6
729e4ab9
A
301 */
302 UBool isTransitionalDifferent() const { return isTransDiff; }
303
304private:
305 friend class UTS46; // gives UTS46 access to reset() and the private fields below
306
307 IDNAInfo(const IDNAInfo &other); // no copying: declared private, no definition in this header
308 IDNAInfo &operator=(const IDNAInfo &other); // no copying: declared private, no definition in this header
309
310 void reset() { // restores the same state that the default constructor sets
311 errors=labelErrors=0;
312 isTransDiff=FALSE;
313 isBiDi=FALSE;
314 isOkBiDi=TRUE;
315 }
316
317 uint32_t errors, labelErrors; // errors: bit set returned by getErrors(); labelErrors: not read in this header — NOTE(review): presumably per-label error bits, confirm in the UTS46 implementation
318 UBool isTransDiff; // value returned by isTransitionalDifferent()
319 UBool isBiDi; // not read in this header — NOTE(review): presumably maintained by UTS46, confirm
320 UBool isOkBiDi; // not read in this header — NOTE(review): presumably maintained by UTS46, confirm
321};
322
323U_NAMESPACE_END
f3c0d7a5 324#endif // U_SHOW_CPLUSPLUS_API
729e4ab9
A
325
326#endif // UCONFIG_NO_IDNA
327#endif // __IDNA_H__