]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/unicode/uidna.h
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / common / unicode / uidna.h
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uidna.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #ifndef __UIDNA_H__
18 #define __UIDNA_H__
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_IDNA
23
24 #include "unicode/localpointer.h"
25 #include "unicode/parseerr.h"
26
27 /**
28 * \file
29 * \brief C API: Internationalizing Domain Names in Applications (IDNA)
30 *
31 * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
32 *
33 * The C API functions which do take a UIDNA * service object pointer
34 * implement UTS #46 and IDNA2008.
35 *
36 * IDNA2003 is obsolete.
37 * The C API functions which do not take a service object pointer
38 * implement IDNA2003. They are all deprecated.
39 */
40
41 /*
42 * IDNA option bit set values.
43 */
44 enum {
45 /**
46 * Default options value: None of the other options are set.
47 * For use in static worker and factory methods.
48 * @stable ICU 2.6
49 */
50 UIDNA_DEFAULT=0,
51 #ifndef U_HIDE_DEPRECATED_API
52 /**
53 * Option to allow unassigned code points in domain names and labels.
54 * For use in static worker and factory methods.
55 * <p>This option is ignored by the UTS46 implementation.
56 * (UTS #46 disallows unassigned code points.)
57 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
58 */
59 UIDNA_ALLOW_UNASSIGNED=1,
60 #endif /* U_HIDE_DEPRECATED_API */
61 /**
62 * Option to check whether the input conforms to the STD3 ASCII rules,
63 * for example the restriction of labels to LDH characters
64 * (ASCII Letters, Digits and Hyphen-Minus).
65 * For use in static worker and factory methods.
66 * @stable ICU 2.6
67 */
68 UIDNA_USE_STD3_RULES=2,
69 /**
70 * IDNA option to check whether the input conforms to the BiDi rules.
71 * For use in static worker and factory methods.
72 * <p>This option is ignored by the IDNA2003 implementation.
73 * (IDNA2003 always performs a BiDi check.)
74 * @stable ICU 4.6
75 */
76 UIDNA_CHECK_BIDI=4,
77 /**
78 * IDNA option to check whether the input conforms to the CONTEXTJ rules.
79 * For use in static worker and factory methods.
80 * <p>This option is ignored by the IDNA2003 implementation.
81 * (The CONTEXTJ check is new in IDNA2008.)
82 * @stable ICU 4.6
83 */
84 UIDNA_CHECK_CONTEXTJ=8,
85 /**
86 * IDNA option for nontransitional processing in ToASCII().
87 * For use in static worker and factory methods.
88 * <p>By default, ToASCII() uses transitional processing.
89 * <p>This option is ignored by the IDNA2003 implementation.
90 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
91 * @stable ICU 4.6
92 */
93 UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
94 /**
95 * IDNA option for nontransitional processing in ToUnicode().
96 * For use in static worker and factory methods.
97 * <p>By default, ToUnicode() uses transitional processing.
98 * <p>This option is ignored by the IDNA2003 implementation.
99 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
100 * @stable ICU 4.6
101 */
102 UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
103 /**
104 * IDNA option to check whether the input conforms to the CONTEXTO rules.
105 * For use in static worker and factory methods.
106 * <p>This option is ignored by the IDNA2003 implementation.
107 * (The CONTEXTO check is new in IDNA2008.)
108 * <p>This is for use by registries for IDNA2008 conformance.
109 * UTS #46 does not require the CONTEXTO check.
110 * @stable ICU 49
111 */
112 UIDNA_CHECK_CONTEXTO=0x40
113 };
114
115 /**
116 * Opaque C service object type for the new IDNA API.
117 * @stable ICU 4.6
118 */
119 struct UIDNA;
120 typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
121
122 /**
123 * Returns a UIDNA instance which implements UTS #46.
124 * Returns an unmodifiable instance, owned by the caller.
125 * Cache it for multiple operations, and uidna_close() it when done.
126 * The instance is thread-safe, that is, it can be used concurrently.
127 *
128 * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
129 *
130 * @param options Bit set to modify the processing and error checking.
131 * See option bit set values in uidna.h.
132 * @param pErrorCode Standard ICU error code. Its input value must
133 * pass the U_SUCCESS() test, or else the function returns
134 * immediately. Check for U_FAILURE() on output or use with
135 * function chaining. (See User Guide for details.)
136 * @return the UTS #46 UIDNA instance, if successful
137 * @stable ICU 4.6
138 */
139 U_STABLE UIDNA * U_EXPORT2
140 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
141
142 /**
143 * Closes a UIDNA instance.
144 * @param idna UIDNA instance to be closed
145 * @stable ICU 4.6
146 */
147 U_STABLE void U_EXPORT2
148 uidna_close(UIDNA *idna);
149
150 #if U_SHOW_CPLUSPLUS_API
151
152 U_NAMESPACE_BEGIN
153
154 /**
155 * \class LocalUIDNAPointer
156 * "Smart pointer" class, closes a UIDNA via uidna_close().
157 * For most methods see the LocalPointerBase base class.
158 *
159 * @see LocalPointerBase
160 * @see LocalPointer
161 * @stable ICU 4.6
162 */
163 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
164
165 U_NAMESPACE_END
166
167 #endif
168
169 /**
170 * Output container for IDNA processing errors.
171 * Initialize with UIDNA_INFO_INITIALIZER:
172 * \code
173 * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
174 * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
175 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
176 * \endcode
177 * @stable ICU 4.6
178 */
179 typedef struct UIDNAInfo {
180 /** Size of this struct in bytes: sizeof(UIDNAInfo), as set by UIDNA_INFO_INITIALIZER. @stable ICU 4.6 */
181 int16_t size;
182 /**
183 * Set to TRUE if transitional and nontransitional processing produce different results.
184 * For details see C++ IDNAInfo::isTransitionalDifferent().
185 * @stable ICU 4.6
186 */
187 UBool isTransitionalDifferent;
188 UBool reservedB3; /**< Reserved field, do not use. @internal */
189 /**
190 * Bit set indicating IDNA processing errors. 0 if no errors.
191 * See the UIDNA_ERROR_... constants in this header.
192 * @stable ICU 4.6
193 */
194 uint32_t errors;
195 int32_t reservedI2; /**< Reserved field, do not use. @internal */
196 int32_t reservedI3; /**< Reserved field, do not use. @internal */
197 } UIDNAInfo;
198
199 /**
200 * Static initializer for a UIDNAInfo struct: size is sizeof(UIDNAInfo), all other fields are FALSE/0.
201 * @stable ICU 4.6
202 */
203 #define UIDNA_INFO_INITIALIZER { \
204 (int16_t)sizeof(UIDNAInfo), \
205 FALSE, FALSE, \
206 0, 0, 0 }
207
208 /**
209 * Converts a single domain name label into its ASCII form for DNS lookup.
210 * If any processing step fails, then pInfo->errors will be non-zero and
211 * the result might not be an ASCII string.
212 * The label might be modified according to the types of errors.
213 * Labels with severe errors will be left in (or turned into) their Unicode form.
214 *
215 * The UErrorCode indicates an error only in exceptional cases,
216 * such as a U_MEMORY_ALLOCATION_ERROR.
217 *
218 * @param idna UIDNA instance
219 * @param label Input domain name label
220 * @param length Label length, or -1 if NUL-terminated
221 * @param dest Destination string buffer
222 * @param capacity Destination buffer capacity
223 * @param pInfo Output container of IDNA processing details.
224 * @param pErrorCode Standard ICU error code. Its input value must
225 * pass the U_SUCCESS() test, or else the function returns
226 * immediately. Check for U_FAILURE() on output or use with
227 * function chaining. (See User Guide for details.)
228 * @return destination string length
229 * @stable ICU 4.6
230 */
231 U_STABLE int32_t U_EXPORT2
232 uidna_labelToASCII(const UIDNA *idna,
233 const UChar *label, int32_t length,
234 UChar *dest, int32_t capacity,
235 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
236
237 /**
238 * Converts a single domain name label into its Unicode form for human-readable display.
239 * If any processing step fails, then pInfo->errors will be non-zero.
240 * The label might be modified according to the types of errors.
241 *
242 * The UErrorCode indicates an error only in exceptional cases,
243 * such as a U_MEMORY_ALLOCATION_ERROR.
244 *
245 * @param idna UIDNA instance
246 * @param label Input domain name label
247 * @param length Label length, or -1 if NUL-terminated
248 * @param dest Destination string buffer
249 * @param capacity Destination buffer capacity
250 * @param pInfo Output container of IDNA processing details.
251 * @param pErrorCode Standard ICU error code. Its input value must
252 * pass the U_SUCCESS() test, or else the function returns
253 * immediately. Check for U_FAILURE() on output or use with
254 * function chaining. (See User Guide for details.)
255 * @return destination string length
256 * @stable ICU 4.6
257 */
258 U_STABLE int32_t U_EXPORT2
259 uidna_labelToUnicode(const UIDNA *idna,
260 const UChar *label, int32_t length,
261 UChar *dest, int32_t capacity,
262 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
263
264 /**
265 * Converts a whole domain name into its ASCII form for DNS lookup.
266 * If any processing step fails, then pInfo->errors will be non-zero and
267 * the result might not be an ASCII string.
268 * The domain name might be modified according to the types of errors.
269 * Labels with severe errors will be left in (or turned into) their Unicode form.
270 *
271 * The UErrorCode indicates an error only in exceptional cases,
272 * such as a U_MEMORY_ALLOCATION_ERROR.
273 *
274 * @param idna UIDNA instance
275 * @param name Input domain name
276 * @param length Domain name length, or -1 if NUL-terminated
277 * @param dest Destination string buffer
278 * @param capacity Destination buffer capacity
279 * @param pInfo Output container of IDNA processing details.
280 * @param pErrorCode Standard ICU error code. Its input value must
281 * pass the U_SUCCESS() test, or else the function returns
282 * immediately. Check for U_FAILURE() on output or use with
283 * function chaining. (See User Guide for details.)
284 * @return destination string length
285 * @stable ICU 4.6
286 */
287 U_STABLE int32_t U_EXPORT2
288 uidna_nameToASCII(const UIDNA *idna,
289 const UChar *name, int32_t length,
290 UChar *dest, int32_t capacity,
291 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
292
293 /**
294 * Converts a whole domain name into its Unicode form for human-readable display.
295 * If any processing step fails, then pInfo->errors will be non-zero.
296 * The domain name might be modified according to the types of errors.
297 *
298 * The UErrorCode indicates an error only in exceptional cases,
299 * such as a U_MEMORY_ALLOCATION_ERROR.
300 *
301 * @param idna UIDNA instance
302 * @param name Input domain name
303 * @param length Domain name length, or -1 if NUL-terminated
304 * @param dest Destination string buffer
305 * @param capacity Destination buffer capacity
306 * @param pInfo Output container of IDNA processing details.
307 * @param pErrorCode Standard ICU error code. Its input value must
308 * pass the U_SUCCESS() test, or else the function returns
309 * immediately. Check for U_FAILURE() on output or use with
310 * function chaining. (See User Guide for details.)
311 * @return destination string length
312 * @stable ICU 4.6
313 */
314 U_STABLE int32_t U_EXPORT2
315 uidna_nameToUnicode(const UIDNA *idna,
316 const UChar *name, int32_t length,
317 UChar *dest, int32_t capacity,
318 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
319
320 /* UTF-8 versions of the processing methods --------------------------------- */
321
322 /**
323 * Converts a single domain name label into its ASCII form for DNS lookup.
324 * UTF-8 version of uidna_labelToASCII(), same behavior.
325 *
326 * @param idna UIDNA instance
327 * @param label Input domain name label
328 * @param length Label length, or -1 if NUL-terminated
329 * @param dest Destination string buffer
330 * @param capacity Destination buffer capacity
331 * @param pInfo Output container of IDNA processing details.
332 * @param pErrorCode Standard ICU error code. Its input value must
333 * pass the U_SUCCESS() test, or else the function returns
334 * immediately. Check for U_FAILURE() on output or use with
335 * function chaining. (See User Guide for details.)
336 * @return destination string length
337 * @stable ICU 4.6
338 */
339 U_STABLE int32_t U_EXPORT2
340 uidna_labelToASCII_UTF8(const UIDNA *idna,
341 const char *label, int32_t length,
342 char *dest, int32_t capacity,
343 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
344
345 /**
346 * Converts a single domain name label into its Unicode form for human-readable display.
347 * UTF-8 version of uidna_labelToUnicode(), same behavior.
348 *
349 * @param idna UIDNA instance
350 * @param label Input domain name label
351 * @param length Label length, or -1 if NUL-terminated
352 * @param dest Destination string buffer
353 * @param capacity Destination buffer capacity
354 * @param pInfo Output container of IDNA processing details.
355 * @param pErrorCode Standard ICU error code. Its input value must
356 * pass the U_SUCCESS() test, or else the function returns
357 * immediately. Check for U_FAILURE() on output or use with
358 * function chaining. (See User Guide for details.)
359 * @return destination string length
360 * @stable ICU 4.6
361 */
362 U_STABLE int32_t U_EXPORT2
363 uidna_labelToUnicodeUTF8(const UIDNA *idna,
364 const char *label, int32_t length,
365 char *dest, int32_t capacity,
366 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
367
368 /**
369 * Converts a whole domain name into its ASCII form for DNS lookup.
370 * UTF-8 version of uidna_nameToASCII(), same behavior.
371 *
372 * @param idna UIDNA instance
373 * @param name Input domain name
374 * @param length Domain name length, or -1 if NUL-terminated
375 * @param dest Destination string buffer
376 * @param capacity Destination buffer capacity
377 * @param pInfo Output container of IDNA processing details.
378 * @param pErrorCode Standard ICU error code. Its input value must
379 * pass the U_SUCCESS() test, or else the function returns
380 * immediately. Check for U_FAILURE() on output or use with
381 * function chaining. (See User Guide for details.)
382 * @return destination string length
383 * @stable ICU 4.6
384 */
385 U_STABLE int32_t U_EXPORT2
386 uidna_nameToASCII_UTF8(const UIDNA *idna,
387 const char *name, int32_t length,
388 char *dest, int32_t capacity,
389 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
390
391 /**
392 * Converts a whole domain name into its Unicode form for human-readable display.
393 * UTF-8 version of uidna_nameToUnicode(), same behavior.
394 *
395 * @param idna UIDNA instance
396 * @param name Input domain name
397 * @param length Domain name length, or -1 if NUL-terminated
398 * @param dest Destination string buffer
399 * @param capacity Destination buffer capacity
400 * @param pInfo Output container of IDNA processing details.
401 * @param pErrorCode Standard ICU error code. Its input value must
402 * pass the U_SUCCESS() test, or else the function returns
403 * immediately. Check for U_FAILURE() on output or use with
404 * function chaining. (See User Guide for details.)
405 * @return destination string length
406 * @stable ICU 4.6
407 */
408 U_STABLE int32_t U_EXPORT2
409 uidna_nameToUnicodeUTF8(const UIDNA *idna,
410 const char *name, int32_t length,
411 char *dest, int32_t capacity,
412 UIDNAInfo *pInfo, UErrorCode *pErrorCode);
413
414 /*
415 * IDNA error bit set values.
416 * When a domain name or label fails a processing step or does not meet the
417 * validity criteria, then one or more of these error bits are set.
418 */
419 enum {
420 /**
421 * A non-final domain name label (or the whole domain name) is empty.
422 * @stable ICU 4.6
423 */
424 UIDNA_ERROR_EMPTY_LABEL=1,
425 /**
426 * A domain name label is longer than 63 bytes.
427 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
428 * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
429 * @stable ICU 4.6
430 */
431 UIDNA_ERROR_LABEL_TOO_LONG=2,
432 /**
433 * A domain name is longer than 255 bytes in its storage form.
434 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
435 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
436 * @stable ICU 4.6
437 */
438 UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
439 /**
440 * A label starts with a hyphen-minus ('-').
441 * @stable ICU 4.6
442 */
443 UIDNA_ERROR_LEADING_HYPHEN=8,
444 /**
445 * A label ends with a hyphen-minus ('-').
446 * @stable ICU 4.6
447 */
448 UIDNA_ERROR_TRAILING_HYPHEN=0x10,
449 /**
450 * A label contains hyphen-minus ('-') in the third and fourth positions.
451 * @stable ICU 4.6
452 */
453 UIDNA_ERROR_HYPHEN_3_4=0x20,
454 /**
455 * A label starts with a combining mark.
456 * @stable ICU 4.6
457 */
458 UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
459 /**
460 * A label or domain name contains disallowed characters.
461 * @stable ICU 4.6
462 */
463 UIDNA_ERROR_DISALLOWED=0x80,
464 /**
465 * A label starts with "xn--" but does not contain valid Punycode.
466 * That is, an xn-- label failed Punycode decoding.
467 * @stable ICU 4.6
468 */
469 UIDNA_ERROR_PUNYCODE=0x100,
470 /**
471 * A label contains a dot (full stop).
472 * This can occur in an input string for a single-label function.
473 * @stable ICU 4.6
474 */
475 UIDNA_ERROR_LABEL_HAS_DOT=0x200,
476 /**
477 * An ACE label does not contain a valid label string.
478 * The label was successfully ACE (Punycode) decoded but the resulting
479 * string had severe validation errors. For example,
480 * it might contain characters that are not allowed in ACE labels,
481 * or it might not be normalized.
482 * @stable ICU 4.6
483 */
484 UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
485 /**
486 * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
487 * @stable ICU 4.6
488 */
489 UIDNA_ERROR_BIDI=0x800,
490 /**
491 * A label does not meet the IDNA CONTEXTJ requirements.
492 * @stable ICU 4.6
493 */
494 UIDNA_ERROR_CONTEXTJ=0x1000,
495 /**
496 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
497 * Some punctuation characters "Would otherwise have been DISALLOWED"
498 * but are allowed in certain contexts. (RFC 5892)
499 * @stable ICU 49
500 */
501 UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
502 /**
503 * A label does not meet the IDNA CONTEXTO requirements for digits.
504 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
505 * @stable ICU 49
506 */
507 UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
508 };
509
510 #ifndef U_HIDE_DEPRECATED_API
511
512 /* IDNA2003 API ------------------------------------------------------------- */
513
514 /**
515 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
516 * This operation is done on <b>single labels</b> before sending it to something that expects
517 * ASCII names. A label is an individual part of a domain name. Labels are usually
518 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
519 *
520 * IDNA2003 API Overview:
521 *
522 * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
523 * (http://www.ietf.org/rfc/rfc3490.txt).
524 * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
525 * containing non-ASCII code points are processed by the
526 * ToASCII operation before passing it to resolver libraries. Domain names
527 * that are obtained from resolver libraries are processed by the
528 * ToUnicode operation before displaying the domain name to the user.
529 * IDNA requires that implementations process input strings with Nameprep
530 * (http://www.ietf.org/rfc/rfc3491.txt),
531 * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
532 * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
533 * Implementations of IDNA MUST fully implement Nameprep and Punycode;
534 * neither Nameprep nor Punycode are optional.
535 * The input and output of ToASCII and ToUnicode operations are Unicode
536 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
537 * multiple times to an input string will yield the same result as applying the operation
538 * once.
539 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
540 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
541 *
542 * @param src Input UChar array containing label in Unicode.
543 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
544 * @param dest Output UChar array with ASCII (ACE encoded) label.
545 * @param destCapacity Size of dest.
546 * @param options A bit set of options:
547 *
548 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
549 * and do not use STD3 ASCII rules
550 * If unassigned code points are found the operation fails with
551 * U_IDNA_UNASSIGNED_ERROR error code.
552 *
553 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
554 * If this option is set, the unassigned code points in the input
555 * are treated as normal Unicode code points.
556 *
557 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
558 * If this option is set and the input does not satisfy STD3 rules,
559 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
560 *
561 * @param parseError Pointer to UParseError struct to receive information on position
562 * of error if an error is encountered. Can be NULL.
563 * @param status ICU in/out error code parameter.
564 * U_INVALID_CHAR_FOUND if src contains
565 * unmatched single surrogates.
566 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
567 * too many code points.
568 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
569 * @return The length of the result string, if successful or in case of a buffer overflow;
570 * on overflow it will be greater than destCapacity.
571 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
572 */
573 U_DEPRECATED int32_t U_EXPORT2
574 uidna_toASCII(const UChar* src, int32_t srcLength,
575 UChar* dest, int32_t destCapacity,
576 int32_t options,
577 UParseError* parseError,
578 UErrorCode* status);
579
580
581 /**
582 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
583 * This operation is done on <b>single labels</b> before sending it to something that expects
584 * Unicode names. A label is an individual part of a domain name. Labels are usually
585 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
586 *
587 * @param src Input UChar array containing ASCII (ACE encoded) label.
588 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
589 * @param dest Output Converted UChar array containing Unicode equivalent of label.
590 * @param destCapacity Size of dest.
591 * @param options A bit set of options:
592 *
593 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
594 * and do not use STD3 ASCII rules
595 * If unassigned code points are found the operation fails with
596 * U_IDNA_UNASSIGNED_ERROR error code.
597 *
598 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
599 * If this option is set, the unassigned code points in the input
600 * are treated as normal Unicode code points. <b> Note: </b> This option is
601 * required on the toUnicode operation because the RFC mandates
602 * verification of decoded ACE input by applying toASCII and comparing
603 * its output with the source.
604 *
605 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
606 * If this option is set and the input does not satisfy STD3 rules,
607 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
608 *
609 * @param parseError Pointer to UParseError struct to receive information on position
610 * of error if an error is encountered. Can be NULL.
611 * @param status ICU in/out error code parameter.
612 * U_INVALID_CHAR_FOUND if src contains
613 * unmatched single surrogates.
614 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
615 * too many code points.
616 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
617 * @return The length of the result string, if successful or in case of a buffer overflow;
618 * on overflow it will be greater than destCapacity.
619 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
620 */
621 U_DEPRECATED int32_t U_EXPORT2
622 uidna_toUnicode(const UChar* src, int32_t srcLength,
623 UChar* dest, int32_t destCapacity,
624 int32_t options,
625 UParseError* parseError,
626 UErrorCode* status);
627
628
629 /**
630 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
631 * This operation is done on complete domain names, e.g: "www.example.com".
632 * It is important to note that this operation can fail. If it fails, then the input
633 * domain name cannot be used as an Internationalized Domain Name and the application
634 * should have methods defined to deal with the failure.
635 *
636 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
637 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
638 * and then convert. This function does not offer that level of granularity. The options once
639 * set will apply to all labels in the domain name
640 *
641 * @param src Input UChar array containing IDN in Unicode.
642 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
643 * @param dest Output UChar array with ASCII (ACE encoded) IDN.
644 * @param destCapacity Size of dest.
645 * @param options A bit set of options:
646 *
647 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
648 * and do not use STD3 ASCII rules
649 * If unassigned code points are found the operation fails with
650 * U_IDNA_UNASSIGNED_ERROR error code.
651 *
652 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
653 * If this option is set, the unassigned code points in the input
654 * are treated as normal Unicode code points.
655 *
656 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
657 * If this option is set and the input does not satisfy STD3 rules,
658 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
659 *
660 * @param parseError Pointer to UParseError struct to receive information on position
661 * of error if an error is encountered. Can be NULL.
662 * @param status ICU in/out error code parameter.
663 * U_INVALID_CHAR_FOUND if src contains
664 * unmatched single surrogates.
665 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
666 * too many code points.
667 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
668 * @return The length of the result string, if successful or in case of a buffer overflow;
669 * on overflow it will be greater than destCapacity.
670 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
671 */
672 U_DEPRECATED int32_t U_EXPORT2
673 uidna_IDNToASCII( const UChar* src, int32_t srcLength,
674 UChar* dest, int32_t destCapacity,
675 int32_t options,
676 UParseError* parseError,
677 UErrorCode* status);
678
679 /**
680 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
681 * This operation is done on complete domain names, e.g: "www.example.com".
682 *
683 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
684 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
685 * and then convert. This function does not offer that level of granularity. The options once
686 * set will apply to all labels in the domain name
687 *
688 * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
689 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
690 * @param dest Output UChar array containing Unicode equivalent of source IDN.
691 * @param destCapacity Size of dest.
692 * @param options A bit set of options:
693 *
694 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
695 * and do not use STD3 ASCII rules
696 * If unassigned code points are found the operation fails with
697 * U_IDNA_UNASSIGNED_ERROR error code.
698 *
699 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
700 * If this option is set, the unassigned code points in the input
701 * are treated as normal Unicode code points.
702 *
703 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
704 * If this option is set and the input does not satisfy STD3 rules,
705 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
706 *
707 * @param parseError Pointer to UParseError struct to receive information on position
708 * of error if an error is encountered. Can be NULL.
709 * @param status ICU in/out error code parameter.
710 * U_INVALID_CHAR_FOUND if src contains
711 * unmatched single surrogates.
712 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
713 * too many code points.
714 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
715 * @return The length of the result string, if successful or in case of a buffer overflow;
716 * on overflow it will be greater than destCapacity.
717 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
718 */
719 U_DEPRECATED int32_t U_EXPORT2
720 uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
721 UChar* dest, int32_t destCapacity,
722 int32_t options,
723 UParseError* parseError,
724 UErrorCode* status);
725
726 /**
727 * IDNA2003: Compare two IDN strings for equivalence.
728 * This function splits the domain names into labels and compares them.
729 * According to IDN RFC, whenever two labels are compared, they are
730 * considered equal if and only if their ASCII forms (obtained by
731 * applying toASCII) match using a case-insensitive ASCII comparison.
732 * Two domain names are considered a match if and only if all labels
733 * match regardless of whether label separators match.
734 *
735 * @param s1 First source string.
736 * @param length1 Length of first source string, or -1 if NUL-terminated.
737 *
738 * @param s2 Second source string.
739 * @param length2 Length of second source string, or -1 if NUL-terminated.
740 * @param options A bit set of options:
741 *
742 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
743 * and do not use STD3 ASCII rules
744 * If unassigned code points are found the operation fails with
745 * U_IDNA_UNASSIGNED_ERROR error code.
746 *
747 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
748 * If this option is set, the unassigned code points in the input
749 * are treated as normal Unicode code points.
750 *
751 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
752 * If this option is set and the input does not satisfy STD3 rules,
753 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
754 *
755 * @param status ICU error code in/out parameter.
756 * Must fulfill U_SUCCESS before the function call.
757 * @return <0 or 0 or >0 as usual for string comparisons
758 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
759 */
760 U_DEPRECATED int32_t U_EXPORT2
761 uidna_compare( const UChar *s1, int32_t length1,
762 const UChar *s2, int32_t length2,
763 int32_t options,
764 UErrorCode* status);
765
766 #endif /* U_HIDE_DEPRECATED_API */
767
768 #endif /* #if !UCONFIG_NO_IDNA */
769
770 #endif