2 *******************************************************************************
4 * Copyright (C) 2003-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
20 #include "unicode/utypes.h"
24 #include "unicode/localpointer.h"
25 #include "unicode/parseerr.h"
29 * \brief C API: Internationalizing Domain Names in Applications (IDNA)
31 * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
33 * The C API functions which do take a UIDNA * service object pointer
34 * implement UTS #46 and IDNA2008.
36 * IDNA2003 is obsolete.
37 * The C API functions which do not take a service object pointer
38 * implement IDNA2003. They are all deprecated.
42 * IDNA option bit set values.
46 * Default options value: None of the other options are set.
47 * For use in static worker and factory methods.
51 #ifndef U_HIDE_DEPRECATED_API
53 * Option to allow unassigned code points in domain names and labels.
54 * For use in static worker and factory methods.
55 * <p>This option is ignored by the UTS46 implementation.
56 * (UTS #46 disallows unassigned code points.)
57 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
59 UIDNA_ALLOW_UNASSIGNED
=1,
60 #endif /* U_HIDE_DEPRECATED_API */
62 * Option to check whether the input conforms to the STD3 ASCII rules,
63 * for example the restriction of labels to LDH characters
64 * (ASCII Letters, Digits and Hyphen-Minus).
65 * For use in static worker and factory methods.
68 UIDNA_USE_STD3_RULES
=2,
70 * IDNA option to check whether the input conforms to the BiDi rules.
71 * For use in static worker and factory methods.
72 * <p>This option is ignored by the IDNA2003 implementation.
73 * (IDNA2003 always performs a BiDi check.)
78 * IDNA option to check whether the input conforms to the CONTEXTJ rules.
79 * For use in static worker and factory methods.
80 * <p>This option is ignored by the IDNA2003 implementation.
81 * (The CONTEXTJ check is new in IDNA2008.)
84 UIDNA_CHECK_CONTEXTJ
=8,
86 * IDNA option for nontransitional processing in ToASCII().
87 * For use in static worker and factory methods.
88 * <p>By default, ToASCII() uses transitional processing.
89 * <p>This option is ignored by the IDNA2003 implementation.
90 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
93 UIDNA_NONTRANSITIONAL_TO_ASCII
=0x10,
95 * IDNA option for nontransitional processing in ToUnicode().
96 * For use in static worker and factory methods.
97 * <p>By default, ToUnicode() uses transitional processing.
98 * <p>This option is ignored by the IDNA2003 implementation.
99 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
102 UIDNA_NONTRANSITIONAL_TO_UNICODE
=0x20,
104 * IDNA option to check whether the input conforms to the CONTEXTO rules.
105 * For use in static worker and factory methods.
106 * <p>This option is ignored by the IDNA2003 implementation.
107 * (The CONTEXTO check is new in IDNA2008.)
108 * <p>This is for use by registries for IDNA2008 conformance.
109 * UTS #46 does not require the CONTEXTO check.
112 UIDNA_CHECK_CONTEXTO
=0x40
116 * Opaque C service object type for the new IDNA API.
120 typedef struct UIDNA UIDNA
; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
123 * Returns a UIDNA instance which implements UTS #46.
124 * Returns an unmodifiable instance, owned by the caller.
125 * Cache it for multiple operations, and uidna_close() it when done.
126 * The instance is thread-safe, that is, it can be used concurrently.
128 * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
130 * @param options Bit set to modify the processing and error checking.
131 * See option bit set values in uidna.h.
132 * @param pErrorCode Standard ICU error code. Its input value must
133 * pass the U_SUCCESS() test, or else the function returns
134 * immediately. Check for U_FAILURE() on output or use with
135 * function chaining. (See User Guide for details.)
136 * @return the UTS #46 UIDNA instance, if successful
139 U_STABLE UIDNA
* U_EXPORT2
140 uidna_openUTS46(uint32_t options
, UErrorCode
*pErrorCode
);
143 * Closes a UIDNA instance.
144 * @param idna UIDNA instance to be closed
147 U_STABLE
void U_EXPORT2
148 uidna_close(UIDNA
*idna
);
150 #if U_SHOW_CPLUSPLUS_API
155 * \class LocalUIDNAPointer
156 * "Smart pointer" class, closes a UIDNA via uidna_close().
157 * For most methods see the LocalPointerBase base class.
159 * @see LocalPointerBase
163 U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer
, UIDNA
, uidna_close
);
170 * Output container for IDNA processing errors.
171 * Initialize with UIDNA_INFO_INITIALIZER:
173 * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
174 * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
175 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
179 typedef struct UIDNAInfo
{
180 /** sizeof(UIDNAInfo) @stable ICU 4.6 */
183 * Set to TRUE if transitional and nontransitional processing produce different results.
184 * For details see C++ IDNAInfo::isTransitionalDifferent().
187 UBool isTransitionalDifferent
;
188 UBool reservedB3
; /**< Reserved field, do not use. @internal */
190 * Bit set indicating IDNA processing errors. 0 if no errors.
191 * See UIDNA_ERROR_... constants.
195 int32_t reservedI2
; /**< Reserved field, do not use. @internal */
196 int32_t reservedI3
; /**< Reserved field, do not use. @internal */
200 * Static initializer for a UIDNAInfo struct.
203 #define UIDNA_INFO_INITIALIZER { \
204 (int16_t)sizeof(UIDNAInfo), \
209 * Converts a single domain name label into its ASCII form for DNS lookup.
210 * If any processing step fails, then pInfo->errors will be non-zero and
211 * the result might not be an ASCII string.
212 * The label might be modified according to the types of errors.
213 * Labels with severe errors will be left in (or turned into) their Unicode form.
215 * The UErrorCode indicates an error only in exceptional cases,
216 * such as a U_MEMORY_ALLOCATION_ERROR.
218 * @param idna UIDNA instance
219 * @param label Input domain name label
220 * @param length Label length, or -1 if NUL-terminated
221 * @param dest Destination string buffer
222 * @param capacity Destination buffer capacity
223 * @param pInfo Output container of IDNA processing details.
224 * @param pErrorCode Standard ICU error code. Its input value must
225 * pass the U_SUCCESS() test, or else the function returns
226 * immediately. Check for U_FAILURE() on output or use with
227 * function chaining. (See User Guide for details.)
228 * @return destination string length
231 U_STABLE
int32_t U_EXPORT2
232 uidna_labelToASCII(const UIDNA
*idna
,
233 const UChar
*label
, int32_t length
,
234 UChar
*dest
, int32_t capacity
,
235 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
238 * Converts a single domain name label into its Unicode form for human-readable display.
239 * If any processing step fails, then pInfo->errors will be non-zero.
240 * The label might be modified according to the types of errors.
242 * The UErrorCode indicates an error only in exceptional cases,
243 * such as a U_MEMORY_ALLOCATION_ERROR.
245 * @param idna UIDNA instance
246 * @param label Input domain name label
247 * @param length Label length, or -1 if NUL-terminated
248 * @param dest Destination string buffer
249 * @param capacity Destination buffer capacity
250 * @param pInfo Output container of IDNA processing details.
251 * @param pErrorCode Standard ICU error code. Its input value must
252 * pass the U_SUCCESS() test, or else the function returns
253 * immediately. Check for U_FAILURE() on output or use with
254 * function chaining. (See User Guide for details.)
255 * @return destination string length
258 U_STABLE
int32_t U_EXPORT2
259 uidna_labelToUnicode(const UIDNA
*idna
,
260 const UChar
*label
, int32_t length
,
261 UChar
*dest
, int32_t capacity
,
262 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
265 * Converts a whole domain name into its ASCII form for DNS lookup.
266 * If any processing step fails, then pInfo->errors will be non-zero and
267 * the result might not be an ASCII string.
268 * The domain name might be modified according to the types of errors.
269 * Labels with severe errors will be left in (or turned into) their Unicode form.
271 * The UErrorCode indicates an error only in exceptional cases,
272 * such as a U_MEMORY_ALLOCATION_ERROR.
274 * @param idna UIDNA instance
275 * @param name Input domain name
276 * @param length Domain name length, or -1 if NUL-terminated
277 * @param dest Destination string buffer
278 * @param capacity Destination buffer capacity
279 * @param pInfo Output container of IDNA processing details.
280 * @param pErrorCode Standard ICU error code. Its input value must
281 * pass the U_SUCCESS() test, or else the function returns
282 * immediately. Check for U_FAILURE() on output or use with
283 * function chaining. (See User Guide for details.)
284 * @return destination string length
287 U_STABLE
int32_t U_EXPORT2
288 uidna_nameToASCII(const UIDNA
*idna
,
289 const UChar
*name
, int32_t length
,
290 UChar
*dest
, int32_t capacity
,
291 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
294 * Converts a whole domain name into its Unicode form for human-readable display.
295 * If any processing step fails, then pInfo->errors will be non-zero.
296 * The domain name might be modified according to the types of errors.
298 * The UErrorCode indicates an error only in exceptional cases,
299 * such as a U_MEMORY_ALLOCATION_ERROR.
301 * @param idna UIDNA instance
302 * @param name Input domain name
303 * @param length Domain name length, or -1 if NUL-terminated
304 * @param dest Destination string buffer
305 * @param capacity Destination buffer capacity
306 * @param pInfo Output container of IDNA processing details.
307 * @param pErrorCode Standard ICU error code. Its input value must
308 * pass the U_SUCCESS() test, or else the function returns
309 * immediately. Check for U_FAILURE() on output or use with
310 * function chaining. (See User Guide for details.)
311 * @return destination string length
314 U_STABLE
int32_t U_EXPORT2
315 uidna_nameToUnicode(const UIDNA
*idna
,
316 const UChar
*name
, int32_t length
,
317 UChar
*dest
, int32_t capacity
,
318 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
320 /* UTF-8 versions of the processing methods --------------------------------- */
323 * Converts a single domain name label into its ASCII form for DNS lookup.
324 * UTF-8 version of uidna_labelToASCII(), same behavior.
326 * @param idna UIDNA instance
327 * @param label Input domain name label
328 * @param length Label length, or -1 if NUL-terminated
329 * @param dest Destination string buffer
330 * @param capacity Destination buffer capacity
331 * @param pInfo Output container of IDNA processing details.
332 * @param pErrorCode Standard ICU error code. Its input value must
333 * pass the U_SUCCESS() test, or else the function returns
334 * immediately. Check for U_FAILURE() on output or use with
335 * function chaining. (See User Guide for details.)
336 * @return destination string length
339 U_STABLE
int32_t U_EXPORT2
340 uidna_labelToASCII_UTF8(const UIDNA
*idna
,
341 const char *label
, int32_t length
,
342 char *dest
, int32_t capacity
,
343 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
346 * Converts a single domain name label into its Unicode form for human-readable display.
347 * UTF-8 version of uidna_labelToUnicode(), same behavior.
349 * @param idna UIDNA instance
350 * @param label Input domain name label
351 * @param length Label length, or -1 if NUL-terminated
352 * @param dest Destination string buffer
353 * @param capacity Destination buffer capacity
354 * @param pInfo Output container of IDNA processing details.
355 * @param pErrorCode Standard ICU error code. Its input value must
356 * pass the U_SUCCESS() test, or else the function returns
357 * immediately. Check for U_FAILURE() on output or use with
358 * function chaining. (See User Guide for details.)
359 * @return destination string length
362 U_STABLE
int32_t U_EXPORT2
363 uidna_labelToUnicodeUTF8(const UIDNA
*idna
,
364 const char *label
, int32_t length
,
365 char *dest
, int32_t capacity
,
366 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
369 * Converts a whole domain name into its ASCII form for DNS lookup.
370 * UTF-8 version of uidna_nameToASCII(), same behavior.
372 * @param idna UIDNA instance
373 * @param name Input domain name
374 * @param length Domain name length, or -1 if NUL-terminated
375 * @param dest Destination string buffer
376 * @param capacity Destination buffer capacity
377 * @param pInfo Output container of IDNA processing details.
378 * @param pErrorCode Standard ICU error code. Its input value must
379 * pass the U_SUCCESS() test, or else the function returns
380 * immediately. Check for U_FAILURE() on output or use with
381 * function chaining. (See User Guide for details.)
382 * @return destination string length
385 U_STABLE
int32_t U_EXPORT2
386 uidna_nameToASCII_UTF8(const UIDNA
*idna
,
387 const char *name
, int32_t length
,
388 char *dest
, int32_t capacity
,
389 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
392 * Converts a whole domain name into its Unicode form for human-readable display.
393 * UTF-8 version of uidna_nameToUnicode(), same behavior.
395 * @param idna UIDNA instance
396 * @param name Input domain name
397 * @param length Domain name length, or -1 if NUL-terminated
398 * @param dest Destination string buffer
399 * @param capacity Destination buffer capacity
400 * @param pInfo Output container of IDNA processing details.
401 * @param pErrorCode Standard ICU error code. Its input value must
402 * pass the U_SUCCESS() test, or else the function returns
403 * immediately. Check for U_FAILURE() on output or use with
404 * function chaining. (See User Guide for details.)
405 * @return destination string length
408 U_STABLE
int32_t U_EXPORT2
409 uidna_nameToUnicodeUTF8(const UIDNA
*idna
,
410 const char *name
, int32_t length
,
411 char *dest
, int32_t capacity
,
412 UIDNAInfo
*pInfo
, UErrorCode
*pErrorCode
);
415 * IDNA error bit set values.
416 * When a domain name or label fails a processing step or does not meet the
417 * validity criteria, then one or more of these error bits are set.
421 * A non-final domain name label (or the whole domain name) is empty.
424 UIDNA_ERROR_EMPTY_LABEL
=1,
426 * A domain name label is longer than 63 bytes.
427 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
428 * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
431 UIDNA_ERROR_LABEL_TOO_LONG
=2,
433 * A domain name is longer than 255 bytes in its storage form.
434 * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
435 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
438 UIDNA_ERROR_DOMAIN_NAME_TOO_LONG
=4,
440 * A label starts with a hyphen-minus ('-').
443 UIDNA_ERROR_LEADING_HYPHEN
=8,
445 * A label ends with a hyphen-minus ('-').
448 UIDNA_ERROR_TRAILING_HYPHEN
=0x10,
450 * A label contains hyphen-minus ('-') in the third and fourth positions.
453 UIDNA_ERROR_HYPHEN_3_4
=0x20,
455 * A label starts with a combining mark.
458 UIDNA_ERROR_LEADING_COMBINING_MARK
=0x40,
460 * A label or domain name contains disallowed characters.
463 UIDNA_ERROR_DISALLOWED
=0x80,
465 * A label starts with "xn--" but does not contain valid Punycode.
466 * That is, an xn-- label failed Punycode decoding.
469 UIDNA_ERROR_PUNYCODE
=0x100,
471 * A label contains a dot (full stop).
472 * This can occur in an input string for a single-label function.
475 UIDNA_ERROR_LABEL_HAS_DOT
=0x200,
477 * An ACE label does not contain a valid label string.
478 * The label was successfully ACE (Punycode) decoded but the resulting
479 * string had severe validation errors. For example,
480 * it might contain characters that are not allowed in ACE labels,
481 * or it might not be normalized.
484 UIDNA_ERROR_INVALID_ACE_LABEL
=0x400,
486 * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
489 UIDNA_ERROR_BIDI
=0x800,
491 * A label does not meet the IDNA CONTEXTJ requirements.
494 UIDNA_ERROR_CONTEXTJ
=0x1000,
496 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
497 * Some punctuation characters "Would otherwise have been DISALLOWED"
498 * but are allowed in certain contexts. (RFC 5892)
501 UIDNA_ERROR_CONTEXTO_PUNCTUATION
=0x2000,
503 * A label does not meet the IDNA CONTEXTO requirements for digits.
504 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
507 UIDNA_ERROR_CONTEXTO_DIGITS
=0x4000
510 #ifndef U_HIDE_DEPRECATED_API
512 /* IDNA2003 API ------------------------------------------------------------- */
515 * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
516 * This operation is done on <b>single labels</b> before sending it to something that expects
517 * ASCII names. A label is an individual part of a domain name. Labels are usually
518 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
520 * IDNA2003 API Overview:
522 * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
523 * (http://www.ietf.org/rfc/rfc3490.txt).
524 * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
525 * containing non-ASCII code points are processed by the
526 * ToASCII operation before passing it to resolver libraries. Domain names
527 * that are obtained from resolver libraries are processed by the
528 * ToUnicode operation before displaying the domain name to the user.
529 * IDNA requires that implementations process input strings with Nameprep
530 * (http://www.ietf.org/rfc/rfc3491.txt),
531 * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
532 * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
533 * Implementations of IDNA MUST fully implement Nameprep and Punycode;
534 * neither Nameprep nor Punycode are optional.
535 * The input and output of ToASCII and ToUnicode operations are Unicode
536 * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
537 * multiple times to an input string will yield the same result as applying the operation once:
539 * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
540 * ToASCII(ToASCII(ToASCII...(ToASCII(string)))) == ToASCII(string).
542 * @param src Input UChar array containing label in Unicode.
543 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
544 * @param dest Output UChar array with ASCII (ACE encoded) label.
545 * @param destCapacity Size of dest.
546 * @param options A bit set of options:
548 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
549 * and do not use STD3 ASCII rules
550 * If unassigned code points are found the operation fails with
551 * U_IDNA_UNASSIGNED_ERROR error code.
553 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
554 * If this option is set, the unassigned code points in the input
555 * are treated as normal Unicode code points.
557 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
558 * If this option is set and the input does not satisfy STD3 rules,
559 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
561 * @param parseError Pointer to UParseError struct to receive information on position
562 * of error if an error is encountered. Can be NULL.
563 * @param status ICU in/out error code parameter.
564 * U_INVALID_CHAR_FOUND if src contains
565 * unmatched single surrogates.
566 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
567 * too many code points.
568 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
569 * @return The length of the result string, if successful or in case of a buffer overflow;
570 * in the latter case it will be greater than destCapacity.
571 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
573 U_DEPRECATED
int32_t U_EXPORT2
574 uidna_toASCII(const UChar
* src
, int32_t srcLength
,
575 UChar
* dest
, int32_t destCapacity
,
577 UParseError
* parseError
,
582 * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
583 * This operation is done on <b>single labels</b> before sending it to something that expects
584 * Unicode names. A label is an individual part of a domain name. Labels are usually
585 * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
587 * @param src Input UChar array containing ASCII (ACE encoded) label.
588 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
589 * @param dest Output Converted UChar array containing Unicode equivalent of label.
590 * @param destCapacity Size of dest.
591 * @param options A bit set of options:
593 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
594 * and do not use STD3 ASCII rules
595 * If unassigned code points are found the operation fails with
596 * U_IDNA_UNASSIGNED_ERROR error code.
598 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
599 * If this option is set, the unassigned code points in the input
600 * are treated as normal Unicode code points. <b> Note: </b> This option is
601 * required on toUnicode operation because the RFC mandates
602 * verification of decoded ACE input by applying toASCII and comparing
603 * its output with source
605 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
606 * If this option is set and the input does not satisfy STD3 rules,
607 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
609 * @param parseError Pointer to UParseError struct to receive information on position
610 * of error if an error is encountered. Can be NULL.
611 * @param status ICU in/out error code parameter.
612 * U_INVALID_CHAR_FOUND if src contains
613 * unmatched single surrogates.
614 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
615 * too many code points.
616 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
617 * @return The length of the result string, if successful or in case of a buffer overflow;
618 * in the latter case it will be greater than destCapacity.
619 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
621 U_DEPRECATED
int32_t U_EXPORT2
622 uidna_toUnicode(const UChar
* src
, int32_t srcLength
,
623 UChar
* dest
, int32_t destCapacity
,
625 UParseError
* parseError
,
630 * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
631 * This operation is done on complete domain names, e.g: "www.example.com".
632 * It is important to note that this operation can fail. If it fails, then the input
633 * domain name cannot be used as an Internationalized Domain Name and the application
634 * should have methods defined to deal with the failure.
636 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
637 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
638 * and then convert. This function does not offer that level of granularity. The options once
639 * set will apply to all labels in the domain name
641 * @param src Input UChar array containing IDN in Unicode.
642 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
643 * @param dest Output UChar array with ASCII (ACE encoded) IDN.
644 * @param destCapacity Size of dest.
645 * @param options A bit set of options:
647 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
648 * and do not use STD3 ASCII rules
649 * If unassigned code points are found the operation fails with
650 * U_IDNA_UNASSIGNED_ERROR error code.
652 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
653 * If this option is set, the unassigned code points in the input
654 * are treated as normal Unicode code points.
656 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
657 * If this option is set and the input does not satisfy STD3 rules,
658 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
660 * @param parseError Pointer to UParseError struct to receive information on position
661 * of error if an error is encountered. Can be NULL.
662 * @param status ICU in/out error code parameter.
663 * U_INVALID_CHAR_FOUND if src contains
664 * unmatched single surrogates.
665 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
666 * too many code points.
667 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
668 * @return The length of the result string, if successful or in case of a buffer overflow;
669 * in the latter case it will be greater than destCapacity.
670 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
672 U_DEPRECATED
int32_t U_EXPORT2
673 uidna_IDNToASCII( const UChar
* src
, int32_t srcLength
,
674 UChar
* dest
, int32_t destCapacity
,
676 UParseError
* parseError
,
680 * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
681 * This operation is done on complete domain names, e.g: "www.example.com".
683 * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
684 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
685 * and then convert. This function does not offer that level of granularity. The options once
686 * set will apply to all labels in the domain name
688 * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
689 * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
690 * @param dest Output UChar array containing Unicode equivalent of source IDN.
691 * @param destCapacity Size of dest.
692 * @param options A bit set of options:
694 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
695 * and do not use STD3 ASCII rules
696 * If unassigned code points are found the operation fails with
697 * U_IDNA_UNASSIGNED_ERROR error code.
699 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
700 * If this option is set, the unassigned code points in the input
701 * are treated as normal Unicode code points.
703 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
704 * If this option is set and the input does not satisfy STD3 rules,
705 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
707 * @param parseError Pointer to UParseError struct to receive information on position
708 * of error if an error is encountered. Can be NULL.
709 * @param status ICU in/out error code parameter.
710 * U_INVALID_CHAR_FOUND if src contains
711 * unmatched single surrogates.
712 * U_INDEX_OUTOFBOUNDS_ERROR if src contains
713 * too many code points.
714 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
715 * @return The length of the result string, if successful or in case of a buffer overflow;
716 * in the latter case it will be greater than destCapacity.
717 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
719 U_DEPRECATED
int32_t U_EXPORT2
720 uidna_IDNToUnicode( const UChar
* src
, int32_t srcLength
,
721 UChar
* dest
, int32_t destCapacity
,
723 UParseError
* parseError
,
727 * IDNA2003: Compare two IDN strings for equivalence.
728 * This function splits the domain names into labels and compares them.
729 * According to IDN RFC, whenever two labels are compared, they are
730 * considered equal if and only if their ASCII forms (obtained by
731 * applying toASCII) match using a case-insensitive ASCII comparison.
732 * Two domain names are considered a match if and only if all labels
733 * match regardless of whether label separators match.
735 * @param s1 First source string.
736 * @param length1 Length of first source string, or -1 if NUL-terminated.
738 * @param s2 Second source string.
739 * @param length2 Length of second source string, or -1 if NUL-terminated.
740 * @param options A bit set of options:
742 * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
743 * and do not use STD3 ASCII rules
744 * If unassigned code points are found the operation fails with
745 * U_IDNA_UNASSIGNED_ERROR error code.
747 * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
748 * If this option is set, the unassigned code points in the input
749 * are treated as normal Unicode code points.
751 * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
752 * If this option is set and the input does not satisfy STD3 rules,
753 * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
755 * @param status ICU error code in/out parameter.
756 * Must fulfill U_SUCCESS before the function call.
757 * @return <0 or 0 or >0 as usual for string comparisons
758 * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
760 U_DEPRECATED
int32_t U_EXPORT2
761 uidna_compare( const UChar
*s1
, int32_t length1
,
762 const UChar
*s2
, int32_t length2
,
766 #endif /* U_HIDE_DEPRECATED_API */
768 #endif /* #if !UCONFIG_NO_IDNA */