]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/punyref.h
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / punyref.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2003, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 */
11 /*
12 punycode.c from draft-ietf-idn-punycode-03
13 http://www.nicemice.net/idn/
14 Adam M. Costello
15 http://www.nicemice.net/amc/
16
17 This is ANSI C code (C89) implementing
18 Punycode (draft-ietf-idn-punycode-03).
19
20 Disclaimer and license
21
22 Regarding this entire document or any portion of it (including
23 the pseudocode and C code), the author makes no guarantees and
24 is not responsible for any damage resulting from its use. The
25 author grants irrevocable permission to anyone to use, modify,
26 and distribute it in any way that does not diminish the rights
27 of anyone else to use, modify, and distribute it, provided that
28 redistributed derivative works do not contain misleading author or
29 version information. Derivative works need not be licensed under
30 similar terms.
31
32 */
33 #ifndef _PUNYREF_H
34 #define _PUNYREF_H
35
36 /************************************************************/
37 /* Public interface (declared here; originally part of punycode.c): */
38
39 #include "unicode/utypes.h"
40
41 #if !UCONFIG_NO_IDNA
42
43 enum punycode_status { /* Status codes returned by punycode_encode() and punycode_decode(). */
44 punycode_success, /* Conversion succeeded; output and output_length are valid. */
45 punycode_bad_input, /* Input is invalid. */
46 punycode_big_output, /* Output would exceed the space provided. */
47 punycode_overflow /* Input needs wider integers to process. */
48 };
49
50
51 typedef uint32_t punycode_uint; /* Unsigned type used below for code points and for lengths. */
52
53 U_CDECL_BEGIN
54
55 enum punycode_status punycode_encode( /* Unicode -> Punycode; full contract in the comment that follows. */
56 punycode_uint input_length, /* Number of code points in input. */
57 const punycode_uint input[], /* Unicode code points (not code units). */
58 const unsigned char case_flags[], /* input_length uppercase flags, or a null pointer. */
59 punycode_uint *output_length, /* In: capacity of output. Out (on success): count written. */
60 char output[] ); /* Receives ASCII code points; not null-terminated. */
61
62 /* punycode_encode() converts Unicode to Punycode. The input */
63 /* is represented as an array of Unicode code points (not code */
64 /* units; surrogate pairs are not allowed), and the output */
65 /* will be represented as an array of ASCII code points. The */
66 /* output string is *not* null-terminated; it will contain */
67 /* zeros if and only if the input contains zeros. (Of course */
68 /* the caller can leave room for a terminator and add one if */
69 /* needed.) The input_length is the number of code points in */
70 /* the input. The output_length is an in/out argument: the */
71 /* caller passes in the maximum number of code points that it */
72 /* can receive, and on successful return it will contain the */
73 /* number of code points actually output. The case_flags array */
74 /* holds input_length boolean values, where nonzero suggests that */
75 /* the corresponding Unicode character be forced to uppercase */
76 /* after being decoded (if possible), and zero suggests that */
77 /* it be forced to lowercase (if possible). ASCII code points */
78 /* are encoded literally, except that ASCII letters are forced */
79 /* to uppercase or lowercase according to the corresponding */
80 /* uppercase flags. If case_flags is a null pointer then ASCII */
81 /* letters are left as they are, and other code points are */
82 /* treated as if their uppercase flags were zero. The return */
83 /* value can be any of the punycode_status values defined above */
84 /* except punycode_bad_input; if not punycode_success, then */
85 /* output_length and output might contain garbage. */
86
87 enum punycode_status punycode_decode( /* Punycode -> Unicode; full contract in the comment that follows. */
88 punycode_uint input_length, /* Number of code points in input. */
89 const char input[], /* Punycode text as ASCII code points. */
90 punycode_uint *output_length, /* In: capacity of output. Out (on success): count written. */
91 punycode_uint output[], /* Receives Unicode code points. */
92 unsigned char case_flags[] ); /* Receives uppercase flags, or a null pointer if not needed. */
93
94 /* punycode_decode() converts Punycode to Unicode. The input is */
95 /* represented as an array of ASCII code points, and the output */
96 /* will be represented as an array of Unicode code points. The */
97 /* input_length is the number of code points in the input. The */
98 /* output_length is an in/out argument: the caller passes in */
99 /* the maximum number of code points that it can receive, and */
100 /* on successful return it will contain the actual number of */
101 /* code points output. The case_flags array needs room for at */
102 /* least output_length values, or it can be a null pointer if the */
103 /* case information is not needed. A nonzero flag suggests that */
104 /* the corresponding Unicode character be forced to uppercase */
105 /* by the caller (if possible), while zero suggests that it be */
106 /* forced to lowercase (if possible). ASCII code points are */
107 /* output already in the proper case, but their flags will be set */
108 /* appropriately so that applying the flags would be harmless. */
109 /* The return value can be any of the punycode_status values */
110 /* defined above; if not punycode_success, then output_length, */
111 /* output, and case_flags might contain garbage. On success, the */
112 /* decoder will never need to write an output_length greater than */
113 /* input_length, because of how the encoding is defined. */
114 U_CDECL_END
115
116 #endif /* #if !UCONFIG_NO_IDNA */
117
118 #endif