]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | ****************************************************************************** | |
5 | * | |
6 | * Copyright (C) 2003, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ****************************************************************************** | |
10 | */ | |
11 | /* | |
12 | punycode.c from draft-ietf-idn-punycode-03 | |
13 | http://www.nicemice.net/idn/ | |
14 | Adam M. Costello | |
15 | http://www.nicemice.net/amc/ | |
16 | ||
17 | This is ANSI C code (C89) implementing | |
18 | Punycode (draft-ietf-idn-punycode-03). | |
19 | ||
20 | Disclaimer and license | |
21 | ||
22 | Regarding this entire document or any portion of it (including | |
23 | the pseudocode and C code), the author makes no guarantees and | |
24 | is not responsible for any damage resulting from its use. The | |
25 | author grants irrevocable permission to anyone to use, modify, | |
26 | and distribute it in any way that does not diminish the rights | |
27 | of anyone else to use, modify, and distribute it, provided that | |
28 | redistributed derivative works do not contain misleading author or | |
29 | version information. Derivative works need not be licensed under | |
30 | similar terms. | |
31 | ||
32 | */ | |
33 | #ifndef _PUNYREF_H | |
34 | #define _PUNYREF_H | |
35 | ||
36 | /************************************************************/ | |
37 | /* Public interface (split out of punycode.c into this .h): */ | |
38 | ||
b75a7d8f A |
39 | #include "unicode/utypes.h" |
40 | ||
41 | #if !UCONFIG_NO_IDNA | |
42 | ||
43 | enum punycode_status { /* Result codes returned by punycode_encode() and punycode_decode(). */ | |
44 | punycode_success, /* Conversion succeeded; the outputs are valid. */ | |
45 | punycode_bad_input, /* Input is invalid (never returned by punycode_encode()). */ | |
46 | punycode_big_output, /* Output would exceed the space provided. */ | |
47 | punycode_overflow /* Input needs wider integers to process. */ | |
48 | }; | |
49 | ||
50 | ||
51 | typedef uint32_t punycode_uint; /* Unsigned type used below for Unicode code points and for array lengths. */ | |
52 | ||
53 | U_CDECL_BEGIN | |
54 | ||
55 | enum punycode_status punycode_encode( /* Converts Unicode code points to Punycode (ASCII). */ | |
56 | punycode_uint input_length, /* number of code points in input[] */ | |
57 | const punycode_uint input[], /* Unicode code points (not code units; no surrogate pairs) */ | |
58 | const unsigned char case_flags[], /* input_length uppercase flags, or a null pointer */ | |
59 | punycode_uint *output_length, /* in: capacity of output[]; out (on success): code points written */ | |
60 | char output[] ); /* receives ASCII code points; not null-terminated */ | |
61 | ||
62 | /* punycode_encode() converts Unicode to Punycode. The input */ | |
63 | /* is represented as an array of Unicode code points (not code */ | |
64 | /* units; surrogate pairs are not allowed), and the output */ | |
65 | /* will be represented as an array of ASCII code points. The */ | |
66 | /* output string is *not* null-terminated; it will contain */ | |
67 | /* zeros if and only if the input contains zeros. (Of course */ | |
68 | /* the caller can leave room for a terminator and add one if */ | |
69 | /* needed.) The input_length is the number of code points in */ | |
70 | /* the input. The output_length is an in/out argument: the */ | |
71 | /* caller passes in the maximum number of code points that it */ | |
72 | /* can receive, and on successful return it will contain the */ | |
73 | /* number of code points actually output. The case_flags array */ | |
74 | /* holds input_length boolean values, where nonzero suggests that */ | |
75 | /* the corresponding Unicode character be forced to uppercase */ | |
76 | /* after being decoded (if possible), and zero suggests that */ | |
77 | /* it be forced to lowercase (if possible). ASCII code points */ | |
78 | /* are encoded literally, except that ASCII letters are forced */ | |
79 | /* to uppercase or lowercase according to the corresponding */ | |
80 | /* uppercase flags. If case_flags is a null pointer then ASCII */ | |
81 | /* letters are left as they are, and other code points are */ | |
82 | /* treated as if their uppercase flags were zero. The return */ | |
83 | /* value can be any of the punycode_status values defined above */ | |
84 | /* except punycode_bad_input; if not punycode_success, then */ | |
85 | /* output_length and output might contain garbage. */ | |
86 | ||
87 | enum punycode_status punycode_decode( /* Converts Punycode (ASCII) to Unicode code points. */ | |
88 | punycode_uint input_length, /* number of code points in input[] */ | |
89 | const char input[], /* Punycode text as ASCII code points */ | |
90 | punycode_uint *output_length, /* in: capacity of output[]; out (on success): code points written */ | |
91 | punycode_uint output[], /* receives the decoded Unicode code points */ | |
92 | unsigned char case_flags[] ); /* room for at least output_length flags, or a null pointer if case info is not needed */ |
93 | ||
94 | /* punycode_decode() converts Punycode to Unicode. The input is */ | |
95 | /* represented as an array of ASCII code points, and the output */ | |
96 | /* will be represented as an array of Unicode code points. The */ | |
97 | /* input_length is the number of code points in the input. The */ | |
98 | /* output_length is an in/out argument: the caller passes in */ | |
99 | /* the maximum number of code points that it can receive, and */ | |
100 | /* on successful return it will contain the actual number of */ | |
101 | /* code points output. The case_flags array needs room for at */ | |
102 | /* least output_length values, or it can be a null pointer if the */ | |
103 | /* case information is not needed. A nonzero flag suggests that */ | |
104 | /* the corresponding Unicode character be forced to uppercase */ | |
105 | /* by the caller (if possible), while zero suggests that it be */ | |
106 | /* forced to lowercase (if possible). ASCII code points are */ | |
107 | /* output already in the proper case, but their flags will be set */ | |
108 | /* appropriately so that applying the flags would be harmless. */ | |
109 | /* The return value can be any of the punycode_status values */ | |
110 | /* defined above; if not punycode_success, then output_length, */ | |
111 | /* output, and case_flags might contain garbage. On success, the */ | |
112 | /* decoder will never need to write an output_length greater than */ | |
113 | /* input_length, because of how the encoding is defined. */ | |
114 | U_CDECL_END | |
115 | ||
116 | #endif /* #if !UCONFIG_NO_IDNA */ | |
117 | ||
118 | #endif |