]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
2d21ac55 | 2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | ||
29 | #ifndef _SYS_UTFCONV_H_ | |
30 | #define _SYS_UTFCONV_H_ | |
31 | ||
9bccf70c | 32 | #include <sys/appleapiopts.h> |
55e303ae | 33 | #include <sys/cdefs.h> |
9bccf70c | 34 | |
1c79356b | 35 | #ifdef KERNEL |
9bccf70c | 36 | #ifdef __APPLE_API_UNSTABLE |
2d21ac55 | 37 | |
1c79356b A |
38 | /* |
39 | * UTF-8 encode/decode flags | |
40 | */ | |
2d21ac55 A |
41 | #define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */ |
42 | #define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */ | |
43 | #define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 */ | |
44 | #define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 */ | |
45 | #define UTF_ESCAPE_ILLEGAL 0x0010 /* escape illegal UTF-8 */ | |
46 | #define UTF_SFM_CONVERSIONS 0x0020 /* Use SFM mappings for illegal NTFS chars */ | |
47 | ||
48 | #define UTF_BIG_ENDIAN \ | |
49 | ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) | |
50 | ||
51 | #define UTF_LITTLE_ENDIAN \ | |
52 | ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN) | |
1c79356b A |
53 | |
54 | __BEGIN_DECLS | |
1c79356b | 55 | |
b0d623f7 A |
56 | |
57 | /* | |
58 | * unicode_combinable - Test for a combining unicode character. | |
59 | * | |
60 | * This function is similar to __CFUniCharIsNonBaseCharacter except | |
61 | * that it also includes Hangul Jamo characters. | |
62 | */ | |
63 | ||
64 | int unicode_combinable(u_int16_t character); | |
65 | ||
66 | /* | |
67 | * unicode_decomposeable - Test for a precomposed character. | |
68 | * | |
69 | * Similar to __CFUniCharIsDecomposableCharacter. | |
70 | */ | |
71 | ||
72 | int unicode_decomposeable(u_int16_t character); | |
73 | ||
74 | ||
2d21ac55 A |
75 | /* |
76 | * utf8_encodelen - Calculate the UTF-8 encoding length | |
77 | * | |
78 | * This function takes a Unicode input string, ucsp, of ucslen bytes | |
79 | * and calculates the size of the UTF-8 output in bytes (not including | |
80 | * a NULL termination byte). The string must reside in kernel memory. | |
81 | * | |
82 | * FLAGS | |
83 | * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime | |
84 | * | |
85 | * UTF_BIG_ENDIAN: Unicode byte order is always big endian | |
86 | * | |
87 | * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian | |
88 | * | |
89 | * UTF_DECOMPOSED: assume fully decomposed output | |
90 | * | |
91 | * ERRORS | |
92 | * None | |
93 | */ | |
94 | size_t | |
95 | utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, | |
96 | int flags); | |
97 | ||
98 | ||
99 | /* | |
100 | * utf8_encodestr - Encodes a Unicode string into UTF-8 | |
101 | * | |
102 | * This function takes a Unicode input string, ucsp, of ucslen bytes | |
103 | * and produces the UTF-8 output into a buffer of buflen bytes pointed | |
104 | * to by utf8p. The size of the output in bytes (not including a NULL | |
105 | * termination byte) is returned in utf8len. The UTF-8 string output | |
106 | * is NULL terminated. Both buffers must reside in kernel memory. | |
107 | * | |
108 | * If '/' chars are possible in the Unicode input then an alternate | |
109 | * (replacement) char must be provided in altslash. | |
110 | * | |
111 | * FLAGS | |
112 | * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime | |
113 | * | |
114 | * UTF_BIG_ENDIAN: Unicode byte order is always big endian | |
115 | * | |
116 | * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian | |
117 | * | |
118 | * UTF_NO_NULL_TERM: do not add null termination to output string | |
119 | * | |
120 | * UTF_DECOMPOSED: generate fully decomposed output | |
121 | * | |
122 | * ERRORS | |
123 | * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded | |
124 | * | |
125 | * EINVAL: illegal Unicode char encountered | |
126 | */ | |
127 | int | |
128 | utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, | |
129 | size_t * utf8len, size_t buflen, u_int16_t altslash, int flags); | |
91447636 | 130 | |
91447636 | 131 | |
2d21ac55 A |
132 | /* |
133 | * utf8_decodestr - Decodes a UTF-8 string into Unicode | |
134 | * | |
135 | * This function takes a UTF-8 input string, utf8p, of utf8len bytes | |
136 | * and produces the Unicode output into a buffer of buflen bytes pointed | |
137 | * to by ucsp. The size of the output in bytes (not including a NULL | |
138 | * termination byte) is returned in ucslen. Both buffers must reside | |
139 | * in kernel memory. | |
140 | * | |
141 | * If '/' chars are allowed in the Unicode output then an alternate | |
142 | * (replacement) char must be provided in altslash. | |
143 | * | |
144 | * FLAGS | |
145 | * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime | |
146 | * | |
147 | * UTF_BIG_ENDIAN: Unicode byte order is always big endian | |
148 | * | |
149 | * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian | |
150 | * | |
151 | * UTF_DECOMPOSED: generate fully decomposed output (NFD) | |
152 | * | |
153 | * UTF_PRECOMPOSED: generate precomposed output (NFC) | |
154 | * | |
155 | * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input | |
156 | * | |
157 | * ERRORS | |
158 | * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded. | |
159 | * | |
160 | * EINVAL: illegal UTF-8 sequence encountered. | |
161 | */ | |
162 | int | |
163 | utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, | |
164 | size_t *ucslen, size_t buflen, u_int16_t altslash, int flags); | |
165 | ||
166 | ||
167 | /* | |
168 | * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD) | |
169 | * | |
170 | * This function takes a UTF-8 input string, instr, of inlen bytes | |
171 | * and produces normalized UTF-8 output into a buffer of buflen bytes | |
172 | * pointed to by outstr. The size of the output in bytes (not including | |
173 | * a NULL termination byte) is returned in outlen. In-place conversions | |
174 | * are not supported (i.e. instr != outstr). Both buffers must reside | |
175 | * in kernel memory. | |
176 | * | |
177 | * FLAGS | |
178 | * UTF_DECOMPOSED: output string will be fully decomposed (NFD) | |
179 | * | |
180 | * UTF_PRECOMPOSED: output string will be precomposed (NFC) | |
181 | * | |
182 | * UTF_NO_NULL_TERM: do not add null termination to output string | |
183 | * | |
184 | * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input | |
185 | * | |
186 | * ERRORS | |
187 | * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes | |
188 | * | |
189 | * EINVAL: illegal UTF-8 sequence encountered or invalid flags | |
190 | */ | |
191 | int | |
192 | utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, | |
193 | size_t *outlen, size_t buflen, int flags); | |
194 | ||
195 | ||
196 | /* | |
197 | * utf8_validatestr - validates a UTF-8 string | |
198 | * | |
199 | * This function takes a UTF-8 input string, utf8p, of utf8len bytes | |
200 | * and determines if it is valid UTF-8. The string must reside in kernel | |
201 | * memory. | |
202 | * | |
203 | * ERRORS | |
204 | * EINVAL: illegal UTF-8 sequence encountered. | |
205 | */ | |
206 | int | |
207 | utf8_validatestr(const u_int8_t* utf8p, size_t utf8len); | |
208 | ||
1c79356b | 209 | |
1c79356b A |
210 | __END_DECLS |
211 | ||
9bccf70c | 212 | #endif /* __APPLE_API_UNSTABLE */ |
1c79356b A |
213 | #endif /* KERNEL */ |
214 | ||
215 | #endif /* !_SYS_UTFCONV_H_ */ |