]> git.saurik.com Git - apple/xnu.git/blob - bsd/sys/utfconv.h
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / sys / utfconv.h
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #ifndef _SYS_UTFCONV_H_
30 #define _SYS_UTFCONV_H_
31
32 #include <sys/appleapiopts.h>
33 #include <sys/cdefs.h>
34
35 #ifdef KERNEL
36 #ifdef __APPLE_API_UNSTABLE
37
38 /*
39 * UTF-8 encode/decode flags (each flag occupies its own bit)
40 */
41 #define UTF_REVERSE_ENDIAN 0x0001 /* reverse UCS-2 byte order */
42 #define UTF_NO_NULL_TERM 0x0002 /* do not add null termination */
43 #define UTF_DECOMPOSED 0x0004 /* generate fully decomposed UCS-2 */
44 #define UTF_PRECOMPOSED 0x0008 /* generate precomposed UCS-2 */
45 #define UTF_ESCAPE_ILLEGAL 0x0010 /* escape illegal UTF-8 */
46 #define UTF_SFM_CONVERSIONS 0x0020 /* use SFM mappings for illegal NTFS chars */
47 /* The two macros below are 0 when BYTE_ORDER already matches, otherwise UTF_REVERSE_ENDIAN. */
48 #define UTF_BIG_ENDIAN \
49 ((BYTE_ORDER == BIG_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN)
50
51 #define UTF_LITTLE_ENDIAN \
52 ((BYTE_ORDER == LITTLE_ENDIAN) ? 0 : UTF_REVERSE_ENDIAN)
53
54 __BEGIN_DECLS
55
56
57 /*
58 * unicode_combinable - Test for a combining Unicode character.
59 *
60 * This function is similar to __CFUniCharIsNonBaseCharacter except
61 * that it also includes Hangul Jamo characters.
62 */
63
64 int unicode_combinable(u_int16_t character);
65
66 /*
67 * unicode_decomposeable - Test for a precomposed character.
68 *
69 * Similar to __CFUniCharIsDecomposableCharacter.
70 */
71
72 int unicode_decomposeable(u_int16_t character);
73
74
75 /*
76 * utf8_encodelen - Calculate the UTF-8 encoding length
77 *
78 * This function takes a Unicode input string, ucsp, of ucslen bytes
79 * and calculates the size of the UTF-8 output in bytes (not including
80 * a NULL termination byte). The string must reside in kernel memory.
81 * NOTE(review): altslash is presumably the '/' replacement char, as in utf8_encodestr - confirm.
82 * FLAGS
83 * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime
84 *
85 * UTF_BIG_ENDIAN: Unicode byte order is always big endian
86 *
87 * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian
88 *
89 * UTF_DECOMPOSED: assume fully decomposed output
90 *
91 * ERRORS
92 * None
93 */
94 size_t
95 utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
96 int flags);
97
98
99 /*
100 * utf8_encodestr - Encodes a Unicode string into UTF-8
101 *
102 * This function takes a Unicode input string, ucsp, of ucslen bytes
103 * and produces the UTF-8 output into a buffer of buflen bytes pointed
104 * to by utf8p. The size of the output in bytes (not including a NULL
105 * termination byte) is returned in utf8len. The UTF-8 string output
106 * is NULL terminated unless UTF_NO_NULL_TERM is given. Both buffers must reside in kernel memory.
107 *
108 * If '/' chars are possible in the Unicode input then an alternate
109 * (replacement) char must be provided in altslash.
110 *
111 * FLAGS
112 * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime
113 *
114 * UTF_BIG_ENDIAN: Unicode byte order is always big endian
115 *
116 * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian
117 *
118 * UTF_NO_NULL_TERM: do not add null termination to output string
119 *
120 * UTF_DECOMPOSED: generate fully decomposed output
121 *
122 * ERRORS
123 * ENAMETOOLONG: output did not fit; only utf8len bytes were encoded
124 *
125 * EINVAL: illegal Unicode char encountered
126 */
127 int
128 utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
129 size_t * utf8len, size_t buflen, u_int16_t altslash, int flags);
130
131
132 /*
133 * utf8_decodestr - Decodes a UTF-8 string into Unicode
134 *
135 * This function takes a UTF-8 input string, utf8p, of utf8len bytes
136 * and produces the Unicode output into a buffer of buflen bytes pointed
137 * to by ucsp. The size of the output in bytes (not including a NULL
138 * termination byte) is returned in ucslen. Both buffers must reside
139 * in kernel memory.
140 *
141 * If '/' chars are allowed in the Unicode output then an alternate
142 * (replacement) char must be provided in altslash.
143 *
144 * FLAGS
145 * UTF_REVERSE_ENDIAN: Unicode byte order is opposite current runtime
146 *
147 * UTF_BIG_ENDIAN: Unicode byte order is always big endian
148 *
149 * UTF_LITTLE_ENDIAN: Unicode byte order is always little endian
150 *
151 * UTF_DECOMPOSED: generate fully decomposed output (NFD)
152 *
153 * UTF_PRECOMPOSED: generate precomposed output (NFC)
154 *
155 * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input
156 *
157 * ERRORS
158 * ENAMETOOLONG: output did not fit; only ucslen bytes were decoded.
159 *
160 * EINVAL: illegal UTF-8 sequence encountered.
161 */
162 int
163 utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
164 size_t *ucslen, size_t buflen, u_int16_t altslash, int flags);
165
166
167 /*
168 * utf8_normalizestr - Normalize a UTF-8 string (NFC or NFD)
169 *
170 * This function takes a UTF-8 input string, instr, of inlen bytes
171 * and produces normalized UTF-8 output into a buffer of buflen bytes
172 * pointed to by outstr. The size of the output in bytes (not including
173 * a NULL termination byte) is returned in outlen. In-place conversions
174 * are not supported (i.e. instr must differ from outstr). Both buffers must reside
175 * in kernel memory.
176 *
177 * FLAGS
178 * UTF_DECOMPOSED: output string will be fully decomposed (NFD)
179 *
180 * UTF_PRECOMPOSED: output string will be precomposed (NFC)
181 *
182 * UTF_NO_NULL_TERM: do not add null termination to output string
183 *
184 * UTF_ESCAPE_ILLEGAL: percent escape any illegal UTF-8 input
185 *
186 * ERRORS
187 * ENAMETOOLONG: output did not fit or input exceeded MAXPATHLEN bytes
188 *
189 * EINVAL: illegal UTF-8 sequence encountered or invalid flags
190 */
191 int
192 utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr,
193 size_t *outlen, size_t buflen, int flags);
194
195
196 /*
197 * utf8_validatestr - validates a UTF-8 string
198 *
199 * This function takes a UTF-8 input string, utf8p, of utf8len bytes
200 * and determines if it is valid UTF-8. The string must reside in kernel
201 * memory.
202 *
203 * ERRORS
204 * EINVAL: illegal UTF-8 sequence encountered.
205 */
206 int
207 utf8_validatestr(const u_int8_t* utf8p, size_t utf8len);
208
209
210 __END_DECLS
211
212 #endif /* __APPLE_API_UNSTABLE */
213 #endif /* KERNEL */
214
215 #endif /* !_SYS_UTFCONV_H_ */