]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
39236c6e | 2 | * Copyright (c) 2000-2013 Apple Computer, Inc. All rights reserved. |
5d5c5d0d | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b | 27 | */ |
39236c6e A |
28 | |
29 | #include <sys/types.h> | |
30 | #include <sys/errno.h> | |
31 | ||
2d21ac55 | 32 | #if HFS |
1c79356b A |
33 | |
34 | #include <sys/param.h> | |
35 | #include <sys/systm.h> | |
36 | #include <sys/kernel.h> | |
1c79356b A |
37 | #include <sys/malloc.h> |
38 | #include <sys/queue.h> | |
39 | #include <sys/utfconv.h> | |
2d21ac55 A |
40 | #include <kern/host.h> |
41 | #include <mach/host_priv.h> | |
b0d623f7 A |
42 | #include <libkern/OSKextLib.h> |
43 | #include <libkern/OSKextLibPrivate.h> | |
1c79356b A |
44 | |
45 | #include "hfs.h" | |
46 | ||
47 | ||
91447636 A |
48 | lck_grp_t * encodinglst_lck_grp; /* lock group for encodinglst_mutex; allocated in hfs_converterinit */ |
49 | lck_grp_attr_t * encodinglst_lck_grp_attr; /* attributes used to allocate encodinglst_lck_grp */ | |
50 | lck_attr_t * encodinglst_lck_attr; /* attributes used to initialize encodinglst_mutex */ | |
51 | ||
52 | ||
1c79356b A |
53 | /* hfs encoding converter list: singly-linked list of struct hfs_encoding entries */ |
54 | SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0}; | |
91447636 A |
55 | |
56 | lck_mtx_t encodinglst_mutex; /* held by the add/rem/get/rel converter routines while they walk or modify hfs_encoding_list */ | |
57 | ||
1c79356b A |
58 | /* hfs encoding converter entry: one registered pair of conversion routines for a text encoding */ |
59 | struct hfs_encoding { | |
60 | SLIST_ENTRY(hfs_encoding) link; /* linkage on hfs_encoding_list */ | |
61 | int refcount; /* incremented by hfs_getconverter; decremented by hfs_relconverter and hfs_remconverter */ | |
62 | int kmod_id; /* id passed to hfs_addconverter; 0 for the resident MacRoman converter, otherwise used as the kext load tag on release */ | |
2d21ac55 | 63 | u_int32_t encoding; /* text encoding this entry converts; lookup key */ |
1c79356b A |
64 | hfs_to_unicode_func_t get_unicode_func; /* HFS name -> Unicode routine */ |
65 | unicode_to_hfs_func_t get_hfsname_func; /* Unicode -> HFS name routine */ | |
66 | }; | |
67 | ||
1c79356b A |
68 | #define MAX_HFS_UNICODE_CHARS (15*5) |
69 | ||
39236c6e | 70 | #if CONFIG_HFS_STD |
2d21ac55 | 71 | static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str); |
39236c6e | 72 | #endif |
1c79356b A |
73 | |
74 | void | |
75 | hfs_converterinit(void) | |
76 | { | |
77 | SLIST_INIT(&hfs_encoding_list); | |
91447636 A |
78 | |
79 | encodinglst_lck_grp_attr= lck_grp_attr_alloc_init(); | |
91447636 | 80 | encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr); |
91447636 | 81 | encodinglst_lck_attr = lck_attr_alloc_init(); |
91447636 A |
82 | |
83 | lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr); | |
1c79356b | 84 | |
39236c6e | 85 | #if CONFIG_HFS_STD |
1c79356b A |
86 | /* |
87 | * add resident MacRoman converter and take a reference | |
39236c6e A |
88 | * since its always "loaded". MacRoman is the default converter |
89 | * for HFS standard volumes. | |
90 | * | |
91 | * Only do this if we are actually supporting HFS standard | |
92 | * volumes. The converter is not used on configurations | |
93 | * that do not support HFS standard. | |
1c79356b A |
94 | */ |
95 | hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman); | |
96 | SLIST_FIRST(&hfs_encoding_list)->refcount++; | |
39236c6e A |
97 | #endif |
98 | ||
99 | } | |
100 | ||
101 | #if !CONFIG_HFS_STD | |
102 | ||
103 | /* | |
104 | * Function stubs are needed for KPI export. | |
105 | * It is a little swizzly to have two separate copies of the stub functions in this file | |
106 | * but the prototypes of these functions are different if we're using the real headers | |
107 | * vs. the dummy prototypes at the end of the file. (hfs_to_unicode_func_t vs. void*) | |
108 | * | |
109 | * As a result, we need our own copies in the no-HFS-Standard configuration | |
110 | */ | |
111 | int hfs_addconverter( __unused int id, | |
112 | __unused u_int32_t encoding, | |
113 | __unused hfs_to_unicode_func_t get_unicode, | |
114 | __unused unicode_to_hfs_func_t get_hfsname ) | |
115 | { | |
116 | return(0); | |
117 | } | |
118 | ||
119 | int hfs_getconverter( __unused u_int32_t encoding, | |
120 | __unused hfs_to_unicode_func_t *get_unicode, | |
121 | __unused unicode_to_hfs_func_t *get_hfsname) | |
122 | { | |
123 | return(EINVAL); | |
124 | } | |
125 | ||
126 | int hfs_relconverter(__unused u_int32_t encoding) | |
127 | { | |
128 | return(EINVAL); | |
1c79356b A |
129 | } |
130 | ||
39236c6e A |
131 | int hfs_remconverter(__unused int id, __unused u_int32_t encoding) |
132 | { | |
133 | return(0); | |
134 | } | |
135 | ||
136 | #else | |
137 | ||
138 | /* | |
139 | * For configurations that do support HFS standard, we need all of these.. | |
140 | */ | |
1c79356b A |
141 | |
142 | /* | |
143 | * hfs_addconverter - add an HFS encoding converter | |
144 | * | |
145 | * This is called exclusivly by kernel loadable modules | |
146 | * (like HFS_Japanese.kmod) to register hfs encoding | |
147 | * conversion routines. | |
148 | * | |
149 | */ | |
150 | int | |
2d21ac55 | 151 | hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname) |
1c79356b A |
152 | { |
153 | struct hfs_encoding *encp; | |
154 | ||
155 | MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK); | |
156 | ||
91447636 | 157 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
158 | |
159 | encp->link.sle_next = NULL; | |
160 | encp->refcount = 0; | |
161 | encp->encoding = encoding; | |
162 | encp->get_unicode_func = get_unicode; | |
163 | encp->get_hfsname_func = get_hfsname; | |
164 | encp->kmod_id = id; | |
165 | SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link); | |
166 | ||
91447636 | 167 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b A |
168 | return (0); |
169 | } | |
170 | ||
171 | ||
172 | /* | |
173 | * hfs_remconverter - remove an HFS encoding converter | |
174 | * | |
175 | * Can be called by a kernel loadable module's finalize | |
176 | * routine to remove an encoding converter so that the | |
177 | * module (i.e. the code) can be unloaded. | |
178 | * | |
179 | * However, in the normal case, the removing and unloading | |
180 | * of these converters is done in hfs_relconverter. | |
181 | * The call is initiated from within the kernel during the unmounting of an hfs voulume. | |
182 | */ | |
183 | int | |
2d21ac55 | 184 | hfs_remconverter(int id, u_int32_t encoding) |
1c79356b A |
185 | { |
186 | struct hfs_encoding *encp; | |
1c79356b | 187 | |
91447636 | 188 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
189 | SLIST_FOREACH(encp, &hfs_encoding_list, link) { |
190 | if (encp->encoding == encoding && encp->kmod_id == id) { | |
191 | encp->refcount--; | |
192 | ||
193 | /* if converter is no longer in use, release it */ | |
194 | if (encp->refcount <= 0 && encp->kmod_id != 0) { | |
195 | SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); | |
91447636 | 196 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b | 197 | FREE(encp, M_TEMP); |
91447636 | 198 | return (0); |
1c79356b | 199 | } else { |
91447636 A |
200 | lck_mtx_unlock(&encodinglst_mutex); |
201 | return (1); /* busy */ | |
1c79356b A |
202 | } |
203 | break; | |
204 | } | |
205 | } | |
91447636 | 206 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b | 207 | |
91447636 | 208 | return (0); |
1c79356b A |
209 | } |
210 | ||
211 | ||
212 | /* | |
213 | * hfs_getconverter - get HFS encoding converters | |
214 | * | |
215 | * Normally called during the mounting of an hfs voulume. | |
216 | */ | |
217 | int | |
2d21ac55 | 218 | hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname) |
1c79356b A |
219 | { |
220 | struct hfs_encoding *encp; | |
221 | int found = 0; | |
222 | ||
91447636 | 223 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
224 | SLIST_FOREACH(encp, &hfs_encoding_list, link) { |
225 | if (encp->encoding == encoding) { | |
226 | found = 1; | |
227 | *get_unicode = encp->get_unicode_func; | |
228 | *get_hfsname = encp->get_hfsname_func; | |
229 | ++encp->refcount; | |
230 | break; | |
231 | } | |
232 | } | |
91447636 | 233 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b A |
234 | |
235 | if (!found) { | |
236 | *get_unicode = NULL; | |
237 | *get_hfsname = NULL; | |
238 | return (EINVAL); | |
239 | } | |
240 | ||
241 | return (0); | |
242 | } | |
243 | ||
244 | ||
245 | /* | |
246 | * hfs_relconverter - release interest in an HFS encoding converter | |
247 | * | |
248 | * Normally called during the unmounting of an hfs voulume. | |
249 | */ | |
250 | int | |
2d21ac55 | 251 | hfs_relconverter(u_int32_t encoding) |
1c79356b A |
252 | { |
253 | struct hfs_encoding *encp; | |
1c79356b | 254 | |
91447636 | 255 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
256 | SLIST_FOREACH(encp, &hfs_encoding_list, link) { |
257 | if (encp->encoding == encoding) { | |
1c79356b A |
258 | encp->refcount--; |
259 | ||
260 | /* if converter is no longer in use, release it */ | |
261 | if (encp->refcount <= 0 && encp->kmod_id != 0) { | |
b0d623f7 | 262 | uint32_t loadTag = (uint32_t)encp->kmod_id; |
1c79356b A |
263 | |
264 | SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); | |
91447636 A |
265 | lck_mtx_unlock(&encodinglst_mutex); |
266 | ||
267 | FREE(encp, M_TEMP); | |
b0d623f7 | 268 | (void)OSKextUnloadKextWithLoadTag(loadTag); |
91447636 | 269 | return (0); |
1c79356b | 270 | } |
91447636 A |
271 | lck_mtx_unlock(&encodinglst_mutex); |
272 | return (0); | |
1c79356b A |
273 | } |
274 | } | |
91447636 | 275 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b | 276 | |
91447636 | 277 | return (EINVAL); |
1c79356b A |
278 | } |
279 | ||
1c79356b A |
280 | /* |
281 | * Convert HFS encoded string into UTF-8 | |
282 | * | |
283 | * Unicode output is fully decomposed | |
284 | * '/' chars are converted to ':' | |
285 | */ | |
286 | int | |
2d21ac55 | 287 | hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr) |
1c79356b A |
288 | { |
289 | int error; | |
290 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
291 | ItemCount uniCount; | |
292 | size_t utf8len; | |
293 | hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode; | |
6d2010ae | 294 | u_int8_t pascal_length = 0; |
1c79356b | 295 | |
d41d1dae A |
296 | /* |
297 | * Validate the length of the Pascal-style string before passing it | |
298 | * down to the decoding engine. | |
299 | */ | |
300 | pascal_length = *((const u_int8_t*)(hfs_str)); | |
301 | if (pascal_length > 31) { | |
302 | /* invalid string; longer than 31 bytes */ | |
303 | error = EINVAL; | |
304 | return error; | |
305 | } | |
6d2010ae | 306 | |
1c79356b A |
307 | error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount); |
308 | ||
309 | if (uniCount == 0) | |
310 | error = EINVAL; | |
311 | ||
312 | if (error == 0) { | |
313 | error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0); | |
314 | if (error == ENAMETOOLONG) | |
315 | *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0); | |
316 | else | |
317 | *actualDstLen = utf8len; | |
318 | } | |
319 | ||
320 | return error; | |
321 | } | |
322 | ||
1c79356b A |
323 | /* |
324 | * When an HFS name cannot be encoded with the current | |
325 | * volume encoding then MacRoman is used as a fallback. | |
326 | */ | |
327 | int | |
2d21ac55 | 328 | mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr) |
1c79356b A |
329 | { |
330 | int error; | |
331 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
332 | ItemCount uniCount; | |
333 | size_t utf8len; | |
d41d1dae A |
334 | u_int8_t pascal_length = 0; |
335 | ||
336 | /* | |
337 | * Validate the length of the Pascal-style string before passing it | |
338 | * down to the decoding engine. | |
339 | */ | |
340 | pascal_length = *((const u_int8_t*)(hfs_str)); | |
341 | if (pascal_length > 31) { | |
342 | /* invalid string; longer than 31 bytes */ | |
343 | error = EINVAL; | |
344 | return error; | |
6d2010ae | 345 | } |
1c79356b A |
346 | |
347 | error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount); | |
348 | ||
349 | if (uniCount == 0) | |
350 | error = EINVAL; | |
351 | ||
352 | if (error == 0) { | |
353 | error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0); | |
354 | if (error == ENAMETOOLONG) | |
355 | *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0); | |
356 | else | |
357 | *actualDstLen = utf8len; | |
358 | } | |
359 | ||
360 | return error; | |
361 | } | |
362 | ||
9bccf70c A |
363 | /* |
364 | * Convert Unicode string into HFS encoding | |
365 | * | |
366 | * ':' chars are converted to '/' | |
367 | * Assumes input represents fully decomposed Unicode | |
368 | */ | |
369 | int | |
370 | unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry) | |
371 | { | |
372 | int error; | |
373 | unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname; | |
374 | ||
375 | error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr); | |
376 | if (error && retry) { | |
377 | error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr); | |
378 | } | |
379 | return error; | |
380 | } | |
381 | ||
1c79356b A |
382 | /* |
383 | * Convert UTF-8 string into HFS encoding | |
384 | * | |
385 | * ':' chars are converted to '/' | |
386 | * Assumes input represents fully decomposed Unicode | |
387 | */ | |
388 | int | |
9bccf70c | 389 | utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/) |
1c79356b A |
390 | { |
391 | int error; | |
392 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
393 | size_t ucslen; | |
1c79356b A |
394 | |
395 | error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0); | |
396 | if (error == 0) | |
9bccf70c | 397 | error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1); |
1c79356b A |
398 | |
399 | return error; | |
400 | } | |
401 | ||
39236c6e | 402 | |
1c79356b A |
403 | int |
404 | utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr) | |
405 | { | |
406 | int error; | |
407 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
408 | size_t ucslen; | |
409 | ||
410 | error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0); | |
411 | if (error == 0) | |
412 | error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr); | |
413 | ||
414 | return error; | |
415 | } | |
416 | ||
417 | /* | |
418 | * HFS MacRoman to/from Unicode conversions are built into the kernel | |
419 | * All other HFS encodings are loadable. | |
420 | */ | |
421 | ||
422 | /* 0x00A0 - 0x00FF = Latin 1 Supplement (30 total); MacRoman byte for each code point, indexed by (low byte - 0xA0); '?' marks entries with no mapping */ | |
2d21ac55 | 423 | static u_int8_t gLatin1Table[] = { |
1c79356b A |
424 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ |
425 | /* 0x00A0 */ 0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4, '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2, '?', 0xA8, 0xF8, | |
426 | /* 0x00B0 */ 0xA1, 0XB1, '?', '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC, '?', 0xBC, 0xC8, '?', '?', '?', 0xC0, | |
427 | /* 0x00C0 */ '?', '?', '?', '?', '?', '?', 0xAE, '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
428 | /* 0x00D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xAF, '?', '?', '?', '?', '?', '?', 0xA7, | |
429 | /* 0x00E0 */ '?', '?', '?', '?', '?', '?', 0xBE, '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
430 | /* 0x00F0 */ '?', '?', '?', '?', '?', '?', '?', 0xD6, 0xBF, '?', '?', '?', '?', '?', '?', '?' | |
431 | }; | |
432 | ||
433 | /* 0x02C0 - 0x02DF = Spacing Modifiers (8 total); MacRoman byte for each code point, indexed by (low byte - 0xC0); '?' marks entries with no mapping */ | |
2d21ac55 | 434 | static u_int8_t gSpaceModsTable[] = { |
1c79356b A |
435 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ |
436 | /* 0x02C0 */ '?', '?', '?', '?', '?', '?', 0xF6, 0xFF, '?', '?', '?', '?', '?', '?', '?', '?', | |
437 | /* 0x02D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD, '?', '?' | |
438 | }; | |
439 | ||
440 | /* 0x2010 - 0x20AF = General Punctuation (17 total); MacRoman byte for each code point, indexed by (low byte - 0x10); '?' marks entries with no mapping */ | |
2d21ac55 | 441 | static u_int8_t gPunctTable[] = { |
1c79356b A |
442 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ |
443 | /* 0x2010 */ '?', '?', '?', 0xd0, 0xd1, '?', '?', '?', 0xd4, 0xd5, 0xe2, '?', 0xd2, 0xd3, 0xe3, '?', | |
444 | /* 0x2020 */ 0xa0, 0xe0, 0xa5, '?', '?', '?', 0xc9, '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
445 | /* 0x2030 */ 0xe4, '?', '?', '?', '?', '?', '?', '?', '?', 0xdc, 0xdd, '?', '?', '?', '?', '?', | |
446 | /* 0x2040 */ '?', '?', '?', '?', 0xda, '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
447 | /* 0x2050 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
448 | /* 0x2060 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
449 | /* 0x2070 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
450 | /* 0x2080 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
451 | /* 0x2090 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
452 | /* 0x20A0 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xdb, '?', '?', '?' | |
453 | }; | |
454 | ||
455 | /* 0x2200 - 0x2267 = Mathematical Operators (11 total); MacRoman byte for each code point, indexed by the low byte (0x68 entries); '?' marks entries with no mapping */ | |
2d21ac55 | 456 | static u_int8_t gMathTable[] = { |
1c79356b A |
457 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ |
458 | /* 0x2200 */ '?', '?', 0xb6, '?', '?', '?', 0xc6, '?', '?', '?', '?', '?', '?', '?', '?', 0xb8, | |
459 | /* 0x2210 */ '?', 0xb7, '?', '?', '?', '?', '?', '?', '?', '?', 0xc3, '?', '?', '?', 0xb0, '?', | |
460 | /* 0x2220 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xba, '?', '?', '?', '?', | |
461 | /* 0x2230 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
462 | /* 0x2240 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xc5, '?', '?', '?', '?', '?', '?', '?', | |
463 | /* 0x2250 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
464 | /* 0x2260 */ 0xad, '?', '?', '?', 0xb2, 0xb3, '?', '?' | |
465 | }; | |
466 | ||
467 | /* Two-part table for combining a base letter with a mark in 0x0300 - 0x030A: the first 64 bytes, indexed by (base char - 0x40), give the offset of that letter's 11-entry row (0xDA is the all-'?' "else" row); each row is indexed by the mark's low byte and gives the MacRoman byte */ | |
2d21ac55 | 468 | static u_int8_t gReverseCombTable[] = { |
1c79356b A |
469 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ |
470 | /* 0x40 */ 0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98, | |
471 | /* 0x50 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, | |
472 | /* 0x60 */ 0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3, | |
473 | /* 0x70 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, | |
474 | ||
475 | /* Combining Diacritical Marks (0x0300 - 0x030A) */ | |
476 | /* 0 1 2 3 4 5 6 7 8 9 A */ | |
477 | /* 'A' */ | |
478 | /* 0x0300 */ 0xCB, 0xE7, 0xE5, 0xCC, '?', '?', '?', '?', 0x80, '?', 0x81, | |
479 | ||
480 | /* 'a' */ | |
481 | /* 0x0300 */ 0x88, 0x87, 0x89, 0x8B, '?', '?', '?', '?', 0x8A, '?', 0x8C, | |
482 | ||
483 | /* 'E' */ | |
484 | /* 0x0300 */ 0xE9, 0x83, 0xE6, '?', '?', '?', '?', '?', 0xE8, '?', '?', | |
485 | ||
486 | /* 'e' */ | |
487 | /* 0x0300 */ 0x8F, 0x8E, 0x90, '?', '?', '?', '?', '?', 0x91, '?', '?', | |
488 | ||
489 | /* 'I' */ | |
490 | /* 0x0300 */ 0xED, 0xEA, 0xEB, '?', '?', '?', '?', '?', 0xEC, '?', '?', | |
491 | ||
492 | /* 'i' */ | |
493 | /* 0x0300 */ 0x93, 0x92, 0x94, '?', '?', '?', '?', '?', 0x95, '?', '?', | |
494 | ||
495 | /* 'N' */ | |
496 | /* 0x0300 */ '?', '?', '?', 0x84, '?', '?', '?', '?', '?', '?', '?', | |
497 | ||
498 | /* 'n' */ | |
499 | /* 0x0300 */ '?', '?', '?', 0x96, '?', '?', '?', '?', '?', '?', '?', | |
500 | ||
501 | /* 'O' */ | |
502 | /* 0x0300 */ 0xF1, 0xEE, 0xEF, 0xCD, '?', '?', '?', '?', 0x85, '?', '?', | |
503 | ||
504 | /* 'o' */ | |
505 | /* 0x0300 */ 0x98, 0x97, 0x99, 0x9B, '?', '?', '?', '?', 0x9A, '?', '?', | |
506 | ||
507 | /* 'U' */ | |
508 | /* 0x0300 */ 0xF4, 0xF2, 0xF3, '?', '?', '?', '?', '?', 0x86, '?', '?', | |
509 | ||
510 | /* 'u' */ | |
511 | /* 0x0300 */ 0x9D, 0x9C, 0x9E, '?', '?', '?', '?', '?', 0x9F, '?', '?', | |
512 | ||
513 | /* 'Y' */ | |
514 | /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD9, '?', '?', | |
515 | ||
516 | /* 'y' */ | |
517 | /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD8, '?', '?', | |
518 | ||
519 | /* else */ | |
520 | /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?' | |
521 | }; | |
522 | ||
523 | ||
524 | /* | |
525 | * Convert Unicode string into HFS MacRoman encoding | |
526 | * | |
527 | * Assumes Unicode input is fully decomposed | |
528 | */ | |
2d21ac55 | 529 | static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str) |
1c79356b | 530 | { |
2d21ac55 | 531 | u_int8_t *p; |
1c79356b A |
532 | const UniChar *u; |
533 | UniChar c; | |
534 | UniChar mask; | |
2d21ac55 A |
535 | u_int16_t inputChars; |
536 | u_int16_t pascalChars; | |
1c79356b | 537 | OSErr result = noErr; |
2d21ac55 A |
538 | u_int8_t lsb; |
539 | u_int8_t prevChar; | |
540 | u_int8_t mc; | |
1c79356b A |
541 | |
542 | mask = (UniChar) 0xFF80; | |
543 | p = &hfs_str[1]; | |
544 | u = uni_str; | |
545 | inputChars = unicodeChars; | |
546 | pascalChars = prevChar = 0; | |
547 | ||
548 | while (inputChars) { | |
549 | c = *(u++); | |
2d21ac55 | 550 | lsb = (u_int8_t) c; |
1c79356b A |
551 | |
552 | /* | |
553 | * If its not 7-bit ascii, then we need to map it | |
554 | */ | |
555 | if ( c & mask ) { | |
556 | mc = '?'; | |
557 | switch (c & 0xFF00) { | |
558 | case 0x0000: | |
559 | if (lsb >= 0xA0) | |
560 | mc = gLatin1Table[lsb - 0xA0]; | |
561 | break; | |
562 | ||
563 | case 0x0200: | |
564 | if (lsb >= 0xC0 && lsb <= 0xDF) | |
565 | mc = gSpaceModsTable[lsb - 0xC0]; | |
566 | break; | |
567 | ||
568 | case 0x2000: | |
569 | if (lsb >= 0x10 && lsb <= 0xAF) | |
570 | mc = gPunctTable[lsb- 0x10]; | |
571 | break; | |
572 | ||
573 | case 0x2200: | |
fe8ab488 | 574 | if (lsb < 0x68) |
1c79356b A |
575 | mc = gMathTable[lsb]; |
576 | break; | |
577 | ||
578 | case 0x0300: | |
579 | if (c <= 0x030A) { | |
580 | if (prevChar >= 'A' && prevChar < 'z') { | |
581 | mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb]; | |
582 | --p; /* backup over base char */ | |
583 | --pascalChars; | |
584 | } | |
585 | } else { | |
586 | switch (c) { | |
587 | case 0x0327: /* combining cedilla */ | |
588 | if (prevChar == 'C') | |
589 | mc = 0x82; | |
590 | else if (prevChar == 'c') | |
591 | mc = 0x8D; | |
592 | else | |
593 | break; | |
594 | --p; /* backup over base char */ | |
595 | --pascalChars; | |
596 | break; | |
597 | ||
598 | case 0x03A9: mc = 0xBD; break; /* omega */ | |
599 | ||
600 | case 0x03C0: mc = 0xB9; break; /* pi */ | |
601 | } | |
602 | } | |
603 | break; | |
604 | ||
605 | default: | |
606 | switch (c) { | |
607 | case 0x0131: mc = 0xf5; break; /* dotless i */ | |
608 | ||
609 | case 0x0152: mc = 0xce; break; /* OE */ | |
610 | ||
611 | case 0x0153: mc = 0xcf; break; /* oe */ | |
612 | ||
613 |