]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
9bccf70c | 2 | * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
37839358 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
1c79356b | 11 | * |
37839358 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
37839358 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
1c79356b A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | ||
23 | #include <sys/param.h> | |
24 | #include <sys/systm.h> | |
25 | #include <sys/kernel.h> | |
1c79356b A |
26 | #include <sys/malloc.h> |
27 | #include <sys/queue.h> | |
28 | #include <sys/utfconv.h> | |
29 | ||
30 | #include "hfs.h" | |
31 | ||
32 | ||
91447636 A |
33 | lck_grp_t * encodinglst_lck_grp; |
34 | lck_grp_attr_t * encodinglst_lck_grp_attr; | |
35 | lck_attr_t * encodinglst_lck_attr; | |
36 | ||
37 | ||
1c79356b A |
38 | /* hfs encoding converter list */ |
39 | SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0}; | |
91447636 A |
40 | |
41 | lck_mtx_t encodinglst_mutex; | |
42 | ||
1c79356b A |
43 | |
44 | ||
45 | /* hfs encoding converter entry */ | |
46 | struct hfs_encoding { | |
47 | SLIST_ENTRY(hfs_encoding) link; | |
48 | int refcount; | |
49 | int kmod_id; | |
50 | UInt32 encoding; | |
51 | hfs_to_unicode_func_t get_unicode_func; | |
52 | unicode_to_hfs_func_t get_hfsname_func; | |
53 | }; | |
54 | ||
55 | /* XXX We should use an "official" interface! */ | |
56 | extern kern_return_t kmod_destroy(host_priv_t host_priv, kmod_t id); | |
57 | extern struct host realhost; | |
58 | ||
59 | #define MAX_HFS_UNICODE_CHARS (15*5) | |
60 | ||
55e303ae | 61 | int mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str, UInt32 maxCharLen, UInt32 *usedCharLen); |
1c79356b A |
62 | |
63 | static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str); | |
64 | ||
65 | ||
66 | void | |
67 | hfs_converterinit(void) | |
68 | { | |
69 | SLIST_INIT(&hfs_encoding_list); | |
91447636 A |
70 | |
71 | encodinglst_lck_grp_attr= lck_grp_attr_alloc_init(); | |
72 | lck_grp_attr_setstat(encodinglst_lck_grp_attr); | |
73 | encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr); | |
74 | ||
75 | encodinglst_lck_attr = lck_attr_alloc_init(); | |
76 | //lck_attr_setdebug(encodinglst_lck_attr); | |
77 | ||
78 | lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr); | |
1c79356b A |
79 | |
80 | /* | |
81 | * add resident MacRoman converter and take a reference | |
82 | * since its always "loaded". | |
83 | */ | |
84 | hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman); | |
85 | SLIST_FIRST(&hfs_encoding_list)->refcount++; | |
86 | } | |
87 | ||
88 | ||
89 | /* | |
90 | * hfs_addconverter - add an HFS encoding converter | |
91 | * | |
92 | * This is called exclusivly by kernel loadable modules | |
93 | * (like HFS_Japanese.kmod) to register hfs encoding | |
94 | * conversion routines. | |
95 | * | |
96 | */ | |
97 | int | |
98 | hfs_addconverter(int id, UInt32 encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname) | |
99 | { | |
100 | struct hfs_encoding *encp; | |
101 | ||
102 | MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK); | |
103 | ||
91447636 | 104 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
105 | |
106 | encp->link.sle_next = NULL; | |
107 | encp->refcount = 0; | |
108 | encp->encoding = encoding; | |
109 | encp->get_unicode_func = get_unicode; | |
110 | encp->get_hfsname_func = get_hfsname; | |
111 | encp->kmod_id = id; | |
112 | SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link); | |
113 | ||
91447636 | 114 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b A |
115 | return (0); |
116 | } | |
117 | ||
118 | ||
119 | /* | |
120 | * hfs_remconverter - remove an HFS encoding converter | |
121 | * | |
122 | * Can be called by a kernel loadable module's finalize | |
123 | * routine to remove an encoding converter so that the | |
124 | * module (i.e. the code) can be unloaded. | |
125 | * | |
126 | * However, in the normal case, the removing and unloading | |
127 | * of these converters is done in hfs_relconverter. | |
128 | * The call is initiated from within the kernel during the unmounting of an hfs voulume. | |
129 | */ | |
130 | int | |
131 | hfs_remconverter(int id, UInt32 encoding) | |
132 | { | |
133 | struct hfs_encoding *encp; | |
1c79356b | 134 | |
91447636 | 135 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
136 | SLIST_FOREACH(encp, &hfs_encoding_list, link) { |
137 | if (encp->encoding == encoding && encp->kmod_id == id) { | |
138 | encp->refcount--; | |
139 | ||
140 | /* if converter is no longer in use, release it */ | |
141 | if (encp->refcount <= 0 && encp->kmod_id != 0) { | |
142 | SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); | |
91447636 | 143 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b | 144 | FREE(encp, M_TEMP); |
91447636 | 145 | return (0); |
1c79356b | 146 | } else { |
91447636 A |
147 | lck_mtx_unlock(&encodinglst_mutex); |
148 | return (1); /* busy */ | |
1c79356b A |
149 | } |
150 | break; | |
151 | } | |
152 | } | |
91447636 | 153 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b | 154 | |
91447636 | 155 | return (0); |
1c79356b A |
156 | } |
157 | ||
158 | ||
159 | /* | |
160 | * hfs_getconverter - get HFS encoding converters | |
161 | * | |
162 | * Normally called during the mounting of an hfs voulume. | |
163 | */ | |
164 | int | |
165 | hfs_getconverter(UInt32 encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname) | |
166 | { | |
167 | struct hfs_encoding *encp; | |
168 | int found = 0; | |
169 | ||
91447636 | 170 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
171 | SLIST_FOREACH(encp, &hfs_encoding_list, link) { |
172 | if (encp->encoding == encoding) { | |
173 | found = 1; | |
174 | *get_unicode = encp->get_unicode_func; | |
175 | *get_hfsname = encp->get_hfsname_func; | |
176 | ++encp->refcount; | |
177 | break; | |
178 | } | |
179 | } | |
91447636 | 180 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b A |
181 | |
182 | if (!found) { | |
183 | *get_unicode = NULL; | |
184 | *get_hfsname = NULL; | |
185 | return (EINVAL); | |
186 | } | |
187 | ||
188 | return (0); | |
189 | } | |
190 | ||
191 | ||
192 | /* | |
193 | * hfs_relconverter - release interest in an HFS encoding converter | |
194 | * | |
195 | * Normally called during the unmounting of an hfs voulume. | |
196 | */ | |
197 | int | |
198 | hfs_relconverter(UInt32 encoding) | |
199 | { | |
200 | struct hfs_encoding *encp; | |
1c79356b | 201 | |
91447636 | 202 | lck_mtx_lock(&encodinglst_mutex); |
1c79356b A |
203 | SLIST_FOREACH(encp, &hfs_encoding_list, link) { |
204 | if (encp->encoding == encoding) { | |
1c79356b A |
205 | encp->refcount--; |
206 | ||
207 | /* if converter is no longer in use, release it */ | |
208 | if (encp->refcount <= 0 && encp->kmod_id != 0) { | |
209 | int id = encp->kmod_id; | |
210 | ||
211 | SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link); | |
91447636 A |
212 | lck_mtx_unlock(&encodinglst_mutex); |
213 | ||
214 | FREE(encp, M_TEMP); | |
215 | kmod_destroy((host_priv_t) host_priv_self(), id); | |
216 | return (0); | |
1c79356b | 217 | } |
91447636 A |
218 | lck_mtx_unlock(&encodinglst_mutex); |
219 | return (0); | |
1c79356b A |
220 | } |
221 | } | |
91447636 | 222 | lck_mtx_unlock(&encodinglst_mutex); |
1c79356b | 223 | |
91447636 | 224 | return (EINVAL); |
1c79356b A |
225 | } |
226 | ||
227 | ||
228 | /* | |
229 | * Convert HFS encoded string into UTF-8 | |
230 | * | |
231 | * Unicode output is fully decomposed | |
232 | * '/' chars are converted to ':' | |
233 | */ | |
234 | int | |
235 | hfs_to_utf8(ExtendedVCB *vcb, Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr) | |
236 | { | |
237 | int error; | |
238 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
239 | ItemCount uniCount; | |
240 | size_t utf8len; | |
241 | hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode; | |
242 | ||
243 | error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount); | |
244 | ||
245 | if (uniCount == 0) | |
246 | error = EINVAL; | |
247 | ||
248 | if (error == 0) { | |
249 | error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0); | |
250 | if (error == ENAMETOOLONG) | |
251 | *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0); | |
252 | else | |
253 | *actualDstLen = utf8len; | |
254 | } | |
255 | ||
256 | return error; | |
257 | } | |
258 | ||
259 | ||
260 | /* | |
261 | * When an HFS name cannot be encoded with the current | |
262 | * volume encoding then MacRoman is used as a fallback. | |
263 | */ | |
264 | int | |
265 | mac_roman_to_utf8(Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr) | |
266 | { | |
267 | int error; | |
268 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
269 | ItemCount uniCount; | |
270 | size_t utf8len; | |
271 | ||
272 | error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount); | |
273 | ||
274 | if (uniCount == 0) | |
275 | error = EINVAL; | |
276 | ||
277 | if (error == 0) { | |
278 | error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0); | |
279 | if (error == ENAMETOOLONG) | |
280 | *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0); | |
281 | else | |
282 | *actualDstLen = utf8len; | |
283 | } | |
284 | ||
285 | return error; | |
286 | } | |
287 | ||
288 | ||
9bccf70c A |
289 | /* |
290 | * Convert Unicode string into HFS encoding | |
291 | * | |
292 | * ':' chars are converted to '/' | |
293 | * Assumes input represents fully decomposed Unicode | |
294 | */ | |
295 | int | |
296 | unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry) | |
297 | { | |
298 | int error; | |
299 | unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname; | |
300 | ||
301 | error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr); | |
302 | if (error && retry) { | |
303 | error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr); | |
304 | } | |
305 | return error; | |
306 | } | |
307 | ||
1c79356b A |
308 | /* |
309 | * Convert UTF-8 string into HFS encoding | |
310 | * | |
311 | * ':' chars are converted to '/' | |
312 | * Assumes input represents fully decomposed Unicode | |
313 | */ | |
314 | int | |
9bccf70c | 315 | utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/) |
1c79356b A |
316 | { |
317 | int error; | |
318 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
319 | size_t ucslen; | |
1c79356b A |
320 | |
321 | error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0); | |
322 | if (error == 0) | |
9bccf70c | 323 | error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1); |
1c79356b A |
324 | |
325 | return error; | |
326 | } | |
327 | ||
328 | int | |
329 | utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr) | |
330 | { | |
331 | int error; | |
332 | UniChar uniStr[MAX_HFS_UNICODE_CHARS]; | |
333 | size_t ucslen; | |
334 | ||
335 | error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0); | |
336 | if (error == 0) | |
337 | error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr); | |
338 | ||
339 | return error; | |
340 | } | |
341 | ||
342 | /* | |
343 | * HFS MacRoman to/from Unicode conversions are built into the kernel. | |
344 | * All other hfs encodings are loadable. | |
345 | */ | |
346 | ||
347 | /* 0x00A0 - 0x00FF = Latin 1 Supplement (30 total) */ | |
348 | static UInt8 gLatin1Table[] = { | |
349 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ | |
350 | /* 0x00A0 */ 0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4, '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2, '?', 0xA8, 0xF8, | |
351 | /* 0x00B0 */ 0xA1, 0XB1, '?', '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC, '?', 0xBC, 0xC8, '?', '?', '?', 0xC0, | |
352 | /* 0x00C0 */ '?', '?', '?', '?', '?', '?', 0xAE, '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
353 | /* 0x00D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xAF, '?', '?', '?', '?', '?', '?', 0xA7, | |
354 | /* 0x00E0 */ '?', '?', '?', '?', '?', '?', 0xBE, '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
355 | /* 0x00F0 */ '?', '?', '?', '?', '?', '?', '?', 0xD6, 0xBF, '?', '?', '?', '?', '?', '?', '?' | |
356 | }; | |
357 | ||
358 | /* 0x02C0 - 0x02DF = Spacing Modifiers (8 total) */ | |
359 | static UInt8 gSpaceModsTable[] = { | |
360 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ | |
361 | /* 0x02C0 */ '?', '?', '?', '?', '?', '?', 0xF6, 0xFF, '?', '?', '?', '?', '?', '?', '?', '?', | |
362 | /* 0x02D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD, '?', '?' | |
363 | }; | |
364 | ||
365 | /* 0x2010 - 0x20AF = General Punctuation (17 total) */ | |
366 | static UInt8 gPunctTable[] = { | |
367 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ | |
368 | /* 0x2010 */ '?', '?', '?', 0xd0, 0xd1, '?', '?', '?', 0xd4, 0xd5, 0xe2, '?', 0xd2, 0xd3, 0xe3, '?', | |
369 | /* 0x2020 */ 0xa0, 0xe0, 0xa5, '?', '?', '?', 0xc9, '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
370 | /* 0x2030 */ 0xe4, '?', '?', '?', '?', '?', '?', '?', '?', 0xdc, 0xdd, '?', '?', '?', '?', '?', | |
371 | /* 0x2040 */ '?', '?', '?', '?', 0xda, '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
372 | /* 0x2050 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
373 | /* 0x2060 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
374 | /* 0x2070 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
375 | /* 0x2080 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
376 | /* 0x2090 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', | |
377 | /* 0x20A0 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xdb, '?', '?', '?' | |
378 | }; | |
379 | ||
/*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * Indexed by the low byte of the code point; each entry is the
 * MacRoman byte, or '?' where no mapping is provided.
 *
 * unicode_to_mac_roman accepts low bytes up to and including 0x68
 * for this table, so it must hold at least 0x69 entries.  The last
 * row used to stop at 0x2267, which let U+2268 read one byte past
 * the end of the array; the row is now padded with '?' through
 * 0x226F so every accepted index is in bounds.
 */
static UInt8 gMathTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2200 */	 '?',  '?', 0xB6,  '?',  '?',  '?', 0xC6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xB8,
	/* 0x2210 */	 '?', 0xB7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xC3,  '?',  '?',  '?', 0xB0,  '?',
	/* 0x2220 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xBA,  '?',  '?',  '?',  '?',
	/* 0x2230 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2240 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xC5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2250 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2260 */	0xAD,  '?',  '?',  '?', 0xB2, 0xB3,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
391 | ||
392 | /* */ | |
393 | static UInt8 gReverseCombTable[] = { | |
394 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ | |
395 | /* 0x40 */ 0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98, | |
396 | /* 0x50 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, | |
397 | /* 0x60 */ 0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3, | |
398 | /* 0x70 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, | |
399 | ||
400 | /* Combining Diacritical Marks (0x0300 - 0x030A) */ | |
401 | /* 0 1 2 3 4 5 6 7 8 9 A */ | |
402 | /* 'A' */ | |
403 | /* 0x0300 */ 0xCB, 0xE7, 0xE5, 0xCC, '?', '?', '?', '?', 0x80, '?', 0x81, | |
404 | ||
405 | /* 'a' */ | |
406 | /* 0x0300 */ 0x88, 0x87, 0x89, 0x8B, '?', '?', '?', '?', 0x8A, '?', 0x8C, | |
407 | ||
408 | /* 'E' */ | |
409 | /* 0x0300 */ 0xE9, 0x83, 0xE6, '?', '?', '?', '?', '?', 0xE8, '?', '?', | |
410 | ||
411 | /* 'e' */ | |
412 | /* 0x0300 */ 0x8F, 0x8E, 0x90, '?', '?', '?', '?', '?', 0x91, '?', '?', | |
413 | ||
414 | /* 'I' */ | |
415 | /* 0x0300 */ 0xED, 0xEA, 0xEB, '?', '?', '?', '?', '?', 0xEC, '?', '?', | |
416 | ||
417 | /* 'i' */ | |
418 | /* 0x0300 */ 0x93, 0x92, 0x94, '?', '?', '?', '?', '?', 0x95, '?', '?', | |
419 | ||
420 | /* 'N' */ | |
421 | /* 0x0300 */ '?', '?', '?', 0x84, '?', '?', '?', '?', '?', '?', '?', | |
422 | ||
423 | /* 'n' */ | |
424 | /* 0x0300 */ '?', '?', '?', 0x96, '?', '?', '?', '?', '?', '?', '?', | |
425 | ||
426 | /* 'O' */ | |
427 | /* 0x0300 */ 0xF1, 0xEE, 0xEF, 0xCD, '?', '?', '?', '?', 0x85, '?', '?', | |
428 | ||
429 | /* 'o' */ | |
430 | /* 0x0300 */ 0x98, 0x97, 0x99, 0x9B, '?', '?', '?', '?', 0x9A, '?', '?', | |
431 | ||
432 | /* 'U' */ | |
433 | /* 0x0300 */ 0xF4, 0xF2, 0xF3, '?', '?', '?', '?', '?', 0x86, '?', '?', | |
434 | ||
435 | /* 'u' */ | |
436 | /* 0x0300 */ 0x9D, 0x9C, 0x9E, '?', '?', '?', '?', '?', 0x9F, '?', '?', | |
437 | ||
438 | /* 'Y' */ | |
439 | /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD9, '?', '?', | |
440 | ||
441 | /* 'y' */ | |
442 | /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD8, '?', '?', | |
443 | ||
444 | /* else */ | |
445 | /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?' | |
446 | }; | |
447 | ||
448 | ||
449 | /* | |
450 | * Convert Unicode string into HFS MacRoman encoding | |
451 | * | |
452 | * Assumes Unicode input is fully decomposed | |
453 | */ | |
454 | static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str) | |
455 | { | |
456 | UInt8 *p; | |
457 | const UniChar *u; | |
458 | UniChar c; | |
459 | UniChar mask; | |
460 | UInt16 inputChars; | |
461 | UInt16 pascalChars; | |
462 | OSErr result = noErr; | |
463 | UInt8 lsb; | |
464 | UInt8 prevChar; | |
465 | UInt8 mc; | |
466 | ||
467 | mask = (UniChar) 0xFF80; | |
468 | p = &hfs_str[1]; | |
469 | u = uni_str; | |
470 | inputChars = unicodeChars; | |
471 | pascalChars = prevChar = 0; | |
472 | ||
473 | while (inputChars) { | |
474 | c = *(u++); | |
475 | lsb = (UInt8) c; | |
476 | ||
477 | /* | |
478 | * If its not 7-bit ascii, then we need to map it | |
479 | */ | |
480 | if ( c & mask ) { | |
481 | mc = '?'; | |
482 | switch (c & 0xFF00) { | |
483 | case 0x0000: | |
484 | if (lsb >= 0xA0) | |
485 | mc = gLatin1Table[lsb - 0xA0]; | |
486 | break; | |
487 | ||
488 | case 0x0200: | |
489 | if (lsb >= 0xC0 && lsb <= 0xDF) | |
490 | mc = gSpaceModsTable[lsb - 0xC0]; | |
491 | break; | |
492 | ||
493 | case 0x2000: | |
494 | if (lsb >= 0x10 && lsb <= 0xAF) | |
495 | mc = gPunctTable[lsb- 0x10]; | |
496 | break; | |
497 | ||
498 | case 0x2200: | |
499 | if (lsb <= 0x68) | |
500 | mc = gMathTable[lsb]; | |
501 | break; | |
502 | ||
503 | case 0x0300: | |
504 | if (c <= 0x030A) { | |
505 | if (prevChar >= 'A' && prevChar < 'z') { | |
506 | mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb]; | |
507 | --p; /* backup over base char */ | |
508 | --pascalChars; | |
509 | } | |
510 | } else { | |
511 | switch (c) { | |
512 | case 0x0327: /* combining cedilla */ | |
513 | if (prevChar == 'C') | |
514 | mc = 0x82; | |
515 | else if (prevChar == 'c') | |
516 | mc = 0x8D; | |
517 | else | |
518 | break; | |
519 | --p; /* backup over base char */ | |
520 | --pascalChars; | |
521 | break; | |
522 | ||
523 | case 0x03A9: mc = 0xBD; break; /* omega */ | |
524 | ||
525 | case 0x03C0: mc = 0xB9; break; /* pi */ | |
526 | } | |
527 | } | |
528 | break; | |
529 | ||
530 | default: | |
531 | switch (c) { | |
532 | case 0x0131: mc = 0xf5; break; /* dotless i */ | |
533 | ||
534 | case 0x0152: mc = 0xce; break; /* OE */ | |
535 | ||
536 | case 0x0153: mc = 0xcf; break; /* oe */ | |
537 | ||
538 |