]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_encodings.c
5ef6c4d68ab1766ff1eeb8c80cfb490d3370d22e
[apple/xnu.git] / bsd / hfs / hfs_encodings.c
1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/malloc.h>
35 #include <sys/queue.h>
36 #include <sys/utfconv.h>
37
38 #include "hfs.h"
39
40
41 lck_grp_t * encodinglst_lck_grp;
42 lck_grp_attr_t * encodinglst_lck_grp_attr;
43 lck_attr_t * encodinglst_lck_attr;
44
45
46 /* hfs encoding converter list */
47 SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
48
49 lck_mtx_t encodinglst_mutex;
50
51
52
53 /* hfs encoding converter entry */
54 struct hfs_encoding {
55 SLIST_ENTRY(hfs_encoding) link;
56 int refcount;
57 int kmod_id;
58 UInt32 encoding;
59 hfs_to_unicode_func_t get_unicode_func;
60 unicode_to_hfs_func_t get_hfsname_func;
61 };
62
63 /* XXX We should use an "official" interface! */
64 extern kern_return_t kmod_destroy(host_priv_t host_priv, kmod_t id);
65 extern struct host realhost;
66
67 #define MAX_HFS_UNICODE_CHARS (15*5)
68
69 int mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str, UInt32 maxCharLen, UInt32 *usedCharLen);
70
71 static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str);
72
73
74 void
75 hfs_converterinit(void)
76 {
77 SLIST_INIT(&hfs_encoding_list);
78
79 encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
80 lck_grp_attr_setstat(encodinglst_lck_grp_attr);
81 encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
82
83 encodinglst_lck_attr = lck_attr_alloc_init();
84 //lck_attr_setdebug(encodinglst_lck_attr);
85
86 lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);
87
88 /*
89 * add resident MacRoman converter and take a reference
90 * since its always "loaded".
91 */
92 hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
93 SLIST_FIRST(&hfs_encoding_list)->refcount++;
94 }
95
96
97 /*
98 * hfs_addconverter - add an HFS encoding converter
99 *
100 * This is called exclusivly by kernel loadable modules
101 * (like HFS_Japanese.kmod) to register hfs encoding
102 * conversion routines.
103 *
104 */
105 int
106 hfs_addconverter(int id, UInt32 encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
107 {
108 struct hfs_encoding *encp;
109
110 MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
111
112 lck_mtx_lock(&encodinglst_mutex);
113
114 encp->link.sle_next = NULL;
115 encp->refcount = 0;
116 encp->encoding = encoding;
117 encp->get_unicode_func = get_unicode;
118 encp->get_hfsname_func = get_hfsname;
119 encp->kmod_id = id;
120 SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
121
122 lck_mtx_unlock(&encodinglst_mutex);
123 return (0);
124 }
125
126
127 /*
128 * hfs_remconverter - remove an HFS encoding converter
129 *
130 * Can be called by a kernel loadable module's finalize
131 * routine to remove an encoding converter so that the
132 * module (i.e. the code) can be unloaded.
133 *
134 * However, in the normal case, the removing and unloading
135 * of these converters is done in hfs_relconverter.
136 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
137 */
138 int
139 hfs_remconverter(int id, UInt32 encoding)
140 {
141 struct hfs_encoding *encp;
142
143 lck_mtx_lock(&encodinglst_mutex);
144 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
145 if (encp->encoding == encoding && encp->kmod_id == id) {
146 encp->refcount--;
147
148 /* if converter is no longer in use, release it */
149 if (encp->refcount <= 0 && encp->kmod_id != 0) {
150 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
151 lck_mtx_unlock(&encodinglst_mutex);
152 FREE(encp, M_TEMP);
153 return (0);
154 } else {
155 lck_mtx_unlock(&encodinglst_mutex);
156 return (1); /* busy */
157 }
158 break;
159 }
160 }
161 lck_mtx_unlock(&encodinglst_mutex);
162
163 return (0);
164 }
165
166
167 /*
168 * hfs_getconverter - get HFS encoding converters
169 *
170 * Normally called during the mounting of an hfs voulume.
171 */
172 int
173 hfs_getconverter(UInt32 encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
174 {
175 struct hfs_encoding *encp;
176 int found = 0;
177
178 lck_mtx_lock(&encodinglst_mutex);
179 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
180 if (encp->encoding == encoding) {
181 found = 1;
182 *get_unicode = encp->get_unicode_func;
183 *get_hfsname = encp->get_hfsname_func;
184 ++encp->refcount;
185 break;
186 }
187 }
188 lck_mtx_unlock(&encodinglst_mutex);
189
190 if (!found) {
191 *get_unicode = NULL;
192 *get_hfsname = NULL;
193 return (EINVAL);
194 }
195
196 return (0);
197 }
198
199
200 /*
201 * hfs_relconverter - release interest in an HFS encoding converter
202 *
203 * Normally called during the unmounting of an hfs voulume.
204 */
205 int
206 hfs_relconverter(UInt32 encoding)
207 {
208 struct hfs_encoding *encp;
209
210 lck_mtx_lock(&encodinglst_mutex);
211 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
212 if (encp->encoding == encoding) {
213 encp->refcount--;
214
215 /* if converter is no longer in use, release it */
216 if (encp->refcount <= 0 && encp->kmod_id != 0) {
217 int id = encp->kmod_id;
218
219 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
220 lck_mtx_unlock(&encodinglst_mutex);
221
222 FREE(encp, M_TEMP);
223 kmod_destroy((host_priv_t) host_priv_self(), id);
224 return (0);
225 }
226 lck_mtx_unlock(&encodinglst_mutex);
227 return (0);
228 }
229 }
230 lck_mtx_unlock(&encodinglst_mutex);
231
232 return (EINVAL);
233 }
234
235
236 /*
237 * Convert HFS encoded string into UTF-8
238 *
239 * Unicode output is fully decomposed
240 * '/' chars are converted to ':'
241 */
242 int
243 hfs_to_utf8(ExtendedVCB *vcb, Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
244 {
245 int error;
246 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
247 ItemCount uniCount;
248 size_t utf8len;
249 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
250
251 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
252
253 if (uniCount == 0)
254 error = EINVAL;
255
256 if (error == 0) {
257 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
258 if (error == ENAMETOOLONG)
259 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
260 else
261 *actualDstLen = utf8len;
262 }
263
264 return error;
265 }
266
267
268 /*
269 * When an HFS name cannot be encoded with the current
270 * volume encoding then MacRoman is used as a fallback.
271 */
272 int
273 mac_roman_to_utf8(Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
274 {
275 int error;
276 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
277 ItemCount uniCount;
278 size_t utf8len;
279
280 error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
281
282 if (uniCount == 0)
283 error = EINVAL;
284
285 if (error == 0) {
286 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
287 if (error == ENAMETOOLONG)
288 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
289 else
290 *actualDstLen = utf8len;
291 }
292
293 return error;
294 }
295
296
297 /*
298 * Convert Unicode string into HFS encoding
299 *
300 * ':' chars are converted to '/'
301 * Assumes input represents fully decomposed Unicode
302 */
303 int
304 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
305 {
306 int error;
307 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
308
309 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
310 if (error && retry) {
311 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
312 }
313 return error;
314 }
315
316 /*
317 * Convert UTF-8 string into HFS encoding
318 *
319 * ':' chars are converted to '/'
320 * Assumes input represents fully decomposed Unicode
321 */
322 int
323 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
324 {
325 int error;
326 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
327 size_t ucslen;
328
329 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
330 if (error == 0)
331 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
332
333 return error;
334 }
335
336 int
337 utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
338 {
339 int error;
340 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
341 size_t ucslen;
342
343 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
344 if (error == 0)
345 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
346
347 return error;
348 }
349
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other hfs encodings are loadable.
 */
354
355 /* 0x00A0 - 0x00FF = Latin 1 Supplement (30 total) */
356 static UInt8 gLatin1Table[] = {
357 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
358 /* 0x00A0 */ 0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4, '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2, '?', 0xA8, 0xF8,
359 /* 0x00B0 */ 0xA1, 0XB1, '?', '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC, '?', 0xBC, 0xC8, '?', '?', '?', 0xC0,
360 /* 0x00C0 */ '?', '?', '?', '?', '?', '?', 0xAE, '?', '?', '?', '?', '?', '?', '?', '?', '?',
361 /* 0x00D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xAF, '?', '?', '?', '?', '?', '?', 0xA7,
362 /* 0x00E0 */ '?', '?', '?', '?', '?', '?', 0xBE, '?', '?', '?', '?', '?', '?', '?', '?', '?',
363 /* 0x00F0 */ '?', '?', '?', '?', '?', '?', '?', 0xD6, 0xBF, '?', '?', '?', '?', '?', '?', '?'
364 };
365
366 /* 0x02C0 - 0x02DF = Spacing Modifiers (8 total) */
367 static UInt8 gSpaceModsTable[] = {
368 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
369 /* 0x02C0 */ '?', '?', '?', '?', '?', '?', 0xF6, 0xFF, '?', '?', '?', '?', '?', '?', '?', '?',
370 /* 0x02D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD, '?', '?'
371 };
372
373 /* 0x2010 - 0x20AF = General Punctuation (17 total) */
374 static UInt8 gPunctTable[] = {
375 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
376 /* 0x2010 */ '?', '?', '?', 0xd0, 0xd1, '?', '?', '?', 0xd4, 0xd5, 0xe2, '?', 0xd2, 0xd3, 0xe3, '?',
377 /* 0x2020 */ 0xa0, 0xe0, 0xa5, '?', '?', '?', 0xc9, '?', '?', '?', '?', '?', '?', '?', '?', '?',
378 /* 0x2030 */ 0xe4, '?', '?', '?', '?', '?', '?', '?', '?', 0xdc, 0xdd, '?', '?', '?', '?', '?',
379 /* 0x2040 */ '?', '?', '?', '?', 0xda, '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
380 /* 0x2050 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
381 /* 0x2060 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
382 /* 0x2070 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
383 /* 0x2080 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
384 /* 0x2090 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
385 /* 0x20A0 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xdb, '?', '?', '?'
386 };
387
388 /* 0x22xx = Mathematical Operators (11 total) */
389 static UInt8 gMathTable[] = {
390 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
391 /* 0x2200 */ '?', '?', 0xb6, '?', '?', '?', 0xc6, '?', '?', '?', '?', '?', '?', '?', '?', 0xb8,
392 /* 0x2210 */ '?', 0xb7, '?', '?', '?', '?', '?', '?', '?', '?', 0xc3, '?', '?', '?', 0xb0, '?',
393 /* 0x2220 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xba, '?', '?', '?', '?',
394 /* 0x2230 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
395 /* 0x2240 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xc5, '?', '?', '?', '?', '?', '?', '?',
396 /* 0x2250 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
397 /* 0x2260 */ 0xad, '?', '?', '?', 0xb2, 0xb3, '?', '?'
398 };
399
400 /* */
401 static UInt8 gReverseCombTable[] = {
402 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
403 /* 0x40 */ 0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
404 /* 0x50 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
405 /* 0x60 */ 0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
406 /* 0x70 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
407
408 /* Combining Diacritical Marks (0x0300 - 0x030A) */
409 /* 0 1 2 3 4 5 6 7 8 9 A */
410 /* 'A' */
411 /* 0x0300 */ 0xCB, 0xE7, 0xE5, 0xCC, '?', '?', '?', '?', 0x80, '?', 0x81,
412
413 /* 'a' */
414 /* 0x0300 */ 0x88, 0x87, 0x89, 0x8B, '?', '?', '?', '?', 0x8A, '?', 0x8C,
415
416 /* 'E' */
417 /* 0x0300 */ 0xE9, 0x83, 0xE6, '?', '?', '?', '?', '?', 0xE8, '?', '?',
418
419 /* 'e' */
420 /* 0x0300 */ 0x8F, 0x8E, 0x90, '?', '?', '?', '?', '?', 0x91, '?', '?',
421
422 /* 'I' */
423 /* 0x0300 */ 0xED, 0xEA, 0xEB, '?', '?', '?', '?', '?', 0xEC, '?', '?',
424
425 /* 'i' */
426 /* 0x0300 */ 0x93, 0x92, 0x94, '?', '?', '?', '?', '?', 0x95, '?', '?',
427
428 /* 'N' */
429 /* 0x0300 */ '?', '?', '?', 0x84, '?', '?', '?', '?', '?', '?', '?',
430
431 /* 'n' */
432 /* 0x0300 */ '?', '?', '?', 0x96, '?', '?', '?', '?', '?', '?', '?',
433
434 /* 'O' */
435 /* 0x0300 */ 0xF1, 0xEE, 0xEF, 0xCD, '?', '?', '?', '?', 0x85, '?', '?',
436
437 /* 'o' */
438 /* 0x0300 */ 0x98, 0x97, 0x99, 0x9B, '?', '?', '?', '?', 0x9A, '?', '?',
439
440 /* 'U' */
441 /* 0x0300 */ 0xF4, 0xF2, 0xF3, '?', '?', '?', '?', '?', 0x86, '?', '?',
442
443 /* 'u' */
444 /* 0x0300 */ 0x9D, 0x9C, 0x9E, '?', '?', '?', '?', '?', 0x9F, '?', '?',
445
446 /* 'Y' */
447 /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD9, '?', '?',
448
449 /* 'y' */
450 /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD8, '?', '?',
451
452 /* else */
453 /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?'
454 };
455
456
457 /*
458 * Convert Unicode string into HFS MacRoman encoding
459 *
460 * Assumes Unicode input is fully decomposed
461 */
462 static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str)
463 {
464 UInt8 *p;
465 const UniChar *u;
466 UniChar c;
467 UniChar mask;
468 UInt16 inputChars;
469 UInt16 pascalChars;
470 OSErr result = noErr;
471 UInt8 lsb;
472 UInt8 prevChar;
473 UInt8 mc;
474
475 mask = (UniChar) 0xFF80;
476 p = &hfs_str[1];
477 u = uni_str;
478 inputChars = unicodeChars;
479 pascalChars = prevChar = 0;
480
481 while (inputChars) {
482 c = *(u++);
483 lsb = (UInt8) c;
484
485 /*
486 * If its not 7-bit ascii, then we need to map it
487 */
488 if ( c & mask ) {
489 mc = '?';
490 switch (c & 0xFF00) {
491 case 0x0000:
492 if (lsb >= 0xA0)
493 mc = gLatin1Table[lsb - 0xA0];
494 break;
495
496 case 0x0200:
497 if (lsb >= 0xC0 && lsb <= 0xDF)
498 mc = gSpaceModsTable[lsb - 0xC0];
499 break;
500
501 case 0x2000:
502 if (lsb >= 0x10 && lsb <= 0xAF)
503 mc = gPunctTable[lsb- 0x10];
504 break;
505
506 case 0x2200:
507 if (lsb <= 0x68)
508 mc = gMathTable[lsb];
509 break;
510
511 case 0x0300:
512 if (c <= 0x030A) {
513 if (prevChar >= 'A' && prevChar < 'z') {
514 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
515 --p; /* backup over base char */
516 --pascalChars;
517 }
518 } else {
519 switch (c) {
520 case 0x0327: /* combining cedilla */
521 if (prevChar == 'C')
522 mc = 0x82;
523 else if (prevChar == 'c')
524 mc = 0x8D;
525 else
526 break;
527 --p; /* backup over base char */
528 --pascalChars;
529 break;
530
531 case 0x03A9: mc = 0xBD; break; /* omega */
532
533 case 0x03C0: mc = 0xB9; break; /* pi */
534 }
535 }
536 break;
537
538 default:
539 switch (c) {
540 case 0x0131: mc = 0xf5; break; /* dotless i */
541
542 case 0x0152: mc = 0xce; break; /* OE */
543
544 case 0x0153: mc = 0xcf; break; /* oe */
545
546 case 0x0192: mc = 0xc4; break; /* Ä */
547
548 case 0x2122: mc = 0xaa; break; /* TM */
549
550 case 0x25ca: mc = 0xd7; break; /* diamond */
551
552 case 0xf8ff: mc = 0xf0; break; /* apple logo */
553
554 case 0xfb01: mc = 0xde; break; /* fi */
555
556 case 0xfb02: mc = 0xdf; break; /* fl */
557 }
558 } /* end switch (c & 0xFF00) */
559
560 /*
561 * If we have an unmapped character then we need to mangle the name...
562 */
563 if (mc == '?')
564 result = kTECUsedFallbacksStatus;
565
566 prevChar = 0;
567 lsb = mc;
568
569 } else {
570 prevChar = lsb;
571 }
572
573 if (pascalChars >= 31)
574 break;
575
576 *(p++) = lsb;
577 ++pascalChars;
578 --inputChars;
579
580 } /* end while */
581
582 hfs_str[0] = pascalChars;
583
584 if (inputChars > 0)
585 result = ENAMETOOLONG; /* ran out of room! */
586
587 return result;
588 }
589
590
591 static UniChar gHiBitBaseUnicode[128] = {
592 /* 0x80 */ 0x0041, 0x0041, 0x0043, 0x0045, 0x004e, 0x004f, 0x0055, 0x0061,
593 /* 0x88 */ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0065, 0x0065,
594 /* 0x90 */ 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0x006e, 0x006f,
595 /* 0x98 */ 0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075,
596 /* 0xa0 */ 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
597 /* 0xa8 */ 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
598 /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
599 /* 0xb8 */ 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
600 /* 0xc0 */ 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
601 /* 0xc8 */ 0x00bb, 0x2026, 0x00a0, 0x0041, 0x0041, 0x004f, 0x0152, 0x0153,
602 /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
603 /* 0xd8 */ 0x0079, 0x0059, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
604 /* 0xe0 */ 0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x0041, 0x0045, 0x0041,
605 /* 0xe8 */ 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004f, 0x004f,
606 /* 0xf0 */ 0xf8ff, 0x004f, 0x0055, 0x0055, 0x0055, 0x0131, 0x02c6, 0x02dc,
607 /* 0xf8 */ 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7
608 };
609
610 static UniChar gHiBitCombUnicode[128] = {
611 /* 0x80 */ 0x0308, 0x030a, 0x0327, 0x0301, 0x0303, 0x0308, 0x0308, 0x0301,
612 /* 0x88 */ 0x0300, 0x0302, 0x0308, 0x0303, 0x030a, 0x0327, 0x0301, 0x0300,
613 /* 0x90 */ 0x0302, 0x0308, 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0301,
614 /* 0x98 */ 0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 0x0300, 0x0302, 0x0308,
615 /* 0xa0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
616 /* 0xa8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
617 /* 0xb0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
618 /* 0xb8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
619 /* 0xc0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
620 /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0300, 0x0303, 0x0303, 0x0000, 0x0000,
621 /* 0xd0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
622 /* 0xd8 */ 0x0308, 0x0308, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
623 /* 0xe0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0302, 0x0302, 0x0301,
624 /* 0xe8 */ 0x0308, 0x0300, 0x0301, 0x0302, 0x0308, 0x0300, 0x0301, 0x0302,
625 /* 0xf0 */ 0x0000, 0x0300, 0x0301, 0x0302, 0x0300, 0x0000, 0x0000, 0x0000,
626 /* 0xf8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
627 };
628
629
630 /*
631 * Convert HFS MacRoman encoded string into Unicode
632 *
633 * Unicode output is fully decomposed
634 */
635 int
636 mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str,
637 UInt32 maxCharLen, UInt32 *unicodeChars)
638 {
639 const UInt8 *p;
640 UniChar *u;
641 UInt16 pascalChars;
642 UInt8 c;
643
644 p = hfs_str;
645 u = uni_str;
646
647 *unicodeChars = pascalChars = *(p++); /* pick up length byte */
648
649 while (pascalChars--) {
650 c = *(p++);
651
652 if ( (SInt8) c >= 0 ) { /* check if seven bit ascii */
653 *(u++) = (UniChar) c; /* just pad high byte with zero */
654 } else { /* its a hi bit character */
655 UniChar uc;
656
657 c &= 0x7F;
658 *(u++) = uc = gHiBitBaseUnicode[c];
659
660 /*
661 * if the unicode character we get back is an alpha char
662 * then we must have an additional combining character
663 */
664 if ((uc <= (UniChar) 'z') && (uc >= (UniChar) 'A')) {
665 *(u++) = gHiBitCombUnicode[c];
666 ++(*unicodeChars);
667 }
668 }
669 }
670
671 return noErr;
672 }
673