]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_encodings.c
e85de58dc9c179eabb4e52b5e5813d6aebe7180f
[apple/xnu.git] / bsd / hfs / hfs_encodings.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23 #include <sys/param.h>
24 #include <sys/systm.h>
25 #include <sys/kernel.h>
26 #include <sys/lock.h>
27 #include <sys/malloc.h>
28 #include <sys/queue.h>
29 #include <sys/utfconv.h>
30
31 #include "hfs.h"
32
33
34 /*
 * hfs encoding converter list
 *
 * Singly-linked list of registered converters (struct hfs_encoding).
 * hfs_addconverter, hfs_remconverter, hfs_getconverter and
 * hfs_relconverter all take hfs_encoding_list_slock around their
 * accesses to the list.
 */
35 SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
36 decl_simple_lock_data(,hfs_encoding_list_slock);
37
38
39 /* hfs encoding converter entry (one per registered text encoding) */
40 struct hfs_encoding {
41 SLIST_ENTRY(hfs_encoding) link; /* linkage on hfs_encoding_list */
42 int refcount; /* raised by hfs_getconverter, dropped by hfs_relconverter / hfs_remconverter */
43 int kmod_id; /* id passed to hfs_addconverter; 0 for the resident MacRoman converter */
44 UInt32 encoding; /* text encoding handled by this entry */
45 hfs_to_unicode_func_t get_unicode_func; /* HFS name -> Unicode routine */
46 unicode_to_hfs_func_t get_hfsname_func; /* Unicode -> HFS name routine */
47 };
48
49 /* XXX We should use an "official" interface! */
50 extern kern_return_t kmod_destroy(host_priv_t host_priv, kmod_t id); /* called by hfs_relconverter once a loadable converter is unused */
51 extern struct host realhost; /* NOTE(review): not referenced by the code in this file - confirm it is still needed */
52
53 #define MAX_HFS_UNICODE_CHARS (15*5) /* capacity, in UniChars, of the on-stack buffers used by the UTF-8 conversion routines below */
54
55 static int mac_roman_to_unicode(Str31 hfs_str, UniChar *uni_str, UInt32 maxCharLen, UInt32 *usedCharLen); /* resident MacRoman -> Unicode converter */
56
57 static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str); /* resident Unicode -> MacRoman converter */
58
59
60 void
61 hfs_converterinit(void)
62 {
63 SLIST_INIT(&hfs_encoding_list);
64 simple_lock_init(&hfs_encoding_list_slock);
65
66 /*
67 * add resident MacRoman converter and take a reference
68 * since its always "loaded".
69 */
70 hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
71 SLIST_FIRST(&hfs_encoding_list)->refcount++;
72 }
73
74
75 /*
76 * hfs_addconverter - add an HFS encoding converter
77 *
78 * This is called exclusivly by kernel loadable modules
79 * (like HFS_Japanese.kmod) to register hfs encoding
80 * conversion routines.
81 *
82 */
83 int
84 hfs_addconverter(int id, UInt32 encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
85 {
86 struct hfs_encoding *encp;
87
88 MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
89
90 simple_lock(&hfs_encoding_list_slock);
91
92 encp->link.sle_next = NULL;
93 encp->refcount = 0;
94 encp->encoding = encoding;
95 encp->get_unicode_func = get_unicode;
96 encp->get_hfsname_func = get_hfsname;
97 encp->kmod_id = id;
98 SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
99
100 simple_unlock(&hfs_encoding_list_slock);
101 return (0);
102 }
103
104
105 /*
106 * hfs_remconverter - remove an HFS encoding converter
107 *
108 * Can be called by a kernel loadable module's finalize
109 * routine to remove an encoding converter so that the
110 * module (i.e. the code) can be unloaded.
111 *
112 * However, in the normal case, the removing and unloading
113 * of these converters is done in hfs_relconverter.
114 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
115 */
116 int
117 hfs_remconverter(int id, UInt32 encoding)
118 {
119 struct hfs_encoding *encp;
120 int busy = 0;
121
122 simple_lock(&hfs_encoding_list_slock);
123 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
124 if (encp->encoding == encoding && encp->kmod_id == id) {
125 encp->refcount--;
126
127 /* if converter is no longer in use, release it */
128 if (encp->refcount <= 0 && encp->kmod_id != 0) {
129 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
130 FREE(encp, M_TEMP);
131 } else {
132 busy = 1;
133 }
134 break;
135 }
136 }
137 simple_unlock(&hfs_encoding_list_slock);
138
139 return (busy);
140 }
141
142
143 /*
144 * hfs_getconverter - get HFS encoding converters
145 *
146 * Normally called during the mounting of an hfs voulume.
147 */
148 int
149 hfs_getconverter(UInt32 encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
150 {
151 struct hfs_encoding *encp;
152 int found = 0;
153
154 simple_lock(&hfs_encoding_list_slock);
155 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
156 if (encp->encoding == encoding) {
157 found = 1;
158 *get_unicode = encp->get_unicode_func;
159 *get_hfsname = encp->get_hfsname_func;
160 ++encp->refcount;
161 break;
162 }
163 }
164 simple_unlock(&hfs_encoding_list_slock);
165
166 if (!found) {
167 *get_unicode = NULL;
168 *get_hfsname = NULL;
169 return (EINVAL);
170 }
171
172 return (0);
173 }
174
175
176 /*
177 * hfs_relconverter - release interest in an HFS encoding converter
178 *
179 * Normally called during the unmounting of an hfs voulume.
180 */
181 int
182 hfs_relconverter(UInt32 encoding)
183 {
184 struct hfs_encoding *encp;
185 int found = 0;
186
187 simple_lock(&hfs_encoding_list_slock);
188 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
189 if (encp->encoding == encoding) {
190 found = 1;
191 encp->refcount--;
192
193 /* if converter is no longer in use, release it */
194 if (encp->refcount <= 0 && encp->kmod_id != 0) {
195 int id = encp->kmod_id;
196
197 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
198 FREE(encp, M_TEMP);
199 encp = NULL;
200
201 simple_unlock(&hfs_encoding_list_slock);
202 kmod_destroy(host_priv_self(), id);
203 simple_lock(&hfs_encoding_list_slock);
204 }
205 break;
206 }
207 }
208 simple_unlock(&hfs_encoding_list_slock);
209
210 return (found ? 0 : EINVAL);
211 }
212
213
214 /*
215 * Convert HFS encoded string into UTF-8
216 *
217 * Unicode output is fully decomposed
218 * '/' chars are converted to ':'
219 */
220 int
221 hfs_to_utf8(ExtendedVCB *vcb, Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
222 {
223 int error;
224 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
225 ItemCount uniCount;
226 size_t utf8len;
227 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
228
229 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
230
231 if (uniCount == 0)
232 error = EINVAL;
233
234 if (error == 0) {
235 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
236 if (error == ENAMETOOLONG)
237 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
238 else
239 *actualDstLen = utf8len;
240 }
241
242 return error;
243 }
244
245
246 /*
247 * When an HFS name cannot be encoded with the current
248 * volume encoding then MacRoman is used as a fallback.
249 */
250 int
251 mac_roman_to_utf8(Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
252 {
253 int error;
254 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
255 ItemCount uniCount;
256 size_t utf8len;
257
258 error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
259
260 if (uniCount == 0)
261 error = EINVAL;
262
263 if (error == 0) {
264 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
265 if (error == ENAMETOOLONG)
266 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
267 else
268 *actualDstLen = utf8len;
269 }
270
271 return error;
272 }
273
274
275 /*
276 * Convert UTF-8 string into HFS encoding
277 *
278 * ':' chars are converted to '/'
279 * Assumes input represents fully decomposed Unicode
280 */
281 int
282 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
283 {
284 int error;
285 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
286 size_t ucslen;
287 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
288
289 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
290 if (error == 0)
291 error = hfs_get_hfsname(uniStr, ucslen/sizeof(UniChar), dstStr);
292
293 return error;
294 }
295
296 int
297 utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
298 {
299 int error;
300 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
301 size_t ucslen;
302
303 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
304 if (error == 0)
305 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
306
307 return error;
308 }
309
310 /*
311 * HFS MacRoman to/from Unicode conversions are built into the kernel
312 * All other hfs encodings are loadable.
313 */
314
315 /*
 * 0x00A0 - 0x00FF = Latin 1 Supplement (30 total)
 *
 * Unicode -> MacRoman; unicode_to_mac_roman indexes this table with
 * (low byte of the code point - 0xA0).  '?' marks a code point with no
 * entry here and makes the caller report a fallback.
 */
316 static UInt8 gLatin1Table[] = {
317 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
318 /* 0x00A0 */ 0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4, '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2, '?', 0xA8, 0xF8,
319 /* 0x00B0 */ 0xA1, 0XB1, '?', '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC, '?', 0xBC, 0xC8, '?', '?', '?', 0xC0,
320 /* 0x00C0 */ '?', '?', '?', '?', '?', '?', 0xAE, '?', '?', '?', '?', '?', '?', '?', '?', '?',
321 /* 0x00D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xAF, '?', '?', '?', '?', '?', '?', 0xA7,
322 /* 0x00E0 */ '?', '?', '?', '?', '?', '?', 0xBE, '?', '?', '?', '?', '?', '?', '?', '?', '?',
323 /* 0x00F0 */ '?', '?', '?', '?', '?', '?', '?', 0xD6, 0xBF, '?', '?', '?', '?', '?', '?', '?'
324 };
325
326 /*
 * 0x02C0 - 0x02DF = Spacing Modifiers (8 total)
 *
 * Unicode -> MacRoman; indexed by (low byte of the code point - 0xC0).
 * '?' marks a code point with no entry here.
 */
327 static UInt8 gSpaceModsTable[] = {
328 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
329 /* 0x02C0 */ '?', '?', '?', '?', '?', '?', 0xF6, 0xFF, '?', '?', '?', '?', '?', '?', '?', '?',
330 /* 0x02D0 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD, '?', '?'
331 };
332
333 /*
 * 0x2010 - 0x20AF = General Punctuation (17 total)
 *
 * Unicode -> MacRoman; indexed by (low byte of the code point - 0x10).
 * '?' marks a code point with no entry here.
 */
334 static UInt8 gPunctTable[] = {
335 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
336 /* 0x2010 */ '?', '?', '?', 0xd0, 0xd1, '?', '?', '?', 0xd4, 0xd5, 0xe2, '?', 0xd2, 0xd3, 0xe3, '?',
337 /* 0x2020 */ 0xa0, 0xe0, 0xa5, '?', '?', '?', 0xc9, '?', '?', '?', '?', '?', '?', '?', '?', '?',
338 /* 0x2030 */ 0xe4, '?', '?', '?', '?', '?', '?', '?', '?', 0xdc, 0xdd, '?', '?', '?', '?', '?',
339 /* 0x2040 */ '?', '?', '?', '?', 0xda, '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
340 /* 0x2050 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
341 /* 0x2060 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
342 /* 0x2070 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
343 /* 0x2080 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
344 /* 0x2090 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
345 /* 0x20A0 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xdb, '?', '?', '?'
346 };
347
348 /*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * Unicode -> MacRoman; indexed by the low byte of the code point.
 * The table stops after 0x2267, so it holds 0x68 entries and the
 * valid indices are 0x00 - 0x67.  '?' marks a code point with no
 * entry here.
 */
349 static UInt8 gMathTable[] = {
350 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
351 /* 0x2200 */ '?', '?', 0xb6, '?', '?', '?', 0xc6, '?', '?', '?', '?', '?', '?', '?', '?', 0xb8,
352 /* 0x2210 */ '?', 0xb7, '?', '?', '?', '?', '?', '?', '?', '?', 0xc3, '?', '?', '?', 0xb0, '?',
353 /* 0x2220 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', 0xba, '?', '?', '?', '?',
354 /* 0x2230 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
355 /* 0x2240 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xc5, '?', '?', '?', '?', '?', '?', '?',
356 /* 0x2250 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?',
357 /* 0x2260 */ 0xad, '?', '?', '?', 0xb2, 0xb3, '?', '?'
358 };
359
360 /*
 * Reverse combining table: lets unicode_to_mac_roman fold a base letter
 * followed by a combining mark in 0x0300 - 0x030A into one MacRoman byte.
 *
 * The first 64 bytes are indexed by (base char - 0x40) and hold the
 * offset, within this same table, of that letter's 11-entry row.
 * 0xDA is the offset of the final "else" row, whose entries are all '?'.
 * Each row is indexed by the low byte of the combining mark.
 */
361 static UInt8 gReverseCombTable[] = {
362 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
363 /* 0x40 */ 0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
364 /* 0x50 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
365 /* 0x60 */ 0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
366 /* 0x70 */ 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
367
368 /* Combining Diacritical Marks (0x0300 - 0x030A) */
369 /* 0 1 2 3 4 5 6 7 8 9 A */
370 /* 'A' */
371 /* 0x0300 */ 0xCB, 0xE7, 0xE5, 0xCC, '?', '?', '?', '?', 0x80, '?', 0x81,
372
373 /* 'a' */
374 /* 0x0300 */ 0x88, 0x87, 0x89, 0x8B, '?', '?', '?', '?', 0x8A, '?', 0x8C,
375
376 /* 'E' */
377 /* 0x0300 */ 0xE9, 0x83, 0xE6, '?', '?', '?', '?', '?', 0xE8, '?', '?',
378
379 /* 'e' */
380 /* 0x0300 */ 0x8F, 0x8E, 0x90, '?', '?', '?', '?', '?', 0x91, '?', '?',
381
382 /* 'I' */
383 /* 0x0300 */ 0xED, 0xEA, 0xEB, '?', '?', '?', '?', '?', 0xEC, '?', '?',
384
385 /* 'i' */
386 /* 0x0300 */ 0x93, 0x92, 0x94, '?', '?', '?', '?', '?', 0x95, '?', '?',
387
388 /* 'N' */
389 /* 0x0300 */ '?', '?', '?', 0x84, '?', '?', '?', '?', '?', '?', '?',
390
391 /* 'n' */
392 /* 0x0300 */ '?', '?', '?', 0x96, '?', '?', '?', '?', '?', '?', '?',
393
394 /* 'O' */
395 /* 0x0300 */ 0xF1, 0xEE, 0xEF, 0xCD, '?', '?', '?', '?', 0x85, '?', '?',
396
397 /* 'o' */
398 /* 0x0300 */ 0x98, 0x97, 0x99, 0x9B, '?', '?', '?', '?', 0x9A, '?', '?',
399
400 /* 'U' */
401 /* 0x0300 */ 0xF4, 0xF2, 0xF3, '?', '?', '?', '?', '?', 0x86, '?', '?',
402
403 /* 'u' */
404 /* 0x0300 */ 0x9D, 0x9C, 0x9E, '?', '?', '?', '?', '?', 0x9F, '?', '?',
405
406 /* 'Y' */
407 /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD9, '?', '?',
408
409 /* 'y' */
410 /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', 0xD8, '?', '?',
411
412 /* else */
413 /* 0x0300 */ '?', '?', '?', '?', '?', '?', '?', '?', '?', '?', '?'
414 };
415
416
417 /*
418 * Convert Unicode string into HFS MacRoman encoding
419 *
420 * Assumes Unicode input is fully decomposed
421 */
422 static int unicode_to_mac_roman(UniChar *uni_str, UInt32 unicodeChars, Str31 hfs_str)
423 {
424 UInt8 *p;
425 const UniChar *u;
426 UniChar c;
427 UniChar mask;
428 UInt16 inputChars;
429 UInt16 pascalChars;
430 OSErr result = noErr;
431 UInt8 lsb;
432 UInt8 prevChar;
433 UInt8 mc;
434
435 mask = (UniChar) 0xFF80;
436 p = &hfs_str[1];
437 u = uni_str;
438 inputChars = unicodeChars;
439 pascalChars = prevChar = 0;
440
441 while (inputChars) {
442 c = *(u++);
443 lsb = (UInt8) c;
444
445 /*
446 * If its not 7-bit ascii, then we need to map it
447 */
448 if ( c & mask ) {
449 mc = '?';
450 switch (c & 0xFF00) {
451 case 0x0000:
452 if (lsb >= 0xA0)
453 mc = gLatin1Table[lsb - 0xA0];
454 break;
455
456 case 0x0200:
457 if (lsb >= 0xC0 && lsb <= 0xDF)
458 mc = gSpaceModsTable[lsb - 0xC0];
459 break;
460
461 case 0x2000:
462 if (lsb >= 0x10 && lsb <= 0xAF)
463 mc = gPunctTable[lsb- 0x10];
464 break;
465
466 case 0x2200:
467 if (lsb <= 0x68)
468 mc = gMathTable[lsb];
469 break;
470
471 case 0x0300:
472 if (c <= 0x030A) {
473 if (prevChar >= 'A' && prevChar < 'z') {
474 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
475 --p; /* backup over base char */
476 --pascalChars;
477 }
478 } else {
479 switch (c) {
480 case 0x0327: /* combining cedilla */
481 if (prevChar == 'C')
482 mc = 0x82;
483 else if (prevChar == 'c')
484 mc = 0x8D;
485 else
486 break;
487 --p; /* backup over base char */
488 --pascalChars;
489 break;
490
491 case 0x03A9: mc = 0xBD; break; /* omega */
492
493 case 0x03C0: mc = 0xB9; break; /* pi */
494 }
495 }
496 break;
497
498 default:
499 switch (c) {
500 case 0x0131: mc = 0xf5; break; /* dotless i */
501
502 case 0x0152: mc = 0xce; break; /* OE */
503
504 case 0x0153: mc = 0xcf; break; /* oe */
505
506 case 0x0192: mc = 0xc4; break; /* Ä */
507
508 case 0x2122: mc = 0xaa; break; /* TM */
509
510 case 0x25ca: mc = 0xd7; break; /* diamond */
511
512 case 0xf8ff: mc = 0xf0; break; /* apple logo */
513
514 case 0xfb01: mc = 0xde; break; /* fi */
515
516 case 0xfb02: mc = 0xdf; break; /* fl */
517 }
518 } /* end switch (c & 0xFF00) */
519
520 /*
521 * If we have an unmapped character then we need to mangle the name...
522 */
523 if (mc == '?')
524 result = kTECUsedFallbacksStatus;
525
526 prevChar = 0;
527 lsb = mc;
528
529 } else {
530 prevChar = lsb;
531 }
532
533 if (pascalChars >= 31)
534 break;
535
536 *(p++) = lsb;
537 ++pascalChars;
538 --inputChars;
539
540 } /* end while */
541
542 hfs_str[0] = pascalChars;
543
544 if (inputChars > 0)
545 result = ENAMETOOLONG; /* ran out of room! */
546
547 return result;
548 }
549
550
/*
 * MacRoman -> Unicode, base characters.  mac_roman_to_unicode indexes
 * this table with (MacRoman byte & 0x7F) for bytes 0x80 - 0xFF.  When
 * the entry lies in 'A' .. 'z', the matching gHiBitCombUnicode entry is
 * emitted right after it as the combining mark.
 */
551 static UniChar gHiBitBaseUnicode[128] = {
552 /* 0x80 */ 0x0041, 0x0041, 0x0043, 0x0045, 0x004e, 0x004f, 0x0055, 0x0061,
553 /* 0x88 */ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0065, 0x0065,
554 /* 0x90 */ 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0x006e, 0x006f,
555 /* 0x98 */ 0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075,
556 /* 0xa0 */ 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
557 /* 0xa8 */ 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
558 /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
559 /* 0xb8 */ 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
560 /* 0xc0 */ 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
561 /* 0xc8 */ 0x00bb, 0x2026, 0x00a0, 0x0041, 0x0041, 0x004f, 0x0152, 0x0153,
562 /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
563 /* 0xd8 */ 0x0079, 0x0059, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
564 /* 0xe0 */ 0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x0041, 0x0045, 0x0041,
565 /* 0xe8 */ 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004f, 0x004f,
566 /* 0xf0 */ 0xf8ff, 0x004f, 0x0055, 0x0055, 0x0055, 0x0131, 0x02c6, 0x02dc,
567 /* 0xf8 */ 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7
568 };
569
/*
 * MacRoman -> Unicode, combining marks.  Parallel to gHiBitBaseUnicode
 * and indexed the same way, with (MacRoman byte & 0x7F).
 * mac_roman_to_unicode reads an entry only when the corresponding base
 * character lies in 'A' .. 'z'; the other slots are 0x0000.
 */
570 static UniChar gHiBitCombUnicode[128] = {
571 /* 0x80 */ 0x0308, 0x030a, 0x0327, 0x0301, 0x0303, 0x0308, 0x0308, 0x0301,
572 /* 0x88 */ 0x0300, 0x0302, 0x0308, 0x0303, 0x030a, 0x0327, 0x0301, 0x0300,
573 /* 0x90 */ 0x0302, 0x0308, 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0301,
574 /* 0x98 */ 0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 0x0300, 0x0302, 0x0308,
575 /* 0xa0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
576 /* 0xa8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
577 /* 0xb0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
578 /* 0xb8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
579 /* 0xc0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
580 /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0300, 0x0303, 0x0303, 0x0000, 0x0000,
581 /* 0xd0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
582 /* 0xd8 */ 0x0308, 0x0308, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
583 /* 0xe0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0302, 0x0302, 0x0301,
584 /* 0xe8 */ 0x0308, 0x0300, 0x0301, 0x0302, 0x0308, 0x0300, 0x0301, 0x0302,
585 /* 0xf0 */ 0x0000, 0x0300, 0x0301, 0x0302, 0x0300, 0x0000, 0x0000, 0x0000,
586 /* 0xf8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
587 };
588
589
590 /*
591 * Convert HFS MacRoman encoded string into Unicode
592 *
593 * Unicode output is fully decomposed
594 */
595 static int mac_roman_to_unicode(Str31 hfs_str, UniChar *uni_str,
596 UInt32 maxCharLen, UInt32 *unicodeChars)
597 {
598 const UInt8 *p;
599 UniChar *u;
600 UInt16 pascalChars;
601 UInt8 c;
602
603 p = hfs_str;
604 u = uni_str;
605
606 *unicodeChars = pascalChars = *(p++); /* pick up length byte */
607
608 while (pascalChars--) {
609 c = *(p++);
610
611 if ( (SInt8) c >= 0 ) { /* check if seven bit ascii */
612 *(u++) = (UniChar) c; /* just pad high byte with zero */
613 } else { /* its a hi bit character */
614 UniChar uc;
615
616 c &= 0x7F;
617 *(u++) = uc = gHiBitBaseUnicode[c];
618
619 /*
620 * if the unicode character we get back is an alpha char
621 * then we must have an additional combining character
622 */
623 if ((uc <= (UniChar) 'z') && (uc >= (UniChar) 'A')) {
624 *(u++) = gHiBitCombUnicode[c];
625 ++(*unicodeChars);
626 }
627 }
628 }
629
630 return noErr;
631 }
632