]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_encodings.c
xnu-1504.3.12.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_encodings.c
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #if HFS
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
33 #include <sys/malloc.h>
34 #include <sys/queue.h>
35 #include <sys/utfconv.h>
36 #include <kern/host.h>
37 #include <mach/host_priv.h>
38 #include <libkern/OSKextLib.h>
39 #include <libkern/OSKextLibPrivate.h>
40
41 #include "hfs.h"
42
43
44 lck_grp_t * encodinglst_lck_grp;
45 lck_grp_attr_t * encodinglst_lck_grp_attr;
46 lck_attr_t * encodinglst_lck_attr;
47
48
49 /* hfs encoding converter list */
50 SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
51
52 lck_mtx_t encodinglst_mutex;
53
54
55
56 /* hfs encoding converter entry */
57 struct hfs_encoding {
58 SLIST_ENTRY(hfs_encoding) link;
59 int refcount;
60 int kmod_id;
61 u_int32_t encoding;
62 hfs_to_unicode_func_t get_unicode_func;
63 unicode_to_hfs_func_t get_hfsname_func;
64 };
65
66 #define MAX_HFS_UNICODE_CHARS (15*5)
67
68 static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str);
69
70 void
71 hfs_converterinit(void)
72 {
73 SLIST_INIT(&hfs_encoding_list);
74
75 encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
76 encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
77 encodinglst_lck_attr = lck_attr_alloc_init();
78
79 lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);
80
81 /*
82 * add resident MacRoman converter and take a reference
83 * since its always "loaded".
84 */
85 hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
86 SLIST_FIRST(&hfs_encoding_list)->refcount++;
87 }
88
89
90 /*
91 * hfs_addconverter - add an HFS encoding converter
92 *
93 * This is called exclusivly by kernel loadable modules
94 * (like HFS_Japanese.kmod) to register hfs encoding
95 * conversion routines.
96 *
97 */
98 int
99 hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
100 {
101 struct hfs_encoding *encp;
102
103 MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
104
105 lck_mtx_lock(&encodinglst_mutex);
106
107 encp->link.sle_next = NULL;
108 encp->refcount = 0;
109 encp->encoding = encoding;
110 encp->get_unicode_func = get_unicode;
111 encp->get_hfsname_func = get_hfsname;
112 encp->kmod_id = id;
113 SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
114
115 lck_mtx_unlock(&encodinglst_mutex);
116 return (0);
117 }
118
119
120 /*
121 * hfs_remconverter - remove an HFS encoding converter
122 *
123 * Can be called by a kernel loadable module's finalize
124 * routine to remove an encoding converter so that the
125 * module (i.e. the code) can be unloaded.
126 *
127 * However, in the normal case, the removing and unloading
128 * of these converters is done in hfs_relconverter.
129 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
130 */
131 int
132 hfs_remconverter(int id, u_int32_t encoding)
133 {
134 struct hfs_encoding *encp;
135
136 lck_mtx_lock(&encodinglst_mutex);
137 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
138 if (encp->encoding == encoding && encp->kmod_id == id) {
139 encp->refcount--;
140
141 /* if converter is no longer in use, release it */
142 if (encp->refcount <= 0 && encp->kmod_id != 0) {
143 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
144 lck_mtx_unlock(&encodinglst_mutex);
145 FREE(encp, M_TEMP);
146 return (0);
147 } else {
148 lck_mtx_unlock(&encodinglst_mutex);
149 return (1); /* busy */
150 }
151 break;
152 }
153 }
154 lck_mtx_unlock(&encodinglst_mutex);
155
156 return (0);
157 }
158
159
160 /*
161 * hfs_getconverter - get HFS encoding converters
162 *
163 * Normally called during the mounting of an hfs voulume.
164 */
165 int
166 hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
167 {
168 struct hfs_encoding *encp;
169 int found = 0;
170
171 lck_mtx_lock(&encodinglst_mutex);
172 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
173 if (encp->encoding == encoding) {
174 found = 1;
175 *get_unicode = encp->get_unicode_func;
176 *get_hfsname = encp->get_hfsname_func;
177 ++encp->refcount;
178 break;
179 }
180 }
181 lck_mtx_unlock(&encodinglst_mutex);
182
183 if (!found) {
184 *get_unicode = NULL;
185 *get_hfsname = NULL;
186 return (EINVAL);
187 }
188
189 return (0);
190 }
191
192
193 /*
194 * hfs_relconverter - release interest in an HFS encoding converter
195 *
196 * Normally called during the unmounting of an hfs voulume.
197 */
198 int
199 hfs_relconverter(u_int32_t encoding)
200 {
201 struct hfs_encoding *encp;
202
203 lck_mtx_lock(&encodinglst_mutex);
204 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
205 if (encp->encoding == encoding) {
206 encp->refcount--;
207
208 /* if converter is no longer in use, release it */
209 if (encp->refcount <= 0 && encp->kmod_id != 0) {
210 uint32_t loadTag = (uint32_t)encp->kmod_id;
211
212 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
213 lck_mtx_unlock(&encodinglst_mutex);
214
215 FREE(encp, M_TEMP);
216 (void)OSKextUnloadKextWithLoadTag(loadTag);
217 return (0);
218 }
219 lck_mtx_unlock(&encodinglst_mutex);
220 return (0);
221 }
222 }
223 lck_mtx_unlock(&encodinglst_mutex);
224
225 return (EINVAL);
226 }
227
228
229 /*
230 * Convert HFS encoded string into UTF-8
231 *
232 * Unicode output is fully decomposed
233 * '/' chars are converted to ':'
234 */
235 int
236 hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
237 {
238 int error;
239 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
240 ItemCount uniCount;
241 size_t utf8len;
242 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
243
244 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
245
246 if (uniCount == 0)
247 error = EINVAL;
248
249 if (error == 0) {
250 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
251 if (error == ENAMETOOLONG)
252 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
253 else
254 *actualDstLen = utf8len;
255 }
256
257 return error;
258 }
259
260
261 /*
262 * When an HFS name cannot be encoded with the current
263 * volume encoding then MacRoman is used as a fallback.
264 */
265 int
266 mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
267 {
268 int error;
269 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
270 ItemCount uniCount;
271 size_t utf8len;
272
273 error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
274
275 if (uniCount == 0)
276 error = EINVAL;
277
278 if (error == 0) {
279 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
280 if (error == ENAMETOOLONG)
281 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
282 else
283 *actualDstLen = utf8len;
284 }
285
286 return error;
287 }
288
289
290 /*
291 * Convert Unicode string into HFS encoding
292 *
293 * ':' chars are converted to '/'
294 * Assumes input represents fully decomposed Unicode
295 */
296 int
297 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
298 {
299 int error;
300 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
301
302 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
303 if (error && retry) {
304 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
305 }
306 return error;
307 }
308
309 /*
310 * Convert UTF-8 string into HFS encoding
311 *
312 * ':' chars are converted to '/'
313 * Assumes input represents fully decomposed Unicode
314 */
315 int
316 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
317 {
318 int error;
319 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
320 size_t ucslen;
321
322 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
323 if (error == 0)
324 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
325
326 return error;
327 }
328
329 int
330 utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
331 {
332 int error;
333 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
334 size_t ucslen;
335
336 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
337 if (error == 0)
338 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
339
340 return error;
341 }
342
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other HFS encodings are loadable.
 */
347
/*
 * 0x00A0 - 0x00FF = Latin 1 Supplement (30 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x00A0).
 * '?' marks a code point with no mapping.  Read-only lookup data.
 */
static const u_int8_t gLatin1Table[] = {
		/*  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
/* 0x00A0 */	0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
/* 0x00B0 */	0xA1, 0xB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xE1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
/* 0x00C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x00D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
/* 0x00E0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x00F0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
358
/*
 * 0x02C0 - 0x02DF = Spacing Modifiers (8 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x02C0).
 * '?' marks a code point with no mapping.  Read-only lookup data.
 */
static const u_int8_t gSpaceModsTable[] = {
		/*  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
/* 0x02C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x02D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
};
365
/*
 * 0x2010 - 0x20AF = General Punctuation (17 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x2010).
 * '?' marks a code point with no mapping.  Read-only lookup data.
 */
static const u_int8_t gPunctTable[] = {
		/*  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
/* 0x2010 */	 '?',  '?',  '?', 0xd0, 0xd1,  '?',  '?',  '?', 0xd4, 0xd5, 0xe2,  '?', 0xd2, 0xd3, 0xe3,  '?',
/* 0x2020 */	0xa0, 0xe0, 0xa5,  '?',  '?',  '?', 0xc9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2030 */	0xe4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdc, 0xdd,  '?',  '?',  '?',  '?',  '?',
/* 0x2040 */	 '?',  '?',  '?',  '?', 0xda,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2050 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2060 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2070 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2080 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2090 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x20A0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdb,  '?',  '?',  '?'
};
380
/*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x2200).
 * The table covers 0x2200 - 0x2267 only (0x68 entries); the last row
 * is a partial one.  '?' marks a code point with no mapping.
 * Read-only lookup data.
 */
static const u_int8_t gMathTable[] = {
		/*  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
/* 0x2200 */	 '?',  '?', 0xb6,  '?',  '?',  '?', 0xc6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xb8,
/* 0x2210 */	 '?', 0xb7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc3,  '?',  '?',  '?', 0xb0,  '?',
/* 0x2220 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xba,  '?',  '?',  '?',  '?',
/* 0x2230 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2240 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2250 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
/* 0x2260 */	0xad,  '?',  '?',  '?', 0xb2, 0xb3,  '?',  '?'
};
392
/*
 * Base letter + combining mark -> precomposed MacRoman byte.
 *
 * The first 64 entries are indexed by (base character - 0x40) and give
 * the offset, within this same table, of that letter's 11-entry row
 * (0xDA selects the final all-'?' row).  Each row is indexed by the
 * low byte of the combining mark (0x0300 - 0x030A).  '?' marks a
 * combination with no mapping.  Read-only lookup data.
 */
static const u_int8_t gReverseCombTable[] = {
		/*  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
/* 0x40 */	0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
/* 0x50 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
/* 0x60 */	0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
/* 0x70 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,

		/* Combining Diacritical Marks (0x0300 - 0x030A) */
		/*  0     1     2     3     4     5     6     7     8     9     A  */
		/*  'A'   */
/* 0x0300 */	0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,

		/*  'a'   */
/* 0x0300 */	0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,

		/*  'E'   */
/* 0x0300 */	0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',

		/*  'e'   */
/* 0x0300 */	0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',

		/*  'I'   */
/* 0x0300 */	0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',

		/*  'i'   */
/* 0x0300 */	0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',

		/*  'N'   */
/* 0x0300 */	 '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

		/*  'n'   */
/* 0x0300 */	 '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

		/*  'O'   */
/* 0x0300 */	0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',

		/*  'o'   */
/* 0x0300 */	0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',

		/*  'U'   */
/* 0x0300 */	0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',

		/*  'u'   */
/* 0x0300 */	0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',

		/*  'Y'   */
/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',

		/*  'y'   */
/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',

		/*  else  */
/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
448
449
450 /*
451 * Convert Unicode string into HFS MacRoman encoding
452 *
453 * Assumes Unicode input is fully decomposed
454 */
455 static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str)
456 {
457 u_int8_t *p;
458 const UniChar *u;
459 UniChar c;
460 UniChar mask;
461 u_int16_t inputChars;
462 u_int16_t pascalChars;
463 OSErr result = noErr;
464 u_int8_t lsb;
465 u_int8_t prevChar;
466 u_int8_t mc;
467
468 mask = (UniChar) 0xFF80;
469 p = &hfs_str[1];
470 u = uni_str;
471 inputChars = unicodeChars;
472 pascalChars = prevChar = 0;
473
474 while (inputChars) {
475 c = *(u++);
476 lsb = (u_int8_t) c;
477
478 /*
479 * If its not 7-bit ascii, then we need to map it
480 */
481 if ( c & mask ) {
482 mc = '?';
483 switch (c & 0xFF00) {
484 case 0x0000:
485 if (lsb >= 0xA0)
486 mc = gLatin1Table[lsb - 0xA0];
487 break;
488
489 case 0x0200:
490 if (lsb >= 0xC0 && lsb <= 0xDF)
491 mc = gSpaceModsTable[lsb - 0xC0];
492 break;
493
494 case 0x2000:
495 if (lsb >= 0x10 && lsb <= 0xAF)
496 mc = gPunctTable[lsb- 0x10];
497 break;
498
499 case 0x2200:
500 if (lsb <= 0x68)
501 mc = gMathTable[lsb];
502 break;
503
504 case 0x0300:
505 if (c <= 0x030A) {
506 if (prevChar >= 'A' && prevChar < 'z') {
507 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
508 --p; /* backup over base char */
509 --pascalChars;
510 }
511 } else {
512 switch (c) {
513 case 0x0327: /* combining cedilla */
514 if (prevChar == 'C')
515 mc = 0x82;
516 else if (prevChar == 'c')
517 mc = 0x8D;
518 else
519 break;
520 --p; /* backup over base char */
521 --pascalChars;
522 break;
523
524 case 0x03A9: mc = 0xBD; break; /* omega */
525
526 case 0x03C0: mc = 0xB9; break; /* pi */
527 }
528 }
529 break;
530
531 default:
532 switch (c) {
533 case 0x0131: mc = 0xf5; break; /* dotless i */
534
535 case 0x0152: mc = 0xce; break; /* OE */
536
537 case 0x0153: mc = 0xcf; break; /* oe */
538
539 case 0x0192: mc = 0xc4; break; /* Ä */
540
541 case 0x2122: mc = 0xaa; break; /* TM */
542
543 case 0x25ca: mc = 0xd7; break; /* diamond */
544
545 case 0xf8ff: mc = 0xf0; break; /* apple logo */
546
547 case 0xfb01: mc = 0xde; break; /* fi */
548
549 case 0xfb02: mc = 0xdf; break; /* fl */
550 }
551 } /* end switch (c & 0xFF00) */
552
553 /*
554 * If we have an unmapped character then we need to mangle the name...
555 */
556 if (mc == '?')
557 result = kTECUsedFallbacksStatus;
558
559 prevChar = 0;
560 lsb = mc;
561
562 } else {
563 prevChar = lsb;
564 }
565
566 if (pascalChars >= 31)
567 break;
568
569 *(p++) = lsb;
570 ++pascalChars;
571 --inputChars;
572
573 } /* end while */
574
575 hfs_str[0] = pascalChars;
576
577 if (inputChars > 0)
578 result = ENAMETOOLONG; /* ran out of room! */
579
580 return result;
581 }
582
583
584 static UniChar gHiBitBaseUnicode[128] = {
585 /* 0x80 */ 0x0041, 0x0041, 0x0043, 0x0045, 0x004e, 0x004f, 0x0055, 0x0061,
586 /* 0x88 */ 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0063, 0x0065, 0x0065,
587 /* 0x90 */ 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 0x006e, 0x006f,
588 /* 0x98 */ 0x006f, 0x006f, 0x006f, 0x006f, 0x0075, 0x0075, 0x0075, 0x0075,
589 /* 0xa0 */ 0x2020, 0x00b0, 0x00a2, 0x00a3, 0x00a7, 0x2022, 0x00b6, 0x00df,
590 /* 0xa8 */ 0x00ae, 0x00a9, 0x2122, 0x00b4, 0x00a8, 0x2260, 0x00c6, 0x00d8,
591 /* 0xb0 */ 0x221e, 0x00b1, 0x2264, 0x2265, 0x00a5, 0x00b5, 0x2202, 0x2211,
592 /* 0xb8 */ 0x220f, 0x03c0, 0x222b, 0x00aa, 0x00ba, 0x03a9, 0x00e6, 0x00f8,
593 /* 0xc0 */ 0x00bf, 0x00a1, 0x00ac, 0x221a, 0x0192, 0x2248, 0x2206, 0x00ab,
594 /* 0xc8 */ 0x00bb, 0x2026, 0x00a0, 0x0041, 0x0041, 0x004f, 0x0152, 0x0153,
595 /* 0xd0 */ 0x2013, 0x2014, 0x201c, 0x201d, 0x2018, 0x2019, 0x00f7, 0x25ca,
596 /* 0xd8 */ 0x0079, 0x0059, 0x2044, 0x20ac, 0x2039, 0x203a, 0xfb01, 0xfb02,
597 /* 0xe0 */ 0x2021, 0x00b7, 0x201a, 0x201e, 0x2030, 0x0041, 0x0045, 0x0041,
598 /* 0xe8 */ 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 0x004f, 0x004f,
599 /* 0xf0 */ 0xf8ff, 0x004f, 0x0055, 0x0055, 0x0055, 0x0131, 0x02c6, 0x02dc,
600 /* 0xf8 */ 0x00af, 0x02d8, 0x02d9, 0x02da, 0x00b8, 0x02dd, 0x02db, 0x02c7
601 };
602
603 static UniChar gHiBitCombUnicode[128] = {
604 /* 0x80 */ 0x0308, 0x030a, 0x0327, 0x0301, 0x0303, 0x0308, 0x0308, 0x0301,
605 /* 0x88 */ 0x0300, 0x0302, 0x0308, 0x0303, 0x030a, 0x0327, 0x0301, 0x0300,
606 /* 0x90 */ 0x0302, 0x0308, 0x0301, 0x0300, 0x0302, 0x0308, 0x0303, 0x0301,
607 /* 0x98 */ 0x0300, 0x0302, 0x0308, 0x0303, 0x0301, 0x0300, 0x0302, 0x0308,
608 /* 0xa0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
609 /* 0xa8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
610 /* 0xb0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
611 /* 0xb8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
612 /* 0xc0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
613 /* 0xc8 */ 0x0000, 0x0000, 0x0000, 0x0300, 0x0303, 0x0303, 0x0000, 0x0000,
614 /* 0xd0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
615 /* 0xd8 */ 0x0308, 0x0308, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
616 /* 0xe0 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0302, 0x0302, 0x0301,
617 /* 0xe8 */ 0x0308, 0x0300, 0x0301, 0x0302, 0x0308, 0x0300, 0x0301, 0x0302,
618 /* 0xf0 */ 0x0000, 0x0300, 0x0301, 0x0302, 0x0300, 0x0000, 0x0000, 0x0000,
619 /* 0xf8 */ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
620 };
621
622
623 /*
624 * Convert HFS MacRoman encoded string into Unicode
625 *
626 * Unicode output is fully decomposed
627 */
628 int
629 mac_roman_to_unicode(const Str31 hfs_str, UniChar *uni_str,
630 __unused u_int32_t maxCharLen, u_int32_t *unicodeChars)
631 {
632 const u_int8_t *p;
633 UniChar *u;
634 u_int16_t pascalChars;
635 u_int8_t c;
636
637 p = hfs_str;
638 u = uni_str;
639
640 *unicodeChars = pascalChars = *(p++); /* pick up length byte */
641
642 while (pascalChars--) {
643 c = *(p++);
644
645 if ( (int8_t) c >= 0 ) { /* check if seven bit ascii */
646 *(u++) = (UniChar) c; /* just pad high byte with zero */
647 } else { /* its a hi bit character */
648 UniChar uc;
649
650 c &= 0x7F;
651 *(u++) = uc = gHiBitBaseUnicode[c];
652
653 /*
654 * if the unicode character we get back is an alpha char
655 * then we must have an additional combining character
656 */
657 if ((uc <= (UniChar) 'z') && (uc >= (UniChar) 'A')) {
658 *(u++) = gHiBitCombUnicode[c];
659 ++(*unicodeChars);
660 }
661 }
662 }
663
664 return noErr;
665 }
666
667 #else /* not HFS - temp workaround until 4277828 is fixed */
668 /* stubs for exported routines that aren't present when we build kernel without HFS */
669
670 #include <sys/types.h>
671 #include <sys/errno.h>
672
/* Prototypes for the stubs below (function-pointer arguments are taken as void *). */
int  hfs_addconverter(int id, u_int32_t encoding, void * get_unicode, void * get_hfsname);
int  hfs_getconverter(u_int32_t encoding, void *get_unicode, void *get_hfsname);
int  hfs_relconverter(u_int32_t encoding);
int  hfs_remconverter(int id, u_int32_t encoding);
677
678 int hfs_addconverter( __unused int id,
679 __unused u_int32_t encoding,
680 __unused void * get_unicode,
681 __unused void * get_hfsname )
682 {
683 return(0);
684 }
685
686 int hfs_getconverter(__unused u_int32_t encoding, __unused void *get_unicode, __unused void *get_hfsname)
687 {
688 return(EINVAL);
689 }
690
691 int hfs_relconverter(__unused u_int32_t encoding)
692 {
693 return(EINVAL);
694 }
695
696 int hfs_remconverter(__unused int id, __unused u_int32_t encoding)
697 {
698 return(0);
699 }
700
701 #endif /* HFS */