]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/hfs/hfs_encodings.c
xnu-1228.15.4.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_encodings.c
... / ...
CommitLineData
/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
28#if HFS
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/malloc.h>
34#include <sys/queue.h>
35#include <sys/utfconv.h>
36#include <kern/host.h>
37#include <mach/host_priv.h>
38
39#include "hfs.h"
40
41
42lck_grp_t * encodinglst_lck_grp;
43lck_grp_attr_t * encodinglst_lck_grp_attr;
44lck_attr_t * encodinglst_lck_attr;
45
46
47/* hfs encoding converter list */
48SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
49
50lck_mtx_t encodinglst_mutex;
51
52
53
54/* hfs encoding converter entry */
55struct hfs_encoding {
56 SLIST_ENTRY(hfs_encoding) link;
57 int refcount;
58 int kmod_id;
59 u_int32_t encoding;
60 hfs_to_unicode_func_t get_unicode_func;
61 unicode_to_hfs_func_t get_hfsname_func;
62};
63
64#define MAX_HFS_UNICODE_CHARS (15*5)
65
66static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str);
67
68void
69hfs_converterinit(void)
70{
71 SLIST_INIT(&hfs_encoding_list);
72
73 encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
74 encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
75 encodinglst_lck_attr = lck_attr_alloc_init();
76
77 lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);
78
79 /*
80 * add resident MacRoman converter and take a reference
81 * since its always "loaded".
82 */
83 hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
84 SLIST_FIRST(&hfs_encoding_list)->refcount++;
85}
86
87
88/*
89 * hfs_addconverter - add an HFS encoding converter
90 *
91 * This is called exclusivly by kernel loadable modules
92 * (like HFS_Japanese.kmod) to register hfs encoding
93 * conversion routines.
94 *
95 */
96int
97hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
98{
99 struct hfs_encoding *encp;
100
101 MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
102
103 lck_mtx_lock(&encodinglst_mutex);
104
105 encp->link.sle_next = NULL;
106 encp->refcount = 0;
107 encp->encoding = encoding;
108 encp->get_unicode_func = get_unicode;
109 encp->get_hfsname_func = get_hfsname;
110 encp->kmod_id = id;
111 SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
112
113 lck_mtx_unlock(&encodinglst_mutex);
114 return (0);
115}
116
117
118/*
119 * hfs_remconverter - remove an HFS encoding converter
120 *
121 * Can be called by a kernel loadable module's finalize
122 * routine to remove an encoding converter so that the
123 * module (i.e. the code) can be unloaded.
124 *
125 * However, in the normal case, the removing and unloading
126 * of these converters is done in hfs_relconverter.
127 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
128 */
129int
130hfs_remconverter(int id, u_int32_t encoding)
131{
132 struct hfs_encoding *encp;
133
134 lck_mtx_lock(&encodinglst_mutex);
135 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
136 if (encp->encoding == encoding && encp->kmod_id == id) {
137 encp->refcount--;
138
139 /* if converter is no longer in use, release it */
140 if (encp->refcount <= 0 && encp->kmod_id != 0) {
141 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
142 lck_mtx_unlock(&encodinglst_mutex);
143 FREE(encp, M_TEMP);
144 return (0);
145 } else {
146 lck_mtx_unlock(&encodinglst_mutex);
147 return (1); /* busy */
148 }
149 break;
150 }
151 }
152 lck_mtx_unlock(&encodinglst_mutex);
153
154 return (0);
155}
156
157
158/*
159 * hfs_getconverter - get HFS encoding converters
160 *
161 * Normally called during the mounting of an hfs voulume.
162 */
163int
164hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
165{
166 struct hfs_encoding *encp;
167 int found = 0;
168
169 lck_mtx_lock(&encodinglst_mutex);
170 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
171 if (encp->encoding == encoding) {
172 found = 1;
173 *get_unicode = encp->get_unicode_func;
174 *get_hfsname = encp->get_hfsname_func;
175 ++encp->refcount;
176 break;
177 }
178 }
179 lck_mtx_unlock(&encodinglst_mutex);
180
181 if (!found) {
182 *get_unicode = NULL;
183 *get_hfsname = NULL;
184 return (EINVAL);
185 }
186
187 return (0);
188}
189
190
191/*
192 * hfs_relconverter - release interest in an HFS encoding converter
193 *
194 * Normally called during the unmounting of an hfs voulume.
195 */
196int
197hfs_relconverter(u_int32_t encoding)
198{
199 struct hfs_encoding *encp;
200
201 lck_mtx_lock(&encodinglst_mutex);
202 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
203 if (encp->encoding == encoding) {
204 encp->refcount--;
205
206 /* if converter is no longer in use, release it */
207 if (encp->refcount <= 0 && encp->kmod_id != 0) {
208 int id = encp->kmod_id;
209
210 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
211 lck_mtx_unlock(&encodinglst_mutex);
212
213 FREE(encp, M_TEMP);
214 record_kext_unload(id);
215 kmod_destroy((host_priv_t) host_priv_self(), id);
216 return (0);
217 }
218 lck_mtx_unlock(&encodinglst_mutex);
219 return (0);
220 }
221 }
222 lck_mtx_unlock(&encodinglst_mutex);
223
224 return (EINVAL);
225}
226
227
228/*
229 * Convert HFS encoded string into UTF-8
230 *
231 * Unicode output is fully decomposed
232 * '/' chars are converted to ':'
233 */
234int
235hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
236{
237 int error;
238 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
239 ItemCount uniCount;
240 size_t utf8len;
241 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
242
243 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
244
245 if (uniCount == 0)
246 error = EINVAL;
247
248 if (error == 0) {
249 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
250 if (error == ENAMETOOLONG)
251 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
252 else
253 *actualDstLen = utf8len;
254 }
255
256 return error;
257}
258
259
260/*
261 * When an HFS name cannot be encoded with the current
262 * volume encoding then MacRoman is used as a fallback.
263 */
264int
265mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
266{
267 int error;
268 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
269 ItemCount uniCount;
270 size_t utf8len;
271
272 error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
273
274 if (uniCount == 0)
275 error = EINVAL;
276
277 if (error == 0) {
278 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
279 if (error == ENAMETOOLONG)
280 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
281 else
282 *actualDstLen = utf8len;
283 }
284
285 return error;
286}
287
288
289/*
290 * Convert Unicode string into HFS encoding
291 *
292 * ':' chars are converted to '/'
293 * Assumes input represents fully decomposed Unicode
294 */
295int
296unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
297{
298 int error;
299 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
300
301 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
302 if (error && retry) {
303 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
304 }
305 return error;
306}
307
308/*
309 * Convert UTF-8 string into HFS encoding
310 *
311 * ':' chars are converted to '/'
312 * Assumes input represents fully decomposed Unicode
313 */
314int
315utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
316{
317 int error;
318 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
319 size_t ucslen;
320
321 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
322 if (error == 0)
323 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
324
325 return error;
326}
327
328int
329utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
330{
331 int error;
332 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
333 size_t ucslen;
334
335 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
336 if (error == 0)
337 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
338
339 return error;
340}
341
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other hfs encodings are loadable.
 */
346
/*
 * 0x00A0 - 0x00FF = Latin 1 Supplement (30 total)
 *
 * Indexed by (code point - 0x00A0); yields the MacRoman byte, or '?'
 * where no mapping is provided.
 */
static u_int8_t gLatin1Table[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x00A0 */	0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
	/* 0x00B0 */	0xA1, 0xB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xE1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
	/* 0x00C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x00D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
	/* 0x00E0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x00F0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
357
/*
 * 0x02C0 - 0x02DF = Spacing Modifiers (8 total)
 *
 * Indexed by (code point - 0x02C0); yields the MacRoman byte, or '?'
 * where no mapping is provided.
 */
static u_int8_t gSpaceModsTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x02C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x02D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
};
364
/*
 * 0x2010 - 0x20AF = General Punctuation (17 total)
 *
 * Indexed by (code point - 0x2010); yields the MacRoman byte, or '?'
 * where no mapping is provided.
 */
static u_int8_t gPunctTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2010 */	 '?',  '?',  '?', 0xD0, 0xD1,  '?',  '?',  '?', 0xD4, 0xD5, 0xE2,  '?', 0xD2, 0xD3, 0xE3,  '?',
	/* 0x2020 */	0xA0, 0xE0, 0xA5,  '?',  '?',  '?', 0xC9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2030 */	0xE4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xDC, 0xDD,  '?',  '?',  '?',  '?',  '?',
	/* 0x2040 */	 '?',  '?',  '?',  '?', 0xDA,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2050 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2060 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2070 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2080 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2090 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x20A0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xDB,  '?',  '?',  '?'
};
379
/*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * Indexed by the low byte of the code point; yields the MacRoman byte,
 * or '?' where no mapping is provided.
 *
 * The lookup in unicode_to_mac_roman accepts low bytes up to and
 * including 0x68, so the table must hold 0x69 entries.  The final row
 * therefore carries an explicit '?' for 0x2268; without it that code
 * point would read one element past the end of the array.
 */
static u_int8_t gMathTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2200 */	 '?',  '?', 0xB6,  '?',  '?',  '?', 0xC6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xB8,
	/* 0x2210 */	 '?', 0xB7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xC3,  '?',  '?',  '?', 0xB0,  '?',
	/* 0x2220 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xBA,  '?',  '?',  '?',  '?',
	/* 0x2230 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2240 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xC5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2250 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2260 */	0xAD,  '?',  '?',  '?', 0xB2, 0xB3,  '?',  '?',  '?'
};
391
/*
 * Base character + combining mark -> precomposed MacRoman byte.
 *
 * Two-level table.  The first 64 bytes are indexed by
 * (base character - 0x40) and hold the offset, within this same array,
 * of an 11-entry row for that base character.  Each row is indexed by
 * the low byte of a combining mark in the range 0x0300 - 0x030A.
 * Base characters with no row of their own point at the final "else"
 * row (offset 0xDA), which maps everything to '?'.
 */
static u_int8_t gReverseCombTable[] = {
	/* Row offsets, indexed by (base character - 0x40) */
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x40 */	0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
	/* 0x50 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
	/* 0x60 */	0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
	/* 0x70 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,

	/* Combining Diacritical Marks (0x0300 - 0x030A), one row per base character */
	/*		  0     1     2     3     4     5     6     7     8     9     A  */
	/* 'A' */	0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,
	/* 'a' */	0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,
	/* 'E' */	0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',
	/* 'e' */	0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',
	/* 'I' */	0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',
	/* 'i' */	0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',
	/* 'N' */	 '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 'n' */	 '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 'O' */	0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',
	/* 'o' */	0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',
	/* 'U' */	0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',
	/* 'u' */	0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',
	/* 'Y' */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',
	/* 'y' */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',
	/* else */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
447
448
449/*
450 * Convert Unicode string into HFS MacRoman encoding
451 *
452 * Assumes Unicode input is fully decomposed
453 */
454static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str)
455{
456 u_int8_t *p;
457 const UniChar *u;
458 UniChar c;
459 UniChar mask;
460 u_int16_t inputChars;
461 u_int16_t pascalChars;
462 OSErr result = noErr;
463 u_int8_t lsb;
464 u_int8_t prevChar;
465 u_int8_t mc;
466
467 mask = (UniChar) 0xFF80;
468 p = &hfs_str[1];
469 u = uni_str;
470 inputChars = unicodeChars;
471 pascalChars = prevChar = 0;
472
473 while (inputChars) {
474 c = *(u++);
475 lsb = (u_int8_t) c;
476
477 /*
478 * If its not 7-bit ascii, then we need to map it
479 */
480 if ( c & mask ) {
481 mc = '?';
482 switch (c & 0xFF00) {
483 case 0x0000:
484 if (lsb >= 0xA0)
485 mc = gLatin1Table[lsb - 0xA0];
486 break;
487
488 case 0x0200:
489 if (lsb >= 0xC0 && lsb <= 0xDF)
490 mc = gSpaceModsTable[lsb - 0xC0];
491 break;
492
493 case 0x2000:
494 if (lsb >= 0x10 && lsb <= 0xAF)
495 mc = gPunctTable[lsb- 0x10];
496 break;
497
498 case 0x2200:
499 if (lsb <= 0x68)
500 mc = gMathTable[lsb];
501 break;
502
503 case 0x0300:
504 if (c <= 0x030A) {
505 if (prevChar >= 'A' && prevChar < 'z') {
506 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
507 --p; /* backup over base char */
508 --pascalChars;
509 }
510 } else {
511 switch (c) {
512 case 0x0327: /* combining cedilla */
513 if (prevChar == 'C')
514 mc = 0x82;
515 else if (prevChar == 'c')
516 mc = 0x8D;
517 else
518 break;
519 --p; /* backup over base char */
520 --pascalChars;
521 break;
522
523 case 0x03A9: mc = 0xBD; break; /* omega */
524
525 case 0x03C0: mc = 0xB9; break; /* pi */
526 }
527 }
528 break;
529
530 default:
531 switch (c) {
532 case 0x0131: mc = 0xf5; break; /* dotless i */
533
534 case 0x0152: mc = 0xce; break; /* OE */
535
536 case 0x0153: mc = 0xcf; break; /* oe */
537
538