]> git.saurik.com Git - apple/hfs.git/blame - hfs_encodings/hfs_encodings.c
hfs-366.30.3.tar.gz
[apple/hfs.git] / hfs_encodings / hfs_encodings.c
CommitLineData
558d2836
A
1/*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <IOKit/IOLib.h>
30
31#include <sys/types.h>
32#include <sys/errno.h>
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/utfconv.h>
40#include <kern/host.h>
41#include <mach/host_priv.h>
42#include <libkern/OSKextLib.h>
43#include <libkern/OSKextLibPrivate.h>
44
45#include "hfs_encodings.h"
46#include "../core/hfs_macos_defs.h"
47
48lck_grp_t * encodinglst_lck_grp;
49lck_grp_attr_t * encodinglst_lck_grp_attr;
50lck_attr_t * encodinglst_lck_attr;
51
52
53/* hfs encoding converter list */
54SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
55
56lck_mtx_t encodinglst_mutex;
57
58/* hfs encoding converter entry */
59struct hfs_encoding {
60 SLIST_ENTRY(hfs_encoding) link;
61 int refcount;
62 int kmod_id;
63 u_int32_t encoding;
64 hfs_to_unicode_func_t get_unicode_func;
65 unicode_to_hfs_func_t get_hfsname_func;
66};
67
68void
69hfs_converterinit(void)
70{
71 SLIST_INIT(&hfs_encoding_list);
72
73 encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
74 encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
75 encodinglst_lck_attr = lck_attr_alloc_init();
76
77 lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);
78
79 /*
80 * add resident MacRoman converter and take a reference
81 * since its always "loaded". MacRoman is the default converter
82 * for HFS standard volumes.
83 *
84 * Only do this if we are actually supporting HFS standard
85 * volumes. The converter is not used on configurations
86 * that do not support HFS standard.
87 */
88 hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
89 SLIST_FIRST(&hfs_encoding_list)->refcount++;
90}
91
92void hfs_converterdone(void)
93{
94 if (encodinglst_lck_grp_attr)
95 lck_grp_attr_free(encodinglst_lck_grp_attr);
96 if (encodinglst_lck_grp)
97 lck_grp_free(encodinglst_lck_grp);
98 if (encodinglst_lck_attr)
99 lck_attr_free(encodinglst_lck_attr);
100}
101
102/*
103 * For configurations that do support HFS standard, we need all of these..
104 */
105
106/*
107 * hfs_addconverter - add an HFS encoding converter
108 *
109 * This is called exclusivly by kernel loadable modules
110 * (like HFS_Japanese.kmod) to register hfs encoding
111 * conversion routines.
112 *
113 */
114int
115hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
116{
117 struct hfs_encoding *encp = (struct hfs_encoding *)IOMalloc(sizeof(*encp));
118
119 lck_mtx_lock(&encodinglst_mutex);
120
121 encp->link.sle_next = NULL;
122 encp->refcount = 0;
123 encp->encoding = encoding;
124 encp->get_unicode_func = get_unicode;
125 encp->get_hfsname_func = get_hfsname;
126 encp->kmod_id = id;
127 SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
128
129 lck_mtx_unlock(&encodinglst_mutex);
130
131 if (id)
132 OSKextRetainKextWithLoadTag(id);
133
134 return (0);
135}
136
137
138/*
139 * hfs_remconverter - remove an HFS encoding converter
140 *
141 * Can be called by a kernel loadable module's finalize
142 * routine to remove an encoding converter so that the
143 * module (i.e. the code) can be unloaded.
144 *
145 * However, in the normal case, the removing and unloading
146 * of these converters is done in hfs_relconverter.
147 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
148 */
149int
150hfs_remconverter(int id, u_int32_t encoding)
151{
152 struct hfs_encoding *encp;
153
154 lck_mtx_lock(&encodinglst_mutex);
155 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
156 if (encp->encoding == encoding && encp->kmod_id == id) {
157 encp->refcount--;
158
159 /* if converter is no longer in use, release it */
160 if (encp->refcount <= 0 && encp->kmod_id != 0) {
161 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
162 lck_mtx_unlock(&encodinglst_mutex);
163 OSKextReleaseKextWithLoadTag(encp->kmod_id);
164 IOFree(encp, sizeof(*encp));
165 return (0);
166 } else {
167 lck_mtx_unlock(&encodinglst_mutex);
168 return (1); /* busy */
169 }
170 break;
171 }
172 }
173 lck_mtx_unlock(&encodinglst_mutex);
174
175 return (0);
176}
177
178/*
179 * hfs_getconverter - get HFS encoding converters
180 *
181 * Normally called during the mounting of an hfs voulume.
182 */
183int
184hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
185{
186 struct hfs_encoding *encp;
187 int found = 0;
188
189 lck_mtx_lock(&encodinglst_mutex);
190 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
191 if (encp->encoding == encoding) {
192 found = 1;
193 *get_unicode = encp->get_unicode_func;
194 *get_hfsname = encp->get_hfsname_func;
195 ++encp->refcount;
196 break;
197 }
198 }
199 lck_mtx_unlock(&encodinglst_mutex);
200
201 if (!found) {
202 *get_unicode = NULL;
203 *get_hfsname = NULL;
204 return (EINVAL);
205 }
206
207 return (0);
208}
209
210
211/*
212 * hfs_relconverter - release interest in an HFS encoding converter
213 *
214 * Normally called during the unmounting of an hfs voulume.
215 */
216int
217hfs_relconverter(u_int32_t encoding)
218{
219 struct hfs_encoding *encp;
220
221 lck_mtx_lock(&encodinglst_mutex);
222 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
223 if (encp->encoding == encoding) {
224 encp->refcount--;
225
226 /* if converter is no longer in use, release it */
227 if (encp->refcount <= 0 && encp->kmod_id != 0) {
228 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
229 lck_mtx_unlock(&encodinglst_mutex);
230
231 OSKextReleaseKextWithLoadTag(encp->kmod_id);
232 IOFree(encp, sizeof(*encp));
233 return (0);
234 }
235 lck_mtx_unlock(&encodinglst_mutex);
236 return (0);
237 }
238 }
239 lck_mtx_unlock(&encodinglst_mutex);
240
241 return (EINVAL);
242}
243
244/*
245 * When an HFS name cannot be encoded with the current
246 * volume encoding then MacRoman is used as a fallback.
247 */
248int
249mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
250{
251 int error;
252 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
253 ItemCount uniCount;
254 size_t utf8len;
255 u_int8_t pascal_length = 0;
256
257 /*
258 * Validate the length of the Pascal-style string before passing it
259 * down to the decoding engine.
260 */
261 pascal_length = *((const u_int8_t*)(hfs_str));
262 if (pascal_length > 31) {
263 /* invalid string; longer than 31 bytes */
264 error = EINVAL;
265 return error;
266 }
267
268 error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
269
270 if (uniCount == 0)
271 error = EINVAL;
272
273 if (error == 0) {
274 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
275 if (error == ENAMETOOLONG)
276 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
277 else
278 *actualDstLen = utf8len;
279 }
280
281 return error;
282}
283
284int
285utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
286{
287 int error;
288 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
289 size_t ucslen;
290
291 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
292 if (error == 0)
293 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
294
295 return error;
296}
297
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other HFS encodings are loadable.
 */
302
/*
 * 0x00A0 - 0x00FF = Latin 1 Supplement (30 total)
 *
 * Indexed by (code point - 0x00A0); each entry is the MacRoman byte
 * for that code point, or '?' when there is no mapping.  The table is
 * only ever read, so it is declared const.
 */
static const u_int8_t gLatin1Table[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x00A0 */	0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
	/* 0x00B0 */	0xA1, 0xB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xE1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
	/* 0x00C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x00D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
	/* 0x00E0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x00F0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
313
/*
 * 0x02C0 - 0x02DF = Spacing Modifiers (8 total)
 *
 * Indexed by (code point - 0x02C0); '?' marks code points with no
 * MacRoman mapping.  Read-only, hence const.
 */
static const u_int8_t gSpaceModsTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x02C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x02D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
};
320
/*
 * 0x2010 - 0x20AF = General Punctuation (17 total)
 *
 * Indexed by (code point - 0x2010); '?' marks code points with no
 * MacRoman mapping.  Read-only, hence const.
 */
static const u_int8_t gPunctTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2010 */	 '?',  '?',  '?', 0xD0, 0xD1,  '?',  '?',  '?', 0xD4, 0xD5, 0xE2,  '?', 0xD2, 0xD3, 0xE3,  '?',
	/* 0x2020 */	0xA0, 0xE0, 0xA5,  '?',  '?',  '?', 0xC9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2030 */	0xE4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xDC, 0xDD,  '?',  '?',  '?',  '?',  '?',
	/* 0x2040 */	 '?',  '?',  '?',  '?', 0xDA,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2050 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2060 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2070 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2080 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2090 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x20A0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xDB,  '?',  '?',  '?'
};
335
/*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * Indexed by the low byte of the code point; the table stops at
 * 0x2267 (0x68 entries), so callers must bound-check the index.
 * '?' marks code points with no MacRoman mapping.  Read-only, hence
 * const.
 */
static const u_int8_t gMathTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2200 */	 '?',  '?', 0xB6,  '?',  '?',  '?', 0xC6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xB8,
	/* 0x2210 */	 '?', 0xB7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xC3,  '?',  '?',  '?', 0xB0,  '?',
	/* 0x2220 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xBA,  '?',  '?',  '?',  '?',
	/* 0x2230 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2240 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xC5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2250 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2260 */	0xAD,  '?',  '?',  '?', 0xB2, 0xB3,  '?',  '?'
};
347
/*
 * Reverse combining table: base character + combining mark -> MacRoman
 *
 * The first 0x40 entries are indexed by (base character - 0x40) and
 * hold the offset, within this same array, of an 11-entry row for that
 * base character (0xDA selects the catch-all "else" row).  Each row is
 * indexed by the low byte of a combining diacritical mark in the range
 * 0x0300 - 0x030A and yields the precomposed MacRoman byte, or '?'
 * when there is none.  Read-only, hence const.
 */
static const u_int8_t gReverseCombTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x40 */	0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
	/* 0x50 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
	/* 0x60 */	0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
	/* 0x70 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,

	/* Combining Diacritical Marks (0x0300 - 0x030A) */
	/*		  0     1     2     3     4     5     6     7     8     9     A  */
	/*  'A'   */
	/* 0x0300 */	0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,

	/*  'a'   */
	/* 0x0300 */	0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,

	/*  'E'   */
	/* 0x0300 */	0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',

	/*  'e'   */
	/* 0x0300 */	0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',

	/*  'I'   */
	/* 0x0300 */	0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',

	/*  'i'   */
	/* 0x0300 */	0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',

	/*  'N'   */
	/* 0x0300 */	 '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

	/*  'n'   */
	/* 0x0300 */	 '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

	/*  'O'   */
	/* 0x0300 */	0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',

	/*  'o'   */
	/* 0x0300 */	0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',

	/*  'U'   */
	/* 0x0300 */	0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',

	/*  'u'   */
	/* 0x0300 */	0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',

	/*  'Y'   */
	/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',

	/*  'y'   */
	/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',

	/*  else  */
	/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
403
404
405/*
406 * Convert Unicode string into HFS MacRoman encoding
407 *
408 * Assumes Unicode input is fully decomposed
409 */
410int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str)
411{
412 u_int8_t *p;
413 const UniChar *u;
414 UniChar c;
415 UniChar mask;
416 u_int16_t inputChars;
417 u_int16_t pascalChars;
418 OSErr result = noErr;
419 u_int8_t lsb;
420 u_int8_t prevChar;
421 u_int8_t mc;
422
423 mask = (UniChar) 0xFF80;
424 p = &hfs_str[1];
425 u = uni_str;
426 inputChars = unicodeChars;
427 pascalChars = prevChar = 0;
428
429 while (inputChars) {
430 c = *(u++);
431 lsb = (u_int8_t) c;
432
433 /*
434 * If its not 7-bit ascii, then we need to map it
435 */
436 if ( c & mask ) {
437 mc = '?';
438 switch (c & 0xFF00) {
439 case 0x0000:
440 if (lsb >= 0xA0)
441 mc = gLatin1Table[lsb - 0xA0];
442 break;
443
444 case 0x0200:
445 if (lsb >= 0xC0 && lsb <= 0xDF)
446 mc = gSpaceModsTable[lsb - 0xC0];
447 break;
448
449 case 0x2000:
450 if (lsb >= 0x10 && lsb <= 0xAF)
451 mc = gPunctTable[lsb- 0x10];
452 break;
453
454 case 0x2200:
455 if (lsb < 0x68)
456 mc = gMathTable[lsb];
457 break;
458
459 case 0x0300:
460 if (c <= 0x030A) {
461 if (prevChar >= 'A' && prevChar < 'z') {
462 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
463 --p; /* backup over base char */
464 --pascalChars;
465 }
466 } else {
467 switch (c) {
468 case 0x0327: /* combining cedilla */
469 if (prevChar == 'C')
470 mc = 0x82;
471 else if (prevChar == 'c')
472 mc = 0x8D;
473 else
474 break;
475 --p; /* backup over base char */
476 --pascalChars;
477 break;
478
479 case 0x03A9: mc = 0xBD; break; /* omega */
480
481 case 0x03C0: mc = 0xB9; break; /* pi */
482 }
483 }
484 break;
485
486 default:
487 switch (c) {
488 case 0x0131: mc = 0xf5; break; /* dotless i */
489
490 case 0x0152: mc = 0xce; break; /* OE */
491
492 case 0x0153: mc = 0xcf; break; /* oe */
493
494