]> git.saurik.com Git - apple/xnu.git/blame - bsd/hfs/hfs_encodings.c
xnu-2782.20.48.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_encodings.c
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2013 Apple Computer, Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b 27 */
39236c6e
A
28
29#include <sys/types.h>
30#include <sys/errno.h>
31
2d21ac55 32#if HFS
1c79356b
A
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
1c79356b
A
37#include <sys/malloc.h>
38#include <sys/queue.h>
39#include <sys/utfconv.h>
2d21ac55
A
40#include <kern/host.h>
41#include <mach/host_priv.h>
b0d623f7
A
42#include <libkern/OSKextLib.h>
43#include <libkern/OSKextLibPrivate.h>
1c79356b
A
44
45#include "hfs.h"
46
47
91447636
A
48lck_grp_t * encodinglst_lck_grp;
49lck_grp_attr_t * encodinglst_lck_grp_attr;
50lck_attr_t * encodinglst_lck_attr;
51
52
1c79356b
A
53/* hfs encoding converter list */
54SLIST_HEAD(encodinglst, hfs_encoding) hfs_encoding_list = {0};
91447636
A
55
56lck_mtx_t encodinglst_mutex;
57
1c79356b
A
58/* hfs encoding converter entry */
59struct hfs_encoding {
60 SLIST_ENTRY(hfs_encoding) link;
61 int refcount;
62 int kmod_id;
2d21ac55 63 u_int32_t encoding;
1c79356b
A
64 hfs_to_unicode_func_t get_unicode_func;
65 unicode_to_hfs_func_t get_hfsname_func;
66};
67
1c79356b
A
68#define MAX_HFS_UNICODE_CHARS (15*5)
69
39236c6e 70#if CONFIG_HFS_STD
2d21ac55 71static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str);
39236c6e 72#endif
1c79356b
A
73
74void
75hfs_converterinit(void)
76{
77 SLIST_INIT(&hfs_encoding_list);
91447636
A
78
79 encodinglst_lck_grp_attr= lck_grp_attr_alloc_init();
91447636 80 encodinglst_lck_grp = lck_grp_alloc_init("cnode_hash", encodinglst_lck_grp_attr);
91447636 81 encodinglst_lck_attr = lck_attr_alloc_init();
91447636
A
82
83 lck_mtx_init(&encodinglst_mutex, encodinglst_lck_grp, encodinglst_lck_attr);
1c79356b 84
39236c6e 85#if CONFIG_HFS_STD
1c79356b
A
86 /*
87 * add resident MacRoman converter and take a reference
39236c6e
A
88 * since its always "loaded". MacRoman is the default converter
89 * for HFS standard volumes.
90 *
91 * Only do this if we are actually supporting HFS standard
92 * volumes. The converter is not used on configurations
93 * that do not support HFS standard.
1c79356b
A
94 */
95 hfs_addconverter(0, kTextEncodingMacRoman, mac_roman_to_unicode, unicode_to_mac_roman);
96 SLIST_FIRST(&hfs_encoding_list)->refcount++;
39236c6e
A
97#endif
98
99}
100
101#if !CONFIG_HFS_STD
102
103/*
104 * Function stubs are needed for KPI export.
105 * It is a little swizzly to have two separate copies of the stub functions in this file
106 * but the prototypes of these functions are different if we're using the real headers
107 * vs. the dummy prototypes at the end of the file. (hfs_to_unicode_func_t vs. void*)
108 *
109 * As a result, we need our own copies in the no-HFS-Standard configuration
110 */
111int hfs_addconverter( __unused int id,
112 __unused u_int32_t encoding,
113 __unused hfs_to_unicode_func_t get_unicode,
114 __unused unicode_to_hfs_func_t get_hfsname )
115{
116 return(0);
117}
118
119int hfs_getconverter( __unused u_int32_t encoding,
120 __unused hfs_to_unicode_func_t *get_unicode,
121 __unused unicode_to_hfs_func_t *get_hfsname)
122{
123 return(EINVAL);
124}
125
126int hfs_relconverter(__unused u_int32_t encoding)
127{
128 return(EINVAL);
1c79356b
A
129}
130
39236c6e
A
131int hfs_remconverter(__unused int id, __unused u_int32_t encoding)
132{
133 return(0);
134}
135
136#else
137
138/*
139 * For configurations that do support HFS standard, we need all of these..
140 */
1c79356b
A
141
142/*
143 * hfs_addconverter - add an HFS encoding converter
144 *
145 * This is called exclusivly by kernel loadable modules
146 * (like HFS_Japanese.kmod) to register hfs encoding
147 * conversion routines.
148 *
149 */
150int
2d21ac55 151hfs_addconverter(int id, u_int32_t encoding, hfs_to_unicode_func_t get_unicode, unicode_to_hfs_func_t get_hfsname)
1c79356b
A
152{
153 struct hfs_encoding *encp;
154
155 MALLOC(encp, struct hfs_encoding *, sizeof(struct hfs_encoding), M_TEMP, M_WAITOK);
156
91447636 157 lck_mtx_lock(&encodinglst_mutex);
1c79356b
A
158
159 encp->link.sle_next = NULL;
160 encp->refcount = 0;
161 encp->encoding = encoding;
162 encp->get_unicode_func = get_unicode;
163 encp->get_hfsname_func = get_hfsname;
164 encp->kmod_id = id;
165 SLIST_INSERT_HEAD(&hfs_encoding_list, encp, link);
166
91447636 167 lck_mtx_unlock(&encodinglst_mutex);
1c79356b
A
168 return (0);
169}
170
171
172/*
173 * hfs_remconverter - remove an HFS encoding converter
174 *
175 * Can be called by a kernel loadable module's finalize
176 * routine to remove an encoding converter so that the
177 * module (i.e. the code) can be unloaded.
178 *
179 * However, in the normal case, the removing and unloading
180 * of these converters is done in hfs_relconverter.
181 * The call is initiated from within the kernel during the unmounting of an hfs voulume.
182 */
183int
2d21ac55 184hfs_remconverter(int id, u_int32_t encoding)
1c79356b
A
185{
186 struct hfs_encoding *encp;
1c79356b 187
91447636 188 lck_mtx_lock(&encodinglst_mutex);
1c79356b
A
189 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
190 if (encp->encoding == encoding && encp->kmod_id == id) {
191 encp->refcount--;
192
193 /* if converter is no longer in use, release it */
194 if (encp->refcount <= 0 && encp->kmod_id != 0) {
195 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
91447636 196 lck_mtx_unlock(&encodinglst_mutex);
1c79356b 197 FREE(encp, M_TEMP);
91447636 198 return (0);
1c79356b 199 } else {
91447636
A
200 lck_mtx_unlock(&encodinglst_mutex);
201 return (1); /* busy */
1c79356b
A
202 }
203 break;
204 }
205 }
91447636 206 lck_mtx_unlock(&encodinglst_mutex);
1c79356b 207
91447636 208 return (0);
1c79356b
A
209}
210
211
212/*
213 * hfs_getconverter - get HFS encoding converters
214 *
215 * Normally called during the mounting of an hfs voulume.
216 */
217int
2d21ac55 218hfs_getconverter(u_int32_t encoding, hfs_to_unicode_func_t *get_unicode, unicode_to_hfs_func_t *get_hfsname)
1c79356b
A
219{
220 struct hfs_encoding *encp;
221 int found = 0;
222
91447636 223 lck_mtx_lock(&encodinglst_mutex);
1c79356b
A
224 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
225 if (encp->encoding == encoding) {
226 found = 1;
227 *get_unicode = encp->get_unicode_func;
228 *get_hfsname = encp->get_hfsname_func;
229 ++encp->refcount;
230 break;
231 }
232 }
91447636 233 lck_mtx_unlock(&encodinglst_mutex);
1c79356b
A
234
235 if (!found) {
236 *get_unicode = NULL;
237 *get_hfsname = NULL;
238 return (EINVAL);
239 }
240
241 return (0);
242}
243
244
245/*
246 * hfs_relconverter - release interest in an HFS encoding converter
247 *
248 * Normally called during the unmounting of an hfs voulume.
249 */
250int
2d21ac55 251hfs_relconverter(u_int32_t encoding)
1c79356b
A
252{
253 struct hfs_encoding *encp;
1c79356b 254
91447636 255 lck_mtx_lock(&encodinglst_mutex);
1c79356b
A
256 SLIST_FOREACH(encp, &hfs_encoding_list, link) {
257 if (encp->encoding == encoding) {
1c79356b
A
258 encp->refcount--;
259
260 /* if converter is no longer in use, release it */
261 if (encp->refcount <= 0 && encp->kmod_id != 0) {
b0d623f7 262 uint32_t loadTag = (uint32_t)encp->kmod_id;
1c79356b
A
263
264 SLIST_REMOVE(&hfs_encoding_list, encp, hfs_encoding, link);
91447636
A
265 lck_mtx_unlock(&encodinglst_mutex);
266
267 FREE(encp, M_TEMP);
b0d623f7 268 (void)OSKextUnloadKextWithLoadTag(loadTag);
91447636 269 return (0);
1c79356b 270 }
91447636
A
271 lck_mtx_unlock(&encodinglst_mutex);
272 return (0);
1c79356b
A
273 }
274 }
91447636 275 lck_mtx_unlock(&encodinglst_mutex);
1c79356b 276
91447636 277 return (EINVAL);
1c79356b
A
278}
279
1c79356b
A
280/*
281 * Convert HFS encoded string into UTF-8
282 *
283 * Unicode output is fully decomposed
284 * '/' chars are converted to ':'
285 */
286int
2d21ac55 287hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
1c79356b
A
288{
289 int error;
290 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
291 ItemCount uniCount;
292 size_t utf8len;
293 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
6d2010ae 294 u_int8_t pascal_length = 0;
1c79356b 295
d41d1dae
A
296 /*
297 * Validate the length of the Pascal-style string before passing it
298 * down to the decoding engine.
299 */
300 pascal_length = *((const u_int8_t*)(hfs_str));
301 if (pascal_length > 31) {
302 /* invalid string; longer than 31 bytes */
303 error = EINVAL;
304 return error;
305 }
6d2010ae 306
1c79356b
A
307 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
308
309 if (uniCount == 0)
310 error = EINVAL;
311
312 if (error == 0) {
313 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
314 if (error == ENAMETOOLONG)
315 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
316 else
317 *actualDstLen = utf8len;
318 }
319
320 return error;
321}
322
1c79356b
A
323/*
324 * When an HFS name cannot be encoded with the current
325 * volume encoding then MacRoman is used as a fallback.
326 */
327int
2d21ac55 328mac_roman_to_utf8(const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
1c79356b
A
329{
330 int error;
331 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
332 ItemCount uniCount;
333 size_t utf8len;
d41d1dae
A
334 u_int8_t pascal_length = 0;
335
336 /*
337 * Validate the length of the Pascal-style string before passing it
338 * down to the decoding engine.
339 */
340 pascal_length = *((const u_int8_t*)(hfs_str));
341 if (pascal_length > 31) {
342 /* invalid string; longer than 31 bytes */
343 error = EINVAL;
344 return error;
6d2010ae 345 }
1c79356b
A
346
347 error = mac_roman_to_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
348
349 if (uniCount == 0)
350 error = EINVAL;
351
352 if (error == 0) {
353 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
354 if (error == ENAMETOOLONG)
355 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
356 else
357 *actualDstLen = utf8len;
358 }
359
360 return error;
361}
362
9bccf70c
A
363/*
364 * Convert Unicode string into HFS encoding
365 *
366 * ':' chars are converted to '/'
367 * Assumes input represents fully decomposed Unicode
368 */
369int
370unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
371{
372 int error;
373 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
374
375 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
376 if (error && retry) {
377 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
378 }
379 return error;
380}
381
1c79356b
A
382/*
383 * Convert UTF-8 string into HFS encoding
384 *
385 * ':' chars are converted to '/'
386 * Assumes input represents fully decomposed Unicode
387 */
388int
9bccf70c 389utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
1c79356b
A
390{
391 int error;
392 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
393 size_t ucslen;
1c79356b
A
394
395 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
396 if (error == 0)
9bccf70c 397 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
1c79356b
A
398
399 return error;
400}
401
39236c6e 402
1c79356b
A
403int
404utf8_to_mac_roman(ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr)
405{
406 int error;
407 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
408 size_t ucslen;
409
410 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
411 if (error == 0)
412 error = unicode_to_mac_roman(uniStr, ucslen/sizeof(UniChar), dstStr);
413
414 return error;
415}
416
/*
 * HFS MacRoman to/from Unicode conversions are built into the kernel.
 * All other HFS encodings are loadable.
 */
421
/*
 * 0x00A0 - 0x00FF = Latin 1 Supplement (30 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x00A0);
 * '?' marks code points with no mapping in this table.
 * Read-only lookup data, hence const.
 */
static const u_int8_t gLatin1Table[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x00A0 */	0xCA, 0xC1, 0xA2, 0xA3, 0xDB, 0xB4,  '?', 0xA4, 0xAC, 0xA9, 0xBB, 0xC7, 0xC2,  '?', 0xA8, 0xF8,
	/* 0x00B0 */	0xA1, 0xB1,  '?',  '?', 0xAB, 0xB5, 0xA6, 0xe1, 0xFC,  '?', 0xBC, 0xC8,  '?',  '?',  '?', 0xC0,
	/* 0x00C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xAE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x00D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xAF,  '?',  '?',  '?',  '?',  '?',  '?', 0xA7,
	/* 0x00E0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xBE,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x00F0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD6, 0xBF,  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
432
/*
 * 0x02C0 - 0x02DF = Spacing Modifiers (8 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x02C0);
 * '?' marks code points with no mapping in this table.
 * Read-only lookup data, hence const.
 */
static const u_int8_t gSpaceModsTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x02C0 */	 '?',  '?',  '?',  '?',  '?',  '?', 0xF6, 0xFF,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x02D0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xF9, 0xFA, 0xFB, 0xFE, 0xF7, 0xFD,  '?',  '?'
};
439
/*
 * 0x2010 - 0x20AF = General Punctuation (17 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x2010);
 * '?' marks code points with no mapping in this table.
 * Read-only lookup data, hence const.
 */
static const u_int8_t gPunctTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2010 */	 '?',  '?',  '?', 0xd0, 0xd1,  '?',  '?',  '?', 0xd4, 0xd5, 0xe2,  '?', 0xd2, 0xd3, 0xe3,  '?',
	/* 0x2020 */	0xa0, 0xe0, 0xa5,  '?',  '?',  '?', 0xc9,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2030 */	0xe4,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdc, 0xdd,  '?',  '?',  '?',  '?',  '?',
	/* 0x2040 */	 '?',  '?',  '?',  '?', 0xda,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2050 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2060 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2070 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2080 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2090 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x20A0 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xdb,  '?',  '?',  '?'
};
454
/*
 * 0x22xx = Mathematical Operators (11 total)
 *
 * MacRoman byte for each code point, indexed by (code point - 0x2200).
 * Only 0x2200 - 0x2267 are covered (0x68 entries); '?' marks code
 * points with no mapping in this table.
 * Read-only lookup data, hence const.
 */
static const u_int8_t gMathTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x2200 */	 '?',  '?', 0xb6,  '?',  '?',  '?', 0xc6,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xb8,
	/* 0x2210 */	 '?', 0xb7,  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc3,  '?',  '?',  '?', 0xb0,  '?',
	/* 0x2220 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xba,  '?',  '?',  '?',  '?',
	/* 0x2230 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2240 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xc5,  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2250 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',
	/* 0x2260 */	0xad,  '?',  '?',  '?', 0xb2, 0xb3,  '?',  '?'
};
466
/*
 * Base letter + combining diacritical mark -> MacRoman byte.
 *
 * The table has two parts:
 *
 *  - The first 64 bytes are indexed by (base char - 0x40) and hold the
 *    offset, within this same table, of that base letter's row.
 *    Characters without a row of their own point at the final
 *    "else" row (offset 0xDA).
 *
 *  - The rows follow: 11 bytes each, indexed by the low byte of the
 *    combining mark (0x0300 - 0x030A), giving the precomposed
 *    MacRoman byte or '?' when there is none.
 *
 * The lookup is therefore
 *	gReverseCombTable[gReverseCombTable[base - 0x40] + (mark & 0xFF)]
 *
 * Read-only lookup data, hence const.
 */
static const u_int8_t gReverseCombTable[] = {
	/*		  0     1     2     3     4     5     6     7     8     9     A     B     C     D     E     F  */
	/* 0x40 */	0xDA, 0x40, 0xDA, 0xDA, 0xDA, 0x56, 0xDA, 0xDA, 0xDA, 0x6C, 0xDA, 0xDA, 0xDA, 0xDA, 0x82, 0x98,
	/* 0x50 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xAE, 0xDA, 0xDA, 0xDA, 0xC4, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,
	/* 0x60 */	0xDA, 0x4B, 0xDA, 0xDA, 0xDA, 0x61, 0xDA, 0xDA, 0xDA, 0x77, 0xDA, 0xDA, 0xDA, 0xDA, 0x8D, 0xA3,
	/* 0x70 */	0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xB9, 0xDA, 0xDA, 0xDA, 0xCF, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA, 0xDA,

	/* Combining Diacritical Marks (0x0300 - 0x030A) */
	/*		  0     1     2     3     4     5     6     7     8     9     A  */
	/*  'A'   */
	/* 0x0300 */	0xCB, 0xE7, 0xE5, 0xCC,  '?',  '?',  '?',  '?', 0x80,  '?', 0x81,

	/*  'a'   */
	/* 0x0300 */	0x88, 0x87, 0x89, 0x8B,  '?',  '?',  '?',  '?', 0x8A,  '?', 0x8C,

	/*  'E'   */
	/* 0x0300 */	0xE9, 0x83, 0xE6,  '?',  '?',  '?',  '?',  '?', 0xE8,  '?',  '?',

	/*  'e'   */
	/* 0x0300 */	0x8F, 0x8E, 0x90,  '?',  '?',  '?',  '?',  '?', 0x91,  '?',  '?',

	/*  'I'   */
	/* 0x0300 */	0xED, 0xEA, 0xEB,  '?',  '?',  '?',  '?',  '?', 0xEC,  '?',  '?',

	/*  'i'   */
	/* 0x0300 */	0x93, 0x92, 0x94,  '?',  '?',  '?',  '?',  '?', 0x95,  '?',  '?',

	/*  'N'   */
	/* 0x0300 */	 '?',  '?',  '?', 0x84,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

	/*  'n'   */
	/* 0x0300 */	 '?',  '?',  '?', 0x96,  '?',  '?',  '?',  '?',  '?',  '?',  '?',

	/*  'O'   */
	/* 0x0300 */	0xF1, 0xEE, 0xEF, 0xCD,  '?',  '?',  '?',  '?', 0x85,  '?',  '?',

	/*  'o'   */
	/* 0x0300 */	0x98, 0x97, 0x99, 0x9B,  '?',  '?',  '?',  '?', 0x9A,  '?',  '?',

	/*  'U'   */
	/* 0x0300 */	0xF4, 0xF2, 0xF3,  '?',  '?',  '?',  '?',  '?', 0x86,  '?',  '?',

	/*  'u'   */
	/* 0x0300 */	0x9D, 0x9C, 0x9E,  '?',  '?',  '?',  '?',  '?', 0x9F,  '?',  '?',

	/*  'Y'   */
	/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD9,  '?',  '?',

	/*  'y'   */
	/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?', 0xD8,  '?',  '?',

	/*  else  */
	/* 0x0300 */	 '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?',  '?'
};
522
523
524/*
525 * Convert Unicode string into HFS MacRoman encoding
526 *
527 * Assumes Unicode input is fully decomposed
528 */
2d21ac55 529static int unicode_to_mac_roman(UniChar *uni_str, u_int32_t unicodeChars, Str31 hfs_str)
1c79356b 530{
2d21ac55 531 u_int8_t *p;
1c79356b
A
532 const UniChar *u;
533 UniChar c;
534 UniChar mask;
2d21ac55
A
535 u_int16_t inputChars;
536 u_int16_t pascalChars;
1c79356b 537 OSErr result = noErr;
2d21ac55
A
538 u_int8_t lsb;
539 u_int8_t prevChar;
540 u_int8_t mc;
1c79356b
A
541
542 mask = (UniChar) 0xFF80;
543 p = &hfs_str[1];
544 u = uni_str;
545 inputChars = unicodeChars;
546 pascalChars = prevChar = 0;
547
548 while (inputChars) {
549 c = *(u++);
2d21ac55 550 lsb = (u_int8_t) c;
1c79356b
A
551
552 /*
553 * If its not 7-bit ascii, then we need to map it
554 */
555 if ( c & mask ) {
556 mc = '?';
557 switch (c & 0xFF00) {
558 case 0x0000:
559 if (lsb >= 0xA0)
560 mc = gLatin1Table[lsb - 0xA0];
561 break;
562
563 case 0x0200:
564 if (lsb >= 0xC0 && lsb <= 0xDF)
565 mc = gSpaceModsTable[lsb - 0xC0];
566 break;
567
568 case 0x2000:
569 if (lsb >= 0x10 && lsb <= 0xAF)
570 mc = gPunctTable[lsb- 0x10];
571 break;
572
573 case 0x2200:
fe8ab488 574 if (lsb < 0x68)
1c79356b
A
575 mc = gMathTable[lsb];
576 break;
577
578 case 0x0300:
579 if (c <= 0x030A) {
580 if (prevChar >= 'A' && prevChar < 'z') {
581 mc = gReverseCombTable[gReverseCombTable[prevChar - 0x40] + lsb];
582 --p; /* backup over base char */
583 --pascalChars;
584 }
585 } else {
586 switch (c) {
587 case 0x0327: /* combining cedilla */
588 if (prevChar == 'C')
589 mc = 0x82;
590 else if (prevChar == 'c')
591 mc = 0x8D;
592 else
593 break;
594 --p; /* backup over base char */
595 --pascalChars;
596 break;
597
598 case 0x03A9: mc = 0xBD; break; /* omega */
599
600 case 0x03C0: mc = 0xB9; break; /* pi */
601 }
602 }
603 break;
604
605 default:
606 switch (c) {
607 case 0x0131: mc = 0xf5; break; /* dotless i */
608
609 case 0x0152: mc = 0xce; break; /* OE */
610
611 case 0x0153: mc = 0xcf; break; /* oe */
612
613