]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_utfconv.c
xnu-517.9.4.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_utfconv.c
CommitLineData
1c79356b 1/*
9bccf70c 2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
9bccf70c
A
22
23 /*
24 Includes Unicode 3.2 decomposition code derived from Core Foundation
25 */
26
1c79356b
A
27#include <sys/param.h>
28#include <sys/utfconv.h>
29#include <sys/errno.h>
30#include <architecture/byte_order.h>
31
1c79356b 32/*
765c9de3 33 * UTF-8 (Unicode Transformation Format)
1c79356b 34 *
765c9de3
A
35 * UTF-8 is the Unicode Transformation Format that serializes a Unicode
36 * character as a sequence of one to four bytes. Only the shortest form
37 * required to represent the significant Unicode bits is legal.
1c79356b
A
38 *
39 * UTF-8 Multibyte Codes
40 *
765c9de3
A
41 * Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary)
42 * -----------------------------------------------------------------------------
43 * 1 7 0x0000 0x007F 0xxxxxxx
44 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
45 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
46 * 4 21 0x10000 0x10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
47 * -----------------------------------------------------------------------------
1c79356b
A
48 */
49
50
765c9de3
A
51#define UNICODE_TO_UTF8_LEN(c) \
52 ((c) < 0x0080 ? 1 : ((c) < 0x0800 ? 2 : (((c) & 0xf800) == 0xd800 ? 2 : 3)))
0b4e3aa0
A
53
54#define UCS_ALT_NULL 0x2400
1c79356b 55
765c9de3
A
56/* Surrogate Pair Constants */
57#define SP_HALF_SHIFT 10
58#define SP_HALF_BASE 0x0010000UL
59#define SP_HALF_MASK 0x3FFUL
60
61#define SP_HIGH_FIRST 0xD800UL
62#define SP_HIGH_LAST 0xDBFFUL
63#define SP_LOW_FIRST 0xDC00UL
9bccf70c
A
64#define SP_LOW_LAST 0xDFFFUL
65
1c79356b 66
9bccf70c 67#include "vfs_utfconvdata.h"
765c9de3 68
1c79356b 69
9bccf70c
A
70/*
71 * Test for a combining character.
72 *
73 * Similar to __CFUniCharIsNonBaseCharacter except that
74 * unicode_combinable also includes Hangul Jamo characters.
75 */
76static inline int
77unicode_combinable(u_int16_t character)
78{
79 const u_int8_t *bitmap = __CFUniCharCombiningBitmap;
80 u_int8_t value;
81
82 if (character < 0x0300)
83 return (0);
84
85 value = bitmap[(character >> 8) & 0xFF];
86
87 if (value == 0xFF) {
88 return (1);
89 } else if (value) {
90 bitmap = bitmap + ((value - 1) * 32) + 256;
91 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
92 }
93 return (0);
94}
95
96/*
97 * Test for a precomposed character.
98 *
99 * Similar to __CFUniCharIsDecomposableCharacter.
100 */
101static inline int
102unicode_decomposeable(u_int16_t character) {
103 const u_int8_t *bitmap = __CFUniCharDecomposableBitmap;
104 u_int8_t value;
105
106 if (character < 0x00C0)
107 return (0);
108
109 value = bitmap[(character >> 8) & 0xFF];
110
111 if (value == 0xFF) {
112 return (1);
113 } else if (value) {
114 bitmap = bitmap + ((value - 1) * 32) + 256;
115 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
116 }
117 return (0);
118}
119
120static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
121
122static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
0b4e3aa0 123
1c79356b 124
765c9de3
A
/*
 * Number of continuation bytes that follow a UTF-8 lead byte, indexed by
 * the lead byte's top five bits (byte >> 3).  An entry of -1 marks a byte
 * that cannot start a sequence.
 */
char utf_extrabytes[32] = {
	/* 0x00 - 0x7F: single byte, nothing follows */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x80 - 0xBF: not a lead byte */
	-1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0 - 0xDF: one byte follows */
	 1,  1,  1,  1,
	/* 0xE0 - 0xEF: two bytes follow */
	 2,  2,
	/* 0xF0 - 0xF7: three bytes follow */
	 3,
	/* 0xF8 - 0xFF: not a lead byte */
	-1
};
129
130
1c79356b 131/*
765c9de3 132 * utf8_encodelen - Calculates the UTF-8 encoding length for a Unicode filename
1c79356b
A
133 *
134 * NOTES:
135 * If '/' chars are allowed on disk then an alternate
136 * (replacement) char must be provided in altslash.
137 *
138 * input flags:
765c9de3 139 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
1c79356b
A
140 */
141size_t
765c9de3
A
142utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
143 int flags)
1c79356b
A
144{
145 u_int16_t ucs_ch;
146 int charcnt;
147 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
148 size_t len;
149
150 charcnt = ucslen / 2;
151 len = 0;
152
153 while (charcnt-- > 0) {
154 ucs_ch = *ucsp++;
155
156 if (swapbytes)
157 ucs_ch = NXSwapShort(ucs_ch);
0b4e3aa0
A
158 if (ucs_ch == '/')
159 ucs_ch = altslash ? altslash : '_';
160 else if (ucs_ch == '\0')
161 ucs_ch = UCS_ALT_NULL;
1c79356b 162
765c9de3 163 len += UNICODE_TO_UTF8_LEN(ucs_ch);
1c79356b
A
164 }
165
166 return (len);
167}
168
169
170/*
765c9de3 171 * utf8_encodestr - Encodes a Unicode string to UTF-8
1c79356b
A
172 *
173 * NOTES:
0b4e3aa0 174 * The resulting UTF-8 string is NULL terminated.
1c79356b
A
175 *
176 * If '/' chars are allowed on disk then an alternate
177 * (replacement) char must be provided in altslash.
178 *
179 * input flags:
765c9de3 180 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
1c79356b 181 * UTF_NO_NULL_TERM: don't add NULL termination to UTF-8 output
0b4e3aa0
A
182 *
183 * result:
184 * ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
185 * EINVAL: Illegal char found; char was replaced by an '_'.
1c79356b 186 */
765c9de3
A
187int
188utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
189 size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
1c79356b
A
190{
191 u_int8_t * bufstart;
192 u_int8_t * bufend;
193 u_int16_t ucs_ch;
9bccf70c
A
194 u_int16_t * chp = NULL;
195 u_int16_t sequence[8];
196 int extra = 0;
1c79356b
A
197 int charcnt;
198 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
0b4e3aa0
A
199 int nullterm = ((flags & UTF_NO_NULL_TERM) == 0);
200 int decompose = (flags & UTF_DECOMPOSED);
1c79356b
A
201 int result = 0;
202
203 bufstart = utf8p;
204 bufend = bufstart + buflen;
205 if (nullterm)
206 --bufend;
207 charcnt = ucslen / 2;
208
209 while (charcnt-- > 0) {
9bccf70c
A
210 if (extra > 0) {
211 --extra;
212 ucs_ch = *chp++;
0b4e3aa0
A
213 } else {
214 ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
9bccf70c
A
215
216 if (decompose && unicode_decomposeable(ucs_ch)) {
217 extra = unicode_decompose(ucs_ch, sequence) - 1;
218 charcnt += extra;
219 ucs_ch = sequence[0];
220 chp = &sequence[1];
221 }
0b4e3aa0 222 }
1c79356b 223
0b4e3aa0
A
224 /* Slash and NULL are not permitted */
225 if (ucs_ch == '/') {
226 if (altslash)
227 ucs_ch = altslash;
228 else {
229 ucs_ch = '_';
230 result = EINVAL;
231 }
232 } else if (ucs_ch == '\0') {
233 ucs_ch = UCS_ALT_NULL;
234 }
1c79356b 235
0b4e3aa0 236 if (ucs_ch < 0x0080) {
1c79356b
A
237 if (utf8p >= bufend) {
238 result = ENAMETOOLONG;
239 break;
765c9de3 240 }
1c79356b
A
241 *utf8p++ = ucs_ch;
242
243 } else if (ucs_ch < 0x800) {
244 if ((utf8p + 1) >= bufend) {
245 result = ENAMETOOLONG;
246 break;
247 }
765c9de3
A
248 *utf8p++ = 0xc0 | (ucs_ch >> 6);
249 *utf8p++ = 0x80 | (0x3f & ucs_ch);
1c79356b
A
250
251 } else {
765c9de3
A
252 /* Combine valid surrogate pairs */
253 if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
254 && charcnt > 0) {
255 u_int16_t ch2;
256 u_int32_t pair;
257
258 ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
259 if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
260 pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
261 + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
262 if ((utf8p + 3) >= bufend) {
263 result = ENAMETOOLONG;
264 break;
265 }
266 --charcnt;
267 ++ucsp;
268 *utf8p++ = 0xf0 | (pair >> 18);
269 *utf8p++ = 0x80 | (0x3f & (pair >> 12));
270 *utf8p++ = 0x80 | (0x3f & (pair >> 6));
271 *utf8p++ = 0x80 | (0x3f & pair);
272 continue;
273 }
274 }
1c79356b
A
275 if ((utf8p + 2) >= bufend) {
276 result = ENAMETOOLONG;
277 break;
278 }
765c9de3
A
279 *utf8p++ = 0xe0 | (ucs_ch >> 12);
280 *utf8p++ = 0x80 | (0x3f & (ucs_ch >> 6));
281 *utf8p++ = 0x80 | (0x3f & ucs_ch);
1c79356b
A
282 }
283 }
284
285 *utf8len = utf8p - bufstart;
286 if (nullterm)
287 *utf8p++ = '\0';
288
289 return (result);
290}
291
292
293/*
765c9de3 294 * utf8_decodestr - Decodes a UTF-8 string back to Unicode
1c79356b
A
295 *
296 * NOTES:
297 * The input UTF-8 string does not need to be null terminated
298 * if utf8len is set.
299 *
300 * If '/' chars are allowed on disk then an alternate
301 * (replacement) char must be provided in altslash.
302 *
303 * input flags:
765c9de3
A
304 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
305 * UTF_DECOMPOSED: Unicode output string must be fully decomposed
0b4e3aa0
A
306 *
307 * result:
308 * ENAMETOOLONG: Name didn't fit; only ucslen chars were decoded.
309 * EINVAL: Illegal UTF-8 sequence found.
1c79356b
A
310 */
311int
765c9de3
A
312utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
313 size_t *ucslen, size_t buflen, u_int16_t altslash, int flags)
1c79356b
A
314{
315 u_int16_t* bufstart;
316 u_int16_t* bufend;
55e303ae
A
317 unsigned int ucs_ch;
318 unsigned int byte;
1c79356b 319 int result = 0;
0b4e3aa0 320 int decompose, precompose, swapbytes;
1c79356b 321
0b4e3aa0
A
322 decompose = (flags & UTF_DECOMPOSED);
323 precompose = (flags & UTF_PRECOMPOSED);
324 swapbytes = (flags & UTF_REVERSE_ENDIAN);
1c79356b
A
325
326 bufstart = ucsp;
327 bufend = (u_int16_t *)((u_int8_t *)ucsp + buflen);
328
329 while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
765c9de3
A
330 if (ucsp >= bufend)
331 goto toolong;
1c79356b
A
332
333 /* check for ascii */
334 if (byte < 0x80) {
55e303ae 335 ucs_ch = byte; /* 1st byte */
1c79356b 336 } else {
765c9de3
A
337 u_int32_t ch;
338 int extrabytes = utf_extrabytes[byte >> 3];
339
340 if (utf8len < extrabytes)
341 goto invalid;
342 utf8len -= extrabytes;
343
344 switch (extrabytes) {
55e303ae
A
345 case 1:
346 ch = byte; ch <<= 6; /* 1st byte */
347 byte = *utf8p++; /* 2nd byte */
348 if ((byte >> 6) != 2)
349 goto invalid;
350 ch += byte;
351 ch -= 0x00003080UL;
352 if (ch < 0x0080)
353 goto invalid;
354 ucs_ch = ch;
765c9de3 355 break;
55e303ae
A
356 case 2:
357 ch = byte; ch <<= 6; /* 1st byte */
358 byte = *utf8p++; /* 2nd byte */
359 if ((byte >> 6) != 2)
360 goto invalid;
361 ch += byte; ch <<= 6;
362 byte = *utf8p++; /* 3rd byte */
363 if ((byte >> 6) != 2)
364 goto invalid;
365 ch += byte;
366 ch -= 0x000E2080UL;
367 if (ch < 0x0800)
368 goto invalid;
369 if (ch >= 0xD800) {
370 if (ch <= 0xDFFF)
765c9de3 371 goto invalid;
55e303ae
A
372 if (ch == 0xFFFE || ch == 0xFFFF)
373 goto invalid;
374 }
375 ucs_ch = ch;
376 break;
377 case 3:
378 ch = byte; ch <<= 6; /* 1st byte */
379 byte = *utf8p++; /* 2nd byte */
380 if ((byte >> 6) != 2)
381 goto invalid;
382 ch += byte; ch <<= 6;
383 byte = *utf8p++; /* 3rd byte */
384 if ((byte >> 6) != 2)
385 goto invalid;
386 ch += byte; ch <<= 6;
387 byte = *utf8p++; /* 4th byte */
388 if ((byte >> 6) != 2)
389 goto invalid;
390 ch += byte;
391 ch -= 0x03C82080UL + SP_HALF_BASE;
392 ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
393 if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
394 goto invalid;
395 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
396 if (ucsp >= bufend)
397 goto toolong;
398 ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
399 if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
400 goto invalid;
401 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
765c9de3 402 continue;
1c79356b 403 default:
55e303ae 404 goto invalid;
1c79356b 405 }
1c79356b 406 if (decompose) {
9bccf70c
A
407 if (unicode_decomposeable(ucs_ch)) {
408 u_int16_t sequence[8];
409 int count, i;
1c79356b 410
9bccf70c 411 count = unicode_decompose(ucs_ch, sequence);
1c79356b 412
9bccf70c
A
413 for (i = 0; i < count; ++i) {
414 ucs_ch = sequence[i];
0b4e3aa0 415 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
765c9de3
A
416 if (ucsp >= bufend)
417 goto toolong;
0b4e3aa0 418 }
9bccf70c 419 continue;
0b4e3aa0
A
420 }
421 } else if (precompose && (ucsp != bufstart)) {
422 u_int16_t composite, base;
423
9bccf70c
A
424 if (unicode_combinable(ucs_ch)) {
425 base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
426 composite = unicode_combine(base, ucs_ch);
427 if (composite) {
428 --ucsp;
429 ucs_ch = composite;
430 }
1c79356b
A
431 }
432 }
0b4e3aa0
A
433 if (ucs_ch == UCS_ALT_NULL)
434 ucs_ch = '\0';
1c79356b 435 }
1c79356b
A
436 if (ucs_ch == altslash)
437 ucs_ch = '/';
1c79356b 438
765c9de3 439 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
1c79356b 440 }
765c9de3
A
441
442exit:
1c79356b
A
443 *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;
444
445 return (result);
765c9de3
A
446
447invalid:
448 result = EINVAL;
449 goto exit;
450
451toolong:
452 result = ENAMETOOLONG;
453 goto exit;
1c79356b
A
454}
455
456
9bccf70c
A
457 /*
458 * Unicode 3.2 decomposition code (derived from Core Foundation)
459 */
1c79356b 460
9bccf70c
A
/* One entry of a key-sorted lookup table with 32-bit keys and values. */
typedef struct {
	u_int32_t _key;
	u_int32_t _value;
} unicode_mappings32;

/*
 * Binary-search a table sorted by ascending _key.
 *
 *   theTable  - first entry of the table
 *   numElem   - number of entries; 0 is allowed and matches nothing
 *   character - key to look up
 *
 * Returns the _value stored for `character`, or 0 when there is no
 * entry for it.
 */
static inline u_int32_t
getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	u_int32_t lo, hi;

	/* An empty table has no first/last entry to range-check against */
	if (numElem == 0)
		return (0);

	/* Quick reject for keys outside the range the table covers */
	if ((character < theTable[0]._key) ||
	    (character > theTable[numElem - 1]._key))
		return (0);

	/* Search the half-open index range [lo, hi) */
	lo = 0;
	hi = numElem;
	while (lo < hi) {
		u_int32_t mid = lo + ((hi - lo) >> 1);

		if (character < theTable[mid]._key)
			hi = mid;
		else if (character > theTable[mid]._key)
			lo = mid + 1;
		else
			return (theTable[mid]._value);
	}
	return (0);
}
1c79356b 485
9bccf70c
A
486#define RECURSIVE_DECOMPOSITION (1 << 15)
487#define EXTRACT_COUNT(value) (((value) >> 12) & 0x0007)
1c79356b 488
9bccf70c
A
/* One entry of a key-sorted lookup table with 16-bit keys and values. */
typedef struct {
	u_int16_t _key;
	u_int16_t _value;
} unicode_mappings16;

/*
 * Binary-search a table sorted by ascending _key.
 *
 *   theTable  - first entry of the table
 *   numElem   - number of entries; 0 is allowed and matches nothing
 *   character - key to look up
 *
 * Returns the _value stored for `character`, or 0 when there is no
 * entry for it.
 */
static inline u_int16_t
getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	u_int32_t lo, hi;

	/* An empty table has no first/last entry to range-check against */
	if (numElem == 0)
		return (0);

	/* Quick reject for keys outside the range the table covers */
	if ((character < theTable[0]._key) ||
	    (character > theTable[numElem - 1]._key))
		return (0);

	/* Search the half-open index range [lo, hi) */
	lo = 0;
	hi = numElem;
	while (lo < hi) {
		u_int32_t mid = lo + ((hi - lo) >> 1);

		if (character < theTable[mid]._key)
			hi = mid;
		else if (character > theTable[mid]._key)
			lo = mid + 1;
		else
			return (theTable[mid]._value);
	}
	return (0);
}
516
517
518static u_int32_t
519unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars)
520{
521 u_int16_t value;
522 u_int32_t length;
523 u_int16_t firstChar;
524 u_int16_t theChar;
525 const u_int16_t *bmpMappings;
526 u_int32_t usedLength;
527
528 value = getmappedvalue16(
529 (const unicode_mappings16 *)__CFUniCharDecompositionTable,
530 __UniCharDecompositionTableLength, character);
531 length = EXTRACT_COUNT(value);
532 firstChar = value & 0x0FFF;
533 theChar = firstChar;
534 bmpMappings = (length == 1 ? &theChar : __CFUniCharMultipleDecompositionTable + firstChar);
535 usedLength = 0;
536
537 if (value & RECURSIVE_DECOMPOSITION) {
538 usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars);
539
540 --length; /* Decrement for the first char */
541 if (!usedLength)
542 return 0;
543 ++bmpMappings;
544 convertedChars += usedLength;
545 }
0b4e3aa0 546
9bccf70c 547 usedLength += length;
0b4e3aa0 548
9bccf70c
A
549 while (length--)
550 *(convertedChars++) = *(bmpMappings++);
0b4e3aa0 551
9bccf70c
A
552 return (usedLength);
553}
554
555#define HANGUL_SBASE 0xAC00
556#define HANGUL_LBASE 0x1100
557#define HANGUL_VBASE 0x1161
558#define HANGUL_TBASE 0x11A7
559
560#define HANGUL_SCOUNT 11172
561#define HANGUL_LCOUNT 19
562#define HANGUL_VCOUNT 21
563#define HANGUL_TCOUNT 28
564#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
1c79356b
A
565
566/*
9bccf70c 567 * unicode_decompose - decompose a composed Unicode char
1c79356b
A
568 *
569 * Composed Unicode characters are forbidden on
570 * HFS Plus volumes. unicode_decompose will convert a
571 * composed character into its correct decomposed
572 * sequence.
573 *
9bccf70c 574 * Similar to CFUniCharDecomposeCharacter
1c79356b 575 */
9bccf70c
A
576static int
577unicode_decompose(u_int16_t character, u_int16_t *convertedChars)
1c79356b 578{
9bccf70c
A
579 if ((character >= HANGUL_SBASE) &&
580 (character <= (HANGUL_SBASE + HANGUL_SCOUNT))) {
581 u_int32_t length;
582
583 character -= HANGUL_SBASE;
584 length = (character % HANGUL_TCOUNT ? 3 : 2);
585
586 *(convertedChars++) =
587 character / HANGUL_NCOUNT + HANGUL_LBASE;
588 *(convertedChars++) =
589 (character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE;
590 if (length > 2)
591 *convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE;
592 return (length);
1c79356b 593 } else {
9bccf70c 594 return (unicode_recursive_decompose(character, convertedChars));
1c79356b 595 }
1c79356b
A
596}
597
0b4e3aa0 598/*
9bccf70c 599 * unicode_combine - generate a precomposed Unicode char
0b4e3aa0
A
600 *
601 * Precomposed Unicode characters are required for some volume
9bccf70c
A
602 * formats and network protocols. unicode_combine will combine
603 * a decomposed character sequence into a single precomposed
0b4e3aa0
A
604 * (composite) character.
605 *
9bccf70c
A
606 * Similar to CFUniCharPrecomposeCharacter but unicode_combine
607 * also handles Hangul Jamo characters.
0b4e3aa0
A
608 */
609static u_int16_t
9bccf70c 610unicode_combine(u_int16_t base, u_int16_t combining)
0b4e3aa0 611{
9bccf70c
A
612 u_int32_t value;
613
614 /* Check HANGUL */
615 if ((combining >= HANGUL_VBASE) && (combining < (HANGUL_TBASE + HANGUL_TCOUNT))) {
616 /* 2 char Hangul sequences */
617 if ((combining < (HANGUL_VBASE + HANGUL_VCOUNT)) &&
618 (base >= HANGUL_LBASE && base < (HANGUL_LBASE + HANGUL_LCOUNT))) {
619 return (HANGUL_SBASE +
620 ((base - HANGUL_LBASE)*(HANGUL_VCOUNT*HANGUL_TCOUNT)) +
621 ((combining - HANGUL_VBASE)*HANGUL_TCOUNT));
0b4e3aa0 622 }
9bccf70c
A
623
624 /* 3 char Hangul sequences */
625 if ((combining > HANGUL_TBASE) &&
626 (base >= HANGUL_SBASE && base < (HANGUL_SBASE + HANGUL_SCOUNT))) {
627 if ((base - HANGUL_SBASE) % HANGUL_TCOUNT)
628 return (0);
629 else
630 return (base + (combining - HANGUL_TBASE));
0b4e3aa0 631 }
0b4e3aa0
A
632 }
633
9bccf70c
A
634 value = getmappedvalue32(
635 (const unicode_mappings32 *)__CFUniCharPrecompSourceTable,
636 __CFUniCharPrecompositionTableLength, combining);
0b4e3aa0 637
9bccf70c
A
638 if (value) {
639 value = getmappedvalue16(
640 (const unicode_mappings16 *)
641 ((u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)),
642 (value >> 16), base);
0b4e3aa0 643 }
9bccf70c 644 return (value);
0b4e3aa0
A
645}
646