]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_utfconv.c
xnu-792.6.56.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_utfconv.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /*
25 Includes Unicode 3.2 decomposition code derived from Core Foundation
26 */
27
28 #include <sys/param.h>
29 #include <sys/utfconv.h>
30 #include <sys/errno.h>
31 #include <architecture/byte_order.h>
32
33 /*
34 * UTF-8 (Unicode Transformation Format)
35 *
36 * UTF-8 is the Unicode Transformation Format that serializes a Unicode
37 * character as a sequence of one to four bytes. Only the shortest form
38 * required to represent the significant Unicode bits is legal.
39 *
40 * UTF-8 Multibyte Codes
41 *
42 * Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary)
43 * -----------------------------------------------------------------------------
44 * 1 7 0x0000 0x007F 0xxxxxxx
45 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
46 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
47 * 4 21 0x10000 0x10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
48 * -----------------------------------------------------------------------------
49 */
50
51
/*
 * UNICODE_TO_UTF8_LEN - number of UTF-8 bytes accounted to one 16-bit
 * Unicode unit.
 *
 * Units in the surrogate range (0xD800 - 0xDFFF) are counted as 2 bytes
 * each, so that the two halves of a surrogate pair add up to 4 bytes.
 *
 * NOTE: the argument is evaluated more than once.
 */
#define UNICODE_TO_UTF8_LEN(c)				\
	((c) < 0x0080 ? 1 :				\
	 (c) < 0x0800 ? 2 :				\
	 ((c) & 0xf800) == 0xd800 ? 2 : 3)
54
55 #define UCS_ALT_NULL 0x2400
56
57 /* Surrogate Pair Constants */
58 #define SP_HALF_SHIFT 10
59 #define SP_HALF_BASE 0x0010000UL
60 #define SP_HALF_MASK 0x3FFUL
61
62 #define SP_HIGH_FIRST 0xD800UL
63 #define SP_HIGH_LAST 0xDBFFUL
64 #define SP_LOW_FIRST 0xDC00UL
65 #define SP_LOW_LAST 0xDFFFUL
66
67
68 #include "vfs_utfconvdata.h"
69
70
71 /*
72 * Test for a combining character.
73 *
74 * Similar to __CFUniCharIsNonBaseCharacter except that
75 * unicode_combinable also includes Hangul Jamo characters.
76 */
77 static inline int
78 unicode_combinable(u_int16_t character)
79 {
80 const u_int8_t *bitmap = __CFUniCharCombiningBitmap;
81 u_int8_t value;
82
83 if (character < 0x0300)
84 return (0);
85
86 value = bitmap[(character >> 8) & 0xFF];
87
88 if (value == 0xFF) {
89 return (1);
90 } else if (value) {
91 bitmap = bitmap + ((value - 1) * 32) + 256;
92 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
93 }
94 return (0);
95 }
96
97 /*
98 * Test for a precomposed character.
99 *
100 * Similar to __CFUniCharIsDecomposableCharacter.
101 */
102 static inline int
103 unicode_decomposeable(u_int16_t character) {
104 const u_int8_t *bitmap = __CFUniCharDecomposableBitmap;
105 u_int8_t value;
106
107 if (character < 0x00C0)
108 return (0);
109
110 value = bitmap[(character >> 8) & 0xFF];
111
112 if (value == 0xFF) {
113 return (1);
114 } else if (value) {
115 bitmap = bitmap + ((value - 1) * 32) + 256;
116 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
117 }
118 return (0);
119 }
120
121 static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
122
123 static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
124
125
/*
 * utf_extrabytes - number of bytes that follow a given first byte of a
 * UTF-8 sequence, indexed by the top five bits of that byte (byte >> 3).
 *
 * An entry of -1 marks a byte that the decoders in this file reject as
 * the start of a sequence.
 */
char utf_extrabytes[32] = {
	/* 0x00 - 0x7F: 0xxxxxxx */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x80 - 0xBF: 10xxxxxx */
	-1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0 - 0xDF: 110xxxxx */
	 1,  1,  1,  1,
	/* 0xE0 - 0xEF: 1110xxxx */
	 2,  2,
	/* 0xF0 - 0xF7: 11110xxx */
	 3,
	/* 0xF8 - 0xFF */
	-1
};
130
131
132 /*
133 * utf8_encodelen - Calculates the UTF-8 encoding length for a Unicode filename
134 *
135 * NOTES:
136 * If '/' chars are allowed on disk then an alternate
137 * (replacement) char must be provided in altslash.
138 *
139 * input flags:
140 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
141 */
142 size_t
143 utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
144 int flags)
145 {
146 u_int16_t ucs_ch;
147 int charcnt;
148 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
149 size_t len;
150
151 charcnt = ucslen / 2;
152 len = 0;
153
154 while (charcnt-- > 0) {
155 ucs_ch = *ucsp++;
156
157 if (swapbytes)
158 ucs_ch = NXSwapShort(ucs_ch);
159 if (ucs_ch == '/')
160 ucs_ch = altslash ? altslash : '_';
161 else if (ucs_ch == '\0')
162 ucs_ch = UCS_ALT_NULL;
163
164 len += UNICODE_TO_UTF8_LEN(ucs_ch);
165 }
166
167 return (len);
168 }
169
170
171 /*
172 * utf8_encodestr - Encodes a Unicode string to UTF-8
173 *
174 * NOTES:
175 * The resulting UTF-8 string is NULL terminated.
176 *
177 * If '/' chars are allowed on disk then an alternate
178 * (replacement) char must be provided in altslash.
179 *
180 * input flags:
181 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
182 * UTF_NO_NULL_TERM: don't add NULL termination to UTF-8 output
183 *
184 * result:
185 * ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
186 * EINVAL: Illegal char found; char was replaced by an '_'.
187 */
188 int
189 utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
190 size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
191 {
192 u_int8_t * bufstart;
193 u_int8_t * bufend;
194 u_int16_t ucs_ch;
195 u_int16_t * chp = NULL;
196 u_int16_t sequence[8];
197 int extra = 0;
198 int charcnt;
199 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
200 int nullterm = ((flags & UTF_NO_NULL_TERM) == 0);
201 int decompose = (flags & UTF_DECOMPOSED);
202 int result = 0;
203
204 bufstart = utf8p;
205 bufend = bufstart + buflen;
206 if (nullterm)
207 --bufend;
208 charcnt = ucslen / 2;
209
210 while (charcnt-- > 0) {
211 if (extra > 0) {
212 --extra;
213 ucs_ch = *chp++;
214 } else {
215 ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
216
217 if (decompose && unicode_decomposeable(ucs_ch)) {
218 extra = unicode_decompose(ucs_ch, sequence) - 1;
219 charcnt += extra;
220 ucs_ch = sequence[0];
221 chp = &sequence[1];
222 }
223 }
224
225 /* Slash and NULL are not permitted */
226 if (ucs_ch == '/') {
227 if (altslash)
228 ucs_ch = altslash;
229 else {
230 ucs_ch = '_';
231 result = EINVAL;
232 }
233 } else if (ucs_ch == '\0') {
234 ucs_ch = UCS_ALT_NULL;
235 }
236
237 if (ucs_ch < 0x0080) {
238 if (utf8p >= bufend) {
239 result = ENAMETOOLONG;
240 break;
241 }
242 *utf8p++ = ucs_ch;
243
244 } else if (ucs_ch < 0x800) {
245 if ((utf8p + 1) >= bufend) {
246 result = ENAMETOOLONG;
247 break;
248 }
249 *utf8p++ = 0xc0 | (ucs_ch >> 6);
250 *utf8p++ = 0x80 | (0x3f & ucs_ch);
251
252 } else {
253 /* Combine valid surrogate pairs */
254 if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
255 && charcnt > 0) {
256 u_int16_t ch2;
257 u_int32_t pair;
258
259 ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
260 if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
261 pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
262 + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
263 if ((utf8p + 3) >= bufend) {
264 result = ENAMETOOLONG;
265 break;
266 }
267 --charcnt;
268 ++ucsp;
269 *utf8p++ = 0xf0 | (pair >> 18);
270 *utf8p++ = 0x80 | (0x3f & (pair >> 12));
271 *utf8p++ = 0x80 | (0x3f & (pair >> 6));
272 *utf8p++ = 0x80 | (0x3f & pair);
273 continue;
274 }
275 }
276 if ((utf8p + 2) >= bufend) {
277 result = ENAMETOOLONG;
278 break;
279 }
280 *utf8p++ = 0xe0 | (ucs_ch >> 12);
281 *utf8p++ = 0x80 | (0x3f & (ucs_ch >> 6));
282 *utf8p++ = 0x80 | (0x3f & ucs_ch);
283 }
284 }
285
286 *utf8len = utf8p - bufstart;
287 if (nullterm)
288 *utf8p++ = '\0';
289
290 return (result);
291 }
292
293
294 /*
295 * utf8_decodestr - Decodes a UTF-8 string back to Unicode
296 *
297 * NOTES:
298 * The input UTF-8 string does not need to be null terminated
299 * if utf8len is set.
300 *
301 * If '/' chars are allowed on disk then an alternate
302 * (replacement) char must be provided in altslash.
303 *
304 * input flags:
305 * UTF_REV_ENDIAN: Unicode byteorder is oposite current runtime
306 * UTF_DECOMPOSED: Unicode output string must be fully decompsed
307 *
308 * result:
309 * ENAMETOOLONG: Name didn't fit; only ucslen chars were decoded.
310 * EINVAL: Illegal UTF-8 sequence found.
311 */
312 int
313 utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
314 size_t *ucslen, size_t buflen, u_int16_t altslash, int flags)
315 {
316 u_int16_t* bufstart;
317 u_int16_t* bufend;
318 unsigned int ucs_ch;
319 unsigned int byte;
320 int result = 0;
321 int decompose, precompose, swapbytes;
322
323 decompose = (flags & UTF_DECOMPOSED);
324 precompose = (flags & UTF_PRECOMPOSED);
325 swapbytes = (flags & UTF_REVERSE_ENDIAN);
326
327 bufstart = ucsp;
328 bufend = (u_int16_t *)((u_int8_t *)ucsp + buflen);
329
330 while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
331 if (ucsp >= bufend)
332 goto toolong;
333
334 /* check for ascii */
335 if (byte < 0x80) {
336 ucs_ch = byte; /* 1st byte */
337 } else {
338 u_int32_t ch;
339 int extrabytes = utf_extrabytes[byte >> 3];
340
341 if (utf8len < extrabytes)
342 goto invalid;
343 utf8len -= extrabytes;
344
345 switch (extrabytes) {
346 case 1:
347 ch = byte; ch <<= 6; /* 1st byte */
348 byte = *utf8p++; /* 2nd byte */
349 if ((byte >> 6) != 2)
350 goto invalid;
351 ch += byte;
352 ch -= 0x00003080UL;
353 if (ch < 0x0080)
354 goto invalid;
355 ucs_ch = ch;
356 break;
357 case 2:
358 ch = byte; ch <<= 6; /* 1st byte */
359 byte = *utf8p++; /* 2nd byte */
360 if ((byte >> 6) != 2)
361 goto invalid;
362 ch += byte; ch <<= 6;
363 byte = *utf8p++; /* 3rd byte */
364 if ((byte >> 6) != 2)
365 goto invalid;
366 ch += byte;
367 ch -= 0x000E2080UL;
368 if (ch < 0x0800)
369 goto invalid;
370 if (ch >= 0xD800) {
371 if (ch <= 0xDFFF)
372 goto invalid;
373 if (ch == 0xFFFE || ch == 0xFFFF)
374 goto invalid;
375 }
376 ucs_ch = ch;
377 break;
378 case 3:
379 ch = byte; ch <<= 6; /* 1st byte */
380 byte = *utf8p++; /* 2nd byte */
381 if ((byte >> 6) != 2)
382 goto invalid;
383 ch += byte; ch <<= 6;
384 byte = *utf8p++; /* 3rd byte */
385 if ((byte >> 6) != 2)
386 goto invalid;
387 ch += byte; ch <<= 6;
388 byte = *utf8p++; /* 4th byte */
389 if ((byte >> 6) != 2)
390 goto invalid;
391 ch += byte;
392 ch -= 0x03C82080UL + SP_HALF_BASE;
393 ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
394 if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
395 goto invalid;
396 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
397 if (ucsp >= bufend)
398 goto toolong;
399 ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
400 if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
401 goto invalid;
402 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
403 continue;
404 default:
405 goto invalid;
406 }
407 if (decompose) {
408 if (unicode_decomposeable(ucs_ch)) {
409 u_int16_t sequence[8];
410 int count, i;
411
412 count = unicode_decompose(ucs_ch, sequence);
413
414 for (i = 0; i < count; ++i) {
415 ucs_ch = sequence[i];
416 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
417 if (ucsp >= bufend)
418 goto toolong;
419 }
420 continue;
421 }
422 } else if (precompose && (ucsp != bufstart)) {
423 u_int16_t composite, base;
424
425 if (unicode_combinable(ucs_ch)) {
426 base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
427 composite = unicode_combine(base, ucs_ch);
428 if (composite) {
429 --ucsp;
430 ucs_ch = composite;
431 }
432 }
433 }
434 if (ucs_ch == UCS_ALT_NULL)
435 ucs_ch = '\0';
436 }
437 if (ucs_ch == altslash)
438 ucs_ch = '/';
439
440 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
441 }
442
443 exit:
444 *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;
445
446 return (result);
447
448 invalid:
449 result = EINVAL;
450 goto exit;
451
452 toolong:
453 result = ENAMETOOLONG;
454 goto exit;
455 }
456
457
458 /*
459 * utf8_validatestr - Check for a valid UTF-8 string.
460 */
461 int
462 utf8_validatestr(const u_int8_t* utf8p, size_t utf8len)
463 {
464 unsigned int byte;
465 u_int32_t ch;
466 unsigned int ucs_ch;
467 size_t extrabytes;
468
469 while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
470 if (byte < 0x80)
471 continue; /* plain ascii */
472
473 extrabytes = utf_extrabytes[byte >> 3];
474
475 if (utf8len < extrabytes)
476 goto invalid;
477 utf8len -= extrabytes;
478
479 switch (extrabytes) {
480 case 1:
481 ch = byte; ch <<= 6; /* 1st byte */
482 byte = *utf8p++; /* 2nd byte */
483 if ((byte >> 6) != 2)
484 goto invalid;
485 ch += byte;
486 ch -= 0x00003080UL;
487 if (ch < 0x0080)
488 goto invalid;
489 break;
490 case 2:
491 ch = byte; ch <<= 6; /* 1st byte */
492 byte = *utf8p++; /* 2nd byte */
493 if ((byte >> 6) != 2)
494 goto invalid;
495 ch += byte; ch <<= 6;
496 byte = *utf8p++; /* 3rd byte */
497 if ((byte >> 6) != 2)
498 goto invalid;
499 ch += byte;
500 ch -= 0x000E2080UL;
501 if (ch < 0x0800)
502 goto invalid;
503 if (ch >= 0xD800) {
504 if (ch <= 0xDFFF)
505 goto invalid;
506 if (ch == 0xFFFE || ch == 0xFFFF)
507 goto invalid;
508 }
509 break;
510 case 3:
511 ch = byte; ch <<= 6; /* 1st byte */
512 byte = *utf8p++; /* 2nd byte */
513 if ((byte >> 6) != 2)
514 goto invalid;
515 ch += byte; ch <<= 6;
516 byte = *utf8p++; /* 3rd byte */
517 if ((byte >> 6) != 2)
518 goto invalid;
519 ch += byte; ch <<= 6;
520 byte = *utf8p++; /* 4th byte */
521 if ((byte >> 6) != 2)
522 goto invalid;
523 ch += byte;
524 ch -= 0x03C82080UL + SP_HALF_BASE;
525 ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
526 if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
527 goto invalid;
528 ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
529 if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
530 goto invalid;
531 break;
532 default:
533 goto invalid;
534 }
535
536 }
537 return (0);
538 invalid:
539 return (EINVAL);
540 }
541
542
543 /*
544 * Unicode 3.2 decomposition code (derived from Core Foundation)
545 */
546
/* One entry of a mapping table sorted by ascending _key. */
typedef struct {
	u_int32_t _key;
	u_int32_t _value;
} unicode_mappings32;

/*
 * getmappedvalue32 - binary search of a key-sorted mapping table.
 *
 * theTable:  table of numElem entries; the search relies on the
 *            entries being in ascending _key order
 * numElem:   number of entries in theTable
 * character: key to look up
 *
 * Returns the _value of the entry whose _key equals character, or 0
 * when the table is empty or holds no such key.
 */
static inline u_int32_t
getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	const unicode_mappings32 *lo, *hi, *mid;

	/* An empty table has no last entry to compare against. */
	if (numElem == 0)
		return (0);

	/*
	 * Reject keys outside the table's range up front.  This also keeps
	 * lo and hi from ever stepping outside the table below.
	 */
	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	lo = theTable;
	hi = lo + (numElem-1);
	while (lo <= hi) {
		mid = lo + ((hi - lo) >> 1);	/* divide by 2 */
		if (character < mid->_key)
			hi = mid - 1;
		else if (character > mid->_key)
			lo = mid + 1;
		else
			return (mid->_value);
	}
	return (0);
}
571
572 #define RECURSIVE_DECOMPOSITION (1 << 15)
573 #define EXTRACT_COUNT(value) (((value) >> 12) & 0x0007)
574
/* One entry of a mapping table sorted by ascending _key. */
typedef struct {
	u_int16_t _key;
	u_int16_t _value;
} unicode_mappings16;

/*
 * getmappedvalue16 - binary search of a key-sorted mapping table.
 *
 * theTable:  table of numElem entries; the search relies on the
 *            entries being in ascending _key order
 * numElem:   number of entries in theTable
 * character: key to look up
 *
 * Returns the _value of the entry whose _key equals character, or 0
 * when the table is empty or holds no such key.
 */
static inline u_int16_t
getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	const unicode_mappings16 *lo, *hi, *mid;

	/* An empty table has no last entry to compare against. */
	if (numElem == 0)
		return (0);

	/*
	 * Reject keys outside the table's range up front.  This also keeps
	 * lo and hi from ever stepping outside the table below.
	 */
	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	lo = theTable;
	hi = lo + (numElem-1);
	while (lo <= hi) {
		mid = lo + ((hi - lo) >> 1);	/* divide by 2 */
		if (character < mid->_key)
			hi = mid - 1;
		else if (character > mid->_key)
			lo = mid + 1;
		else
			return (mid->_value);
	}
	return (0);
}
602
603
604 static u_int32_t
605 unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars)
606 {
607 u_int16_t value;
608 u_int32_t length;
609 u_int16_t firstChar;
610 u_int16_t theChar;
611 const u_int16_t *bmpMappings;
612 u_int32_t usedLength;
613
614 value = getmappedvalue16(
615 (const unicode_mappings16 *)__CFUniCharDecompositionTable,
616 __UniCharDecompositionTableLength, character);
617 length = EXTRACT_COUNT(value);
618 firstChar = value & 0x0FFF;
619 theChar = firstChar;
620 bmpMappings = (length == 1 ? &theChar : __CFUniCharMultipleDecompositionTable + firstChar);
621 usedLength = 0;
622
623 if (value & RECURSIVE_DECOMPOSITION) {
624 usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars);
625
626 --length; /* Decrement for the first char */
627 if (!usedLength)
628 return 0;
629 ++bmpMappings;
630 convertedChars += usedLength;
631 }
632
633 usedLength += length;
634
635 while (length--)
636 *(convertedChars++) = *(bmpMappings++);
637
638 return (usedLength);
639 }
640
641 #define HANGUL_SBASE 0xAC00
642 #define HANGUL_LBASE 0x1100
643 #define HANGUL_VBASE 0x1161
644 #define HANGUL_TBASE 0x11A7
645
646 #define HANGUL_SCOUNT 11172
647 #define HANGUL_LCOUNT 19
648 #define HANGUL_VCOUNT 21
649 #define HANGUL_TCOUNT 28
650 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
651
652 /*
653 * unicode_decompose - decompose a composed Unicode char
654 *
655 * Composed Unicode characters are forbidden on
656 * HFS Plus volumes. ucs_decompose will convert a
657 * composed character into its correct decomposed
658 * sequence.
659 *
660 * Similar to CFUniCharDecomposeCharacter
661 */
662 static int
663 unicode_decompose(u_int16_t character, u_int16_t *convertedChars)
664 {
665 if ((character >= HANGUL_SBASE) &&
666 (character <= (HANGUL_SBASE + HANGUL_SCOUNT))) {
667 u_int32_t length;
668
669 character -= HANGUL_SBASE;
670 length = (character % HANGUL_TCOUNT ? 3 : 2);
671
672 *(convertedChars++) =
673 character / HANGUL_NCOUNT + HANGUL_LBASE;
674 *(convertedChars++) =
675 (character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE;
676 if (length > 2)
677 *convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE;
678 return (length);
679 } else {
680 return (unicode_recursive_decompose(character, convertedChars));
681 }
682 }
683
684 /*
685 * unicode_combine - generate a precomposed Unicode char
686 *
687 * Precomposed Unicode characters are required for some volume
688 * formats and network protocols. unicode_combine will combine
689 * a decomposed character sequence into a single precomposed
690 * (composite) character.
691 *
692 * Similar toCFUniCharPrecomposeCharacter but unicode_combine
693 * also handles Hangul Jamo characters.
694 */
695 static u_int16_t
696 unicode_combine(u_int16_t base, u_int16_t combining)
697 {
698 u_int32_t value;
699
700 /* Check HANGUL */
701 if ((combining >= HANGUL_VBASE) && (combining < (HANGUL_TBASE + HANGUL_TCOUNT))) {
702 /* 2 char Hangul sequences */
703 if ((combining < (HANGUL_VBASE + HANGUL_VCOUNT)) &&
704 (base >= HANGUL_LBASE && base < (HANGUL_LBASE + HANGUL_LCOUNT))) {
705 return (HANGUL_SBASE +
706 ((base - HANGUL_LBASE)*(HANGUL_VCOUNT*HANGUL_TCOUNT)) +
707 ((combining - HANGUL_VBASE)*HANGUL_TCOUNT));
708 }
709
710 /* 3 char Hangul sequences */
711 if ((combining > HANGUL_TBASE) &&
712 (base >= HANGUL_SBASE && base < (HANGUL_SBASE + HANGUL_SCOUNT))) {
713 if ((base - HANGUL_SBASE) % HANGUL_TCOUNT)
714 return (0);
715 else
716 return (base + (combining - HANGUL_TBASE));
717 }
718 }
719
720 value = getmappedvalue32(
721 (const unicode_mappings32 *)__CFUniCharPrecompSourceTable,
722 __CFUniCharPrecompositionTableLength, combining);
723
724 if (value) {
725 value = getmappedvalue16(
726 (const unicode_mappings16 *)
727 ((u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)),
728 (value >> 16), base);
729 }
730 return (value);
731 }
732