]> git.saurik.com Git - apple/xnu.git/blob - bsd/vfs/vfs_utfconv.c
xnu-792.2.4.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_utfconv.c
1 /*
2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22
23 /*
24 Includes Unicode 3.2 decomposition code derived from Core Foundation
25 */
26
27 #include <sys/param.h>
28 #include <sys/utfconv.h>
29 #include <sys/errno.h>
30 #include <architecture/byte_order.h>
31
32 /*
33 * UTF-8 (Unicode Transformation Format)
34 *
35 * UTF-8 is the Unicode Transformation Format that serializes a Unicode
36 * character as a sequence of one to four bytes. Only the shortest form
37 * required to represent the significant Unicode bits is legal.
38 *
39 * UTF-8 Multibyte Codes
40 *
41 * Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary)
42 * -----------------------------------------------------------------------------
43 * 1 7 0x0000 0x007F 0xxxxxxx
44 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
45 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
46 * 4 21 0x10000 0x10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
47 * -----------------------------------------------------------------------------
48 */
49
50
/*
 * UNICODE_TO_UTF8_LEN - UTF-8 byte count for one 16-bit Unicode unit
 *
 * Values below 0x0080 take 1 byte, values below 0x0800 take 2 bytes and
 * everything else takes 3 bytes, except that a surrogate half
 * (0xD800-0xDFFF) is counted as 2 bytes, so a high/low pair totals 4.
 *
 * The argument is evaluated more than once; pass a side-effect free
 * expression.
 */
#define UNICODE_TO_UTF8_LEN(c)  \
	(((c) >= 0x0800) \
	    ? ((((c) & 0xf800) != 0xd800) ? 3 : 2) \
	    : (((c) >= 0x0080) ? 2 : 1))
53
54 #define UCS_ALT_NULL 0x2400
55
56 /* Surrogate Pair Constants */
57 #define SP_HALF_SHIFT 10
58 #define SP_HALF_BASE 0x0010000UL
59 #define SP_HALF_MASK 0x3FFUL
60
61 #define SP_HIGH_FIRST 0xD800UL
62 #define SP_HIGH_LAST 0xDBFFUL
63 #define SP_LOW_FIRST 0xDC00UL
64 #define SP_LOW_LAST 0xDFFFUL
65
66
67 #include "vfs_utfconvdata.h"
68
69
70 /*
71 * Test for a combining character.
72 *
73 * Similar to __CFUniCharIsNonBaseCharacter except that
74 * unicode_combinable also includes Hangul Jamo characters.
75 */
76 static inline int
77 unicode_combinable(u_int16_t character)
78 {
79 const u_int8_t *bitmap = __CFUniCharCombiningBitmap;
80 u_int8_t value;
81
82 if (character < 0x0300)
83 return (0);
84
85 value = bitmap[(character >> 8) & 0xFF];
86
87 if (value == 0xFF) {
88 return (1);
89 } else if (value) {
90 bitmap = bitmap + ((value - 1) * 32) + 256;
91 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
92 }
93 return (0);
94 }
95
96 /*
97 * Test for a precomposed character.
98 *
99 * Similar to __CFUniCharIsDecomposableCharacter.
100 */
101 static inline int
102 unicode_decomposeable(u_int16_t character) {
103 const u_int8_t *bitmap = __CFUniCharDecomposableBitmap;
104 u_int8_t value;
105
106 if (character < 0x00C0)
107 return (0);
108
109 value = bitmap[(character >> 8) & 0xFF];
110
111 if (value == 0xFF) {
112 return (1);
113 } else if (value) {
114 bitmap = bitmap + ((value - 1) * 32) + 256;
115 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
116 }
117 return (0);
118 }
119
120 static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
121
122 static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
123
124
/*
 * Number of bytes that follow a UTF-8 lead byte, indexed by the top
 * five bits of that byte (byte >> 3).  An entry of -1 marks a byte that
 * cannot start a sequence.
 */
char utf_extrabytes[32] = {
	/* 0x00-0x7F: single byte */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x80-0xBF: not a lead byte */
	-1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0-0xDF: one more byte */
	 1,  1,  1,  1,
	/* 0xE0-0xEF: two more bytes */
	 2,  2,
	/* 0xF0-0xF7: three more bytes */
	 3,
	/* 0xF8-0xFF: not a lead byte */
	-1
};
129
130
131 /*
132 * utf8_encodelen - Calculates the UTF-8 encoding length for a Unicode filename
133 *
134 * NOTES:
135 * If '/' chars are allowed on disk then an alternate
136 * (replacement) char must be provided in altslash.
137 *
138 * input flags:
139 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
140 */
141 size_t
142 utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
143 int flags)
144 {
145 u_int16_t ucs_ch;
146 int charcnt;
147 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
148 size_t len;
149
150 charcnt = ucslen / 2;
151 len = 0;
152
153 while (charcnt-- > 0) {
154 ucs_ch = *ucsp++;
155
156 if (swapbytes)
157 ucs_ch = NXSwapShort(ucs_ch);
158 if (ucs_ch == '/')
159 ucs_ch = altslash ? altslash : '_';
160 else if (ucs_ch == '\0')
161 ucs_ch = UCS_ALT_NULL;
162
163 len += UNICODE_TO_UTF8_LEN(ucs_ch);
164 }
165
166 return (len);
167 }
168
169
170 /*
171 * utf8_encodestr - Encodes a Unicode string to UTF-8
172 *
173 * NOTES:
174 * The resulting UTF-8 string is NULL terminated.
175 *
176 * If '/' chars are allowed on disk then an alternate
177 * (replacement) char must be provided in altslash.
178 *
179 * input flags:
180 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
181 * UTF_NO_NULL_TERM: don't add NULL termination to UTF-8 output
182 *
183 * result:
184 * ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
185 * EINVAL: Illegal char found; char was replaced by an '_'.
186 */
187 int
188 utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
189 size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
190 {
191 u_int8_t * bufstart;
192 u_int8_t * bufend;
193 u_int16_t ucs_ch;
194 u_int16_t * chp = NULL;
195 u_int16_t sequence[8];
196 int extra = 0;
197 int charcnt;
198 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
199 int nullterm = ((flags & UTF_NO_NULL_TERM) == 0);
200 int decompose = (flags & UTF_DECOMPOSED);
201 int result = 0;
202
203 bufstart = utf8p;
204 bufend = bufstart + buflen;
205 if (nullterm)
206 --bufend;
207 charcnt = ucslen / 2;
208
209 while (charcnt-- > 0) {
210 if (extra > 0) {
211 --extra;
212 ucs_ch = *chp++;
213 } else {
214 ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
215
216 if (decompose && unicode_decomposeable(ucs_ch)) {
217 extra = unicode_decompose(ucs_ch, sequence) - 1;
218 charcnt += extra;
219 ucs_ch = sequence[0];
220 chp = &sequence[1];
221 }
222 }
223
224 /* Slash and NULL are not permitted */
225 if (ucs_ch == '/') {
226 if (altslash)
227 ucs_ch = altslash;
228 else {
229 ucs_ch = '_';
230 result = EINVAL;
231 }
232 } else if (ucs_ch == '\0') {
233 ucs_ch = UCS_ALT_NULL;
234 }
235
236 if (ucs_ch < 0x0080) {
237 if (utf8p >= bufend) {
238 result = ENAMETOOLONG;
239 break;
240 }
241 *utf8p++ = ucs_ch;
242
243 } else if (ucs_ch < 0x800) {
244 if ((utf8p + 1) >= bufend) {
245 result = ENAMETOOLONG;
246 break;
247 }
248 *utf8p++ = 0xc0 | (ucs_ch >> 6);
249 *utf8p++ = 0x80 | (0x3f & ucs_ch);
250
251 } else {
252 /* Combine valid surrogate pairs */
253 if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
254 && charcnt > 0) {
255 u_int16_t ch2;
256 u_int32_t pair;
257
258 ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
259 if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
260 pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
261 + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
262 if ((utf8p + 3) >= bufend) {
263 result = ENAMETOOLONG;
264 break;
265 }
266 --charcnt;
267 ++ucsp;
268 *utf8p++ = 0xf0 | (pair >> 18);
269 *utf8p++ = 0x80 | (0x3f & (pair >> 12));
270 *utf8p++ = 0x80 | (0x3f & (pair >> 6));
271 *utf8p++ = 0x80 | (0x3f & pair);
272 continue;
273 }
274 }
275 if ((utf8p + 2) >= bufend) {
276 result = ENAMETOOLONG;
277 break;
278 }
279 *utf8p++ = 0xe0 | (ucs_ch >> 12);
280 *utf8p++ = 0x80 | (0x3f & (ucs_ch >> 6));
281 *utf8p++ = 0x80 | (0x3f & ucs_ch);
282 }
283 }
284
285 *utf8len = utf8p - bufstart;
286 if (nullterm)
287 *utf8p++ = '\0';
288
289 return (result);
290 }
291
292
293 /*
294 * utf8_decodestr - Decodes a UTF-8 string back to Unicode
295 *
296 * NOTES:
297 * The input UTF-8 string does not need to be null terminated
298 * if utf8len is set.
299 *
300 * If '/' chars are allowed on disk then an alternate
301 * (replacement) char must be provided in altslash.
302 *
303 * input flags:
304 * UTF_REV_ENDIAN: Unicode byteorder is oposite current runtime
305 * UTF_DECOMPOSED: Unicode output string must be fully decompsed
306 *
307 * result:
308 * ENAMETOOLONG: Name didn't fit; only ucslen chars were decoded.
309 * EINVAL: Illegal UTF-8 sequence found.
310 */
311 int
312 utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
313 size_t *ucslen, size_t buflen, u_int16_t altslash, int flags)
314 {
315 u_int16_t* bufstart;
316 u_int16_t* bufend;
317 unsigned int ucs_ch;
318 unsigned int byte;
319 int result = 0;
320 int decompose, precompose, swapbytes;
321
322 decompose = (flags & UTF_DECOMPOSED);
323 precompose = (flags & UTF_PRECOMPOSED);
324 swapbytes = (flags & UTF_REVERSE_ENDIAN);
325
326 bufstart = ucsp;
327 bufend = (u_int16_t *)((u_int8_t *)ucsp + buflen);
328
329 while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
330 if (ucsp >= bufend)
331 goto toolong;
332
333 /* check for ascii */
334 if (byte < 0x80) {
335 ucs_ch = byte; /* 1st byte */
336 } else {
337 u_int32_t ch;
338 int extrabytes = utf_extrabytes[byte >> 3];
339
340 if (utf8len < extrabytes)
341 goto invalid;
342 utf8len -= extrabytes;
343
344 switch (extrabytes) {
345 case 1:
346 ch = byte; ch <<= 6; /* 1st byte */
347 byte = *utf8p++; /* 2nd byte */
348 if ((byte >> 6) != 2)
349 goto invalid;
350 ch += byte;
351 ch -= 0x00003080UL;
352 if (ch < 0x0080)
353 goto invalid;
354 ucs_ch = ch;
355 break;
356 case 2:
357 ch = byte; ch <<= 6; /* 1st byte */
358 byte = *utf8p++; /* 2nd byte */
359 if ((byte >> 6) != 2)
360 goto invalid;
361 ch += byte; ch <<= 6;
362 byte = *utf8p++; /* 3rd byte */
363 if ((byte >> 6) != 2)
364 goto invalid;
365 ch += byte;
366 ch -= 0x000E2080UL;
367 if (ch < 0x0800)
368 goto invalid;
369 if (ch >= 0xD800) {
370 if (ch <= 0xDFFF)
371 goto invalid;
372 if (ch == 0xFFFE || ch == 0xFFFF)
373 goto invalid;
374 }
375 ucs_ch = ch;
376 break;
377 case 3:
378 ch = byte; ch <<= 6; /* 1st byte */
379 byte = *utf8p++; /* 2nd byte */
380 if ((byte >> 6) != 2)
381 goto invalid;
382 ch += byte; ch <<= 6;
383 byte = *utf8p++; /* 3rd byte */
384 if ((byte >> 6) != 2)
385 goto invalid;
386 ch += byte; ch <<= 6;
387 byte = *utf8p++; /* 4th byte */
388 if ((byte >> 6) != 2)
389 goto invalid;
390 ch += byte;
391 ch -= 0x03C82080UL + SP_HALF_BASE;
392 ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
393 if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
394 goto invalid;
395 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
396 if (ucsp >= bufend)
397 goto toolong;
398 ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
399 if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
400 goto invalid;
401 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
402 continue;
403 default:
404 goto invalid;
405 }
406 if (decompose) {
407 if (unicode_decomposeable(ucs_ch)) {
408 u_int16_t sequence[8];
409 int count, i;
410
411 count = unicode_decompose(ucs_ch, sequence);
412
413 for (i = 0; i < count; ++i) {
414 ucs_ch = sequence[i];
415 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
416 if (ucsp >= bufend)
417 goto toolong;
418 }
419 continue;
420 }
421 } else if (precompose && (ucsp != bufstart)) {
422 u_int16_t composite, base;
423
424 if (unicode_combinable(ucs_ch)) {
425 base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
426 composite = unicode_combine(base, ucs_ch);
427 if (composite) {
428 --ucsp;
429 ucs_ch = composite;
430 }
431 }
432 }
433 if (ucs_ch == UCS_ALT_NULL)
434 ucs_ch = '\0';
435 }
436 if (ucs_ch == altslash)
437 ucs_ch = '/';
438
439 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
440 }
441
442 exit:
443 *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;
444
445 return (result);
446
447 invalid:
448 result = EINVAL;
449 goto exit;
450
451 toolong:
452 result = ENAMETOOLONG;
453 goto exit;
454 }
455
456
457 /*
458 * utf8_validatestr - Check for a valid UTF-8 string.
459 */
460 int
461 utf8_validatestr(const u_int8_t* utf8p, size_t utf8len)
462 {
463 unsigned int byte;
464 u_int32_t ch;
465 unsigned int ucs_ch;
466 size_t extrabytes;
467
468 while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
469 if (byte < 0x80)
470 continue; /* plain ascii */
471
472 extrabytes = utf_extrabytes[byte >> 3];
473
474 if (utf8len < extrabytes)
475 goto invalid;
476 utf8len -= extrabytes;
477
478 switch (extrabytes) {
479 case 1:
480 ch = byte; ch <<= 6; /* 1st byte */
481 byte = *utf8p++; /* 2nd byte */
482 if ((byte >> 6) != 2)
483 goto invalid;
484 ch += byte;
485 ch -= 0x00003080UL;
486 if (ch < 0x0080)
487 goto invalid;
488 break;
489 case 2:
490 ch = byte; ch <<= 6; /* 1st byte */
491 byte = *utf8p++; /* 2nd byte */
492 if ((byte >> 6) != 2)
493 goto invalid;
494 ch += byte; ch <<= 6;
495 byte = *utf8p++; /* 3rd byte */
496 if ((byte >> 6) != 2)
497 goto invalid;
498 ch += byte;
499 ch -= 0x000E2080UL;
500 if (ch < 0x0800)
501 goto invalid;
502 if (ch >= 0xD800) {
503 if (ch <= 0xDFFF)
504 goto invalid;
505 if (ch == 0xFFFE || ch == 0xFFFF)
506 goto invalid;
507 }
508 break;
509 case 3:
510 ch = byte; ch <<= 6; /* 1st byte */
511 byte = *utf8p++; /* 2nd byte */
512 if ((byte >> 6) != 2)
513 goto invalid;
514 ch += byte; ch <<= 6;
515 byte = *utf8p++; /* 3rd byte */
516 if ((byte >> 6) != 2)
517 goto invalid;
518 ch += byte; ch <<= 6;
519 byte = *utf8p++; /* 4th byte */
520 if ((byte >> 6) != 2)
521 goto invalid;
522 ch += byte;
523 ch -= 0x03C82080UL + SP_HALF_BASE;
524 ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
525 if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
526 goto invalid;
527 ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
528 if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
529 goto invalid;
530 break;
531 default:
532 goto invalid;
533 }
534
535 }
536 return (0);
537 invalid:
538 return (EINVAL);
539 }
540
541
542 /*
543 * Unicode 3.2 decomposition code (derived from Core Foundation)
544 */
545
/* One entry of a lookup table keyed by character, with a 32-bit value. */
typedef struct {
	u_int32_t _key;
	u_int32_t _value;
} unicode_mappings32;

/*
 * getmappedvalue32 - binary search a unicode_mappings32 table
 *
 * theTable must hold numElem entries sorted by ascending _key.
 *
 * Returns the _value of the entry whose _key equals character, or 0
 * when there is no such entry or the table is empty.
 */
static inline u_int32_t
getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	const unicode_mappings32 *first, *last, *probe;

	/* numElem - 1 would wrap around for an empty table */
	if (numElem == 0)
		return (0);

	first = theTable;
	last = theTable + (numElem - 1);

	/* Outside the key range covered by the table */
	if ((character < first->_key) || (character > last->_key))
		return (0);

	while (first <= last) {
		probe = first + ((last - first) >> 1);
		if (character < probe->_key)
			last = probe - 1;
		else if (character > probe->_key)
			first = probe + 1;
		else
			return (probe->_value);
	}
	return (0);
}
570
571 #define RECURSIVE_DECOMPOSITION (1 << 15)
572 #define EXTRACT_COUNT(value) (((value) >> 12) & 0x0007)
573
/* One entry of a lookup table keyed by character, with a 16-bit value. */
typedef struct {
	u_int16_t _key;
	u_int16_t _value;
} unicode_mappings16;

/*
 * getmappedvalue16 - binary search a unicode_mappings16 table
 *
 * theTable must hold numElem entries sorted by ascending _key.
 *
 * Returns the _value of the entry whose _key equals character, or 0
 * when there is no such entry or the table is empty.
 */
static inline u_int16_t
getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	const unicode_mappings16 *first, *last, *probe;

	/* numElem - 1 would wrap around for an empty table */
	if (numElem == 0)
		return (0);

	first = theTable;
	last = theTable + (numElem - 1);

	/* Outside the key range covered by the table */
	if ((character < first->_key) || (character > last->_key))
		return (0);

	while (first <= last) {
		probe = first + ((last - first) >> 1);
		if (character < probe->_key)
			last = probe - 1;
		else if (character > probe->_key)
			first = probe + 1;
		else
			return (probe->_value);
	}
	return (0);
}
601
602
603 static u_int32_t
604 unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars)
605 {
606 u_int16_t value;
607 u_int32_t length;
608 u_int16_t firstChar;
609 u_int16_t theChar;
610 const u_int16_t *bmpMappings;
611 u_int32_t usedLength;
612
613 value = getmappedvalue16(
614 (const unicode_mappings16 *)__CFUniCharDecompositionTable,
615 __UniCharDecompositionTableLength, character);
616 length = EXTRACT_COUNT(value);
617 firstChar = value & 0x0FFF;
618 theChar = firstChar;
619 bmpMappings = (length == 1 ? &theChar : __CFUniCharMultipleDecompositionTable + firstChar);
620 usedLength = 0;
621
622 if (value & RECURSIVE_DECOMPOSITION) {
623 usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars);
624
625 --length; /* Decrement for the first char */
626 if (!usedLength)
627 return 0;
628 ++bmpMappings;
629 convertedChars += usedLength;
630 }
631
632 usedLength += length;
633
634 while (length--)
635 *(convertedChars++) = *(bmpMappings++);
636
637 return (usedLength);
638 }
639
640 #define HANGUL_SBASE 0xAC00
641 #define HANGUL_LBASE 0x1100
642 #define HANGUL_VBASE 0x1161
643 #define HANGUL_TBASE 0x11A7
644
645 #define HANGUL_SCOUNT 11172
646 #define HANGUL_LCOUNT 19
647 #define HANGUL_VCOUNT 21
648 #define HANGUL_TCOUNT 28
649 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
650
651 /*
652 * unicode_decompose - decompose a composed Unicode char
653 *
654 * Composed Unicode characters are forbidden on
655 * HFS Plus volumes. ucs_decompose will convert a
656 * composed character into its correct decomposed
657 * sequence.
658 *
659 * Similar to CFUniCharDecomposeCharacter
660 */
661 static int
662 unicode_decompose(u_int16_t character, u_int16_t *convertedChars)
663 {
664 if ((character >= HANGUL_SBASE) &&
665 (character <= (HANGUL_SBASE + HANGUL_SCOUNT))) {
666 u_int32_t length;
667
668 character -= HANGUL_SBASE;
669 length = (character % HANGUL_TCOUNT ? 3 : 2);
670
671 *(convertedChars++) =
672 character / HANGUL_NCOUNT + HANGUL_LBASE;
673 *(convertedChars++) =
674 (character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE;
675 if (length > 2)
676 *convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE;
677 return (length);
678 } else {
679 return (unicode_recursive_decompose(character, convertedChars));
680 }
681 }
682
683 /*
684 * unicode_combine - generate a precomposed Unicode char
685 *
686 * Precomposed Unicode characters are required for some volume
687 * formats and network protocols. unicode_combine will combine
688 * a decomposed character sequence into a single precomposed
689 * (composite) character.
690 *
691 * Similar toCFUniCharPrecomposeCharacter but unicode_combine
692 * also handles Hangul Jamo characters.
693 */
694 static u_int16_t
695 unicode_combine(u_int16_t base, u_int16_t combining)
696 {
697 u_int32_t value;
698
699 /* Check HANGUL */
700 if ((combining >= HANGUL_VBASE) && (combining < (HANGUL_TBASE + HANGUL_TCOUNT))) {
701 /* 2 char Hangul sequences */
702 if ((combining < (HANGUL_VBASE + HANGUL_VCOUNT)) &&
703 (base >= HANGUL_LBASE && base < (HANGUL_LBASE + HANGUL_LCOUNT))) {
704 return (HANGUL_SBASE +
705 ((base - HANGUL_LBASE)*(HANGUL_VCOUNT*HANGUL_TCOUNT)) +
706 ((combining - HANGUL_VBASE)*HANGUL_TCOUNT));
707 }
708
709 /* 3 char Hangul sequences */
710 if ((combining > HANGUL_TBASE) &&
711 (base >= HANGUL_SBASE && base < (HANGUL_SBASE + HANGUL_SCOUNT))) {
712 if ((base - HANGUL_SBASE) % HANGUL_TCOUNT)
713 return (0);
714 else
715 return (base + (combining - HANGUL_TBASE));
716 }
717 }
718
719 value = getmappedvalue32(
720 (const unicode_mappings32 *)__CFUniCharPrecompSourceTable,
721 __CFUniCharPrecompositionTableLength, combining);
722
723 if (value) {
724 value = getmappedvalue16(
725 (const unicode_mappings16 *)
726 ((u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)),
727 (value >> 16), base);
728 }
729 return (value);
730 }
731