]> git.saurik.com Git - apple/xnu.git/blame - bsd/vfs/vfs_utfconv.c
xnu-517.3.15.tar.gz
[apple/xnu.git] / bsd / vfs / vfs_utfconv.c
CommitLineData
1c79356b 1/*
9bccf70c 2 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
1c79356b
A
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
43866e37 6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b 7 *
43866e37
A
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37
A
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
1c79356b
A
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
9bccf70c
A
25
26 /*
27 Includes Unicode 3.2 decomposition code derived from Core Foundation
28 */
29
1c79356b
A
30#include <sys/param.h>
31#include <sys/utfconv.h>
32#include <sys/errno.h>
33#include <architecture/byte_order.h>
34
1c79356b 35/*
765c9de3 36 * UTF-8 (Unicode Transformation Format)
1c79356b 37 *
765c9de3
A
38 * UTF-8 is the Unicode Transformation Format that serializes a Unicode
39 * character as a sequence of one to four bytes. Only the shortest form
40 * required to represent the significant Unicode bits is legal.
1c79356b
A
41 *
42 * UTF-8 Multibyte Codes
43 *
765c9de3
A
44 * Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary)
45 * -----------------------------------------------------------------------------
46 * 1 7 0x0000 0x007F 0xxxxxxx
47 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
48 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
49 * 4 21 0x10000 0x10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
50 * -----------------------------------------------------------------------------
1c79356b
A
51 */
52
53
765c9de3
A
/*
 * Number of UTF-8 bytes produced for a single UTF-16 code unit.
 *
 * A surrogate half (0xD800-0xDFFF) is counted as 2 bytes so that a
 * complete surrogate pair adds up to the 4 bytes of its UTF-8 form.
 * NOTE: the argument is evaluated more than once.
 */
#define UNICODE_TO_UTF8_LEN(c)				\
	((c) < 0x0080 ? 1 :				\
	 (c) < 0x0800 ? 2 :				\
	 ((c) & 0xf800) == 0xd800 ? 2 : 3)

/* UCS-2 value substituted for an embedded NUL (U+2400, SYMBOL FOR NULL) */
#define UCS_ALT_NULL	0x2400
1c79356b 58
765c9de3
A
/*
 * UTF-16 surrogate pair constants.
 *
 * A code point at or above SP_HALF_BASE is split into two 16-bit halves
 * carrying SP_HALF_SHIFT payload bits each (selected by SP_HALF_MASK).
 */
#define SP_HALF_SHIFT	10
#define SP_HALF_BASE	0x0010000UL
#define SP_HALF_MASK	0x3FFUL

/* High (leading) half range */
#define SP_HIGH_FIRST	0xD800UL
#define SP_HIGH_LAST	0xDBFFUL

/* Low (trailing) half range */
#define SP_LOW_FIRST	0xDC00UL
#define SP_LOW_LAST	0xDFFFUL
68
1c79356b 69
9bccf70c 70#include "vfs_utfconvdata.h"
765c9de3 71
1c79356b 72
9bccf70c
A
73/*
74 * Test for a combining character.
75 *
76 * Similar to __CFUniCharIsNonBaseCharacter except that
77 * unicode_combinable also includes Hangul Jamo characters.
78 */
79static inline int
80unicode_combinable(u_int16_t character)
81{
82 const u_int8_t *bitmap = __CFUniCharCombiningBitmap;
83 u_int8_t value;
84
85 if (character < 0x0300)
86 return (0);
87
88 value = bitmap[(character >> 8) & 0xFF];
89
90 if (value == 0xFF) {
91 return (1);
92 } else if (value) {
93 bitmap = bitmap + ((value - 1) * 32) + 256;
94 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
95 }
96 return (0);
97}
98
99/*
100 * Test for a precomposed character.
101 *
102 * Similar to __CFUniCharIsDecomposableCharacter.
103 */
104static inline int
105unicode_decomposeable(u_int16_t character) {
106 const u_int8_t *bitmap = __CFUniCharDecomposableBitmap;
107 u_int8_t value;
108
109 if (character < 0x00C0)
110 return (0);
111
112 value = bitmap[(character >> 8) & 0xFF];
113
114 if (value == 0xFF) {
115 return (1);
116 } else if (value) {
117 bitmap = bitmap + ((value - 1) * 32) + 256;
118 return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
119 }
120 return (0);
121}
122
123static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
124
125static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
0b4e3aa0 126
1c79356b 127
765c9de3
A
/*
 * Number of continuation bytes that follow a UTF-8 lead byte, indexed by
 * the top five bits of that byte (byte >> 3).  -1 marks a byte that is
 * not a valid lead byte.
 */
char utf_extrabytes[32] = {
	/* 0x00 - 0x7F: single-byte (ASCII) */
	 0,  0,  0,  0,  0,  0,  0,  0,
	 0,  0,  0,  0,  0,  0,  0,  0,
	/* 0x80 - 0xBF: continuation bytes, never a lead byte */
	-1, -1, -1, -1, -1, -1, -1, -1,
	/* 0xC0 - 0xDF: two-byte sequences */
	 1,  1,  1,  1,
	/* 0xE0 - 0xEF: three-byte sequences */
	 2,  2,
	/* 0xF0 - 0xF7: four-byte sequences */
	 3,
	/* 0xF8 - 0xFF: not supported */
	-1
};
132
133
1c79356b 134/*
765c9de3 135 * utf8_encodelen - Calculates the UTF-8 encoding length for a Unicode filename
1c79356b
A
136 *
137 * NOTES:
138 * If '/' chars are allowed on disk then an alternate
139 * (replacement) char must be provided in altslash.
140 *
141 * input flags:
765c9de3 142 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
1c79356b
A
143 */
144size_t
765c9de3
A
145utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
146 int flags)
1c79356b
A
147{
148 u_int16_t ucs_ch;
149 int charcnt;
150 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
151 size_t len;
152
153 charcnt = ucslen / 2;
154 len = 0;
155
156 while (charcnt-- > 0) {
157 ucs_ch = *ucsp++;
158
159 if (swapbytes)
160 ucs_ch = NXSwapShort(ucs_ch);
0b4e3aa0
A
161 if (ucs_ch == '/')
162 ucs_ch = altslash ? altslash : '_';
163 else if (ucs_ch == '\0')
164 ucs_ch = UCS_ALT_NULL;
1c79356b 165
765c9de3 166 len += UNICODE_TO_UTF8_LEN(ucs_ch);
1c79356b
A
167 }
168
169 return (len);
170}
171
172
173/*
765c9de3 174 * utf8_encodestr - Encodes a Unicode string to UTF-8
1c79356b
A
175 *
176 * NOTES:
0b4e3aa0 177 * The resulting UTF-8 string is NULL terminated.
1c79356b
A
178 *
179 * If '/' chars are allowed on disk then an alternate
180 * (replacement) char must be provided in altslash.
181 *
182 * input flags:
765c9de3 183 * UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
1c79356b 184 * UTF_NO_NULL_TERM: don't add NULL termination to UTF-8 output
0b4e3aa0
A
185 *
186 * result:
187 * ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
188 * EINVAL: Illegal char found; char was replaced by an '_'.
1c79356b 189 */
765c9de3
A
190int
191utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
192 size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
1c79356b
A
193{
194 u_int8_t * bufstart;
195 u_int8_t * bufend;
196 u_int16_t ucs_ch;
9bccf70c
A
197 u_int16_t * chp = NULL;
198 u_int16_t sequence[8];
199 int extra = 0;
1c79356b
A
200 int charcnt;
201 int swapbytes = (flags & UTF_REVERSE_ENDIAN);
0b4e3aa0
A
202 int nullterm = ((flags & UTF_NO_NULL_TERM) == 0);
203 int decompose = (flags & UTF_DECOMPOSED);
1c79356b
A
204 int result = 0;
205
206 bufstart = utf8p;
207 bufend = bufstart + buflen;
208 if (nullterm)
209 --bufend;
210 charcnt = ucslen / 2;
211
212 while (charcnt-- > 0) {
9bccf70c
A
213 if (extra > 0) {
214 --extra;
215 ucs_ch = *chp++;
0b4e3aa0
A
216 } else {
217 ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
9bccf70c
A
218
219 if (decompose && unicode_decomposeable(ucs_ch)) {
220 extra = unicode_decompose(ucs_ch, sequence) - 1;
221 charcnt += extra;
222 ucs_ch = sequence[0];
223 chp = &sequence[1];
224 }
0b4e3aa0 225 }
1c79356b 226
0b4e3aa0
A
227 /* Slash and NULL are not permitted */
228 if (ucs_ch == '/') {
229 if (altslash)
230 ucs_ch = altslash;
231 else {
232 ucs_ch = '_';
233 result = EINVAL;
234 }
235 } else if (ucs_ch == '\0') {
236 ucs_ch = UCS_ALT_NULL;
237 }
1c79356b 238
0b4e3aa0 239 if (ucs_ch < 0x0080) {
1c79356b
A
240 if (utf8p >= bufend) {
241 result = ENAMETOOLONG;
242 break;
765c9de3 243 }
1c79356b
A
244 *utf8p++ = ucs_ch;
245
246 } else if (ucs_ch < 0x800) {
247 if ((utf8p + 1) >= bufend) {
248 result = ENAMETOOLONG;
249 break;
250 }
765c9de3
A
251 *utf8p++ = 0xc0 | (ucs_ch >> 6);
252 *utf8p++ = 0x80 | (0x3f & ucs_ch);
1c79356b
A
253
254 } else {
765c9de3
A
255 /* Combine valid surrogate pairs */
256 if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
257 && charcnt > 0) {
258 u_int16_t ch2;
259 u_int32_t pair;
260
261 ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
262 if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
263 pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
264 + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
265 if ((utf8p + 3) >= bufend) {
266 result = ENAMETOOLONG;
267 break;
268 }
269 --charcnt;
270 ++ucsp;
271 *utf8p++ = 0xf0 | (pair >> 18);
272 *utf8p++ = 0x80 | (0x3f & (pair >> 12));
273 *utf8p++ = 0x80 | (0x3f & (pair >> 6));
274 *utf8p++ = 0x80 | (0x3f & pair);
275 continue;
276 }
277 }
1c79356b
A
278 if ((utf8p + 2) >= bufend) {
279 result = ENAMETOOLONG;
280 break;
281 }
765c9de3
A
282 *utf8p++ = 0xe0 | (ucs_ch >> 12);
283 *utf8p++ = 0x80 | (0x3f & (ucs_ch >> 6));
284 *utf8p++ = 0x80 | (0x3f & ucs_ch);
1c79356b
A
285 }
286 }
287
288 *utf8len = utf8p - bufstart;
289 if (nullterm)
290 *utf8p++ = '\0';
291
292 return (result);
293}
294
295
296/*
765c9de3 297 * utf8_decodestr - Decodes a UTF-8 string back to Unicode
1c79356b
A
298 *
299 * NOTES:
300 * The input UTF-8 string does not need to be null terminated
301 * if utf8len is set.
302 *
303 * If '/' chars are allowed on disk then an alternate
304 * (replacement) char must be provided in altslash.
305 *
306 * input flags:
765c9de3
A
307 * UTF_REV_ENDIAN: Unicode byteorder is oposite current runtime
308 * UTF_DECOMPOSED: Unicode output string must be fully decompsed
0b4e3aa0
A
309 *
310 * result:
311 * ENAMETOOLONG: Name didn't fit; only ucslen chars were decoded.
312 * EINVAL: Illegal UTF-8 sequence found.
1c79356b
A
313 */
314int
765c9de3
A
315utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
316 size_t *ucslen, size_t buflen, u_int16_t altslash, int flags)
1c79356b
A
317{
318 u_int16_t* bufstart;
319 u_int16_t* bufend;
55e303ae
A
320 unsigned int ucs_ch;
321 unsigned int byte;
1c79356b 322 int result = 0;
0b4e3aa0 323 int decompose, precompose, swapbytes;
1c79356b 324
0b4e3aa0
A
325 decompose = (flags & UTF_DECOMPOSED);
326 precompose = (flags & UTF_PRECOMPOSED);
327 swapbytes = (flags & UTF_REVERSE_ENDIAN);
1c79356b
A
328
329 bufstart = ucsp;
330 bufend = (u_int16_t *)((u_int8_t *)ucsp + buflen);
331
332 while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
765c9de3
A
333 if (ucsp >= bufend)
334 goto toolong;
1c79356b
A
335
336 /* check for ascii */
337 if (byte < 0x80) {
55e303ae 338 ucs_ch = byte; /* 1st byte */
1c79356b 339 } else {
765c9de3
A
340 u_int32_t ch;
341 int extrabytes = utf_extrabytes[byte >> 3];
342
343 if (utf8len < extrabytes)
344 goto invalid;
345 utf8len -= extrabytes;
346
347 switch (extrabytes) {
55e303ae
A
348 case 1:
349 ch = byte; ch <<= 6; /* 1st byte */
350 byte = *utf8p++; /* 2nd byte */
351 if ((byte >> 6) != 2)
352 goto invalid;
353 ch += byte;
354 ch -= 0x00003080UL;
355 if (ch < 0x0080)
356 goto invalid;
357 ucs_ch = ch;
765c9de3 358 break;
55e303ae
A
359 case 2:
360 ch = byte; ch <<= 6; /* 1st byte */
361 byte = *utf8p++; /* 2nd byte */
362 if ((byte >> 6) != 2)
363 goto invalid;
364 ch += byte; ch <<= 6;
365 byte = *utf8p++; /* 3rd byte */
366 if ((byte >> 6) != 2)
367 goto invalid;
368 ch += byte;
369 ch -= 0x000E2080UL;
370 if (ch < 0x0800)
371 goto invalid;
372 if (ch >= 0xD800) {
373 if (ch <= 0xDFFF)
765c9de3 374 goto invalid;
55e303ae
A
375 if (ch == 0xFFFE || ch == 0xFFFF)
376 goto invalid;
377 }
378 ucs_ch = ch;
379 break;
380 case 3:
381 ch = byte; ch <<= 6; /* 1st byte */
382 byte = *utf8p++; /* 2nd byte */
383 if ((byte >> 6) != 2)
384 goto invalid;
385 ch += byte; ch <<= 6;
386 byte = *utf8p++; /* 3rd byte */
387 if ((byte >> 6) != 2)
388 goto invalid;
389 ch += byte; ch <<= 6;
390 byte = *utf8p++; /* 4th byte */
391 if ((byte >> 6) != 2)
392 goto invalid;
393 ch += byte;
394 ch -= 0x03C82080UL + SP_HALF_BASE;
395 ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
396 if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
397 goto invalid;
398 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
399 if (ucsp >= bufend)
400 goto toolong;
401 ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
402 if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
403 goto invalid;
404 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
765c9de3 405 continue;
1c79356b 406 default:
55e303ae 407 goto invalid;
1c79356b 408 }
1c79356b 409 if (decompose) {
9bccf70c
A
410 if (unicode_decomposeable(ucs_ch)) {
411 u_int16_t sequence[8];
412 int count, i;
1c79356b 413
9bccf70c 414 count = unicode_decompose(ucs_ch, sequence);
1c79356b 415
9bccf70c
A
416 for (i = 0; i < count; ++i) {
417 ucs_ch = sequence[i];
0b4e3aa0 418 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
765c9de3
A
419 if (ucsp >= bufend)
420 goto toolong;
0b4e3aa0 421 }
9bccf70c 422 continue;
0b4e3aa0
A
423 }
424 } else if (precompose && (ucsp != bufstart)) {
425 u_int16_t composite, base;
426
9bccf70c
A
427 if (unicode_combinable(ucs_ch)) {
428 base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
429 composite = unicode_combine(base, ucs_ch);
430 if (composite) {
431 --ucsp;
432 ucs_ch = composite;
433 }
1c79356b
A
434 }
435 }
0b4e3aa0
A
436 if (ucs_ch == UCS_ALT_NULL)
437 ucs_ch = '\0';
1c79356b 438 }
1c79356b
A
439 if (ucs_ch == altslash)
440 ucs_ch = '/';
1c79356b 441
765c9de3 442 *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
1c79356b 443 }
765c9de3
A
444
445exit:
1c79356b
A
446 *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;
447
448 return (result);
765c9de3
A
449
450invalid:
451 result = EINVAL;
452 goto exit;
453
454toolong:
455 result = ENAMETOOLONG;
456 goto exit;
1c79356b
A
457}
458
459
9bccf70c
A
460 /*
461 * Unicode 3.2 decomposition code (derived from Core Foundation)
462 */
1c79356b 463
9bccf70c
A
/* One entry of a sorted key -> value table with 32-bit fields */
typedef struct {
	u_int32_t _key;
	u_int32_t _value;
} unicode_mappings32;

/*
 * getmappedvalue32 - binary search a table of unicode_mappings32
 *
 * theTable must hold numElem entries sorted by ascending _key.
 * Returns the _value of the entry whose _key equals character, or 0
 * when there is no such entry (including when the table is empty).
 */
static inline u_int32_t
getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	u_int32_t lo, hi;

	/* An empty table has no first/last entry to range-check against */
	if (numElem == 0)
		return (0);

	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	/* Search the half-open index range [lo, hi) */
	lo = 0;
	hi = numElem;
	while (lo < hi) {
		u_int32_t mid = lo + ((hi - 1 - lo) >> 1);
		u_int32_t key = theTable[mid]._key;

		if (character < key)
			hi = mid;
		else if (character > key)
			lo = mid + 1;
		else
			return (theTable[mid]._value);
	}
	return (0);
}
1c79356b 488
9bccf70c
A
/*
 * Fields of a 16-bit decomposition table value:
 *   bit  15     - the first mapped character must itself be decomposed
 *   bits 12-14  - number of characters in the decomposition
 */
#define RECURSIVE_DECOMPOSITION	0x8000
#define EXTRACT_COUNT(value)	(((value) >> 12) & 0x7)
1c79356b 491
9bccf70c
A
/* One entry of a sorted key -> value table with 16-bit fields */
typedef struct {
	u_int16_t _key;
	u_int16_t _value;
} unicode_mappings16;

/*
 * getmappedvalue16 - binary search a table of unicode_mappings16
 *
 * theTable must hold numElem entries sorted by ascending _key.
 * Returns the _value of the entry whose _key equals character, or 0
 * when there is no such entry (including when the table is empty).
 */
static inline u_int16_t
getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem,
                 u_int16_t character)
{
	u_int32_t lo, hi;

	/* An empty table has no first/last entry to range-check against */
	if (numElem == 0)
		return (0);

	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	/* Search the half-open index range [lo, hi) */
	lo = 0;
	hi = numElem;
	while (lo < hi) {
		u_int32_t mid = lo + ((hi - 1 - lo) >> 1);
		u_int16_t key = theTable[mid]._key;

		if (character < key)
			hi = mid;
		else if (character > key)
			lo = mid + 1;
		else
			return (theTable[mid]._value);
	}
	return (0);
}
519
520
521static u_int32_t
522unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars)
523{
524 u_int16_t value;
525 u_int32_t length;
526 u_int16_t firstChar;
527 u_int16_t theChar;
528 const u_int16_t *bmpMappings;
529 u_int32_t usedLength;
530
531 value = getmappedvalue16(
532 (const unicode_mappings16 *)__CFUniCharDecompositionTable,
533 __UniCharDecompositionTableLength, character);
534 length = EXTRACT_COUNT(value);
535 firstChar = value & 0x0FFF;
536 theChar = firstChar;
537 bmpMappings = (length == 1 ? &theChar : __CFUniCharMultipleDecompositionTable + firstChar);
538 usedLength = 0;
539
540 if (value & RECURSIVE_DECOMPOSITION) {
541 usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars);
542
543 --length; /* Decrement for the first char */
544 if (!usedLength)
545 return 0;
546 ++bmpMappings;
547 convertedChars += usedLength;
548 }
0b4e3aa0 549
9bccf70c 550 usedLength += length;
0b4e3aa0 551
9bccf70c
A
552 while (length--)
553 *(convertedChars++) = *(bmpMappings++);
0b4e3aa0 554
9bccf70c
A
555 return (usedLength);
556}
557
/* First code point of each Hangul range used for (de)composition */
#define HANGUL_SBASE	0xAC00	/* precomposed syllables */
#define HANGUL_LBASE	0x1100	/* leading consonants */
#define HANGUL_VBASE	0x1161	/* vowels */
#define HANGUL_TBASE	0x11A7	/* one before the first trailing consonant */

/* Number of values in each range */
#define HANGUL_LCOUNT	19
#define HANGUL_VCOUNT	21
#define HANGUL_TCOUNT	28
#define HANGUL_NCOUNT	(HANGUL_VCOUNT * HANGUL_TCOUNT)
#define HANGUL_SCOUNT	(HANGUL_LCOUNT * HANGUL_NCOUNT)
1c79356b
A
568
569/*
9bccf70c 570 * unicode_decompose - decompose a composed Unicode char
1c79356b
A
571 *
572 * Composed Unicode characters are forbidden on
573 * HFS Plus volumes. ucs_decompose will convert a
574 * composed character into its correct decomposed
575 * sequence.
576 *
9bccf70c 577 * Similar to CFUniCharDecomposeCharacter
1c79356b 578 */
9bccf70c
A
579static int
580unicode_decompose(u_int16_t character, u_int16_t *convertedChars)
1c79356b 581{
9bccf70c
A
582 if ((character >= HANGUL_SBASE) &&
583 (character <= (HANGUL_SBASE + HANGUL_SCOUNT))) {
584 u_int32_t length;
585
586 character -= HANGUL_SBASE;
587 length = (character % HANGUL_TCOUNT ? 3 : 2);
588
589 *(convertedChars++) =
590 character / HANGUL_NCOUNT + HANGUL_LBASE;
591 *(convertedChars++) =
592 (character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE;
593 if (length > 2)
594 *convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE;
595 return (length);
1c79356b 596 } else {
9bccf70c 597 return (unicode_recursive_decompose(character, convertedChars));
1c79356b 598 }
1c79356b
A
599}
600
0b4e3aa0 601/*
9bccf70c 602 * unicode_combine - generate a precomposed Unicode char
0b4e3aa0
A
603 *
604 * Precomposed Unicode characters are required for some volume
9bccf70c
A
605 * formats and network protocols. unicode_combine will combine
606 * a decomposed character sequence into a single precomposed
0b4e3aa0
A
607 * (composite) character.
608 *
9bccf70c
A
609 * Similar toCFUniCharPrecomposeCharacter but unicode_combine
610 * also handles Hangul Jamo characters.
0b4e3aa0
A
611 */
612static u_int16_t
9bccf70c 613unicode_combine(u_int16_t base, u_int16_t combining)
0b4e3aa0 614{
9bccf70c
A
615 u_int32_t value;
616
617 /* Check HANGUL */
618 if ((combining >= HANGUL_VBASE) && (combining < (HANGUL_TBASE + HANGUL_TCOUNT))) {
619 /* 2 char Hangul sequences */
620 if ((combining < (HANGUL_VBASE + HANGUL_VCOUNT)) &&
621 (base >= HANGUL_LBASE && base < (HANGUL_LBASE + HANGUL_LCOUNT))) {
622 return (HANGUL_SBASE +
623 ((base - HANGUL_LBASE)*(HANGUL_VCOUNT*HANGUL_TCOUNT)) +
624 ((combining - HANGUL_VBASE)*HANGUL_TCOUNT));
0b4e3aa0 625 }
9bccf70c
A
626
627 /* 3 char Hangul sequences */
628 if ((combining > HANGUL_TBASE) &&
629 (base >= HANGUL_SBASE && base < (HANGUL_SBASE + HANGUL_SCOUNT))) {
630 if ((base - HANGUL_SBASE) % HANGUL_TCOUNT)
631 return (0);
632 else
633 return (base + (combining - HANGUL_TBASE));
0b4e3aa0 634 }
0b4e3aa0
A
635 }
636
9bccf70c
A
637 value = getmappedvalue32(
638 (const unicode_mappings32 *)__CFUniCharPrecompSourceTable,
639 __CFUniCharPrecompositionTableLength, combining);
0b4e3aa0 640
9bccf70c
A
641 if (value) {
642 value = getmappedvalue16(
643 (const unicode_mappings16 *)
644 ((u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)),
645 (value >> 16), base);
0b4e3aa0 646 }
9bccf70c 647 return (value);
0b4e3aa0
A
648}
649