[apple/xnu.git] / bsd / vfs / vfs_utfconv.c

/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */
 
 /*
 	Includes Unicode 3.2 decomposition code derived from Core Foundation
 */

#include <sys/param.h>
#include <sys/utfconv.h>
#include <sys/errno.h>
#include <architecture/byte_order.h>

/*
 * UTF-8 (Unicode Transformation Format)
 *
 * UTF-8 is the Unicode Transformation Format that serializes a Unicode
 * character as a sequence of one to four bytes. Only the shortest form
 * required to represent the significant Unicode bits is legal.
 * 
 * UTF-8 Multibyte Codes
 *
 * Bytes   Bits   Unicode Min  Unicode Max   UTF-8 Byte Sequence (binary)
 * -----------------------------------------------------------------------------
 *   1       7       0x0000        0x007F    0xxxxxxx
 *   2      11       0x0080        0x07FF    110xxxxx 10xxxxxx
 *   3      16       0x0800        0xFFFF    1110xxxx 10xxxxxx 10xxxxxx
 *   4      21      0x10000      0x10FFFF    11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 * -----------------------------------------------------------------------------
 */


/*
 * Number of UTF-8 bytes charged to one 16-bit Unicode code unit.
 *
 * Each surrogate half (0xD800-0xDFFF) is charged 2 bytes, so a
 * high/low pair adds up to the 4 bytes of its combined encoding.
 */
#define UNICODE_TO_UTF8_LEN(c)  \
	((c) < 0x0080 ? 1 : ((c) < 0x0800 ? 2 : (((c) & 0xf800) == 0xd800 ? 2 : 3)))

/* Stand-in character that replaces an embedded NULL (U+0000) when encoding */
#define UCS_ALT_NULL	0x2400

/* Surrogate Pair Constants */
#define SP_HALF_SHIFT	10
#define SP_HALF_BASE	0x0010000UL
#define SP_HALF_MASK	0x3FFUL

#define SP_HIGH_FIRST	0xD800UL
#define SP_HIGH_LAST	0xDBFFUL
#define SP_LOW_FIRST	0xDC00UL
#define SP_LOW_LAST	0xDFFFUL


#include "vfs_utfconvdata.h"


/*
 * Test for a combining character.
 *
 * Similar to __CFUniCharIsNonBaseCharacter except that
 * unicode_combinable also includes Hangul Jamo characters.
 *
 * The bitmap is read as a 256-entry index (one entry per high byte)
 * followed by 32-byte pages of one bit per low byte.  An index entry
 * of 0 means no character in that row qualifies, 0xFF means every
 * character does, and any other value N selects page N - 1.
 *
 * Returns 1 if the character is combinable, 0 otherwise.
 */
static inline int
unicode_combinable(u_int16_t character)
{
	const u_int8_t *index = __CFUniCharCombiningBitmap;
	const u_int8_t *page;
	u_int8_t slot;

	/* Nothing below U+0300 is treated as combining */
	if (character < 0x0300)
		return (0);

	slot = index[(character >> 8) & 0xFF];
	if (slot == 0)
		return (0);
	if (slot == 0xFF)
		return (1);

	page = index + 256 + ((slot - 1) * 32);
	return ((page[(character & 0xFF) >> 3] >> (character & 7)) & 1);
}

/*
 * Test for a precomposed character.
 *
 * Similar to __CFUniCharIsDecomposableCharacter.
 *
 * The bitmap is read as a 256-entry index (one entry per high byte)
 * followed by 32-byte pages of one bit per low byte.  An index entry
 * of 0 means no character in that row qualifies, 0xFF means every
 * character does, and any other value N selects page N - 1.
 *
 * Returns 1 if the character has a decomposition, 0 otherwise.
 */
static inline int
unicode_decomposeable(u_int16_t character) {
	const u_int8_t *index = __CFUniCharDecomposableBitmap;
	const u_int8_t *page;
	u_int8_t slot;

	/* Nothing below U+00C0 is treated as decomposable */
	if (character < 0x00C0)
		return (0);

	slot = index[(character >> 8) & 0xFF];
	if (slot == 0)
		return (0);
	if (slot == 0xFF)
		return (1);

	page = index + 256 + ((slot - 1) * 32);
	return ((page[(character & 0xFF) >> 3] >> (character & 7)) & 1);
}

/* Forward declarations for helpers that are defined later in this file */
static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);

static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);


/*
 * Number of bytes that must follow a given first byte of a UTF-8
 * sequence, indexed by the top five bits of that byte (byte >> 3):
 *
 *    0x00-0x7F    0
 *    0x80-0xBF   -1
 *    0xC0-0xDF    1
 *    0xE0-0xEF    2
 *    0xF0-0xF7    3
 *    0xF8-0xFF   -1
 *
 * utf8_decodestr treats a byte whose entry is -1 as an invalid start
 * of a sequence.
 *
 * NOTE(review): the element type is plain char, whose signedness is
 * implementation-defined; the -1 entries assume it is signed - confirm
 * for every target this file is built for.
 */
char utf_extrabytes[32] = {
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	-1, -1, -1, -1, -1, -1, -1, -1,  1,  1,  1,  1,  2,  2,  3, -1
};


/*
 * utf8_encodelen - Calculates the UTF-8 encoding length for a Unicode filename
 *
 * ucslen is the size of the input in bytes; the result is the number
 * of UTF-8 bytes needed, not counting any NULL terminator.
 *
 * A high surrogate immediately followed by a low surrogate is counted
 * as one 4 byte sequence.  A surrogate that is not part of such a pair
 * is counted as 3 bytes, which is what utf8_encodestr writes for it.
 *
 * NOTES:
 *    If '/' chars are allowed on disk then an alternate
 *    (replacement) char must be provided in altslash.
 *
 *    Decomposition is not taken into account; the length is that of
 *    the characters as given.
 *
 * input flags:
 *    UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
 */
size_t
utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
               int flags)
{
	u_int16_t ucs_ch;
	u_int16_t ch2;
	size_t charcnt;
	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
	size_t len;
	
	charcnt = ucslen / 2;
	len = 0;

	while (charcnt > 0) {
		--charcnt;
		ucs_ch = *ucsp++;

		if (swapbytes)
			ucs_ch = NXSwapShort(ucs_ch);
		if (ucs_ch == '/')
			ucs_ch = altslash ? altslash : '_';
		else if (ucs_ch == '\0')
			ucs_ch = UCS_ALT_NULL;

		/* Everything that is not a surrogate half has a fixed size */
		if ((ucs_ch & 0xf800) != 0xd800) {
			len += UNICODE_TO_UTF8_LEN(ucs_ch);
			continue;
		}

		/* A valid high/low pair is consumed together: 4 bytes */
		if ((ucs_ch & 0xfc00) == 0xd800 && charcnt > 0) {
			ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
			if ((ch2 & 0xfc00) == 0xdc00) {
				--charcnt;
				++ucsp;
				len += 4;
				continue;
			}
		}

		/* Unpaired surrogate: encoded like any other 3 byte char */
		len += 3;
	}

	return (len);
}


/*
 * utf8_encodestr - Encodes a Unicode string to UTF-8
 *
 * ucslen and buflen are sizes in bytes.  On return *utf8len holds the
 * number of UTF-8 bytes written, not counting the NULL terminator.
 *
 * NOTES:
 *    The resulting UTF-8 string is NULL terminated.
 *
 *    If '/' chars are allowed on disk then an alternate
 *    (replacement) char must be provided in altslash.
 *
 *    An embedded NULL char is written as UCS_ALT_NULL.
 *
 *    A high surrogate followed by a low surrogate is written as one
 *    4 byte sequence; an unpaired surrogate is written as 3 bytes.
 *
 * input flags:
 *    UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
 *    UTF_NO_NULL_TERM:  don't add NULL termination to UTF-8 output
 *    UTF_DECOMPOSED:    write decomposable chars in decomposed form
 *
 * result:
 *    ENAMETOOLONG: Name didn't fit; only the chars that fit in buflen
 *                  bytes (including the terminator) were encoded.
 *    EINVAL: Illegal char found; char was replaced by an '_'.
 */
int
utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
               size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
{
	u_int8_t * bufstart;
	u_int8_t * bufend;
	u_int16_t ucs_ch;
	u_int16_t * chp = NULL;
	u_int16_t sequence[8];
	int extra = 0;
	int charcnt;
	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
	int nullterm  = ((flags & UTF_NO_NULL_TERM) == 0);
	int decompose = (flags & UTF_DECOMPOSED);
	int result = 0;
	
	bufstart = utf8p;
	if (nullterm) {
		/* Without room for the terminator nothing can be written */
		if (buflen == 0) {
			*utf8len = 0;
			return (ENAMETOOLONG);
		}
		--buflen;	/* reserve the terminator's byte */
	}
	bufend = bufstart + buflen;
	charcnt = ucslen / 2;

	while (charcnt-- > 0) {
		if (extra > 0) {
			/* Still draining the previous char's decomposition */
			--extra;
			ucs_ch = *chp++;
		} else {
			ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;

			if (decompose && unicode_decomposeable(ucs_ch)) {
				int count = unicode_decompose(ucs_ch, sequence);

				/*
				 * Only switch to the sequence if one was
				 * produced; otherwise keep the char as is
				 * instead of reading an unset buffer.
				 */
				if (count > 0) {
					extra = count - 1;
					charcnt += extra;
					ucs_ch = sequence[0];
					chp = &sequence[1];
				}
			}
		}

		/* Slash and NULL are not permitted */
		if (ucs_ch == '/') {
			if (altslash)
				ucs_ch = altslash;
			else {
				ucs_ch = '_';
				result = EINVAL;
			}
		} else if (ucs_ch == '\0') {
			ucs_ch = UCS_ALT_NULL;
		}

		if (ucs_ch < 0x0080) {
			if ((bufend - utf8p) < 1) {
				result = ENAMETOOLONG;
				break;
			}			
			*utf8p++ = ucs_ch;

		} else if (ucs_ch < 0x800) {
			if ((bufend - utf8p) < 2) {
				result = ENAMETOOLONG;
				break;
			}
			*utf8p++ = 0xc0 | (ucs_ch >> 6);
			*utf8p++ = 0x80 | (0x3f & ucs_ch);

		} else {
			/* Combine valid surrogate pairs */
			if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
				&& charcnt > 0) {
				u_int16_t ch2;
				u_int32_t pair;

				/* Peek only; consumed below once it all fits */
				ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
				if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
					pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
						+ (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
					if ((bufend - utf8p) < 4) {
						result = ENAMETOOLONG;
						break;
					}
					--charcnt;
					++ucsp;				
					*utf8p++ = 0xf0 | (pair >> 18);
					*utf8p++ = 0x80 | (0x3f & (pair >> 12));
					*utf8p++ = 0x80 | (0x3f & (pair >> 6));
					*utf8p++ = 0x80 | (0x3f & pair);
					continue;
				}
			}
			if ((bufend - utf8p) < 3) {
				result = ENAMETOOLONG;
				break;
			}
			*utf8p++ = 0xe0 | (ucs_ch >> 12);
			*utf8p++ = 0x80 | (0x3f & (ucs_ch >> 6));
			*utf8p++ = 0x80 | (0x3f & ucs_ch);
		}	
	}
	
	*utf8len = utf8p - bufstart;
	if (nullterm)
		*utf8p++ = '\0';

	return (result);
}


/*
 * utf8_decodestr - Decodes a UTF-8 string back to Unicode
 *
 * utf8len and buflen are sizes in bytes.  On return *ucslen holds the
 * number of bytes of Unicode output that were written.
 *
 * NOTES:
 *    The input UTF-8 string does not need to be null terminated
 *    if utf8len is set.
 *
 *    If '/' chars are allowed on disk then an alternate
 *    (replacement) char must be provided in altslash.  An altslash
 *    of zero means there is no replacement char to translate back.
 *
 *    Only whole 16-bit units are written, so an odd trailing byte
 *    of the output buffer is never used.
 *
 *    A character outside the BMP is written as a surrogate pair, and
 *    only if both halves fit.
 *
 * input flags:
 *    UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
 *    UTF_DECOMPOSED:     Unicode output string must be fully decomposed
 *    UTF_PRECOMPOSED:    a combining char is merged with the char
 *                        before it when a composite exists (ignored
 *                        if UTF_DECOMPOSED is also set)
 *
 * result:
 *    ENAMETOOLONG: Name didn't fit; only *ucslen bytes were decoded.
 *    EINVAL: Illegal UTF-8 sequence found.
 */
int
utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
               size_t *ucslen, size_t buflen, u_int16_t altslash, int flags)
{
	u_int16_t* bufstart;
	u_int16_t* bufend;
	unsigned int ucs_ch;
	unsigned int byte;
	int result = 0;
	int decompose, precompose, swapbytes;

	decompose =  (flags & UTF_DECOMPOSED);
	precompose = (flags & UTF_PRECOMPOSED);
	swapbytes =  (flags & UTF_REVERSE_ENDIAN);

	bufstart = ucsp;
	/* Count whole units so an odd buflen can't let a write run over */
	bufend = ucsp + (buflen / sizeof(u_int16_t));

	while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
		if (ucsp >= bufend)
			goto toolong;

		/* check for ascii */
		if (byte < 0x80) {
			ucs_ch = byte;                 /* 1st byte */
		} else {
			u_int32_t ch;
			int extrabytes = utf_extrabytes[byte >> 3];

			/* Reject bad lead bytes and truncated sequences */
			if (extrabytes < 0 || utf8len < (size_t)extrabytes)
				goto invalid;
			utf8len -= extrabytes;

			/*
			 * The constants subtracted below remove the tag
			 * bits of every byte of the sequence in one step.
			 */
			switch (extrabytes) {
			case 1:
				ch = byte; ch <<= 6;   /* 1st byte */
				byte = *utf8p++;       /* 2nd byte */
				if ((byte >> 6) != 2)
					goto invalid;
				ch += byte;
				ch -= 0x00003080UL;
				if (ch < 0x0080)       /* not the shortest form */
					goto invalid;
				ucs_ch = ch;
			        break;
			case 2:
				ch = byte; ch <<= 6;   /* 1st byte */
				byte = *utf8p++;       /* 2nd byte */
				if ((byte >> 6) != 2)
					goto invalid;
				ch += byte; ch <<= 6;
				byte = *utf8p++;       /* 3rd byte */
				if ((byte >> 6) != 2)
					goto invalid;
				ch += byte;
				ch -= 0x000E2080UL;
				if (ch < 0x0800)       /* not the shortest form */
					goto invalid;
				if (ch >= 0xD800) {
					/* surrogate halves are not chars */
					if (ch <= 0xDFFF)
						goto invalid;
					if (ch == 0xFFFE || ch == 0xFFFF)
						goto invalid;
				}
				ucs_ch = ch;
				break;
			case 3:
				ch = byte; ch <<= 6;   /* 1st byte */
				byte = *utf8p++;       /* 2nd byte */
				if ((byte >> 6) != 2)
					goto invalid;
				ch += byte; ch <<= 6;
				byte = *utf8p++;       /* 3rd byte */
				if ((byte >> 6) != 2)
					goto invalid;
				ch += byte; ch <<= 6;
				byte = *utf8p++;       /* 4th byte */
				if ((byte >> 6) != 2)
					goto invalid;
			        ch += byte;
				ch -= 0x03C82080UL + SP_HALF_BASE;
				/*
				 * A value below 0x10000 wraps around here and
				 * one above 0x10FFFF is too big, so both end
				 * up outside the high surrogate range.
				 */
				ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
				if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
					goto invalid;
				/* Both halves or neither: never strand a high half */
				if ((bufend - ucsp) < 2)
					goto toolong;
				*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
				/* masking to 10 bits keeps this in the low range */
				ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
				*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
			        continue;
			default:
				goto invalid;
			}
			if (decompose) {
				if (unicode_decomposeable(ucs_ch)) {
					u_int16_t sequence[8];
					int count, i;

					count = unicode_decompose(ucs_ch, sequence);

					/*
					 * With no sequence produced, fall
					 * through and keep the char as is.
					 */
					if (count > 0) {
						for (i = 0; i < count; ++i) {
							/* check before writing so an exact fit succeeds */
							if (ucsp >= bufend)
								goto toolong;
							ucs_ch = sequence[i];
							*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
						}
						continue;
					}
				}
			} else if (precompose && (ucsp != bufstart)) {
				u_int16_t composite, base;

				if (unicode_combinable(ucs_ch)) {
					base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
					composite = unicode_combine(base, ucs_ch);
					if (composite) {
						/* overwrite the base with the composite */
						--ucsp;
						ucs_ch = composite;
					}
				}
			}
			if (ucs_ch == UCS_ALT_NULL)
				ucs_ch = '\0';
		}
		/* altslash of zero must not match a NULL produced above */
		if (altslash && ucs_ch == altslash)
			ucs_ch = '/';

		*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
	}

exit:
	*ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart;

	return (result);

invalid:
	result = EINVAL;
	goto exit;

toolong:
	result = ENAMETOOLONG;
	goto exit;
}


 /*
  * Unicode 3.2 decomposition code (derived from Core Foundation)
  */

/* One entry of a lookup table with 32-bit keys, sorted by ascending _key */
typedef struct {
	u_int32_t _key;
	u_int32_t _value;
} unicode_mappings32;

/*
 * getmappedvalue32 - binary search of a table sorted by ascending _key
 *
 * theTable:  first entry of the table
 * numElem:   number of entries in the table (may be zero)
 * character: key to look for
 *
 * Returns the _value stored with the key, or 0 if the key is not
 * present (a stored value of 0 can't be told apart from a miss).
 */
static inline u_int32_t
getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem,
		u_int16_t character)
{
	const unicode_mappings32 *p, *q, *divider;

	/* An empty table has no first or last entry to look at */
	if (numElem == 0)
		return (0);

	/*
	 * Keys outside the table's range are rejected up front; this
	 * also keeps p and q inside the table during the search.
	 */
	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	p = theTable;
	q = p + (numElem-1);
	while (p <= q) {
		divider = p + ((q - p) >> 1);	/* divide by 2 */
		if (character < divider->_key) { q = divider - 1; }
		else if (character > divider->_key) { p = divider + 1; }
		else { return (divider->_value); }
	}
	return (0);
}

/*
 * Fields of a 16-bit decomposition table value, as they are read by
 * unicode_recursive_decompose:
 *
 *    bit  15     the first mapped char must itself be decomposed
 *    bits 12-14  number of chars in the mapping
 *    bits 0-11   the mapped char when the count is 1, otherwise an
 *                offset into the multiple decomposition table
 */
#define RECURSIVE_DECOMPOSITION	(1 << 15)
#define EXTRACT_COUNT(value)	(((value) >> 12) & 0x0007)

/* One entry of a lookup table with 16-bit keys, sorted by ascending _key */
typedef struct {
	u_int16_t _key;
	u_int16_t _value;
} unicode_mappings16;

/*
 * getmappedvalue16 - binary search of a table sorted by ascending _key
 *
 * theTable:  first entry of the table
 * numElem:   number of entries in the table (may be zero)
 * character: key to look for
 *
 * Returns the _value stored with the key, or 0 if the key is not
 * present (a stored value of 0 can't be told apart from a miss).
 */
static inline u_int16_t
getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem,
		u_int16_t character)
{
	const unicode_mappings16 *p, *q, *divider;

	/* An empty table has no first or last entry to look at */
	if (numElem == 0)
		return (0);

	/*
	 * Keys outside the table's range are rejected up front; this
	 * also keeps p and q inside the table during the search.
	 */
	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	p = theTable;
	q = p + (numElem-1);
	while (p <= q) {
		divider = p + ((q - p) >> 1);	/* divide by 2 */
		if (character < divider->_key)
			q = divider - 1;
		else if (character > divider->_key)
			p = divider + 1;
		else
			return (divider->_value);
	}
	return (0);
}


/*
 * unicode_recursive_decompose - expand a char through the decomposition table
 *
 * Looks the char up in the decomposition table and stores its mapping
 * in convertedChars.  When the table value has RECURSIVE_DECOMPOSITION
 * set, the first char of the mapping is expanded by a recursive call
 * and the rest of the mapping is stored after that expansion.
 *
 * Returns the number of chars stored.  Returns 0 when a recursive
 * expansion produced nothing; a char with no table entry also yields 0.
 */
static u_int32_t
unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars)
{
	const u_int16_t entry = getmappedvalue16(
		(const unicode_mappings16 *)__CFUniCharDecompositionTable,
		__UniCharDecompositionTableLength, character);
	u_int32_t remaining = EXTRACT_COUNT(entry);
	u_int16_t single = entry & 0x0FFF;
	const u_int16_t *src;
	u_int32_t produced = 0;
	u_int32_t i;

	/* A one char mapping is held in the entry itself */
	if (remaining == 1)
		src = &single;
	else
		src = __CFUniCharMultipleDecompositionTable + single;

	if (entry & RECURSIVE_DECOMPOSITION) {
		produced = unicode_recursive_decompose(*src, convertedChars);

		--remaining;	/* the first char was handled by the call above */
		if (produced == 0)
			return 0;
		++src;
		convertedChars += produced;
	}

	/* Append whatever is left of the mapping unchanged */
	for (i = 0; i < remaining; ++i)
		convertedChars[i] = src[i];

	return (produced + remaining);
}
    
/*
 * Hangul constants.  The S range holds the precomposed syllables; the
 * L, V and T ranges hold the conjoining Jamo (leading consonants,
 * vowels and trailing consonants) they are built from.
 *
 * HANGUL_TBASE is one less than the first trailing consonant, so that
 * a T index of zero stands for "no trailing consonant".
 */
#define HANGUL_SBASE 0xAC00
#define HANGUL_LBASE 0x1100
#define HANGUL_VBASE 0x1161
#define HANGUL_TBASE 0x11A7

#define HANGUL_SCOUNT 11172
#define HANGUL_LCOUNT 19
#define HANGUL_VCOUNT 21
#define HANGUL_TCOUNT 28
/* Number of syllables that share one leading consonant */
#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)

/*
 * unicode_decompose - decompose a composed Unicode char
 *
 * Composed Unicode characters are forbidden on
 * HFS Plus volumes. unicode_decompose will convert a
 * composed character into its correct decomposed
 * sequence.
 *
 * Hangul syllables (HANGUL_SBASE up to, but not including,
 * HANGUL_SBASE + HANGUL_SCOUNT) are split arithmetically into
 * two or three Jamo; every other char goes through the
 * decomposition table.
 *
 * The sequence is stored in convertedChars and its length, in
 * chars, is returned.
 *
 * Similar to CFUniCharDecomposeCharacter
 */
static int
unicode_decompose(u_int16_t character, u_int16_t *convertedChars)
{
	/* The last syllable is HANGUL_SBASE + HANGUL_SCOUNT - 1 */
	if ((character >= HANGUL_SBASE) &&
	    (character < (HANGUL_SBASE + HANGUL_SCOUNT))) {
		u_int32_t length;

		character -= HANGUL_SBASE;
		/* A zero trailing index means there is no third Jamo */
		length = (character % HANGUL_TCOUNT ? 3 : 2);

		*(convertedChars++) =
			character / HANGUL_NCOUNT + HANGUL_LBASE;
		*(convertedChars++) =
			(character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE;
		if (length > 2)
			*convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE;
		return (length);
	} else {
		return (unicode_recursive_decompose(character, convertedChars));
	}
}

/*
 * unicode_combine - generate a precomposed Unicode char
 *
 * Precomposed Unicode characters are required for some volume
 * formats and network protocols.  unicode_combine will combine
 * a decomposed character sequence into a single precomposed
 * (composite) character.
 *
 * Returns the composite of base and combining, or 0 when the two
 * do not combine.
 *
 * Similar to CFUniCharPrecomposeCharacter but unicode_combine
 * also handles Hangul Jamo characters.
 */
static u_int16_t
unicode_combine(u_int16_t base, u_int16_t combining)
{
	u_int32_t match;
	int v_jamo, t_jamo, base_is_l, base_is_s;

	/* Hangul is composed by arithmetic rather than by table */
	if (combining >= HANGUL_VBASE &&
	    combining < (HANGUL_TBASE + HANGUL_TCOUNT)) {
		v_jamo = (combining < (HANGUL_VBASE + HANGUL_VCOUNT));
		t_jamo = (combining > HANGUL_TBASE);
		base_is_l = (base >= HANGUL_LBASE &&
		             base < (HANGUL_LBASE + HANGUL_LCOUNT));
		base_is_s = (base >= HANGUL_SBASE &&
		             base < (HANGUL_SBASE + HANGUL_SCOUNT));

		/* 2 char sequence: leading consonant + vowel */
		if (v_jamo && base_is_l) {
			return (HANGUL_SBASE +
			        ((base - HANGUL_LBASE) * (HANGUL_VCOUNT * HANGUL_TCOUNT)) +
			        ((combining - HANGUL_VBASE) * HANGUL_TCOUNT));
		}

		/*
		 * 3 char sequence: syllable + trailing consonant, which
		 * only works if the syllable doesn't already have one
		 */
		if (t_jamo && base_is_s) {
			return (((base - HANGUL_SBASE) % HANGUL_TCOUNT)
			        ? 0
			        : base + (combining - HANGUL_TBASE));
		}
	}

	/*
	 * Everything else: the combining char selects a sub-table
	 * (offset in the low 16 bits, entry count in the high 16 bits)
	 * which is then searched for the base char.
	 */
	match = getmappedvalue32(
		(const unicode_mappings32 *)__CFUniCharPrecompSourceTable,
		__CFUniCharPrecompositionTableLength, combining);
	if (match == 0)
		return (0);

	return (getmappedvalue16(
		(const unicode_mappings16 *)
		((u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (match & 0xFFFF)),
		(match >> 16), base));
}
Commit	Line	Data
1c79356b	1	/*
9bccf70c	2	* Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
1c79356b A	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
43866e37	6	* Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
1c79356b	7	*
43866e37 A	8	* This file contains Original Code and/or Modifications of Original Code
	9	* as defined in and that are subject to the Apple Public Source License
	10	* Version 2.0 (the 'License'). You may not use this file except in
	11	* compliance with the License. Please obtain a copy of the License at
	12	* http://www.opensource.apple.com/apsl/ and read it before using this
	13	* file.
	14	*
	15	* The Original Code and all software distributed under the License are
	16	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b A	17	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
1c79356b A	18	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
43866e37 A	19	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	20	* Please see the License for the specific language governing rights and
	21	* limitations under the License.
1c79356b A	22	*
	23	* @APPLE_LICENSE_HEADER_END@
	24	*/
9bccf70c A	25
	26	/*
	27	Includes Unicode 3.2 decomposition code derived from Core Foundation
	28	*/
	29
1c79356b A	30	#include <sys/param.h>
	31	#include <sys/utfconv.h>
	32	#include <sys/errno.h>
	33	#include <architecture/byte_order.h>
	34
1c79356b	35	/*
765c9de3	36	* UTF-8 (Unicode Transformation Format)
1c79356b	37	*
765c9de3 A	38	* UTF-8 is the Unicode Transformation Format that serializes a Unicode
	39	* character as a sequence of one to four bytes. Only the shortest form
	40	* required to represent the significant Unicode bits is legal.
1c79356b A	41	*
	42	* UTF-8 Multibyte Codes
	43	*
765c9de3 A	44	* Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary)
	45	* -----------------------------------------------------------------------------
	46	* 1 7 0x0000 0x007F 0xxxxxxx
	47	* 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
	48	* 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
	49	* 4 21 0x10000 0x10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	50	* -----------------------------------------------------------------------------
1c79356b A	51	*/
	52
	53
765c9de3 A	54	#define UNICODE_TO_UTF8_LEN(c) \
765c9de3 A	55	((c) < 0x0080 ? 1 : ((c) < 0x0800 ? 2 : (((c) & 0xf800) == 0xd800 ? 2 : 3)))
0b4e3aa0 A	56
0b4e3aa0 A	57	#define UCS_ALT_NULL 0x2400
1c79356b	58
765c9de3 A	59	/* Surrogate Pair Constants */
	60	#define SP_HALF_SHIFT 10
	61	#define SP_HALF_BASE 0x0010000UL
	62	#define SP_HALF_MASK 0x3FFUL
	63
	64	#define SP_HIGH_FIRST 0xD800UL
	65	#define SP_HIGH_LAST 0xDBFFUL
	66	#define SP_LOW_FIRST 0xDC00UL
9bccf70c A	67	#define SP_LOW_LAST 0xDFFFUL
9bccf70c A	68
1c79356b	69
9bccf70c	70	#include "vfs_utfconvdata.h"
765c9de3	71
1c79356b	72
9bccf70c A	73	/*
	74	* Test for a combining character.
	75	*
	76	* Similar to __CFUniCharIsNonBaseCharacter except that
	77	* unicode_combinable also includes Hangul Jamo characters.
	78	*/
	79	static inline int
	80	unicode_combinable(u_int16_t character)
	81	{
	82	const u_int8_t *bitmap = __CFUniCharCombiningBitmap;
	83	u_int8_t value;
	84
	85	if (character < 0x0300)
	86	return (0);
	87
	88	value = bitmap[(character >> 8) & 0xFF];
	89
	90	if (value == 0xFF) {
	91	return (1);
	92	} else if (value) {
	93	bitmap = bitmap + ((value - 1) * 32) + 256;
	94	return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
	95	}
	96	return (0);
	97	}
	98
	99	/*
	100	* Test for a precomposed character.
	101	*
	102	* Similar to __CFUniCharIsDecomposableCharacter.
	103	*/
	104	static inline int
	105	unicode_decomposeable(u_int16_t character) {
	106	const u_int8_t *bitmap = __CFUniCharDecomposableBitmap;
	107	u_int8_t value;
	108
	109	if (character < 0x00C0)
	110	return (0);
	111
	112	value = bitmap[(character >> 8) & 0xFF];
	113
	114	if (value == 0xFF) {
	115	return (1);
	116	} else if (value) {
	117	bitmap = bitmap + ((value - 1) * 32) + 256;
	118	return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0);
	119	}
	120	return (0);
	121	}
	122
	123	static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
	124
	125	static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
0b4e3aa0	126
1c79356b	127
765c9de3 A	128	char utf_extrabytes[32] = {
	129	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	130	-1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 2, 2, 3, -1
	131	};
	132
	133
1c79356b	134	/*
765c9de3	135	* utf8_encodelen - Calculates the UTF-8 encoding length for a Unicode filename
1c79356b A	136	*
	137	* NOTES:
	138	* If '/' chars are allowed on disk then an alternate
	139	* (replacement) char must be provided in altslash.
	140	*
	141	* input flags:
765c9de3	142	* UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
1c79356b A	143	*/
1c79356b A	144	size_t
765c9de3 A	145	utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
765c9de3 A	146	int flags)
1c79356b A	147	{
	148	u_int16_t ucs_ch;
	149	int charcnt;
	150	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
	151	size_t len;
	152
	153	charcnt = ucslen / 2;
	154	len = 0;
	155
	156	while (charcnt-- > 0) {
	157	ucs_ch = *ucsp++;
	158
	159	if (swapbytes)
	160	ucs_ch = NXSwapShort(ucs_ch);
0b4e3aa0 A	161	if (ucs_ch == '/')
	162	ucs_ch = altslash ? altslash : '_';
	163	else if (ucs_ch == '\0')
	164	ucs_ch = UCS_ALT_NULL;
1c79356b	165
765c9de3	166	len += UNICODE_TO_UTF8_LEN(ucs_ch);
1c79356b A	167	}
	168
	169	return (len);
	170	}
	171
	172
	173	/*
765c9de3	174	* utf8_encodestr - Encodes a Unicode string to UTF-8
1c79356b A	175	*
1c79356b A	176	* NOTES:
0b4e3aa0	177	* The resulting UTF-8 string is NULL terminated.
1c79356b A	178	*
	179	* If '/' chars are allowed on disk then an alternate
	180	* (replacement) char must be provided in altslash.
	181	*
	182	* input flags:
765c9de3	183	* UTF_REVERSE_ENDIAN: Unicode byteorder is opposite current runtime
1c79356b	184	* UTF_NO_NULL_TERM: don't add NULL termination to UTF-8 output
0b4e3aa0 A	185	*
	186	* result:
	187	* ENAMETOOLONG: Name didn't fit; only buflen bytes were encoded
	188	* EINVAL: Illegal char found; char was replaced by an '_'.
1c79356b	189	*/
765c9de3 A	190	int
	191	utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
	192	size_t * utf8len, size_t buflen, u_int16_t altslash, int flags)
1c79356b A	193	{
	194	u_int8_t * bufstart;
	195	u_int8_t * bufend;
	196	u_int16_t ucs_ch;
9bccf70c A	197	u_int16_t * chp = NULL;
	198	u_int16_t sequence[8];
	199	int extra = 0;
1c79356b A	200	int charcnt;
1c79356b A	201	int swapbytes = (flags & UTF_REVERSE_ENDIAN);
0b4e3aa0 A	202	int nullterm = ((flags & UTF_NO_NULL_TERM) == 0);
0b4e3aa0 A	203	int decompose = (flags & UTF_DECOMPOSED);
1c79356b A	204	int result = 0;
	205
	206	bufstart = utf8p;
	207	bufend = bufstart + buflen;
	208	if (nullterm)
	209	--bufend;
	210	charcnt = ucslen / 2;
	211
	212	while (charcnt-- > 0) {
9bccf70c A	213	if (extra > 0) {
	214	--extra;
	215	ucs_ch = *chp++;
0b4e3aa0 A	216	} else {
0b4e3aa0 A	217	ucs_ch = swapbytes ? NXSwapShort(ucsp++) : ucsp++;
9bccf70c A	218
	219	if (decompose && unicode_decomposeable(ucs_ch)) {
	220	extra = unicode_decompose(ucs_ch, sequence) - 1;
	221	charcnt += extra;
	222	ucs_ch = sequence[0];
	223	chp = &sequence[1];
	224	}
0b4e3aa0	225	}
1c79356b	226
0b4e3aa0 A	227	/* Slash and NULL are not permitted */
	228	if (ucs_ch == '/') {
	229	if (altslash)
	230	ucs_ch = altslash;
	231	else {
	232	ucs_ch = '_';
	233	result = EINVAL;
	234	}
	235	} else if (ucs_ch == '\0') {
	236	ucs_ch = UCS_ALT_NULL;
	237	}
1c79356b	238
0b4e3aa0	239	if (ucs_ch < 0x0080) {
1c79356b A	240	if (utf8p >= bufend) {
	241	result = ENAMETOOLONG;
	242	break;
765c9de3	243	}
1c79356b A	244	*utf8p++ = ucs_ch;
	245
	246	} else if (ucs_ch < 0x800) {
	247	if ((utf8p + 1) >= bufend) {
	248	result = ENAMETOOLONG;
	249	break;
	250	}
765c9de3 A	251	*utf8p++ = 0xc0 \| (ucs_ch >> 6);
765c9de3 A	252	*utf8p++ = 0x80 \| (0x3f & ucs_ch);
1c79356b A	253
1c79356b A	254	} else {
765c9de3 A	255	/* Combine valid surrogate pairs */
	256	if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST
	257	&& charcnt > 0) {
	258	u_int16_t ch2;
	259	u_int32_t pair;
	260
	261	ch2 = swapbytes ? NXSwapShort(ucsp) : ucsp;
	262	if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
	263	pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
	264	+ (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
	265	if ((utf8p + 3) >= bufend) {
	266	result = ENAMETOOLONG;
	267	break;
	268	}
	269	--charcnt;
	270	++ucsp;
	271	*utf8p++ = 0xf0 \| (pair >> 18);
	272	*utf8p++ = 0x80 \| (0x3f & (pair >> 12));
	273	*utf8p++ = 0x80 \| (0x3f & (pair >> 6));
	274	*utf8p++ = 0x80 \| (0x3f & pair);
	275	continue;
	276	}
	277	}
1c79356b A	278	if ((utf8p + 2) >= bufend) {
	279	result = ENAMETOOLONG;
	280	break;
	281	}
765c9de3 A	282	*utf8p++ = 0xe0 \| (ucs_ch >> 12);
	283	*utf8p++ = 0x80 \| (0x3f & (ucs_ch >> 6));
	284	*utf8p++ = 0x80 \| (0x3f & ucs_ch);
1c79356b A	285	}
	286	}
	287
	288	*utf8len = utf8p - bufstart;
	289	if (nullterm)
	290	*utf8p++ = '\0';
	291
	292	return (result);
	293	}
	294
	295
	296	/*
765c9de3	297	* utf8_decodestr - Decodes a UTF-8 string back to Unicode
1c79356b A	298	*
	299	* NOTES:
	300	* The input UTF-8 string does not need to be null terminated
	301	* if utf8len is set.
	302	*
	303	* If '/' chars are allowed on disk then an alternate
	304	* (replacement) char must be provided in altslash.
	305	*
	306	* input flags:
765c9de3 A	307	* UTF_REV_ENDIAN: Unicode byteorder is oposite current runtime
765c9de3 A	308	* UTF_DECOMPOSED: Unicode output string must be fully decompsed
0b4e3aa0 A	309	*
	310	* result:
	311	* ENAMETOOLONG: Name didn't fit; only ucslen chars were decoded.
	312	* EINVAL: Illegal UTF-8 sequence found.
1c79356b A	313	*/
1c79356b A	314	int
765c9de3 A	315	utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
765c9de3 A	316	size_t *ucslen, size_t buflen, u_int16_t altslash, int flags)
1c79356b A	317	{
	318	u_int16_t* bufstart;
	319	u_int16_t* bufend;
55e303ae A	320	unsigned int ucs_ch;
55e303ae A	321	unsigned int byte;
1c79356b	322	int result = 0;
0b4e3aa0	323	int decompose, precompose, swapbytes;
1c79356b	324
0b4e3aa0 A	325	decompose = (flags & UTF_DECOMPOSED);
	326	precompose = (flags & UTF_PRECOMPOSED);
	327	swapbytes = (flags & UTF_REVERSE_ENDIAN);
1c79356b A	328
	329	bufstart = ucsp;
	330	bufend = (u_int16_t )((u_int8_t )ucsp + buflen);
	331
	332	while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
765c9de3 A	333	if (ucsp >= bufend)
765c9de3 A	334	goto toolong;
1c79356b A	335
	336	/* check for ascii */
	337	if (byte < 0x80) {
55e303ae	338	ucs_ch = byte; /* 1st byte */
1c79356b	339	} else {
765c9de3 A	340	u_int32_t ch;
	341	int extrabytes = utf_extrabytes[byte >> 3];
	342
	343	if (utf8len < extrabytes)
	344	goto invalid;
	345	utf8len -= extrabytes;
	346
	347	switch (extrabytes) {
55e303ae A	348	case 1:
	349	ch = byte; ch <<= 6; /* 1st byte */
	350	byte = utf8p++; / 2nd byte */
	351	if ((byte >> 6) != 2)
	352	goto invalid;
	353	ch += byte;
	354	ch -= 0x00003080UL;
	355	if (ch < 0x0080)
	356	goto invalid;
	357	ucs_ch = ch;
765c9de3	358	break;
55e303ae A	359	case 2:
	360	ch = byte; ch <<= 6; /* 1st byte */
	361	byte = utf8p++; / 2nd byte */
	362	if ((byte >> 6) != 2)
	363	goto invalid;
	364	ch += byte; ch <<= 6;
	365	byte = utf8p++; / 3rd byte */
	366	if ((byte >> 6) != 2)
	367	goto invalid;
	368	ch += byte;
	369	ch -= 0x000E2080UL;
	370	if (ch < 0x0800)
	371	goto invalid;
	372	if (ch >= 0xD800) {
	373	if (ch <= 0xDFFF)
765c9de3	374	goto invalid;
55e303ae A	375	if (ch == 0xFFFE \|\| ch == 0xFFFF)
	376	goto invalid;
	377	}
	378	ucs_ch = ch;
	379	break;
	380	case 3:
	381	ch = byte; ch <<= 6; /* 1st byte */
	382	byte = utf8p++; / 2nd byte */
	383	if ((byte >> 6) != 2)
	384	goto invalid;
	385	ch += byte; ch <<= 6;
	386	byte = utf8p++; / 3rd byte */
	387	if ((byte >> 6) != 2)
	388	goto invalid;
	389	ch += byte; ch <<= 6;
	390	byte = utf8p++; / 4th byte */
	391	if ((byte >> 6) != 2)
	392	goto invalid;
	393	ch += byte;
	394	ch -= 0x03C82080UL + SP_HALF_BASE;
	395	ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
	396	if (ucs_ch < SP_HIGH_FIRST \|\| ucs_ch > SP_HIGH_LAST)
	397	goto invalid;
	398	*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
	399	if (ucsp >= bufend)
	400	goto toolong;
	401	ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
	402	if (ucs_ch < SP_LOW_FIRST \|\| ucs_ch > SP_LOW_LAST)
	403	goto invalid;
	404	*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
765c9de3	405	continue;
1c79356b	406	default:
55e303ae	407	goto invalid;
1c79356b	408	}
1c79356b	409	if (decompose) {
9bccf70c A	410	if (unicode_decomposeable(ucs_ch)) {
	411	u_int16_t sequence[8];
	412	int count, i;
1c79356b	413
9bccf70c	414	count = unicode_decompose(ucs_ch, sequence);
1c79356b	415
9bccf70c A	416	for (i = 0; i < count; ++i) {
9bccf70c A	417	ucs_ch = sequence[i];
0b4e3aa0	418	*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
765c9de3 A	419	if (ucsp >= bufend)
765c9de3 A	420	goto toolong;
0b4e3aa0	421	}
9bccf70c	422	continue;
0b4e3aa0 A	423	}
	424	} else if (precompose && (ucsp != bufstart)) {
	425	u_int16_t composite, base;
	426
9bccf70c A	427	if (unicode_combinable(ucs_ch)) {
	428	base = swapbytes ? NXSwapShort((ucsp - 1)) : (ucsp - 1);
	429	composite = unicode_combine(base, ucs_ch);
	430	if (composite) {
	431	--ucsp;
	432	ucs_ch = composite;
	433	}
1c79356b A	434	}
1c79356b A	435	}
0b4e3aa0 A	436	if (ucs_ch == UCS_ALT_NULL)
0b4e3aa0 A	437	ucs_ch = '\0';
1c79356b	438	}
1c79356b A	439	if (ucs_ch == altslash)
1c79356b A	440	ucs_ch = '/';
1c79356b	441
765c9de3	442	*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
1c79356b	443	}
765c9de3 A	444
765c9de3 A	445	exit:
1c79356b A	446	ucslen = (u_int8_t)ucsp - (u_int8_t*)bufstart;
	447
	448	return (result);
765c9de3 A	449
	450	invalid:
	451	result = EINVAL;
	452	goto exit;
	453
	454	toolong:
	455	result = ENAMETOOLONG;
	456	goto exit;
1c79356b A	457	}
	458
	459
9bccf70c A	460	/*
	461	* Unicode 3.2 decomposition code (derived from Core Foundation)
	462	*/
1c79356b	463
9bccf70c A	464	typedef struct {
	465	u_int32_t _key;
	466	u_int32_t _value;
	467	} unicode_mappings32;
0b4e3aa0	468
9bccf70c A	469	static inline u_int32_t
	470	getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem,
	471	u_int16_t character)
	472	{
	473	const unicode_mappings32 p, q, *divider;
1c79356b	474
9bccf70c A	475	if ((character < theTable[0]._key) \|\| (character > theTable[numElem-1]._key))
9bccf70c A	476	return (0);
1c79356b	477
9bccf70c A	478	p = theTable;
	479	q = p + (numElem-1);
	480	while (p <= q) {
	481	divider = p + ((q - p) >> 1); /* divide by 2 */
	482	if (character < divider->_key) { q = divider - 1; }
	483	else if (character > divider->_key) { p = divider + 1; }
	484	else { return (divider->_value); }
	485	}
	486	return (0);
	487	}
1c79356b	488
9bccf70c A	489	#define RECURSIVE_DECOMPOSITION (1 << 15)
9bccf70c A	490	#define EXTRACT_COUNT(value) (((value) >> 12) & 0x0007)
1c79356b	491
/*
 * One entry of a lookup table that maps a 16-bit key to a 16-bit value.
 * getmappedvalue16() binary-searches on _key, so it assumes the entries
 * are stored in ascending _key order.
 */
typedef struct {
	u_int16_t _key;
	u_int16_t _value;
} unicode_mappings16;

/*
 * getmappedvalue16 - binary search a unicode_mappings16 table
 *
 * theTable   first entry of the table
 * numElem    number of entries in theTable
 * character  key to look up
 *
 * Returns the _value stored for character, or 0 when the table is
 * empty or holds no entry for character.
 */
static inline u_int16_t
getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem,
		u_int16_t character)
{
	const unicode_mappings16 *p, *q, *divider;

	/* An empty table has no first/last entry to range-check against. */
	if (numElem == 0)
		return (0);

	/*
	 * Rejecting keys outside [first, last] up front also guarantees
	 * that the search below never moves p or q outside the table.
	 */
	if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key))
		return (0);

	p = theTable;
	q = p + (numElem-1);
	while (p <= q) {
		divider = p + ((q - p) >> 1);	/* divide by 2 */
		if (character < divider->_key)
			q = divider - 1;
		else if (character > divider->_key)
			p = divider + 1;
		else
			return (divider->_value);
	}
	return (0);
}
	519
	520
	521	static u_int32_t
	522	unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars)
	523	{
	524	u_int16_t value;
	525	u_int32_t length;
	526	u_int16_t firstChar;
	527	u_int16_t theChar;
	528	const u_int16_t *bmpMappings;
	529	u_int32_t usedLength;
	530
	531	value = getmappedvalue16(
	532	(const unicode_mappings16 *)__CFUniCharDecompositionTable,
	533	__UniCharDecompositionTableLength, character);
	534	length = EXTRACT_COUNT(value);
	535	firstChar = value & 0x0FFF;
	536	theChar = firstChar;
	537	bmpMappings = (length == 1 ? &theChar : __CFUniCharMultipleDecompositionTable + firstChar);
	538	usedLength = 0;
	539
	540	if (value & RECURSIVE_DECOMPOSITION) {
	541	usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars);
	542
	543	--length; /* Decrement for the first char */
	544	if (!usedLength)
	545	return 0;
	546	++bmpMappings;
	547	convertedChars += usedLength;
	548	}
0b4e3aa0	549
9bccf70c	550	usedLength += length;
0b4e3aa0	551
9bccf70c A	552	while (length--)
9bccf70c A	553	(convertedChars++) = (bmpMappings++);
0b4e3aa0	554
9bccf70c A	555	return (usedLength);
	556	}
	557
	558	#define HANGUL_SBASE 0xAC00
	559	#define HANGUL_LBASE 0x1100
	560	#define HANGUL_VBASE 0x1161
	561	#define HANGUL_TBASE 0x11A7
	562
	563	#define HANGUL_SCOUNT 11172
	564	#define HANGUL_LCOUNT 19
	565	#define HANGUL_VCOUNT 21
	566	#define HANGUL_TCOUNT 28
	567	#define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT)
1c79356b A	568
1c79356b A	569	/*
9bccf70c	570	* unicode_decompose - decompose a composed Unicode char
1c79356b A	571	*
	572	* Composed Unicode characters are forbidden on
	573	* HFS Plus volumes. ucs_decompose will convert a
	574	* composed character into its correct decomposed
	575	* sequence.
	576	*
9bccf70c	577	* Similar to CFUniCharDecomposeCharacter
1c79356b	578	*/
9bccf70c A	579	static int
9bccf70c A	580	unicode_decompose(u_int16_t character, u_int16_t *convertedChars)
1c79356b	581	{
9bccf70c A	582	if ((character >= HANGUL_SBASE) &&
	583	(character <= (HANGUL_SBASE + HANGUL_SCOUNT))) {
	584	u_int32_t length;
	585
	586	character -= HANGUL_SBASE;
	587	length = (character % HANGUL_TCOUNT ? 3 : 2);
	588
	589	*(convertedChars++) =
	590	character / HANGUL_NCOUNT + HANGUL_LBASE;
	591	*(convertedChars++) =
	592	(character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE;
	593	if (length > 2)
	594	*convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE;
	595	return (length);
1c79356b	596	} else {
9bccf70c	597	return (unicode_recursive_decompose(character, convertedChars));
1c79356b	598	}
1c79356b A	599	}
1c79356b A	600
0b4e3aa0	601	/*
9bccf70c	602	* unicode_combine - generate a precomposed Unicode char
0b4e3aa0 A	603	*
0b4e3aa0 A	604	* Precomposed Unicode characters are required for some volume
9bccf70c A	605	* formats and network protocols. unicode_combine will combine
9bccf70c A	606	* a decomposed character sequence into a single precomposed
0b4e3aa0 A	607	* (composite) character.
0b4e3aa0 A	608	*
9bccf70c A	609	* Similar toCFUniCharPrecomposeCharacter but unicode_combine
9bccf70c A	610	* also handles Hangul Jamo characters.
0b4e3aa0 A	611	*/
0b4e3aa0 A	612	static u_int16_t
9bccf70c	613	unicode_combine(u_int16_t base, u_int16_t combining)
0b4e3aa0	614	{
9bccf70c A	615	u_int32_t value;
	616
	617	/* Check HANGUL */
	618	if ((combining >= HANGUL_VBASE) && (combining < (HANGUL_TBASE + HANGUL_TCOUNT))) {
	619	/* 2 char Hangul sequences */
	620	if ((combining < (HANGUL_VBASE + HANGUL_VCOUNT)) &&
	621	(base >= HANGUL_LBASE && base < (HANGUL_LBASE + HANGUL_LCOUNT))) {
	622	return (HANGUL_SBASE +
	623	((base - HANGUL_LBASE)(HANGUL_VCOUNTHANGUL_TCOUNT)) +
	624	((combining - HANGUL_VBASE)*HANGUL_TCOUNT));
0b4e3aa0	625	}
9bccf70c A	626
	627	/* 3 char Hangul sequences */
	628	if ((combining > HANGUL_TBASE) &&
	629	(base >= HANGUL_SBASE && base < (HANGUL_SBASE + HANGUL_SCOUNT))) {
	630	if ((base - HANGUL_SBASE) % HANGUL_TCOUNT)
	631	return (0);
	632	else
	633	return (base + (combining - HANGUL_TBASE));
0b4e3aa0	634	}
0b4e3aa0 A	635	}
0b4e3aa0 A	636
9bccf70c A	637	value = getmappedvalue32(
	638	(const unicode_mappings32 *)__CFUniCharPrecompSourceTable,
	639	__CFUniCharPrecompositionTableLength, combining);
0b4e3aa0	640
9bccf70c A	641	if (value) {
	642	value = getmappedvalue16(
	643	(const unicode_mappings16 *)
	644	((u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)),
	645	(value >> 16), base);
0b4e3aa0	646	}
9bccf70c	647	return (value);
0b4e3aa0 A	648	}
0b4e3aa0 A	649