X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/3e170ce000f1506b7b5d2c5c7faec85ceabb573d..ea3f04195ba4a5034c9c8e9b726d4f7ce96f1832:/bsd/vfs/vfs_utfconv.c?ds=sidebyside diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c index 8639edc99..48f21532a 100644 --- a/bsd/vfs/vfs_utfconv.c +++ b/bsd/vfs/vfs_utfconv.c @@ -2,7 +2,7 @@ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,12 +22,12 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ - - /* - Includes Unicode 3.2 decomposition code derived from Core Foundation + +/* + * Includes Unicode 3.2 decomposition code derived from Core Foundation */ #include @@ -36,13 +36,19 @@ #include #include +#if defined(KERNEL) && !defined(VFS_UTF8_UNIT_TEST) +#include +#else +#include +#endif + /* * UTF-8 (Unicode Transformation Format) * * UTF-8 is the Unicode Transformation Format that serializes a Unicode * character as a sequence of one to four bytes. Only the shortest form * required to represent the significant Unicode bits is legal. - * + * * UTF-8 Multibyte Codes * * Bytes Bits Unicode Min Unicode Max UTF-8 Byte Sequence (binary) @@ -58,17 +64,17 @@ #define UNICODE_TO_UTF8_LEN(c) \ ((c) < 0x0080 ? 1 : ((c) < 0x0800 ? 2 : (((c) & 0xf800) == 0xd800 ? 2 : 3))) -#define UCS_ALT_NULL 0x2400 +#define UCS_ALT_NULL 0x2400 /* Surrogate Pair Constants */ -#define SP_HALF_SHIFT 10 -#define SP_HALF_BASE 0x0010000u -#define SP_HALF_MASK 0x3FFu +#define SP_HALF_SHIFT 10 +#define SP_HALF_BASE 0x0010000u +#define SP_HALF_MASK 0x3FFu -#define SP_HIGH_FIRST 0xD800u -#define SP_HIGH_LAST 0xDBFFu -#define SP_LOW_FIRST 0xDC00u -#define SP_LOW_LAST 0xDFFFu +#define SP_HIGH_FIRST 0xD800u +#define SP_HIGH_LAST 0xDBFFu +#define SP_LOW_FIRST 0xDC00u +#define SP_LOW_LAST 0xDFFFu #include "vfs_utfconvdata.h" @@ -86,18 +92,19 @@ unicode_combinable(u_int16_t character) const u_int8_t *bitmap = __CFUniCharCombiningBitmap; u_int8_t value; - if (character < 0x0300) - return (0); + if (character < 0x0300) { + return 0; + } value = bitmap[(character >> 8) & 0xFF]; if (value == 0xFF) { - return (1); + return 1; } else if (value) { bitmap = bitmap + ((value - 1) * 32) + 256; - return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0); + return bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0; } - return (0); + return 0; } /* @@ -106,22 +113,24 @@ unicode_combinable(u_int16_t character) * Similar to __CFUniCharIsDecomposableCharacter. */ int -unicode_decomposeable(u_int16_t character) { +unicode_decomposeable(u_int16_t character) +{ const u_int8_t *bitmap = __CFUniCharDecomposableBitmap; u_int8_t value; - - if (character < 0x00C0) - return (0); + + if (character < 0x00C0) { + return 0; + } value = bitmap[(character >> 8) & 0xFF]; if (value == 0xFF) { - return (1); + return 1; } else if (value) { bitmap = bitmap + ((value - 1) * 32) + 256; - return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0); + return bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? 1 : 0; } - return (0); + return 0; } @@ -131,7 +140,8 @@ unicode_decomposeable(u_int16_t character) { * Similar to CFUniCharGetCombiningPropertyForCharacter. */ static inline u_int8_t -get_combining_class(u_int16_t character) { +get_combining_class(u_int16_t character) +{ const u_int8_t *bitmap = __CFUniCharCombiningPropertyBitmap; u_int8_t value = bitmap[(character >> 8)]; @@ -140,7 +150,7 @@ get_combining_class(u_int16_t character) { bitmap = bitmap + (value * 256); return bitmap[character % 256]; } - return (0); + return 0; } @@ -156,13 +166,13 @@ static u_int16_t sfm_to_ucs(u_int16_t ucs_ch); char utf_extrabytes[32] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 2, 2, 3, -1 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 2, 2, 3, -1 }; const char hexdigits[16] = { - '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; /* @@ -227,7 +237,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, int fl len += UNICODE_TO_UTF8_LEN(ucs_ch); } - return (len); + return len; } @@ -258,7 +268,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash, int fl */ int utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, - size_t * utf8len, size_t buflen, u_int16_t altslash, int flags) + size_t * utf8len, size_t buflen, u_int16_t altslash, int flags) { u_int8_t * bufstart; u_int8_t * bufend; @@ -275,8 +285,9 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, bufstart = utf8p; bufend = bufstart + buflen; - if (nullterm) + if (nullterm) { --bufend; + } charcnt = ucslen / 2; while (charcnt-- > 0) { @@ -296,9 +307,9 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, /* Slash and NULL are not permitted */ if (ucs_ch == '/') { - if (altslash) + if (altslash) { ucs_ch = altslash; - else { + } else { ucs_ch = '_'; result = EINVAL; } @@ -310,9 +321,8 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, if (utf8p >= bufend) { result = ENAMETOOLONG; break; - } + } *utf8p++ = ucs_ch; - } else if (ucs_ch < 0x800) { if ((utf8p + 1) >= bufend) { result = ENAMETOOLONG; @@ -320,7 +330,6 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, } *utf8p++ = 0xc0 | (ucs_ch >> 6); *utf8p++ = 0x80 | (0x3f & ucs_ch); - } else { /* These chars never valid Unicode. */ if (ucs_ch == 0xFFFE || ucs_ch == 0xFFFF) { @@ -330,20 +339,20 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, /* Combine valid surrogate pairs */ if (ucs_ch >= SP_HIGH_FIRST && ucs_ch <= SP_HIGH_LAST - && charcnt > 0) { + && charcnt > 0) { u_int16_t ch2; u_int32_t pair; ch2 = swapbytes ? OSSwapInt16(*ucsp) : *ucsp; if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) { pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT) - + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE; + + (ch2 - SP_LOW_FIRST) + SP_HALF_BASE; if ((utf8p + 3) >= bufend) { result = ENAMETOOLONG; break; } --charcnt; - ++ucsp; + ++ucsp; *utf8p++ = 0xf0 | (pair >> 18); *utf8p++ = 0x80 | (0x3f & (pair >> 12)); *utf8p++ = 0x80 | (0x3f & (pair >> 6)); @@ -356,7 +365,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, if (utf8p >= bufend) { result = ENAMETOOLONG; break; - } + } *utf8p++ = ucs_ch; continue; } @@ -368,29 +377,31 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p, *utf8p++ = 0xe0 | (ucs_ch >> 12); *utf8p++ = 0x80 | (0x3f & (ucs_ch >> 6)); *utf8p++ = 0x80 | (0x3f & ucs_ch); - } + } } - + *utf8len = utf8p - bufstart; - if (nullterm) + if (nullterm) { *utf8p++ = '\0'; + } - return (result); + return result; } // Pushes a character taking account of combining character sequences -static void push(uint16_t ucs_ch, int *combcharcnt, uint16_t **ucsp) +static void +push(uint16_t ucs_ch, int *combcharcnt, uint16_t **ucsp) { /* * Make multiple combining character sequences canonical */ if (unicode_combinable(ucs_ch)) { - ++*combcharcnt; /* start tracking a run */ + ++*combcharcnt; /* start tracking a run */ } else if (*combcharcnt) { if (*combcharcnt > 1) { prioritysort(*ucsp - *combcharcnt, *combcharcnt); } - *combcharcnt = 0; /* start over */ + *combcharcnt = 0; /* start over */ } *(*ucsp)++ = ucs_ch; @@ -426,7 +437,7 @@ static void push(uint16_t ucs_ch, int *combcharcnt, uint16_t **ucsp) */ int utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, - size_t *ucslen, size_t buflen, u_int16_t altslash, int flags) + size_t *ucslen, size_t buflen, u_int16_t altslash, int flags) { u_int16_t* bufstart; u_int16_t* bufend; @@ -447,8 +458,9 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, bufend = (u_int16_t *)((u_int8_t *)ucsp + buflen); while (utf8len-- > 0 && (byte = *utf8p++) != '\0') { - if (ucsp >= bufend) + if (ucsp >= bufend) { goto toolong; + } /* check for ascii */ if (byte < 0x80) { @@ -466,63 +478,75 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, case 1: ch = byte; ch <<= 6; /* 1st byte */ byte = *utf8p++; /* 2nd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto escape2; + } ch += byte; ch -= 0x00003080UL; - if (ch < 0x0080) + if (ch < 0x0080) { goto escape2; + } ucs_ch = ch; - break; + break; case 2: ch = byte; ch <<= 6; /* 1st byte */ byte = *utf8p++; /* 2nd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto escape2; + } ch += byte; ch <<= 6; byte = *utf8p++; /* 3rd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto escape3; + } ch += byte; ch -= 0x000E2080UL; - if (ch < 0x0800) + if (ch < 0x0800) { goto escape3; + } if (ch >= 0xD800) { - if (ch <= 0xDFFF) + if (ch <= 0xDFFF) { goto escape3; - if (ch == 0xFFFE || ch == 0xFFFF) + } + if (ch == 0xFFFE || ch == 0xFFFF) { goto escape3; + } } ucs_ch = ch; break; case 3: ch = byte; ch <<= 6; /* 1st byte */ byte = *utf8p++; /* 2nd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto escape2; + } ch += byte; ch <<= 6; byte = *utf8p++; /* 3rd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto escape3; + } ch += byte; ch <<= 6; byte = *utf8p++; /* 4th byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto escape4; - ch += byte; + } + ch += byte; ch -= 0x03C82080UL + SP_HALF_BASE; ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST; - if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) + if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) { goto escape4; + } push(ucs_ch, &combcharcnt, &ucsp); - if (ucsp >= bufend) + if (ucsp >= bufend) { goto toolong; + } ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST; if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST) { --ucsp; goto escape4; } *ucsp++ = ucs_ch; - continue; + continue; default: result = EINVAL; goto exit; @@ -535,8 +559,9 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, count = unicode_decompose(ucs_ch, sequence); for (i = 0; i < count; ++i) { - if (ucsp >= bufend) + if (ucsp >= bufend) { goto toolong; + } push(sequence[i], &combcharcnt, &ucsp); } @@ -555,16 +580,18 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp, } } } - if (ucs_ch == UCS_ALT_NULL) + if (ucs_ch == UCS_ALT_NULL) { ucs_ch = '\0'; + } } - if (ucs_ch == altslash) + if (ucs_ch == altslash) { ucs_ch = '/'; + } push(ucs_ch, &combcharcnt, &ucsp); continue; - /* + /* * Escape illegal UTF-8 into something legal. */ escape4: @@ -580,19 +607,21 @@ escape: result = EINVAL; goto exit; } - if (extrabytes > 0) + if (extrabytes > 0) { utf8len += extrabytes; + } byte = *(utf8p - 1); - if ((ucsp + 2) >= bufend) + if ((ucsp + 2) >= bufend) { goto toolong; + } /* Make a previous combining sequence canonical. */ if (combcharcnt > 1) { prioritysort(ucsp - combcharcnt, combcharcnt); } combcharcnt = 0; - + ucs_ch = '%'; *ucsp++ = ucs_ch; ucs_ch = hexdigits[byte >> 4]; @@ -618,7 +647,7 @@ escape: exit: *ucslen = (u_int8_t*)ucsp - (u_int8_t*)bufstart; - return (result); + return result; toolong: result = ENAMETOOLONG; @@ -638,76 +667,88 @@ utf8_validatestr(const u_int8_t* utf8p, size_t utf8len) size_t extrabytes; while (utf8len-- > 0 && (byte = *utf8p++) != '\0') { - if (byte < 0x80) + if (byte < 0x80) { continue; /* plain ascii */ - + } extrabytes = utf_extrabytes[byte >> 3]; - if (utf8len < extrabytes) + if (utf8len < extrabytes) { goto invalid; + } utf8len -= extrabytes; switch (extrabytes) { case 1: ch = byte; ch <<= 6; /* 1st byte */ byte = *utf8p++; /* 2nd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto invalid; + } ch += byte; ch -= 0x00003080UL; - if (ch < 0x0080) + if (ch < 0x0080) { goto invalid; + } break; case 2: ch = byte; ch <<= 6; /* 1st byte */ byte = *utf8p++; /* 2nd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto invalid; + } ch += byte; ch <<= 6; byte = *utf8p++; /* 3rd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto invalid; + } ch += byte; ch -= 0x000E2080UL; - if (ch < 0x0800) + if (ch < 0x0800) { goto invalid; + } if (ch >= 0xD800) { - if (ch <= 0xDFFF) + if (ch <= 0xDFFF) { goto invalid; - if (ch == 0xFFFE || ch == 0xFFFF) + } + if (ch == 0xFFFE || ch == 0xFFFF) { goto invalid; + } } break; case 3: ch = byte; ch <<= 6; /* 1st byte */ byte = *utf8p++; /* 2nd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto invalid; + } ch += byte; ch <<= 6; byte = *utf8p++; /* 3rd byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto invalid; + } ch += byte; ch <<= 6; byte = *utf8p++; /* 4th byte */ - if ((byte >> 6) != 2) + if ((byte >> 6) != 2) { goto invalid; + } ch += byte; ch -= 0x03C82080UL + SP_HALF_BASE; ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST; - if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) + if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST) { goto invalid; + } ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST; - if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST) + if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST) { goto invalid; + } break; default: goto invalid; } - } - return (0); + return 0; invalid: - return (EINVAL); + return EINVAL; } /* @@ -718,7 +759,7 @@ invalid: * pointed to by outstr. The size of the output in bytes (not including * a NULL termination byte) is returned in outlen. In-place conversions * are not supported (i.e. instr != outstr).] - + * * FLAGS * UTF_DECOMPOSED: output string will be fully decomposed (NFD) * @@ -735,7 +776,7 @@ invalid: */ int utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, - size_t *outlen, size_t buflen, int flags) + size_t *outlen, size_t buflen, int flags) { u_int16_t unicodebuf[32]; u_int16_t* unistr = NULL; @@ -749,12 +790,12 @@ utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, int result = 0; if (flags & ~(UTF_DECOMPOSED | UTF_PRECOMPOSED | UTF_NO_NULL_TERM | UTF_ESCAPE_ILLEGAL)) { - return (EINVAL); + return EINVAL; } decompose = (flags & UTF_DECOMPOSED); precompose = (flags & UTF_PRECOMPOSED); if ((decompose && precompose) || (!decompose && !precompose)) { - return (EINVAL); + return EINVAL; } outbufstart = outstr; outbufend = outbufstart + buflen; @@ -775,15 +816,16 @@ utf8_normalizestr(const u_int8_t* instr, size_t inlen, u_int8_t* outstr, exit: *outlen = outstr - outbufstart; if (((flags & UTF_NO_NULL_TERM) == 0)) { - if (outstr < outbufend) + if (outstr < outbufend) { *outstr++ = '\0'; - else + } else { result = ENAMETOOLONG; + } } - return (result); + return result; - /* + /* * Non-ASCII uses the existing utf8_encodestr/utf8_decodestr * functions to perform the normalization. Since this will * presumably be used to normalize filenames in the back-end @@ -806,18 +848,19 @@ nonASCII: */ unicode_bytes = precompose ? (inbuflen * 2) : (inbuflen * 3); - if (unicode_bytes <= sizeof(unicodebuf)) + if (unicode_bytes <= sizeof(unicodebuf)) { unistr = &unicodebuf[0]; - else + } else { MALLOC(unistr, uint16_t *, unicode_bytes, M_TEMP, M_WAITOK); + } /* Normalize the string. */ result = utf8_decodestr(inbufstart, inbuflen, unistr, &unicode_bytes, - unicode_bytes, 0, flags & ~UTF_NO_NULL_TERM); + unicode_bytes, 0, flags & ~UTF_NO_NULL_TERM); if (result == 0) { /* Put results back into UTF-8. */ result = utf8_encodestr(unistr, unicode_bytes, outbufstart, - &uft8_bytes, buflen, 0, UTF_NO_NULL_TERM); + &uft8_bytes, buflen, 0, UTF_NO_NULL_TERM); outstr = outbufstart + uft8_bytes; } if (unistr && unistr != &unicodebuf[0]) { @@ -827,9 +870,9 @@ nonASCII: } - /* - * Unicode 3.2 decomposition code (derived from Core Foundation) - */ +/* + * Unicode 3.2 decomposition code (derived from Core Foundation) + */ typedef struct { u_int32_t _key; @@ -838,26 +881,31 @@ typedef struct { static inline u_int32_t getmappedvalue32(const unicode_mappings32 *theTable, u_int32_t numElem, - u_int16_t character) + u_int16_t character) { const unicode_mappings32 *p, *q, *divider; - if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) - return (0); + if ((character < theTable[0]._key) || (character > theTable[numElem - 1]._key)) { + return 0; + } p = theTable; - q = p + (numElem-1); + q = p + (numElem - 1); while (p <= q) { - divider = p + ((q - p) >> 1); /* divide by 2 */ - if (character < divider->_key) { q = divider - 1; } - else if (character > divider->_key) { p = divider + 1; } - else { return (divider->_value); } + divider = p + ((q - p) >> 1); /* divide by 2 */ + if (character < divider->_key) { + q = divider - 1; + } else if (character > divider->_key) { + p = divider + 1; + } else { + return divider->_value; + } } - return (0); + return 0; } -#define RECURSIVE_DECOMPOSITION (1 << 15) -#define EXTRACT_COUNT(value) (((value) >> 12) & 0x0007) +#define RECURSIVE_DECOMPOSITION (1 << 15) +#define EXTRACT_COUNT(value) (((value) >> 12) & 0x0007) typedef struct { u_int16_t _key; @@ -866,25 +914,27 @@ typedef struct { static inline u_int16_t getmappedvalue16(const unicode_mappings16 *theTable, u_int32_t numElem, - u_int16_t character) + u_int16_t character) { const unicode_mappings16 *p, *q, *divider; - if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) - return (0); + if ((character < theTable[0]._key) || (character > theTable[numElem - 1]._key)) { + return 0; + } p = theTable; - q = p + (numElem-1); + q = p + (numElem - 1); while (p <= q) { - divider = p + ((q - p) >> 1); /* divide by 2 */ - if (character < divider->_key) + divider = p + ((q - p) >> 1); /* divide by 2 */ + if (character < divider->_key) { q = divider - 1; - else if (character > divider->_key) + } else if (character > divider->_key) { p = divider + 1; - else - return (divider->_value); + } else { + return divider->_value; + } } - return (0); + return 0; } @@ -908,23 +958,25 @@ unicode_recursive_decompose(u_int16_t character, u_int16_t *convertedChars) usedLength = 0; if (value & RECURSIVE_DECOMPOSITION) { - usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars); - - --length; /* Decrement for the first char */ - if (!usedLength) - return 0; - ++bmpMappings; - convertedChars += usedLength; + usedLength = unicode_recursive_decompose((u_int16_t)*bmpMappings, convertedChars); + + --length; /* Decrement for the first char */ + if (!usedLength) { + return 0; + } + ++bmpMappings; + convertedChars += usedLength; } - + usedLength += length; - - while (length--) + + while (length--) { *(convertedChars++) = *(bmpMappings++); - - return (usedLength); + } + + return usedLength; } - + #define HANGUL_SBASE 0xAC00 #define HANGUL_LBASE 0x1100 #define HANGUL_VBASE 0x1161 @@ -957,14 +1009,15 @@ unicode_decompose(u_int16_t character, u_int16_t *convertedChars) length = (character % HANGUL_TCOUNT ? 3 : 2); *(convertedChars++) = - character / HANGUL_NCOUNT + HANGUL_LBASE; + character / HANGUL_NCOUNT + HANGUL_LBASE; *(convertedChars++) = - (character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE; - if (length > 2) + (character % HANGUL_NCOUNT) / HANGUL_TCOUNT + HANGUL_VBASE; + if (length > 2) { *convertedChars = (character % HANGUL_TCOUNT) + HANGUL_TBASE; - return (length); + } + return length; } else { - return (unicode_recursive_decompose(character, convertedChars)); + return unicode_recursive_decompose(character, convertedChars); } } @@ -989,18 +1042,19 @@ unicode_combine(u_int16_t base, u_int16_t combining) /* 2 char Hangul sequences */ if ((combining < (HANGUL_VBASE + HANGUL_VCOUNT)) && (base >= HANGUL_LBASE && base < (HANGUL_LBASE + HANGUL_LCOUNT))) { - return (HANGUL_SBASE + - ((base - HANGUL_LBASE)*(HANGUL_VCOUNT*HANGUL_TCOUNT)) + - ((combining - HANGUL_VBASE)*HANGUL_TCOUNT)); + return HANGUL_SBASE + + ((base - HANGUL_LBASE) * (HANGUL_VCOUNT * HANGUL_TCOUNT)) + + ((combining - HANGUL_VBASE) * HANGUL_TCOUNT); } - + /* 3 char Hangul sequences */ if ((combining > HANGUL_TBASE) && (base >= HANGUL_SBASE && base < (HANGUL_SBASE + HANGUL_SCOUNT))) { - if ((base - HANGUL_SBASE) % HANGUL_TCOUNT) - return (0); - else - return (base + (combining - HANGUL_TBASE)); + if ((base - HANGUL_SBASE) % HANGUL_TCOUNT) { + return 0; + } else { + return base + (combining - HANGUL_TBASE); + } } } @@ -1014,7 +1068,7 @@ unicode_combine(u_int16_t base, u_int16_t combining) ((const u_int32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16), base); } - return (value); + return value; } @@ -1047,7 +1101,7 @@ prioritysort(u_int16_t* characters, int count) *ch1 = *ch2; *ch2 = tmp; changes = 1; - + /* * Make sure that p2 contains the combining class for the * character now stored at *ch2. This isn't required for @@ -1089,17 +1143,17 @@ prioritysort(u_int16_t* characters, int count) */ #define MAX_SFM2MAC 0x29 -#define SFMCODE_PREFIX_MASK 0xf000 +#define SFMCODE_PREFIX_MASK 0xf000 /* * In the Mac OS 9 days the colon was illegal in a file name. For that reason * SFM had no conversion for the colon. There is a conversion for the * slash. In Mac OS X the slash is illegal in a file name. So for us the colon * is a slash and a slash is a colon. So we can just replace the slash with the - * colon in our tables and everything will just work. + * colon in our tables and everything will just work. */ static u_int8_t -sfm2mac[42] = { + sfm2mac[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00 - 07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08 - 0F */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10 - 17 */ @@ -1107,10 +1161,11 @@ sfm2mac[42] = { 0x22, 0x2a, 0x3a, 0x3c, 0x3e, 0x3f, 0x5c, 0x7c, /* 20 - 27 */ 0x20, 0x2e /* 28 - 29 */ }; +#define SFM2MAC_LEN ((sizeof(sfm2mac))/sizeof(sfm2mac[0])) static u_int8_t -mac2sfm[112] = { - 0x20, 0x21, 0x20, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20 - 27 */ + mac2sfm[] = { + 0x20, 0x21, 0x20, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20 - 27 */ 0x28, 0x29, 0x21, 0x2b, 0x2c, 0x2d, 0x2e, 0x22, /* 28 - 2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30 - 37 */ 0x38, 0x39, 0x22, 0x3b, 0x23, 0x3d, 0x24, 0x25, /* 38 - 3f */ @@ -1123,6 +1178,7 @@ mac2sfm[112] = { 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70 - 77 */ 0x78, 0x79, 0x7a, 0x7b, 0x27, 0x7d, 0x7e, 0x7f /* 78 - 7f */ }; +#define MAC2SFM_LEN ((sizeof(mac2sfm))/sizeof(mac2sfm[0])) /* @@ -1135,22 +1191,25 @@ ucs_to_sfm(u_int16_t ucs_ch, int lastchar) { /* The last character of filename cannot be a space or period. */ if (lastchar) { - if (ucs_ch == 0x20) - return (0xf028); - else if (ucs_ch == 0x2e) - return (0xf029); + if (ucs_ch == 0x20) { + return 0xf028; + } else if (ucs_ch == 0x2e) { + return 0xf029; + } } /* 0x01 - 0x1f is simple transformation. */ if (ucs_ch <= 0x1f) { - return (ucs_ch | 0xf000); - } else /* 0x20 - 0x7f */ { + return ucs_ch | 0xf000; + } else { /* 0x20 - 0x7f */ u_int16_t lsb; + assert((ucs_ch - 0x0020) < MAC2SFM_LEN); lsb = mac2sfm[ucs_ch - 0x0020]; - if (lsb != ucs_ch) - return(0xf000 | lsb); + if (lsb != ucs_ch) { + return 0xf000 | lsb; + } } - return (ucs_ch); + return ucs_ch; } /* @@ -1159,11 +1218,10 @@ ucs_to_sfm(u_int16_t ucs_ch, int lastchar) static u_int16_t sfm_to_ucs(u_int16_t ucs_ch) { - if (((ucs_ch & 0xffC0) == SFMCODE_PREFIX_MASK) && + if (((ucs_ch & 0xffC0) == SFMCODE_PREFIX_MASK) && ((ucs_ch & 0x003f) <= MAX_SFM2MAC)) { + assert((ucs_ch & 0x003f) < SFM2MAC_LEN); ucs_ch = sfm2mac[ucs_ch & 0x003f]; } - return (ucs_ch); + return ucs_ch; } - -