*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
*
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
*
* @APPLE_LICENSE_HEADER_END@
*/
#include <sys/param.h>
#include <sys/utfconv.h>
#include <sys/errno.h>
-#include <architecture/byte_order.h>
+#include <libkern/OSByteOrder.h>
/*
* UTF-8 (Unicode Transformation Format)
return (0);
}
+
+/*
+ * Get the combining class of a 16-bit character.
+ *
+ * The lookup goes through __CFUniCharCombiningPropertyBitmap in two
+ * steps: the byte indexed by the upper 8 bits of the character selects
+ * a 256-byte page, and the lower 8 bits index into that page.  A page
+ * selector of 0 means there is no page and the result is 0.
+ *
+ * Similar to CFUniCharGetCombiningPropertyForCharacter.
+ */
+static inline u_int8_t
+get_combining_class(u_int16_t character) {
+	const u_int8_t *table = __CFUniCharCombiningPropertyBitmap;
+	u_int8_t page = table[character >> 8];
+
+	if (page == 0)
+		return (0);
+
+	/* Each page is 256 bytes long; the low byte picks the entry. */
+	return table[(page * 256) + (character & 0xFF)];
+}
+
+
static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
+static void priortysort(u_int16_t* characters, int count);
char utf_extrabytes[32] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
ucs_ch = *ucsp++;
if (swapbytes)
- ucs_ch = NXSwapShort(ucs_ch);
+ ucs_ch = OSSwapInt16(ucs_ch);
if (ucs_ch == '/')
ucs_ch = altslash ? altslash : '_';
else if (ucs_ch == '\0')
--extra;
ucs_ch = *chp++;
} else {
- ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
+ ucs_ch = swapbytes ? OSSwapInt16(*ucsp++) : *ucsp++;
if (decompose && unicode_decomposeable(ucs_ch)) {
extra = unicode_decompose(ucs_ch, sequence) - 1;
u_int16_t ch2;
u_int32_t pair;
- ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
+ ch2 = swapbytes ? OSSwapInt16(*ucsp) : *ucsp;
if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
+ (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
u_int16_t* bufend;
unsigned int ucs_ch;
unsigned int byte;
+ int combcharcnt = 0;
int result = 0;
int decompose, precompose, swapbytes;
ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
goto invalid;
- *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+ *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
if (ucsp >= bufend)
goto toolong;
ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
goto invalid;
- *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+ *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
continue;
default:
goto invalid;
for (i = 0; i < count; ++i) {
ucs_ch = sequence[i];
- *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+ *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
if (ucsp >= bufend)
goto toolong;
}
+ combcharcnt += count - 1;
continue;
}
} else if (precompose && (ucsp != bufstart)) {
u_int16_t composite, base;
if (unicode_combinable(ucs_ch)) {
- base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
+ base = swapbytes ? OSSwapInt16(*(ucsp - 1)) : *(ucsp - 1);
composite = unicode_combine(base, ucs_ch);
if (composite) {
--ucsp;
if (ucs_ch == altslash)
ucs_ch = '/';
- *ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+ /*
+ * Make multiple combining character sequences canonical
+ */
+ if (unicode_combinable(ucs_ch)) {
+ ++combcharcnt; /* start tracking a run */
+ } else if (combcharcnt) {
+ if (combcharcnt > 1) {
+ priortysort(ucsp - combcharcnt, combcharcnt);
+ }
+ combcharcnt = 0; /* start over */
+ }
+ *ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
+ }
+ /*
+ * Make a previous combining sequence canonical
+ */
+ if (combcharcnt > 1) {
+ priortysort(ucsp - combcharcnt, combcharcnt);
}
exit:
}
+/*
+ * utf8_validatestr - Check for a valid UTF-8 string.
+ *
+ * Examines at most utf8len bytes starting at utf8p and stops early at
+ * a NUL byte.  Returns 0 when every sequence examined is accepted and
+ * EINVAL at the first one that is not.  A sequence is rejected when:
+ *   - fewer bytes remain than its lead byte calls for,
+ *   - the utf_extrabytes entry for its lead byte is not 1, 2 or 3,
+ *   - a trailing byte is not of the form 10xxxxxx,
+ *   - a two-byte value is below 0x0080 or a three-byte value is
+ *     below 0x0800,
+ *   - a three-byte value lies in 0xD800-0xDFFF or is 0xFFFE/0xFFFF,
+ *   - a four-byte value does not split into a high half within
+ *     SP_HIGH_FIRST..SP_HIGH_LAST and a low half within
+ *     SP_LOW_FIRST..SP_LOW_LAST.
+ */
+int
+utf8_validatestr(const u_int8_t* utf8p, size_t utf8len)
+{
+	unsigned int byte;
+	unsigned int half;
+	u_int32_t ch;
+	size_t extrabytes;
+	size_t i;
+
+	for (;;) {
+		if (utf8len-- == 0)
+			break;
+		byte = *utf8p++;
+		if (byte == '\0')
+			break;
+		if (byte < 0x80)
+			continue;	/* plain ascii */
+
+		extrabytes = utf_extrabytes[byte >> 3];
+		if (utf8len < extrabytes)
+			return (EINVAL);
+		utf8len -= extrabytes;
+
+		/* Only 2-, 3- and 4-byte sequences are accepted. */
+		if (extrabytes < 1 || extrabytes > 3)
+			return (EINVAL);
+
+		/*
+		 * Accumulate the lead byte and every trailing byte,
+		 * shifting in 6 bits per trailing byte.  The tag bits are
+		 * left in place and removed by the subtraction below.
+		 */
+		ch = byte;
+		for (i = 0; i < extrabytes; ++i) {
+			byte = *utf8p++;
+			if ((byte >> 6) != 2)
+				return (EINVAL);
+			ch = (ch << 6) + byte;
+		}
+
+		switch (extrabytes) {
+		case 1:
+			ch -= 0x00003080UL;
+			if (ch < 0x0080)
+				return (EINVAL);
+			break;
+		case 2:
+			ch -= 0x000E2080UL;
+			if (ch < 0x0800)
+				return (EINVAL);
+			if (ch >= 0xD800 && ch <= 0xDFFF)
+				return (EINVAL);
+			if (ch == 0xFFFE || ch == 0xFFFF)
+				return (EINVAL);
+			break;
+		default:	/* extrabytes == 3 */
+			ch -= 0x03C82080UL + SP_HALF_BASE;
+			half = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
+			if (half < SP_HIGH_FIRST || half > SP_HIGH_LAST)
+				return (EINVAL);
+			half = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
+			if (half < SP_LOW_FIRST || half > SP_LOW_LAST)
+				return (EINVAL);
+			break;
+		}
+	}
+	return (0);
+}
+
+
/*
* Unicode 3.2 decomposition code (derived from Core Foundation)
*/
return (value);
}
+
+/*
+ * priortysort - order combining chars into canonical order
+ *
+ * Reorders, in place, the `count' characters starting at `characters'
+ * by making adjacent-swap passes until a pass changes nothing.  An
+ * adjacent pair is exchanged only when the first character's combining
+ * class is greater than the second's AND the second's class is
+ * non-zero, so a class 0 character is never moved ahead of its
+ * predecessor and characters of equal class keep their relative order.
+ *
+ * Does nothing when count is less than 2.
+ *
+ * The characters are interpreted exactly as stored in the buffer.
+ * NOTE(review): a caller that stores byte-swapped values should
+ * confirm the classes looked up here are the intended ones.
+ *
+ * Similar to CFUniCharPrioritySort
+ */
+static void
+priortysort(u_int16_t* characters, int count)
+{
+	u_int32_t p1, p2;
+	u_int16_t *ch1, *ch2;
+	u_int16_t *end;
+	int changes;
+
+	/* Nothing to order; also avoids reading past an empty run. */
+	if (count < 2)
+		return;
+
+	end = characters + count;
+	do {
+		changes = 0;
+		ch1 = characters;
+		ch2 = characters + 1;
+		p2 = get_combining_class(*ch1);
+		while (ch2 < end) {
+			p1 = p2;
+			p2 = get_combining_class(*ch2);
+			if (p1 > p2 && p2 != 0) {
+				u_int16_t tmp;
+
+				tmp = *ch1;
+				*ch1 = *ch2;
+				*ch2 = tmp;
+				changes = 1;
+
+				/*
+				 * *ch2 now holds the character whose class
+				 * is p1.  Keep p2 in step with it so the next
+				 * comparison uses the right class and a
+				 * high-class character can move past several
+				 * lower ones in a single pass.
+				 */
+				p2 = p1;
+			}
+			++ch1;
+			++ch2;
+		}
+	} while (changes);
+}