X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/55e303ae13a4cf49d70f2294092726f2fffb9ef2..c0fea4742e91338fffdcf79f86a7c1d5e2b97eb1:/bsd/vfs/vfs_utfconv.c

diff --git a/bsd/vfs/vfs_utfconv.c b/bsd/vfs/vfs_utfconv.c
index 45e2b0c7e..97b08226f 100644
--- a/bsd/vfs/vfs_utfconv.c
+++ b/bsd/vfs/vfs_utfconv.c
@@ -3,22 +3,19 @@
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
  * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
@@ -30,7 +27,7 @@
 #include <sys/param.h>
 #include <sys/utfconv.h>
 #include <sys/errno.h>
-#include <architecture/byte_order.h>
+#include <libkern/OSByteOrder.h>
 
 /*
  * UTF-8 (Unicode Transformation Format)
@@ -120,10 +117,31 @@ unicode_decomposeable(u_int16_t character) {
     	return (0);
 }
 
+
+/*
+ * Get the combining class of a 16-bit character.
+ *
+ * The table begins with 256 index bytes selected by the character's high
+ * byte; a non-zero index N picks the 256-byte page at offset N * 256.
+ * Similar to CFUniCharGetCombiningPropertyForCharacter.
+ */
+static inline u_int8_t
+get_combining_class(u_int16_t character) {
+	const u_int8_t *table = __CFUniCharCombiningPropertyBitmap;
+	u_int8_t page = table[character >> 8];
+
+	if (page == 0)
+		return (0);	/* no page for this high byte: class 0 */
+
+	return table[(page * 256) + (character & 0xFF)];
+}
+
+
 static int unicode_decompose(u_int16_t character, u_int16_t *convertedChars);
 
 static u_int16_t unicode_combine(u_int16_t base, u_int16_t combining);
 
+static void priortysort(u_int16_t* characters, int count);
 
 char utf_extrabytes[32] = {
 	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -157,7 +175,7 @@ utf8_encodelen(const u_int16_t * ucsp, size_t ucslen, u_int16_t altslash,
 		ucs_ch = *ucsp++;
 
 		if (swapbytes)
-			ucs_ch = NXSwapShort(ucs_ch);
+			ucs_ch = OSSwapInt16(ucs_ch);
 		if (ucs_ch == '/')
 			ucs_ch = altslash ? altslash : '_';
 		else if (ucs_ch == '\0')
@@ -214,7 +232,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
 			--extra;
 			ucs_ch = *chp++;
 		} else {
-			ucs_ch = swapbytes ? NXSwapShort(*ucsp++) : *ucsp++;
+			ucs_ch = swapbytes ? OSSwapInt16(*ucsp++) : *ucsp++;
 
 			if (decompose && unicode_decomposeable(ucs_ch)) {
 				extra = unicode_decompose(ucs_ch, sequence) - 1;
@@ -258,7 +276,7 @@ utf8_encodestr(const u_int16_t * ucsp, size_t ucslen, u_int8_t * utf8p,
 				u_int16_t ch2;
 				u_int32_t pair;
 
-				ch2 = swapbytes ? NXSwapShort(*ucsp) : *ucsp;
+				ch2 = swapbytes ? OSSwapInt16(*ucsp) : *ucsp;
 				if (ch2 >= SP_LOW_FIRST && ch2 <= SP_LOW_LAST) {
 					pair = ((ucs_ch - SP_HIGH_FIRST) << SP_HALF_SHIFT)
 						+ (ch2 - SP_LOW_FIRST) + SP_HALF_BASE;
@@ -319,6 +337,7 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
 	u_int16_t* bufend;
 	unsigned int ucs_ch;
 	unsigned int byte;
+	int combcharcnt = 0;
 	int result = 0;
 	int decompose, precompose, swapbytes;
 
@@ -395,13 +414,13 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
 				ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;
 				if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
 					goto invalid;
-				*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+				*ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
 				if (ucsp >= bufend)
 					goto toolong;
 				ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;
 				if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
 					goto invalid;
-				*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+				*ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
 			        continue;
 			default:
 				goto invalid;
@@ -415,17 +434,18 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
 
 					for (i = 0; i < count; ++i) {
 						ucs_ch = sequence[i];
-						*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+						*ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
 						if (ucsp >= bufend)
 							goto toolong;
 					}
+					combcharcnt += count - 1;
 					continue;			
 				}
 			} else if (precompose && (ucsp != bufstart)) {
 				u_int16_t composite, base;
 
 				if (unicode_combinable(ucs_ch)) {
-					base = swapbytes ? NXSwapShort(*(ucsp - 1)) : *(ucsp - 1);
+					base = swapbytes ? OSSwapInt16(*(ucsp - 1)) : *(ucsp - 1);
 					composite = unicode_combine(base, ucs_ch);
 					if (composite) {
 						--ucsp;
@@ -439,7 +459,24 @@ utf8_decodestr(const u_int8_t* utf8p, size_t utf8len, u_int16_t* ucsp,
 		if (ucs_ch == altslash)
 			ucs_ch = '/';
 
-		*ucsp++ = swapbytes ? NXSwapShort(ucs_ch) : ucs_ch;
+		/*
+		 * Make multiple combining character sequences canonical
+		 */
+		if (unicode_combinable(ucs_ch)) {
+			++combcharcnt;   /* start tracking a run */
+		} else if (combcharcnt) {
+			if (combcharcnt > 1) {
+				priortysort(ucsp - combcharcnt, combcharcnt);
+			}
+			combcharcnt = 0;  /* start over */
+		}
+		*ucsp++ = swapbytes ? OSSwapInt16(ucs_ch) : ucs_ch;
+	}
+	/*
+	 * Make a previous combining sequence canonical
+	 */
+	if (combcharcnt > 1) {
+		priortysort(ucsp - combcharcnt, combcharcnt);
 	}
 
 exit:
@@ -457,6 +494,91 @@ toolong:
 }
 
 
+/* utf8_validatestr - Check for a valid UTF-8 string.  Examines at most
+ * utf8len bytes, stopping early at a NUL byte.  Returns 0 if every sequence
+ * examined is accepted, or EINVAL at the first one that is rejected. */
+int
+utf8_validatestr(const u_int8_t* utf8p, size_t utf8len)
+{
+	unsigned int byte;
+	u_int32_t ch;
+	unsigned int ucs_ch;
+	size_t extrabytes;
+
+	while (utf8len-- > 0 && (byte = *utf8p++) != '\0') {
+		if (byte < 0x80)
+			continue;  /* plain ascii */
+
+		extrabytes = utf_extrabytes[byte >> 3];  /* indexed by the top 5 bits of the lead byte */
+
+		if (utf8len < extrabytes)  /* sequence would run past the end of the input */
+			goto invalid;
+		utf8len -= extrabytes;
+
+		switch (extrabytes) {
+		case 1:
+			ch = byte; ch <<= 6;   /* 1st byte */
+			byte = *utf8p++;       /* 2nd byte */
+			if ((byte >> 6) != 2)  /* trailing bytes must look like 10xxxxxx */
+				goto invalid;
+			ch += byte;
+			ch -= 0x00003080UL;    /* drop marker bits: (0xC0 << 6) + 0x80 */
+			if (ch < 0x0080)       /* overlong: value fits in a single byte */
+				goto invalid;
+			break;
+		case 2:
+			ch = byte; ch <<= 6;   /* 1st byte */
+			byte = *utf8p++;       /* 2nd byte */
+			if ((byte >> 6) != 2)
+				goto invalid;
+			ch += byte; ch <<= 6;
+			byte = *utf8p++;       /* 3rd byte */
+			if ((byte >> 6) != 2)
+				goto invalid;
+			ch += byte;
+			ch -= 0x000E2080UL;    /* drop marker bits: (0xE0 << 12) + (0x80 << 6) + 0x80 */
+			if (ch < 0x0800)       /* overlong: value fits in a two-byte sequence */
+				goto invalid;
+			if (ch >= 0xD800) {
+				if (ch <= 0xDFFF)  /* 0xD800-0xDFFF is rejected (the surrogate code unit range) */
+					goto invalid;
+				if (ch == 0xFFFE || ch == 0xFFFF)  /* these two values are rejected as well */
+					goto invalid;
+			}
+			break;
+		case 3:
+			ch = byte; ch <<= 6;   /* 1st byte */
+			byte = *utf8p++;       /* 2nd byte */
+			if ((byte >> 6) != 2)
+				goto invalid;
+			ch += byte; ch <<= 6;
+			byte = *utf8p++;       /* 3rd byte */
+			if ((byte >> 6) != 2)
+				goto invalid;
+			ch += byte; ch <<= 6;
+			byte = *utf8p++;       /* 4th byte */
+			if ((byte >> 6) != 2)
+				goto invalid;
+			ch += byte;
+			ch -= 0x03C82080UL + SP_HALF_BASE;  /* marker bits (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80, then SP_HALF_BASE */
+			ucs_ch = (ch >> SP_HALF_SHIFT) + SP_HIGH_FIRST;  /* upper part must land in SP_HIGH_FIRST..SP_HIGH_LAST */
+			if (ucs_ch < SP_HIGH_FIRST || ucs_ch > SP_HIGH_LAST)
+				goto invalid;
+			ucs_ch = (ch & SP_HALF_MASK) + SP_LOW_FIRST;  /* lower part must land in SP_LOW_FIRST..SP_LOW_LAST */
+			if (ucs_ch < SP_LOW_FIRST || ucs_ch > SP_LOW_LAST)
+				goto invalid;
+			break;
+		default:  /* any other table value is not a usable lead byte */
+			goto invalid;
+		}
+		
+	}
+	return (0);
+invalid:
+	return (EINVAL);
+}
+
+
  /*
   * Unicode 3.2 decomposition code (derived from Core Foundation)
   */
@@ -647,3 +769,39 @@ unicode_combine(u_int16_t base, u_int16_t combining)
 	return (value);
 }
 
+
+/*
+ * priortysort - order combining chars into canonical order
+ *
+ * In-place bubble sort of count characters by ascending combining class;
+ * a character of class 0 is never swapped ahead of its predecessor.
+ * Similar to CFUniCharPrioritySort
+ */
+static void
+priortysort(u_int16_t* characters, int count)
+{
+	u_int32_t p1, p2;
+	u_int16_t *ch1, *ch2, *end;
+	int changes = (count > 1);	/* fewer than two chars: nothing to reorder */
+
+	end = characters + count;
+	while (changes) {
+		changes = 0;
+		ch1 = characters;
+		ch2 = characters + 1;
+		p2 = get_combining_class(*ch1);
+		while (ch2 < end) {
+			p1 = p2;
+			p2 = get_combining_class(*ch2);
+			if (p1 > p2 && p2 != 0) {	/* class 0 must keep its place */
+				u_int16_t tmp = *ch1;
+				*ch1 = *ch2;
+				*ch2 = tmp;
+				p2 = p1;	/* class of the char now stored at *ch2 */
+				changes = 1;
+			}
+			++ch1;
+			++ch2;
+		}
+	}
+}