X-Git-Url: https://git.saurik.com/apple/hfs.git/blobdiff_plain/de8ee0119e51802e7b74e261b628b6de53b34e6c..ccb1535577c019312b69b95a60bb75c8a3ee22a3:/core/UnicodeWrappers.c

diff --git a/core/UnicodeWrappers.c b/core/UnicodeWrappers.c
new file mode 100644
index 0000000..8e5b6e6
--- /dev/null
+++ b/core/UnicodeWrappers.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+	File:		UnicodeWrappers.c
+
+	Contains:	Wrapper routines for Unicode conversion and comparison.
+
+*/
+
+#include <sys/param.h>
+#include <sys/utfconv.h>
+
+#include "hfs_macos_defs.h"
+#include "UCStringCompareData.h"
+
+#include "FileMgrInternal.h"
+#include "HFSUnicodeWrappers.h"
+
+enum {
+	kMinFileExtensionChars = 1,	/* does not include dot */
+	kMaxFileExtensionChars = 5	/* does not include dot */
+};
+
+
+#define EXTENSIONCHAR(c)	(((c) >= 0x61 && (c) <= 0x7A) || \
+				 ((c) >= 0x41 && (c) <= 0x5A) || \
+				 ((c) >= 0x30 && (c) <= 0x39))
+
+
+#define IsHexDigit(c)		(((c) >= (u_int8_t) '0' && (c) <= (u_int8_t) '9') || \
+				 ((c) >= (u_int8_t) 'A' && (c) <= (u_int8_t) 'F'))
+
+
+static void	GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
+
+
+static u_int32_t	HexStringToInteger( u_int32_t length, const u_int8_t *hexStr );
+
+
+/*
+ * Get filename extension (if any) as a C string
+ */
+static void
+GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
+{
+	u_int32_t	i;
+	UniChar	c;
+	u_int16_t	extChars;	/* number of extension chars (excluding dot) */
+	u_int16_t	maxExtChars;
+	Boolean	foundExtension;
+
+	extStr[0] = '\0';	/* assume there's no extension */
+
+	if ( length < 3 )
+		return;		/* "x.y" is smallest possible extension */
+	
+	if ( length < (kMaxFileExtensionChars + 2) )
+		maxExtChars = length - 2;	/* save room for prefix + dot */
+	else
+		maxExtChars = kMaxFileExtensionChars;
+
+	i = length;
+	extChars = 0;
+	foundExtension = false;
+
+	while ( extChars <= maxExtChars ) {
+		c = unicodeStr[--i];
+
+		/* look for leading dot */
+		if ( c == (UniChar) '.' ) {
+			if ( extChars > 0 )	/* cannot end with a dot */
+				foundExtension = true;
+			break;
+		}
+
+		if ( EXTENSIONCHAR(c) )
+			++extChars;
+		else
+			break;
+	}
+	
+	/* if we found one then copy it */
+	if ( foundExtension ) {
+		u_int8_t *extStrPtr = (u_int8_t *)extStr;
+		const UniChar *unicodeStrPtr = &unicodeStr[i];
+		
+		for ( i = 0; i <= extChars; ++i )
+			*(extStrPtr++) = (u_int8_t) *(unicodeStrPtr++);
+		extStr[extChars + 1] = '\0';	/* terminate extension + dot */
+	}
+}
+
+
+
+/*
+ * Count filename extension characters (if any)
+ */
+u_int32_t
+CountFilenameExtensionChars( const unsigned char * filename, u_int32_t length )
+{
+	u_int32_t	i;
+	UniChar	c;
+	u_int32_t	extChars;	/* number of extension chars (excluding dot) */
+	u_int16_t	maxExtChars;
+	Boolean	foundExtension;
+
+	if ( length < 3 )
+		return 0;	/* "x.y" is smallest possible extension	*/
+	
+	if ( length < (kMaxFileExtensionChars + 2) )
+		maxExtChars = length - 2;	/* save room for prefix + dot */
+	else
+		maxExtChars = kMaxFileExtensionChars;
+
+	extChars = 0;		/* assume there's no extension */
+	i = length - 1;		/* index to last ascii character */
+	foundExtension = false;
+
+	while ( extChars <= maxExtChars ) {
+		c = filename[i--];
+
+		/* look for leading dot */
+		if ( c == (u_int8_t) '.' )	{
+			if ( extChars > 0 )	/* cannot end with a dot */
+				return (extChars);
+
+			break;
+		}
+
+		if ( EXTENSIONCHAR(c) )
+			++extChars;
+		else
+			break;
+	}
+	
+	return 0;
+}
+
+
+/*
+ * extract the file id from a mangled name
+ */
+HFSCatalogNodeID
+GetEmbeddedFileID(const unsigned char * filename, u_int32_t length, u_int32_t *prefixLength)
+{
+	short	extChars;
+	short	i;
+	u_int8_t	c;
+
+	*prefixLength = 0;
+
+	if ( filename == NULL )
+		return 0;
+
+	if ( length < 28 )
+		return 0;	/* too small to have been mangled */
+
+	/* big enough for a file ID (#10) and an extension (.x) ? */
+	if ( length > 5 )
+		extChars = CountFilenameExtensionChars(filename, length);
+	else
+		extChars = 0;
+
+	/* skip over dot plus extension characters */
+	if ( extChars > 0 )
+		length -= (extChars + 1);	
+
+	/* scan for file id digits */
+	for ( i = length - 1; i >= 0; --i) {
+		c = filename[i];
+
+		/* look for file ID marker */
+		if ( c == '#' ) {
+			if ( (length - i) < 3 )
+				break;	/* too small to be a file ID */
+
+			*prefixLength = i;
+			return HexStringToInteger(length - i - 1, &filename[i+1]);
+		}
+
+		if ( !IsHexDigit(c) )
+			break;	/* file ID string must have hex digits */
+	}
+
+	return 0;
+}
+
+
+
+static u_int32_t
+HexStringToInteger(u_int32_t length, const u_int8_t *hexStr)
+{
+	u_int32_t		value;
+	u_int32_t		i;
+	u_int8_t		c;
+	const u_int8_t	*p;
+
+	value = 0;
+	p = hexStr;
+
+	for ( i = 0; i < length; ++i ) {
+		c = *p++;
+
+		if (c >= '0' && c <= '9') {
+			value = value << 4;
+			value += (u_int32_t) c - (u_int32_t) '0';
+		} else if (c >= 'A' && c <= 'F') {
+			value = value << 4;
+			value += 10 + ((unsigned int) c - (unsigned int) 'A');
+		} else {
+			return 0;	/* bad character */
+		}
+	}
+
+	return value;
+}
+
+
+/*
+ * Routine:	FastRelString
+ *
+ * Output:	returns -1 if str1 < str2
+ *		returns  1 if str1 > str2
+ *		return	 0 if equal
+ *
+ */
+int32_t	FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
+{
+	u_int16_t*		compareTable;
+	int32_t	 		bestGuess;
+	u_int8_t 	 	length, length2;
+	u_int8_t 	 	delta;
+
+	delta = 0;
+	length = *(str1++);
+	length2 = *(str2++);
+
+	if (length == length2)
+		bestGuess = 0;
+	else if (length < length2)
+	{
+		bestGuess = -1;
+		delta = length2 - length;
+	}
+	else
+	{
+		bestGuess = 1;
+		length = length2;
+	}
+
+	compareTable = (u_int16_t*) gCompareTable;
+
+	while (length--)
+	{
+		u_int8_t	aChar, bChar;
+
+		aChar = *(str1++);
+		bChar = *(str2++);
+		
+		if (aChar != bChar)		//	If they don't match exacly, do case conversion
+		{	
+			u_int16_t	aSortWord, bSortWord;
+
+			aSortWord = compareTable[aChar];
+			bSortWord = compareTable[bChar];
+
+			if (aSortWord > bSortWord)
+				return 1;
+
+			if (aSortWord < bSortWord)
+				return -1;
+		}
+		
+		//	If characters match exactly, then go on to next character immediately without
+		//	doing any extra work.
+	}
+	
+	//	if you got to here, then return bestGuess
+	return bestGuess;
+}	
+
+
+
+//
+//	FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
+//
+//	    IF				RESULT
+//	--------------------------
+//	str1 < str2		=>	-1
+//	str1 = str2		=>	 0
+//	str1 > str2		=>	+1
+//
+//	The lower case table starts with 256 entries (one for each of the upper bytes
+//	of the original Unicode char).  If that entry is zero, then all characters with
+//	that upper byte are already case folded.  If the entry is non-zero, then it is
+//	the _index_ (not byte offset) of the start of the sub-table for the characters
+//	with that upper byte.  All ignorable characters are folded to the value zero.
+//
+//	In pseudocode:
+//
+//		Let c = source Unicode character
+//		Let table[] = lower case table
+//
+//		lower = table[highbyte(c)]
+//		if (lower == 0)
+//			lower = c
+//		else
+//			lower = table[lower+lowbyte(c)]
+//
+//		if (lower == 0)
+//			ignore this character
+//
+//	To handle ignorable characters, we now need a loop to find the next valid character.
+//	Also, we can't pre-compute the number of characters to compare; the string length might
+//	be larger than the number of non-ignorable characters.  Further, we must be able to handle
+//	ignorable characters at any point in the string, including as the first or last characters.
+//	We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
+//	Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
+//	the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
+//	an invalid Unicode character).
+//
+//	Pseudocode:
+//
+//		while (1) {
+//			c1 = GetNextValidChar(str1)			//	returns zero if at end of string
+//			c2 = GetNextValidChar(str2)
+//
+//			if (c1 != c2) break					//	found a difference
+//
+//			if (c1 == 0)						//	reached end of string on both strings at once?
+//				return 0;						//	yes, so strings are equal
+//		}
+//
+//		// When we get here, c1 != c2.  So, we just need to determine which one is less.
+//		if (c1 < c2)
+//			return -1;
+//		else
+//			return 1;
+//
+
+int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
+							register ConstUniCharArrayPtr str2, register ItemCount length2)
+{
+	register u_int16_t		c1,c2;
+	register u_int16_t		temp;
+	register u_int16_t*	lowerCaseTable;
+
+	lowerCaseTable = (u_int16_t*) gLowerCaseTable;
+
+	while (1) {
+		/* Set default values for c1, c2 in case there are no more valid chars */
+		c1 = 0;
+		c2 = 0;
+		
+		/* Find next non-ignorable char from str1, or zero if no more */
+		while (length1 && c1 == 0) {
+			c1 = *(str1++);
+			--length1;
+			/* check for basic latin first */
+			if (c1 < 0x0100) {
+				c1 = gLatinCaseFold[c1];
+				break;
+			}
+			/* case fold if neccessary */
+			if ((temp = lowerCaseTable[c1>>8]) != 0)
+				c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
+		}
+		
+		
+		/* Find next non-ignorable char from str2, or zero if no more */
+		while (length2 && c2 == 0) {
+			c2 = *(str2++);
+			--length2;
+			/* check for basic latin first */
+			if (c2 < 0x0100) {
+				c2 = gLatinCaseFold[c2];
+				break;
+			}
+			/* case fold if neccessary */
+			if ((temp = lowerCaseTable[c2>>8]) != 0)
+				c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
+		}
+		
+		if (c1 != c2)		//	found a difference, so stop looping
+			break;
+		
+		if (c1 == 0)		//	did we reach the end of both strings at the same time?
+			return 0;		//	yes, so strings are equal
+	}
+	
+	if (c1 < c2)
+		return -1;
+	else
+		return 1;
+}
+
+/*
+ * UnicodeBinaryCompare
+ * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them.
+ * 
+ * Results are emitted like FastUnicodeCompare:
+ * 
+ * 
+ *	    IF				RESULT
+ *	--------------------------
+ *	str1 < str2		=>	-1
+ *	str1 = str2		=>	 0
+ *	str1 > str2		=>	+1
+ *
+ * The case matching source code is greatly simplified due to the lack of case-folding
+ * in this comparison routine. We compare, in order: the lengths, then do character-by-
+ * character comparisons.
+ * 
+ */
+int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1, register ItemCount len1,
+							register ConstUniCharArrayPtr str2, register ItemCount len2) {
+	uint16_t c1;
+	uint16_t c2;
+	int string_length;
+	int32_t result = 0;
+	
+	/* Set default values for the two character pointers */
+	c1 = 0;
+	c2 = 0;
+
+	/* First generate the string length (for comparison purposes) */
+	if (len1 < len2) {
+		string_length = len1;
+		--result;
+	}
+	else if (len1 > len2) {
+		string_length = len2;
+		++result;
+	}
+	else {
+		string_length = len1;
+	}
+
+	/* now compare the two string pointers */
+	while (string_length--) {
+		c1 = *(str1++);
+		c2 = *(str2++);
+
+		if (c1 > c2) {
+			result = 1;
+			break;
+		}
+		
+		if (c1 < c2) {
+			result = -1;
+			break;
+		}
+		/* If equal, iterate to the next two respective chars */		
+	}
+
+	return result;
+}
+
+
+OSErr
+ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
+					 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
+{
+	ByteCount subMaxLen;
+	size_t utf8len;
+	char fileIDStr[15];
+	char extStr[15];
+
+	snprintf(fileIDStr, sizeof(fileIDStr), "#%X", cnid);
+	GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
+
+	/* remove extension chars from source */
+	srcLen -= strlen(extStr) * sizeof(UniChar);
+	subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
+
+	(void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
+
+	strlcat((char *)dstStr, fileIDStr, maxDstLen);
+	strlcat((char *)dstStr, extStr, maxDstLen);
+	*actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
+
+	return noErr;
+}