[apple/xnu.git] / bsd / hfs / hfscommon / Unicode / UnicodeWrappers.c

/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 * 
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */
/*
	File:		UnicodeWrappers.c

	Contains:	Wrapper routines for Unicode conversion and comparison.

*/
#include <sys/param.h>
#include <sys/utfconv.h>

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"

/*
 * Limits on the length of a filename extension, counted in characters
 * that follow the dot.  kMaxFileExtensionChars caps how far back from the
 * end of a name the extension scanners below will look for the dot;
 * kMinFileExtensionChars is not referenced by the code visible here.
 */
enum {
	kMinFileExtensionChars = 1,	/* does not include dot */
	kMaxFileExtensionChars = 5	/* does not include dot */
};


/*
 * True when c is a character allowed inside a filename extension:
 * 0x61-0x7A ('a'-'z'), 0x41-0x5A ('A'-'Z') or 0x30-0x39 ('0'-'9').
 * The argument is evaluated several times, so it must be free of
 * side effects.
 */
#define EXTENSIONCHAR(c)	(((c) >= 0x61 && (c) <= 0x7A) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x30 && (c) <= 0x39))


/*
 * True when c is '0'-'9' or an UPPER-case 'A'-'F'.  Lower-case hex
 * letters are rejected.  The argument is evaluated several times, so it
 * must be free of side effects.
 */
#define IsHexDigit(c)		(((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
				 ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))


/* Forward declarations of file-local helpers that are defined further down. */

/* Copies ".ext" (dot included) from the tail of a Unicode name into extStr. */
static void	GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );

/* Formats fileID as '#' followed by upper-case hex digits. */
static void	GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );

/* Parses length upper-case hex digits starting at hexStr. */
static UInt32	HexStringToInteger( UInt32 length, const UInt8 *hexStr );


/*
 * Convert a file ID into a hexadecimal string with no leading zeros.
 *
 * The result is '#' followed by the upper-case hex digits of the low
 * 32 bits of fileID and a terminating NUL, e.g. 0x105 -> "#105" and
 * 0 -> "#0".  fileIDStr must have room for at least 10 bytes
 * ('#' + up to 8 digits + NUL).
 */
static void
GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
{
	static const char	hexDigits[] = "0123456789ABCDEF";
	SInt32	i, b;
	char	c;

	fileIDStr[0] = '#';

	/* i is the index of the last byte written; walk nibbles high to low */
	for ( i = 0, b = 28; b >= 0; b -= 4 ) {
		c = hexDigits[(fileID >> b) & 0x0000000F];

		/*
		 * Emit the digit unless it is a leading zero.  Once any digit
		 * has been written (i > 0) every following zero is significant;
		 * the previous test (i > 1) dropped a zero that came right
		 * after the first digit, turning 0x105 into "#15".  The last
		 * nibble (b == 0) is always emitted so zero yields "#0".
		 */
		if ( (c != '0') || (i > 0) || (b == 0) )
			fileIDStr[++i] = c;
	}

	fileIDStr[++i] = '\0';
}


/*
 * Get filename extension (if any) as a C string.
 *
 * Scans backwards from the end of unicodeStr (length characters) for a
 * dot that is preceded only by EXTENSIONCHAR characters.  An extension is
 * recognised when it has between 1 and kMaxFileExtensionChars characters
 * and leaves at least one character of name in front of the dot.
 *
 * On success extStr receives the dot, the extension and a NUL (at most
 * kMaxFileExtensionChars + 2 bytes); otherwise extStr is the empty string.
 */
static void
GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
{
	UInt32	pos;		/* index of the character just examined */
	UInt16	count;		/* extension characters seen so far (dot excluded) */
	UInt16	limit;		/* longest extension that may be accepted */
	UInt16	k;

	extStr[0] = '\0';	/* default result: no extension */

	if ( length < 3 )
		return;		/* nothing shorter than "x.y" can qualify */

	/* short names: keep one character of prefix plus the dot */
	limit = ( length < (kMaxFileExtensionChars + 2) ) ? (length - 2) : kMaxFileExtensionChars;

	pos = length;

	for ( count = 0; count <= limit; ++count ) {
		UniChar	ch = unicodeStr[--pos];

		if ( ch == (UniChar) '.' ) {
			if ( count == 0 )
				return;		/* name ends with a dot */

			/* pos now indexes the dot: copy dot + extension */
			for ( k = 0; k <= count; ++k )
				extStr[k] = (UInt8) unicodeStr[pos + k];
			extStr[count + 1] = '\0';
			return;
		}

		if ( !EXTENSIONCHAR(ch) )
			return;			/* not an extension character */
	}

	/* ran past the limit without meeting a dot: no extension */
}


/*
 * Count filename extension characters (if any).
 *
 * Byte-string counterpart of the Unicode extension scan: walks backwards
 * from the last of the length bytes in filename looking for a dot that is
 * preceded only by EXTENSIONCHAR bytes.
 *
 * Returns the number of extension characters (dot excluded, at most
 * kMaxFileExtensionChars), or 0 when the name has no acceptable extension.
 */
static UInt32
CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
{
	const unsigned char	*cursor;
	UInt32	count;		/* extension characters seen so far */
	UInt16	limit;		/* longest extension that may be accepted */

	if ( length < 3 )
		return 0;	/* nothing shorter than "x.y" can qualify */

	if ( length >= (kMaxFileExtensionChars + 2) )
		limit = kMaxFileExtensionChars;
	else
		limit = length - 2;	/* keep one prefix character plus the dot */

	cursor = filename + length;	/* one past the last byte */

	for ( count = 0; count <= limit; ++count ) {
		UInt8	ch = *--cursor;

		/* a dot ends the scan; count is 0 if the name ends with it */
		if ( ch == (UInt8) '.' )
			return count;

		if ( !EXTENSIONCHAR(ch) )
			break;
	}

	return 0;
}


/*
 * Extract the file ID from a mangled name.
 *
 * A mangled name is expected to end in '#', at least two upper-case hex
 * digits and an optional ".ext" extension.  filename holds length bytes.
 *
 * On success the decoded ID is returned and *prefixLength is set to the
 * number of bytes that precede the '#'.  When no ID is found the function
 * returns 0 and *prefixLength is 0.  prefixLength must not be NULL; a NULL
 * filename is tolerated.
 */
HFSCatalogNodeID
GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
{
	UInt32	extChars;
	UInt32	i;	/* one past the byte being examined */
	UInt8	c;

	*prefixLength = 0;

	if ( filename == NULL )
		return 0;

	if ( length < 28 )
		return 0;	/* too small to have been mangled */

	/*
	 * length is at least 28 here, so the name is always big enough for a
	 * file ID (#10) and an extension (.x); the former "length > 5" test
	 * could never fail.
	 */
	extChars = CountFilenameExtensionChars(filename, length);

	/* skip over dot plus extension characters */
	if ( extChars > 0 )
		length -= (extChars + 1);

	/*
	 * Scan backwards over the file id digits.  The index is unsigned and
	 * as wide as length (it used to be a short, which truncated long
	 * names), and it is kept one ahead of the byte examined so that the
	 * loop can end at zero without wrapping.
	 */
	for ( i = length; i > 0; --i ) {
		c = filename[i - 1];

		/* look for file ID marker */
		if ( c == '#' ) {
			if ( (length - i) < 2 )
				break;	/* fewer than two digits: too small to be a file ID */

			*prefixLength = i - 1;
			return HexStringToInteger(length - i, &filename[i]);
		}

		if ( !IsHexDigit(c) )
			break;	/* file ID string must have hex digits */
	}

	return 0;
}


/*
 * Convert length upper-case hexadecimal digits at hexStr into an integer.
 *
 * Returns the value, or 0 as soon as a byte other than '0'-'9' / 'A'-'F'
 * is met (so a bad string is indistinguishable from the value zero).
 * There is no overflow check: digits that do not fit in a UInt32 are
 * shifted out of the top of the result.
 */
static UInt32
HexStringToInteger(UInt32 length, const UInt8 *hexStr)
{
	UInt32	value = 0;
	UInt32	i;	/* same width and signedness as length */

	for ( i = 0; i < length; ++i ) {
		UInt8	c = hexStr[i];
		UInt32	digit;

		if (c >= '0' && c <= '9')
			digit = (UInt32) c - (UInt32) '0';
		else if (c >= 'A' && c <= 'F')
			digit = 10 + ((UInt32) c - (UInt32) 'A');
		else
			return 0;	/* bad character */

		value = (value << 4) + digit;
	}

	return value;
}


/*
 * Routine:	FastRelString
 *
 * Compares two length-prefixed strings (first byte is the length) using
 * the sort keys in gCompareTable.  Bytes that differ but share the same
 * sort key are treated as equal.  When every byte of the shorter string
 * matches, the shorter string orders first.
 *
 * Output:	returns -1 if str1 < str2
 *		returns  1 if str1 > str2
 *		returns  0 if equal
 */
SInt32	FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
{
	const UInt16	*sortKeys = (const UInt16 *) gCompareTable;
	UInt8		len1 = str1[0];
	UInt8		len2 = str2[0];
	UInt32		shared = (len1 < len2) ? len1 : len2;	/* bytes present in both */
	UInt32		pos;

	/* character data starts right after the length byte */
	for ( pos = 1; pos <= shared; ++pos )
	{
		UInt8	aChar = str1[pos];
		UInt8	bChar = str2[pos];
		UInt16	aKey, bKey;

		/* identical bytes need no table lookup */
		if (aChar == bChar)
			continue;

		aKey = sortKeys[aChar];
		bKey = sortKeys[bChar];

		if (aKey != bKey)
			return (aKey > bKey) ? 1 : -1;
	}

	/* common prefix is equivalent: the lengths decide */
	if (len1 == len2)
		return 0;

	return (len1 < len2) ? -1 : 1;
}


//
//	FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
//
//	    IF				RESULT
//	--------------------------
//	str1 < str2		=>	-1
//	str1 = str2		=>	 0
//	str1 > str2		=>	+1
//
//	The lower case table starts with 256 entries (one for each of the upper bytes
//	of the original Unicode char).  If that entry is zero, then all characters with
//	that upper byte are already case folded.  If the entry is non-zero, then it is
//	the _index_ (not byte offset) of the start of the sub-table for the characters
//	with that upper byte.  All ignorable characters are folded to the value zero.
//
//	In pseudocode:
//
//		Let c = source Unicode character
//		Let table[] = lower case table
//
//		lower = table[highbyte(c)]
//		if (lower == 0)
//			lower = c
//		else
//			lower = table[lower+lowbyte(c)]
//
//		if (lower == 0)
//			ignore this character
//
//	To handle ignorable characters, we now need a loop to find the next valid character.
//	Also, we can't pre-compute the number of characters to compare; the string length might
//	be larger than the number of non-ignorable characters.  Further, we must be able to handle
//	ignorable characters at any point in the string, including as the first or last characters.
//	We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
//	Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
//	the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
//	an invalid Unicode character).
//
//	Pseudocode:
//
//		while (1) {
//			c1 = GetNextValidChar(str1)			//	returns zero if at end of string
//			c2 = GetNextValidChar(str2)
//
//			if (c1 != c2) break					//	found a difference
//
//			if (c1 == 0)						//	reached end of string on both strings at once?
//				return 0;						//	yes, so strings are equal
//		}
//
//		// When we get here, c1 != c2.  So, we just need to determine which one is less.
//		if (c1 < c2)
//			return -1;
//		else
//			return 1;
//

/*
 * str1/length1 and str2/length2 are arrays of UniChar with their element
 * counts.  Returns -1, 0 or +1.  Characters below 0x0100 are folded with
 * gLatinCaseFold; all others go through the two-level gLowerCaseTable.
 * Note that the Latin fast path leaves the skip loop unconditionally, so a
 * zero coming out of gLatinCaseFold is compared as-is instead of being
 * skipped like other ignorable characters.
 */
SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
							register ConstUniCharArrayPtr str2, register ItemCount length2)
{
	register UInt16		ch1, ch2;
	register UInt16		subTable;
	register UInt16*	foldTable = (UInt16*) gLowerCaseTable;

	for (;;) {
		/* zero stands for "nothing left" until a real character is found */
		ch1 = 0;
		ch2 = 0;

		/* next non-ignorable character of str1 */
		while (length1 != 0 && ch1 == 0) {
			ch1 = *str1++;
			--length1;

			/* fast path for the first 256 code points */
			if (ch1 < 0x0100) {
				ch1 = gLatinCaseFold[ch1];
				break;
			}

			/* a zero page entry means the whole page is already folded */
			subTable = foldTable[ch1 >> 8];
			if (subTable != 0)
				ch1 = foldTable[subTable + (ch1 & 0x00FF)];
		}

		/* next non-ignorable character of str2 */
		while (length2 != 0 && ch2 == 0) {
			ch2 = *str2++;
			--length2;

			/* fast path for the first 256 code points */
			if (ch2 < 0x0100) {
				ch2 = gLatinCaseFold[ch2];
				break;
			}

			/* a zero page entry means the whole page is already folded */
			subTable = foldTable[ch2 >> 8];
			if (subTable != 0)
				ch2 = foldTable[subTable + (ch2 & 0x00FF)];
		}

		/* first difference decides the ordering */
		if (ch1 != ch2)
			return (ch1 < ch2) ? -1 : 1;

		/* both strings ran out together: equal */
		if (ch1 == 0)
			return 0;
	}
}


/*
 * Build a "mangled" UTF-8 name: the UTF-8 encoding of srcStr with its
 * extension removed, followed by the '#'-prefixed hex form of cnid and
 * then the original extension (if any).
 *
 *	srcLen		size of srcStr in bytes
 *	maxDstLen	size of the dstStr buffer in bytes
 *	actualDstLen	receives the length of the result
 *
 * The status of utf8_encodestr is deliberately ignored and noErr is
 * always returned.  The code relies on utf8_encodestr leaving dstStr
 * NUL-terminated, since the suffixes are appended with strcat.
 *
 * NOTE(review): maxDstLen must be larger than the combined suffix length;
 * if ByteCount is unsigned (as it appears to be) a smaller value makes the
 * subtraction below wrap and the encode limit becomes huge -- confirm that
 * every caller guarantees this.
 */
OSErr
ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
					 ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
{
	char		idSuffix[15];
	char		extSuffix[15];
	size_t		extLen;
	size_t		suffixLen;
	size_t		encodedLen;
	ByteCount	encodeLimit;

	GetFileIDString(cnid, idSuffix);
	GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extSuffix);

	extLen = strlen(extSuffix);
	suffixLen = extLen + strlen(idSuffix);

	/* the extension is re-attached after the ID, so do not encode it */
	srcLen -= extLen * sizeof(UniChar);

	/* leave room in the destination for both suffixes */
	encodeLimit = maxDstLen - suffixLen;

	(void) utf8_encodestr(srcStr, srcLen, dstStr, &encodedLen, encodeLimit, ':', 0);

	strcat(dstStr, idSuffix);
	strcat(dstStr, extSuffix);
	*actualDstLen = encodedLen + suffixLen;

	return noErr;
}
Commit	Line	Data
1c79356b A	1	/*
	2	* Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
	6	* The contents of this file constitute Original Code as defined in and
	7	* are subject to the Apple Public Source License Version 1.1 (the
	8	* "License"). You may not use this file except in compliance with the
	9	* License. Please obtain a copy of the License at
	10	* http://www.apple.com/publicsource and read it before using this file.
	11	*
	12	* This Original Code and all software distributed under the License are
	13	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	14	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	15	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	16	* FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
	17	* License for the specific language governing rights and limitations
	18	* under the License.
	19	*
	20	* @APPLE_LICENSE_HEADER_END@
	21	*/
	22	/*
	23	File: UnicodeWrappers.c
	24
	25	Contains: Wrapper routines for Unicode conversion and comparison.
	26
	27	*/
	28	#include <sys/param.h>
	29	#include <sys/utfconv.h>
	30
	31	#include "../../hfs_macos_defs.h"
	32	#include "UCStringCompareData.h"
	33
	34	#include "../headers/FileMgrInternal.h"
	35	#include "../headers/HFSUnicodeWrappers.h"
	36
	37	enum {
	38	kMinFileExtensionChars = 1, /* does not include dot */
	39	kMaxFileExtensionChars = 5 /* does not include dot */
	40	};
	41
	42
	43	#define EXTENSIONCHAR(c) (((c) >= 0x61 && (c) <= 0x7A) \|\| \
	44	((c) >= 0x41 && (c) <= 0x5A) \|\| \
	45	((c) >= 0x30 && (c) <= 0x39))
	46
	47
	48	#define IsHexDigit(c) (((c) >= (UInt8) '0' && (c) <= (UInt8) '9') \|\| \
	49	((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
	50
	51
	52	static void GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
	53
	54	static void GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );
	55
	56	static UInt32 HexStringToInteger( UInt32 length, const UInt8 *hexStr );
	57
	58
	59
	60	/*
	61	* Convert file ID into a hexidecimal string with no leading zeros
	62	*/
	63	static void
	64	GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
65	{
66	SInt32 i, b;
67	UInt8 translate = (UInt8 ) "0123456789ABCDEF";
68	UInt8 c;
69
70	fileIDStr[0] = '#';
71
72	for ( i = 0, b = 28; b >= 0; b -= 4 ) {
73	c = *(translate + ((fileID >> b) & 0x0000000F));
74
75	/* if its not a leading zero add it to our string */
76	if ( (c != (UInt8) '0') \|\| (i > 1) \|\| (b == 0) )
77	fileIDStr[++i] = c;
78	}
79
80	fileIDStr[++i] = '\0';
81	}
82
83
84	/*
85	* Get filename extension (if any) as a C string
86	*/
87	static void
88	GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
89	{
90	UInt32 i;
91	UniChar c;
92	UInt16 extChars; /* number of extension chars (excluding dot) */
93	UInt16 maxExtChars;
94	Boolean foundExtension;
95
96	extStr[0] = '\0'; /* assume there's no extension */
97
98	if ( length < 3 )
99	return; /* "x.y" is smallest possible extension */
100
101	if ( length < (kMaxFileExtensionChars + 2) )
102	maxExtChars = length - 2; /* save room for prefix + dot */
103	else
104	maxExtChars = kMaxFileExtensionChars;
105
106	i = length;
107	extChars = 0;
108	foundExtension = false;
109
110	while ( extChars <= maxExtChars ) {
111	c = unicodeStr[--i];
112
113	/* look for leading dot */
114	if ( c == (UniChar) '.' ) {
115	if ( extChars > 0 ) /* cannot end with a dot */
116	foundExtension = true;
117	break;
118	}
119
120	if ( EXTENSIONCHAR(c) )
121	++extChars;
122	else
123	break;
124	}
125
126	/* if we found one then copy it */
127	if ( foundExtension ) {
128	UInt8 *extStrPtr = extStr;
129	const UniChar *unicodeStrPtr = &unicodeStr[i];
130
131	for ( i = 0; i <= extChars; ++i )
132	(extStrPtr++) = (UInt8) (unicodeStrPtr++);
133	extStr[extChars + 1] = '\0'; /* terminate extension + dot */
134	}
135	}
136
137
138
139	/*
140	* Count filename extension characters (if any)
141	*/
142	static UInt32
143	CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
144	{
145	UInt32 i;
146	UniChar c;
147	UInt32 extChars; /* number of extension chars (excluding dot) */
148	UInt16 maxExtChars;
149	Boolean foundExtension;
150
1c79356b A	151	if ( length < 3 )
	152	return 0; /* "x.y" is smallest possible extension */
	153
	154	if ( length < (kMaxFileExtensionChars + 2) )
	155	maxExtChars = length - 2; /* save room for prefix + dot */
	156	else
	157	maxExtChars = kMaxFileExtensionChars;
	158
	159	extChars = 0; /* assume there's no extension */
	160	i = length - 1; /* index to last ascii character */
	161	foundExtension = false;
	162
	163	while ( extChars <= maxExtChars ) {
	164	c = filename[i--];
	165
	166	/* look for leading dot */
	167	if ( c == (UInt8) '.' ) {
	168	if ( extChars > 0 ) /* cannot end with a dot */
	169	return (extChars);
	170
	171	break;
	172	}
	173
	174	if ( EXTENSIONCHAR(c) )
	175	++extChars;
	176	else
	177	break;
	178	}
	179
	180	return 0;
	181	}
	182
	183
	184	/*
	185	* extract the file id from a mangled name
	186	*/
	187	HFSCatalogNodeID
	188	GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
	189	{
	190	short extChars;
	191	short i;
	192	UInt8 c;
	193
	194	*prefixLength = 0;
	195
	196	if ( filename == NULL )
	197	return 0;
	198
1c79356b A	199	if ( length < 28 )
	200	return 0; /* too small to have been mangled */
	201
	202	/* big enough for a file ID (#10) and an extension (.x) ? */
	203	if ( length > 5 )
	204	extChars = CountFilenameExtensionChars(filename, length);
	205	else
	206	extChars = 0;
	207
	208	/* skip over dot plus extension characters */
	209	if ( extChars > 0 )
	210	length -= (extChars + 1);
	211
	212	/* scan for file id digits */
	213	for ( i = length - 1; i >= 0; --i) {
	214	c = filename[i];
	215
	216	/* look for file ID marker */
	217	if ( c == '#' ) {
	218	if ( (length - i) < 3 )
	219	break; /* too small to be a file ID */
	220
	221	*prefixLength = i;
	222	return HexStringToInteger(length - i - 1, &filename[i+1]);
	223	}
	224
	225	if ( !IsHexDigit(c) )
	226	break; /* file ID string must have hex digits */
	227	}
	228
	229	return 0;
	230	}
	231
	232
	233
	234	static UInt32
	235	HexStringToInteger(UInt32 length, const UInt8 *hexStr)
	236	{
	237	UInt32 value;
	238	short i;
	239	UInt8 c;
	240	const UInt8 *p;
	241
	242	value = 0;
	243	p = hexStr;
	244
	245	for ( i = 0; i < length; ++i ) {
	246	c = *p++;
	247
	248	if (c >= '0' && c <= '9') {
	249	value = value << 4;
	250	value += (UInt32) c - (UInt32) '0';
	251	} else if (c >= 'A' && c <= 'F') {
	252	value = value << 4;
	253	value += 10 + ((unsigned int) c - (unsigned int) 'A');
	254	} else {
	255	return 0; /* bad character */
	256	}
	257	}
	258
	259	return value;
	260	}
	261
	262
263	/*
264	* Routine: FastRelString
265	*
266	* Output: returns -1 if str1 < str2
267	* returns 1 if str1 > str2
268	* return 0 if equal
269	*
270	*/
1c79356b A	271	SInt32 FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
	272	{
	273	UInt16* compareTable;
	274	SInt32 bestGuess;
	275	UInt8 length, length2;
	276	UInt8 delta;
	277
	278	delta = 0;
	279	length = *(str1++);
	280	length2 = *(str2++);
	281
	282	if (length == length2)
	283	bestGuess = 0;
	284	else if (length < length2)
	285	{
	286	bestGuess = -1;
	287	delta = length2 - length;
	288	}
	289	else
	290	{
	291	bestGuess = 1;
	292	length = length2;
	293	}
	294
	295	compareTable = (UInt16*) gCompareTable;
	296
	297	while (length--)
	298	{
	299	UInt8 aChar, bChar;
	300
	301	aChar = *(str1++);
	302	bChar = *(str2++);
	303
	304	if (aChar != bChar) // If they don't match exacly, do case conversion
	305	{
	306	UInt16 aSortWord, bSortWord;
	307
	308	aSortWord = compareTable[aChar];
	309	bSortWord = compareTable[bChar];
	310
	311	if (aSortWord > bSortWord)
	312	return 1;
	313
	314	if (aSortWord < bSortWord)
	315	return -1;
	316	}
	317
	318	// If characters match exactly, then go on to next character immediately without
	319	// doing any extra work.
	320	}
	321
	322	// if you got to here, then return bestGuess
	323	return bestGuess;
	324	}
	325
	326
	327
	328	//
	329	// FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
	330	//
	331	// IF RESULT
	332	// --------------------------
	333	// str1 < str2 => -1
	334	// str1 = str2 => 0
335	// str1 > str2 => +1
336	//
337	// The lower case table starts with 256 entries (one for each of the upper bytes
338	// of the original Unicode char). If that entry is zero, then all characters with
339	// that upper byte are already case folded. If the entry is non-zero, then it is
340	// the _index_ (not byte offset) of the start of the sub-table for the characters
341	// with that upper byte. All ignorable characters are folded to the value zero.
342	//
343	// In pseudocode:
344	//
345	// Let c = source Unicode character
346	// Let table[] = lower case table
347	//
348	// lower = table[highbyte(c)]
349	// if (lower == 0)
350	// lower = c
351	// else
352	// lower = table[lower+lowbyte(c)]
353	//
354	// if (lower == 0)
355	// ignore this character
356	//
357	// To handle ignorable characters, we now need a loop to find the next valid character.
358	// Also, we can't pre-compute the number of characters to compare; the string length might
359	// be larger than the number of non-ignorable characters. Further, we must be able to handle
360	// ignorable characters at any point in the string, including as the first or last characters.
361	// We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
362	// Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
363	// the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
364	// an invalid Unicode character).
365	//
366	// Pseudocode:
367	//
368	// while (1) {
369	// c1 = GetNextValidChar(str1) // returns zero if at end of string
370	// c2 = GetNextValidChar(str2)
371	//
372	// if (c1 != c2) break // found a difference
373	//
374	// if (c1 == 0) // reached end of string on both strings at once?
375	// return 0; // yes, so strings are equal
376	// }
377	//
378	// // When we get here, c1 != c2. So, we just need to determine which one is less.
379	// if (c1 < c2)
380	// return -1;
381	// else
382	// return 1;
383	//
384
1c79356b A	385	SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
	386	register ConstUniCharArrayPtr str2, register ItemCount length2)
	387	{
	388	register UInt16 c1,c2;
	389	register UInt16 temp;
	390	register UInt16* lowerCaseTable;
	391
	392	lowerCaseTable = (UInt16*) gLowerCaseTable;
	393
	394	while (1) {
	395	/* Set default values for c1, c2 in case there are no more valid chars */
	396	c1 = 0;
	397	c2 = 0;
	398
	399	/* Find next non-ignorable char from str1, or zero if no more */
	400	while (length1 && c1 == 0) {
	401	c1 = *(str1++);
	402	--length1;
	403	/* check for basic latin first */
	404	if (c1 < 0x0100) {
	405	c1 = gLatinCaseFold[c1];
	406	break;
	407	}
	408	/* case fold if neccessary */
	409	if ((temp = lowerCaseTable[c1>>8]) != 0)
	410	c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
	411	}
	412
	413
	414	/* Find next non-ignorable char from str2, or zero if no more */
	415	while (length2 && c2 == 0) {
	416	c2 = *(str2++);
	417	--length2;
	418	/* check for basic latin first */
	419	if (c2 < 0x0100) {
	420	c2 = gLatinCaseFold[c2];
	421	break;
	422	}
	423	/* case fold if neccessary */
	424	if ((temp = lowerCaseTable[c2>>8]) != 0)
	425	c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
	426	}
	427
	428	if (c1 != c2) // found a difference, so stop looping
	429	break;
	430
	431	if (c1 == 0) // did we reach the end of both strings at the same time?
	432	return 0; // yes, so strings are equal
	433	}
	434
	435	if (c1 < c2)
	436	return -1;
	437	else
	438	return 1;
	439	}
	440
	441
	442	OSErr
	443	ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
	444	ByteCount actualDstLen, unsigned char dstStr, HFSCatalogNodeID cnid)
	445	{
	446	ByteCount subMaxLen;
	447	size_t utf8len;
	448	char fileIDStr[15];
449	char extStr[15];
450
451	GetFileIDString(cnid, fileIDStr);
452	GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
453
454	/* remove extension chars from source */
455	srcLen -= strlen(extStr) * sizeof(UniChar);
456	subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
457
458	(void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
459
460	strcat(dstStr, fileIDStr);
461	strcat(dstStr, extStr);
462	*actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
463
464	return noErr;
465	}
466