2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 File: UnicodeWrappers.c
25 Contains: Wrapper routines for Unicode conversion and comparison.
28 #include <sys/param.h>
29 #include <sys/utfconv.h>
31 #include "../../hfs_macos_defs.h"
32 #include "UCStringCompareData.h"
34 #include "../headers/FileMgrInternal.h"
35 #include "../headers/HFSUnicodeWrappers.h"
/*
 * Limits on the number of characters in a filename extension, not counting
 * the dot. kMaxFileExtensionChars bounds the extension scan in
 * GetFilenameExtension and CountFilenameExtensionChars.
 * NOTE(review): the enclosing declaration is not visible in this listing
 * (presumably an anonymous enum) -- confirm.
 */
38 kMinFileExtensionChars
= 1, /* does not include dot */
39 kMaxFileExtensionChars
= 5 /* does not include dot */
/*
 * EXTENSIONCHAR(c) is nonzero when c is an ASCII letter or digit:
 * 'a'-'z' (0x61-0x7A), 'A'-'Z' (0x41-0x5A) or '0'-'9' (0x30-0x39).
 * It decides which characters may belong to a filename extension.
 *
 * The argument is evaluated more than once, so it must not have side
 * effects (no c++ or function calls with state).
 */
#define EXTENSIONCHAR(c)	(((c) >= 0x61 && (c) <= 0x7A) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x30 && (c) <= 0x39))
/*
 * IsHexDigit(c) is nonzero when c is '0'-'9' or an upper-case 'A'-'F'.
 * Lower-case 'a'-'f' is deliberately not accepted: file ID strings are
 * produced from the upper-case digit table "0123456789ABCDEF".
 *
 * The argument is evaluated more than once, so it must not have side
 * effects.
 */
#define IsHexDigit(c)		(((c) >= '0' && (c) <= '9') || \
				 ((c) >= 'A' && (c) <= 'F'))
52 static void GetFilenameExtension( ItemCount length
, ConstUniCharArrayPtr unicodeStr
, char* extStr
);
54 static void GetFileIDString( HFSCatalogNodeID fileID
, char* fileIDStr
);
56 static UInt32
HexStringToInteger( UInt32 length
, const UInt8
*hexStr
);
61 * Convert file ID into a hexadecimal string with no leading zeros
/*
 * GetFileIDString
 *
 * Writes fileID into fileIDStr as upper-case hexadecimal text, taking one
 * 4-bit nibble at a time from bit 28 down to bit 0, then NUL-terminates
 * the string.
 *
 * NOTE(review): this listing omits source lines (return type, local
 * declarations, braces, and the statement controlled by the `if` below),
 * so these comments describe only what is visible.
 */
64 GetFileIDString( HFSCatalogNodeID fileID
, char * fileIDStr
)
/* digit lookup table indexed by nibble value */
67 UInt8
*translate
= (UInt8
*) "0123456789ABCDEF";
/* b is the shift count of the current nibble: 28, 24, ..., 0 */
72 for ( i
= 0, b
= 28; b
>= 0; b
-= 4 ) {
/* fetch the digit character for the nibble at bit offset b */
73 c
= *(translate
+ ((fileID
>> b
) & 0x0000000F));
75 /* if it's not a leading zero add it to our string */
/*
 * The (b == 0) term lets the lowest nibble through even when it is zero.
 * NOTE(review): (i > 1) only becomes true once i has passed 1. If i is the
 * index of the last character stored, a zero digit directly after the first
 * stored digit (e.g. the middle digit of 0x105) would be skipped -- confirm
 * against the elided store statement.
 */
76 if ( (c
!= (UInt8
) '0') || (i
> 1) || (b
== 0) )
/* terminate in the slot after the last index used */
80 fileIDStr
[++i
] = '\0';
85 * Get filename extension (if any) as a C string
/*
 * GetFilenameExtension
 *
 * length     - number of UniChar elements in unicodeStr
 * unicodeStr - source filename
 * extStr     - receives the extension, dot included, as a C string;
 *              set to the empty string when no extension is recognised
 *
 * An extension is a '.' followed by at least one character accepted by
 * EXTENSIONCHAR. The scan is bounded by maxExtChars, which is at most
 * kMaxFileExtensionChars.
 *
 * NOTE(review): this listing omits source lines (local declarations, the
 * short-name test guarding the early return, the index setup, the character
 * fetch and the else/break arms), so these comments cover only what is
 * visible.
 */
88 GetFilenameExtension(ItemCount length
, ConstUniCharArrayPtr unicodeStr
, char * extStr
)
92 UInt16 extChars
; /* number of extension chars (excluding dot) */
94 Boolean foundExtension
;
96 extStr
[0] = '\0'; /* assume there's no extension */
99 return; /* "x.y" is smallest possible extension */
/* cap the scan so at least a one-character prefix and the dot remain */
101 if ( length
< (kMaxFileExtensionChars
+ 2) )
102 maxExtChars
= length
- 2; /* save room for prefix + dot */
104 maxExtChars
= kMaxFileExtensionChars
;
108 foundExtension
= false;
110 while ( extChars
<= maxExtChars
) {
113 /* look for leading dot */
114 if ( c
== (UniChar
) '.' ) {
115 if ( extChars
> 0 ) /* cannot end with a dot */
116 foundExtension
= true;
120 if ( EXTENSIONCHAR(c
) )
126 /* if we found one then copy it */
127 if ( foundExtension
) {
128 UInt8
*extStrPtr
= extStr
;
/* start copying at index i of the source string */
129 const UniChar
*unicodeStrPtr
= &unicodeStr
[i
];
/* copies extChars + 1 elements, narrowing each UniChar to one byte */
131 for ( i
= 0; i
<= extChars
; ++i
)
132 *(extStrPtr
++) = (UInt8
) *(unicodeStrPtr
++);
133 extStr
[extChars
+ 1] = '\0'; /* terminate extension + dot */
140 * Count filename extension characters (if any)
/*
 * CountFilenameExtensionChars
 *
 * filename - byte string to examine
 * length   - number of bytes in filename, or kUndefinedStrLen to have the
 *            length taken with strlen()
 *
 * Returns 0 when the name is too short to hold an extension. The scan
 * starts at the last byte (i = length - 1), looks for a '.', and tests
 * characters with EXTENSIONCHAR; it is bounded by maxExtChars, which is at
 * most kMaxFileExtensionChars.
 *
 * NOTE(review): this listing omits source lines (return type, some
 * declarations, the short-name test, the character fetch, the else/break
 * arms and the final return), so the value returned on the normal path is
 * not visible here -- presumably the extension character count, excluding
 * the dot.
 */
143 CountFilenameExtensionChars( const unsigned char * filename
, UInt32 length
)
147 UInt32 extChars
; /* number of extension chars (excluding dot) */
149 Boolean foundExtension
;
/* caller did not supply a length: measure the C string */
151 if (length
== kUndefinedStrLen
)
152 length
= strlen(filename
);
155 return 0; /* "x.y" is smallest possible extension */
/* cap the scan so at least a one-character prefix and the dot remain */
157 if ( length
< (kMaxFileExtensionChars
+ 2) )
158 maxExtChars
= length
- 2; /* save room for prefix + dot */
160 maxExtChars
= kMaxFileExtensionChars
;
162 extChars
= 0; /* assume there's no extension */
163 i
= length
- 1; /* index to last ascii character */
164 foundExtension
= false;
166 while ( extChars
<= maxExtChars
) {
169 /* look for leading dot */
170 if ( c
== (UInt8
) '.' ) {
171 if ( extChars
> 0 ) /* cannot end with a dot */
177 if ( EXTENSIONCHAR(c
) )
188 * extract the file id from a mangled name
/*
 * GetEmbeddedFileID
 *
 * filename     - byte string that may be a mangled name
 * length       - number of bytes in filename, or kUndefinedStrLen to have
 *                the length taken with strlen()
 * prefixLength - output parameter; the statements that use it are not
 *                visible in this listing
 *
 * Returns 0 when the name is too short to have been mangled. Otherwise the
 * trailing extension (if any) is discounted, the remaining name is scanned
 * backwards over hex digits, and the digits following the file ID marker
 * are converted with HexStringToInteger.
 *
 * NOTE(review): this listing omits source lines (return type, local
 * declarations, several conditions, the character fetch and the final
 * return), so these comments cover only what is visible.
 */
191 GetEmbeddedFileID(const unsigned char * filename
, UInt32 length
, UInt32
*prefixLength
)
199 if ( filename
== NULL
)
/* caller did not supply a length: measure the C string */
202 if (length
== kUndefinedStrLen
)
203 length
= strlen(filename
);
206 return 0; /* too small to have been mangled */
208 /* big enough for a file ID (#10) and an extension (.x) ? */
210 extChars
= CountFilenameExtensionChars(filename
, length
);
214 /* skip over dot plus extension characters */
216 length
-= (extChars
+ 1);
218 /* scan for file id digits */
/*
 * NOTE(review): the `i >= 0` exit test only works if i is a signed type;
 * its declaration is not visible here -- confirm.
 */
219 for ( i
= length
- 1; i
>= 0; --i
) {
222 /* look for file ID marker */
/* fewer than two characters follow position i */
224 if ( (length
- i
) < 3 )
225 break; /* too small to be a file ID */
/* convert the length - i - 1 characters that follow position i */
228 return HexStringToInteger(length
- i
- 1, &filename
[i
+1]);
231 if ( !IsHexDigit(c
) )
232 break; /* file ID string must have hex digits */
/*
 * HexStringToInteger
 *
 * length - number of characters of hexStr to convert
 * hexStr - hex digits; only '0'-'9' and upper-case 'A'-'F' are accepted
 *
 * Adds each digit's numeric value into `value` and returns 0 as soon as a
 * character outside the accepted set is met, so a bad string cannot be
 * told apart from a string whose value is 0.
 *
 * NOTE(review): this listing omits source lines (return type, local
 * declarations, the character fetch, whatever scales `value` between
 * digits -- presumably a shift by 4 -- and the final return); confirm
 * against the full source.
 */
241 HexStringToInteger(UInt32 length
, const UInt8
*hexStr
)
251 for ( i
= 0; i
< length
; ++i
) {
/* decimal digit: contributes 0..9 */
254 if (c
>= '0' && c
<= '9') {
256 value
+= (UInt32
) c
- (UInt32
) '0';
/* upper-case letter digit: contributes 10..15 */
257 } else if (c
>= 'A' && c
<= 'F') {
259 value
+= 10 + ((unsigned int) c
- (unsigned int) 'A');
261 return 0; /* bad character */
270 * Routine: FastRelString
272 * Output: returns -1 if str1 < str2
273 * returns 1 if str1 > str2
/* Table of per-byte sort words, defined elsewhere; indexed by character value below. */
277 extern unsigned short gCompareTable
[];
/*
 * FastRelString
 *
 * Produces a relative ordering of str1 and str2. Bytes are compared pairwise;
 * only when two bytes differ are their sort words fetched from
 * gCompareTable and compared. If no sort words differ, the result is the
 * `bestGuess` mentioned in the closing comment, which is chosen from the
 * length comparison at the top of the function.
 *
 * NOTE(review): this listing omits source lines (most declarations, the
 * length fetches, the bestGuess assignments, the loop header and all return
 * statements), so these comments cover only what is visible. The parameters
 * are ConstStr255Param -- presumably length-prefixed strings; confirm.
 */
279 SInt32
FastRelString( ConstStr255Param str1
, ConstStr255Param str2
)
281 UInt16
* compareTable
;
283 UInt8 length
, length2
;
/* three-way split on the two lengths; the assignments it selects are elided */
290 if (length
== length2
)
292 else if (length
< length2
)
/* str1 is the shorter string: record by how much */
295 delta
= length2
- length
;
303 compareTable
= (UInt16
*) gCompareTable
;
312 if (aChar
!= bChar
) // If they don't match exactly, do case conversion
314 UInt16 aSortWord
, bSortWord
;
/* map both bytes through the sort-word table before comparing */
316 aSortWord
= compareTable
[aChar
];
317 bSortWord
= compareTable
[bChar
];
319 if (aSortWord
> bSortWord
)
322 if (aSortWord
< bSortWord
)
326 // If characters match exactly, then go on to next character immediately without
327 // doing any extra work.
330 // if you got to here, then return bestGuess
337 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
340 // --------------------------
345 // The lower case table starts with 256 entries (one for each of the upper bytes
346 // of the original Unicode char). If that entry is zero, then all characters with
347 // that upper byte are already case folded. If the entry is non-zero, then it is
348 // the _index_ (not byte offset) of the start of the sub-table for the characters
349 // with that upper byte. All ignorable characters are folded to the value zero.
353 // Let c = source Unicode character
354 // Let table[] = lower case table
356 // lower = table[highbyte(c)]
360 // lower = table[lower+lowbyte(c)]
363 // ignore this character
365 // To handle ignorable characters, we now need a loop to find the next valid character.
366 // Also, we can't pre-compute the number of characters to compare; the string length might
367 // be larger than the number of non-ignorable characters. Further, we must be able to handle
368 // ignorable characters at any point in the string, including as the first or last characters.
369 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
370 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
371 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
372 // an invalid Unicode character).
377 // c1 = GetNextValidChar(str1) // returns zero if at end of string
378 // c2 = GetNextValidChar(str2)
380 // if (c1 != c2) break // found a difference
382 // if (c1 == 0) // reached end of string on both strings at once?
383 // return 0; // yes, so strings are equal
386 // // When we get here, c1 != c2. So, we just need to determine which one is less.
/*
 * Case-folding tables, defined elsewhere. gLowerCaseTable is used as a
 * two-level table: the entry for a character's high byte, when non-zero, is
 * the index of the sub-table that is then indexed by the low byte.
 * gLatinCaseFold is indexed directly by the character value.
 */
393 extern UInt16 gLowerCaseTable
[];
394 extern UInt16 gLatinCaseFold
[];
/*
 * FastUnicodeCompare
 *
 * str1, length1 - first string and its remaining character count
 * str2, length2 - second string and its remaining character count
 *
 * Walks both strings in step. For each string it advances until it has a
 * character whose folded value is non-zero (zero marks an ignorable
 * character or the end of the string), then compares the two folded
 * values. Returns 0 when both strings run out at the same time.
 *
 * NOTE(review): this listing omits source lines (the outer loop header, the
 * default assignments to c1/c2, the character fetches and length
 * decrements, the basic-latin range tests, and the returns taken when the
 * characters differ), so these comments cover only what is visible.
 */
396 SInt32
FastUnicodeCompare ( register ConstUniCharArrayPtr str1
, register ItemCount length1
,
397 register ConstUniCharArrayPtr str2
, register ItemCount length2
)
399 register UInt16 c1
,c2
;
400 register UInt16 temp
;
401 register UInt16
* lowerCaseTable
;
403 lowerCaseTable
= (UInt16
*) gLowerCaseTable
;
406 /* Set default values for c1, c2 in case there are no more valid chars */
410 /* Find next non-ignorable char from str1, or zero if no more */
411 while (length1
&& c1
== 0) {
414 /* check for basic latin first */
416 c1
= gLatinCaseFold
[c1
];
419 /* case fold if necessary */
/* temp is the sub-table index for c1's high byte; zero means no folding needed */
420 if ((temp
= lowerCaseTable
[c1
>>8]) != 0)
421 c1
= lowerCaseTable
[temp
+ (c1
& 0x00FF)];
425 /* Find next non-ignorable char from str2, or zero if no more */
426 while (length2
&& c2
== 0) {
429 /* check for basic latin first */
431 c2
= gLatinCaseFold
[c2
];
434 /* case fold if necessary */
/* temp is the sub-table index for c2's high byte; zero means no folding needed */
435 if ((temp
= lowerCaseTable
[c2
>>8]) != 0)
436 c2
= lowerCaseTable
[temp
+ (c2
& 0x00FF)];
439 if (c1
!= c2
) // found a difference, so stop looping
442 if (c1
== 0) // did we reach the end of both strings at the same time?
443 return 0; // yes, so strings are equal
454 ConvertUnicodeToUTF8Mangled(ByteCount srcLen
, ConstUniCharArrayPtr srcStr
, ByteCount maxDstLen
,
455 ByteCount
*actualDstLen
, unsigned char* dstStr
, HFSCatalogNodeID cnid
)
462 GetFileIDString(cnid
, fileIDStr
);
463 GetFilenameExtension(srcLen
/sizeof(UniChar
), srcStr
, extStr
);
465 /* remove extension chars from source */
466 srcLen
-= strlen(extStr
) * sizeof(UniChar
);
467 subMaxLen
= maxDstLen
- (strlen(extStr
) + strlen(fileIDStr
));
469 (void) utf8_encodestr(srcStr
, srcLen
, dstStr
, &utf8len
, subMaxLen
, ':', 0);
471 strcat(dstStr
, fileIDStr
);
472 strcat(dstStr
, extStr
);
473 *actualDstLen
= utf8len
+ (strlen(extStr
) + strlen(fileIDStr
));