2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 File: UnicodeWrappers.c
31 Contains: Wrapper routines for Unicode conversion and comparison.
35 #include <sys/param.h>
36 #include <sys/utfconv.h>
38 #include "hfs_macos_defs.h"
39 #include "UCStringCompareData.h"
41 #include "FileMgrInternal.h"
42 #include "HFSUnicodeWrappers.h"
/*
 * Limits on the number of filename extension characters recognized by the
 * extension helpers in this file.  Neither count includes the leading dot.
 */
enum {
	kMinFileExtensionChars = 1,	/* does not include dot */
	kMaxFileExtensionChars = 5	/* does not include dot */
};

/*
 * True when c is an ASCII letter or digit (a-z, A-Z, 0-9).
 * The argument is evaluated more than once; pass a side-effect-free expression.
 */
#define EXTENSIONCHAR(c)	(((c) >= 0x61 && (c) <= 0x7A) || \
				 ((c) >= 0x41 && (c) <= 0x5A) || \
				 ((c) >= 0x30 && (c) <= 0x39))

/*
 * True when c is a decimal digit or an upper-case hex letter (A-F).
 * Lower-case a-f is rejected.  The argument is evaluated more than once.
 */
#define IsHexDigit(c)		(((c) >= (u_int8_t) '0' && (c) <= (u_int8_t) '9') || \
				 ((c) >= (u_int8_t) 'A' && (c) <= (u_int8_t) 'F'))
59 static void GetFilenameExtension( ItemCount length
, ConstUniCharArrayPtr unicodeStr
, char* extStr
);
62 static u_int32_t
HexStringToInteger( u_int32_t length
, const u_int8_t
*hexStr
);
66 * Get filename extension (if any) as a C string
69 GetFilenameExtension(ItemCount length
, ConstUniCharArrayPtr unicodeStr
, char * extStr
)
73 u_int16_t extChars
; /* number of extension chars (excluding dot) */
74 u_int16_t maxExtChars
;
75 Boolean foundExtension
;
77 extStr
[0] = '\0'; /* assume there's no extension */
80 return; /* "x.y" is smallest possible extension */
82 if ( length
< (kMaxFileExtensionChars
+ 2) )
83 maxExtChars
= length
- 2; /* save room for prefix + dot */
85 maxExtChars
= kMaxFileExtensionChars
;
89 foundExtension
= false;
91 while ( extChars
<= maxExtChars
) {
94 /* look for leading dot */
95 if ( c
== (UniChar
) '.' ) {
96 if ( extChars
> 0 ) /* cannot end with a dot */
97 foundExtension
= true;
101 if ( EXTENSIONCHAR(c
) )
107 /* if we found one then copy it */
108 if ( foundExtension
) {
109 u_int8_t
*extStrPtr
= (u_int8_t
*)extStr
;
110 const UniChar
*unicodeStrPtr
= &unicodeStr
[i
];
112 for ( i
= 0; i
<= extChars
; ++i
)
113 *(extStrPtr
++) = (u_int8_t
) *(unicodeStrPtr
++);
114 extStr
[extChars
+ 1] = '\0'; /* terminate extension + dot */
121 * Count filename extension characters (if any)
124 CountFilenameExtensionChars( const unsigned char * filename
, u_int32_t length
)
128 u_int32_t extChars
; /* number of extension chars (excluding dot) */
129 u_int16_t maxExtChars
;
130 Boolean foundExtension
;
133 return 0; /* "x.y" is smallest possible extension */
135 if ( length
< (kMaxFileExtensionChars
+ 2) )
136 maxExtChars
= length
- 2; /* save room for prefix + dot */
138 maxExtChars
= kMaxFileExtensionChars
;
140 extChars
= 0; /* assume there's no extension */
141 i
= length
- 1; /* index to last ascii character */
142 foundExtension
= false;
144 while ( extChars
<= maxExtChars
) {
147 /* look for leading dot */
148 if ( c
== (u_int8_t
) '.' ) {
149 if ( extChars
> 0 ) /* cannot end with a dot */
155 if ( EXTENSIONCHAR(c
) )
166 * extract the file id from a mangled name
169 GetEmbeddedFileID(const unsigned char * filename
, u_int32_t length
, u_int32_t
*prefixLength
)
177 if ( filename
== NULL
)
181 return 0; /* too small to have been mangled */
183 /* big enough for a file ID (#10) and an extension (.x) ? */
185 extChars
= CountFilenameExtensionChars(filename
, length
);
189 /* skip over dot plus extension characters */
191 length
-= (extChars
+ 1);
193 /* scan for file id digits */
194 for ( i
= length
- 1; i
>= 0; --i
) {
197 /* look for file ID marker */
199 if ( (length
- i
) < 3 )
200 break; /* too small to be a file ID */
203 return HexStringToInteger(length
- i
- 1, &filename
[i
+1]);
206 if ( !IsHexDigit(c
) )
207 break; /* file ID string must have hex digits */
/*
 * Convert a string of upper-case hexadecimal digits to an integer.
 *
 *	length	number of bytes of hexStr to convert
 *	hexStr	digits '0'-'9' and 'A'-'F'; need not be NUL terminated
 *
 * Returns the converted value.  Returns 0 when length is 0 or when any
 * byte is not an upper-case hex digit (lower-case a-f is rejected).  Only
 * the low 32 bits are kept when more than eight digits are supplied.
 */
static u_int32_t
HexStringToInteger(u_int32_t length, const u_int8_t *hexStr)
{
	u_int32_t	value = 0;
	u_int32_t	i;
	u_int8_t	c;

	for ( i = 0; i < length; ++i ) {
		c = hexStr[i];

		if (c >= '0' && c <= '9') {
			value = value << 4;	/* make room for the next nibble */
			value += (u_int32_t) c - (u_int32_t) '0';
		} else if (c >= 'A' && c <= 'F') {
			value = value << 4;
			value += 10 + ((unsigned int) c - (unsigned int) 'A');
		} else {
			return 0;	/* bad character */
		}
	}

	return value;
}
245 * Routine: FastRelString
247 * Output: returns -1 if str1 < str2
248 * returns 1 if str1 > str2
252 int32_t FastRelString( ConstStr255Param str1
, ConstStr255Param str2
)
254 u_int16_t
* compareTable
;
256 u_int8_t length
, length2
;
263 if (length
== length2
)
265 else if (length
< length2
)
268 delta
= length2
- length
;
276 compareTable
= (u_int16_t
*) gCompareTable
;
280 u_int8_t aChar
, bChar
;
285 if (aChar
!= bChar
) // If they don't match exacly, do case conversion
287 u_int16_t aSortWord
, bSortWord
;
289 aSortWord
= compareTable
[aChar
];
290 bSortWord
= compareTable
[bChar
];
292 if (aSortWord
> bSortWord
)
295 if (aSortWord
< bSortWord
)
299 // If characters match exactly, then go on to next character immediately without
300 // doing any extra work.
303 // if you got to here, then return bestGuess
310 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
313 // --------------------------
318 // The lower case table starts with 256 entries (one for each of the upper bytes
319 // of the original Unicode char). If that entry is zero, then all characters with
320 // that upper byte are already case folded. If the entry is non-zero, then it is
321 // the _index_ (not byte offset) of the start of the sub-table for the characters
322 // with that upper byte. All ignorable characters are folded to the value zero.
326 // Let c = source Unicode character
327 // Let table[] = lower case table
329 // lower = table[highbyte(c)]
333 // lower = table[lower+lowbyte(c)]
336 // ignore this character
338 // To handle ignorable characters, we now need a loop to find the next valid character.
339 // Also, we can't pre-compute the number of characters to compare; the string length might
340 // be larger than the number of non-ignorable characters. Further, we must be able to handle
341 // ignorable characters at any point in the string, including as the first or last characters.
342 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
343 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
344 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
345 // an invalid Unicode character).
350 // c1 = GetNextValidChar(str1) // returns zero if at end of string
351 // c2 = GetNextValidChar(str2)
353 // if (c1 != c2) break // found a difference
355 // if (c1 == 0) // reached end of string on both strings at once?
356 // return 0; // yes, so strings are equal
359 // // When we get here, c1 != c2. So, we just need to determine which one is less.
366 int32_t FastUnicodeCompare ( register ConstUniCharArrayPtr str1
, register ItemCount length1
,
367 register ConstUniCharArrayPtr str2
, register ItemCount length2
)
369 register u_int16_t c1
,c2
;
370 register u_int16_t temp
;
371 register u_int16_t
* lowerCaseTable
;
373 lowerCaseTable
= (u_int16_t
*) gLowerCaseTable
;
376 /* Set default values for c1, c2 in case there are no more valid chars */
380 /* Find next non-ignorable char from str1, or zero if no more */
381 while (length1
&& c1
== 0) {
384 /* check for basic latin first */
386 c1
= gLatinCaseFold
[c1
];
389 /* case fold if neccessary */
390 if ((temp
= lowerCaseTable
[c1
>>8]) != 0)
391 c1
= lowerCaseTable
[temp
+ (c1
& 0x00FF)];
395 /* Find next non-ignorable char from str2, or zero if no more */
396 while (length2
&& c2
== 0) {
399 /* check for basic latin first */
401 c2
= gLatinCaseFold
[c2
];
404 /* case fold if neccessary */
405 if ((temp
= lowerCaseTable
[c2
>>8]) != 0)
406 c2
= lowerCaseTable
[temp
+ (c2
& 0x00FF)];
409 if (c1
!= c2
) // found a difference, so stop looping
412 if (c1
== 0) // did we reach the end of both strings at the same time?
413 return 0; // yes, so strings are equal
423 * UnicodeBinaryCompare
424 * Compare two UTF-16 strings and perform case-sensitive (binary) matching against them.
426 * Results are emitted like FastUnicodeCompare:
430 * --------------------------
435 * The case matching source code is greatly simplified due to the lack of case-folding
436 * in this comparison routine. We compare, in order: the lengths, then do character-by-
437 * character comparisons.
440 int32_t UnicodeBinaryCompare (register ConstUniCharArrayPtr str1
, register ItemCount len1
,
441 register ConstUniCharArrayPtr str2
, register ItemCount len2
) {
447 /* Set default values for the two character pointers */
451 /* First generate the string length (for comparison purposes) */
453 string_length
= len1
;
456 else if (len1
> len2
) {
457 string_length
= len2
;
461 string_length
= len1
;
464 /* now compare the two string pointers */
465 while (string_length
--) {
478 /* If equal, iterate to the next two respective chars */
486 ConvertUnicodeToUTF8Mangled(ByteCount srcLen
, ConstUniCharArrayPtr srcStr
, ByteCount maxDstLen
,
487 ByteCount
*actualDstLen
, unsigned char* dstStr
, HFSCatalogNodeID cnid
)
494 snprintf(fileIDStr
, sizeof(fileIDStr
), "#%X", cnid
);
495 GetFilenameExtension(srcLen
/sizeof(UniChar
), srcStr
, extStr
);
497 /* remove extension chars from source */
498 srcLen
-= strlen(extStr
) * sizeof(UniChar
);
499 subMaxLen
= maxDstLen
- (strlen(extStr
) + strlen(fileIDStr
));
501 (void) utf8_encodestr(srcStr
, srcLen
, dstStr
, &utf8len
, subMaxLen
, ':', 0);
503 strlcat((char *)dstStr
, fileIDStr
, maxDstLen
);
504 strlcat((char *)dstStr
, extStr
, maxDstLen
);
505 *actualDstLen
= utf8len
+ (strlen(extStr
) + strlen(fileIDStr
));