2 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 2.0 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 * HFSCompare.c - Functions for working with and comparing HFS nams.
25 * Copyright (c) 1999-2000 Apple Computer, Inc.
31 #include "hfs_CaseTables.h"
35 static unsigned short *
36 UncompressStructure(struct compressed_block
*bp
, int count
, int size
)
38 unsigned short *out
= malloc(size
);
39 unsigned short *op
= out
;
43 for (i
=0; i
<count
; i
++, bp
++) {
45 for (j
=0; j
<bp
->count
; j
++) {
47 if (bp
->type
== kTypeAscending
) data
++;
48 else if (bp
->type
== kTypeAscending256
) data
+= 256;
55 InitCompareTables(void)
57 if (gCompareTable
== 0) {
58 gCompareTable
= UncompressStructure(gCompareTableCompressed
,
59 kCompareTableNBlocks
, kCompareTableDataSize
);
60 gLowerCaseTable
= UncompressStructure(gLowerCaseTableCompressed
,
61 kLowerCaseTableNBlocks
, kLowerCaseTableDataSize
);
65 #endif /* ! UNCOMPRESSED */
67 //_______________________________________________________________________
69 // Routine: FastRelString
71 // Output: returns -1 if str1 < str2
72 // returns 1 if str1 > str2
75 //_______________________________________________________________________
77 int32_t FastRelString(u_int8_t
* str1
, u_int8_t
* str2
)
80 u_int8_t length
, length2
;
89 if (length
== length2
)
91 else if (length
< length2
)
101 u_int32_t aChar
, bChar
;
106 if (aChar
!= bChar
) /* If they don't match exacly, do case conversion */
108 u_int16_t aSortWord
, bSortWord
;
110 aSortWord
= gCompareTable
[aChar
];
111 bSortWord
= gCompareTable
[bChar
];
113 if (aSortWord
> bSortWord
)
116 if (aSortWord
< bSortWord
)
121 * If characters match exactly, then go on to next character
122 * immediately without doing any extra work.
126 /* if you got to here, then return bestGuess */
132 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
135 // --------------------------
140 // The lower case table starts with 256 entries (one for each of the upper bytes
141 // of the original Unicode char). If that entry is zero, then all characters with
142 // that upper byte are already case folded. If the entry is non-zero, then it is
143 // the _index_ (not byte offset) of the start of the sub-table for the characters
144 // with that upper byte. All ignorable characters are folded to the value zero.
148 // Let c = source Unicode character
149 // Let table[] = lower case table
151 // lower = table[highbyte(c)]
155 // lower = table[lower+lowbyte(c)]
158 // ignore this character
160 // To handle ignorable characters, we now need a loop to find the next valid character.
161 // Also, we can't pre-compute the number of characters to compare; the string length might
162 // be larger than the number of non-ignorable characters. Further, we must be able to handle
163 // ignorable characters at any point in the string, including as the first or last characters.
164 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
165 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
166 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
167 // an invalid Unicode character).
172 // c1 = GetNextValidChar(str1) // returns zero if at end of string
173 // c2 = GetNextValidChar(str2)
175 // if (c1 != c2) break // found a difference
177 // if (c1 == 0) // reached end of string on both strings at once?
178 // return 0; // yes, so strings are equal
181 // // When we get here, c1 != c2. So, we just need to determine which one is less.
188 int32_t FastUnicodeCompare( u_int16_t
* str1
, register u_int32_t length1
,
189 u_int16_t
* str2
, register u_int32_t length2
)
191 register u_int16_t c1
,c2
;
192 register u_int16_t temp
;
199 /* Set default values for c1, c2 in case there are no more valid chars */
203 /* Find next non-ignorable char from str1, or zero if no more */
204 while (length1
&& c1
== 0) {
205 c1
= SWAP_BE16(*(str1
++));
207 if ((temp
= gLowerCaseTable
[c1
>>8]) != 0) // is there a subtable for this upper byte?
208 c1
= gLowerCaseTable
[temp
+ (c1
& 0x00FF)]; // yes, so fold the char
211 /* Find next non-ignorable char from str2, or zero if no more */
212 while (length2
&& c2
== 0) {
213 c2
= SWAP_BE16(*(str2
++));
215 if ((temp
= gLowerCaseTable
[c2
>>8]) != 0) // is there a subtable for this upper byte?
216 c2
= gLowerCaseTable
[temp
+ (c2
& 0x00FF)]; // yes, so fold the char
219 if (c1
!= c2
) /* found a difference, so stop looping */
222 if (c1
== 0) /* did we reach the end of both strings at the same time? */
223 return 0; /* yes, so strings are equal */
234 // BinaryUnicodeCompare - Compare two Unicode strings; produce a relative ordering
235 // Compared using a 16-bit binary comparison (no case folding)
237 int32_t BinaryUnicodeCompare (u_int16_t
* str1
, u_int32_t length1
,
238 u_int16_t
* str2
, u_int32_t length2
)
240 register u_int16_t c1
, c2
;
246 if (length1
< length2
) {
249 } else if (length1
> length2
) {
271 * UTF-8 (UCS Transformation Format)
273 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
274 * requires a maximum of three 3 bytes per UCS-2 character. Only the
275 * shortest encoding required to represent the significant UCS-2 bits
278 * UTF-8 Multibyte Codes
280 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
281 * -------------------------------------------------------------------
282 * 1 7 0x0000 0x007F 0xxxxxxx
283 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
284 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
285 * -------------------------------------------------------------------
290 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
291 * null terminated UTF-8 string at utf8p.
293 * ucslen is the number of UCS-2 input characters (not bytes)
294 * bufsize is the size of the output buffer in bytes
297 utf_encodestr( const u_int16_t
* ucsp
, int ucslen
,
298 u_int8_t
* utf8p
, u_int32_t bufsize
, int byte_order
)
303 bufend
= utf8p
+ bufsize
;
305 while (ucslen
-- > 0) {
306 if (byte_order
== OSBigEndian
)
307 ucs_ch
= SWAP_BE16(*ucsp
++);
309 ucs_ch
= SWAP_LE16(*ucsp
++);
311 if (ucs_ch
< 0x0080) {
315 continue; /* skip over embedded NULLs */
318 } else if (ucs_ch
< 0x800) {
319 if ((utf8p
+ 1) >= bufend
)
321 *utf8p
++ = (ucs_ch
>> 6) | 0xc0;
322 *utf8p
++ = (ucs_ch
& 0x3f) | 0x80;
325 if ((utf8p
+ 2) >= bufend
)
327 *utf8p
++ = (ucs_ch
>> 12) | 0xe0;
328 *utf8p
++ = ((ucs_ch
>> 6) & 0x3f) | 0x80;
329 *utf8p
++ = ((ucs_ch
) & 0x3f) | 0x80;
338 * utf_decodestr - Decodes the null terminated UTF-8 string at
339 * utf8p into a UCS-2 (Unicode) string at ucsp.
341 * ucslen is the number of UCS-2 output characters (not bytes)
342 * bufsize is the size of the output buffer in bytes
344 void utf_decodestr(const u_int8_t
* utf8p
, u_int16_t
* ucsp
, u_int16_t
* ucslen
, u_int32_t bufsize
, int byte_order
)
352 bufend
= (u_int16_t
*)((u_int8_t
*)ucsp
+ bufsize
);
354 while ((byte
= *utf8p
++) != '\0') {
358 /* check for ascii */
362 if (byte_order
== OSBigEndian
)
363 *ucsp
++ = SWAP_BE16(ucs_ch
);
365 *ucsp
++ = SWAP_LE16(ucs_ch
);
370 switch (byte
& 0xf0) {
374 /* extract bits 6 - 10 from first byte */
375 ucs_ch
= (byte
& 0x1F) << 6;
379 /* extract bits 12 - 15 from first byte */
380 ucs_ch
= (byte
& 0x0F) << 6;
382 /* extract bits 6 - 11 from second byte */
383 if (((byte
= *utf8p
++) & 0xc0) != 0x80)
386 ucs_ch
+= (byte
& 0x3F);
393 /* extract bits 0 - 5 from final byte */
394 if (((byte
= *utf8p
++) & 0xc0) != 0x80)
396 ucs_ch
+= (byte
& 0x3F);
398 if (byte_order
== OSBigEndian
)
399 *ucsp
++ = SWAP_BE16(ucs_ch
);
401 *ucsp
++ = SWAP_LE16(ucs_ch
);
404 if (byte_order
== OSBigEndian
)
405 *ucslen
= SWAP_BE16(ucsp
- bufstart
);
407 *ucslen
= SWAP_LE16(ucsp
- bufstart
);