]>
git.saurik.com Git - apple/bootx.git/blob - bootx.tproj/fs.subproj/HFSCompare.c
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
26 * HFSCompare.c - Functions for working with and comparing HFS names.
28 * Copyright (c) 1999-2000 Apple Computer, Inc.
34 #include "CaseTables.h"
37 //_______________________________________________________________________
39 // Routine: FastRelString
41 // Output: returns -1 if str1 < str2
42 // returns 1 if str1 > str2
45 //_______________________________________________________________________
47 int32_t FastRelString(char *str1
, char *str2
)
50 u_int8_t length
, length2
;
56 if (length
== length2
)
58 else if (length
< length2
)
68 u_int32_t aChar
, bChar
;
73 if (aChar
!= bChar
) /* If they don't match exactly, do case conversion */
75 u_int16_t aSortWord
, bSortWord
;
77 aSortWord
= gCompareTable
[aChar
];
78 bSortWord
= gCompareTable
[bChar
];
80 if (aSortWord
> bSortWord
)
83 if (aSortWord
< bSortWord
)
88 * If characters match exactly, then go on to next character
89 * immediately without doing any extra work.
93 /* if you got to here, then return bestGuess */
100 // FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
103 // --------------------------
108 // The lower case table starts with 256 entries (one for each of the upper bytes
109 // of the original Unicode char). If that entry is zero, then all characters with
110 // that upper byte are already case folded. If the entry is non-zero, then it is
111 // the _index_ (not byte offset) of the start of the sub-table for the characters
112 // with that upper byte. All ignorable characters are folded to the value zero.
116 // Let c = source Unicode character
117 // Let table[] = lower case table
119 // lower = table[highbyte(c)]
123 // lower = table[lower+lowbyte(c)]
126 // ignore this character
128 // To handle ignorable characters, we now need a loop to find the next valid character.
129 // Also, we can't pre-compute the number of characters to compare; the string length might
130 // be larger than the number of non-ignorable characters. Further, we must be able to handle
131 // ignorable characters at any point in the string, including as the first or last characters.
132 // We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
133 // Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
134 // the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
135 // an invalid Unicode character).
140 // c1 = GetNextValidChar(str1) // returns zero if at end of string
141 // c2 = GetNextValidChar(str2)
143 // if (c1 != c2) break // found a difference
145 // if (c1 == 0) // reached end of string on both strings at once?
146 // return 0; // yes, so strings are equal
149 // // When we get here, c1 != c2. So, we just need to determine which one is less.
156 int32_t FastUnicodeCompare (u_int16_t
*str1
, register u_int32_t length1
,
157 u_int16_t
*str2
, register u_int32_t length2
)
159 register u_int16_t c1
,c2
;
160 register u_int16_t temp
;
163 /* Set default values for c1, c2 in case there are no more valid chars */
167 /* Find next non-ignorable char from str1, or zero if no more */
168 while (length1
&& c1
== 0) {
171 if ((temp
= gLowerCaseTable
[c1
>>8]) != 0) // is there a subtable for this upper byte?
172 c1
= gLowerCaseTable
[temp
+ (c1
& 0x00FF)]; // yes, so fold the char
176 /* Find next non-ignorable char from str2, or zero if no more */
177 while (length2
&& c2
== 0) {
180 if ((temp
= gLowerCaseTable
[c2
>>8]) != 0) // is there a subtable for this upper byte?
181 c2
= gLowerCaseTable
[temp
+ (c2
& 0x00FF)]; // yes, so fold the char
184 if (c1
!= c2
) /* found a difference, so stop looping */
187 if (c1
== 0) /* did we reach the end of both strings at the same time? */
188 return 0; /* yes, so strings are equal */
199 * UTF-8 (UCS Transformation Format)
201 * The following subset of UTF-8 is used to encode UCS-2 filenames. It
202 * requires a maximum of three bytes per UCS-2 character. Only the
203 * shortest encoding required to represent the significant UCS-2 bits is legal.
206 * UTF-8 Multibyte Codes
208 * Bytes Bits UCS-2 Min UCS-2 Max UTF-8 Byte Sequence (binary)
209 * -------------------------------------------------------------------
210 * 1 7 0x0000 0x007F 0xxxxxxx
211 * 2 11 0x0080 0x07FF 110xxxxx 10xxxxxx
212 * 3 16 0x0800 0xFFFF 1110xxxx 10xxxxxx 10xxxxxx
213 * -------------------------------------------------------------------
218 * utf_encodestr - Encodes the UCS-2 (Unicode) string at ucsp into a
219 * null terminated UTF-8 string at utf8p.
221 * ucslen is the number of UCS-2 input characters (not bytes)
222 * bufsize is the size of the output buffer in bytes
225 utf_encodestr(const u_int16_t
*ucsp
, int ucslen
, u_int8_t
*utf8p
, u_int32_t bufsize
)
230 bufend
= utf8p
+ bufsize
;
232 while (ucslen
-- > 0) {
235 if (ucs_ch
< 0x0080) {
239 continue; /* skip over embedded NULLs */
242 } else if (ucs_ch
< 0x800) {
243 if ((utf8p
+ 1) >= bufend
)
245 *utf8p
++ = (ucs_ch
>> 6) | 0xc0;
246 *utf8p
++ = (ucs_ch
& 0x3f) | 0x80;
249 if ((utf8p
+ 2) >= bufend
)
251 *utf8p
++ = (ucs_ch
>> 12) | 0xe0;
252 *utf8p
++ = ((ucs_ch
>> 6) & 0x3f) | 0x80;
253 *utf8p
++ = ((ucs_ch
) & 0x3f) | 0x80;
263 * utf_decodestr - Decodes the null terminated UTF-8 string at
264 * utf8p into a UCS-2 (Unicode) string at ucsp.
266 * ucslen is the number of UCS-2 output characters (not bytes)
267 * bufsize is the size of the output buffer in bytes
270 utf_decodestr(const u_int8_t
*utf8p
, u_int16_t
*ucsp
, u_int16_t
*ucslen
, u_int32_t bufsize
)
278 bufend
= (u_int16_t
*)((u_int8_t
*)ucsp
+ bufsize
);
280 while ((byte
= *utf8p
++) != '\0') {
284 /* check for ascii */
292 switch (byte
& 0xf0) {
296 /* extract bits 6 - 10 from first byte */
297 ucs_ch
= (byte
& 0x1F) << 6;
301 /* extract bits 12 - 15 from first byte */
302 ucs_ch
= (byte
& 0x0F) << 6;
304 /* extract bits 6 - 11 from second byte */
305 if (((byte
= *utf8p
++) & 0xc0) != 0x80)
308 ucs_ch
+= (byte
& 0x3F);
315 /* extract bits 0 - 5 from final byte */
316 if (((byte
= *utf8p
++) & 0xc0) != 0x80)
318 ucs_ch
+= (byte
& 0x3F);
323 *ucslen
= ucsp
- bufstart
;